From ef5fcdb7300abafd610fd8f815e77dd687eced65 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 30 Jun 2025 13:52:31 +0300 Subject: [PATCH 01/85] RDMA/sa_query: Add RMPP support for SA queries Register GSI mad agent with RMPP support and add rmpp_callback for SA queries. This is needed for querying more than one service record in one query. Signed-off-by: Or Har-Toov Signed-off-by: Mark Zhang Reviewed-by: Vlad Dumitrescu Link: https://patch.msgid.link/81dbcb48682e1838dc40f381cdcc0dc63f25f0f1.1751279793.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/sa_query.c | 39 +++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 53571e6b3162..770e9f18349b 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -107,6 +107,8 @@ struct ib_sa_device { struct ib_sa_query { void (*callback)(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad); + void (*rmpp_callback)(struct ib_sa_query *sa_query, int status, + struct ib_mad_recv_wc *mad); void (*release)(struct ib_sa_query *); struct ib_sa_client *client; struct ib_sa_port *port; @@ -1987,23 +1989,29 @@ static void send_handler(struct ib_mad_agent *agent, { struct ib_sa_query *query = mad_send_wc->send_buf->context[0]; unsigned long flags; + int status = 0; - if (query->callback) + if (query->callback || query->rmpp_callback) { switch (mad_send_wc->status) { case IB_WC_SUCCESS: /* No callback -- already got recv */ break; case IB_WC_RESP_TIMEOUT_ERR: - query->callback(query, -ETIMEDOUT, NULL); + status = -ETIMEDOUT; break; case IB_WC_WR_FLUSH_ERR: - query->callback(query, -EINTR, NULL); + status = -EINTR; break; default: - query->callback(query, -EIO, NULL); + status = -EIO; break; } + if (status) + query->callback ? query->callback(query, status, NULL) : + query->rmpp_callback(query, status, NULL); + } + xa_lock_irqsave(&queries, flags); __xa_erase(&queries, query->id); xa_unlock_irqrestore(&queries, flags); @@ -2019,17 +2027,25 @@ static void recv_handler(struct ib_mad_agent *mad_agent, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_sa_query *query; + struct ib_mad *mad; + if (!send_buf) return; query = send_buf->context[0]; - if (query->callback) { + mad = mad_recv_wc->recv_buf.mad; + + if (query->rmpp_callback) { if (mad_recv_wc->wc->status == IB_WC_SUCCESS) - query->callback(query, - mad_recv_wc->recv_buf.mad->mad_hdr.status ? - -EINVAL : 0, - (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad); + query->rmpp_callback(query, mad->mad_hdr.status ? + -EINVAL : 0, mad_recv_wc); + else + query->rmpp_callback(query, -EIO, NULL); + } else if (query->callback) { + if (mad_recv_wc->wc->status == IB_WC_SUCCESS) + query->callback(query, mad->mad_hdr.status ? 
+ -EINVAL : 0, (struct ib_sa_mad *)mad); else query->callback(query, -EIO, NULL); } @@ -2181,8 +2197,9 @@ static int ib_sa_add_one(struct ib_device *device) sa_dev->port[i].agent = ib_register_mad_agent(device, i + s, IB_QPT_GSI, - NULL, 0, send_handler, - recv_handler, sa_dev, 0); + NULL, IB_MGMT_RMPP_VERSION, + send_handler, recv_handler, + sa_dev, 0); if (IS_ERR(sa_dev->port[i].agent)) { ret = PTR_ERR(sa_dev->port[i].agent); goto err; From a892a3e74fb4f6ef040659297603abf11ccf29a7 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 30 Jun 2025 13:52:32 +0300 Subject: [PATCH 02/85] RDMA/sa_query: Support IB service records resolution Add an SA query API ib_sa_service_rec_get() to support building and sending SA query MADs that ask for service records with a specific name or ID, and receiving and parsing responses from the SM. Signed-off-by: Or Har-Toov Signed-off-by: Mark Zhang Reviewed-by: Vlad Dumitrescu Link: https://patch.msgid.link/9af6c82f3a3a9d975115a33235fb4ffc7c8edb21.1751279793.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/sa_query.c | 238 +++++++++++++++++++++++++++++ include/rdma/ib_mad.h | 1 + include/rdma/ib_sa.h | 37 +++++ 3 files changed, 276 insertions(+) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 770e9f18349b..c0a7af1b4fe4 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -152,6 +152,13 @@ struct ib_sa_mcmember_query { struct ib_sa_query sa_query; }; +struct ib_sa_service_query { + void (*callback)(int status, struct sa_service_rec *rec, + unsigned int num_services, void *context); + void *context; + struct ib_sa_query sa_query; +}; + static LIST_HEAD(ib_nl_request_list); static DEFINE_SPINLOCK(ib_nl_request_lock); static atomic_t ib_nl_sa_request_seq; @@ -686,6 +693,58 @@ static const struct ib_field guidinfo_rec_table[] = { .size_bits = 512 }, }; +#define SERVICE_REC_FIELD(field) \ + .struct_offset_bytes = offsetof(struct sa_service_rec, field), \ + .struct_size_bytes = sizeof_field(struct sa_service_rec, field), \ + .field_name = "sa_service_rec:" #field + +static const struct ib_field service_rec_table[] = { + { SERVICE_REC_FIELD(id), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 64 }, + { SERVICE_REC_FIELD(gid), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 128 }, + { SERVICE_REC_FIELD(pkey), + .offset_words = 6, + .offset_bits = 0, + .size_bits = 16 }, + { RESERVED, + .offset_words = 6, + .offset_bits = 16, + .size_bits = 16 }, + { SERVICE_REC_FIELD(lease), + .offset_words = 7, + .offset_bits = 0, + .size_bits = 32 }, + { SERVICE_REC_FIELD(key), + .offset_words = 8, + .offset_bits = 0, + .size_bits = 128 }, + { SERVICE_REC_FIELD(name), + .offset_words = 12, + .offset_bits = 0, + .size_bits = 512 }, + { SERVICE_REC_FIELD(data_8), + .offset_words = 28, + .offset_bits = 0, + .size_bits = 128 }, + { SERVICE_REC_FIELD(data_16), + .offset_words = 32, + .offset_bits = 0, + .size_bits = 128 }, + { SERVICE_REC_FIELD(data_32), + .offset_words = 36, + .offset_bits = 0, + .size_bits = 128 }, + { SERVICE_REC_FIELD(data_64), + .offset_words = 40, + .offset_bits = 0, + .size_bits = 128 }, +}; + #define RDMA_PRIMARY_PATH_MAX_REC_NUM 3 static inline void ib_sa_disable_local_svc(struct ib_sa_query *query) @@ -1392,6 +1451,20 @@ void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute) } EXPORT_SYMBOL(ib_sa_pack_path); +void ib_sa_pack_service(struct sa_service_rec *rec, void *attribute) +{ + ib_pack(service_rec_table, 
ARRAY_SIZE(service_rec_table), rec, + attribute); +} +EXPORT_SYMBOL(ib_sa_pack_service); + +void ib_sa_unpack_service(void *attribute, struct sa_service_rec *rec) +{ + ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table), attribute, + rec); +} +EXPORT_SYMBOL(ib_sa_unpack_service); + static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client, struct ib_sa_device *sa_dev, u32 port_num) @@ -1481,6 +1554,68 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, } } +#define IB_SA_DATA_OFFS 56 +#define IB_SERVICE_REC_SZ 176 + +static void ib_unpack_service_rmpp(struct sa_service_rec *rec, + struct ib_mad_recv_wc *mad_wc, + int num_services) +{ + unsigned int cp_sz, data_i, data_size, rec_i = 0, buf_i = 0; + struct ib_mad_recv_buf *mad_buf; + u8 buf[IB_SERVICE_REC_SZ]; + u8 *data; + + data_size = sizeof(((struct ib_sa_mad *) mad_buf->mad)->data); + + list_for_each_entry(mad_buf, &mad_wc->rmpp_list, list) { + data = ((struct ib_sa_mad *) mad_buf->mad)->data; + data_i = 0; + while (data_i < data_size && rec_i < num_services) { + cp_sz = min(IB_SERVICE_REC_SZ - buf_i, + data_size - data_i); + memcpy(buf + buf_i, data + data_i, cp_sz); + data_i += cp_sz; + buf_i += cp_sz; + if (buf_i == IB_SERVICE_REC_SZ) { + ib_sa_unpack_service(buf, rec + rec_i); + buf_i = 0; + rec_i++; + } + } + } +} + +static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query, int status, + struct ib_mad_recv_wc *mad_wc) +{ + struct ib_sa_service_query *query = + container_of(sa_query, struct ib_sa_service_query, sa_query); + struct sa_service_rec *rec; + int num_services; + + if (!mad_wc || !mad_wc->recv_buf.mad) { + query->callback(status, NULL, 0, query->context); + return; + } + + num_services = (mad_wc->mad_len - IB_SA_DATA_OFFS) / IB_SERVICE_REC_SZ; + if (!num_services) { + query->callback(-ENODATA, NULL, 0, query->context); + return; + } + + rec = kmalloc_array(num_services, sizeof(*rec), GFP_KERNEL); + if (!rec) { + query->callback(-ENOMEM, NULL, 0, query->context); + return; + } + + ib_unpack_service_rmpp(rec, mad_wc, num_services); + query->callback(status, rec, num_services, query->context); + kfree(rec); +} + static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) { struct ib_sa_path_query *query = @@ -1490,6 +1625,14 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) kfree(query); } +static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) +{ + struct ib_sa_service_query *query = + container_of(sa_query, struct ib_sa_service_query, sa_query); + + kfree(query); +} + /** * ib_sa_path_rec_get - Start a Path get query * @client:SA client @@ -1620,6 +1763,101 @@ err1: } EXPORT_SYMBOL(ib_sa_path_rec_get); +/** + * ib_sa_service_rec_get - Start a Service get query + * @client: SA client + * @device: device to send query on + * @port_num: port number to send query on + * @rec: Service Record to send in query + * @comp_mask: component mask to send in query + * @timeout_ms: time to wait for response + * @gfp_mask: GFP mask to use for internal allocations + * @callback: function called when query completes, times out or is + * canceled + * @context: opaque user context passed to callback + * @sa_query: query context, used to cancel query + * + * Send a Service Record Get query to the SA to look up a path. The + * callback function will be called when the query completes (or + * fails); status is 0 for a successful response, -EINTR if the query + * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error + * occurred sending the query. 
The resp parameter of the callback is + * only valid if status is 0. + * + * If the return value of ib_sa_service_rec_get() is negative, it is an + * error code. Otherwise it is a query ID that can be used to cancel + * the query. + */ +int ib_sa_service_rec_get(struct ib_sa_client *client, + struct ib_device *device, u32 port_num, + struct sa_service_rec *rec, + ib_sa_comp_mask comp_mask, + unsigned long timeout_ms, gfp_t gfp_mask, + void (*callback)(int status, + struct sa_service_rec *resp, + unsigned int num_services, + void *context), + void *context, struct ib_sa_query **sa_query) +{ + struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_service_query *query; + struct ib_mad_agent *agent; + struct ib_sa_port *port; + struct ib_sa_mad *mad; + int ret; + + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + + query = kzalloc(sizeof(*query), gfp_mask); + if (!query) + return -ENOMEM; + + query->sa_query.port = port; + + ret = alloc_mad(&query->sa_query, gfp_mask); + if (ret) + goto err1; + + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; + + mad = query->sa_query.mad_buf->mad; + init_mad(&query->sa_query, agent); + + query->sa_query.rmpp_callback = callback ? ib_sa_service_rec_callback : + NULL; + query->sa_query.release = ib_sa_service_rec_release; + mad->mad_hdr.method = IB_MGMT_METHOD_GET_TABLE; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC); + mad->sa_hdr.comp_mask = comp_mask; + + ib_sa_pack_service(rec, mad->data); + + *sa_query = &query->sa_query; + query->sa_query.mad_buf->context[1] = rec; + + ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); + if (ret < 0) + goto err2; + + return ret; + +err2: + *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); + free_mad(&query->sa_query); +err1: + kfree(query); + return ret; +} +EXPORT_SYMBOL(ib_sa_service_rec_get); + static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 3f1b58d8b4bf..8bd0e1eb393b 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -48,6 +48,7 @@ #define IB_MGMT_METHOD_REPORT 0x06 #define IB_MGMT_METHOD_REPORT_RESP 0x86 #define IB_MGMT_METHOD_TRAP_REPRESS 0x07 +#define IB_MGMT_METHOD_GET_TABLE 0x12 #define IB_MGMT_METHOD_RESP 0x80 #define IB_BM_ATTR_MOD_RESP cpu_to_be32(1) diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index b46353fc53bf..95e8924ad563 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -189,6 +189,20 @@ struct sa_path_rec { u32 flags; }; +struct sa_service_rec { + __be64 id; + __u8 gid[16]; + __be16 pkey; + __u8 reserved[2]; + __be32 lease; + __u8 key[16]; + __u8 name[64]; + __u8 data_8[16]; + __be16 data_16[8]; + __be32 data_32[4]; + __be64 data_64[2]; +}; + static inline enum ib_gid_type sa_conv_pathrec_to_gid_type(struct sa_path_rec *rec) { @@ -417,6 +431,17 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, unsigned int num_prs, void *context), void *context, struct ib_sa_query **query); +int ib_sa_service_rec_get(struct ib_sa_client *client, + struct ib_device *device, u32 port_num, + struct sa_service_rec *rec, + ib_sa_comp_mask comp_mask, + unsigned long timeout_ms, gfp_t gfp_mask, + void (*callback)(int status, + struct sa_service_rec *resp, + unsigned int num_services, + void *context), + void *context, struct ib_sa_query 
**sa_query); + struct ib_sa_multicast { struct ib_sa_mcmember_rec rec; ib_sa_comp_mask comp_mask; @@ -508,6 +533,18 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num, */ void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute); +/** + * ib_sa_pack_service - Convert a service record from struct ib_sa_service_rec + * to IB MAD wire format. + */ +void ib_sa_pack_service(struct sa_service_rec *rec, void *attribute); + +/** + * ib_sa_unpack_service - Convert a service record from MAD format to struct + * ib_sa_service_rec. + */ +void ib_sa_unpack_service(void *attribute, struct sa_service_rec *rec); + /** * ib_sa_unpack_path - Convert a path record from MAD format to struct * ib_sa_path_rec. From a6404823fe20e06d4061bc63e0295b7165af4c14 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 30 Jun 2025 13:52:33 +0300 Subject: [PATCH 03/85] RDMA/cma: Support IB service record resolution Add new UCMA command and the corresponding CMA implementation. Userspace can send this command to request service resolution based on service name or ID. On a successful resolution, one or multiple service records are returned, the first one will be used as destination address by default. Two new CM events are added and returned to caller accordingly: - RDMA_CM_EVENT_ADDRINFO_RESOLVED: Resolve succeeded; - RDMA_CM_EVENT_ADDRINFO_ERROR: Resolve failed. Internally two new CM states are added: - RDMA_CM_ADDRINFO_QUERY: CM is in the process of IB service resolution; - RDMA_CM_ADDRINFO_RESOLVED: CM has finished the resolve process. With these new states, beside existing state transfer processes, 2 new processes are supported: 1. The default address is used: RDMA_CM_ADDR_BOUND -> RDMA_CM_ADDRINFO_QUERY -> RDMA_CM_ADDRINFO_RESOLVED -> RDMA_CM_ROUTE_QUERY 2. To use a different address: RDMA_CM_ADDR_BOUND -> RDMA_CM_ADDRINFO_QUERY-> RDMA_CM_ADDRINFO_RESOLVED -> RDMA_CM_ADDR_QUERY -> RDMA_CM_ADDR_RESOLVED -> RDMA_CM_ROUTE_QUERY In the 2nd case, resolve_addrinfo returns multiple records, a user could call rdma_resolve_addr() with the one that is not the first. 
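As an illustration only (not part of this series), a sketch of how a kernel ULP might drive the first flow with the new API. The cm_id is assumed to be already bound to a local AF_IB address (so it is in RDMA_CM_ADDR_BOUND with a device attached), and the service ID value is made up:

  #include <rdma/rdma_cm.h>

  static int example_cm_handler(struct rdma_cm_id *id,
                                struct rdma_cm_event *event)
  {
          switch (event->event) {
          case RDMA_CM_EVENT_ADDRINFO_RESOLVED:
                  /* First returned record is already set as the destination,
                   * so flow 1 can go straight to route resolution.
                   */
                  if (rdma_resolve_route(id, 2000))
                          pr_err("route resolution failed\n");
                  return 0;
          case RDMA_CM_EVENT_ADDRINFO_ERROR:
                  pr_err("service resolution failed: %d\n", event->status);
                  return 0;
          default:
                  return 0;
          }
  }

  static int example_resolve_service(struct rdma_cm_id *id)
  {
          struct rdma_ucm_ib_service ibs = {
                  .service_id = 0x1ULL,  /* example value only */
                  .flags      = RDMA_USER_CM_IB_SERVICE_FLAG_ID,
          };

          return rdma_resolve_ib_service(id, &ibs);
  }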
Signed-off-by: Or Har-Toov Signed-off-by: Mark Zhang Reviewed-by: Vlad Dumitrescu Link: https://patch.msgid.link/b6e82ad75522a13b5efe4ff86da0e465aab04cc2.1751279794.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma.c | 136 ++++++++++++++++++++++++++++- drivers/infiniband/core/cma_priv.h | 4 +- drivers/infiniband/core/ucma.c | 30 ++++++- include/rdma/rdma_cm.h | 18 +++- include/uapi/rdma/rdma_user_cm.h | 20 ++++- 5 files changed, 202 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 9b471548e7ae..5b2d3ae3f9fc 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2076,6 +2076,7 @@ static void _destroy_id(struct rdma_id_private *id_priv, kfree(id_priv->id.route.path_rec); kfree(id_priv->id.route.path_rec_inbound); kfree(id_priv->id.route.path_rec_outbound); + kfree(id_priv->id.route.service_recs); put_net(id_priv->id.route.addr.dev_addr.net); kfree(id_priv); @@ -3382,13 +3383,18 @@ err1: int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms) { struct rdma_id_private *id_priv; + enum rdma_cm_state state; int ret; if (!timeout_ms) return -EINVAL; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) + state = id_priv->state; + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, + RDMA_CM_ROUTE_QUERY) && + !cma_comp_exch(id_priv, RDMA_CM_ADDRINFO_RESOLVED, + RDMA_CM_ROUTE_QUERY)) return -EINVAL; cma_id_get(id_priv); @@ -3409,7 +3415,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms) return 0; err: - cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); + cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, state); cma_id_put(id_priv); return ret; } @@ -5506,3 +5512,129 @@ static void __exit cma_cleanup(void) module_init(cma_init); module_exit(cma_cleanup); + +static void cma_query_ib_service_handler(int status, + struct sa_service_rec *recs, + unsigned int num_recs, void *context) +{ + struct cma_work *work = context; + struct rdma_id_private *id_priv = work->id; + struct sockaddr_ib *addr; + + if (status) + goto fail; + + if (!num_recs) { + status = -ENOENT; + goto fail; + } + + if (id_priv->id.route.service_recs) { + status = -EALREADY; + goto fail; + } + + id_priv->id.route.service_recs = + kmalloc_array(num_recs, sizeof(*recs), GFP_KERNEL); + if (!id_priv->id.route.service_recs) { + status = -ENOMEM; + goto fail; + } + + id_priv->id.route.num_service_recs = num_recs; + memcpy(id_priv->id.route.service_recs, recs, sizeof(*recs) * num_recs); + + addr = (struct sockaddr_ib *)&id_priv->id.route.addr.dst_addr; + addr->sib_family = AF_IB; + addr->sib_addr = *(struct ib_addr *)&recs->gid; + addr->sib_pkey = recs->pkey; + addr->sib_sid = recs->id; + rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, + (union ib_gid *)&addr->sib_addr); + ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, + ntohs(addr->sib_pkey)); + + queue_work(cma_wq, &work->work); + return; + +fail: + work->old_state = RDMA_CM_ADDRINFO_QUERY; + work->new_state = RDMA_CM_ADDR_BOUND; + work->event.event = RDMA_CM_EVENT_ADDRINFO_ERROR; + work->event.status = status; + pr_debug_ratelimited( + "RDMA CM: SERVICE_ERROR: failed to query service record. 
status %d\n", + status); + queue_work(cma_wq, &work->work); +} + +static int cma_resolve_ib_service(struct rdma_id_private *id_priv, + struct rdma_ucm_ib_service *ibs) +{ + struct sa_service_rec sr = {}; + ib_sa_comp_mask mask = 0; + struct cma_work *work; + + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return -ENOMEM; + + cma_id_get(id_priv); + + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler); + work->old_state = RDMA_CM_ADDRINFO_QUERY; + work->new_state = RDMA_CM_ADDRINFO_RESOLVED; + work->event.event = RDMA_CM_EVENT_ADDRINFO_RESOLVED; + + if (ibs->flags & RDMA_USER_CM_IB_SERVICE_FLAG_ID) { + sr.id = cpu_to_be64(ibs->service_id); + mask |= IB_SA_SERVICE_REC_SERVICE_ID; + } + if (ibs->flags & RDMA_USER_CM_IB_SERVICE_FLAG_NAME) { + strscpy(sr.name, ibs->service_name, sizeof(sr.name)); + mask |= IB_SA_SERVICE_REC_SERVICE_NAME; + } + + id_priv->query_id = ib_sa_service_rec_get(&sa_client, + id_priv->id.device, + id_priv->id.port_num, + &sr, mask, + 2000, GFP_KERNEL, + cma_query_ib_service_handler, + work, &id_priv->query); + + if (id_priv->query_id < 0) { + cma_id_put(id_priv); + kfree(work); + return id_priv->query_id; + } + + return 0; +} + +int rdma_resolve_ib_service(struct rdma_cm_id *id, + struct rdma_ucm_ib_service *ibs) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!id_priv->cma_dev || + !cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDRINFO_QUERY)) + return -EINVAL; + + if (rdma_cap_ib_sa(id->device, id->port_num)) + ret = cma_resolve_ib_service(id_priv, ibs); + else + ret = -EOPNOTSUPP; + + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, RDMA_CM_ADDRINFO_QUERY, RDMA_CM_ADDR_BOUND); + return ret; +} +EXPORT_SYMBOL(rdma_resolve_ib_service); diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h index b7354c94cf1b..c604b601f4d9 100644 --- a/drivers/infiniband/core/cma_priv.h +++ b/drivers/infiniband/core/cma_priv.h @@ -47,7 +47,9 @@ enum rdma_cm_state { RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN, RDMA_CM_DEVICE_REMOVAL, - RDMA_CM_DESTROYING + RDMA_CM_DESTROYING, + RDMA_CM_ADDRINFO_QUERY, + RDMA_CM_ADDRINFO_RESOLVED }; struct rdma_id_private { diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 6e700b974033..1915f4e68308 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -282,6 +282,10 @@ static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx, } uevent->resp.event = event->event; uevent->resp.status = event->status; + + if (event->event == RDMA_CM_EVENT_ADDRINFO_RESOLVED) + goto out; + if (ctx->cm_id->qp_type == IB_QPT_UD) ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud, &event->param.ud); @@ -289,6 +293,7 @@ static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx, ucma_copy_conn_event(&uevent->resp.param.conn, &event->param.conn); +out: uevent->resp.ece.vendor_id = event->ece.vendor_id; uevent->resp.ece.attr_mod = event->ece.attr_mod; return uevent; @@ -728,6 +733,28 @@ static ssize_t ucma_resolve_addr(struct ucma_file *file, return ret; } +static ssize_t ucma_resolve_ib_service(struct ucma_file *file, + const char __user *inbuf, int in_len, + int out_len) +{ + struct rdma_ucm_resolve_ib_service cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + mutex_lock(&ctx->mutex); + ret = 
rdma_resolve_ib_service(ctx->cm_id, &cmd.ibs); + mutex_unlock(&ctx->mutex); + ucma_put_ctx(ctx); + return ret; +} + static ssize_t ucma_resolve_route(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) @@ -1703,7 +1730,8 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, [RDMA_USER_CM_CMD_QUERY] = ucma_query, [RDMA_USER_CM_CMD_BIND] = ucma_bind, [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, - [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast + [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast, + [RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE] = ucma_resolve_ib_service }; static ssize_t ucma_write(struct file *filp, const char __user *buf, diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index d1593ad47e28..72d1568e4cfb 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -33,7 +33,9 @@ enum rdma_cm_event_type { RDMA_CM_EVENT_MULTICAST_JOIN, RDMA_CM_EVENT_MULTICAST_ERROR, RDMA_CM_EVENT_ADDR_CHANGE, - RDMA_CM_EVENT_TIMEWAIT_EXIT + RDMA_CM_EVENT_TIMEWAIT_EXIT, + RDMA_CM_EVENT_ADDRINFO_RESOLVED, + RDMA_CM_EVENT_ADDRINFO_ERROR }; const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event); @@ -63,6 +65,9 @@ struct rdma_route { * 2 - Both primary and alternate path are available */ int num_pri_alt_paths; + + unsigned int num_service_recs; + struct sa_service_rec *service_recs; }; struct rdma_conn_param { @@ -197,6 +202,17 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, */ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms); +/** + * rdma_resolve_ib_service - Resolve the IB service record of the + * service with the given service ID or name. + * + * This function is optional in the rdma cm flow. It is called on the client + * side of a connection, before calling rdma_resolve_route. The resolution + * can be done once per rdma_cm_id. + */ +int rdma_resolve_ib_service(struct rdma_cm_id *id, + struct rdma_ucm_ib_service *ibs); + /** * rdma_create_qp - Allocate a QP and associate it with the specified RDMA * identifier. diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 7cea03581f79..8799623bcba0 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -67,7 +67,8 @@ enum { RDMA_USER_CM_CMD_QUERY, RDMA_USER_CM_CMD_BIND, RDMA_USER_CM_CMD_RESOLVE_ADDR, - RDMA_USER_CM_CMD_JOIN_MCAST + RDMA_USER_CM_CMD_JOIN_MCAST, + RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE }; /* See IBTA Annex A11, servies ID bytes 4 & 5 */ @@ -338,4 +339,21 @@ struct rdma_ucm_migrate_resp { __u32 events_reported; }; +enum { + RDMA_USER_CM_IB_SERVICE_FLAG_ID = 1 << 0, + RDMA_USER_CM_IB_SERVICE_FLAG_NAME = 1 << 1, +}; + +#define RDMA_USER_CM_IB_SERVICE_NAME_SIZE 64 +struct rdma_ucm_ib_service { + __u64 service_id; + __u8 service_name[RDMA_USER_CM_IB_SERVICE_NAME_SIZE]; + __u32 flags; + __u32 reserved; +}; + +struct rdma_ucm_resolve_ib_service { + __u32 id; + struct rdma_ucm_ib_service ibs; +}; #endif /* RDMA_USER_CM_H */ From 810f874eda8e492701ba3623aa298caad61bb47c Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 30 Jun 2025 13:52:34 +0300 Subject: [PATCH 04/85] RDMA/ucma: Support query resolved service records Enable user-space to query resolved service records through a ucma command when a RDMA_CM_EVENT_ADDRINFO_RESOLVED event is received. 
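The response layout is a fixed header followed by a flexible array of records; the kernel copies only as many records as fit in the caller's buffer, while num_service_recs always reports how many were resolved, so the caller can retry with a larger buffer. A userspace-style sketch of the sizing rule (struct layouts mirrored from the uapi headers added here, with __be* types simplified to fixed-width host types; the buffer size is an arbitrary example):

  #include <stdint.h>
  #include <stdio.h>

  struct ib_user_service_rec {            /* mirrors uapi/rdma/ib_user_sa.h */
          uint64_t id;
          uint8_t  gid[16];
          uint16_t pkey;
          uint8_t  reserved[2];
          uint32_t lease;
          uint8_t  key[16];
          uint8_t  name[64];
          uint8_t  data_8[16];
          uint16_t data_16[8];
          uint32_t data_32[4];
          uint64_t data_64[2];
  };

  struct rdma_ucm_query_ib_service_resp { /* mirrors uapi/rdma/rdma_user_cm.h */
          uint32_t num_service_recs;
          struct ib_user_service_rec recs[];
  };

  int main(void)
  {
          size_t out_len = 4096;          /* example response buffer size */
          size_t hdr = sizeof(struct rdma_ucm_query_ib_service_resp);
          size_t per_rec = sizeof(struct ib_user_service_rec);
          size_t max_recs = (out_len - hdr) / per_rec;

          /* The kernel rejects out_len < hdr with -ENOSPC, copies
           * min(max_recs, num_service_recs) records, and always fills
           * num_service_recs so the query can be re-issued if needed.
           */
          printf("room for %zu records of %zu bytes each\n", max_recs, per_rec);
          return 0;
  }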
Signed-off-by: Or Har-Toov Signed-off-by: Mark Zhang Reviewed-by: Vlad Dumitrescu Link: https://patch.msgid.link/1090ee7c00c3f8058c4f9e7557de983504a16715.1751279794.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/ucma.c | 40 ++++++++++++++++++++++++++++++++ include/uapi/rdma/ib_user_sa.h | 14 +++++++++++ include/uapi/rdma/rdma_user_cm.h | 8 ++++++- 3 files changed, 61 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 1915f4e68308..3b9ca6d7a21b 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1021,6 +1021,43 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx, return ret; } +static ssize_t ucma_query_ib_service(struct ucma_context *ctx, + void __user *response, int out_len) +{ + struct rdma_ucm_query_ib_service_resp *resp; + int n, ret = 0; + + if (out_len < sizeof(struct rdma_ucm_query_ib_service_resp)) + return -ENOSPC; + + if (!ctx->cm_id->route.service_recs) + return -ENODATA; + + resp = kzalloc(out_len, GFP_KERNEL); + if (!resp) + return -ENOMEM; + + resp->num_service_recs = ctx->cm_id->route.num_service_recs; + + n = (out_len - sizeof(struct rdma_ucm_query_ib_service_resp)) / + sizeof(struct ib_user_service_rec); + + if (!n) + goto out; + + if (n > ctx->cm_id->route.num_service_recs) + n = ctx->cm_id->route.num_service_recs; + + memcpy(resp->recs, ctx->cm_id->route.service_recs, + sizeof(*resp->recs) * n); + if (copy_to_user(response, resp, struct_size(resp, recs, n))) + ret = -EFAULT; + +out: + kfree(resp); + return ret; +} + static ssize_t ucma_query(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) @@ -1049,6 +1086,9 @@ static ssize_t ucma_query(struct ucma_file *file, case RDMA_USER_CM_QUERY_GID: ret = ucma_query_gid(ctx, response, out_len); break; + case RDMA_USER_CM_QUERY_IB_SERVICE: + ret = ucma_query_ib_service(ctx, response, out_len); + break; default: ret = -ENOSYS; break; diff --git a/include/uapi/rdma/ib_user_sa.h b/include/uapi/rdma/ib_user_sa.h index 435155d6e1c6..acfa20816bc6 100644 --- a/include/uapi/rdma/ib_user_sa.h +++ b/include/uapi/rdma/ib_user_sa.h @@ -74,4 +74,18 @@ struct ib_user_path_rec { __u8 preference; }; +struct ib_user_service_rec { + __be64 id; + __u8 gid[16]; + __be16 pkey; + __u8 reserved[2]; + __be32 lease; + __u8 key[16]; + __u8 name[64]; + __u8 data_8[16]; + __be16 data_16[8]; + __be32 data_32[4]; + __be64 data_64[2]; +}; + #endif /* IB_USER_SA_H */ diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 8799623bcba0..00501da0567e 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -148,7 +148,8 @@ struct rdma_ucm_resolve_route { enum { RDMA_USER_CM_QUERY_ADDR, RDMA_USER_CM_QUERY_PATH, - RDMA_USER_CM_QUERY_GID + RDMA_USER_CM_QUERY_GID, + RDMA_USER_CM_QUERY_IB_SERVICE }; struct rdma_ucm_query { @@ -188,6 +189,11 @@ struct rdma_ucm_query_path_resp { struct ib_path_rec_data path_data[]; }; +struct rdma_ucm_query_ib_service_resp { + __u32 num_service_recs; + struct ib_user_service_rec recs[]; +}; + struct rdma_ucm_conn_param { __u32 qp_num; __u32 qkey; From a3c9d0fcd3715541bbf97da2ddde9d032e2fe6d5 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 30 Jun 2025 13:52:35 +0300 Subject: [PATCH 05/85] RDMA/ucma: Support write an event into a CM Enable user-space to inject an event into a CM through it's event channel. Two new events are added and supported: RDMA_CM_EVENT_USER and RDMA_CM_EVENT_INTERNAL. 
With these 2 events a new event parameter "arg" is supported, which is passed from sender to receiver transparently. With this feature an application is able to write an event into a CM channel with a new user-space rdmacm API. For example thread T1 could write an event with the API: rdma_write_cm_event(cm_id, RDMA_CM_EVENT_USER, status, arg); and thread T2 could receive the event with rdma_get_cm_event(). Signed-off-by: Mark Zhang Reviewed-by: Vlad Dumitrescu Link: https://patch.msgid.link/fdf49d0b17a45933c5d8c1d90605c9447d9a3c73.1751279794.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/ucma.c | 52 +++++++++++++++++++++++++++++++- include/rdma/rdma_cm.h | 5 ++- include/uapi/rdma/rdma_user_cm.h | 16 +++++++++- 3 files changed, 70 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 3b9ca6d7a21b..f86ece701db6 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1745,6 +1745,55 @@ err_unlock: return ret; } +static ssize_t ucma_write_cm_event(struct ucma_file *file, + const char __user *inbuf, int in_len, + int out_len) +{ + struct rdma_ucm_write_cm_event cmd; + struct rdma_cm_event event = {}; + struct ucma_event *uevent; + struct ucma_context *ctx; + int ret = 0; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + if ((cmd.event != RDMA_CM_EVENT_USER) && + (cmd.event != RDMA_CM_EVENT_INTERNAL)) + return -EINVAL; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + event.event = cmd.event; + event.status = cmd.status; + event.param.arg = cmd.param.arg; + + uevent = kzalloc(sizeof(*uevent), GFP_KERNEL); + if (!uevent) { + ret = -ENOMEM; + goto out; + } + + uevent->ctx = ctx; + uevent->resp.uid = ctx->uid; + uevent->resp.id = ctx->id; + uevent->resp.event = event.event; + uevent->resp.status = event.status; + memcpy(uevent->resp.param.arg32, &event.param.arg, + sizeof(event.param.arg)); + + mutex_lock(&ctx->file->mut); + list_add_tail(&uevent->list, &ctx->file->event_list); + mutex_unlock(&ctx->file->mut); + wake_up_interruptible(&ctx->file->poll_wait); + +out: + ucma_put_ctx(ctx); + return ret; +} + static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) = { @@ -1771,7 +1820,8 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, [RDMA_USER_CM_CMD_BIND] = ucma_bind, [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast, - [RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE] = ucma_resolve_ib_service + [RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE] = ucma_resolve_ib_service, + [RDMA_USER_CM_CMD_WRITE_CM_EVENT] = ucma_write_cm_event, }; static ssize_t ucma_write(struct file *filp, const char __user *buf, diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 72d1568e4cfb..9bd930a83e6e 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -35,7 +35,9 @@ enum rdma_cm_event_type { RDMA_CM_EVENT_ADDR_CHANGE, RDMA_CM_EVENT_TIMEWAIT_EXIT, RDMA_CM_EVENT_ADDRINFO_RESOLVED, - RDMA_CM_EVENT_ADDRINFO_ERROR + RDMA_CM_EVENT_ADDRINFO_ERROR, + RDMA_CM_EVENT_USER, + RDMA_CM_EVENT_INTERNAL, }; const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event); @@ -98,6 +100,7 @@ struct rdma_cm_event { union { struct rdma_conn_param conn; struct rdma_ud_param ud; + u64 arg; } param; struct rdma_ucm_ece ece; }; diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 
00501da0567e..5ded174687ee 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -68,7 +68,8 @@ enum { RDMA_USER_CM_CMD_BIND, RDMA_USER_CM_CMD_RESOLVE_ADDR, RDMA_USER_CM_CMD_JOIN_MCAST, - RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE + RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE, + RDMA_USER_CM_CMD_WRITE_CM_EVENT, }; /* See IBTA Annex A11, servies ID bytes 4 & 5 */ @@ -304,6 +305,7 @@ struct rdma_ucm_event_resp { union { struct rdma_ucm_conn_param conn; struct rdma_ucm_ud_param ud; + __u32 arg32[2]; } param; __u32 reserved; struct rdma_ucm_ece ece; @@ -362,4 +364,16 @@ struct rdma_ucm_resolve_ib_service { __u32 id; struct rdma_ucm_ib_service ibs; }; + +struct rdma_ucm_write_cm_event { + __u32 id; + __u32 reserved; + __u32 event; + __u32 status; + union { + struct rdma_ucm_conn_param conn; + struct rdma_ucm_ud_param ud; + __u64 arg; + } param; +}; #endif /* RDMA_USER_CM_H */ From 4d674c478dc51fe241376e215861d15f1a3a9dc4 Mon Sep 17 00:00:00 2001 From: Boshi Yu Date: Fri, 25 Jul 2025 13:53:54 +0800 Subject: [PATCH 06/85] RDMA/erdma: Use dma_map_page to map scatter MTT buffer Each high-level indirect MTT entry is assumed to point to exactly one page of the low-level MTT buffer, but dma_map_sg may merge contiguous physical pages when mapping. To avoid extra overhead from splitting merged regions, use dma_map_page to map the scatter MTT buffer page by page. Reviewed-by: Cheng Xu Signed-off-by: Boshi Yu Link: https://patch.msgid.link/20250725055410.67520-2-boshiyu@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_verbs.c | 110 ++++++++++++++-------- drivers/infiniband/hw/erdma/erdma_verbs.h | 4 +- 2 files changed, 71 insertions(+), 43 deletions(-) diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 94c211df09d8..b4dadd306837 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -149,7 +149,7 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) req.phy_addr[0] = mr->mem.mtt->buf_dma; mtt_level = ERDMA_MR_MTT_1LEVEL; } else { - req.phy_addr[0] = sg_dma_address(mr->mem.mtt->sglist); + req.phy_addr[0] = mr->mem.mtt->dma_addrs[0]; mtt_level = mr->mem.mtt->level; } } else if (mr->type != ERDMA_MR_TYPE_DMA) { @@ -626,18 +626,27 @@ err_free_mtt: return ERR_PTR(-ENOMEM); } -static void erdma_destroy_mtt_buf_sg(struct erdma_dev *dev, - struct erdma_mtt *mtt) +static void erdma_unmap_page_list(struct erdma_dev *dev, dma_addr_t *pg_dma, + u32 npages) { - dma_unmap_sg(&dev->pdev->dev, mtt->sglist, - DIV_ROUND_UP(mtt->size, PAGE_SIZE), DMA_TO_DEVICE); - vfree(mtt->sglist); + u32 i; + + for (i = 0; i < npages; i++) + dma_unmap_page(&dev->pdev->dev, pg_dma[i], PAGE_SIZE, + DMA_TO_DEVICE); +} + +static void erdma_destroy_mtt_buf_dma_addrs(struct erdma_dev *dev, + struct erdma_mtt *mtt) +{ + erdma_unmap_page_list(dev, mtt->dma_addrs, mtt->npages); + vfree(mtt->dma_addrs); } static void erdma_destroy_scatter_mtt(struct erdma_dev *dev, struct erdma_mtt *mtt) { - erdma_destroy_mtt_buf_sg(dev, mtt); + erdma_destroy_mtt_buf_dma_addrs(dev, mtt); vfree(mtt->buf); kfree(mtt); } @@ -645,50 +654,69 @@ static void erdma_destroy_scatter_mtt(struct erdma_dev *dev, static void erdma_init_middle_mtt(struct erdma_mtt *mtt, struct erdma_mtt *low_mtt) { - struct scatterlist *sg; - u32 idx = 0, i; + dma_addr_t *pg_addr = mtt->buf; + u32 i; - for_each_sg(low_mtt->sglist, sg, low_mtt->nsg, i) - mtt->buf[idx++] = sg_dma_address(sg); + for (i = 0; i < 
low_mtt->npages; i++) + pg_addr[i] = low_mtt->dma_addrs[i]; } -static int erdma_create_mtt_buf_sg(struct erdma_dev *dev, struct erdma_mtt *mtt) +static u32 vmalloc_to_dma_addrs(struct erdma_dev *dev, dma_addr_t **dma_addrs, + void *buf, u64 len) { - struct scatterlist *sglist; - void *buf = mtt->buf; - u32 npages, i, nsg; + dma_addr_t *pg_dma; struct page *pg; + u32 npages, i; + void *addr; - /* Failed if buf is not page aligned */ - if ((uintptr_t)buf & ~PAGE_MASK) - return -EINVAL; + npages = (PAGE_ALIGN((u64)buf + len) - PAGE_ALIGN_DOWN((u64)buf)) >> + PAGE_SHIFT; + pg_dma = vzalloc(npages * sizeof(dma_addr_t)); + if (!pg_dma) + return 0; - npages = DIV_ROUND_UP(mtt->size, PAGE_SIZE); - sglist = vzalloc(npages * sizeof(*sglist)); - if (!sglist) - return -ENOMEM; - - sg_init_table(sglist, npages); + addr = buf; for (i = 0; i < npages; i++) { - pg = vmalloc_to_page(buf); + pg = vmalloc_to_page(addr); if (!pg) goto err; - sg_set_page(&sglist[i], pg, PAGE_SIZE, 0); - buf += PAGE_SIZE; + + pg_dma[i] = dma_map_page(&dev->pdev->dev, pg, 0, PAGE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(&dev->pdev->dev, pg_dma[i])) + goto err; + + addr += PAGE_SIZE; } - nsg = dma_map_sg(&dev->pdev->dev, sglist, npages, DMA_TO_DEVICE); - if (!nsg) - goto err; + *dma_addrs = pg_dma; - mtt->sglist = sglist; - mtt->nsg = nsg; + return npages; +err: + erdma_unmap_page_list(dev, pg_dma, i); + vfree(pg_dma); return 0; -err: - vfree(sglist); +} - return -ENOMEM; +static int erdma_create_mtt_buf_dma_addrs(struct erdma_dev *dev, + struct erdma_mtt *mtt) +{ + dma_addr_t *addrs; + u32 npages; + + /* Failed if buf is not page aligned */ + if ((uintptr_t)mtt->buf & ~PAGE_MASK) + return -EINVAL; + + npages = vmalloc_to_dma_addrs(dev, &addrs, mtt->buf, mtt->size); + if (!npages) + return -ENOMEM; + + mtt->dma_addrs = addrs; + mtt->npages = npages; + + return 0; } static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev, @@ -707,12 +735,12 @@ static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev, if (!mtt->buf) goto err_free_mtt; - ret = erdma_create_mtt_buf_sg(dev, mtt); + ret = erdma_create_mtt_buf_dma_addrs(dev, mtt); if (ret) goto err_free_mtt_buf; - ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, nsg:%u\n", - mtt->size, mtt->nsg); + ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, npages:%u\n", + mtt->size, mtt->npages); return mtt; @@ -746,8 +774,8 @@ static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size, level = 1; /* convergence the mtt table. 
*/ - while (mtt->nsg != 1 && level <= 3) { - tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->nsg)); + while (mtt->npages != 1 && level <= 3) { + tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->npages)); if (IS_ERR(tmp_mtt)) { ret = PTR_ERR(tmp_mtt); goto err_free_mtt; @@ -765,7 +793,7 @@ static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size, mtt->level = level; ibdev_dbg(&dev->ibdev, "top mtt: level:%d, dma_addr 0x%llx\n", - mtt->level, mtt->sglist[0].dma_address); + mtt->level, mtt->dma_addrs[0]); return mtt; err_free_mtt: diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index ef411b81fbd7..7d8d3fe501d5 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -99,8 +99,8 @@ struct erdma_mtt { union { dma_addr_t buf_dma; struct { - struct scatterlist *sglist; - u32 nsg; + dma_addr_t *dma_addrs; + u32 npages; u32 level; }; }; From 44d69d3cf2e8047c279cbb9708f05e2c43e33234 Mon Sep 17 00:00:00 2001 From: Konstantin Taranov Date: Tue, 29 Jul 2025 02:00:18 -0700 Subject: [PATCH 07/85] RDMA/mana_ib: Drain send wrs of GSI QP Drain send WRs of the GSI QP on device removal. In rare servicing scenarios, the hardware may delete the state of the GSI QP, preventing it from generating CQEs for pending send WRs. Since WRs submitted to the GSI QP hold CM resources, the device cannot be removed until those WRs are completed. This patch marks all pending send WRs as failed, allowing the GSI QP to release the CM resources and enabling safe device removal. Signed-off-by: Konstantin Taranov Link: https://patch.msgid.link/1753779618-23629-1-git-send-email-kotaranov@linux.microsoft.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mana/cq.c | 26 ++++++++++++++++++++++++++ drivers/infiniband/hw/mana/device.c | 3 +++ drivers/infiniband/hw/mana/mana_ib.h | 3 +++ 3 files changed, 32 insertions(+) diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c index 28e154bbb50f..1becc8779123 100644 --- a/drivers/infiniband/hw/mana/cq.c +++ b/drivers/infiniband/hw/mana/cq.c @@ -291,6 +291,32 @@ out: return wc_index; } +void mana_drain_gsi_sqs(struct mana_ib_dev *mdev) +{ + struct mana_ib_qp *qp = mana_get_qp_ref(mdev, MANA_GSI_QPN, false); + struct ud_sq_shadow_wqe *shadow_wqe; + struct mana_ib_cq *cq; + unsigned long flags; + + if (!qp) + return; + + cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq); + + spin_lock_irqsave(&cq->cq_lock, flags); + while ((shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq)) + != NULL) { + shadow_wqe->header.error_code = IB_WC_GENERAL_ERR; + shadow_queue_advance_next_to_complete(&qp->shadow_sq); + } + spin_unlock_irqrestore(&cq->cq_lock, flags); + + if (cq->ibcq.comp_handler) + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); + + mana_put_qp_ref(qp); +} + int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) { struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq); diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c index fa60872f169f..bdeddb642b87 100644 --- a/drivers/infiniband/hw/mana/device.c +++ b/drivers/infiniband/hw/mana/device.c @@ -230,6 +230,9 @@ static void mana_ib_remove(struct auxiliary_device *adev) { struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev); + if (mana_ib_is_rnic(dev)) + mana_drain_gsi_sqs(dev); + ib_unregister_device(&dev->ib_dev); dma_pool_destroy(dev->av_pool); if (mana_ib_is_rnic(dev)) { diff --git 
a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h index 5d31034ac7fb..af09a3e6ccb7 100644 --- a/drivers/infiniband/hw/mana/mana_ib.h +++ b/drivers/infiniband/hw/mana/mana_ib.h @@ -43,6 +43,8 @@ */ #define MANA_AV_BUFFER_SIZE 64 +#define MANA_GSI_QPN (1) + struct mana_ib_adapter_caps { u32 max_sq_id; u32 max_rq_id; @@ -718,6 +720,7 @@ int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); +void mana_drain_gsi_sqs(struct mana_ib_dev *mdev); int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); From 211dc59b7bb5ea0a6ea29072f04a4498c7ab046b Mon Sep 17 00:00:00 2001 From: Fushuai Wang Date: Mon, 11 Aug 2025 14:25:34 +0800 Subject: [PATCH 08/85] IB/hfi1: Use for_each_online_cpu() instead of for_each_cpu() Replace the opencoded for_each_cpu(cpu, cpu_online_mask) loop with the more readable and equivalent for_each_online_cpu(cpu) macro. Signed-off-by: Fushuai Wang Link: https://patch.msgid.link/20250811062534.1041-1-wangfushuai@baidu.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 719b7c34e238..5cfa4f8fbf3d 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -990,7 +990,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, } /* Clean up old mappings */ - for_each_cpu(cpu, cpu_online_mask) { + for_each_online_cpu(cpu) { struct sdma_rht_node *rht_node; /* Don't cleanup sdes that are set in the new mask */ From 8e5442f3935d269f15b0b9a0d77cdf1cf3bd006f Mon Sep 17 00:00:00 2001 From: Michael Margolin Date: Thu, 3 Jul 2025 18:23:14 +0000 Subject: [PATCH 09/85] RDMA/efa: Extend admin timeout error print Add command id to the printed message for additional debug information. Link: https://patch.msgid.link/r/20250703182314.16442-1-mrgolin@amazon.com Reviewed-by: Yonatan Nachum Signed-off-by: Michael Margolin Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_com.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index bafd210dd43e..e6377602a9c4 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #include "efa_com.h" @@ -30,6 +30,7 @@ struct efa_comp_ctx { struct efa_admin_acq_entry *user_cqe; u32 comp_size; enum efa_cmd_status status; + u16 cmd_id; u8 cmd_opcode; u8 occupied; }; @@ -333,6 +334,7 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu comp_ctx->comp_size = comp_size_in_bytes; comp_ctx->user_cqe = comp; comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode; + comp_ctx->cmd_id = cmd_id; reinit_completion(&comp_ctx->wait_event); @@ -557,17 +559,19 @@ static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *com if (comp_ctx->status == EFA_CMD_COMPLETED) ibdev_err_ratelimited( aq->efa_dev, - "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n", + "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (id: %d, sq producer: %d, sq consumer: %d, cq consumer: %d)\n", efa_com_cmd_str(comp_ctx->cmd_opcode), comp_ctx->cmd_opcode, comp_ctx->status, - comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc); + comp_ctx->cmd_id, aq->sq.pc, aq->sq.cc, + aq->cq.cc); else ibdev_err_ratelimited( aq->efa_dev, - "The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n", + "The device didn't send any completion for admin cmd %s(%d) status %d (id: %d, sq producer: %d, sq consumer: %d, cq consumer: %d)\n", efa_com_cmd_str(comp_ctx->cmd_opcode), comp_ctx->cmd_opcode, comp_ctx->status, - comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc); + comp_ctx->cmd_id, aq->sq.pc, aq->sq.cc, + aq->cq.cc); clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); err = -ETIME; From d9e6e85b7beb2aeb8defac2f705b23532ddb25d4 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 13 Aug 2025 15:36:01 +0300 Subject: [PATCH 10/85] RDMA/mlx5: Enable Data-Direct with Relaxed Ordering Relaxed Ordering can improve performance in certain scenarios. Enable it in the Data-Direct use case as well. 
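The resulting per-registration choice between the two data-direct mkeys is simple; a standalone paraphrase of the selection added in umr.c below (names and types simplified, not driver code):

  #include <stdbool.h>
  #include <stdint.h>

  /* Paraphrase of the mkey selection in _mlx5r_umr_update_mr_pas(). */
  uint32_t pick_ddr_mkey(bool wants_relaxed_ordering, bool mkey_ro_valid,
                         uint32_t mkey, uint32_t mkey_ro)
  {
          if (wants_relaxed_ordering && mkey_ro_valid)
                  return mkey_ro;   /* best-effort RO mkey was created */
          return mkey;              /* otherwise fall back to the base mkey */
  }

Creation of the RO mkey is best effort: if the second mlx5_core_create_mkey() fails, the driver keeps only the base mkey and masks IB_ACCESS_RELAXED_ORDERING out of the registration, as before.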
Link: https://patch.msgid.link/r/1221dcdda8061ba5f6bc3519044083c7438b257e.1755088503.git.leon@kernel.org Signed-off-by: Yishai Hadas Reviewed-by: Gal Shalom Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 32 ++++++++++++++++++++++++++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 ++ drivers/infiniband/hw/mlx5/mr.c | 8 +++---- drivers/infiniband/hw/mlx5/umr.c | 6 +++++- 4 files changed, 41 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d456e4fde3e1..505349c68e79 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3118,6 +3118,7 @@ mlx5_ib_create_data_direct_resources(struct mlx5_ib_dev *dev) { int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_core_dev *mdev = dev->mdev; + bool ro_supp = false; void *mkc; u32 mkey; u32 pdn; @@ -3146,14 +3147,37 @@ mlx5_ib_create_data_direct_resources(struct mlx5_ib_dev *dev) MLX5_SET(mkc, mkc, length64, 1); MLX5_SET(mkc, mkc, qpn, 0xffffff); err = mlx5_core_create_mkey(mdev, &mkey, in, inlen); - kvfree(in); if (err) - goto err; + goto err_mkey; dev->ddr.mkey = mkey; dev->ddr.pdn = pdn; + + /* create another mkey with RO support */ + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) { + MLX5_SET(mkc, mkc, relaxed_ordering_write, 1); + ro_supp = true; + } + + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) { + MLX5_SET(mkc, mkc, relaxed_ordering_read, 1); + ro_supp = true; + } + + if (ro_supp) { + err = mlx5_core_create_mkey(mdev, &mkey, in, inlen); + /* RO is defined as best effort */ + if (!err) { + dev->ddr.mkey_ro = mkey; + dev->ddr.mkey_ro_valid = true; + } + } + + kvfree(in); return 0; +err_mkey: + kvfree(in); err: mlx5_core_dealloc_pd(mdev, pdn); return err; @@ -3162,6 +3186,10 @@ err: static void mlx5_ib_free_data_direct_resources(struct mlx5_ib_dev *dev) { + + if (dev->ddr.mkey_ro_valid) + mlx5_core_destroy_mkey(dev->mdev, dev->ddr.mkey_ro); + mlx5_core_destroy_mkey(dev->mdev, dev->ddr.mkey); mlx5_core_dealloc_pd(dev->mdev, dev->ddr.pdn); } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 7ffc7ee92cf0..309f256fc27d 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -854,6 +854,8 @@ struct mlx5_ib_port_resources { struct mlx5_data_direct_resources { u32 pdn; u32 mkey; + u32 mkey_ro; + u8 mkey_ro_valid :1; }; struct mlx5_ib_resources { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1317f2cb38a4..d3c82ffa300e 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1717,11 +1717,11 @@ reg_user_mr_dmabuf_by_data_direct(struct ib_pd *pd, u64 offset, goto end; } - /* The device's 'data direct mkey' was created without RO flags to - * simplify things and allow for a single mkey per device. - * Since RO is not a must, mask it out accordingly. + /* If no device's 'data direct mkey' with RO flags exists + * mask it out accordingly. 
*/ - access_flags &= ~IB_ACCESS_RELAXED_ORDERING; + if (!dev->ddr.mkey_ro_valid) + access_flags &= ~IB_ACCESS_RELAXED_ORDERING; crossed_mr = reg_user_mr_dmabuf(pd, &data_direct_dev->pdev->dev, offset, length, virt_addr, fd, access_flags, MLX5_MKC_ACCESS_MODE_KSM, diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c index 7ef35cddce81..4e562e0dd9e1 100644 --- a/drivers/infiniband/hw/mlx5/umr.c +++ b/drivers/infiniband/hw/mlx5/umr.c @@ -761,7 +761,11 @@ _mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags, bool dd, if (dd) { cur_ksm->va = cpu_to_be64(rdma_block_iter_dma_address(&biter)); - cur_ksm->key = cpu_to_be32(dev->ddr.mkey); + if (mr->access_flags & IB_ACCESS_RELAXED_ORDERING && + dev->ddr.mkey_ro_valid) + cur_ksm->key = cpu_to_be32(dev->ddr.mkey_ro); + else + cur_ksm->key = cpu_to_be32(dev->ddr.mkey); if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP)) { cur_ksm->va = 0; From 1a7c18c485bf17ef408d5ebb7f83e1f8ef329585 Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Wed, 13 Aug 2025 15:39:56 +0300 Subject: [PATCH 11/85] RDMA/mlx5: Better estimate max_qp_wr to reflect WQE count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mlx5 driver currently derives max_qp_wr directly from the log_max_qp_sz HCA capability: props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); However, this value represents the number of WQEs in units of Basic Blocks (see MLX5_SEND_WQE_BB), not actual number of WQEs. Since the size of a WQE can vary depending on transport type and features (e.g., atomic operations, UMR, LSO), the actual number of WQEs can be significantly smaller than the WQEBB count suggests. This patch introduces a conservative estimation of the worst-case WQE size — considering largest segments possible with 1 SGE and no inline data or special features. It uses this to derive a more accurate max_qp_wr value. Fixes: 938fe83c8dcb ("net/mlx5_core: New device capabilities handling") Link: https://patch.msgid.link/r/7d992c9831c997ed5c33d30973406dc2dcaf5e89.1755088725.git.leon@kernel.org Reported-by: Chuck Lever Closes: https://lore.kernel.org/all/20250506142202.GJ2260621@ziepe.ca/ Signed-off-by: Or Har-Toov Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 48 ++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 505349c68e79..de5339170876 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -883,6 +884,51 @@ static void fill_esw_mgr_reg_c0(struct mlx5_core_dev *mdev, resp->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask(); } +/* + * Calculate maximum SQ overhead across all QP types. + * Other QP types (REG_UMR, UC, RC, UD/SMI/GSI, XRC_TGT) + * have smaller overhead than the types calculated below, + * so they are implicitly included. 
+ */ +static u32 mlx5_ib_calc_max_sq_overhead(void) +{ + u32 max_overhead_xrc, overhead_ud_lso, a, b; + + /* XRC_INI */ + max_overhead_xrc = sizeof(struct mlx5_wqe_xrc_seg); + max_overhead_xrc += sizeof(struct mlx5_wqe_ctrl_seg); + a = sizeof(struct mlx5_wqe_atomic_seg) + + sizeof(struct mlx5_wqe_raddr_seg); + b = sizeof(struct mlx5_wqe_umr_ctrl_seg) + + sizeof(struct mlx5_mkey_seg) + + MLX5_IB_SQ_UMR_INLINE_THRESHOLD / MLX5_IB_UMR_OCTOWORD; + max_overhead_xrc += max(a, b); + + /* UD with LSO */ + overhead_ud_lso = sizeof(struct mlx5_wqe_ctrl_seg); + overhead_ud_lso += sizeof(struct mlx5_wqe_eth_pad); + overhead_ud_lso += sizeof(struct mlx5_wqe_eth_seg); + overhead_ud_lso += sizeof(struct mlx5_wqe_datagram_seg); + + return max(max_overhead_xrc, overhead_ud_lso); +} + +static u32 mlx5_ib_calc_max_qp_wr(struct mlx5_ib_dev *dev) +{ + struct mlx5_core_dev *mdev = dev->mdev; + u32 max_wqe_bb_units = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); + u32 max_wqe_size; + /* max QP overhead + 1 SGE, no inline, no special features */ + max_wqe_size = mlx5_ib_calc_max_sq_overhead() + + sizeof(struct mlx5_wqe_data_seg); + + max_wqe_size = roundup_pow_of_two(max_wqe_size); + + max_wqe_size = ALIGN(max_wqe_size, MLX5_SEND_WQE_BB); + + return (max_wqe_bb_units * MLX5_SEND_WQE_BB) / max_wqe_size; +} + static int mlx5_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) @@ -1041,7 +1087,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_mr_size = ~0ull; props->page_size_cap = ~(min_page_size - 1); props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp); - props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); + props->max_qp_wr = mlx5_ib_calc_max_qp_wr(dev); max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) / sizeof(struct mlx5_wqe_data_seg); max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512); From 08aae7860450c89eebbc6fd4d38416e53c7a33d2 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Wed, 13 Aug 2025 15:41:19 +0300 Subject: [PATCH 12/85] RDMA/mlx5: Fix vport loopback forcing for MPV device Previously loopback for MPV was supposed to be permanently enabled, however other driver flows were able to over-ride that configuration and disable it. Add force_lb parameter that indicates that loopback should always be enabled which prevents all other driver flows from disabling it. 
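A minimal, simplified model of the guard this patch adds to mlx5_ib_enable_lb()/mlx5_ib_disable_lb() (the real code keeps separate user_td and qps refcounts and programs the NIC vport; callers are assumed balanced):

  #include <stdbool.h>

  struct lb_state {
          int  users;          /* stands in for the user_td/qps refcounts */
          bool enabled;
          bool force_enable;   /* set by mlx5_ib_enable_lb_mp() */
  };

  void lb_enable(struct lb_state *lb)
  {
          if (lb->force_enable)
                  return;           /* forced on: nothing to account */
          if (lb->users++ == 0)
                  lb->enabled = true;
  }

  void lb_disable(struct lb_state *lb)
  {
          if (lb->force_enable)
                  return;           /* forced on: never drop loopback */
          if (lb->users && --lb->users == 0)
                  lb->enabled = false;
  }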
Fixes: a9a9e68954f2 ("RDMA/mlx5: Fix vport loopback for MPV device") Link: https://patch.msgid.link/r/cfc6b1f0f99f8100b087483cc14da6025317f901.1755088808.git.leon@kernel.org Signed-off-by: Patrisious Haddad Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 19 +++++++++++++++---- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index de5339170876..1fbc0351063d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1839,7 +1839,8 @@ static void deallocate_uars(struct mlx5_ib_dev *dev, } static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master, - struct mlx5_core_dev *slave) + struct mlx5_core_dev *slave, + struct mlx5_ib_lb_state *lb_state) { int err; @@ -1851,6 +1852,7 @@ static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master, if (err) goto out; + lb_state->force_enable = true; return 0; out: @@ -1859,16 +1861,22 @@ out: } static void mlx5_ib_disable_lb_mp(struct mlx5_core_dev *master, - struct mlx5_core_dev *slave) + struct mlx5_core_dev *slave, + struct mlx5_ib_lb_state *lb_state) { mlx5_nic_vport_update_local_lb(slave, false); mlx5_nic_vport_update_local_lb(master, false); + + lb_state->force_enable = false; } int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp) { int err = 0; + if (dev->lb.force_enable) + return 0; + mutex_lock(&dev->lb.mutex); if (td) dev->lb.user_td++; @@ -1890,6 +1898,9 @@ int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp) void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp) { + if (dev->lb.force_enable) + return; + mutex_lock(&dev->lb.mutex); if (td) dev->lb.user_td--; @@ -3597,7 +3608,7 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, lockdep_assert_held(&mlx5_ib_multiport_mutex); - mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev); + mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev, &ibdev->lb); mlx5_core_mp_event_replay(ibdev->mdev, MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, @@ -3694,7 +3705,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, MLX5_DRIVER_EVENT_AFFILIATION_DONE, &key); - err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev); + err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev, &ibdev->lb); if (err) goto unbind; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 309f256fc27d..b43cf9c7e140 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1111,6 +1111,7 @@ struct mlx5_ib_lb_state { u32 user_td; int qps; bool enabled; + bool force_enable; }; struct mlx5_ib_pf_eq { From 2aa35b24ad12fb960d30e9e282f768e1a0af9291 Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Thu, 21 Aug 2025 15:22:09 +0800 Subject: [PATCH 13/85] RDMA/erdma: Use vcalloc() instead of vzalloc() Replace vzalloc() with vcalloc() in vmalloc_to_dma_addrs(). As noted in the kernel documentation [1], open-coded multiplication in allocator arguments is discouraged because it can lead to integer overflow. Use vcalloc() to gain built-in overflow protection, making memory allocation safer when calculating allocation size compared to explicit multiplication. 
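For illustration only, a userspace analogue of the overflow check that calloc-style allocators (vcalloc() included) perform and that an open-coded "n * size" argument silently skips; the npages value is deliberately pathological:

  #include <stdio.h>
  #include <stdint.h>
  #include <stdlib.h>

  static void *checked_alloc(size_t n, size_t size)
  {
          if (size && n > SIZE_MAX / size)
                  return NULL;            /* would overflow: refuse */
          return calloc(n, size);         /* calloc does the same check */
  }

  int main(void)
  {
          /* Chosen so that npages * sizeof(uint64_t) wraps around size_t. */
          size_t npages = SIZE_MAX / sizeof(uint64_t) + 2;

          printf("open-coded size wraps to %zu bytes\n",
                 npages * sizeof(uint64_t));
          printf("checked_alloc -> %p\n",
                 checked_alloc(npages, sizeof(uint64_t)));
          return 0;
  }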
[1]: https://www.kernel.org/doc/html/next/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments Link: https://patch.msgid.link/r/20250821072209.510348-1-rongqianfeng@vivo.com Signed-off-by: Qianfeng Rong Reviewed-by: Cheng Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/erdma/erdma_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index b4dadd306837..bb8809ed2f75 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -671,7 +671,7 @@ static u32 vmalloc_to_dma_addrs(struct erdma_dev *dev, dma_addr_t **dma_addrs, npages = (PAGE_ALIGN((u64)buf + len) - PAGE_ALIGN_DOWN((u64)buf)) >> PAGE_SHIFT; - pg_dma = vzalloc(npages * sizeof(dma_addr_t)); + pg_dma = vcalloc(npages, sizeof(*pg_dma)); if (!pg_dma) return 0; From 8191e874156ca2e51b31a5880fb050a66177def5 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Thu, 14 Aug 2025 16:55:48 +0530 Subject: [PATCH 14/85] RDMA/bnxt_re: Show srq_limit in fill_res_srq_entry hook Added srq_limit in rdma show resource srq hook. Signed-off-by: Kashyap Desai Signed-off-by: Saravanan Vajravel Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250814112555.221665-3-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 293b0a96c8e3..059cc1a15165 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1207,6 +1207,8 @@ static int bnxt_re_fill_res_srq_entry(struct sk_buff *msg, struct ib_srq *ib_srq goto err; if (rdma_nl_put_driver_u32_hex(msg, "max_sge", srq->qplib_srq.max_sge)) goto err; + if (rdma_nl_put_driver_u32_hex(msg, "srq_limit", srq->qplib_srq.threshold)) + goto err; nla_nest_end(msg, table_attr); return 0; From ae7990ce1f605d74b41afe0dff8ec8030105c8cc Mon Sep 17 00:00:00 2001 From: Chenna Arnoori Date: Thu, 14 Aug 2025 16:55:49 +0530 Subject: [PATCH 15/85] RDMA/bnxt_re: RoCE Driver Dynamic Debug for HWRM's Add Linux kernel dynamic debug prints to RoCE HWRMs. Dump the request and response buffers for the RoCE HWRMs using print_hex_dump_bytes() so that they are covered by kernel dynamic debug.
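A short usage sketch of the helper relied on below (the wrapper name is illustrative); the runtime-enable command is the usual dynamic-debug control syntax and assumes CONFIG_DYNAMIC_DEBUG with debugfs mounted at /sys/kernel/debug:

#include <linux/printk.h>

/* Dump an HWRM request buffer; with CONFIG_DYNAMIC_DEBUG these dumps
 * stay silent until enabled at runtime, e.g.:
 *   echo 'file qplib_rcfw.c +p' > /sys/kernel/debug/dynamic_debug/control
 */
static void dump_hwrm_req(const void *req, size_t req_sz)
{
	print_hex_dump_bytes("req: ", DUMP_PREFIX_OFFSET, req, req_sz);
}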
Signed-off-by: Chenna Arnoori Signed-off-by: Saravanan Vajravel Link: https://patch.msgid.link/20250814112555.221665-4-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 804bc773b4ef..b97e75404139 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -366,6 +366,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, wmb(); writel(cmdq_prod, cmdq->cmdq_mbox.prod); writel(RCFW_CMDQ_TRIG_VAL, cmdq->cmdq_mbox.db); + print_hex_dump_bytes("req: ", DUMP_PREFIX_OFFSET, msg->req, msg->req_sz); spin_unlock_bh(&hwq->lock); /* Return the CREQ response pointer */ return 0; @@ -631,6 +632,7 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, int rc = 0; pdev = rcfw->pdev; + print_hex_dump_bytes("event: ", DUMP_PREFIX_OFFSET, qp_event, sizeof(*qp_event)); switch (qp_event->event) { case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION: err_event = (struct creq_qp_error_notification *)qp_event; From 832fc9e1614e00bb9d7b8350c65b4637a9941cdf Mon Sep 17 00:00:00 2001 From: Damodharam Ammepalli Date: Thu, 14 Aug 2025 16:55:51 +0530 Subject: [PATCH 16/85] RDMA/bnxt_re: Optimize bnxt_qplib_get_dev_attr function Optimize bnxt_qplib_get_dev_attr() by separating out query_version which uses creq notification method to host. Due to serialization of cmdq by firmware, expected latency in response to heavy multi-threaded rdma applications might be observed. This patch separates the version_query logic out of device attribute query and called only during rdma driver init. Signed-off-by: Damodharam Ammepalli Reviewed-by: Hongguang Gao Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250814112555.221665-6-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/main.c | 1 + drivers/infiniband/hw/bnxt_re/qplib_sp.c | 15 +++++++-------- drivers/infiniband/hw/bnxt_re/qplib_sp.h | 1 + 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 059cc1a15165..10dd498bd206 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -2178,6 +2178,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) if (rc) goto disable_rcfw; + bnxt_qplib_query_version(&rdev->rcfw); bnxt_re_set_resource_limits(rdev); rc = bnxt_qplib_alloc_ctx(&rdev->qplib_res, &rdev->qplib_ctx, 0, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 68981399598d..b602a1da19cd 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -66,14 +66,15 @@ static bool bnxt_qplib_is_atomic_cap(struct bnxt_qplib_rcfw *rcfw) return (pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ); } -static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw, - char *fw_ver) +void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw) { struct creq_query_version_resp resp = {}; struct bnxt_qplib_cmdqmsg msg = {}; struct cmdq_query_version req = {}; + struct bnxt_qplib_dev_attr *attr; int rc; + attr = rcfw->res->dattr; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_QUERY_VERSION, sizeof(req)); @@ -82,10 +83,10 @@ static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw, rc = 
bnxt_qplib_rcfw_send_message(rcfw, &msg); if (rc) return; - fw_ver[0] = resp.fw_maj; - fw_ver[1] = resp.fw_minor; - fw_ver[2] = resp.fw_bld; - fw_ver[3] = resp.fw_rsvd; + attr->fw_ver[0] = resp.fw_maj; + attr->fw_ver[1] = resp.fw_minor; + attr->fw_ver[2] = resp.fw_bld; + attr->fw_ver[3] = resp.fw_rsvd; } int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw) @@ -179,8 +180,6 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw) if (_is_max_srq_ext_supported(attr->dev_cap_flags2)) attr->max_srq += le16_to_cpu(sb->max_srq_ext); - bnxt_qplib_query_version(rcfw, attr->fw_ver); - for (i = 0; i < MAX_TQM_ALLOC_REQ / 4; i++) { temp = le32_to_cpu(sb->tqm_alloc_reqs[i]); tqm_alloc = (u8 *)&temp; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 09faf4a1e849..e9834e7fc383 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -358,6 +358,7 @@ int bnxt_qplib_read_context(struct bnxt_qplib_rcfw *rcfw, u8 type, u32 xid, u32 resp_size, void *resp_va); int bnxt_qplib_query_cc_param(struct bnxt_qplib_res *res, struct bnxt_qplib_cc_param *cc_param); +void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw); #define BNXT_VAR_MAX_WQE 4352 #define BNXT_VAR_MAX_SLOT_ALIGN 256 From ef56081d1864582a6db50710733416c0510b7826 Mon Sep 17 00:00:00 2001 From: Vasuthevan Maheswaran Date: Thu, 14 Aug 2025 16:55:52 +0530 Subject: [PATCH 17/85] RDMA/bnxt_re: RoCE related hardware counters update Support for new hardware counters added, and existing hardware counters have been modified according to the design documents for compatibility with open-source monitoring agents. Signed-off-by: Vasuthevan Maheswaran Signed-off-by: Saravanan Vajravel Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250814112555.221665-7-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/hw_counters.c | 51 ++++++++++++++------- drivers/infiniband/hw/bnxt_re/hw_counters.h | 3 ++ 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 44bb082e0a60..32ec67e4d922 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -79,22 +79,22 @@ static const struct rdma_stat_desc bnxt_re_stat_descs[] = { [BNXT_RE_TX_DISCARDS].name = "tx_roce_discards", [BNXT_RE_RX_ERRORS].name = "rx_roce_errors", [BNXT_RE_RX_DISCARDS].name = "rx_roce_discards", - [BNXT_RE_TO_RETRANSMITS].name = "to_retransmits", - [BNXT_RE_SEQ_ERR_NAKS_RCVD].name = "seq_err_naks_rcvd", - [BNXT_RE_MAX_RETRY_EXCEEDED].name = "max_retry_exceeded", - [BNXT_RE_RNR_NAKS_RCVD].name = "rnr_naks_rcvd", - [BNXT_RE_MISSING_RESP].name = "missing_resp", + [BNXT_RE_TO_RETRANSMITS].name = "local_ack_timeout_err", + [BNXT_RE_SEQ_ERR_NAKS_RCVD].name = "packet_seq_err", + [BNXT_RE_MAX_RETRY_EXCEEDED].name = "max_retry_exceeded", + [BNXT_RE_RNR_NAKS_RCVD].name = "rnr_nak_retry_err", + [BNXT_RE_MISSING_RESP].name = "implied_nak_seq_err", [BNXT_RE_UNRECOVERABLE_ERR].name = "unrecoverable_err", [BNXT_RE_BAD_RESP_ERR].name = "bad_resp_err", [BNXT_RE_LOCAL_QP_OP_ERR].name = "local_qp_op_err", [BNXT_RE_LOCAL_PROTECTION_ERR].name = "local_protection_err", [BNXT_RE_MEM_MGMT_OP_ERR].name = "mem_mgmt_op_err", - [BNXT_RE_REMOTE_INVALID_REQ_ERR].name = "remote_invalid_req_err", - [BNXT_RE_REMOTE_ACCESS_ERR].name = "remote_access_err", + [BNXT_RE_REMOTE_INVALID_REQ_ERR].name = 
"req_remote_invalid_request", + [BNXT_RE_REMOTE_ACCESS_ERR].name = "req_remote_access_errors", [BNXT_RE_REMOTE_OP_ERR].name = "remote_op_err", - [BNXT_RE_DUP_REQ].name = "dup_req", + [BNXT_RE_DUP_REQ].name = "duplicate_request", [BNXT_RE_RES_EXCEED_MAX].name = "res_exceed_max", - [BNXT_RE_RES_LENGTH_MISMATCH].name = "res_length_mismatch", + [BNXT_RE_RES_LENGTH_MISMATCH].name = "resp_local_length_error", [BNXT_RE_RES_EXCEEDS_WQE].name = "res_exceeds_wqe", [BNXT_RE_RES_OPCODE_ERR].name = "res_opcode_err", [BNXT_RE_RES_RX_INVALID_RKEY].name = "res_rx_invalid_rkey", @@ -118,7 +118,7 @@ static const struct rdma_stat_desc bnxt_re_stat_descs[] = { [BNXT_RE_RES_SRQ_LOAD_ERR].name = "res_srq_load_err", [BNXT_RE_RES_TX_PCI_ERR].name = "res_tx_pci_err", [BNXT_RE_RES_RX_PCI_ERR].name = "res_rx_pci_err", - [BNXT_RE_OUT_OF_SEQ_ERR].name = "oos_drop_count", + [BNXT_RE_OUT_OF_SEQ_ERR].name = "out_of_sequence", [BNXT_RE_TX_ATOMIC_REQ].name = "tx_atomic_req", [BNXT_RE_TX_READ_REQ].name = "tx_read_req", [BNXT_RE_TX_READ_RES].name = "tx_read_resp", @@ -126,23 +126,26 @@ static const struct rdma_stat_desc bnxt_re_stat_descs[] = { [BNXT_RE_TX_SEND_REQ].name = "tx_send_req", [BNXT_RE_TX_ROCE_PKTS].name = "tx_roce_only_pkts", [BNXT_RE_TX_ROCE_BYTES].name = "tx_roce_only_bytes", - [BNXT_RE_RX_ATOMIC_REQ].name = "rx_atomic_req", - [BNXT_RE_RX_READ_REQ].name = "rx_read_req", + [BNXT_RE_RX_ATOMIC_REQ].name = "rx_atomic_requests", + [BNXT_RE_RX_READ_REQ].name = "rx_read_requests", [BNXT_RE_RX_READ_RESP].name = "rx_read_resp", - [BNXT_RE_RX_WRITE_REQ].name = "rx_write_req", + [BNXT_RE_RX_WRITE_REQ].name = "rx_write_requests", [BNXT_RE_RX_SEND_REQ].name = "rx_send_req", [BNXT_RE_RX_ROCE_PKTS].name = "rx_roce_only_pkts", [BNXT_RE_RX_ROCE_BYTES].name = "rx_roce_only_bytes", [BNXT_RE_RX_ROCE_GOOD_PKTS].name = "rx_roce_good_pkts", [BNXT_RE_RX_ROCE_GOOD_BYTES].name = "rx_roce_good_bytes", - [BNXT_RE_OOB].name = "rx_out_of_buffer", - [BNXT_RE_TX_CNP].name = "tx_cnp_pkts", - [BNXT_RE_RX_CNP].name = "rx_cnp_pkts", - [BNXT_RE_RX_ECN].name = "rx_ecn_marked_pkts", + [BNXT_RE_OOB].name = "out_of_buffer", + [BNXT_RE_TX_CNP].name = "np_cnp_pkts", + [BNXT_RE_RX_CNP].name = "rp_cnp_handled", + [BNXT_RE_RX_ECN].name = "np_ecn_marked_roce_packets", [BNXT_RE_PACING_RESCHED].name = "pacing_reschedule", [BNXT_RE_PACING_CMPL].name = "pacing_complete", [BNXT_RE_PACING_ALERT].name = "pacing_alerts", [BNXT_RE_DB_FIFO_REG].name = "db_fifo_register", + [BNXT_RE_REQ_CQE_ERROR].name = "req_cqe_error", + [BNXT_RE_RESP_CQE_ERROR].name = "resp_cqe_error", + [BNXT_RE_RESP_REMOTE_ACCESS_ERRS].name = "resp_remote_access_errors", }; static void bnxt_re_copy_ext_stats(struct bnxt_re_dev *rdev, @@ -273,6 +276,20 @@ static void bnxt_re_copy_err_stats(struct bnxt_re_dev *rdev, err_s->res_rx_pci_err; stats->value[BNXT_RE_OUT_OF_SEQ_ERR] = err_s->res_oos_drop_count; + stats->value[BNXT_RE_REQ_CQE_ERROR] = + err_s->bad_resp_err + + err_s->local_qp_op_err + + err_s->local_protection_err + + err_s->mem_mgmt_op_err + + err_s->remote_invalid_req_err + + err_s->remote_access_err + + err_s->remote_op_err; + stats->value[BNXT_RE_RESP_CQE_ERROR] = + err_s->res_cmp_err + + err_s->res_cq_load_err; + stats->value[BNXT_RE_RESP_REMOTE_ACCESS_ERRS] = + err_s->res_rx_no_perm + + err_s->res_tx_no_perm; } static void bnxt_re_copy_db_pacing_stats(struct bnxt_re_dev *rdev, diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h index e541b6f8ca9f..be8e69458734 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.h +++ 
b/drivers/infiniband/hw/bnxt_re/hw_counters.h @@ -133,6 +133,9 @@ enum bnxt_re_hw_stats { BNXT_RE_PACING_CMPL, BNXT_RE_PACING_ALERT, BNXT_RE_DB_FIFO_REG, + BNXT_RE_REQ_CQE_ERROR, + BNXT_RE_RESP_CQE_ERROR, + BNXT_RE_RESP_REMOTE_ACCESS_ERRS, BNXT_RE_NUM_EXT_COUNTERS }; From dc61e916f1ce4770c98d8dd5857c174b73be8251 Mon Sep 17 00:00:00 2001 From: Abhishek Mohapatra Date: Thu, 14 Aug 2025 16:55:53 +0530 Subject: [PATCH 18/85] RDMA/bnxt_re: Report udp source port for flow_label in bnxt_re_query_qp The firmware doesn't capture the flow_label. Therefore the value that's always returned by qplib_qp->ah.flow_label is 0 whenever a qp is created. And as per IB spec, udp source port can be reported for flow_label. Hence reported udp source port for flow_label in bnxt_re_query_qp by populating the value of qplib_qp->udp_sport into qp_attr->ah_attr.grh.flow_label. Signed-off-by: Abhishek Mohapatra Signed-off-by: Saravanan Vajravel Link: https://patch.msgid.link/20250814112555.221665-8-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 +- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 1 + drivers/infiniband/hw/bnxt_re/qplib_fp.h | 1 + drivers/infiniband/hw/bnxt_re/roce_hsi.h | 3 ++- 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 37c2bc3bdba5..98bc8b6290f1 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2305,7 +2305,7 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, qp_attr->pkey_index = qplib_qp->pkey_index; qp_attr->qkey = qplib_qp->qkey; qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; - rdma_ah_set_grh(&qp_attr->ah_attr, NULL, qplib_qp->ah.flow_label, + rdma_ah_set_grh(&qp_attr->ah_attr, NULL, qplib_qp->udp_sport, qplib_qp->ah.host_sgid_index, qplib_qp->ah.hop_limit, qplib_qp->ah.traffic_class); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index dfe3177123e5..092310571dcc 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -1492,6 +1492,7 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp->access = sb->access; qp->pkey_index = le16_to_cpu(sb->pkey); qp->qkey = le32_to_cpu(sb->qkey); + qp->udp_sport = le16_to_cpu(sb->udp_src_port); temp32[0] = le32_to_cpu(sb->dgid[0]); temp32[1] = le32_to_cpu(sb->dgid[1]); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index ab125f1d949e..074c539c69c1 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -299,6 +299,7 @@ struct bnxt_qplib_qp { u8 smac[6]; u16 vlan_id; u16 port_id; + u16 udp_sport; u8 nw_type; struct bnxt_qplib_ah ah; diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index 024845f945ff..f9ac37335a1d 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -788,7 +788,8 @@ struct creq_query_qp_resp_sb { #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_ATOMIC 0x8UL __le16 pkey; __le32 qkey; - __le32 reserved32; + __le16 udp_src_port; + __le16 reserved16; __le32 dgid[4]; __le32 flow_label; __le16 sgid_index; From 4a9fba4d008a0d395407224f18def611ee85ba09 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 14 Aug 2025 16:55:54 +0530 Subject: [PATCH 19/85] RDMA/bnxt_re: Delete always true SGID table 
check The "sgid_tbl" inside "rdev->qplib_res" is static memory; hence, the check always returns true. Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250814112555.221665-9-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 98bc8b6290f1..195a9ba6f65d 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -375,7 +375,7 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context) if (!ctx) return -EINVAL; - if (sgid_tbl && sgid_tbl->active) { + if (sgid_tbl->active) { if (ctx->idx >= sgid_tbl->max) return -EINVAL; gid_to_del = &sgid_tbl->tbl[ctx->idx].gid; From c0da8dc534218b8371c778b184453e8dd5246189 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 14 Aug 2025 16:55:55 +0530 Subject: [PATCH 20/85] RDMA/bnxt_re: Enhance a log message when bnxt_re_register_netdev fails Make an error log message more user friendly. When bnxt_re_register_netdev() fails, the current log does not convey much information. Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250814112555.221665-10-kalesh-anakkur.purayil@broadcom.com Reviewed-by: Saravanan Vajravel Reviewed-by: Selvin Xavier Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 10dd498bd206..0bdc2f66e136 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -2088,8 +2088,9 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) rc = bnxt_re_register_netdev(rdev); if (rc) { ibdev_err(&rdev->ibdev, - "Failed to register with netedev: %#x\n", rc); - return -EINVAL; + "Failed to register with Ethernet driver, rc %d\n", + rc); + return rc; } } set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); From 217156bb70afb8e4c9263f7f892bb171ce4b463d Mon Sep 17 00:00:00 2001 From: Saravanan Vajravel Date: Fri, 22 Aug 2025 09:37:52 +0530 Subject: [PATCH 21/85] bnxt_en: Enhance stats context reservation logic When the firmware advertises that the device is capable of supporting port mirroring on the RoCE device, reserve one additional stat_ctx. To support the port mirroring feature, the RDMA driver allocates one stat_ctx for exclusive use in the RawEth QP.
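The three hunks below can be read as one flow; a sketch (not a literal driver function, the helper name is hypothetical) tying them together:

/* Sketch only: the FUNC_QCAPS flags_ext3 bit is latched into
 * bp->fw_cap, and the ULP stat_ctx reservation later keys off it to
 * set aside one extra context for the RoCE mirror (RawEth) QP.
 */
static void bnxt_reserve_mirror_stat_ctx(struct bnxt *bp, u32 flags_ext3)
{
	if (flags_ext3 & FUNC_QCAPS_RESP_FLAGS_EXT3_MIRROR_ON_ROCE_SUPPORTED)
		bp->fw_cap |= BNXT_FW_CAP_MIRROR_ON_ROCE;

	if (BNXT_MIRROR_ON_ROCE_CAP(bp))	/* tests the fw_cap bit above */
		bp->edev->ulp_num_ctxs++;	/* exclusive stat_ctx for mirroring */
}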
Signed-off-by: Saravanan Vajravel Reviewed-by: Somnath Kotur Reviewed-by: Kashyap Desai Reviewed-by: Selvin Xavier Reviewed-by: Michael Chan Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250822040801.776196-2-kalesh-anakkur.purayil@broadcom.com Reviewed-by: Jacob Keller Acked-by: Jakub Kicinski Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++++-- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 3 +++ drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 6 ++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5578ddcb465d..751840fff0dc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9601,10 +9601,10 @@ no_ptp: static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) { + u32 flags, flags_ext, flags_ext2, flags_ext3; + struct bnxt_hw_resc *hw_resc = &bp->hw_resc; struct hwrm_func_qcaps_output *resp; struct hwrm_func_qcaps_input *req; - struct bnxt_hw_resc *hw_resc = &bp->hw_resc; - u32 flags, flags_ext, flags_ext2; int rc; rc = hwrm_req_init(bp, req, HWRM_FUNC_QCAPS); @@ -9671,6 +9671,10 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) (flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_ROCE_VF_RESOURCE_MGMT_SUPPORTED)) bp->fw_cap |= BNXT_FW_CAP_ROCE_VF_RESC_MGMT_SUPPORTED; + flags_ext3 = le32_to_cpu(resp->flags_ext3); + if (flags_ext3 & FUNC_QCAPS_RESP_FLAGS_EXT3_MIRROR_ON_ROCE_SUPPORTED) + bp->fw_cap |= BNXT_FW_CAP_MIRROR_ON_ROCE; + bp->tx_push_thresh = 0; if ((flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED) && BNXT_FW_MAJ(bp) > 217) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index fda0d3cc6227..fa2b39b55e68 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2507,6 +2507,7 @@ struct bnxt { #define BNXT_FW_CAP_VNIC_RE_FLUSH BIT_ULL(40) #define BNXT_FW_CAP_SW_MAX_RESOURCE_LIMITS BIT_ULL(41) #define BNXT_FW_CAP_NPAR_1_2 BIT_ULL(42) + #define BNXT_FW_CAP_MIRROR_ON_ROCE BIT_ULL(43) u32 fw_dbg_cap; @@ -2528,6 +2529,8 @@ struct bnxt { ((bp)->fw_cap & BNXT_FW_CAP_ROCE_VF_RESC_MGMT_SUPPORTED) #define BNXT_SW_RES_LMT(bp) \ ((bp)->fw_cap & BNXT_FW_CAP_SW_MAX_RESOURCE_LIMITS) +#define BNXT_MIRROR_ON_ROCE_CAP(bp) \ + ((bp)->fw_cap & BNXT_FW_CAP_MIRROR_ON_ROCE) u32 hwrm_spec_code; u16 hwrm_cmd_seq; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 61cf201bb0dc..f8c2c72b382d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -100,6 +100,12 @@ void bnxt_set_dflt_ulp_stat_ctxs(struct bnxt *bp) if (BNXT_PF(bp) && !bp->pf.port_id && bp->port_count > 1) bp->edev->ulp_num_ctxs++; + + /* Reserve one additional stat_ctx when the device is capable + * of supporting port mirroring on RDMA device. + */ + if (BNXT_MIRROR_ON_ROCE_CAP(bp)) + bp->edev->ulp_num_ctxs++; } } From 8f47f12db518af43bc24f7fd14e4fc84aeb043b6 Mon Sep 17 00:00:00 2001 From: Saravanan Vajravel Date: Fri, 22 Aug 2025 09:37:53 +0530 Subject: [PATCH 22/85] RDMA/bnxt_re: Add data structures for RoCE mirror support Added data structures required for supporting mirroring on RoCE device. 
Signed-off-by: Saravanan Vajravel Reviewed-by: Selvin Xavier Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250822040801.776196-3-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 5 +++++ drivers/infiniband/hw/bnxt_re/ib_verbs.h | 5 +++++ drivers/infiniband/hw/bnxt_re/qplib_fp.h | 1 + drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 1 + drivers/infiniband/hw/bnxt_re/qplib_res.h | 6 ++++++ 5 files changed, 18 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 6df5a2738c95..b315e40c2e9d 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -172,6 +172,7 @@ struct bnxt_re_dev { struct list_head list; unsigned long flags; #define BNXT_RE_FLAG_NETDEV_REGISTERED 0 +#define BNXT_RE_FLAG_STATS_CTX3_ALLOC 1 #define BNXT_RE_FLAG_HAVE_L2_REF 3 #define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4 #define BNXT_RE_FLAG_QOS_WORK_REG 5 @@ -227,6 +228,10 @@ struct bnxt_re_dev { struct workqueue_struct *dcb_wq; struct dentry *cc_config; struct bnxt_re_dbg_cc_config_params *cc_config_params; + /* RoCE mirror */ + u16 mirror_vnic_id; + union ib_gid ugid; + u32 ugid_index; }; #define to_bnxt_re_dev(ptr, member) \ diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index fe00ab691a51..445a28b3cd96 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -164,6 +164,11 @@ struct bnxt_re_user_mmap_entry { u8 mmap_flag; }; +struct bnxt_re_flow { + struct ib_flow ib_flow; + struct bnxt_re_dev *rdev; +}; + static inline u16 bnxt_re_get_swqe_size(int nsge) { return sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 074c539c69c1..3bd995ced9ca 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -345,6 +345,7 @@ struct bnxt_qplib_qp { u32 msn_tbl_sz; bool is_host_msn_tbl; u8 tos_dscp; + u32 ugid_index; }; #define BNXT_RE_MAX_MSG_SIZE 0x80000000 diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index ff873c5f1b25..988c89b4232e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -236,6 +236,7 @@ struct bnxt_qplib_rcfw { atomic_t timeout_send; /* cached from chip cctx for quick reference in slow path */ u16 max_timeout; + bool roce_mirror; }; struct bnxt_qplib_cmdqmsg { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 6a13927674b4..12e2fa23794a 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -65,6 +65,7 @@ struct bnxt_qplib_drv_modes { bool db_push; bool dbr_pacing; u32 toggle_bits; + u8 roce_mirror; }; enum bnxt_re_toggle_modes { @@ -582,6 +583,11 @@ static inline u8 bnxt_qplib_dbr_pacing_en(struct bnxt_qplib_chip_ctx *cctx) return cctx->modes.dbr_pacing; } +static inline u8 bnxt_qplib_roce_mirror_supported(struct bnxt_qplib_chip_ctx *cctx) +{ + return cctx->modes.roce_mirror; +} + static inline bool _is_alloc_mr_unified(u16 dev_cap_flags) { return dev_cap_flags & CREQ_QUERY_FUNC_RESP_SB_MR_REGISTER_ALLOC; From 2419b16a3db539ce5c506a60ee4b85723ce78bd5 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Fri, 22 Aug 2025 09:37:54 +0530 Subject: [PATCH 23/85] RDMA/bnxt_re: Refactor hw context 
memory allocation This patch is in preparation for other patches in this series. There is no functional changes intended. 1. Rename bnxt_qplib_alloc_ctx() to bnxt_qplib_alloc_hwctx(). 2. Rename bnxt_qplib_free_ctx() to bnxt_qplib_free_hwctx(). 3. Reduce the number of arguments of bnxt_qplib_alloc_hwctx() by moving a check outside of it. Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250822040801.776196-4-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/main.c | 18 ++++++++++-------- drivers/infiniband/hw/bnxt_re/qplib_res.c | 17 ++++++----------- drivers/infiniband/hw/bnxt_re/qplib_res.h | 9 ++++----- 3 files changed, 20 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 0bdc2f66e136..cc75e79b93a6 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -2044,7 +2044,7 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) ibdev_warn(&rdev->ibdev, "Failed to deinitialize RCFW: %#x", rc); bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id); - bnxt_qplib_free_ctx(&rdev->qplib_res, &rdev->qplib_ctx); + bnxt_qplib_free_hwctx(&rdev->qplib_res, &rdev->qplib_ctx); bnxt_qplib_disable_rcfw_channel(&rdev->rcfw); type = bnxt_qplib_get_ring_type(rdev->chip_ctx); bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type); @@ -2182,12 +2182,14 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) bnxt_qplib_query_version(&rdev->rcfw); bnxt_re_set_resource_limits(rdev); - rc = bnxt_qplib_alloc_ctx(&rdev->qplib_res, &rdev->qplib_ctx, 0, - bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)); - if (rc) { - ibdev_err(&rdev->ibdev, - "Failed to allocate QPLIB context: %#x\n", rc); - goto disable_rcfw; + if (!rdev->is_virtfn && + !bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { + rc = bnxt_qplib_alloc_hwctx(&rdev->qplib_res, &rdev->qplib_ctx); + if (rc) { + ibdev_err(&rdev->ibdev, + "Failed to allocate hw context: %#x\n", rc); + goto disable_rcfw; + } } rc = bnxt_re_net_stats_ctx_alloc(rdev, rdev->qplib_ctx.stats.dma_map, @@ -2255,7 +2257,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) free_sctx: bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id); free_ctx: - bnxt_qplib_free_ctx(&rdev->qplib_res, &rdev->qplib_ctx); + bnxt_qplib_free_hwctx(&rdev->qplib_res, &rdev->qplib_ctx); disable_rcfw: bnxt_qplib_disable_rcfw_channel(&rdev->rcfw); free_ring: diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 6cd05207ffed..db2ee7246861 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -350,8 +350,8 @@ fail: } /* Context Tables */ -void bnxt_qplib_free_ctx(struct bnxt_qplib_res *res, - struct bnxt_qplib_ctx *ctx) +void bnxt_qplib_free_hwctx(struct bnxt_qplib_res *res, + struct bnxt_qplib_ctx *ctx) { int i; @@ -464,7 +464,7 @@ fail: } /* - * Routine: bnxt_qplib_alloc_ctx + * Routine: bnxt_qplib_alloc_hwctx * Description: * Context tables are memories which are used by the chip fw. 
* The 6 tables defined are: @@ -484,17 +484,13 @@ fail: * Returns: * 0 if success, else -ERRORS */ -int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res, - struct bnxt_qplib_ctx *ctx, - bool virt_fn, bool is_p5) +int bnxt_qplib_alloc_hwctx(struct bnxt_qplib_res *res, + struct bnxt_qplib_ctx *ctx) { struct bnxt_qplib_hwq_attr hwq_attr = {}; struct bnxt_qplib_sg_info sginfo = {}; int rc; - if (virt_fn || is_p5) - goto stats_alloc; - /* QPC Tables */ sginfo.pgsize = PAGE_SIZE; sginfo.pgshft = PAGE_SHIFT; @@ -540,7 +536,6 @@ int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res, rc = bnxt_qplib_alloc_init_hwq(&ctx->tim_tbl, &hwq_attr); if (rc) goto fail; -stats_alloc: /* Stats */ rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &ctx->stats); if (rc) @@ -549,7 +544,7 @@ stats_alloc: return 0; fail: - bnxt_qplib_free_ctx(res, ctx); + bnxt_qplib_free_hwctx(res, ctx); return rc; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 12e2fa23794a..9d866cfdebab 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -433,11 +433,10 @@ void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res); int bnxt_qplib_init_res(struct bnxt_qplib_res *res); void bnxt_qplib_free_res(struct bnxt_qplib_res *res); int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct net_device *netdev); -void bnxt_qplib_free_ctx(struct bnxt_qplib_res *res, - struct bnxt_qplib_ctx *ctx); -int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res, - struct bnxt_qplib_ctx *ctx, - bool virt_fn, bool is_p5); +void bnxt_qplib_free_hwctx(struct bnxt_qplib_res *res, + struct bnxt_qplib_ctx *ctx); +int bnxt_qplib_alloc_hwctx(struct bnxt_qplib_res *res, + struct bnxt_qplib_ctx *ctx); int bnxt_qplib_map_db_bar(struct bnxt_qplib_res *res); void bnxt_qplib_unmap_db_bar(struct bnxt_qplib_res *res); From b5942828ea5f8ba5fcdde71b1ac5473bdb5004e2 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Fri, 22 Aug 2025 09:37:55 +0530 Subject: [PATCH 24/85] RDMA/bnxt_re: Refactor stats context memory allocation Moved the stats context allocation logic to a new function. The stats context memory allocation code has been moved from bnxt_qplib_alloc_hwctx() to the newly added bnxt_re_get_stats_ctx() function. Also, the code to send the firmware command has been moved. This patch is in preparation for other patches in this series. There is no functional changes intended. 
Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250822040801.776196-5-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/main.c | 46 ++++++++++++++++++----- drivers/infiniband/hw/bnxt_re/qplib_res.c | 21 +++-------- drivers/infiniband/hw/bnxt_re/qplib_res.h | 5 +++ 3 files changed, 47 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index cc75e79b93a6..6788f74ed8eb 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -935,8 +935,7 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, } static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, - dma_addr_t dma_map, - u32 *fw_stats_ctx_id) + struct bnxt_qplib_stats *stats) { struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx; struct hwrm_stat_ctx_alloc_output resp = {}; @@ -945,21 +944,21 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, struct bnxt_fw_msg fw_msg = {}; int rc = -EINVAL; - *fw_stats_ctx_id = INVALID_STATS_CTX_ID; + stats->fw_id = INVALID_STATS_CTX_ID; if (!en_dev) return rc; bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_ALLOC); req.update_period_ms = cpu_to_le32(1000); - req.stats_dma_addr = cpu_to_le64(dma_map); + req.stats_dma_addr = cpu_to_le64(stats->dma_map); req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size); req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = bnxt_send_msg(en_dev, &fw_msg); if (!rc) - *fw_stats_ctx_id = le32_to_cpu(resp.stat_ctx_id); + stats->fw_id = le32_to_cpu(resp.stat_ctx_id); return rc; } @@ -2019,6 +2018,36 @@ static void bnxt_re_free_nqr_mem(struct bnxt_re_dev *rdev) rdev->nqr = NULL; } +static int bnxt_re_get_stats_ctx(struct bnxt_re_dev *rdev) +{ + struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx; + struct bnxt_qplib_res *res = &rdev->qplib_res; + int rc; + + rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &hctx->stats); + if (rc) + return rc; + + rc = bnxt_re_net_stats_ctx_alloc(rdev, &hctx->stats); + if (rc) + goto free_stat_mem; + + return 0; +free_stat_mem: + bnxt_qplib_free_stats_ctx(res->pdev, &hctx->stats); + + return rc; +} + +static void bnxt_re_put_stats_ctx(struct bnxt_re_dev *rdev) +{ + struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx; + struct bnxt_qplib_res *res = &rdev->qplib_res; + + bnxt_re_net_stats_ctx_free(rdev, hctx->stats.fw_id); + bnxt_qplib_free_stats_ctx(res->pdev, &hctx->stats); +} + static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) { u8 type; @@ -2043,7 +2072,7 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) if (rc) ibdev_warn(&rdev->ibdev, "Failed to deinitialize RCFW: %#x", rc); - bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id); + bnxt_re_put_stats_ctx(rdev); bnxt_qplib_free_hwctx(&rdev->qplib_res, &rdev->qplib_ctx); bnxt_qplib_disable_rcfw_channel(&rdev->rcfw); type = bnxt_qplib_get_ring_type(rdev->chip_ctx); @@ -2191,9 +2220,8 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) goto disable_rcfw; } } - rc = bnxt_re_net_stats_ctx_alloc(rdev, - rdev->qplib_ctx.stats.dma_map, - &rdev->qplib_ctx.stats.fw_id); + + rc = bnxt_re_get_stats_ctx(rdev); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate stats context: %#x\n", rc); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 
db2ee7246861..8d04f98b2606 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -53,12 +53,6 @@ #include "qplib_sp.h" #include "qplib_rcfw.h" -static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev, - struct bnxt_qplib_stats *stats); -static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev, - struct bnxt_qplib_chip_ctx *cctx, - struct bnxt_qplib_stats *stats); - /* PBL */ static void __free_pbl(struct bnxt_qplib_res *res, struct bnxt_qplib_pbl *pbl, bool is_umem) @@ -365,7 +359,6 @@ void bnxt_qplib_free_hwctx(struct bnxt_qplib_res *res, /* restore original pde level before destroy */ ctx->tqm_ctx.pde.level = ctx->tqm_ctx.pde_level; bnxt_qplib_free_hwq(res, &ctx->tqm_ctx.pde); - bnxt_qplib_free_stats_ctx(res->pdev, &ctx->stats); } static int bnxt_qplib_alloc_tqm_rings(struct bnxt_qplib_res *res, @@ -534,10 +527,6 @@ int bnxt_qplib_alloc_hwctx(struct bnxt_qplib_res *res, hwq_attr.depth = ctx->qpc_count * 16; hwq_attr.stride = 1; rc = bnxt_qplib_alloc_init_hwq(&ctx->tim_tbl, &hwq_attr); - if (rc) - goto fail; - /* Stats */ - rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &ctx->stats); if (rc) goto fail; @@ -825,8 +814,8 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res, } /* Stats */ -static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev, - struct bnxt_qplib_stats *stats) +void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev, + struct bnxt_qplib_stats *stats) { if (stats->dma) { dma_free_coherent(&pdev->dev, stats->size, @@ -836,9 +825,9 @@ static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev, stats->fw_id = -1; } -static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev, - struct bnxt_qplib_chip_ctx *cctx, - struct bnxt_qplib_stats *stats) +int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev, + struct bnxt_qplib_chip_ctx *cctx, + struct bnxt_qplib_stats *stats) { memset(stats, 0, sizeof(*stats)); stats->fw_id = -1; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 9d866cfdebab..ed1be06c2c60 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -441,6 +441,11 @@ int bnxt_qplib_map_db_bar(struct bnxt_qplib_res *res); void bnxt_qplib_unmap_db_bar(struct bnxt_qplib_res *res); int bnxt_qplib_determine_atomics(struct pci_dev *dev); +int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev, + struct bnxt_qplib_chip_ctx *cctx, + struct bnxt_qplib_stats *stats); +void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev, + struct bnxt_qplib_stats *stats); static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_db_info *dbinfo, struct bnxt_qplib_hwq *hwq, u32 cnt) From af7f9d0d5745fb4831c71d981a93f44f8fd3f10d Mon Sep 17 00:00:00 2001 From: Saravanan Vajravel Date: Fri, 22 Aug 2025 09:37:56 +0530 Subject: [PATCH 25/85] RDMA/bnxt_re: Add support for unique GID - RawEth QP requires unique GID so that per function stats_ctx is not polluted by packets mirrored to RoCE vnic. - Added support to add unique GID when RawEth type QP is created. - Added support to destroy unique GID when RawEth type QP is destroyed. - Allocated exclusive stats_ctx to use for RawEth type QP. 
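For reference, a sketch of how the reserved GID is composed, mirroring the hunk in the diff below (the helper name is illustrative):

#include <net/addrconf.h>
#include <rdma/ib_verbs.h>

/* A fixed link-local-style prefix plus an EUI-48-derived interface ID,
 * so the GID is unique per netdev and does not collide with the GIDs
 * the stack programs for regular QPs.
 */
static void bnxt_re_build_unique_gid(union ib_gid *ugid,
				     struct net_device *netdev)
{
	ugid->global.subnet_prefix = cpu_to_be64(0xfe8000000000abcdLL);
	addrconf_ifid_eui48(&ugid->raw[8], netdev);
}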
Signed-off-by: Saravanan Vajravel Reviewed-by: Kashyap Desai Reviewed-by: Selvin Xavier Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250822040801.776196-6-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 57 ++++++++++++++++++++++- drivers/infiniband/hw/bnxt_re/main.c | 42 +++++++++++++++++ drivers/infiniband/hw/bnxt_re/qplib_res.h | 1 + drivers/infiniband/hw/bnxt_re/qplib_sp.c | 6 ++- drivers/infiniband/hw/bnxt_re/qplib_sp.h | 3 +- 5 files changed, 105 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 195a9ba6f65d..c83809c72f5b 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -288,7 +288,9 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num, } port_attr->max_mtu = IB_MTU_4096; port_attr->active_mtu = iboe_get_mtu(rdev->netdev->mtu); - port_attr->gid_tbl_len = dev_attr->max_sgid; + /* One GID is reserved for RawEth QP. Report one less */ + port_attr->gid_tbl_len = (rdev->rcfw.roce_mirror ? (dev_attr->max_sgid - 1) : + dev_attr->max_sgid); port_attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP; @@ -429,7 +431,7 @@ int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context) rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)&attr->gid, rdev->qplib_res.netdev->dev_addr, - vlan_id, true, &tbl_idx); + vlan_id, true, &tbl_idx, false, 0); if (rc == -EALREADY) { ctx_tbl = sgid_tbl->ctx; ctx_tbl[tbl_idx]->refcnt++; @@ -955,6 +957,20 @@ fail: return rc; } +static void bnxt_re_del_unique_gid(struct bnxt_re_dev *rdev) +{ + int rc; + + if (!rdev->rcfw.roce_mirror) + return; + + rc = bnxt_qplib_del_sgid(&rdev->qplib_res.sgid_tbl, + (struct bnxt_qplib_gid *)&rdev->ugid, + 0xFFFF, true); + if (rc) + dev_err(rdev_to_dev(rdev), "Failed to delete unique GID, rc: %d\n", rc); +} + /* Queue Pairs */ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) { @@ -994,6 +1010,9 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) else if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD) atomic_dec(&rdev->stats.res.ud_qp_count); + if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE) + bnxt_re_del_unique_gid(rdev); + ib_umem_release(qp->rumem); ib_umem_release(qp->sumem); @@ -1595,6 +1614,29 @@ static bool bnxt_re_test_qp_limits(struct bnxt_re_dev *rdev, return rc; } +static int bnxt_re_add_unique_gid(struct bnxt_re_dev *rdev) +{ + struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx; + struct bnxt_qplib_res *res = &rdev->qplib_res; + int rc; + + if (!rdev->rcfw.roce_mirror) + return 0; + + rdev->ugid.global.subnet_prefix = cpu_to_be64(0xfe8000000000abcdLL); + addrconf_ifid_eui48(&rdev->ugid.raw[8], rdev->netdev); + + rc = bnxt_qplib_add_sgid(&res->sgid_tbl, + (struct bnxt_qplib_gid *)&rdev->ugid, + rdev->qplib_res.netdev->dev_addr, + 0xFFFF, true, &rdev->ugid_index, true, + hctx->stats3.fw_id); + if (rc) + dev_err(rdev_to_dev(rdev), "Failed to add unique GID. rc = %d\n", rc); + + return rc; +} + int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr, struct ib_udata *udata) { @@ -1656,6 +1698,17 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr, } } + /* Support for RawEth QP is added to capture TCP pkt dump. 
+ * So unique SGID is used to avoid incorrect statistics on per + * function stats_ctx + */ + if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE) { + rc = bnxt_re_add_unique_gid(rdev); + if (rc) + goto qp_destroy; + qp->qplib_qp.ugid_index = rdev->ugid_index; + } + qp->ib_qp.qp_num = qp->qplib_qp.id; if (qp_init_attr->qp_type == IB_QPT_GSI) rdev->gsi_ctx.gsi_qp = qp; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 6788f74ed8eb..a6ff21aa139a 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -2039,6 +2039,42 @@ free_stat_mem: return rc; } +static int bnxt_re_get_stats3_ctx(struct bnxt_re_dev *rdev) +{ + struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx; + struct bnxt_qplib_res *res = &rdev->qplib_res; + int rc; + + if (!rdev->rcfw.roce_mirror) + return 0; + + rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &hctx->stats3); + if (rc) + return rc; + + rc = bnxt_re_net_stats_ctx_alloc(rdev, &hctx->stats3); + if (rc) + goto free_stat_mem; + + return 0; +free_stat_mem: + bnxt_qplib_free_stats_ctx(res->pdev, &hctx->stats3); + + return rc; +} + +static void bnxt_re_put_stats3_ctx(struct bnxt_re_dev *rdev) +{ + struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx; + struct bnxt_qplib_res *res = &rdev->qplib_res; + + if (!rdev->rcfw.roce_mirror) + return; + + bnxt_re_net_stats_ctx_free(rdev, hctx->stats3.fw_id); + bnxt_qplib_free_stats_ctx(res->pdev, &hctx->stats3); +} + static void bnxt_re_put_stats_ctx(struct bnxt_re_dev *rdev) { struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx; @@ -2061,6 +2097,8 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags)) cancel_delayed_work_sync(&rdev->worker); + bnxt_re_put_stats3_ctx(rdev); + if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags)) bnxt_re_cleanup_res(rdev); @@ -2281,6 +2319,10 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) bnxt_re_init_dcb_wq(rdev); bnxt_re_net_register_async_event(rdev); + rc = bnxt_re_get_stats3_ctx(rdev); + if (rc) + goto fail; + return 0; free_sctx: bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index ed1be06c2c60..2ea3b7f232a3 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -304,6 +304,7 @@ struct bnxt_qplib_ctx { struct bnxt_qplib_hwq tim_tbl; struct bnxt_qplib_tqm_ctx tqm_ctx; struct bnxt_qplib_stats stats; + struct bnxt_qplib_stats stats3; struct bnxt_qplib_vf_res vf_res; }; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index b602a1da19cd..3d68e44a2dce 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -308,7 +308,8 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct bnxt_qplib_gid *gid, const u8 *smac, - u16 vlan_id, bool update, u32 *index) + u16 vlan_id, bool update, u32 *index, + bool is_ugid, u32 stats_ctx_id) { struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl, struct bnxt_qplib_res, @@ -373,6 +374,9 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, req.src_mac[1] = cpu_to_be16(((u16 *)smac)[1]); req.src_mac[2] = cpu_to_be16(((u16 *)smac)[2]); + req.stats_ctx = cpu_to_le16(CMDQ_ADD_GID_STATS_CTX_STATS_CTX_VALID | + (u16)stats_ctx_id); + 
bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0); rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index e9834e7fc383..58f90f3e57f7 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -323,7 +323,8 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct bnxt_qplib_gid *gid, u16 vlan_id, bool update); int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct bnxt_qplib_gid *gid, const u8 *mac, u16 vlan_id, - bool update, u32 *index); + bool update, u32 *index, + bool is_ugid, u32 stats_ctx_id); int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct bnxt_qplib_gid *gid, u16 gid_idx, const u8 *smac); From fd6c9ae7c182344612fc3709ebd837cba6007b83 Mon Sep 17 00:00:00 2001 From: Saravanan Vajravel Date: Fri, 22 Aug 2025 09:37:57 +0530 Subject: [PATCH 26/85] RDMA/bnxt_re: Add support for mirror vnic Added below support: - Querying the pre-reserved mirror_vnic_id - Allocating/freeing mirror_vnic - Configuring mirror vnic to associate it with raw qp These functions will be used in the subsequent patch in this series. Signed-off-by: Saravanan Vajravel Reviewed-by: Kashyap Desai Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250822040801.776196-7-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 4 ++ drivers/infiniband/hw/bnxt_re/main.c | 67 +++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index b315e40c2e9d..4bf10d7db77b 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -248,6 +248,10 @@ int bnxt_re_assign_pma_port_counters(struct bnxt_re_dev *rdev, struct ib_mad *ou int bnxt_re_assign_pma_port_ext_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad); +void bnxt_re_hwrm_free_vnic(struct bnxt_re_dev *rdev); +int bnxt_re_hwrm_alloc_vnic(struct bnxt_re_dev *rdev); +int bnxt_re_hwrm_cfg_vnic(struct bnxt_re_dev *rdev, u32 qp_id); + static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev) { if (rdev) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index a6ff21aa139a..a284f762738a 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -540,6 +540,72 @@ static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg, fw_msg->timeout = timeout; } +void bnxt_re_hwrm_free_vnic(struct bnxt_re_dev *rdev) +{ + struct bnxt_en_dev *en_dev = rdev->en_dev; + struct hwrm_vnic_free_input req = {}; + struct bnxt_fw_msg fw_msg = {}; + int rc; + + bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VNIC_FREE); + + req.vnic_id = cpu_to_le32(rdev->mirror_vnic_id); + bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), NULL, + 0, DFLT_HWRM_CMD_TIMEOUT); + rc = bnxt_send_msg(en_dev, &fw_msg); + if (rc) + ibdev_dbg(&rdev->ibdev, + "Failed to free vnic, rc = %d\n", rc); +} + +int bnxt_re_hwrm_alloc_vnic(struct bnxt_re_dev *rdev) +{ + struct bnxt_en_dev *en_dev = rdev->en_dev; + struct hwrm_vnic_alloc_output resp = {}; + struct hwrm_vnic_alloc_input req = {}; + struct bnxt_fw_msg fw_msg = {}; + int rc; + + bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VNIC_ALLOC); + + req.vnic_id = cpu_to_le16(rdev->mirror_vnic_id); + req.flags = cpu_to_le32(VNIC_ALLOC_REQ_FLAGS_VNIC_ID_VALID); + 
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, + sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); + rc = bnxt_send_msg(en_dev, &fw_msg); + if (rc) + ibdev_dbg(&rdev->ibdev, + "Failed to alloc vnic, rc = %d\n", rc); + + return rc; +} + +int bnxt_re_hwrm_cfg_vnic(struct bnxt_re_dev *rdev, u32 qp_id) +{ + struct bnxt_en_dev *en_dev = rdev->en_dev; + struct hwrm_vnic_cfg_input req = {}; + struct bnxt_fw_msg fw_msg = {}; + int rc; + + bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VNIC_CFG); + + req.flags = cpu_to_le32(VNIC_CFG_REQ_FLAGS_ROCE_ONLY_VNIC_MODE); + req.enables = cpu_to_le32(VNIC_CFG_REQ_ENABLES_RAW_QP_ID | + VNIC_CFG_REQ_ENABLES_MRU); + req.vnic_id = cpu_to_le16(rdev->mirror_vnic_id); + req.raw_qp_id = cpu_to_le32(qp_id); + req.mru = cpu_to_le16(rdev->netdev->mtu + VLAN_ETH_HLEN); + + bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), NULL, + 0, DFLT_HWRM_CMD_TIMEOUT); + rc = bnxt_send_msg(en_dev, &fw_msg); + if (rc) + ibdev_dbg(&rdev->ibdev, + "Failed to cfg vnic, rc = %d\n", rc); + + return rc; +} + /* Query device config using common hwrm */ static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len, u32 *offset) @@ -558,6 +624,7 @@ static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len, if (!rc) { *db_len = PAGE_ALIGN(le16_to_cpu(resp.l2_doorbell_bar_size_kb) * 1024); *offset = PAGE_ALIGN(le16_to_cpu(resp.legacy_l2_db_size_kb) * 1024); + rdev->mirror_vnic_id = le16_to_cpu(resp.mirror_vnic_id); } return rc; } From 7c7511f16512832486518fca3e61b5f5d45e329b Mon Sep 17 00:00:00 2001 From: Saravanan Vajravel Date: Fri, 22 Aug 2025 09:37:58 +0530 Subject: [PATCH 27/85] RDMA/bnxt_re: Add support for flow create/destroy - Added support for create_flow and destroy_flow verbs. These verbs are used on RawEth QP to add a specific flow action. - To support TCP dump on RoCE, added IB_FLOW_ATTR_SNIFFER attribute. - In create_flow verb, driver allocates mirror_vnic and configure it with RawEth QP. Once this is done, driver will enable mirroring. - In destroy_flow, driver will disable mirroring and free the mirror vnic. 
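From the consumer side, a hedged libibverbs sketch of how an application would arm the sniffer flow on a raw packet QP; QP/CQ setup and error handling are omitted, and this describes assumed typical usage rather than code from this series:

#include <infiniband/verbs.h>

/* "qp" is assumed to be an already-created IBV_QPT_RAW_PACKET QP on
 * the bnxt_re device; a single sniffer attribute with no extra specs
 * requests mirrored traffic, and ibv_destroy_flow() undoes it.
 */
static struct ibv_flow *arm_roce_mirror(struct ibv_qp *qp, uint8_t port)
{
	struct ibv_flow_attr attr = {
		.type         = IBV_FLOW_ATTR_SNIFFER,
		.size         = sizeof(attr),
		.num_of_specs = 0,
		.port         = port,
	};

	return ibv_create_flow(qp, &attr);	/* NULL on failure */
}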
Signed-off-by: Saravanan Vajravel Reviewed-by: Kashyap Desai Reviewed-by: Selvin Xavier Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250822040801.776196-8-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 1 + drivers/infiniband/hw/bnxt_re/ib_verbs.c | 87 ++++++++++++++++++++++++ drivers/infiniband/hw/bnxt_re/ib_verbs.h | 5 ++ drivers/infiniband/hw/bnxt_re/main.c | 2 + drivers/infiniband/hw/bnxt_re/qplib_sp.c | 37 ++++++++++ drivers/infiniband/hw/bnxt_re/qplib_sp.h | 2 + drivers/infiniband/hw/bnxt_re/roce_hsi.h | 40 ++++++++++- 7 files changed, 173 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 4bf10d7db77b..ae4476c87be7 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -232,6 +232,7 @@ struct bnxt_re_dev { u16 mirror_vnic_id; union ib_gid ugid; u32 ugid_index; + u8 sniffer_flow_created : 1; }; #define to_bnxt_re_dev(ptr, member) \ diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index c83809c72f5b..90c23d0ee262 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -4449,6 +4449,93 @@ void bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx) } } +static int bnxt_re_setup_vnic(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp) +{ + int rc; + + rc = bnxt_re_hwrm_alloc_vnic(rdev); + if (rc) + return rc; + + rc = bnxt_re_hwrm_cfg_vnic(rdev, qp->qplib_qp.id); + if (rc) + goto out_free_vnic; + + return 0; +out_free_vnic: + bnxt_re_hwrm_free_vnic(rdev); + return rc; +} + +struct ib_flow *bnxt_re_create_flow(struct ib_qp *ib_qp, + struct ib_flow_attr *attr, + struct ib_udata *udata) +{ + struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); + struct bnxt_re_dev *rdev = qp->rdev; + struct bnxt_re_flow *flow; + int rc; + + if (attr->type != IB_FLOW_ATTR_SNIFFER || + !rdev->rcfw.roce_mirror) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&rdev->qp_lock); + if (rdev->sniffer_flow_created) { + ibdev_err(&rdev->ibdev, "RoCE Mirroring is already Configured\n"); + mutex_unlock(&rdev->qp_lock); + return ERR_PTR(-EBUSY); + } + + flow = kzalloc(sizeof(*flow), GFP_KERNEL); + if (!flow) { + mutex_unlock(&rdev->qp_lock); + return ERR_PTR(-ENOMEM); + } + + flow->rdev = rdev; + + rc = bnxt_re_setup_vnic(rdev, qp); + if (rc) + goto out_free_flow; + + rc = bnxt_qplib_create_flow(&rdev->qplib_res); + if (rc) + goto out_free_vnic; + + rdev->sniffer_flow_created = 1; + mutex_unlock(&rdev->qp_lock); + + return &flow->ib_flow; + +out_free_vnic: + bnxt_re_hwrm_free_vnic(rdev); +out_free_flow: + mutex_unlock(&rdev->qp_lock); + kfree(flow); + return ERR_PTR(rc); +} + +int bnxt_re_destroy_flow(struct ib_flow *flow_id) +{ + struct bnxt_re_flow *flow = + container_of(flow_id, struct bnxt_re_flow, ib_flow); + struct bnxt_re_dev *rdev = flow->rdev; + int rc; + + mutex_lock(&rdev->qp_lock); + rc = bnxt_qplib_destroy_flow(&rdev->qplib_res); + if (rc) + ibdev_dbg(&rdev->ibdev, "failed to destroy_flow rc = %d\n", rc); + rdev->sniffer_flow_created = 0; + + bnxt_re_hwrm_free_vnic(rdev); + mutex_unlock(&rdev->qp_lock); + kfree(flow); + + return rc; +} + static struct bnxt_re_cq *bnxt_re_search_for_cq(struct bnxt_re_dev *rdev, u32 cq_id) { struct bnxt_re_cq *cq = NULL, *tmp_cq; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 445a28b3cd96..76ba9ab04d5c 100644 --- 
a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -272,6 +272,11 @@ struct ib_mr *bnxt_re_reg_user_mr_dmabuf(struct ib_pd *ib_pd, u64 start, struct uverbs_attr_bundle *attrs); int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata); void bnxt_re_dealloc_ucontext(struct ib_ucontext *context); +struct ib_flow *bnxt_re_create_flow(struct ib_qp *ib_qp, + struct ib_flow_attr *attr, + struct ib_udata *udata); +int bnxt_re_destroy_flow(struct ib_flow *flow_id); + int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index a284f762738a..8179501a0135 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1365,6 +1365,8 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .reg_user_mr_dmabuf = bnxt_re_reg_user_mr_dmabuf, .req_notify_cq = bnxt_re_req_notify_cq, .resize_cq = bnxt_re_resize_cq, + .create_flow = bnxt_re_create_flow, + .destroy_flow = bnxt_re_destroy_flow, INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah), INIT_RDMA_OBJ_SIZE(ib_cq, bnxt_re_cq, ib_cq), INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd), diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 3d68e44a2dce..698fcad9f034 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -1146,3 +1146,40 @@ out: dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr); return rc; } + +int bnxt_qplib_create_flow(struct bnxt_qplib_res *res) +{ + struct creq_roce_mirror_cfg_resp resp = {}; + struct bnxt_qplib_rcfw *rcfw = res->rcfw; + struct cmdq_roce_mirror_cfg req = {}; + struct bnxt_qplib_cmdqmsg msg = {}; + + bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, + CMDQ_BASE_OPCODE_ROCE_MIRROR_CFG, + sizeof(req)); + + req.mirror_flags = (u8)CMDQ_ROCE_MIRROR_CFG_MIRROR_ENABLE; + + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), + sizeof(resp), 0); + return bnxt_qplib_rcfw_send_message(rcfw, &msg); +} + +int bnxt_qplib_destroy_flow(struct bnxt_qplib_res *res) +{ + struct creq_roce_mirror_cfg_resp resp = {}; + struct bnxt_qplib_rcfw *rcfw = res->rcfw; + struct cmdq_roce_mirror_cfg req = {}; + struct bnxt_qplib_cmdqmsg msg = {}; + + bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, + CMDQ_BASE_OPCODE_ROCE_MIRROR_CFG, + sizeof(req)); + + req.mirror_flags &= ~((u8)CMDQ_ROCE_MIRROR_CFG_MIRROR_ENABLE); + + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), + sizeof(resp), 0); + + return bnxt_qplib_rcfw_send_message(rcfw, &msg); +} diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 58f90f3e57f7..147b5d9c0313 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -360,6 +360,8 @@ int bnxt_qplib_read_context(struct bnxt_qplib_rcfw *rcfw, u8 type, u32 xid, int bnxt_qplib_query_cc_param(struct bnxt_qplib_res *res, struct bnxt_qplib_cc_param *cc_param); void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw); +int bnxt_qplib_create_flow(struct bnxt_qplib_res *res); +int bnxt_qplib_destroy_flow(struct bnxt_qplib_res *res); #define BNXT_VAR_MAX_WQE 4352 #define BNXT_VAR_MAX_SLOT_ALIGN 256 diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index f9ac37335a1d..cfdf69a3fe9a 100644 --- 
a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -144,7 +144,8 @@ struct cmdq_base { #define CMDQ_BASE_OPCODE_MODIFY_CQ 0x90UL #define CMDQ_BASE_OPCODE_QUERY_QP_EXTEND 0x91UL #define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS_EXT 0x92UL - #define CMDQ_BASE_OPCODE_LAST CMDQ_BASE_OPCODE_QUERY_ROCE_STATS_EXT + #define CMDQ_BASE_OPCODE_ROCE_MIRROR_CFG 0x99UL + #define CMDQ_BASE_OPCODE_LAST CMDQ_BASE_OPCODE_ROCE_MIRROR_CFG u8 cmd_size; __le16 flags; __le16 cookie; @@ -2109,6 +2110,43 @@ struct creq_query_roce_stats_ext_resp_sb { __le64 dup_req; }; +/* cmdq_roce_mirror_cfg (size:192b/24B) */ +struct cmdq_roce_mirror_cfg { + u8 opcode; + #define CMDQ_ROCE_MIRROR_CFG_OPCODE_ROCE_MIRROR_CFG 0x99UL + #define CMDQ_ROCE_MIRROR_CFG_OPCODE_LAST \ + CMDQ_ROCE_MIRROR_CFG_OPCODE_ROCE_MIRROR_CFG + u8 cmd_size; + __le16 flags; + __le16 cookie; + u8 resp_size; + u8 reserved8; + __le64 resp_addr; + u8 mirror_flags; + #define CMDQ_ROCE_MIRROR_CFG_MIRROR_ENABLE 0x1UL + u8 rsvd[7]; +}; + +/* creq_roce_mirror_cfg_resp (size:128b/16B) */ +struct creq_roce_mirror_cfg_resp { + u8 type; + #define CREQ_ROCE_MIRROR_CFG_RESP_TYPE_MASK 0x3fUL + #define CREQ_ROCE_MIRROR_CFG_RESP_TYPE_SFT 0 + #define CREQ_ROCE_MIRROR_CFG_RESP_TYPE_QP_EVENT 0x38UL + #define CREQ_ROCE_MIRROR_CFG_RESP_TYPE_LAST \ + CREQ_ROCE_MIRROR_CFG_RESP_TYPE_QP_EVENT + u8 status; + __le16 cookie; + __le32 reserved32; + u8 v; + #define CREQ_ROCE_MIRROR_CFG_RESP_V 0x1UL + u8 event; + #define CREQ_ROCE_MIRROR_CFG_RESP_EVENT_ROCE_MIRROR_CFG 0x99UL + #define CREQ_ROCE_MIRROR_CFG_RESP_EVENT_LAST \ + CREQ_ROCE_MIRROR_CFG_RESP_EVENT_ROCE_MIRROR_CFG + u8 reserved48[6]; +}; + /* cmdq_query_func (size:128b/16B) */ struct cmdq_query_func { u8 opcode; From 959d10d642c7e4527edccf01f7b5970a6d838b84 Mon Sep 17 00:00:00 2001 From: Saravanan Vajravel Date: Fri, 22 Aug 2025 09:37:59 +0530 Subject: [PATCH 28/85] RDMA/bnxt_re: Initialize fw with roce_mirror support - Check FW capability for roce_mirror support. - Initialize FW with roce_mirror support. - When modifying QP, use unique GID for sgid in case of RawEth QP.
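For illustration only (not part of this patch): a minimal user-space sketch of how an application might exercise the sniffer flow path that this series enables, assuming libibverbs, an already-created IBV_QPT_RAW_PACKET QP, and a made-up helper name:

#include <infiniband/verbs.h>

/* Hypothetical helper: ask the driver to mirror RoCE traffic to "qp".
 * On the kernel side this lands in bnxt_re_create_flow(), which allocates
 * and configures the mirror VNIC before enabling the flow.
 */
static struct ibv_flow *request_mirror_flow(struct ibv_qp *qp, uint8_t port)
{
	struct ibv_flow_attr attr = {
		.type         = IBV_FLOW_ATTR_SNIFFER,
		.size         = sizeof(attr),
		.num_of_specs = 0,
		.port         = port,
	};

	/* Only one sniffer flow is accepted per device; a second request
	 * fails with EBUSY. Tear down with ibv_destroy_flow() when done.
	 */
	return ibv_create_flow(qp, &attr);
}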
Signed-off-by: Saravanan Vajravel Reviewed-by: Kashyap Desai Reviewed-by: Selvin Xavier Reviewed-by: Anantha Prabhu Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250822040801.776196-9-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 ++ drivers/infiniband/hw/bnxt_re/main.c | 2 ++ drivers/infiniband/hw/bnxt_re/qplib_fp.c | 12 +++++++++--- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 4 ++++ drivers/infiniband/hw/bnxt_re/roce_hsi.h | 1 + 5 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 90c23d0ee262..f12d6cd3ae93 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1037,6 +1037,8 @@ static u8 __from_ib_qp_type(enum ib_qp_type type) return CMDQ_CREATE_QP_TYPE_RC; case IB_QPT_UD: return CMDQ_CREATE_QP_TYPE_UD; + case IB_QPT_RAW_PACKET: + return CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE; default: return IB_QPT_MAX; } diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 8179501a0135..0225e1d2ada0 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -654,6 +654,8 @@ int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev) flags_ext2 = le32_to_cpu(resp.flags_ext2); cctx->modes.dbr_pacing = flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED || flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_V0_SUPPORTED; + cctx->modes.roce_mirror = !!(le32_to_cpu(resp.flags_ext3) & + FUNC_QCAPS_RESP_FLAGS_EXT3_MIRROR_ON_ROCE_SUPPORTED); return 0; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 092310571dcc..43a4ef76272d 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -1335,6 +1335,7 @@ static bool is_optimized_state_transition(struct bnxt_qplib_qp *qp) int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) { + struct bnxt_qplib_sgid_tbl *sgid_tbl = &res->sgid_tbl; struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct creq_modify_qp_resp resp = {}; struct bnxt_qplib_cmdqmsg msg = {}; @@ -1386,9 +1387,14 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL) req.flow_label = cpu_to_le32(qp->ah.flow_label); - if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX) - req.sgid_index = cpu_to_le16(res->sgid_tbl.hw_id - [qp->ah.sgid_index]); + if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX) { + if (qp->type == CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE) + req.sgid_index = + cpu_to_le16(sgid_tbl->hw_id[qp->ugid_index]); + else + req.sgid_index = + cpu_to_le16(sgid_tbl->hw_id[qp->ah.sgid_index]); + } if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT) req.hop_limit = qp->ah.hop_limit; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index b97e75404139..5e34395472c5 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -905,6 +905,10 @@ skip_ctx_setup: flags |= CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED; if (rcfw->res->en_dev->flags & BNXT_EN_FLAG_ROCE_VF_RES_MGMT) flags |= CMDQ_INITIALIZE_FW_FLAGS_L2_VF_RESOURCE_MGMT; + if (bnxt_qplib_roce_mirror_supported(rcfw->res->cctx)) { + flags |= CMDQ_INITIALIZE_FW_FLAGS_MIRROR_ON_ROCE_SUPPORTED; + rcfw->roce_mirror = true; + } req.flags |= cpu_to_le16(flags); 
req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id); bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0); diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index cfdf69a3fe9a..99ecd72e72e2 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -219,6 +219,7 @@ struct cmdq_initialize_fw { #define CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED 0x2UL #define CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED 0x8UL #define CMDQ_INITIALIZE_FW_FLAGS_L2_VF_RESOURCE_MGMT 0x10UL + #define CMDQ_INITIALIZE_FW_FLAGS_MIRROR_ON_ROCE_SUPPORTED 0x80UL __le16 cookie; u8 resp_size; u8 reserved8; From 6133c13154effc0a4c8f637ce750822f7d05a267 Mon Sep 17 00:00:00 2001 From: Saravanan Vajravel Date: Fri, 22 Aug 2025 09:38:00 +0530 Subject: [PATCH 29/85] RDMA/bnxt_re: Use firmware provided message timeout value Before this patch, the driver used a hardcoded value of 500 msec as the default L2 firmware message response timeout. With this commit, the driver uses the timeout value provided by the firmware. As part of this change, bnxt_re_query_hwrm_intf_version() is moved to bnxt_re_setup_chip_ctx() so that the timeout value is queried before the first command is sent. Signed-off-by: Saravanan Vajravel Reviewed-by: Selvin Xavier Reviewed-by: Kashyap Desai Reviewed-by: Bhargava Chenna Marreddy Co-developed-by: Kalesh AP Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250822040801.776196-10-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 3 +++ drivers/infiniband/hw/bnxt_re/main.c | 33 ++++++++++++++----------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index ae4476c87be7..297e0b79e885 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -286,4 +286,7 @@ static inline int bnxt_re_read_context_allowed(struct bnxt_re_dev *rdev) #define BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P7 192 #define BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P7 192 +#define BNXT_RE_HWRM_CMD_TIMEOUT(rdev) \ + ((rdev)->chip_ctx->hwrm_cmd_max_timeout * 1000) + #endif diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 0225e1d2ada0..c066c84a7044 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -80,6 +80,7 @@ MODULE_LICENSE("Dual BSD/GPL"); static DEFINE_MUTEX(bnxt_re_mutex); static int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev); +static int bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev); static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len, u32 *offset); @@ -188,6 +189,10 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev); rdev->qplib_res.en_dev = en_dev; + rc = bnxt_re_query_hwrm_intf_version(rdev); + if (rc) + goto free_dev_attr; + bnxt_re_set_drv_mode(rdev); bnxt_re_set_db_offset(rdev); @@ -551,7 +556,7 @@ void bnxt_re_hwrm_free_vnic(struct bnxt_re_dev *rdev) req.vnic_id = cpu_to_le32(rdev->mirror_vnic_id); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), NULL, - 0, DFLT_HWRM_CMD_TIMEOUT); + 0, BNXT_RE_HWRM_CMD_TIMEOUT(rdev)); rc = bnxt_send_msg(en_dev, &fw_msg); if (rc) ibdev_dbg(&rdev->ibdev, @@ -571,7 +576,7 @@ int bnxt_re_hwrm_alloc_vnic(struct bnxt_re_dev *rdev) req.vnic_id = cpu_to_le16(rdev->mirror_vnic_id); req.flags = 
cpu_to_le32(VNIC_ALLOC_REQ_FLAGS_VNIC_ID_VALID); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, - sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); + sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev)); rc = bnxt_send_msg(en_dev, &fw_msg); if (rc) ibdev_dbg(&rdev->ibdev, @@ -597,7 +602,7 @@ int bnxt_re_hwrm_cfg_vnic(struct bnxt_re_dev *rdev, u32 qp_id) req.mru = cpu_to_le16(rdev->netdev->mtu + VLAN_ETH_HLEN); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), NULL, - 0, DFLT_HWRM_CMD_TIMEOUT); + 0, BNXT_RE_HWRM_CMD_TIMEOUT(rdev)); rc = bnxt_send_msg(en_dev, &fw_msg); if (rc) ibdev_dbg(&rdev->ibdev, @@ -619,7 +624,7 @@ static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len, bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCFG); req.fid = cpu_to_le16(0xffff); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, - sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); + sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev)); rc = bnxt_send_msg(en_dev, &fw_msg); if (!rc) { *db_len = PAGE_ALIGN(le16_to_cpu(resp.l2_doorbell_bar_size_kb) * 1024); @@ -644,7 +649,7 @@ int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev) bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCAPS); req.fid = cpu_to_le16(0xffff); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, - sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); + sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev)); rc = bnxt_send_msg(en_dev, &fw_msg); if (rc) @@ -672,7 +677,7 @@ static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev) cctx = rdev->chip_ctx; bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_DBR_PACING_QCFG); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, - sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); + sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev)); rc = bnxt_send_msg(en_dev, &fw_msg); if (rc) return rc; @@ -932,7 +937,7 @@ static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, req.ring_type = type; req.ring_id = cpu_to_le16(fw_ring_id); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, - sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); + sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev)); rc = bnxt_send_msg(en_dev, &fw_msg); if (rc) ibdev_err(&rdev->ibdev, "Failed to free HW ring:%d :%#x", @@ -968,7 +973,7 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, req.ring_type = ring_attr->type; req.int_mode = ring_attr->mode; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, - sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); + sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev)); rc = bnxt_send_msg(en_dev, &fw_msg); if (!rc) *fw_ring_id = le16_to_cpu(resp.ring_id); @@ -994,7 +999,7 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_FREE); req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, - sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); + sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev)); rc = bnxt_send_msg(en_dev, &fw_msg); if (rc) ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x", @@ -1024,7 +1029,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size); req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, - sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); + sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev)); rc = bnxt_send_msg(en_dev, &fw_msg); if (!rc) stats->fw_id = le32_to_cpu(resp.stat_ctx_id); @@ -2017,7 +2022,7 @@ 
static void bnxt_re_net_register_async_event(struct bnxt_re_dev *rdev) ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE); } -static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) +static int bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_ver_get_output resp = {}; @@ -2036,7 +2041,7 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) if (rc) { ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x", rc); - return; + return rc; } cctx = rdev->chip_ctx; @@ -2050,6 +2055,8 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) if (!cctx->hwrm_cmd_max_timeout) cctx->hwrm_cmd_max_timeout = RCFW_FW_STALL_MAX_TIMEOUT; + + return 0; } static int bnxt_re_ib_init(struct bnxt_re_dev *rdev) @@ -2266,8 +2273,6 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) /* Check whether VF or PF */ bnxt_re_get_sriov_func_type(rdev); - bnxt_re_query_hwrm_intf_version(rdev); - /* Establish RCFW Communication Channel to initialize the context * memory for the function and all child VFs */ From aae757ec20d23c3c9251227eb5c8ca623afad007 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Fri, 22 Aug 2025 09:38:01 +0530 Subject: [PATCH 30/85] RDMA/bnxt_re: Remove unnecessary condition checks The checks for "rdev" and "en_dev" pointer validity always evaluate to false. Remove them. Reviewed-by: Saravanan Vajravel Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250822040801.776196-11-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/main.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index c066c84a7044..f872211717fd 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -916,20 +916,12 @@ static void bnxt_re_deinitialize_dbr_pacing(struct bnxt_re_dev *rdev) static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id, int type) { - struct bnxt_en_dev *en_dev; + struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_ring_free_input req = {}; struct hwrm_ring_free_output resp; struct bnxt_fw_msg fw_msg = {}; int rc = -EINVAL; - if (!rdev) - return rc; - - en_dev = rdev->en_dev; - - if (!en_dev) - return rc; - if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) return 0; @@ -955,9 +947,6 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, struct bnxt_fw_msg fw_msg = {}; int rc = -EINVAL; - if (!en_dev) - return rc; - bnxt_re_init_hwrm_hdr((void *)&req, HWRM_RING_ALLOC); req.enables = 0; req.page_tbl_addr = cpu_to_le64(ring_attr->dma_arr[0]); @@ -990,9 +979,6 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, struct bnxt_fw_msg fw_msg = {}; int rc = -EINVAL; - if (!en_dev) - return rc; - if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) return 0; @@ -1020,9 +1006,6 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, struct bnxt_fw_msg fw_msg = {}; int rc = -EINVAL; stats->fw_id = INVALID_STATS_CTX_ID; - if (!en_dev) - return rc; - bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_ALLOC); req.update_period_ms = cpu_to_le32(1000); req.stats_dma_addr = cpu_to_le64(stats->dma_map); From 372fdb5c75b61f038f4abf596abdcf01acbdb7af Mon Sep 17 00:00:00 2001 From: Edward Srouji Date: Sun, 24 Aug 2025 17:48:39 +0300 Subject: [PATCH 31/85] RDMA/mlx5: Fix page size bitmap calculation for KSM mode When using KSM (Key Scatter-gather Memory) access mode, the HW requires 
the IOVA to be aligned to the selected page size. Without this alignment, the HW may not function correctly. Currently, mlx5_umem_mkc_find_best_pgsz() does not filter out page sizes that would result in misaligned IOVAs for KSM mode. This can lead to selecting page sizes that are incompatible with the given IOVA. Fix this by filtering the page size bitmap when in KSM mode, keeping only page sizes to which the IOVA is aligned. Fixes: fcfb03597b7d ("RDMA/mlx5: Align mkc page size capability check to PRM") Signed-off-by: Edward Srouji Link: https://patch.msgid.link/20250824144839.154717-1-edwards@nvidia.com Reviewed-by: Michael Guralnik Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b43cf9c7e140..09d82d5f95e3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1805,6 +1805,10 @@ mlx5_umem_mkc_find_best_pgsz(struct mlx5_ib_dev *dev, struct ib_umem *umem, bitmap = GENMASK_ULL(max_log_entity_size_cap, min_log_entity_size_cap); + /* In KSM mode HW requires IOVA and mkey's page size to be aligned */ + if (access_mode == MLX5_MKC_ACCESS_MODE_KSM && iova) + bitmap &= GENMASK_ULL(__ffs64(iova), 0); + return ib_umem_find_best_pgsz(umem, bitmap, iova); } From 490a253cb4893043266622f24153026d454abcdb Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Tue, 26 Aug 2025 23:05:56 +0800 Subject: [PATCH 32/85] RDMA/rdmavt: Use int type to store negative error codes Change 'ret' from u32 to int in alloc_qpn() to store -EINVAL, and remove the 'bail' label as it simply returns 'ret'. Storing negative error codes in a u32 causes no runtime issues, but it is poor practice; the change to int has no runtime impact. Signed-off-by: Qianfeng Rong Link: https://patch.msgid.link/20250826150556.541440-1-rongqianfeng@vivo.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rdmavt/qp.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index e825e2ef7966..134a79eecfcb 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -492,7 +492,7 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, { u32 i, offset, max_scan, qpn; struct rvt_qpn_map *map; - u32 ret; + int ret; u32 max_qpn = exclude_prefix == RVT_AIP_QP_PREFIX ? RVT_AIP_QPN_MAX : RVT_QPN_MAX; @@ -510,7 +510,8 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, else qpt->flags |= n; spin_unlock(&qpt->lock); - goto bail; + + return ret; } qpn = qpt->last + qpt->incr; @@ -530,7 +531,8 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, if (!test_and_set_bit(offset, map->page)) { qpt->last = qpn; ret = qpn; - goto bail; + + return ret; } offset += qpt->incr; /* @@ -565,10 +567,7 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, qpn = mk_qpn(qpt, map, offset); } - ret = -ENOMEM; - -bail: - return ret; + return -ENOMEM; } /** From 2ed096dc41536ded233612ac9f308dcdefebd286 Mon Sep 17 00:00:00 2001 From: Xichao Zhao Date: Wed, 27 Aug 2025 20:00:07 +0800 Subject: [PATCH 33/85] RDMA/core: fix "truely"->"truly" Trivial fix to spelling mistake in comment text. 
Signed-off-by: Xichao Zhao Link: https://patch.msgid.link/20250827120007.489496-1-zhao.xichao@vivo.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 3145cb34a1d2..b4f3c835844a 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1543,7 +1543,7 @@ static void __ib_unregister_device(struct ib_device *ib_dev) /* * We have a registration lock so that all the calls to unregister are - * fully fenced, once any unregister returns the device is truely + * fully fenced, once any unregister returns the device is truly * unregistered even if multiple callers are unregistering it at the * same time. This also interacts with the registration flow and * provides sane semantics if register and unregister are racing. From f5b6b4639b3d999c507de349221c48af638c06dc Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 1 Sep 2025 17:00:39 +0200 Subject: [PATCH 34/85] RDMA/bnxt_re: Call strscpy() with correct size argument In bnxt_re_register_ib(), strscpy() is called with the length of the source string rather than the size of the destination buffer. This is fine as long as the destination buffer is larger than the source string, but we should still use the destination buffer size instead to call strscpy() as intended. And since 'node_desc' has a fixed size, we can safely omit the size argument and let strscpy() infer it using sizeof(). Signed-off-by: Thorsten Blum Link: https://patch.msgid.link/20250901150038.227036-2-thorsten.blum@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index f872211717fd..e5a6f36e1656 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1383,8 +1383,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) /* ib device init */ ibdev->node_type = RDMA_NODE_IB_CA; - strscpy(ibdev->node_desc, BNXT_RE_DESC " HCA", - strlen(BNXT_RE_DESC) + 5); + strscpy(ibdev->node_desc, BNXT_RE_DESC " HCA"); ibdev->phys_port_cnt = 1; addrconf_addr_eui48((u8 *)&ibdev->node_guid, rdev->netdev->dev_addr); From ffad4278c2ac31c91b563036121cdcf4d5dd45ec Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Wed, 3 Sep 2025 11:45:53 +0530 Subject: [PATCH 35/85] net: ionic: Create an auxiliary device for rdma driver To support RDMA capable ethernet device, create an auxiliary device in the ionic Ethernet driver. The RDMA device is modeled as an auxiliary device to the Ethernet device. 
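As a rough sketch only (not part of this patch), an RDMA driver would bind to the new device with a standard auxiliary driver. The match string assumes the parent Ethernet module is named "ionic" combined with the device name "rdma" registered below; the probe/remove bodies are placeholders:

#include <linux/auxiliary_bus.h>
#include <linux/module.h>
#include "ionic_api.h"	/* struct ionic_aux_dev added by this patch */

static int ionic_rdma_probe(struct auxiliary_device *adev,
			    const struct auxiliary_device_id *id)
{
	struct ionic_aux_dev *ionic_adev =
		container_of(adev, struct ionic_aux_dev, adev);

	/* The LIF handle is the bridge back into the Ethernet driver */
	dev_dbg(&adev->dev, "binding RDMA device to lif %p\n", ionic_adev->lif);
	return 0;
}

static void ionic_rdma_remove(struct auxiliary_device *adev)
{
	/* tear down whatever probe created */
}

static const struct auxiliary_device_id ionic_rdma_id_table[] = {
	{ .name = "ionic.rdma" },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(auxiliary, ionic_rdma_id_table);

static struct auxiliary_driver ionic_rdma_driver = {
	.name = "rdma",
	.probe = ionic_rdma_probe,
	.remove = ionic_rdma_remove,
	.id_table = ionic_rdma_id_table,
};
module_auxiliary_driver(ionic_rdma_driver);
MODULE_LICENSE("GPL");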
Reviewed-by: Shannon Nelson Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20250903061606.4139957-2-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- .../ethernet/pensando/ionic.rst | 10 +++ drivers/net/ethernet/pensando/Kconfig | 1 + drivers/net/ethernet/pensando/ionic/Makefile | 2 +- .../net/ethernet/pensando/ionic/ionic_api.h | 21 +++++ .../net/ethernet/pensando/ionic/ionic_aux.c | 80 +++++++++++++++++++ .../net/ethernet/pensando/ionic/ionic_aux.h | 10 +++ .../ethernet/pensando/ionic/ionic_bus_pci.c | 5 ++ .../net/ethernet/pensando/ionic/ionic_lif.c | 7 ++ .../net/ethernet/pensando/ionic/ionic_lif.h | 3 + 9 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/pensando/ionic/ionic_api.h create mode 100644 drivers/net/ethernet/pensando/ionic/ionic_aux.c create mode 100644 drivers/net/ethernet/pensando/ionic/ionic_aux.h diff --git a/Documentation/networking/device_drivers/ethernet/pensando/ionic.rst b/Documentation/networking/device_drivers/ethernet/pensando/ionic.rst index 05fe2b11bb18..a0029b6db31e 100644 --- a/Documentation/networking/device_drivers/ethernet/pensando/ionic.rst +++ b/Documentation/networking/device_drivers/ethernet/pensando/ionic.rst @@ -13,6 +13,7 @@ Contents - Identifying the Adapter - Enabling the driver - Configuring the driver +- RDMA Support via Auxiliary Device - Statistics - Support @@ -105,6 +106,15 @@ XDP Support for XDP includes the basics, plus Jumbo frames, Redirect and ndo_xmit. There is no current support for zero-copy sockets or HW offload. +RDMA Support via Auxiliary Device +================================= + +The ionic driver supports RDMA (Remote Direct Memory Access) functionality +through the Linux auxiliary device framework when advertised by the firmware. +RDMA capability is detected during device initialization, and if supported, +the ethernet driver will create an auxiliary device that allows the RDMA +driver to bind and provide InfiniBand/RoCE functionality. + Statistics ========== diff --git a/drivers/net/ethernet/pensando/Kconfig b/drivers/net/ethernet/pensando/Kconfig index 01fe76786f77..c99758adf3ad 100644 --- a/drivers/net/ethernet/pensando/Kconfig +++ b/drivers/net/ethernet/pensando/Kconfig @@ -24,6 +24,7 @@ config IONIC select NET_DEVLINK select DIMLIB select PAGE_POOL + select AUXILIARY_BUS help This enables the support for the Pensando family of Ethernet adapters. More specific information on this driver can be diff --git a/drivers/net/ethernet/pensando/ionic/Makefile b/drivers/net/ethernet/pensando/ionic/Makefile index 4e7642a2d25f..a598972fef41 100644 --- a/drivers/net/ethernet/pensando/ionic/Makefile +++ b/drivers/net/ethernet/pensando/ionic/Makefile @@ -5,5 +5,5 @@ obj-$(CONFIG_IONIC) := ionic.o ionic-y := ionic_main.o ionic_bus_pci.o ionic_devlink.o ionic_dev.o \ ionic_debugfs.o ionic_lif.o ionic_rx_filter.o ionic_ethtool.o \ - ionic_txrx.o ionic_stats.o ionic_fw.o + ionic_txrx.o ionic_stats.o ionic_fw.o ionic_aux.o ionic-$(CONFIG_PTP_1588_CLOCK) += ionic_phc.o diff --git a/drivers/net/ethernet/pensando/ionic/ionic_api.h b/drivers/net/ethernet/pensando/ionic/ionic_api.h new file mode 100644 index 000000000000..f9fcd1b67b35 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_api.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. 
*/ + +#ifndef _IONIC_API_H_ +#define _IONIC_API_H_ + +#include + +/** + * struct ionic_aux_dev - Auxiliary device information + * @lif: Logical interface + * @idx: Index identifier + * @adev: Auxiliary device + */ +struct ionic_aux_dev { + struct ionic_lif *lif; + int idx; + struct auxiliary_device adev; +}; + +#endif /* _IONIC_API_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_aux.c b/drivers/net/ethernet/pensando/ionic/ionic_aux.c new file mode 100644 index 000000000000..f3c2a5227b36 --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_aux.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */ + +#include +#include "ionic.h" +#include "ionic_lif.h" +#include "ionic_aux.h" + +static DEFINE_IDA(aux_ida); + +static void ionic_auxbus_release(struct device *dev) +{ + struct ionic_aux_dev *ionic_adev; + + ionic_adev = container_of(dev, struct ionic_aux_dev, adev.dev); + ida_free(&aux_ida, ionic_adev->adev.id); + kfree(ionic_adev); +} + +int ionic_auxbus_register(struct ionic_lif *lif) +{ + struct ionic_aux_dev *ionic_adev; + struct auxiliary_device *aux_dev; + int err, id; + + if (!(le64_to_cpu(lif->ionic->ident.lif.capabilities) & IONIC_LIF_CAP_RDMA)) + return 0; + + ionic_adev = kzalloc(sizeof(*ionic_adev), GFP_KERNEL); + if (!ionic_adev) + return -ENOMEM; + + aux_dev = &ionic_adev->adev; + + id = ida_alloc(&aux_ida, GFP_KERNEL); + if (id < 0) { + dev_err(lif->ionic->dev, "Failed to allocate aux id: %d\n", id); + kfree(ionic_adev); + return id; + } + + aux_dev->id = id; + aux_dev->name = "rdma"; + aux_dev->dev.parent = &lif->ionic->pdev->dev; + aux_dev->dev.release = ionic_auxbus_release; + ionic_adev->lif = lif; + err = auxiliary_device_init(aux_dev); + if (err) { + dev_err(lif->ionic->dev, "Failed to initialize %s aux device: %d\n", + aux_dev->name, err); + ida_free(&aux_ida, id); + kfree(ionic_adev); + return err; + } + + err = auxiliary_device_add(aux_dev); + if (err) { + dev_err(lif->ionic->dev, "Failed to add %s aux device: %d\n", + aux_dev->name, err); + auxiliary_device_uninit(aux_dev); + return err; + } + + lif->ionic_adev = ionic_adev; + return 0; +} + +void ionic_auxbus_unregister(struct ionic_lif *lif) +{ + mutex_lock(&lif->adev_lock); + if (!lif->ionic_adev) + goto out; + + auxiliary_device_delete(&lif->ionic_adev->adev); + auxiliary_device_uninit(&lif->ionic_adev->adev); + + lif->ionic_adev = NULL; +out: + mutex_unlock(&lif->adev_lock); +} diff --git a/drivers/net/ethernet/pensando/ionic/ionic_aux.h b/drivers/net/ethernet/pensando/ionic/ionic_aux.h new file mode 100644 index 000000000000..f5528a9f187d --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_aux.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. 
*/ + +#ifndef _IONIC_AUX_H_ +#define _IONIC_AUX_H_ + +int ionic_auxbus_register(struct ionic_lif *lif); +void ionic_auxbus_unregister(struct ionic_lif *lif); + +#endif /* _IONIC_AUX_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index 136bfa3516d0..f8752b1d2790 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -9,6 +9,7 @@ #include "ionic.h" #include "ionic_bus.h" #include "ionic_lif.h" +#include "ionic_aux.h" #include "ionic_debugfs.h" /* Supported devices */ @@ -375,6 +376,8 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_deregister_devlink; } + ionic_auxbus_register(ionic->lif); + mod_timer(&ionic->watchdog_timer, round_jiffies(jiffies + ionic->watchdog_period)); ionic_queue_doorbell_check(ionic, IONIC_NAPI_DEADLINE); @@ -416,6 +419,7 @@ static void ionic_remove(struct pci_dev *pdev) if (ionic->lif->doorbell_wa) cancel_delayed_work_sync(&ionic->doorbell_check_dwork); + ionic_auxbus_unregister(ionic->lif); ionic_lif_unregister(ionic->lif); ionic_devlink_unregister(ionic); ionic_lif_deinit(ionic->lif); @@ -445,6 +449,7 @@ static void ionic_reset_prepare(struct pci_dev *pdev) timer_delete_sync(&ionic->watchdog_timer); cancel_work_sync(&lif->deferred.work); + ionic_auxbus_unregister(ionic->lif); mutex_lock(&lif->queue_lock); ionic_stop_queues_reconfig(lif); ionic_txrx_free(lif); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 48cb5d30b5f6..8ed5d2e5fde4 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -19,6 +19,7 @@ #include "ionic_bus.h" #include "ionic_dev.h" #include "ionic_lif.h" +#include "ionic_aux.h" #include "ionic_txrx.h" #include "ionic_ethtool.h" #include "ionic_debugfs.h" @@ -3293,6 +3294,7 @@ int ionic_lif_alloc(struct ionic *ionic) mutex_init(&lif->queue_lock); mutex_init(&lif->config_lock); + mutex_init(&lif->adev_lock); spin_lock_init(&lif->adminq_lock); @@ -3349,6 +3351,7 @@ err_out_free_lif_info: lif->info = NULL; lif->info_pa = 0; err_out_free_mutex: + mutex_destroy(&lif->adev_lock); mutex_destroy(&lif->config_lock); mutex_destroy(&lif->queue_lock); err_out_free_netdev: @@ -3384,6 +3387,7 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif) netif_device_detach(lif->netdev); + ionic_auxbus_unregister(ionic->lif); mutex_lock(&lif->queue_lock); if (test_bit(IONIC_LIF_F_UP, lif->state)) { dev_info(ionic->dev, "Surprise FW stop, stopping queues\n"); @@ -3446,6 +3450,8 @@ int ionic_restart_lif(struct ionic_lif *lif) netif_device_attach(lif->netdev); ionic_queue_doorbell_check(ionic, IONIC_NAPI_DEADLINE); + ionic_auxbus_register(ionic->lif); + return 0; err_txrx_free: @@ -3528,6 +3534,7 @@ void ionic_lif_free(struct ionic_lif *lif) mutex_destroy(&lif->config_lock); mutex_destroy(&lif->queue_lock); + mutex_destroy(&lif->adev_lock); /* free netdev & lif */ ionic_debugfs_del_lif(lif); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index e01756fb7fdd..43bdd0fb8733 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -10,6 +10,7 @@ #include #include #include "ionic_rx_filter.h" +#include "ionic_api.h" #define IONIC_ADMINQ_LENGTH 16 /* must be a power of two */ #define IONIC_NOTIFYQ_LENGTH 64 /* must be a power of two 
*/ @@ -225,6 +226,8 @@ struct ionic_lif { dma_addr_t info_pa; u32 info_sz; struct ionic_qtype_info qtype_info[IONIC_QTYPE_MAX]; + struct ionic_aux_dev *ionic_adev; + struct mutex adev_lock; /* lock for aux_dev actions */ u8 rss_hash_key[IONIC_RSS_HASH_KEY_SIZE]; u8 *rss_ind_tbl; From 85372e4a493e413f1c1ee1850c51ad2d3a637fff Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Wed, 3 Sep 2025 11:45:54 +0530 Subject: [PATCH 36/85] net: ionic: Update LIF identity with additional RDMA capabilities Firmware sends the RDMA capability in a response for LIF_IDENTIFY device command. Update the LIF indentify with additional RDMA capabilities used by driver and firmware. Reviewed-by: Shannon Nelson Reviewed-by: Simon Horman Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20250903061606.4139957-3-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- .../net/ethernet/pensando/ionic/ionic_if.h | 29 +++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index 9886cd66ce68..77c3dc188264 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -494,6 +494,16 @@ union ionic_lif_config { __le32 words[64]; }; +/** + * enum ionic_lif_rdma_cap_stats - LIF stat type + * @IONIC_LIF_RDMA_STAT_GLOBAL: Global stats + * @IONIC_LIF_RDMA_STAT_QP: Queue pair stats + */ +enum ionic_lif_rdma_cap_stats { + IONIC_LIF_RDMA_STAT_GLOBAL = BIT(0), + IONIC_LIF_RDMA_STAT_QP = BIT(1), +}; + /** * struct ionic_lif_identity - LIF identity information (type-specific) * @@ -513,10 +523,10 @@ union ionic_lif_config { * @eth.config: LIF config struct with features, mtu, mac, q counts * * @rdma: RDMA identify structure - * @rdma.version: RDMA version of opcodes and queue descriptors + * @rdma.version: RDMA capability version * @rdma.qp_opcodes: Number of RDMA queue pair opcodes supported * @rdma.admin_opcodes: Number of RDMA admin opcodes supported - * @rdma.rsvd: reserved byte(s) + * @rdma.minor_version: RDMA capability minor version * @rdma.npts_per_lif: Page table size per LIF * @rdma.nmrs_per_lif: Number of memory regions per LIF * @rdma.nahs_per_lif: Number of address handles per LIF @@ -526,12 +536,17 @@ union ionic_lif_config { * @rdma.rrq_stride: Remote RQ work request stride * @rdma.rsq_stride: Remote SQ work request stride * @rdma.dcqcn_profiles: Number of DCQCN profiles - * @rdma.rsvd_dimensions: reserved byte(s) + * @rdma.udma_shift: Log2 number of queues per queue group + * @rdma.rsvd_dimensions: Reserved byte + * @rdma.page_size_cap: Supported page sizes * @rdma.aq_qtype: RDMA Admin Qtype * @rdma.sq_qtype: RDMA Send Qtype * @rdma.rq_qtype: RDMA Receive Qtype * @rdma.cq_qtype: RDMA Completion Qtype * @rdma.eq_qtype: RDMA Event Qtype + * @rdma.stats_type: Supported statistics type + * (enum ionic_lif_rdma_cap_stats) + * @rdma.rsvd1: Reserved byte(s) * @words: word access to struct contents */ union ionic_lif_identity { @@ -557,7 +572,7 @@ union ionic_lif_identity { u8 version; u8 qp_opcodes; u8 admin_opcodes; - u8 rsvd; + u8 minor_version; __le32 npts_per_lif; __le32 nmrs_per_lif; __le32 nahs_per_lif; @@ -567,12 +582,16 @@ union ionic_lif_identity { u8 rrq_stride; u8 rsq_stride; u8 dcqcn_profiles; - u8 rsvd_dimensions[10]; + u8 udma_shift; + u8 rsvd_dimensions; + __le64 page_size_cap; struct ionic_lif_logical_qtype aq_qtype; struct ionic_lif_logical_qtype sq_qtype; struct ionic_lif_logical_qtype rq_qtype; struct 
ionic_lif_logical_qtype cq_qtype; struct ionic_lif_logical_qtype eq_qtype; + __le16 stats_type; + u8 rsvd1[162]; } __packed rdma; } __packed; __le32 words[478]; From 089bbf4836c6e6480a6eade64b8ac8b2b5b2da04 Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Wed, 3 Sep 2025 11:45:55 +0530 Subject: [PATCH 37/85] net: ionic: Export the APIs from net driver to support device commands RDMA driver needs to establish admin queues to support admin operations. Export the APIs to send device commands for the RDMA driver. Reviewed-by: Shannon Nelson Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20250903061606.4139957-4-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/pensando/ionic/ionic.h | 7 ---- .../net/ethernet/pensando/ionic/ionic_api.h | 36 +++++++++++++++++++ .../net/ethernet/pensando/ionic/ionic_dev.h | 1 + .../net/ethernet/pensando/ionic/ionic_main.c | 4 ++- 4 files changed, 40 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h index 04f00ea94230..85198e6a806e 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic.h +++ b/drivers/net/ethernet/pensando/ionic/ionic.h @@ -65,16 +65,9 @@ struct ionic { int watchdog_period; }; -struct ionic_admin_ctx { - struct completion work; - union ionic_adminq_cmd cmd; - union ionic_adminq_comp comp; -}; - int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx); int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, const int err, const bool do_msg); -int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx); int ionic_adminq_post_wait_nomsg(struct ionic_lif *lif, struct ionic_admin_ctx *ctx); void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode, u8 status, int err); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_api.h b/drivers/net/ethernet/pensando/ionic/ionic_api.h index f9fcd1b67b35..d75902ca34af 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_api.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_api.h @@ -5,6 +5,8 @@ #define _IONIC_API_H_ #include +#include "ionic_if.h" +#include "ionic_regs.h" /** * struct ionic_aux_dev - Auxiliary device information @@ -18,4 +20,38 @@ struct ionic_aux_dev { struct auxiliary_device adev; }; +/** + * struct ionic_admin_ctx - Admin command context + * @work: Work completion wait queue element + * @cmd: Admin command (64B) to be copied to the queue + * @comp: Admin completion (16B) copied from the queue + */ +struct ionic_admin_ctx { + struct completion work; + union ionic_adminq_cmd cmd; + union ionic_adminq_comp comp; +}; + +/** + * ionic_adminq_post_wait - Post an admin command and wait for response + * @lif: Logical interface + * @ctx: API admin command context + * + * Post the command to an admin queue in the ethernet driver. If this command + * succeeds, then the command has been posted, but that does not indicate a + * completion. If this command returns success, then the completion callback + * will eventually be called. 
+ * + * Return: zero or negative error status + */ +int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx); + +/** + * ionic_error_to_errno - Transform ionic_if errors to os errno + * @code: Ionic error number + * + * Return: Negative OS error number or zero + */ +int ionic_error_to_errno(enum ionic_status_code code); + #endif /* _IONIC_API_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index c8c710cfe70c..bc26eb8f5779 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -12,6 +12,7 @@ #include "ionic_if.h" #include "ionic_regs.h" +#include "ionic_api.h" #define IONIC_MAX_TX_DESC 8192 #define IONIC_MAX_RX_DESC 16384 diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 0e60a6bef99a..14dc055be3e9 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -72,7 +72,7 @@ static const char *ionic_error_to_str(enum ionic_status_code code) } } -static int ionic_error_to_errno(enum ionic_status_code code) +int ionic_error_to_errno(enum ionic_status_code code) { switch (code) { case IONIC_RC_SUCCESS: @@ -114,6 +114,7 @@ static int ionic_error_to_errno(enum ionic_status_code code) return -EIO; } } +EXPORT_SYMBOL_NS(ionic_error_to_errno, "NET_IONIC"); static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode) { @@ -480,6 +481,7 @@ int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) { return __ionic_adminq_post_wait(lif, ctx, true); } +EXPORT_SYMBOL_NS(ionic_adminq_post_wait, "NET_IONIC"); int ionic_adminq_post_wait_nomsg(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) { From 0e02faffdb80a95fdd8fb8482eecd3f006441c37 Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Wed, 3 Sep 2025 11:45:56 +0530 Subject: [PATCH 38/85] net: ionic: Provide RDMA reset support for the RDMA driver The Ethernet driver holds the privilege to execute the device commands. Export the function to execute RDMA reset command for use by RDMA driver. Reviewed-by: Shannon Nelson Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20250903061606.4139957-5-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- .../net/ethernet/pensando/ionic/ionic_api.h | 9 ++++++++ .../net/ethernet/pensando/ionic/ionic_aux.c | 22 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_api.h b/drivers/net/ethernet/pensando/ionic/ionic_api.h index d75902ca34af..e0b766d1769f 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_api.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_api.h @@ -54,4 +54,13 @@ int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx); */ int ionic_error_to_errno(enum ionic_status_code code); +/** + * ionic_request_rdma_reset - request reset or disable the device or lif + * @lif: Logical interface + * + * The reset is triggered asynchronously. It will wait until reset request + * completes or times out. 
+ */ +void ionic_request_rdma_reset(struct ionic_lif *lif); + #endif /* _IONIC_API_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_aux.c b/drivers/net/ethernet/pensando/ionic/ionic_aux.c index f3c2a5227b36..a2be338eb3e5 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_aux.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_aux.c @@ -78,3 +78,25 @@ void ionic_auxbus_unregister(struct ionic_lif *lif) out: mutex_unlock(&lif->adev_lock); } + +void ionic_request_rdma_reset(struct ionic_lif *lif) +{ + struct ionic *ionic = lif->ionic; + int err; + + union ionic_dev_cmd cmd = { + .cmd.opcode = IONIC_CMD_RDMA_RESET_LIF, + .cmd.lif_index = cpu_to_le16(lif->index), + }; + + mutex_lock(&ionic->dev_cmd_lock); + + ionic_dev_cmd_go(&ionic->idev, &cmd); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + + mutex_unlock(&ionic->dev_cmd_lock); + + if (err) + pr_warn("%s request_reset: error %d\n", __func__, err); +} +EXPORT_SYMBOL_NS(ionic_request_rdma_reset, "NET_IONIC"); From 2dc6a6a6782def51879ff3e739f6e9706d6c1c4f Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Wed, 3 Sep 2025 11:45:57 +0530 Subject: [PATCH 39/85] net: ionic: Provide interrupt allocation support for the RDMA driver RDMA driver needs an interrupt for an event queue. Export function from net driver to allocate an interrupt. Reviewed-by: Shannon Nelson Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20250903061606.4139957-6-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- .../net/ethernet/pensando/ionic/ionic_api.h | 43 +++++++++++++++++++ .../net/ethernet/pensando/ionic/ionic_dev.h | 13 ------ .../net/ethernet/pensando/ionic/ionic_lif.c | 38 ++++++++-------- 3 files changed, 62 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_api.h b/drivers/net/ethernet/pensando/ionic/ionic_api.h index e0b766d1769f..5fd23aa8c5a1 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_api.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_api.h @@ -32,6 +32,29 @@ struct ionic_admin_ctx { union ionic_adminq_comp comp; }; +#define IONIC_INTR_INDEX_NOT_ASSIGNED -1 +#define IONIC_INTR_NAME_MAX_SZ 32 + +/** + * struct ionic_intr_info - Interrupt information + * @name: Name identifier + * @rearm_count: Interrupt rearm count + * @index: Interrupt index position + * @vector: Interrupt number + * @dim_coal_hw: Interrupt coalesce value in hardware units + * @affinity_mask: CPU affinity mask + * @aff_notify: context for notification of IRQ affinity changes + */ +struct ionic_intr_info { + char name[IONIC_INTR_NAME_MAX_SZ]; + u64 rearm_count; + unsigned int index; + unsigned int vector; + u32 dim_coal_hw; + cpumask_var_t *affinity_mask; + struct irq_affinity_notify aff_notify; +}; + /** * ionic_adminq_post_wait - Post an admin command and wait for response * @lif: Logical interface @@ -63,4 +86,24 @@ int ionic_error_to_errno(enum ionic_status_code code); */ void ionic_request_rdma_reset(struct ionic_lif *lif); +/** + * ionic_intr_alloc - Reserve a device interrupt + * @lif: Logical interface + * @intr: Reserved ionic interrupt structure + * + * Reserve an interrupt index and get irq number for that index. + * + * Return: zero or negative error status + */ +int ionic_intr_alloc(struct ionic_lif *lif, struct ionic_intr_info *intr); + +/** + * ionic_intr_free - Release a device interrupt index + * @lif: Logical interface + * @intr: Interrupt index + * + * Mark the interrupt index unused so that it can be reserved again. 
+ */ +void ionic_intr_free(struct ionic_lif *lif, int intr); + #endif /* _IONIC_API_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index bc26eb8f5779..68cf4da3c6b3 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -274,19 +274,6 @@ struct ionic_queue { char name[IONIC_QUEUE_NAME_MAX_SZ]; } ____cacheline_aligned_in_smp; -#define IONIC_INTR_INDEX_NOT_ASSIGNED -1 -#define IONIC_INTR_NAME_MAX_SZ 32 - -struct ionic_intr_info { - char name[IONIC_INTR_NAME_MAX_SZ]; - u64 rearm_count; - unsigned int index; - unsigned int vector; - u32 dim_coal_hw; - cpumask_var_t *affinity_mask; - struct irq_affinity_notify aff_notify; -}; - struct ionic_cq { struct ionic_lif *lif; struct ionic_queue *bound_q; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 8ed5d2e5fde4..276024002484 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -244,29 +244,36 @@ static int ionic_request_irq(struct ionic_lif *lif, struct ionic_qcq *qcq) 0, intr->name, &qcq->napi); } -static int ionic_intr_alloc(struct ionic_lif *lif, struct ionic_intr_info *intr) +int ionic_intr_alloc(struct ionic_lif *lif, struct ionic_intr_info *intr) { struct ionic *ionic = lif->ionic; - int index; + int index, err; index = find_first_zero_bit(ionic->intrs, ionic->nintrs); - if (index == ionic->nintrs) { - netdev_warn(lif->netdev, "%s: no intr, index=%d nintrs=%d\n", - __func__, index, ionic->nintrs); + if (index == ionic->nintrs) return -ENOSPC; - } set_bit(index, ionic->intrs); ionic_intr_init(&ionic->idev, intr, index); + err = ionic_bus_get_irq(ionic, intr->index); + if (err < 0) { + clear_bit(index, ionic->intrs); + return err; + } + + intr->vector = err; + return 0; } +EXPORT_SYMBOL_NS(ionic_intr_alloc, "NET_IONIC"); -static void ionic_intr_free(struct ionic *ionic, int index) +void ionic_intr_free(struct ionic_lif *lif, int index) { - if (index != IONIC_INTR_INDEX_NOT_ASSIGNED && index < ionic->nintrs) - clear_bit(index, ionic->intrs); + if (index != IONIC_INTR_INDEX_NOT_ASSIGNED && index < lif->ionic->nintrs) + clear_bit(index, lif->ionic->intrs); } +EXPORT_SYMBOL_NS(ionic_intr_free, "NET_IONIC"); static void ionic_irq_aff_notify(struct irq_affinity_notify *notify, const cpumask_t *mask) @@ -401,7 +408,7 @@ static void ionic_qcq_intr_free(struct ionic_lif *lif, struct ionic_qcq *qcq) irq_set_affinity_hint(qcq->intr.vector, NULL); devm_free_irq(lif->ionic->dev, qcq->intr.vector, &qcq->napi); qcq->intr.vector = 0; - ionic_intr_free(lif->ionic, qcq->intr.index); + ionic_intr_free(lif, qcq->intr.index); qcq->intr.index = IONIC_INTR_INDEX_NOT_ASSIGNED; } @@ -511,13 +518,6 @@ static int ionic_alloc_qcq_interrupt(struct ionic_lif *lif, struct ionic_qcq *qc goto err_out; } - err = ionic_bus_get_irq(lif->ionic, qcq->intr.index); - if (err < 0) { - netdev_warn(lif->netdev, "no vector for %s: %d\n", - qcq->q.name, err); - goto err_out_free_intr; - } - qcq->intr.vector = err; ionic_intr_mask_assert(lif->ionic->idev.intr_ctrl, qcq->intr.index, IONIC_INTR_MASK_SET); @@ -546,7 +546,7 @@ static int ionic_alloc_qcq_interrupt(struct ionic_lif *lif, struct ionic_qcq *qc return 0; err_out_free_intr: - ionic_intr_free(lif->ionic, qcq->intr.index); + ionic_intr_free(lif, qcq->intr.index); err_out: return err; } @@ -741,7 +741,7 @@ err_out_free_q: err_out_free_irq: if (flags & IONIC_QCQ_F_INTR) { 
devm_free_irq(dev, new->intr.vector, &new->napi); - ionic_intr_free(lif->ionic, new->intr.index); + ionic_intr_free(lif, new->intr.index); } err_out_free_page_pool: page_pool_destroy(new->q.page_pool); From 11016c795eec341e8f087e95dea78189b88057a6 Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Wed, 3 Sep 2025 11:45:58 +0530 Subject: [PATCH 40/85] net: ionic: Provide doorbell and CMB region information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RDMA device needs information of controller memory bar and doorbell capability to share with user context. Discover CMB regions and express doorbell capabilities on device init. Reviewed-by: Shannon Nelson Co-developed-by: Pablo Cascón Signed-off-by: Pablo Cascón Co-developed-by: Allen Hubbe Signed-off-by: Allen Hubbe Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20250903061606.4139957-7-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- .../net/ethernet/pensando/ionic/ionic_api.h | 22 ++ .../ethernet/pensando/ionic/ionic_bus_pci.c | 2 + .../net/ethernet/pensando/ionic/ionic_dev.c | 270 +++++++++++++++++- .../net/ethernet/pensando/ionic/ionic_dev.h | 14 +- .../net/ethernet/pensando/ionic/ionic_if.h | 89 ++++++ .../net/ethernet/pensando/ionic/ionic_lif.c | 2 +- 6 files changed, 381 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_api.h b/drivers/net/ethernet/pensando/ionic/ionic_api.h index 5fd23aa8c5a1..bd88666836b8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_api.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_api.h @@ -106,4 +106,26 @@ int ionic_intr_alloc(struct ionic_lif *lif, struct ionic_intr_info *intr); */ void ionic_intr_free(struct ionic_lif *lif, int intr); +/** + * ionic_get_cmb - Reserve cmb pages + * @lif: Logical interface + * @pgid: First page index + * @pgaddr: First page bus addr (contiguous) + * @order: Log base two number of pages (PAGE_SIZE) + * @stride_log2: Size of stride to determine CMB pool + * @expdb: Will be set to true if this CMB region has expdb enabled + * + * Return: zero or negative error status + */ +int ionic_get_cmb(struct ionic_lif *lif, u32 *pgid, phys_addr_t *pgaddr, + int order, u8 stride_log2, bool *expdb); + +/** + * ionic_put_cmb - Release cmb pages + * @lif: Logical interface + * @pgid: First page index + * @order: Log base two number of pages (PAGE_SIZE) + */ +void ionic_put_cmb(struct ionic_lif *lif, u32 pgid, int order); + #endif /* _IONIC_API_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index f8752b1d2790..70d86c5f52fb 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -272,6 +272,8 @@ static int ionic_setup_one(struct ionic *ionic) } ionic_debugfs_add_ident(ionic); + ionic_map_cmb(ionic); + err = ionic_init(ionic); if (err) { dev_err(dev, "Cannot init device: %d, aborting\n", err); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index 093c5358b6e8..ab27e9225c1e 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -199,13 +199,201 @@ void ionic_init_devinfo(struct ionic *ionic) dev_dbg(ionic->dev, "fw_version %s\n", idev->dev_info.fw_version); } +static void ionic_map_disc_cmb(struct ionic *ionic) +{ + struct ionic_identity *ident = &ionic->ident; + u32 length_reg0, length, offset, num_regions; 
+ struct ionic_dev_bar *bar = ionic->bars; + struct ionic_dev *idev = &ionic->idev; + struct device *dev = ionic->dev; + int err, sz, i; + u64 end; + + mutex_lock(&ionic->dev_cmd_lock); + + ionic_dev_cmd_discover_cmb(idev); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + if (!err) { + sz = min(sizeof(ident->cmb_layout), + sizeof(idev->dev_cmd_regs->data)); + memcpy_fromio(&ident->cmb_layout, + &idev->dev_cmd_regs->data, sz); + } + mutex_unlock(&ionic->dev_cmd_lock); + + if (err) { + dev_warn(dev, "Cannot discover CMB layout, disabling CMB\n"); + return; + } + + bar += 2; + + num_regions = le32_to_cpu(ident->cmb_layout.num_regions); + if (!num_regions || num_regions > IONIC_MAX_CMB_REGIONS) { + dev_warn(dev, "Invalid number of CMB entries (%d)\n", + num_regions); + return; + } + + dev_dbg(dev, "ionic_cmb_layout_identity num_regions %d flags %x:\n", + num_regions, ident->cmb_layout.flags); + + for (i = 0; i < num_regions; i++) { + offset = le32_to_cpu(ident->cmb_layout.region[i].offset); + length = le32_to_cpu(ident->cmb_layout.region[i].length); + end = offset + length; + + dev_dbg(dev, "CMB entry %d: bar_num %u cmb_type %u offset %x length %u\n", + i, ident->cmb_layout.region[i].bar_num, + ident->cmb_layout.region[i].cmb_type, + offset, length); + + if (end > (bar->len >> IONIC_CMB_SHIFT_64K)) { + dev_warn(dev, "Out of bounds CMB region %d offset %x length %u\n", + i, offset, length); + return; + } + } + + /* if first entry matches PCI config, expdb is not supported */ + if (ident->cmb_layout.region[0].bar_num == bar->res_index && + le32_to_cpu(ident->cmb_layout.region[0].length) == bar->len && + !ident->cmb_layout.region[0].offset) { + dev_warn(dev, "No CMB mapping discovered\n"); + return; + } + + /* process first entry for regular mapping */ + length_reg0 = le32_to_cpu(ident->cmb_layout.region[0].length); + if (!length_reg0) { + dev_warn(dev, "region len = 0. 
No CMB mapping discovered\n"); + return; + } + + /* Verify first entry size matches expected 8MB size (in 64KB pages) */ + if (length_reg0 != IONIC_BAR2_CMB_ENTRY_SIZE >> IONIC_CMB_SHIFT_64K) { + dev_warn(dev, "Unexpected CMB size in entry 0: %u pages\n", + length_reg0); + return; + } + + sz = BITS_TO_LONGS((length_reg0 << IONIC_CMB_SHIFT_64K) / + PAGE_SIZE) * sizeof(long); + idev->cmb_inuse = kzalloc(sz, GFP_KERNEL); + if (!idev->cmb_inuse) { + dev_warn(dev, "No memory for CMB, disabling\n"); + idev->phy_cmb_pages = 0; + idev->phy_cmb_expdb64_pages = 0; + idev->phy_cmb_expdb128_pages = 0; + idev->phy_cmb_expdb256_pages = 0; + idev->phy_cmb_expdb512_pages = 0; + idev->cmb_npages = 0; + return; + } + + for (i = 0; i < num_regions; i++) { + /* check this region matches first region length as to + * ease implementation + */ + if (le32_to_cpu(ident->cmb_layout.region[i].length) != + length_reg0) + continue; + + offset = le32_to_cpu(ident->cmb_layout.region[i].offset); + + switch (ident->cmb_layout.region[i].cmb_type) { + case IONIC_CMB_TYPE_DEVMEM: + idev->phy_cmb_pages = bar->bus_addr + offset; + idev->cmb_npages = + (length_reg0 << IONIC_CMB_SHIFT_64K) / PAGE_SIZE; + dev_dbg(dev, "regular cmb mapping: bar->bus_addr %pa region[%d].length %u\n", + &bar->bus_addr, i, length); + dev_dbg(dev, "idev->phy_cmb_pages %pad, idev->cmb_npages %u\n", + &idev->phy_cmb_pages, idev->cmb_npages); + break; + + case IONIC_CMB_TYPE_EXPDB64: + idev->phy_cmb_expdb64_pages = + bar->bus_addr + (offset << IONIC_CMB_SHIFT_64K); + dev_dbg(dev, "idev->phy_cmb_expdb64_pages %pad\n", + &idev->phy_cmb_expdb64_pages); + break; + + case IONIC_CMB_TYPE_EXPDB128: + idev->phy_cmb_expdb128_pages = + bar->bus_addr + (offset << IONIC_CMB_SHIFT_64K); + dev_dbg(dev, "idev->phy_cmb_expdb128_pages %pad\n", + &idev->phy_cmb_expdb128_pages); + break; + + case IONIC_CMB_TYPE_EXPDB256: + idev->phy_cmb_expdb256_pages = + bar->bus_addr + (offset << IONIC_CMB_SHIFT_64K); + dev_dbg(dev, "idev->phy_cmb_expdb256_pages %pad\n", + &idev->phy_cmb_expdb256_pages); + break; + + case IONIC_CMB_TYPE_EXPDB512: + idev->phy_cmb_expdb512_pages = + bar->bus_addr + (offset << IONIC_CMB_SHIFT_64K); + dev_dbg(dev, "idev->phy_cmb_expdb512_pages %pad\n", + &idev->phy_cmb_expdb512_pages); + break; + + default: + dev_warn(dev, "[%d] Invalid cmb_type (%d)\n", + i, ident->cmb_layout.region[i].cmb_type); + break; + } + } +} + +static void ionic_map_classic_cmb(struct ionic *ionic) +{ + struct ionic_dev_bar *bar = ionic->bars; + struct ionic_dev *idev = &ionic->idev; + struct device *dev = ionic->dev; + int sz; + + bar += 2; + /* classic CMB mapping */ + idev->phy_cmb_pages = bar->bus_addr; + idev->cmb_npages = bar->len / PAGE_SIZE; + dev_dbg(dev, "classic cmb mapping: bar->bus_addr %pa bar->len %lu\n", + &bar->bus_addr, bar->len); + dev_dbg(dev, "idev->phy_cmb_pages %pad, idev->cmb_npages %u\n", + &idev->phy_cmb_pages, idev->cmb_npages); + + sz = BITS_TO_LONGS(idev->cmb_npages) * sizeof(long); + idev->cmb_inuse = kzalloc(sz, GFP_KERNEL); + if (!idev->cmb_inuse) { + idev->phy_cmb_pages = 0; + idev->cmb_npages = 0; + } +} + +void ionic_map_cmb(struct ionic *ionic) +{ + struct pci_dev *pdev = ionic->pdev; + struct device *dev = ionic->dev; + + if (!(pci_resource_flags(pdev, 4) & IORESOURCE_MEM)) { + dev_dbg(dev, "No CMB, disabling\n"); + return; + } + + if (ionic->ident.dev.capabilities & cpu_to_le64(IONIC_DEV_CAP_DISC_CMB)) + ionic_map_disc_cmb(ionic); + else + ionic_map_classic_cmb(ionic); +} + int ionic_dev_setup(struct ionic *ionic) { struct ionic_dev_bar *bar = 
ionic->bars; unsigned int num_bars = ionic->num_bars; struct ionic_dev *idev = &ionic->idev; struct device *dev = ionic->dev; - int size; u32 sig; int err; @@ -255,16 +443,11 @@ int ionic_dev_setup(struct ionic *ionic) mutex_init(&idev->cmb_inuse_lock); if (num_bars < 3 || !ionic->bars[IONIC_PCI_BAR_CMB].len) { idev->cmb_inuse = NULL; + idev->phy_cmb_pages = 0; + idev->cmb_npages = 0; return 0; } - idev->phy_cmb_pages = bar->bus_addr; - idev->cmb_npages = bar->len / PAGE_SIZE; - size = BITS_TO_LONGS(idev->cmb_npages) * sizeof(long); - idev->cmb_inuse = kzalloc(size, GFP_KERNEL); - if (!idev->cmb_inuse) - dev_warn(dev, "No memory for CMB, disabling\n"); - return 0; } @@ -277,6 +460,11 @@ void ionic_dev_teardown(struct ionic *ionic) idev->phy_cmb_pages = 0; idev->cmb_npages = 0; + idev->phy_cmb_expdb64_pages = 0; + idev->phy_cmb_expdb128_pages = 0; + idev->phy_cmb_expdb256_pages = 0; + idev->phy_cmb_expdb512_pages = 0; + if (ionic->wq) { destroy_workqueue(ionic->wq); ionic->wq = NULL; @@ -698,28 +886,79 @@ void ionic_dev_cmd_adminq_init(struct ionic_dev *idev, struct ionic_qcq *qcq, ionic_dev_cmd_go(idev, &cmd); } +void ionic_dev_cmd_discover_cmb(struct ionic_dev *idev) +{ + union ionic_dev_cmd cmd = { + .discover_cmb.opcode = IONIC_CMD_DISCOVER_CMB, + }; + + ionic_dev_cmd_go(idev, &cmd); +} + int ionic_db_page_num(struct ionic_lif *lif, int pid) { return (lif->hw_index * lif->dbid_count) + pid; } -int ionic_get_cmb(struct ionic_lif *lif, u32 *pgid, phys_addr_t *pgaddr, int order) +int ionic_get_cmb(struct ionic_lif *lif, u32 *pgid, phys_addr_t *pgaddr, + int order, u8 stride_log2, bool *expdb) { struct ionic_dev *idev = &lif->ionic->idev; - int ret; + void __iomem *nonexpdb_pgptr; + phys_addr_t nonexpdb_pgaddr; + int i, idx; mutex_lock(&idev->cmb_inuse_lock); - ret = bitmap_find_free_region(idev->cmb_inuse, idev->cmb_npages, order); + idx = bitmap_find_free_region(idev->cmb_inuse, idev->cmb_npages, order); mutex_unlock(&idev->cmb_inuse_lock); - if (ret < 0) - return ret; + if (idx < 0) + return idx; - *pgid = ret; - *pgaddr = idev->phy_cmb_pages + ret * PAGE_SIZE; + *pgid = (u32)idx; + + if (idev->phy_cmb_expdb64_pages && + stride_log2 == IONIC_EXPDB_64B_WQE_LG2) { + *pgaddr = idev->phy_cmb_expdb64_pages + idx * PAGE_SIZE; + if (expdb) + *expdb = true; + } else if (idev->phy_cmb_expdb128_pages && + stride_log2 == IONIC_EXPDB_128B_WQE_LG2) { + *pgaddr = idev->phy_cmb_expdb128_pages + idx * PAGE_SIZE; + if (expdb) + *expdb = true; + } else if (idev->phy_cmb_expdb256_pages && + stride_log2 == IONIC_EXPDB_256B_WQE_LG2) { + *pgaddr = idev->phy_cmb_expdb256_pages + idx * PAGE_SIZE; + if (expdb) + *expdb = true; + } else if (idev->phy_cmb_expdb512_pages && + stride_log2 == IONIC_EXPDB_512B_WQE_LG2) { + *pgaddr = idev->phy_cmb_expdb512_pages + idx * PAGE_SIZE; + if (expdb) + *expdb = true; + } else { + *pgaddr = idev->phy_cmb_pages + idx * PAGE_SIZE; + if (expdb) + *expdb = false; + } + + /* clear the requested CMB region, 1 PAGE_SIZE ioremap at a time */ + nonexpdb_pgaddr = idev->phy_cmb_pages + idx * PAGE_SIZE; + for (i = 0; i < (1 << order); i++) { + nonexpdb_pgptr = + ioremap_wc(nonexpdb_pgaddr + i * PAGE_SIZE, PAGE_SIZE); + if (!nonexpdb_pgptr) { + ionic_put_cmb(lif, *pgid, order); + return -ENOMEM; + } + memset_io(nonexpdb_pgptr, 0, PAGE_SIZE); + iounmap(nonexpdb_pgptr); + } return 0; } +EXPORT_SYMBOL_NS(ionic_get_cmb, "NET_IONIC"); void ionic_put_cmb(struct ionic_lif *lif, u32 pgid, int order) { @@ -729,6 +968,7 @@ void ionic_put_cmb(struct ionic_lif *lif, u32 pgid, int order) 
bitmap_release_region(idev->cmb_inuse, pgid, order); mutex_unlock(&idev->cmb_inuse_lock); } +EXPORT_SYMBOL_NS(ionic_put_cmb, "NET_IONIC"); int ionic_cq_init(struct ionic_lif *lif, struct ionic_cq *cq, struct ionic_intr_info *intr, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index 68cf4da3c6b3..35566f97eaea 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -35,6 +35,11 @@ #define IONIC_RX_MIN_DOORBELL_DEADLINE (HZ / 100) /* 10ms */ #define IONIC_RX_MAX_DOORBELL_DEADLINE (HZ * 4) /* 4s */ +#define IONIC_EXPDB_64B_WQE_LG2 6 +#define IONIC_EXPDB_128B_WQE_LG2 7 +#define IONIC_EXPDB_256B_WQE_LG2 8 +#define IONIC_EXPDB_512B_WQE_LG2 9 + struct ionic_dev_bar { void __iomem *vaddr; phys_addr_t bus_addr; @@ -171,6 +176,11 @@ struct ionic_dev { dma_addr_t phy_cmb_pages; u32 cmb_npages; + dma_addr_t phy_cmb_expdb64_pages; + dma_addr_t phy_cmb_expdb128_pages; + dma_addr_t phy_cmb_expdb256_pages; + dma_addr_t phy_cmb_expdb512_pages; + u32 port_info_sz; struct ionic_port_info *port_info; dma_addr_t port_info_pa; @@ -351,8 +361,8 @@ void ionic_dev_cmd_adminq_init(struct ionic_dev *idev, struct ionic_qcq *qcq, int ionic_db_page_num(struct ionic_lif *lif, int pid); -int ionic_get_cmb(struct ionic_lif *lif, u32 *pgid, phys_addr_t *pgaddr, int order); -void ionic_put_cmb(struct ionic_lif *lif, u32 pgid, int order); +void ionic_dev_cmd_discover_cmb(struct ionic_dev *idev); +void ionic_map_cmb(struct ionic *ionic); int ionic_cq_init(struct ionic_lif *lif, struct ionic_cq *cq, struct ionic_intr_info *intr, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index 77c3dc188264..47559c909c8b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -56,6 +56,9 @@ enum ionic_cmd_opcode { IONIC_CMD_VF_SETATTR = 61, IONIC_CMD_VF_CTRL = 62, + /* CMB command */ + IONIC_CMD_DISCOVER_CMB = 80, + /* QoS commands */ IONIC_CMD_QOS_CLASS_IDENTIFY = 240, IONIC_CMD_QOS_CLASS_INIT = 241, @@ -269,9 +272,11 @@ union ionic_drv_identity { /** * enum ionic_dev_capability - Device capabilities * @IONIC_DEV_CAP_VF_CTRL: Device supports VF ctrl operations + * @IONIC_DEV_CAP_DISC_CMB: Device supports CMB discovery operations */ enum ionic_dev_capability { IONIC_DEV_CAP_VF_CTRL = BIT(0), + IONIC_DEV_CAP_DISC_CMB = BIT(1), }; /** @@ -395,6 +400,7 @@ enum ionic_logical_qtype { * @IONIC_Q_F_4X_DESC: Quadruple main descriptor size * @IONIC_Q_F_4X_CQ_DESC: Quadruple cq descriptor size * @IONIC_Q_F_4X_SG_DESC: Quadruple sg descriptor size + * @IONIC_QIDENT_F_EXPDB: Queue supports express doorbell */ enum ionic_q_feature { IONIC_QIDENT_F_CQ = BIT_ULL(0), @@ -407,6 +413,7 @@ enum ionic_q_feature { IONIC_Q_F_4X_DESC = BIT_ULL(7), IONIC_Q_F_4X_CQ_DESC = BIT_ULL(8), IONIC_Q_F_4X_SG_DESC = BIT_ULL(9), + IONIC_QIDENT_F_EXPDB = BIT_ULL(10), }; /** @@ -2213,6 +2220,80 @@ struct ionic_vf_ctrl_comp { u8 rsvd[15]; }; +/** + * struct ionic_discover_cmb_cmd - CMB discovery command + * @opcode: Opcode for the command + * @rsvd: Reserved bytes + */ +struct ionic_discover_cmb_cmd { + u8 opcode; + u8 rsvd[63]; +}; + +/** + * struct ionic_discover_cmb_comp - CMB discover command completion. 
+ * @status: Status of the command (enum ionic_status_code) + * @rsvd: Reserved bytes + */ +struct ionic_discover_cmb_comp { + u8 status; + u8 rsvd[15]; +}; + +#define IONIC_MAX_CMB_REGIONS 16 +#define IONIC_CMB_SHIFT_64K 16 + +enum ionic_cmb_type { + IONIC_CMB_TYPE_DEVMEM = 0, + IONIC_CMB_TYPE_EXPDB64 = 1, + IONIC_CMB_TYPE_EXPDB128 = 2, + IONIC_CMB_TYPE_EXPDB256 = 3, + IONIC_CMB_TYPE_EXPDB512 = 4, +}; + +/** + * union ionic_cmb_region - Configuration for CMB region + * @bar_num: CMB mapping number from FW + * @cmb_type: Type of CMB this region describes (enum ionic_cmb_type) + * @rsvd: Reserved + * @offset: Offset within BAR in 64KB pages + * @length: Length of the CMB region + * @words: 32-bit words for direct access to the entire region + */ +union ionic_cmb_region { + struct { + u8 bar_num; + u8 cmb_type; + u8 rsvd[6]; + __le32 offset; + __le32 length; + } __packed; + __le32 words[4]; +}; + +/** + * union ionic_discover_cmb_identity - CMB layout identity structure + * @num_regions: Number of CMB regions, up to 16 + * @flags: Feature and capability bits (0 for express + * doorbell, 1 for 4K alignment indicator, + * 31-24 for version information) + * @region: CMB mappings region, entry 0 for regular + * mapping, entries 1-7 for WQE sizes 64, + * 128, 256, 512, 1024, 2048 and 4096 bytes + * @words: Full union buffer size + */ +union ionic_discover_cmb_identity { + struct { + __le32 num_regions; +#define IONIC_CMB_FLAG_EXPDB BIT(0) +#define IONIC_CMB_FLAG_4KALIGN BIT(1) +#define IONIC_CMB_FLAG_VERSION 0xff000000 + __le32 flags; + union ionic_cmb_region region[IONIC_MAX_CMB_REGIONS]; + }; + __le32 words[478]; +}; + /** * struct ionic_qos_identify_cmd - QoS identify command * @opcode: opcode @@ -3073,6 +3154,8 @@ union ionic_dev_cmd { struct ionic_vf_getattr_cmd vf_getattr; struct ionic_vf_ctrl_cmd vf_ctrl; + struct ionic_discover_cmb_cmd discover_cmb; + struct ionic_lif_identify_cmd lif_identify; struct ionic_lif_init_cmd lif_init; struct ionic_lif_reset_cmd lif_reset; @@ -3112,6 +3195,8 @@ union ionic_dev_cmd_comp { struct ionic_vf_getattr_comp vf_getattr; struct ionic_vf_ctrl_comp vf_ctrl; + struct ionic_discover_cmb_comp discover_cmb; + struct ionic_lif_identify_comp lif_identify; struct ionic_lif_init_comp lif_init; ionic_lif_reset_comp lif_reset; @@ -3253,6 +3338,9 @@ union ionic_adminq_comp { #define IONIC_BAR0_DEV_CMD_DATA_REGS_OFFSET 0x0c00 #define IONIC_BAR0_INTR_STATUS_OFFSET 0x1000 #define IONIC_BAR0_INTR_CTRL_OFFSET 0x2000 + +/* BAR2 */ +#define IONIC_BAR2_CMB_ENTRY_SIZE 0x800000 #define IONIC_DEV_CMD_DONE 0x00000001 #define IONIC_ASIC_TYPE_NONE 0 @@ -3306,6 +3394,7 @@ struct ionic_identity { union ionic_port_identity port; union ionic_qos_identity qos; union ionic_q_identity txq; + union ionic_discover_cmb_identity cmb_layout; }; #endif /* _IONIC_IF_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 276024002484..b28966ae50c2 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -673,7 +673,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, new->cmb_order = order_base_2(new->cmb_q_size / PAGE_SIZE); err = ionic_get_cmb(lif, &new->cmb_pgid, &new->cmb_q_base_pa, - new->cmb_order); + new->cmb_order, 0, NULL); if (err) { netdev_err(lif->netdev, "Cannot allocate queue order %d from cmb: err %d\n", From 83597c841ed53807a99a2ee837a8cbc3541ce62a Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Wed, 3 Sep 2025 11:45:59 +0530 
Subject: [PATCH 41/85] RDMA: Add IONIC to rdma_driver_id definition Define RDMA_DRIVER_IONIC in enum rdma_driver_id. Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20250903061606.4139957-8-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- include/uapi/rdma/ib_user_ioctl_verbs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index fe15bc7e9f70..89e6a3f13191 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -255,6 +255,7 @@ enum rdma_driver_id { RDMA_DRIVER_SIW, RDMA_DRIVER_ERDMA, RDMA_DRIVER_MANA, + RDMA_DRIVER_IONIC, }; enum ib_uverbs_gid_type { From 8d765af51a099884bab37a51e211c7047f67f1f3 Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Wed, 3 Sep 2025 11:46:00 +0530 Subject: [PATCH 42/85] RDMA/ionic: Register auxiliary module for ionic ethernet adapter Register auxiliary module to create ibdevice for ionic ethernet adapter. Co-developed-by: Andrew Boyer Signed-off-by: Andrew Boyer Co-developed-by: Allen Hubbe Signed-off-by: Allen Hubbe Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20250903061606.4139957-9-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/ionic/ionic_ibdev.c | 131 ++++++++++++++++++++ drivers/infiniband/hw/ionic/ionic_ibdev.h | 18 +++ drivers/infiniband/hw/ionic/ionic_lif_cfg.c | 101 +++++++++++++++ drivers/infiniband/hw/ionic/ionic_lif_cfg.h | 64 ++++++++++ 4 files changed, 314 insertions(+) create mode 100644 drivers/infiniband/hw/ionic/ionic_ibdev.c create mode 100644 drivers/infiniband/hw/ionic/ionic_ibdev.h create mode 100644 drivers/infiniband/hw/ionic/ionic_lif_cfg.c create mode 100644 drivers/infiniband/hw/ionic/ionic_lif_cfg.h diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c new file mode 100644 index 000000000000..d79470dae13a --- /dev/null +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. 
*/ + +#include +#include +#include + +#include "ionic_ibdev.h" + +#define DRIVER_DESCRIPTION "AMD Pensando RoCE HCA driver" +#define DEVICE_DESCRIPTION "AMD Pensando RoCE HCA" + +MODULE_AUTHOR("Allen Hubbe "); +MODULE_DESCRIPTION(DRIVER_DESCRIPTION); +MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS("NET_IONIC"); + +static void ionic_destroy_ibdev(struct ionic_ibdev *dev) +{ + ib_unregister_device(&dev->ibdev); + ib_dealloc_device(&dev->ibdev); +} + +static struct ionic_ibdev *ionic_create_ibdev(struct ionic_aux_dev *ionic_adev) +{ + struct ib_device *ibdev; + struct ionic_ibdev *dev; + struct net_device *ndev; + int rc; + + dev = ib_alloc_device(ionic_ibdev, ibdev); + if (!dev) + return ERR_PTR(-EINVAL); + + ionic_fill_lif_cfg(ionic_adev->lif, &dev->lif_cfg); + + ibdev = &dev->ibdev; + ibdev->dev.parent = dev->lif_cfg.hwdev; + + strscpy(ibdev->name, "ionic_%d", IB_DEVICE_NAME_MAX); + strscpy(ibdev->node_desc, DEVICE_DESCRIPTION, IB_DEVICE_NODE_DESC_MAX); + + ibdev->node_type = RDMA_NODE_IB_CA; + ibdev->phys_port_cnt = 1; + + /* the first two eq are reserved for async events */ + ibdev->num_comp_vectors = dev->lif_cfg.eq_count - 2; + + ndev = ionic_lif_netdev(ionic_adev->lif); + addrconf_ifid_eui48((u8 *)&ibdev->node_guid, ndev); + rc = ib_device_set_netdev(ibdev, ndev, 1); + /* ionic_lif_netdev() returns ndev with refcount held */ + dev_put(ndev); + if (rc) + goto err_admin; + + rc = ib_register_device(ibdev, "ionic_%d", ibdev->dev.parent); + if (rc) + goto err_register; + + return dev; + +err_register: +err_admin: + ib_dealloc_device(&dev->ibdev); + + return ERR_PTR(rc); +} + +static int ionic_aux_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct ionic_aux_dev *ionic_adev; + struct ionic_ibdev *dev; + + ionic_adev = container_of(adev, struct ionic_aux_dev, adev); + dev = ionic_create_ibdev(ionic_adev); + if (IS_ERR(dev)) + return dev_err_probe(&adev->dev, PTR_ERR(dev), + "Failed to register ibdev\n"); + + auxiliary_set_drvdata(adev, dev); + ibdev_dbg(&dev->ibdev, "registered\n"); + + return 0; +} + +static void ionic_aux_remove(struct auxiliary_device *adev) +{ + struct ionic_ibdev *dev = auxiliary_get_drvdata(adev); + + dev_dbg(&adev->dev, "unregister ibdev\n"); + ionic_destroy_ibdev(dev); + dev_dbg(&adev->dev, "unregistered\n"); +} + +static const struct auxiliary_device_id ionic_aux_id_table[] = { + { .name = "ionic.rdma", }, + {}, +}; + +MODULE_DEVICE_TABLE(auxiliary, ionic_aux_id_table); + +static struct auxiliary_driver ionic_aux_r_driver = { + .name = "rdma", + .probe = ionic_aux_probe, + .remove = ionic_aux_remove, + .id_table = ionic_aux_id_table, +}; + +static int __init ionic_mod_init(void) +{ + int rc; + + rc = auxiliary_driver_register(&ionic_aux_r_driver); + if (rc) + goto err_aux; + + return 0; + +err_aux: + return rc; +} + +static void __exit ionic_mod_exit(void) +{ + auxiliary_driver_unregister(&ionic_aux_r_driver); +} + +module_init(ionic_mod_init); +module_exit(ionic_mod_exit); diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.h b/drivers/infiniband/hw/ionic/ionic_ibdev.h new file mode 100644 index 000000000000..224e5e74056d --- /dev/null +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. 
*/ + +#ifndef _IONIC_IBDEV_H_ +#define _IONIC_IBDEV_H_ + +#include +#include + +#include "ionic_lif_cfg.h" + +struct ionic_ibdev { + struct ib_device ibdev; + + struct ionic_lif_cfg lif_cfg; +}; + +#endif /* _IONIC_IBDEV_H_ */ diff --git a/drivers/infiniband/hw/ionic/ionic_lif_cfg.c b/drivers/infiniband/hw/ionic/ionic_lif_cfg.c new file mode 100644 index 000000000000..8d0d209227e9 --- /dev/null +++ b/drivers/infiniband/hw/ionic/ionic_lif_cfg.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */ + +#include + +#include +#include + +#include "ionic_lif_cfg.h" + +#define IONIC_MIN_RDMA_VERSION 0 +#define IONIC_MAX_RDMA_VERSION 2 + +static u8 ionic_get_expdb(struct ionic_lif *lif) +{ + u8 expdb_support = 0; + + if (lif->ionic->idev.phy_cmb_expdb64_pages) + expdb_support |= IONIC_EXPDB_64B_WQE; + if (lif->ionic->idev.phy_cmb_expdb128_pages) + expdb_support |= IONIC_EXPDB_128B_WQE; + if (lif->ionic->idev.phy_cmb_expdb256_pages) + expdb_support |= IONIC_EXPDB_256B_WQE; + if (lif->ionic->idev.phy_cmb_expdb512_pages) + expdb_support |= IONIC_EXPDB_512B_WQE; + + return expdb_support; +} + +void ionic_fill_lif_cfg(struct ionic_lif *lif, struct ionic_lif_cfg *cfg) +{ + union ionic_lif_identity *ident = &lif->ionic->ident.lif; + + cfg->lif = lif; + cfg->hwdev = &lif->ionic->pdev->dev; + cfg->lif_index = lif->index; + cfg->lif_hw_index = lif->hw_index; + + cfg->dbid = lif->kern_pid; + cfg->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif); + cfg->dbpage = lif->kern_dbpage; + cfg->intr_ctrl = lif->ionic->idev.intr_ctrl; + + cfg->db_phys = lif->ionic->bars[IONIC_PCI_BAR_DBELL].bus_addr; + + if (IONIC_VERSION(ident->rdma.version, ident->rdma.minor_version) >= + IONIC_VERSION(2, 1)) + cfg->page_size_supported = + le64_to_cpu(ident->rdma.page_size_cap); + else + cfg->page_size_supported = IONIC_PAGE_SIZE_SUPPORTED; + + cfg->rdma_version = ident->rdma.version; + cfg->qp_opcodes = ident->rdma.qp_opcodes; + cfg->admin_opcodes = ident->rdma.admin_opcodes; + + cfg->stats_type = le16_to_cpu(ident->rdma.stats_type); + cfg->npts_per_lif = le32_to_cpu(ident->rdma.npts_per_lif); + cfg->nmrs_per_lif = le32_to_cpu(ident->rdma.nmrs_per_lif); + cfg->nahs_per_lif = le32_to_cpu(ident->rdma.nahs_per_lif); + + cfg->aq_base = le32_to_cpu(ident->rdma.aq_qtype.qid_base); + cfg->cq_base = le32_to_cpu(ident->rdma.cq_qtype.qid_base); + cfg->eq_base = le32_to_cpu(ident->rdma.eq_qtype.qid_base); + + /* + * ionic_create_rdma_admin() may reduce aq_count or eq_count if + * it is unable to allocate all that were requested. 
+ * aq_count is tunable; see ionic_aq_count + * eq_count is tunable; see ionic_eq_count + */ + cfg->aq_count = le32_to_cpu(ident->rdma.aq_qtype.qid_count); + cfg->eq_count = le32_to_cpu(ident->rdma.eq_qtype.qid_count); + cfg->cq_count = le32_to_cpu(ident->rdma.cq_qtype.qid_count); + cfg->qp_count = le32_to_cpu(ident->rdma.sq_qtype.qid_count); + cfg->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif); + + cfg->aq_qtype = ident->rdma.aq_qtype.qtype; + cfg->sq_qtype = ident->rdma.sq_qtype.qtype; + cfg->rq_qtype = ident->rdma.rq_qtype.qtype; + cfg->cq_qtype = ident->rdma.cq_qtype.qtype; + cfg->eq_qtype = ident->rdma.eq_qtype.qtype; + cfg->udma_qgrp_shift = ident->rdma.udma_shift; + cfg->udma_count = 2; + + cfg->max_stride = ident->rdma.max_stride; + cfg->expdb_mask = ionic_get_expdb(lif); + + cfg->sq_expdb = + !!(lif->qtype_info[IONIC_QTYPE_TXQ].features & IONIC_QIDENT_F_EXPDB); + cfg->rq_expdb = + !!(lif->qtype_info[IONIC_QTYPE_RXQ].features & IONIC_QIDENT_F_EXPDB); +} + +struct net_device *ionic_lif_netdev(struct ionic_lif *lif) +{ + struct net_device *netdev = lif->netdev; + + dev_hold(netdev); + return netdev; +} diff --git a/drivers/infiniband/hw/ionic/ionic_lif_cfg.h b/drivers/infiniband/hw/ionic/ionic_lif_cfg.h new file mode 100644 index 000000000000..5b04b8a9937e --- /dev/null +++ b/drivers/infiniband/hw/ionic/ionic_lif_cfg.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */ + +#ifndef _IONIC_LIF_CFG_H_ + +#define IONIC_VERSION(a, b) (((a) << 16) + ((b) << 8)) +#define IONIC_PAGE_SIZE_SUPPORTED 0x40201000 /* 4kb, 2Mb, 1Gb */ + +#define IONIC_EXPDB_64B_WQE BIT(0) +#define IONIC_EXPDB_128B_WQE BIT(1) +#define IONIC_EXPDB_256B_WQE BIT(2) +#define IONIC_EXPDB_512B_WQE BIT(3) + +struct ionic_lif_cfg { + struct device *hwdev; + struct ionic_lif *lif; + + int lif_index; + int lif_hw_index; + + u32 dbid; + int dbid_count; + u64 __iomem *dbpage; + struct ionic_intr __iomem *intr_ctrl; + phys_addr_t db_phys; + + u64 page_size_supported; + u32 npts_per_lif; + u32 nmrs_per_lif; + u32 nahs_per_lif; + + u32 aq_base; + u32 cq_base; + u32 eq_base; + + int aq_count; + int eq_count; + int cq_count; + int qp_count; + + u16 stats_type; + u8 aq_qtype; + u8 sq_qtype; + u8 rq_qtype; + u8 cq_qtype; + u8 eq_qtype; + + u8 udma_count; + u8 udma_qgrp_shift; + + u8 rdma_version; + u8 qp_opcodes; + u8 admin_opcodes; + + u8 max_stride; + bool sq_expdb; + bool rq_expdb; + u8 expdb_mask; +}; + +void ionic_fill_lif_cfg(struct ionic_lif *lif, struct ionic_lif_cfg *cfg); +struct net_device *ionic_lif_netdev(struct ionic_lif *lif); + +#endif /* _IONIC_LIF_CFG_H_ */ From f3bdbd42702c6b10ebe627828c76ef51c68e4355 Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Wed, 3 Sep 2025 11:46:01 +0530 Subject: [PATCH 43/85] RDMA/ionic: Create device queues to support admin operations Setup RDMA admin queues using device command exposed over auxiliary device and manage these queues using ida. 
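
For reference, a consumer of this interface fills a struct ionic_admin_wr,
posts it, and waits for the completion. A minimal sketch (illustrative only;
"op" and "cmd_len" are placeholders here, since the concrete admin opcodes and
command payloads are introduced by later patches):

	struct ionic_admin_wr wr = {
		.work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
		.wqe = {
			.op = op,			/* admin opcode (placeholder) */
			.len = cpu_to_le16(cmd_len),	/* payload length (placeholder) */
		},
	};
	int rc;

	ionic_admin_post(dev, &wr);
	/* flags == 0 selects the uninterruptible wait in ionic_admin_wait() */
	rc = ionic_admin_wait(dev, &wr, 0);
	/* on success, wr.cqe holds the device completion */
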
Co-developed-by: Andrew Boyer Signed-off-by: Andrew Boyer Co-developed-by: Allen Hubbe Signed-off-by: Allen Hubbe Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20250903061606.4139957-10-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/ionic/ionic_admin.c | 1124 +++++++++++++++++ .../infiniband/hw/ionic/ionic_controlpath.c | 181 +++ drivers/infiniband/hw/ionic/ionic_fw.h | 164 +++ drivers/infiniband/hw/ionic/ionic_ibdev.c | 56 + drivers/infiniband/hw/ionic/ionic_ibdev.h | 222 ++++ drivers/infiniband/hw/ionic/ionic_pgtbl.c | 113 ++ drivers/infiniband/hw/ionic/ionic_queue.c | 52 + drivers/infiniband/hw/ionic/ionic_queue.h | 234 ++++ drivers/infiniband/hw/ionic/ionic_res.h | 154 +++ 9 files changed, 2300 insertions(+) create mode 100644 drivers/infiniband/hw/ionic/ionic_admin.c create mode 100644 drivers/infiniband/hw/ionic/ionic_controlpath.c create mode 100644 drivers/infiniband/hw/ionic/ionic_fw.h create mode 100644 drivers/infiniband/hw/ionic/ionic_pgtbl.c create mode 100644 drivers/infiniband/hw/ionic/ionic_queue.c create mode 100644 drivers/infiniband/hw/ionic/ionic_queue.h create mode 100644 drivers/infiniband/hw/ionic/ionic_res.h diff --git a/drivers/infiniband/hw/ionic/ionic_admin.c b/drivers/infiniband/hw/ionic/ionic_admin.c new file mode 100644 index 000000000000..845c03f6d9fb --- /dev/null +++ b/drivers/infiniband/hw/ionic/ionic_admin.c @@ -0,0 +1,1124 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */ + +#include +#include +#include + +#include "ionic_fw.h" +#include "ionic_ibdev.h" + +#define IONIC_EQ_COUNT_MIN 4 +#define IONIC_AQ_COUNT_MIN 1 + +/* not a valid queue position or negative error status */ +#define IONIC_ADMIN_POSTED 0x10000 + +/* cpu can be held with irq disabled for COUNT * MS (for create/destroy_ah) */ +#define IONIC_ADMIN_BUSY_RETRY_COUNT 2000 +#define IONIC_ADMIN_BUSY_RETRY_MS 1 + +/* admin queue will be considered failed if a command takes longer */ +#define IONIC_ADMIN_TIMEOUT (HZ * 2) +#define IONIC_ADMIN_WARN (HZ / 8) + +/* will poll for admin cq to tolerate and report from missed event */ +#define IONIC_ADMIN_DELAY (HZ / 8) + +/* work queue for polling the event queue and admin cq */ +struct workqueue_struct *ionic_evt_workq; + +static void ionic_admin_timedout(struct ionic_aq *aq) +{ + struct ionic_ibdev *dev = aq->dev; + unsigned long irqflags; + u16 pos; + + spin_lock_irqsave(&aq->lock, irqflags); + if (ionic_queue_empty(&aq->q)) + goto out; + + /* Reset ALL adminq if any one times out */ + if (atomic_read(&aq->admin_state) < IONIC_ADMIN_KILLED) + queue_work(ionic_evt_workq, &dev->reset_work); + + ibdev_err(&dev->ibdev, "admin command timed out, aq %d after: %ums\n", + aq->aqid, (u32)jiffies_to_msecs(jiffies - aq->stamp)); + + pos = (aq->q.prod - 1) & aq->q.mask; + if (pos == aq->q.cons) + goto out; + + ibdev_warn(&dev->ibdev, "admin pos %u (last posted)\n", pos); + print_hex_dump(KERN_WARNING, "cmd ", DUMP_PREFIX_OFFSET, 16, 1, + ionic_queue_at(&aq->q, pos), + BIT(aq->q.stride_log2), true); + +out: + spin_unlock_irqrestore(&aq->lock, irqflags); +} + +static void ionic_admin_reset_dwork(struct ionic_ibdev *dev) +{ + if (atomic_read(&dev->admin_state) == IONIC_ADMIN_KILLED) + return; + + queue_delayed_work(ionic_evt_workq, &dev->admin_dwork, + IONIC_ADMIN_DELAY); +} + +static void ionic_admin_reset_wdog(struct ionic_aq *aq) +{ + if (atomic_read(&aq->admin_state) == IONIC_ADMIN_KILLED) + return; + + aq->stamp = jiffies; + ionic_admin_reset_dwork(aq->dev); +} 
+ +static bool ionic_admin_next_cqe(struct ionic_ibdev *dev, struct ionic_cq *cq, + struct ionic_v1_cqe **cqe) +{ + struct ionic_v1_cqe *qcqe = ionic_queue_at_prod(&cq->q); + + if (unlikely(cq->color != ionic_v1_cqe_color(qcqe))) + return false; + + /* Prevent out-of-order reads of the CQE */ + dma_rmb(); + *cqe = qcqe; + + return true; +} + +static void ionic_admin_poll_locked(struct ionic_aq *aq) +{ + struct ionic_cq *cq = &aq->vcq->cq[0]; + struct ionic_admin_wr *wr, *wr_next; + struct ionic_ibdev *dev = aq->dev; + u32 wr_strides, avlbl_strides; + struct ionic_v1_cqe *cqe; + u32 qtf, qid; + u16 old_prod; + u8 type; + + lockdep_assert_held(&aq->lock); + + if (atomic_read(&aq->admin_state) == IONIC_ADMIN_KILLED) { + list_for_each_entry_safe(wr, wr_next, &aq->wr_prod, aq_ent) { + INIT_LIST_HEAD(&wr->aq_ent); + aq->q_wr[wr->status].wr = NULL; + wr->status = atomic_read(&aq->admin_state); + complete_all(&wr->work); + } + INIT_LIST_HEAD(&aq->wr_prod); + + list_for_each_entry_safe(wr, wr_next, &aq->wr_post, aq_ent) { + INIT_LIST_HEAD(&wr->aq_ent); + wr->status = atomic_read(&aq->admin_state); + complete_all(&wr->work); + } + INIT_LIST_HEAD(&aq->wr_post); + + return; + } + + old_prod = cq->q.prod; + + while (ionic_admin_next_cqe(dev, cq, &cqe)) { + qtf = ionic_v1_cqe_qtf(cqe); + qid = ionic_v1_cqe_qtf_qid(qtf); + type = ionic_v1_cqe_qtf_type(qtf); + + if (unlikely(type != IONIC_V1_CQE_TYPE_ADMIN)) { + ibdev_warn_ratelimited(&dev->ibdev, + "bad cqe type %u\n", type); + goto cq_next; + } + + if (unlikely(qid != aq->aqid)) { + ibdev_warn_ratelimited(&dev->ibdev, + "bad cqe qid %u\n", qid); + goto cq_next; + } + + if (unlikely(be16_to_cpu(cqe->admin.cmd_idx) != aq->q.cons)) { + ibdev_warn_ratelimited(&dev->ibdev, + "bad idx %u cons %u qid %u\n", + be16_to_cpu(cqe->admin.cmd_idx), + aq->q.cons, qid); + goto cq_next; + } + + if (unlikely(ionic_queue_empty(&aq->q))) { + ibdev_warn_ratelimited(&dev->ibdev, + "bad cqe for empty adminq\n"); + goto cq_next; + } + + wr = aq->q_wr[aq->q.cons].wr; + if (wr) { + aq->q_wr[aq->q.cons].wr = NULL; + list_del_init(&wr->aq_ent); + + wr->cqe = *cqe; + wr->status = atomic_read(&aq->admin_state); + complete_all(&wr->work); + } + + ionic_queue_consume_entries(&aq->q, + aq->q_wr[aq->q.cons].wqe_strides); + +cq_next: + ionic_queue_produce(&cq->q); + cq->color = ionic_color_wrap(cq->q.prod, cq->color); + } + + if (old_prod != cq->q.prod) { + ionic_admin_reset_wdog(aq); + cq->q.cons = cq->q.prod; + ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.cq_qtype, + ionic_queue_dbell_val(&cq->q)); + queue_work(ionic_evt_workq, &aq->work); + } else if (!aq->armed) { + aq->armed = true; + cq->arm_any_prod = ionic_queue_next(&cq->q, cq->arm_any_prod); + ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.cq_qtype, + cq->q.dbell | IONIC_CQ_RING_ARM | + cq->arm_any_prod); + queue_work(ionic_evt_workq, &aq->work); + } + + if (atomic_read(&aq->admin_state) != IONIC_ADMIN_ACTIVE) + return; + + old_prod = aq->q.prod; + + if (ionic_queue_empty(&aq->q) && !list_empty(&aq->wr_post)) + ionic_admin_reset_wdog(aq); + + if (list_empty(&aq->wr_post)) + return; + + do { + u8 *src; + int i, src_len; + size_t stride_len; + + wr = list_first_entry(&aq->wr_post, struct ionic_admin_wr, + aq_ent); + wr_strides = (le16_to_cpu(wr->wqe.len) + ADMIN_WQE_HDR_LEN + + (ADMIN_WQE_STRIDE - 1)) >> aq->q.stride_log2; + avlbl_strides = ionic_queue_length_remaining(&aq->q); + + if (wr_strides > avlbl_strides) + break; + + list_move(&wr->aq_ent, &aq->wr_prod); + wr->status = aq->q.prod; + aq->q_wr[aq->q.prod].wr = 
wr; + aq->q_wr[aq->q.prod].wqe_strides = wr_strides; + + src_len = le16_to_cpu(wr->wqe.len); + src = (uint8_t *)&wr->wqe.cmd; + + /* First stride */ + memcpy(ionic_queue_at_prod(&aq->q), &wr->wqe, + ADMIN_WQE_HDR_LEN); + stride_len = ADMIN_WQE_STRIDE - ADMIN_WQE_HDR_LEN; + if (stride_len > src_len) + stride_len = src_len; + memcpy(ionic_queue_at_prod(&aq->q) + ADMIN_WQE_HDR_LEN, + src, stride_len); + ibdev_dbg(&dev->ibdev, "post admin prod %u (%u strides)\n", + aq->q.prod, wr_strides); + print_hex_dump_debug("wqe ", DUMP_PREFIX_OFFSET, 16, 1, + ionic_queue_at_prod(&aq->q), + BIT(aq->q.stride_log2), true); + ionic_queue_produce(&aq->q); + + /* Remaining strides */ + for (i = stride_len; i < src_len; i += stride_len) { + stride_len = ADMIN_WQE_STRIDE; + + if (i + stride_len > src_len) + stride_len = src_len - i; + + memcpy(ionic_queue_at_prod(&aq->q), src + i, + stride_len); + print_hex_dump_debug("wqe ", DUMP_PREFIX_OFFSET, 16, 1, + ionic_queue_at_prod(&aq->q), + BIT(aq->q.stride_log2), true); + ionic_queue_produce(&aq->q); + } + } while (!list_empty(&aq->wr_post)); + + if (old_prod != aq->q.prod) + ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.aq_qtype, + ionic_queue_dbell_val(&aq->q)); +} + +static void ionic_admin_dwork(struct work_struct *ws) +{ + struct ionic_ibdev *dev = + container_of(ws, struct ionic_ibdev, admin_dwork.work); + struct ionic_aq *aq, *bad_aq = NULL; + bool do_reschedule = false; + unsigned long irqflags; + bool do_reset = false; + u16 pos; + int i; + + for (i = 0; i < dev->lif_cfg.aq_count; i++) { + aq = dev->aq_vec[i]; + + spin_lock_irqsave(&aq->lock, irqflags); + + if (ionic_queue_empty(&aq->q)) + goto next_aq; + + /* Reschedule if any queue has outstanding work */ + do_reschedule = true; + + if (time_is_after_eq_jiffies(aq->stamp + IONIC_ADMIN_WARN)) + /* Warning threshold not met, nothing to do */ + goto next_aq; + + /* See if polling now makes some progress */ + pos = aq->q.cons; + ionic_admin_poll_locked(aq); + if (pos != aq->q.cons) { + ibdev_dbg(&dev->ibdev, + "missed event for acq %d\n", aq->cqid); + goto next_aq; + } + + if (time_is_after_eq_jiffies(aq->stamp + + IONIC_ADMIN_TIMEOUT)) { + /* Timeout threshold not met */ + ibdev_dbg(&dev->ibdev, "no progress after %ums\n", + (u32)jiffies_to_msecs(jiffies - aq->stamp)); + goto next_aq; + } + + /* Queue timed out */ + bad_aq = aq; + do_reset = true; +next_aq: + spin_unlock_irqrestore(&aq->lock, irqflags); + } + + if (do_reset) + /* Reset RDMA lif on a timeout */ + ionic_admin_timedout(bad_aq); + else if (do_reschedule) + /* Try to poll again later */ + ionic_admin_reset_dwork(dev); +} + +static void ionic_admin_work(struct work_struct *ws) +{ + struct ionic_aq *aq = container_of(ws, struct ionic_aq, work); + unsigned long irqflags; + + spin_lock_irqsave(&aq->lock, irqflags); + ionic_admin_poll_locked(aq); + spin_unlock_irqrestore(&aq->lock, irqflags); +} + +static void ionic_admin_post_aq(struct ionic_aq *aq, struct ionic_admin_wr *wr) +{ + unsigned long irqflags; + bool poll; + + wr->status = IONIC_ADMIN_POSTED; + wr->aq = aq; + + spin_lock_irqsave(&aq->lock, irqflags); + poll = list_empty(&aq->wr_post); + list_add(&wr->aq_ent, &aq->wr_post); + if (poll) + ionic_admin_poll_locked(aq); + spin_unlock_irqrestore(&aq->lock, irqflags); +} + +void ionic_admin_post(struct ionic_ibdev *dev, struct ionic_admin_wr *wr) +{ + int aq_idx; + + /* Use cpu id for the adminq selection */ + aq_idx = raw_smp_processor_id() % dev->lif_cfg.aq_count; + ionic_admin_post_aq(dev->aq_vec[aq_idx], wr); +} + +static void 
ionic_admin_cancel(struct ionic_admin_wr *wr) +{ + struct ionic_aq *aq = wr->aq; + unsigned long irqflags; + + spin_lock_irqsave(&aq->lock, irqflags); + + if (!list_empty(&wr->aq_ent)) { + list_del(&wr->aq_ent); + if (wr->status != IONIC_ADMIN_POSTED) + aq->q_wr[wr->status].wr = NULL; + } + + spin_unlock_irqrestore(&aq->lock, irqflags); +} + +static int ionic_admin_busy_wait(struct ionic_admin_wr *wr) +{ + struct ionic_aq *aq = wr->aq; + unsigned long irqflags; + int try_i; + + for (try_i = 0; try_i < IONIC_ADMIN_BUSY_RETRY_COUNT; ++try_i) { + if (completion_done(&wr->work)) + return 0; + + mdelay(IONIC_ADMIN_BUSY_RETRY_MS); + + spin_lock_irqsave(&aq->lock, irqflags); + ionic_admin_poll_locked(aq); + spin_unlock_irqrestore(&aq->lock, irqflags); + } + + /* + * we timed out. Initiate RDMA LIF reset and indicate + * error to caller. + */ + ionic_admin_timedout(aq); + return -ETIMEDOUT; +} + +int ionic_admin_wait(struct ionic_ibdev *dev, struct ionic_admin_wr *wr, + enum ionic_admin_flags flags) +{ + int rc, timo; + + if (flags & IONIC_ADMIN_F_BUSYWAIT) { + /* Spin */ + rc = ionic_admin_busy_wait(wr); + } else if (flags & IONIC_ADMIN_F_INTERRUPT) { + /* + * Interruptible sleep, 1s timeout + * This is used for commands which are safe for the caller + * to clean up without killing and resetting the adminq. + */ + timo = wait_for_completion_interruptible_timeout(&wr->work, + HZ); + if (timo > 0) + rc = 0; + else if (timo == 0) + rc = -ETIMEDOUT; + else + rc = timo; + } else { + /* + * Uninterruptible sleep + * This is used for commands which are NOT safe for the + * caller to clean up. Cleanup must be handled by the + * adminq kill and reset process so that host memory is + * not corrupted by the device. + */ + wait_for_completion(&wr->work); + rc = 0; + } + + if (rc) { + ibdev_warn(&dev->ibdev, "wait status %d\n", rc); + ionic_admin_cancel(wr); + } else if (wr->status == IONIC_ADMIN_KILLED) { + ibdev_dbg(&dev->ibdev, "admin killed\n"); + + /* No error if admin already killed during teardown */ + rc = (flags & IONIC_ADMIN_F_TEARDOWN) ? 
0 : -ENODEV; + } else if (ionic_v1_cqe_error(&wr->cqe)) { + ibdev_warn(&dev->ibdev, "opcode %u error %u\n", + wr->wqe.op, + be32_to_cpu(wr->cqe.status_length)); + rc = -EINVAL; + } + return rc; +} + +static int ionic_rdma_devcmd(struct ionic_ibdev *dev, + struct ionic_admin_ctx *admin) +{ + int rc; + + rc = ionic_adminq_post_wait(dev->lif_cfg.lif, admin); + if (rc) + return rc; + + return ionic_error_to_errno(admin->comp.comp.status); +} + +int ionic_rdma_reset_devcmd(struct ionic_ibdev *dev) +{ + struct ionic_admin_ctx admin = { + .work = COMPLETION_INITIALIZER_ONSTACK(admin.work), + .cmd.rdma_reset = { + .opcode = IONIC_CMD_RDMA_RESET_LIF, + .lif_index = cpu_to_le16(dev->lif_cfg.lif_index), + }, + }; + + return ionic_rdma_devcmd(dev, &admin); +} + +static int ionic_rdma_queue_devcmd(struct ionic_ibdev *dev, + struct ionic_queue *q, + u32 qid, u32 cid, u16 opcode) +{ + struct ionic_admin_ctx admin = { + .work = COMPLETION_INITIALIZER_ONSTACK(admin.work), + .cmd.rdma_queue = { + .opcode = opcode, + .lif_index = cpu_to_le16(dev->lif_cfg.lif_index), + .qid_ver = cpu_to_le32(qid), + .cid = cpu_to_le32(cid), + .dbid = cpu_to_le16(dev->lif_cfg.dbid), + .depth_log2 = q->depth_log2, + .stride_log2 = q->stride_log2, + .dma_addr = cpu_to_le64(q->dma), + }, + }; + + return ionic_rdma_devcmd(dev, &admin); +} + +static void ionic_rdma_admincq_comp(struct ib_cq *ibcq, void *cq_context) +{ + struct ionic_aq *aq = cq_context; + unsigned long irqflags; + + spin_lock_irqsave(&aq->lock, irqflags); + aq->armed = false; + if (atomic_read(&aq->admin_state) < IONIC_ADMIN_KILLED) + queue_work(ionic_evt_workq, &aq->work); + spin_unlock_irqrestore(&aq->lock, irqflags); +} + +static void ionic_rdma_admincq_event(struct ib_event *event, void *cq_context) +{ + struct ionic_aq *aq = cq_context; + + ibdev_err(&aq->dev->ibdev, "admincq event %d\n", event->event); +} + +static struct ionic_vcq *ionic_create_rdma_admincq(struct ionic_ibdev *dev, + int comp_vector) +{ + struct ib_cq_init_attr attr = { + .cqe = IONIC_AQ_DEPTH, + .comp_vector = comp_vector, + }; + struct ionic_tbl_buf buf = {}; + struct ionic_vcq *vcq; + struct ionic_cq *cq; + int rc; + + vcq = kzalloc(sizeof(*vcq), GFP_KERNEL); + if (!vcq) + return ERR_PTR(-ENOMEM); + + vcq->ibcq.device = &dev->ibdev; + vcq->ibcq.comp_handler = ionic_rdma_admincq_comp; + vcq->ibcq.event_handler = ionic_rdma_admincq_event; + atomic_set(&vcq->ibcq.usecnt, 0); + + vcq->udma_mask = 1; + cq = &vcq->cq[0]; + + rc = ionic_create_cq_common(vcq, &buf, &attr, NULL, NULL, + NULL, NULL, 0); + if (rc) + goto err_init; + + rc = ionic_rdma_queue_devcmd(dev, &cq->q, cq->cqid, cq->eqid, + IONIC_CMD_RDMA_CREATE_CQ); + if (rc) + goto err_cmd; + + return vcq; + +err_cmd: + ionic_destroy_cq_common(dev, cq); +err_init: + kfree(vcq); + + return ERR_PTR(rc); +} + +static struct ionic_aq *__ionic_create_rdma_adminq(struct ionic_ibdev *dev, + u32 aqid, u32 cqid) +{ + struct ionic_aq *aq; + int rc; + + aq = kzalloc(sizeof(*aq), GFP_KERNEL); + if (!aq) + return ERR_PTR(-ENOMEM); + + atomic_set(&aq->admin_state, IONIC_ADMIN_KILLED); + aq->dev = dev; + aq->aqid = aqid; + aq->cqid = cqid; + spin_lock_init(&aq->lock); + + rc = ionic_queue_init(&aq->q, dev->lif_cfg.hwdev, IONIC_EQ_DEPTH, + ADMIN_WQE_STRIDE); + if (rc) + goto err_q; + + ionic_queue_dbell_init(&aq->q, aq->aqid); + + aq->q_wr = kcalloc((u32)aq->q.mask + 1, sizeof(*aq->q_wr), GFP_KERNEL); + if (!aq->q_wr) { + rc = -ENOMEM; + goto err_wr; + } + + INIT_LIST_HEAD(&aq->wr_prod); + INIT_LIST_HEAD(&aq->wr_post); + + INIT_WORK(&aq->work, 
ionic_admin_work); + aq->armed = false; + + return aq; + +err_wr: + ionic_queue_destroy(&aq->q, dev->lif_cfg.hwdev); +err_q: + kfree(aq); + + return ERR_PTR(rc); +} + +static void __ionic_destroy_rdma_adminq(struct ionic_ibdev *dev, + struct ionic_aq *aq) +{ + ionic_queue_destroy(&aq->q, dev->lif_cfg.hwdev); + kfree(aq); +} + +static struct ionic_aq *ionic_create_rdma_adminq(struct ionic_ibdev *dev, + u32 aqid, u32 cqid) +{ + struct ionic_aq *aq; + int rc; + + aq = __ionic_create_rdma_adminq(dev, aqid, cqid); + if (IS_ERR(aq)) + return aq; + + rc = ionic_rdma_queue_devcmd(dev, &aq->q, aq->aqid, aq->cqid, + IONIC_CMD_RDMA_CREATE_ADMINQ); + if (rc) + goto err_cmd; + + return aq; + +err_cmd: + __ionic_destroy_rdma_adminq(dev, aq); + + return ERR_PTR(rc); +} + +static void ionic_kill_ibdev(struct ionic_ibdev *dev, bool fatal_path) +{ + unsigned long irqflags; + bool do_flush = false; + int i; + + /* Mark AQs for drain and flush the QPs while irq is disabled */ + local_irq_save(irqflags); + + /* Mark the admin queue, flushing at most once */ + for (i = 0; i < dev->lif_cfg.aq_count; i++) { + struct ionic_aq *aq = dev->aq_vec[i]; + + spin_lock(&aq->lock); + if (atomic_read(&aq->admin_state) != IONIC_ADMIN_KILLED) { + atomic_set(&aq->admin_state, IONIC_ADMIN_KILLED); + /* Flush incomplete admin commands */ + ionic_admin_poll_locked(aq); + do_flush = true; + } + spin_unlock(&aq->lock); + } + + local_irq_restore(irqflags); + + /* Post a fatal event if requested */ + if (fatal_path) { + struct ib_event ev; + + ev.device = &dev->ibdev; + ev.element.port_num = 1; + ev.event = IB_EVENT_DEVICE_FATAL; + + ib_dispatch_event(&ev); + } + + atomic_set(&dev->admin_state, IONIC_ADMIN_KILLED); +} + +void ionic_kill_rdma_admin(struct ionic_ibdev *dev, bool fatal_path) +{ + enum ionic_admin_state old_state; + unsigned long irqflags = 0; + int i, rc; + + if (!dev->aq_vec) + return; + + /* + * Admin queues are transitioned from active to paused to killed state. + * When in paused state, no new commands are issued to the device, + * nor are any completed locally. After resetting the lif, it will be + * safe to resume the rdma admin queues in the killed state. Commands + * will not be issued to the device, but will complete locally with status + * IONIC_ADMIN_KILLED. Handling completion will ensure that creating or + * modifying resources fails, but destroying resources succeeds. + * If there was a failure resetting the lif using this strategy, + * then the state of the device is unknown. 
+ */ + old_state = atomic_cmpxchg(&dev->admin_state, IONIC_ADMIN_ACTIVE, + IONIC_ADMIN_PAUSED); + if (old_state != IONIC_ADMIN_ACTIVE) + return; + + /* Pause all the AQs */ + local_irq_save(irqflags); + for (i = 0; i < dev->lif_cfg.aq_count; i++) { + struct ionic_aq *aq = dev->aq_vec[i]; + + spin_lock(&aq->lock); + /* pause rdma admin queues to reset lif */ + if (atomic_read(&aq->admin_state) == IONIC_ADMIN_ACTIVE) + atomic_set(&aq->admin_state, IONIC_ADMIN_PAUSED); + spin_unlock(&aq->lock); + } + local_irq_restore(irqflags); + + rc = ionic_rdma_reset_devcmd(dev); + if (unlikely(rc)) { + ibdev_err(&dev->ibdev, "failed to reset rdma %d\n", rc); + ionic_request_rdma_reset(dev->lif_cfg.lif); + } + + ionic_kill_ibdev(dev, fatal_path); +} + +static void ionic_reset_work(struct work_struct *ws) +{ + struct ionic_ibdev *dev = + container_of(ws, struct ionic_ibdev, reset_work); + + ionic_kill_rdma_admin(dev, true); +} + +static bool ionic_next_eqe(struct ionic_eq *eq, struct ionic_v1_eqe *eqe) +{ + struct ionic_v1_eqe *qeqe; + bool color; + + qeqe = ionic_queue_at_prod(&eq->q); + color = ionic_v1_eqe_color(qeqe); + + /* cons is color for eq */ + if (eq->q.cons != color) + return false; + + /* Prevent out-of-order reads of the EQE */ + dma_rmb(); + + ibdev_dbg(&eq->dev->ibdev, "poll eq prod %u\n", eq->q.prod); + print_hex_dump_debug("eqe ", DUMP_PREFIX_OFFSET, 16, 1, + qeqe, BIT(eq->q.stride_log2), true); + *eqe = *qeqe; + + return true; +} + +static void ionic_cq_event(struct ionic_ibdev *dev, u32 cqid, u8 code) +{ + unsigned long irqflags; + struct ib_event ibev; + struct ionic_cq *cq; + + xa_lock_irqsave(&dev->cq_tbl, irqflags); + cq = xa_load(&dev->cq_tbl, cqid); + if (cq) + kref_get(&cq->cq_kref); + xa_unlock_irqrestore(&dev->cq_tbl, irqflags); + + if (!cq) { + ibdev_dbg(&dev->ibdev, + "missing cqid %#x code %u\n", cqid, code); + return; + } + + switch (code) { + case IONIC_V1_EQE_CQ_NOTIFY: + if (cq->vcq->ibcq.comp_handler) + cq->vcq->ibcq.comp_handler(&cq->vcq->ibcq, + cq->vcq->ibcq.cq_context); + break; + + case IONIC_V1_EQE_CQ_ERR: + if (cq->vcq->ibcq.event_handler) { + ibev.event = IB_EVENT_CQ_ERR; + ibev.device = &dev->ibdev; + ibev.element.cq = &cq->vcq->ibcq; + + cq->vcq->ibcq.event_handler(&ibev, + cq->vcq->ibcq.cq_context); + } + break; + + default: + ibdev_dbg(&dev->ibdev, + "unrecognized cqid %#x code %u\n", cqid, code); + break; + } + + kref_put(&cq->cq_kref, ionic_cq_complete); +} + +static u16 ionic_poll_eq(struct ionic_eq *eq, u16 budget) +{ + struct ionic_ibdev *dev = eq->dev; + struct ionic_v1_eqe eqe; + u16 npolled = 0; + u8 type, code; + u32 evt, qid; + + while (npolled < budget) { + if (!ionic_next_eqe(eq, &eqe)) + break; + + ionic_queue_produce(&eq->q); + + /* cons is color for eq */ + eq->q.cons = ionic_color_wrap(eq->q.prod, eq->q.cons); + + ++npolled; + + evt = ionic_v1_eqe_evt(&eqe); + type = ionic_v1_eqe_evt_type(evt); + code = ionic_v1_eqe_evt_code(evt); + qid = ionic_v1_eqe_evt_qid(evt); + + switch (type) { + case IONIC_V1_EQE_TYPE_CQ: + ionic_cq_event(dev, qid, code); + break; + + default: + ibdev_dbg(&dev->ibdev, + "unknown event %#x type %u\n", evt, type); + } + } + + return npolled; +} + +static void ionic_poll_eq_work(struct work_struct *work) +{ + struct ionic_eq *eq = container_of(work, struct ionic_eq, work); + u32 npolled; + + if (unlikely(!eq->enable) || WARN_ON(eq->armed)) + return; + + npolled = ionic_poll_eq(eq, IONIC_EQ_WORK_BUDGET); + if (npolled == IONIC_EQ_WORK_BUDGET) { + ionic_intr_credits(eq->dev->lif_cfg.intr_ctrl, eq->intr, + npolled, 0); + 
queue_work(ionic_evt_workq, &eq->work); + } else { + xchg(&eq->armed, true); + ionic_intr_credits(eq->dev->lif_cfg.intr_ctrl, eq->intr, + 0, IONIC_INTR_CRED_UNMASK); + } +} + +static irqreturn_t ionic_poll_eq_isr(int irq, void *eqptr) +{ + struct ionic_eq *eq = eqptr; + bool was_armed; + u32 npolled; + + was_armed = xchg(&eq->armed, false); + + if (unlikely(!eq->enable) || !was_armed) + return IRQ_HANDLED; + + npolled = ionic_poll_eq(eq, IONIC_EQ_ISR_BUDGET); + if (npolled == IONIC_EQ_ISR_BUDGET) { + ionic_intr_credits(eq->dev->lif_cfg.intr_ctrl, eq->intr, + npolled, 0); + queue_work(ionic_evt_workq, &eq->work); + } else { + xchg(&eq->armed, true); + ionic_intr_credits(eq->dev->lif_cfg.intr_ctrl, eq->intr, + 0, IONIC_INTR_CRED_UNMASK); + } + + return IRQ_HANDLED; +} + +static struct ionic_eq *ionic_create_eq(struct ionic_ibdev *dev, int eqid) +{ + struct ionic_intr_info intr_obj = { }; + struct ionic_eq *eq; + int rc; + + eq = kzalloc(sizeof(*eq), GFP_KERNEL); + if (!eq) + return ERR_PTR(-ENOMEM); + + eq->dev = dev; + + rc = ionic_queue_init(&eq->q, dev->lif_cfg.hwdev, IONIC_EQ_DEPTH, + sizeof(struct ionic_v1_eqe)); + if (rc) + goto err_q; + + eq->eqid = eqid; + + eq->armed = true; + eq->enable = false; + INIT_WORK(&eq->work, ionic_poll_eq_work); + + rc = ionic_intr_alloc(dev->lif_cfg.lif, &intr_obj); + if (rc < 0) + goto err_intr; + + eq->irq = intr_obj.vector; + eq->intr = intr_obj.index; + + ionic_queue_dbell_init(&eq->q, eq->eqid); + + /* cons is color for eq */ + eq->q.cons = true; + + snprintf(eq->name, sizeof(eq->name), "%s-%d-%d-eq", + "ionr", dev->lif_cfg.lif_index, eq->eqid); + + ionic_intr_mask(dev->lif_cfg.intr_ctrl, eq->intr, IONIC_INTR_MASK_SET); + ionic_intr_mask_assert(dev->lif_cfg.intr_ctrl, eq->intr, IONIC_INTR_MASK_SET); + ionic_intr_coal_init(dev->lif_cfg.intr_ctrl, eq->intr, 0); + ionic_intr_clean(dev->lif_cfg.intr_ctrl, eq->intr); + + eq->enable = true; + + rc = request_irq(eq->irq, ionic_poll_eq_isr, 0, eq->name, eq); + if (rc) + goto err_irq; + + rc = ionic_rdma_queue_devcmd(dev, &eq->q, eq->eqid, eq->intr, + IONIC_CMD_RDMA_CREATE_EQ); + if (rc) + goto err_cmd; + + ionic_intr_mask(dev->lif_cfg.intr_ctrl, eq->intr, IONIC_INTR_MASK_CLEAR); + + return eq; + +err_cmd: + eq->enable = false; + free_irq(eq->irq, eq); + flush_work(&eq->work); +err_irq: + ionic_intr_free(dev->lif_cfg.lif, eq->intr); +err_intr: + ionic_queue_destroy(&eq->q, dev->lif_cfg.hwdev); +err_q: + kfree(eq); + + return ERR_PTR(rc); +} + +static void ionic_destroy_eq(struct ionic_eq *eq) +{ + struct ionic_ibdev *dev = eq->dev; + + eq->enable = false; + free_irq(eq->irq, eq); + flush_work(&eq->work); + + ionic_intr_free(dev->lif_cfg.lif, eq->intr); + ionic_queue_destroy(&eq->q, dev->lif_cfg.hwdev); + kfree(eq); +} + +int ionic_create_rdma_admin(struct ionic_ibdev *dev) +{ + int eq_i = 0, aq_i = 0, rc = 0; + struct ionic_vcq *vcq; + struct ionic_aq *aq; + struct ionic_eq *eq; + + dev->eq_vec = NULL; + dev->aq_vec = NULL; + + INIT_WORK(&dev->reset_work, ionic_reset_work); + INIT_DELAYED_WORK(&dev->admin_dwork, ionic_admin_dwork); + atomic_set(&dev->admin_state, IONIC_ADMIN_KILLED); + + if (dev->lif_cfg.aq_count > IONIC_AQ_COUNT) { + ibdev_dbg(&dev->ibdev, "limiting adminq count to %d\n", + IONIC_AQ_COUNT); + dev->lif_cfg.aq_count = IONIC_AQ_COUNT; + } + + if (dev->lif_cfg.eq_count > IONIC_EQ_COUNT) { + dev_dbg(&dev->ibdev.dev, "limiting eventq count to %d\n", + IONIC_EQ_COUNT); + dev->lif_cfg.eq_count = IONIC_EQ_COUNT; + } + + /* need at least two eq and one aq */ + if (dev->lif_cfg.eq_count < 
IONIC_EQ_COUNT_MIN || + dev->lif_cfg.aq_count < IONIC_AQ_COUNT_MIN) { + rc = -EINVAL; + goto out; + } + + dev->eq_vec = kmalloc_array(dev->lif_cfg.eq_count, sizeof(*dev->eq_vec), + GFP_KERNEL); + if (!dev->eq_vec) { + rc = -ENOMEM; + goto out; + } + + for (eq_i = 0; eq_i < dev->lif_cfg.eq_count; ++eq_i) { + eq = ionic_create_eq(dev, eq_i + dev->lif_cfg.eq_base); + if (IS_ERR(eq)) { + rc = PTR_ERR(eq); + + if (eq_i < IONIC_EQ_COUNT_MIN) { + ibdev_err(&dev->ibdev, + "fail create eq %d\n", rc); + goto out; + } + + /* ok, just fewer eq than device supports */ + ibdev_dbg(&dev->ibdev, "eq count %d want %d rc %d\n", + eq_i, dev->lif_cfg.eq_count, rc); + + rc = 0; + break; + } + + dev->eq_vec[eq_i] = eq; + } + + dev->lif_cfg.eq_count = eq_i; + + dev->aq_vec = kmalloc_array(dev->lif_cfg.aq_count, sizeof(*dev->aq_vec), + GFP_KERNEL); + if (!dev->aq_vec) { + rc = -ENOMEM; + goto out; + } + + /* Create one CQ per AQ */ + for (aq_i = 0; aq_i < dev->lif_cfg.aq_count; ++aq_i) { + vcq = ionic_create_rdma_admincq(dev, aq_i % eq_i); + if (IS_ERR(vcq)) { + rc = PTR_ERR(vcq); + + if (!aq_i) { + ibdev_err(&dev->ibdev, + "failed to create acq %d\n", rc); + goto out; + } + + /* ok, just fewer adminq than device supports */ + ibdev_dbg(&dev->ibdev, "acq count %d want %d rc %d\n", + aq_i, dev->lif_cfg.aq_count, rc); + break; + } + + aq = ionic_create_rdma_adminq(dev, aq_i + dev->lif_cfg.aq_base, + vcq->cq[0].cqid); + if (IS_ERR(aq)) { + /* Clean up the dangling CQ */ + ionic_destroy_cq_common(dev, &vcq->cq[0]); + kfree(vcq); + + rc = PTR_ERR(aq); + + if (!aq_i) { + ibdev_err(&dev->ibdev, + "failed to create aq %d\n", rc); + goto out; + } + + /* ok, just fewer adminq than device supports */ + ibdev_dbg(&dev->ibdev, "aq count %d want %d rc %d\n", + aq_i, dev->lif_cfg.aq_count, rc); + break; + } + + vcq->ibcq.cq_context = aq; + aq->vcq = vcq; + + atomic_set(&aq->admin_state, IONIC_ADMIN_ACTIVE); + dev->aq_vec[aq_i] = aq; + } + + atomic_set(&dev->admin_state, IONIC_ADMIN_ACTIVE); +out: + dev->lif_cfg.eq_count = eq_i; + dev->lif_cfg.aq_count = aq_i; + + return rc; +} + +void ionic_destroy_rdma_admin(struct ionic_ibdev *dev) +{ + struct ionic_vcq *vcq; + struct ionic_aq *aq; + struct ionic_eq *eq; + + /* + * Killing the admin before destroy makes sure all admin and + * completions are flushed. admin_state = IONIC_ADMIN_KILLED + * stops queueing up further works. + */ + cancel_delayed_work_sync(&dev->admin_dwork); + cancel_work_sync(&dev->reset_work); + + if (dev->aq_vec) { + while (dev->lif_cfg.aq_count > 0) { + aq = dev->aq_vec[--dev->lif_cfg.aq_count]; + vcq = aq->vcq; + + cancel_work_sync(&aq->work); + + __ionic_destroy_rdma_adminq(dev, aq); + if (vcq) { + ionic_destroy_cq_common(dev, &vcq->cq[0]); + kfree(vcq); + } + } + + kfree(dev->aq_vec); + } + + if (dev->eq_vec) { + while (dev->lif_cfg.eq_count > 0) { + eq = dev->eq_vec[--dev->lif_cfg.eq_count]; + ionic_destroy_eq(eq); + } + + kfree(dev->eq_vec); + } +} diff --git a/drivers/infiniband/hw/ionic/ionic_controlpath.c b/drivers/infiniband/hw/ionic/ionic_controlpath.c new file mode 100644 index 000000000000..e1130573bd39 --- /dev/null +++ b/drivers/infiniband/hw/ionic/ionic_controlpath.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. 
*/ + +#include "ionic_ibdev.h" + +static int ionic_validate_qdesc(struct ionic_qdesc *q) +{ + if (!q->addr || !q->size || !q->mask || + !q->depth_log2 || !q->stride_log2) + return -EINVAL; + + if (q->addr & (PAGE_SIZE - 1)) + return -EINVAL; + + if (q->mask != BIT(q->depth_log2) - 1) + return -EINVAL; + + if (q->size < BIT_ULL(q->depth_log2 + q->stride_log2)) + return -EINVAL; + + return 0; +} + +static u32 ionic_get_eqid(struct ionic_ibdev *dev, u32 comp_vector, u8 udma_idx) +{ + /* EQ per vector per udma, and the first eqs reserved for async events. + * The rest of the vectors can be requested for completions. + */ + u32 comp_vec_count = dev->lif_cfg.eq_count / dev->lif_cfg.udma_count - 1; + + return (comp_vector % comp_vec_count + 1) * dev->lif_cfg.udma_count + udma_idx; +} + +static int ionic_get_cqid(struct ionic_ibdev *dev, u32 *cqid, u8 udma_idx) +{ + unsigned int size, base, bound; + int rc; + + size = dev->lif_cfg.cq_count / dev->lif_cfg.udma_count; + base = size * udma_idx; + bound = base + size; + + rc = ionic_resid_get_shared(&dev->inuse_cqid, base, bound); + if (rc >= 0) { + /* cq_base is zero or a multiple of two queue groups */ + *cqid = dev->lif_cfg.cq_base + + ionic_bitid_to_qid(rc, dev->lif_cfg.udma_qgrp_shift, + dev->half_cqid_udma_shift); + + rc = 0; + } + + return rc; +} + +static void ionic_put_cqid(struct ionic_ibdev *dev, u32 cqid) +{ + u32 bitid = ionic_qid_to_bitid(cqid - dev->lif_cfg.cq_base, + dev->lif_cfg.udma_qgrp_shift, + dev->half_cqid_udma_shift); + + ionic_resid_put(&dev->inuse_cqid, bitid); +} + +int ionic_create_cq_common(struct ionic_vcq *vcq, + struct ionic_tbl_buf *buf, + const struct ib_cq_init_attr *attr, + struct ionic_ctx *ctx, + struct ib_udata *udata, + struct ionic_qdesc *req_cq, + __u32 *resp_cqid, + int udma_idx) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(vcq->ibcq.device); + struct ionic_cq *cq = &vcq->cq[udma_idx]; + void *entry; + int rc; + + cq->vcq = vcq; + + if (attr->cqe < 1 || attr->cqe + IONIC_CQ_GRACE > 0xffff) { + rc = -EINVAL; + goto err_args; + } + + rc = ionic_get_cqid(dev, &cq->cqid, udma_idx); + if (rc) + goto err_args; + + cq->eqid = ionic_get_eqid(dev, attr->comp_vector, udma_idx); + + spin_lock_init(&cq->lock); + INIT_LIST_HEAD(&cq->poll_sq); + INIT_LIST_HEAD(&cq->flush_sq); + INIT_LIST_HEAD(&cq->flush_rq); + + if (udata) { + rc = ionic_validate_qdesc(req_cq); + if (rc) + goto err_qdesc; + + cq->umem = ib_umem_get(&dev->ibdev, req_cq->addr, req_cq->size, + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(cq->umem)) { + rc = PTR_ERR(cq->umem); + goto err_qdesc; + } + + cq->q.ptr = NULL; + cq->q.size = req_cq->size; + cq->q.mask = req_cq->mask; + cq->q.depth_log2 = req_cq->depth_log2; + cq->q.stride_log2 = req_cq->stride_log2; + + *resp_cqid = cq->cqid; + } else { + rc = ionic_queue_init(&cq->q, dev->lif_cfg.hwdev, + attr->cqe + IONIC_CQ_GRACE, + sizeof(struct ionic_v1_cqe)); + if (rc) + goto err_q_init; + + ionic_queue_dbell_init(&cq->q, cq->cqid); + cq->color = true; + cq->credit = cq->q.mask; + } + + rc = ionic_pgtbl_init(dev, buf, cq->umem, cq->q.dma, 1, PAGE_SIZE); + if (rc) + goto err_pgtbl_init; + + init_completion(&cq->cq_rel_comp); + kref_init(&cq->cq_kref); + + entry = xa_store_irq(&dev->cq_tbl, cq->cqid, cq, GFP_KERNEL); + if (entry) { + if (!xa_is_err(entry)) + rc = -EINVAL; + else + rc = xa_err(entry); + + goto err_xa; + } + + return 0; + +err_xa: + ionic_pgtbl_unbuf(dev, buf); +err_pgtbl_init: + if (!udata) + ionic_queue_destroy(&cq->q, dev->lif_cfg.hwdev); +err_q_init: + if (cq->umem) + ib_umem_release(cq->umem); 
+err_qdesc: + ionic_put_cqid(dev, cq->cqid); +err_args: + cq->vcq = NULL; + + return rc; +} + +void ionic_destroy_cq_common(struct ionic_ibdev *dev, struct ionic_cq *cq) +{ + if (!cq->vcq) + return; + + xa_erase_irq(&dev->cq_tbl, cq->cqid); + + kref_put(&cq->cq_kref, ionic_cq_complete); + wait_for_completion(&cq->cq_rel_comp); + + if (cq->umem) + ib_umem_release(cq->umem); + else + ionic_queue_destroy(&cq->q, dev->lif_cfg.hwdev); + + ionic_put_cqid(dev, cq->cqid); + + cq->vcq = NULL; +} diff --git a/drivers/infiniband/hw/ionic/ionic_fw.h b/drivers/infiniband/hw/ionic/ionic_fw.h new file mode 100644 index 000000000000..44ec69487519 --- /dev/null +++ b/drivers/infiniband/hw/ionic/ionic_fw.h @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */ + +#ifndef _IONIC_FW_H_ +#define _IONIC_FW_H_ + +#include + +/* completion queue v1 cqe */ +struct ionic_v1_cqe { + union { + struct { + __be16 cmd_idx; + __u8 cmd_op; + __u8 rsvd[17]; + __le16 old_sq_cindex; + __le16 old_rq_cq_cindex; + } admin; + struct { + __u64 wqe_id; + __be32 src_qpn_op; + __u8 src_mac[6]; + __be16 vlan_tag; + __be32 imm_data_rkey; + } recv; + struct { + __u8 rsvd[4]; + __be32 msg_msn; + __u8 rsvd2[8]; + __u64 npg_wqe_id; + } send; + }; + __be32 status_length; + __be32 qid_type_flags; +}; + +/* bits for cqe qid_type_flags */ +enum ionic_v1_cqe_qtf_bits { + IONIC_V1_CQE_COLOR = BIT(0), + IONIC_V1_CQE_ERROR = BIT(1), + IONIC_V1_CQE_TYPE_SHIFT = 5, + IONIC_V1_CQE_TYPE_MASK = 0x7, + IONIC_V1_CQE_QID_SHIFT = 8, + + IONIC_V1_CQE_TYPE_ADMIN = 0, + IONIC_V1_CQE_TYPE_RECV = 1, + IONIC_V1_CQE_TYPE_SEND_MSN = 2, + IONIC_V1_CQE_TYPE_SEND_NPG = 3, +}; + +static inline bool ionic_v1_cqe_color(struct ionic_v1_cqe *cqe) +{ + return cqe->qid_type_flags & cpu_to_be32(IONIC_V1_CQE_COLOR); +} + +static inline bool ionic_v1_cqe_error(struct ionic_v1_cqe *cqe) +{ + return cqe->qid_type_flags & cpu_to_be32(IONIC_V1_CQE_ERROR); +} + +static inline void ionic_v1_cqe_clean(struct ionic_v1_cqe *cqe) +{ + cqe->qid_type_flags |= cpu_to_be32(~0u << IONIC_V1_CQE_QID_SHIFT); +} + +static inline u32 ionic_v1_cqe_qtf(struct ionic_v1_cqe *cqe) +{ + return be32_to_cpu(cqe->qid_type_flags); +} + +static inline u8 ionic_v1_cqe_qtf_type(u32 qtf) +{ + return (qtf >> IONIC_V1_CQE_TYPE_SHIFT) & IONIC_V1_CQE_TYPE_MASK; +} + +static inline u32 ionic_v1_cqe_qtf_qid(u32 qtf) +{ + return qtf >> IONIC_V1_CQE_QID_SHIFT; +} + +#define ADMIN_WQE_STRIDE 64 +#define ADMIN_WQE_HDR_LEN 4 + +/* admin queue v1 wqe */ +struct ionic_v1_admin_wqe { + __u8 op; + __u8 rsvd; + __le16 len; + + union { + } cmd; +}; + +/* admin queue v1 cqe status */ +enum ionic_v1_admin_status { + IONIC_V1_ASTS_OK, + IONIC_V1_ASTS_BAD_CMD, + IONIC_V1_ASTS_BAD_INDEX, + IONIC_V1_ASTS_BAD_STATE, + IONIC_V1_ASTS_BAD_TYPE, + IONIC_V1_ASTS_BAD_ATTR, + IONIC_V1_ASTS_MSG_TOO_BIG, +}; + +/* event queue v1 eqe */ +struct ionic_v1_eqe { + __be32 evt; +}; + +/* bits for cqe queue_type_flags */ +enum ionic_v1_eqe_evt_bits { + IONIC_V1_EQE_COLOR = BIT(0), + IONIC_V1_EQE_TYPE_SHIFT = 1, + IONIC_V1_EQE_TYPE_MASK = 0x7, + IONIC_V1_EQE_CODE_SHIFT = 4, + IONIC_V1_EQE_CODE_MASK = 0xf, + IONIC_V1_EQE_QID_SHIFT = 8, + + /* cq events */ + IONIC_V1_EQE_TYPE_CQ = 0, + /* cq normal events */ + IONIC_V1_EQE_CQ_NOTIFY = 0, + /* cq error events */ + IONIC_V1_EQE_CQ_ERR = 8, + + /* qp and srq events */ + IONIC_V1_EQE_TYPE_QP = 1, + /* qp normal events */ + IONIC_V1_EQE_SRQ_LEVEL = 0, + IONIC_V1_EQE_SQ_DRAIN = 1, + IONIC_V1_EQE_QP_COMM_EST = 2, + IONIC_V1_EQE_QP_LAST_WQE = 3, 
+ /* qp error events */ + IONIC_V1_EQE_QP_ERR = 8, + IONIC_V1_EQE_QP_ERR_REQUEST = 9, + IONIC_V1_EQE_QP_ERR_ACCESS = 10, +}; + +static inline bool ionic_v1_eqe_color(struct ionic_v1_eqe *eqe) +{ + return eqe->evt & cpu_to_be32(IONIC_V1_EQE_COLOR); +} + +static inline u32 ionic_v1_eqe_evt(struct ionic_v1_eqe *eqe) +{ + return be32_to_cpu(eqe->evt); +} + +static inline u8 ionic_v1_eqe_evt_type(u32 evt) +{ + return (evt >> IONIC_V1_EQE_TYPE_SHIFT) & IONIC_V1_EQE_TYPE_MASK; +} + +static inline u8 ionic_v1_eqe_evt_code(u32 evt) +{ + return (evt >> IONIC_V1_EQE_CODE_SHIFT) & IONIC_V1_EQE_CODE_MASK; +} + +static inline u32 ionic_v1_eqe_evt_qid(u32 evt) +{ + return evt >> IONIC_V1_EQE_QID_SHIFT; +} + +#endif /* _IONIC_FW_H_ */ diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c index d79470dae13a..7710190ff65f 100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.c +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c @@ -15,9 +15,41 @@ MODULE_DESCRIPTION(DRIVER_DESCRIPTION); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS("NET_IONIC"); +static void ionic_init_resids(struct ionic_ibdev *dev) +{ + ionic_resid_init(&dev->inuse_cqid, dev->lif_cfg.cq_count); + dev->half_cqid_udma_shift = + order_base_2(dev->lif_cfg.cq_count / dev->lif_cfg.udma_count); + ionic_resid_init(&dev->inuse_pdid, IONIC_MAX_PD); + ionic_resid_init(&dev->inuse_ahid, dev->lif_cfg.nahs_per_lif); + ionic_resid_init(&dev->inuse_mrid, dev->lif_cfg.nmrs_per_lif); + /* skip reserved lkey */ + dev->next_mrkey = 1; + ionic_resid_init(&dev->inuse_qpid, dev->lif_cfg.qp_count); + /* skip reserved SMI and GSI qpids */ + dev->half_qpid_udma_shift = + order_base_2(dev->lif_cfg.qp_count / dev->lif_cfg.udma_count); + ionic_resid_init(&dev->inuse_dbid, dev->lif_cfg.dbid_count); +} + +static void ionic_destroy_resids(struct ionic_ibdev *dev) +{ + ionic_resid_destroy(&dev->inuse_cqid); + ionic_resid_destroy(&dev->inuse_pdid); + ionic_resid_destroy(&dev->inuse_ahid); + ionic_resid_destroy(&dev->inuse_mrid); + ionic_resid_destroy(&dev->inuse_qpid); + ionic_resid_destroy(&dev->inuse_dbid); +} + static void ionic_destroy_ibdev(struct ionic_ibdev *dev) { + ionic_kill_rdma_admin(dev, false); ib_unregister_device(&dev->ibdev); + ionic_destroy_rdma_admin(dev); + ionic_destroy_resids(dev); + WARN_ON(!xa_empty(&dev->cq_tbl)); + xa_destroy(&dev->cq_tbl); ib_dealloc_device(&dev->ibdev); } @@ -34,6 +66,18 @@ static struct ionic_ibdev *ionic_create_ibdev(struct ionic_aux_dev *ionic_adev) ionic_fill_lif_cfg(ionic_adev->lif, &dev->lif_cfg); + xa_init_flags(&dev->cq_tbl, GFP_ATOMIC); + + ionic_init_resids(dev); + + rc = ionic_rdma_reset_devcmd(dev); + if (rc) + goto err_reset; + + rc = ionic_create_rdma_admin(dev); + if (rc) + goto err_admin; + ibdev = &dev->ibdev; ibdev->dev.parent = dev->lif_cfg.hwdev; @@ -62,6 +106,11 @@ static struct ionic_ibdev *ionic_create_ibdev(struct ionic_aux_dev *ionic_adev) err_register: err_admin: + ionic_kill_rdma_admin(dev, false); + ionic_destroy_rdma_admin(dev); +err_reset: + ionic_destroy_resids(dev); + xa_destroy(&dev->cq_tbl); ib_dealloc_device(&dev->ibdev); return ERR_PTR(rc); @@ -112,6 +161,10 @@ static int __init ionic_mod_init(void) { int rc; + ionic_evt_workq = create_workqueue(DRIVER_NAME "-evt"); + if (!ionic_evt_workq) + return -ENOMEM; + rc = auxiliary_driver_register(&ionic_aux_r_driver); if (rc) goto err_aux; @@ -119,12 +172,15 @@ static int __init ionic_mod_init(void) return 0; err_aux: + destroy_workqueue(ionic_evt_workq); + return rc; } static void __exit ionic_mod_exit(void) { 
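For reference, a CQE's qid_type_flags and an EQE's evt word each pack the queue id, type and event/flag bits into one big-endian 32-bit word, and the helpers above recover the fields with plain shifts and masks after be32_to_cpu(). A minimal user-space sketch of the same decoding, using the EQE constants copied from ionic_fw.h above (the sample value and main() are illustrative only):

#include <stdint.h>
#include <stdio.h>

#define IONIC_V1_EQE_TYPE_SHIFT	1
#define IONIC_V1_EQE_TYPE_MASK	0x7
#define IONIC_V1_EQE_CODE_SHIFT	4
#define IONIC_V1_EQE_CODE_MASK	0xf
#define IONIC_V1_EQE_QID_SHIFT	8

int main(void)
{
	/* host-order event word: qid 7, code 2 (QP_COMM_EST), type 1 (QP) */
	uint32_t evt = (7u << IONIC_V1_EQE_QID_SHIFT) |
		       (2u << IONIC_V1_EQE_CODE_SHIFT) |
		       (1u << IONIC_V1_EQE_TYPE_SHIFT);

	uint8_t type = (evt >> IONIC_V1_EQE_TYPE_SHIFT) & IONIC_V1_EQE_TYPE_MASK;
	uint8_t code = (evt >> IONIC_V1_EQE_CODE_SHIFT) & IONIC_V1_EQE_CODE_MASK;
	uint32_t qid = evt >> IONIC_V1_EQE_QID_SHIFT;

	printf("type %u code %u qid %u\n", type, code, qid);
	return 0;
}

These are exactly the decoded type/code/qid values that ionic_poll_eq() later dispatches to the CQ and QP event handlers.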
auxiliary_driver_unregister(&ionic_aux_r_driver); + destroy_workqueue(ionic_evt_workq); } module_init(ionic_mod_init); diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.h b/drivers/infiniband/hw/ionic/ionic_ibdev.h index 224e5e74056d..490897628f41 100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.h +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.h @@ -4,15 +4,237 @@ #ifndef _IONIC_IBDEV_H_ #define _IONIC_IBDEV_H_ +#include #include + #include +#include + +#include "ionic_fw.h" +#include "ionic_queue.h" +#include "ionic_res.h" #include "ionic_lif_cfg.h" +/* Config knobs */ +#define IONIC_EQ_DEPTH 511 +#define IONIC_EQ_COUNT 32 +#define IONIC_AQ_DEPTH 63 +#define IONIC_AQ_COUNT 4 +#define IONIC_EQ_ISR_BUDGET 10 +#define IONIC_EQ_WORK_BUDGET 1000 +#define IONIC_MAX_PD 1024 + +#define IONIC_CQ_GRACE 100 + +struct ionic_aq; +struct ionic_cq; +struct ionic_eq; +struct ionic_vcq; + +enum ionic_admin_state { + IONIC_ADMIN_ACTIVE, /* submitting admin commands to queue */ + IONIC_ADMIN_PAUSED, /* not submitting, but may complete normally */ + IONIC_ADMIN_KILLED, /* not submitting, locally completed */ +}; + +enum ionic_admin_flags { + IONIC_ADMIN_F_BUSYWAIT = BIT(0), /* Don't sleep */ + IONIC_ADMIN_F_TEARDOWN = BIT(1), /* In destroy path */ + IONIC_ADMIN_F_INTERRUPT = BIT(2), /* Interruptible w/timeout */ +}; + +struct ionic_qdesc { + __aligned_u64 addr; + __u32 size; + __u16 mask; + __u8 depth_log2; + __u8 stride_log2; +}; + +enum ionic_mmap_flag { + IONIC_MMAP_WC = BIT(0), +}; + +struct ionic_mmap_entry { + struct rdma_user_mmap_entry rdma_entry; + unsigned long size; + unsigned long pfn; + u8 mmap_flags; +}; + struct ionic_ibdev { struct ib_device ibdev; struct ionic_lif_cfg lif_cfg; + + struct xarray qp_tbl; + struct xarray cq_tbl; + + struct ionic_resid_bits inuse_dbid; + struct ionic_resid_bits inuse_pdid; + struct ionic_resid_bits inuse_ahid; + struct ionic_resid_bits inuse_mrid; + struct ionic_resid_bits inuse_qpid; + struct ionic_resid_bits inuse_cqid; + + u8 half_cqid_udma_shift; + u8 half_qpid_udma_shift; + u8 next_qpid_udma_idx; + u8 next_mrkey; + + struct work_struct reset_work; + bool reset_posted; + u32 reset_cnt; + + struct delayed_work admin_dwork; + struct ionic_aq **aq_vec; + atomic_t admin_state; + + struct ionic_eq **eq_vec; }; +struct ionic_eq { + struct ionic_ibdev *dev; + + u32 eqid; + u32 intr; + + struct ionic_queue q; + + bool armed; + bool enable; + + struct work_struct work; + + int irq; + char name[32]; +}; + +struct ionic_admin_wr { + struct completion work; + struct list_head aq_ent; + struct ionic_v1_admin_wqe wqe; + struct ionic_v1_cqe cqe; + struct ionic_aq *aq; + int status; +}; + +struct ionic_admin_wr_q { + struct ionic_admin_wr *wr; + int wqe_strides; +}; + +struct ionic_aq { + struct ionic_ibdev *dev; + struct ionic_vcq *vcq; + + struct work_struct work; + + atomic_t admin_state; + unsigned long stamp; + bool armed; + + u32 aqid; + u32 cqid; + + spinlock_t lock; /* for posting */ + struct ionic_queue q; + struct ionic_admin_wr_q *q_wr; + struct list_head wr_prod; + struct list_head wr_post; +}; + +struct ionic_ctx { + struct ib_ucontext ibctx; + u32 dbid; + struct rdma_user_mmap_entry *mmap_dbell; +}; + +struct ionic_tbl_buf { + u32 tbl_limit; + u32 tbl_pages; + size_t tbl_size; + __le64 *tbl_buf; + dma_addr_t tbl_dma; + u8 page_size_log2; +}; + +struct ionic_cq { + struct ionic_vcq *vcq; + + u32 cqid; + u32 eqid; + + spinlock_t lock; /* for polling */ + struct list_head poll_sq; + bool flush; + struct list_head flush_sq; + struct list_head flush_rq; + 
struct list_head ibkill_flush_ent; + + struct ionic_queue q; + bool color; + int credit; + u16 arm_any_prod; + u16 arm_sol_prod; + + struct kref cq_kref; + struct completion cq_rel_comp; + + /* infrequently accessed, keep at end */ + struct ib_umem *umem; +}; + +struct ionic_vcq { + struct ib_cq ibcq; + struct ionic_cq cq[2]; + u8 udma_mask; + u8 poll_idx; +}; + +static inline struct ionic_ibdev *to_ionic_ibdev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct ionic_ibdev, ibdev); +} + +static inline void ionic_cq_complete(struct kref *kref) +{ + struct ionic_cq *cq = container_of(kref, struct ionic_cq, cq_kref); + + complete(&cq->cq_rel_comp); +} + +/* ionic_admin.c */ +extern struct workqueue_struct *ionic_evt_workq; +void ionic_admin_post(struct ionic_ibdev *dev, struct ionic_admin_wr *wr); +int ionic_admin_wait(struct ionic_ibdev *dev, struct ionic_admin_wr *wr, + enum ionic_admin_flags); + +int ionic_rdma_reset_devcmd(struct ionic_ibdev *dev); + +int ionic_create_rdma_admin(struct ionic_ibdev *dev); +void ionic_destroy_rdma_admin(struct ionic_ibdev *dev); +void ionic_kill_rdma_admin(struct ionic_ibdev *dev, bool fatal_path); + +/* ionic_controlpath.c */ +int ionic_create_cq_common(struct ionic_vcq *vcq, + struct ionic_tbl_buf *buf, + const struct ib_cq_init_attr *attr, + struct ionic_ctx *ctx, + struct ib_udata *udata, + struct ionic_qdesc *req_cq, + __u32 *resp_cqid, + int udma_idx); +void ionic_destroy_cq_common(struct ionic_ibdev *dev, struct ionic_cq *cq); + +/* ionic_pgtbl.c */ +int ionic_pgtbl_page(struct ionic_tbl_buf *buf, u64 dma); +int ionic_pgtbl_init(struct ionic_ibdev *dev, + struct ionic_tbl_buf *buf, + struct ib_umem *umem, + dma_addr_t dma, + int limit, + u64 page_size); +void ionic_pgtbl_unbuf(struct ionic_ibdev *dev, struct ionic_tbl_buf *buf); #endif /* _IONIC_IBDEV_H_ */ diff --git a/drivers/infiniband/hw/ionic/ionic_pgtbl.c b/drivers/infiniband/hw/ionic/ionic_pgtbl.c new file mode 100644 index 000000000000..11461f7642bc --- /dev/null +++ b/drivers/infiniband/hw/ionic/ionic_pgtbl.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. 
*/ + +#include +#include + +#include "ionic_fw.h" +#include "ionic_ibdev.h" + +int ionic_pgtbl_page(struct ionic_tbl_buf *buf, u64 dma) +{ + if (unlikely(buf->tbl_pages == buf->tbl_limit)) + return -ENOMEM; + + if (buf->tbl_buf) + buf->tbl_buf[buf->tbl_pages] = cpu_to_le64(dma); + else + buf->tbl_dma = dma; + + ++buf->tbl_pages; + + return 0; +} + +static int ionic_tbl_buf_alloc(struct ionic_ibdev *dev, + struct ionic_tbl_buf *buf) +{ + int rc; + + buf->tbl_size = buf->tbl_limit * sizeof(*buf->tbl_buf); + buf->tbl_buf = kmalloc(buf->tbl_size, GFP_KERNEL); + if (!buf->tbl_buf) + return -ENOMEM; + + buf->tbl_dma = dma_map_single(dev->lif_cfg.hwdev, buf->tbl_buf, + buf->tbl_size, DMA_TO_DEVICE); + rc = dma_mapping_error(dev->lif_cfg.hwdev, buf->tbl_dma); + if (rc) { + kfree(buf->tbl_buf); + return rc; + } + + return 0; +} + +static int ionic_pgtbl_umem(struct ionic_tbl_buf *buf, struct ib_umem *umem) +{ + struct ib_block_iter biter; + u64 page_dma; + int rc; + + rdma_umem_for_each_dma_block(umem, &biter, BIT_ULL(buf->page_size_log2)) { + page_dma = rdma_block_iter_dma_address(&biter); + rc = ionic_pgtbl_page(buf, page_dma); + if (rc) + return rc; + } + + return 0; +} + +void ionic_pgtbl_unbuf(struct ionic_ibdev *dev, struct ionic_tbl_buf *buf) +{ + if (buf->tbl_buf) + dma_unmap_single(dev->lif_cfg.hwdev, buf->tbl_dma, + buf->tbl_size, DMA_TO_DEVICE); + + kfree(buf->tbl_buf); + memset(buf, 0, sizeof(*buf)); +} + +int ionic_pgtbl_init(struct ionic_ibdev *dev, + struct ionic_tbl_buf *buf, + struct ib_umem *umem, + dma_addr_t dma, + int limit, + u64 page_size) +{ + int rc; + + memset(buf, 0, sizeof(*buf)); + + if (umem) { + limit = ib_umem_num_dma_blocks(umem, page_size); + buf->page_size_log2 = order_base_2(page_size); + } + + if (limit < 1) + return -EINVAL; + + buf->tbl_limit = limit; + + /* skip pgtbl if contiguous / direct translation */ + if (limit > 1) { + rc = ionic_tbl_buf_alloc(dev, buf); + if (rc) + return rc; + } + + if (umem) + rc = ionic_pgtbl_umem(buf, umem); + else + rc = ionic_pgtbl_page(buf, dma); + + if (rc) + goto err_unbuf; + + return 0; + +err_unbuf: + ionic_pgtbl_unbuf(dev, buf); + return rc; +} diff --git a/drivers/infiniband/hw/ionic/ionic_queue.c b/drivers/infiniband/hw/ionic/ionic_queue.c new file mode 100644 index 000000000000..aa897ed2a412 --- /dev/null +++ b/drivers/infiniband/hw/ionic/ionic_queue.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */ + +#include + +#include "ionic_queue.h" + +int ionic_queue_init(struct ionic_queue *q, struct device *dma_dev, + int depth, size_t stride) +{ + if (depth < 0 || depth > 0xffff) + return -EINVAL; + + if (stride == 0 || stride > 0x10000) + return -EINVAL; + + if (depth == 0) + depth = 1; + + q->depth_log2 = order_base_2(depth + 1); + q->stride_log2 = order_base_2(stride); + + if (q->depth_log2 + q->stride_log2 < PAGE_SHIFT) + q->depth_log2 = PAGE_SHIFT - q->stride_log2; + + if (q->depth_log2 > 16 || q->stride_log2 > 16) + return -EINVAL; + + q->size = BIT_ULL(q->depth_log2 + q->stride_log2); + q->mask = BIT(q->depth_log2) - 1; + + q->ptr = dma_alloc_coherent(dma_dev, q->size, &q->dma, GFP_KERNEL); + if (!q->ptr) + return -ENOMEM; + + /* it will always be page aligned, but just to be sure... 
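ionic_queue_init() above rounds the requested depth up to a power of two plus one slot for the ring hole, then pads small rings so the buffer covers at least one page. A worked sketch of that sizing math, assuming 4K pages; the order_base_2() stand-in and the sample depth/stride are illustrative only:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* assumed 4K pages for the example */

/* user-space stand-in for the kernel's order_base_2() */
static unsigned int order_base_2(unsigned long n)
{
	unsigned int order = 0;

	while ((1ul << order) < n)
		order++;
	return order;
}

int main(void)
{
	int depth = 63;		/* e.g. IONIC_AQ_DEPTH */
	size_t stride = 64;	/* e.g. ADMIN_WQE_STRIDE */
	unsigned int depth_log2, stride_log2;
	uint64_t size;
	uint16_t mask;

	depth_log2 = order_base_2(depth + 1);	/* 63 entries + 1 hole -> 6 */
	stride_log2 = order_base_2(stride);	/* 64-byte stride -> 6 */

	/* pad up to a full page: 6 + 6 = 12 >= PAGE_SHIFT, so no padding here */
	if (depth_log2 + stride_log2 < PAGE_SHIFT)
		depth_log2 = PAGE_SHIFT - stride_log2;

	size = 1ull << (depth_log2 + stride_log2);	/* 4096 bytes */
	mask = (1u << depth_log2) - 1;			/* 63 */

	printf("depth_log2 %u stride_log2 %u size %llu mask %u\n",
	       depth_log2, stride_log2, (unsigned long long)size, mask);
	return 0;
}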
*/ + if (!PAGE_ALIGNED(q->ptr)) { + dma_free_coherent(dma_dev, q->size, q->ptr, q->dma); + return -ENOMEM; + } + + q->prod = 0; + q->cons = 0; + q->dbell = 0; + + return 0; +} + +void ionic_queue_destroy(struct ionic_queue *q, struct device *dma_dev) +{ + dma_free_coherent(dma_dev, q->size, q->ptr, q->dma); +} diff --git a/drivers/infiniband/hw/ionic/ionic_queue.h b/drivers/infiniband/hw/ionic/ionic_queue.h new file mode 100644 index 000000000000..d18020d4cad5 --- /dev/null +++ b/drivers/infiniband/hw/ionic/ionic_queue.h @@ -0,0 +1,234 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */ + +#ifndef _IONIC_QUEUE_H_ +#define _IONIC_QUEUE_H_ + +#include +#include + +#define IONIC_MAX_DEPTH 0xffff +#define IONIC_MAX_CQ_DEPTH 0xffff +#define IONIC_CQ_RING_ARM IONIC_DBELL_RING_1 +#define IONIC_CQ_RING_SOL IONIC_DBELL_RING_2 + +/** + * struct ionic_queue - Ring buffer used between device and driver + * @size: Size of the buffer, in bytes + * @dma: Dma address of the buffer + * @ptr: Buffer virtual address + * @prod: Driver position in the queue + * @cons: Device position in the queue + * @mask: Capacity of the queue, subtracting the hole + * This value is equal to ((1 << depth_log2) - 1) + * @depth_log2: Log base two size depth of the queue + * @stride_log2: Log base two size of an element in the queue + * @dbell: Doorbell identifying bits + */ +struct ionic_queue { + size_t size; + dma_addr_t dma; + void *ptr; + u16 prod; + u16 cons; + u16 mask; + u8 depth_log2; + u8 stride_log2; + u64 dbell; +}; + +/** + * ionic_queue_init() - Initialize user space queue + * @q: Uninitialized queue structure + * @dma_dev: DMA device for mapping + * @depth: Depth of the queue + * @stride: Size of each element of the queue + * + * Return: status code + */ +int ionic_queue_init(struct ionic_queue *q, struct device *dma_dev, + int depth, size_t stride); + +/** + * ionic_queue_destroy() - Destroy user space queue + * @q: Queue structure + * @dma_dev: DMA device for mapping + * + * Return: status code + */ +void ionic_queue_destroy(struct ionic_queue *q, struct device *dma_dev); + +/** + * ionic_queue_empty() - Test if queue is empty + * @q: Queue structure + * + * This is only valid for to-device queues. + * + * Return: is empty + */ +static inline bool ionic_queue_empty(struct ionic_queue *q) +{ + return q->prod == q->cons; +} + +/** + * ionic_queue_length() - Get the current length of the queue + * @q: Queue structure + * + * This is only valid for to-device queues. + * + * Return: length + */ +static inline u16 ionic_queue_length(struct ionic_queue *q) +{ + return (q->prod - q->cons) & q->mask; +} + +/** + * ionic_queue_length_remaining() - Get the remaining length of the queue + * @q: Queue structure + * + * This is only valid for to-device queues. + * + * Return: length remaining + */ +static inline u16 ionic_queue_length_remaining(struct ionic_queue *q) +{ + return q->mask - ionic_queue_length(q); +} + +/** + * ionic_queue_full() - Test if queue is full + * @q: Queue structure + * + * This is only valid for to-device queues. 
+ * + * Return: is full + */ +static inline bool ionic_queue_full(struct ionic_queue *q) +{ + return q->mask == ionic_queue_length(q); +} + +/** + * ionic_color_wrap() - Flip the color if prod is wrapped + * @prod: Queue index just after advancing + * @color: Queue color just prior to advancing the index + * + * Return: color after advancing the index + */ +static inline bool ionic_color_wrap(u16 prod, bool color) +{ + /* logical xor color with (prod == 0) */ + return color != (prod == 0); +} + +/** + * ionic_queue_at() - Get the element at the given index + * @q: Queue structure + * @idx: Index in the queue + * + * The index must be within the bounds of the queue. It is not checked here. + * + * Return: pointer to element at index + */ +static inline void *ionic_queue_at(struct ionic_queue *q, u16 idx) +{ + return q->ptr + ((unsigned long)idx << q->stride_log2); +} + +/** + * ionic_queue_at_prod() - Get the element at the producer index + * @q: Queue structure + * + * Return: pointer to element at producer index + */ +static inline void *ionic_queue_at_prod(struct ionic_queue *q) +{ + return ionic_queue_at(q, q->prod); +} + +/** + * ionic_queue_at_cons() - Get the element at the consumer index + * @q: Queue structure + * + * Return: pointer to element at consumer index + */ +static inline void *ionic_queue_at_cons(struct ionic_queue *q) +{ + return ionic_queue_at(q, q->cons); +} + +/** + * ionic_queue_next() - Compute the next index + * @q: Queue structure + * @idx: Index + * + * Return: next index after idx + */ +static inline u16 ionic_queue_next(struct ionic_queue *q, u16 idx) +{ + return (idx + 1) & q->mask; +} + +/** + * ionic_queue_produce() - Increase the producer index + * @q: Queue structure + * + * Caller must ensure that the queue is not full. It is not checked here. + */ +static inline void ionic_queue_produce(struct ionic_queue *q) +{ + q->prod = ionic_queue_next(q, q->prod); +} + +/** + * ionic_queue_consume() - Increase the consumer index + * @q: Queue structure + * + * Caller must ensure that the queue is not empty. It is not checked here. + * + * This is only valid for to-device queues. + */ +static inline void ionic_queue_consume(struct ionic_queue *q) +{ + q->cons = ionic_queue_next(q, q->cons); +} + +/** + * ionic_queue_consume_entries() - Increase the consumer index by entries + * @q: Queue structure + * @entries: Number of entries to increment + * + * Caller must ensure that the queue is not empty. It is not checked here. + * + * This is only valid for to-device queues. + */ +static inline void ionic_queue_consume_entries(struct ionic_queue *q, + u16 entries) +{ + q->cons = (q->cons + entries) & q->mask; +} + +/** + * ionic_queue_dbell_init() - Initialize doorbell bits for queue id + * @q: Queue structure + * @qid: Queue identifying number + */ +static inline void ionic_queue_dbell_init(struct ionic_queue *q, u32 qid) +{ + q->dbell = IONIC_DBELL_QID(qid); +} + +/** + * ionic_queue_dbell_val() - Get current doorbell update value + * @q: Queue structure + * + * Return: current doorbell update value + */ +static inline u64 ionic_queue_dbell_val(struct ionic_queue *q) +{ + return q->dbell | q->prod; +} + +#endif /* _IONIC_QUEUE_H_ */ diff --git a/drivers/infiniband/hw/ionic/ionic_res.h b/drivers/infiniband/hw/ionic/ionic_res.h new file mode 100644 index 000000000000..46c8c584bd9a --- /dev/null +++ b/drivers/infiniband/hw/ionic/ionic_res.h @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. 
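The producer/consumer arithmetic above relies on that one-slot hole: with mask = (1 << depth_log2) - 1, the ring is full when (prod - cons) & mask equals mask, so prod == cons always means empty, and ionic_queue_dbell_val() simply ORs the producer index into the pre-computed qid bits. A small sketch of the index and doorbell math under those definitions (the sample indices and the doorbell qid bits are illustrative stand-ins, not the real IONIC_DBELL_QID layout):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t depth_log2 = 6;			/* 64 slots, 63 usable */
	uint16_t mask = (1u << depth_log2) - 1;	/* 63 */
	uint16_t prod = 5, cons = 60;		/* producer wrapped past consumer */
	uint64_t dbell_qid_bits = 0x123ull << 24; /* stand-in for IONIC_DBELL_QID(qid) */

	uint16_t length = (prod - cons) & mask;		/* (5 - 60) & 63 = 9 */
	uint16_t remaining = mask - length;		/* 63 - 9 = 54 */
	int full = (length == mask);			/* 0 */
	int empty = (prod == cons);			/* 0 */
	uint64_t dbell_val = dbell_qid_bits | prod;	/* qid bits | producer index */

	printf("len %u rem %u full %d empty %d dbell %#llx\n",
	       length, remaining, full, empty,
	       (unsigned long long)dbell_val);
	return 0;
}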
*/ + +#ifndef _IONIC_RES_H_ +#define _IONIC_RES_H_ + +#include +#include + +/** + * struct ionic_resid_bits - Number allocator based on IDA + * + * @inuse: IDA handle + * @inuse_size: Highest ID limit for IDA + */ +struct ionic_resid_bits { + struct ida inuse; + unsigned int inuse_size; +}; + +/** + * ionic_resid_init() - Initialize a resid allocator + * @resid: Uninitialized resid allocator + * @size: Capacity of the allocator + * + * Return: Zero on success, or negative error number + */ +static inline void ionic_resid_init(struct ionic_resid_bits *resid, + unsigned int size) +{ + resid->inuse_size = size; + ida_init(&resid->inuse); +} + +/** + * ionic_resid_destroy() - Destroy a resid allocator + * @resid: Resid allocator + */ +static inline void ionic_resid_destroy(struct ionic_resid_bits *resid) +{ + ida_destroy(&resid->inuse); +} + +/** + * ionic_resid_get_shared() - Allocate an available shared resource id + * @resid: Resid allocator + * @min: Smallest valid resource id + * @size: One after largest valid resource id + * + * Return: Resource id, or negative error number + */ +static inline int ionic_resid_get_shared(struct ionic_resid_bits *resid, + unsigned int min, + unsigned int size) +{ + return ida_alloc_range(&resid->inuse, min, size - 1, GFP_KERNEL); +} + +/** + * ionic_resid_get() - Allocate an available resource id + * @resid: Resid allocator + * + * Return: Resource id, or negative error number + */ +static inline int ionic_resid_get(struct ionic_resid_bits *resid) +{ + return ionic_resid_get_shared(resid, 0, resid->inuse_size); +} + +/** + * ionic_resid_put() - Free a resource id + * @resid: Resid allocator + * @id: Resource id + */ +static inline void ionic_resid_put(struct ionic_resid_bits *resid, int id) +{ + ida_free(&resid->inuse, id); +} + +/** + * ionic_bitid_to_qid() - Transform a resource bit index into a queue id + * @bitid: Bit index + * @qgrp_shift: Log2 number of queues per queue group + * @half_qid_shift: Log2 of half the total number of queues + * + * Return: Queue id + * + * Udma-constrained queues (QPs and CQs) are associated with their udma by + * queue group. Even queue groups are associated with udma0, and odd queue + * groups with udma1. + * + * For allocating queue ids, we want to arrange the bits into two halves, + * with the even queue groups of udma0 in the lower half of the bitset, + * and the odd queue groups of udma1 in the upper half of the bitset. + * Then, one or two calls of find_next_zero_bit can examine all the bits + * for queues of an entire udma. + * + * For example, assuming eight queue groups with qgrp qids per group: + * + * bitid 0*qgrp..1*qgrp-1 : qid 0*qgrp..1*qgrp-1 + * bitid 1*qgrp..2*qgrp-1 : qid 2*qgrp..3*qgrp-1 + * bitid 2*qgrp..3*qgrp-1 : qid 4*qgrp..5*qgrp-1 + * bitid 3*qgrp..4*qgrp-1 : qid 6*qgrp..7*qgrp-1 + * bitid 4*qgrp..5*qgrp-1 : qid 1*qgrp..2*qgrp-1 + * bitid 5*qgrp..6*qgrp-1 : qid 3*qgrp..4*qgrp-1 + * bitid 6*qgrp..7*qgrp-1 : qid 5*qgrp..6*qgrp-1 + * bitid 7*qgrp..8*qgrp-1 : qid 7*qgrp..8*qgrp-1 + * + * There are three important ranges of bits in the qid. There is the udma + * bit "U" at qgrp_shift, which is the least significant bit of the group + * index, and determines which udma a queue is associated with. + * The bits of lesser significance we can call the idx bits "I", which are + * the index of the queue within the group. The bits of greater significance + * we can call the grp bits "G", which are other bits of the group index that + * do not determine the udma. 
Those bits are just rearranged in the bit index + * in the bitset. A bitid has the udma bit in the most significant place, + * then the grp bits, then the idx bits. + * + * bitid: 00000000000000 U GGG IIIIII + * qid: 00000000000000 GGG U IIIIII + * + * Transforming from bit index to qid, or from qid to bit index, can be + * accomplished by rearranging the bits by masking and shifting. + */ +static inline u32 ionic_bitid_to_qid(u32 bitid, u8 qgrp_shift, + u8 half_qid_shift) +{ + u32 udma_bit = + (bitid & BIT(half_qid_shift)) >> (half_qid_shift - qgrp_shift); + u32 grp_bits = (bitid & GENMASK(half_qid_shift - 1, qgrp_shift)) << 1; + u32 idx_bits = bitid & (BIT(qgrp_shift) - 1); + + return grp_bits | udma_bit | idx_bits; +} + +/** + * ionic_qid_to_bitid() - Transform a queue id into a resource bit index + * @qid: queue index + * @qgrp_shift: Log2 number of queues per queue group + * @half_qid_shift: Log2 of half the total number of queues + * + * Return: Resource bit index + * + * This is the inverse of ionic_bitid_to_qid(). + */ +static inline u32 ionic_qid_to_bitid(u32 qid, u8 qgrp_shift, u8 half_qid_shift) +{ + u32 udma_bit = (qid & BIT(qgrp_shift)) << (half_qid_shift - qgrp_shift); + u32 grp_bits = (qid & GENMASK(half_qid_shift, qgrp_shift + 1)) >> 1; + u32 idx_bits = qid & (BIT(qgrp_shift) - 1); + + return udma_bit | grp_bits | idx_bits; +} +#endif /* _IONIC_RES_H_ */ From e8521822c733c6deab0f339843cd37cd62c12795 Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Wed, 3 Sep 2025 11:46:02 +0530 Subject: [PATCH 44/85] RDMA/ionic: Register device ops for control path Implement device supported verb APIs for control path. Co-developed-by: Andrew Boyer Signed-off-by: Andrew Boyer Co-developed-by: Allen Hubbe Signed-off-by: Allen Hubbe Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20250903061606.4139957-11-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/ionic/ionic_admin.c | 104 + .../infiniband/hw/ionic/ionic_controlpath.c | 2498 +++++++++++++++++ drivers/infiniband/hw/ionic/ionic_fw.h | 717 +++++ drivers/infiniband/hw/ionic/ionic_ibdev.c | 46 +- drivers/infiniband/hw/ionic/ionic_ibdev.h | 251 +- drivers/infiniband/hw/ionic/ionic_pgtbl.c | 19 + include/uapi/rdma/ionic-abi.h | 115 + 7 files changed, 3741 insertions(+), 9 deletions(-) create mode 100644 include/uapi/rdma/ionic-abi.h diff --git a/drivers/infiniband/hw/ionic/ionic_admin.c b/drivers/infiniband/hw/ionic/ionic_admin.c index 845c03f6d9fb..1ba7a8ecc073 100644 --- a/drivers/infiniband/hw/ionic/ionic_admin.c +++ b/drivers/infiniband/hw/ionic/ionic_admin.c @@ -627,6 +627,44 @@ err_cmd: return ERR_PTR(rc); } +static void ionic_flush_qs(struct ionic_ibdev *dev) +{ + struct ionic_qp *qp, *qp_tmp; + struct ionic_cq *cq, *cq_tmp; + LIST_HEAD(flush_list); + unsigned long index; + + WARN_ON(!irqs_disabled()); + + /* Flush qp send and recv */ + xa_lock(&dev->qp_tbl); + xa_for_each(&dev->qp_tbl, index, qp) { + kref_get(&qp->qp_kref); + list_add_tail(&qp->ibkill_flush_ent, &flush_list); + } + xa_unlock(&dev->qp_tbl); + + list_for_each_entry_safe(qp, qp_tmp, &flush_list, ibkill_flush_ent) { + ionic_flush_qp(dev, qp); + kref_put(&qp->qp_kref, ionic_qp_complete); + list_del(&qp->ibkill_flush_ent); + } + + /* Notify completions */ + xa_lock(&dev->cq_tbl); + xa_for_each(&dev->cq_tbl, index, cq) { + kref_get(&cq->cq_kref); + list_add_tail(&cq->ibkill_flush_ent, &flush_list); + } + xa_unlock(&dev->cq_tbl); + + list_for_each_entry_safe(cq, cq_tmp, &flush_list, ibkill_flush_ent) { + 
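The bit rearrangement described in the ionic_bitid_to_qid() comment is easiest to verify with numbers. Below is a self-contained round-trip of the two transforms; the formulas are copied from ionic_res.h above, while the BIT/GENMASK stand-ins and the sample shifts (8 qids per group, 64 qids total) are illustrative:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* user-space stand-ins for the kernel macros, 32-bit flavor */
#define BIT(n)		(1u << (n))
#define GENMASK(h, l)	(((~0u) >> (31 - (h))) & ((~0u) << (l)))

static uint32_t bitid_to_qid(uint32_t bitid, uint8_t qgrp_shift,
			     uint8_t half_qid_shift)
{
	uint32_t udma_bit =
		(bitid & BIT(half_qid_shift)) >> (half_qid_shift - qgrp_shift);
	uint32_t grp_bits = (bitid & GENMASK(half_qid_shift - 1, qgrp_shift)) << 1;
	uint32_t idx_bits = bitid & (BIT(qgrp_shift) - 1);

	return grp_bits | udma_bit | idx_bits;
}

static uint32_t qid_to_bitid(uint32_t qid, uint8_t qgrp_shift,
			     uint8_t half_qid_shift)
{
	uint32_t udma_bit =
		(qid & BIT(qgrp_shift)) << (half_qid_shift - qgrp_shift);
	uint32_t grp_bits = (qid & GENMASK(half_qid_shift, qgrp_shift + 1)) >> 1;
	uint32_t idx_bits = qid & (BIT(qgrp_shift) - 1);

	return udma_bit | grp_bits | idx_bits;
}

int main(void)
{
	/* 8 qids per group (qgrp_shift 3), 64 qids total, so each udma half
	 * of the bitset is 32 ids wide (half_qid_shift 5) */
	uint8_t qgrp_shift = 3, half_qid_shift = 5;
	uint32_t bitid, qid;

	for (bitid = 0; bitid < 64; bitid++) {
		qid = bitid_to_qid(bitid, qgrp_shift, half_qid_shift);
		assert(qid_to_bitid(qid, qgrp_shift, half_qid_shift) == bitid);
	}

	/* bitid 43 sits in the upper (udma1) half of the bitset; it maps to
	 * qid 27, i.e. index 3 of the odd queue group 3 */
	printf("bitid 43 -> qid %u\n",
	       bitid_to_qid(43, qgrp_shift, half_qid_shift));
	return 0;
}

This layout is what lets ionic_get_qpid() allocate from one contiguous bit range per udma and still hand out qids that land in the correct queue group.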
ionic_notify_flush_cq(cq); + kref_put(&cq->cq_kref, ionic_cq_complete); + list_del(&cq->ibkill_flush_ent); + } +} + static void ionic_kill_ibdev(struct ionic_ibdev *dev, bool fatal_path) { unsigned long irqflags; @@ -650,6 +688,9 @@ static void ionic_kill_ibdev(struct ionic_ibdev *dev, bool fatal_path) spin_unlock(&aq->lock); } + if (do_flush) + ionic_flush_qs(dev); + local_irq_restore(irqflags); /* Post a fatal event if requested */ @@ -789,6 +830,65 @@ static void ionic_cq_event(struct ionic_ibdev *dev, u32 cqid, u8 code) kref_put(&cq->cq_kref, ionic_cq_complete); } +static void ionic_qp_event(struct ionic_ibdev *dev, u32 qpid, u8 code) +{ + unsigned long irqflags; + struct ib_event ibev; + struct ionic_qp *qp; + + xa_lock_irqsave(&dev->qp_tbl, irqflags); + qp = xa_load(&dev->qp_tbl, qpid); + if (qp) + kref_get(&qp->qp_kref); + xa_unlock_irqrestore(&dev->qp_tbl, irqflags); + + if (!qp) { + ibdev_dbg(&dev->ibdev, + "missing qpid %#x code %u\n", qpid, code); + return; + } + + ibev.device = &dev->ibdev; + ibev.element.qp = &qp->ibqp; + + switch (code) { + case IONIC_V1_EQE_SQ_DRAIN: + ibev.event = IB_EVENT_SQ_DRAINED; + break; + + case IONIC_V1_EQE_QP_COMM_EST: + ibev.event = IB_EVENT_COMM_EST; + break; + + case IONIC_V1_EQE_QP_LAST_WQE: + ibev.event = IB_EVENT_QP_LAST_WQE_REACHED; + break; + + case IONIC_V1_EQE_QP_ERR: + ibev.event = IB_EVENT_QP_FATAL; + break; + + case IONIC_V1_EQE_QP_ERR_REQUEST: + ibev.event = IB_EVENT_QP_REQ_ERR; + break; + + case IONIC_V1_EQE_QP_ERR_ACCESS: + ibev.event = IB_EVENT_QP_ACCESS_ERR; + break; + + default: + ibdev_dbg(&dev->ibdev, + "unrecognized qpid %#x code %u\n", qpid, code); + goto out; + } + + if (qp->ibqp.event_handler) + qp->ibqp.event_handler(&ibev, qp->ibqp.qp_context); + +out: + kref_put(&qp->qp_kref, ionic_qp_complete); +} + static u16 ionic_poll_eq(struct ionic_eq *eq, u16 budget) { struct ionic_ibdev *dev = eq->dev; @@ -818,6 +918,10 @@ static u16 ionic_poll_eq(struct ionic_eq *eq, u16 budget) ionic_cq_event(dev, qid, code); break; + case IONIC_V1_EQE_TYPE_QP: + ionic_qp_event(dev, qid, code); + break; + default: ibdev_dbg(&dev->ibdev, "unknown event %#x type %u\n", evt, type); diff --git a/drivers/infiniband/hw/ionic/ionic_controlpath.c b/drivers/infiniband/hw/ionic/ionic_controlpath.c index e1130573bd39..9ce7c2e6d7a8 100644 --- a/drivers/infiniband/hw/ionic/ionic_controlpath.c +++ b/drivers/infiniband/hw/ionic/ionic_controlpath.c @@ -1,8 +1,19 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. 
*/ +#include +#include +#include +#include +#include +#include + +#include "ionic_fw.h" #include "ionic_ibdev.h" +#define ionic_set_ecn(tos) (((tos) | 2u) & ~1u) +#define ionic_clear_ecn(tos) ((tos) & ~3u) + static int ionic_validate_qdesc(struct ionic_qdesc *q) { if (!q->addr || !q->size || !q->mask || @@ -179,3 +190,2490 @@ void ionic_destroy_cq_common(struct ionic_ibdev *dev, struct ionic_cq *cq) cq->vcq = NULL; } + +static int ionic_validate_qdesc_zero(struct ionic_qdesc *q) +{ + if (q->addr || q->size || q->mask || q->depth_log2 || q->stride_log2) + return -EINVAL; + + return 0; +} + +static int ionic_get_pdid(struct ionic_ibdev *dev, u32 *pdid) +{ + int rc; + + rc = ionic_resid_get(&dev->inuse_pdid); + if (rc < 0) + return rc; + + *pdid = rc; + return 0; +} + +static int ionic_get_ahid(struct ionic_ibdev *dev, u32 *ahid) +{ + int rc; + + rc = ionic_resid_get(&dev->inuse_ahid); + if (rc < 0) + return rc; + + *ahid = rc; + return 0; +} + +static int ionic_get_mrid(struct ionic_ibdev *dev, u32 *mrid) +{ + int rc; + + /* wrap to 1, skip reserved lkey */ + rc = ionic_resid_get_shared(&dev->inuse_mrid, 1, + dev->inuse_mrid.inuse_size); + if (rc < 0) + return rc; + + *mrid = ionic_mrid(rc, dev->next_mrkey++); + return 0; +} + +static int ionic_get_gsi_qpid(struct ionic_ibdev *dev, u32 *qpid) +{ + int rc = 0; + + rc = ionic_resid_get_shared(&dev->inuse_qpid, IB_QPT_GSI, IB_QPT_GSI + 1); + if (rc < 0) + return rc; + + *qpid = IB_QPT_GSI; + return 0; +} + +static int ionic_get_qpid(struct ionic_ibdev *dev, u32 *qpid, + u8 *udma_idx, u8 udma_mask) +{ + unsigned int size, base, bound; + int udma_i, udma_x, udma_ix; + int rc = -EINVAL; + + udma_x = dev->next_qpid_udma_idx; + + dev->next_qpid_udma_idx ^= dev->lif_cfg.udma_count - 1; + + for (udma_i = 0; udma_i < dev->lif_cfg.udma_count; ++udma_i) { + udma_ix = udma_i ^ udma_x; + + if (!(udma_mask & BIT(udma_ix))) + continue; + + size = dev->lif_cfg.qp_count / dev->lif_cfg.udma_count; + base = size * udma_ix; + bound = base + size; + + /* skip reserved SMI and GSI qpids in group zero */ + if (!base) + base = 2; + + rc = ionic_resid_get_shared(&dev->inuse_qpid, base, bound); + if (rc >= 0) { + *qpid = ionic_bitid_to_qid(rc, + dev->lif_cfg.udma_qgrp_shift, + dev->half_qpid_udma_shift); + *udma_idx = udma_ix; + + rc = 0; + break; + } + } + + return rc; +} + +static int ionic_get_dbid(struct ionic_ibdev *dev, u32 *dbid, phys_addr_t *addr) +{ + int rc, dbpage_num; + + /* wrap to 1, skip kernel reserved */ + rc = ionic_resid_get_shared(&dev->inuse_dbid, 1, + dev->inuse_dbid.inuse_size); + if (rc < 0) + return rc; + + dbpage_num = (dev->lif_cfg.lif_hw_index * dev->lif_cfg.dbid_count) + rc; + *addr = dev->lif_cfg.db_phys + ((phys_addr_t)dbpage_num << PAGE_SHIFT); + + *dbid = rc; + + return 0; +} + +static void ionic_put_pdid(struct ionic_ibdev *dev, u32 pdid) +{ + ionic_resid_put(&dev->inuse_pdid, pdid); +} + +static void ionic_put_ahid(struct ionic_ibdev *dev, u32 ahid) +{ + ionic_resid_put(&dev->inuse_ahid, ahid); +} + +static void ionic_put_mrid(struct ionic_ibdev *dev, u32 mrid) +{ + ionic_resid_put(&dev->inuse_mrid, ionic_mrid_index(mrid)); +} + +static void ionic_put_qpid(struct ionic_ibdev *dev, u32 qpid) +{ + u32 bitid = ionic_qid_to_bitid(qpid, + dev->lif_cfg.udma_qgrp_shift, + dev->half_qpid_udma_shift); + + ionic_resid_put(&dev->inuse_qpid, bitid); +} + +static void ionic_put_dbid(struct ionic_ibdev *dev, u32 dbid) +{ + ionic_resid_put(&dev->inuse_dbid, dbid); +} + +static struct rdma_user_mmap_entry* +ionic_mmap_entry_insert(struct ionic_ctx 
*ctx, unsigned long size, + unsigned long pfn, u8 mmap_flags, u64 *offset) +{ + struct ionic_mmap_entry *entry; + int rc; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return NULL; + + entry->size = size; + entry->pfn = pfn; + entry->mmap_flags = mmap_flags; + + rc = rdma_user_mmap_entry_insert(&ctx->ibctx, &entry->rdma_entry, + entry->size); + if (rc) { + kfree(entry); + return NULL; + } + + if (offset) + *offset = rdma_user_mmap_get_offset(&entry->rdma_entry); + + return &entry->rdma_entry; +} + +int ionic_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibctx->device); + struct ionic_ctx *ctx = to_ionic_ctx(ibctx); + struct ionic_ctx_resp resp = {}; + struct ionic_ctx_req req; + phys_addr_t db_phys = 0; + int rc; + + rc = ib_copy_from_udata(&req, udata, sizeof(req)); + if (rc) + return rc; + + /* try to allocate dbid for user ctx */ + rc = ionic_get_dbid(dev, &ctx->dbid, &db_phys); + if (rc < 0) + return rc; + + ibdev_dbg(&dev->ibdev, "user space dbid %u\n", ctx->dbid); + + ctx->mmap_dbell = ionic_mmap_entry_insert(ctx, PAGE_SIZE, + PHYS_PFN(db_phys), 0, NULL); + if (!ctx->mmap_dbell) { + rc = -ENOMEM; + goto err_mmap_dbell; + } + + resp.page_shift = PAGE_SHIFT; + + resp.dbell_offset = db_phys & ~PAGE_MASK; + + resp.version = dev->lif_cfg.rdma_version; + resp.qp_opcodes = dev->lif_cfg.qp_opcodes; + resp.admin_opcodes = dev->lif_cfg.admin_opcodes; + + resp.sq_qtype = dev->lif_cfg.sq_qtype; + resp.rq_qtype = dev->lif_cfg.rq_qtype; + resp.cq_qtype = dev->lif_cfg.cq_qtype; + resp.admin_qtype = dev->lif_cfg.aq_qtype; + resp.max_stride = dev->lif_cfg.max_stride; + resp.max_spec = IONIC_SPEC_HIGH; + + resp.udma_count = dev->lif_cfg.udma_count; + resp.expdb_mask = dev->lif_cfg.expdb_mask; + + if (dev->lif_cfg.sq_expdb) + resp.expdb_qtypes |= IONIC_EXPDB_SQ; + if (dev->lif_cfg.rq_expdb) + resp.expdb_qtypes |= IONIC_EXPDB_RQ; + + rc = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (rc) + goto err_resp; + + return 0; + +err_resp: + rdma_user_mmap_entry_remove(ctx->mmap_dbell); +err_mmap_dbell: + ionic_put_dbid(dev, ctx->dbid); + + return rc; +} + +void ionic_dealloc_ucontext(struct ib_ucontext *ibctx) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibctx->device); + struct ionic_ctx *ctx = to_ionic_ctx(ibctx); + + rdma_user_mmap_entry_remove(ctx->mmap_dbell); + ionic_put_dbid(dev, ctx->dbid); +} + +int ionic_mmap(struct ib_ucontext *ibctx, struct vm_area_struct *vma) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibctx->device); + struct ionic_ctx *ctx = to_ionic_ctx(ibctx); + struct rdma_user_mmap_entry *rdma_entry; + struct ionic_mmap_entry *ionic_entry; + int rc = 0; + + rdma_entry = rdma_user_mmap_entry_get(&ctx->ibctx, vma); + if (!rdma_entry) { + ibdev_dbg(&dev->ibdev, "not found %#lx\n", + vma->vm_pgoff << PAGE_SHIFT); + return -EINVAL; + } + + ionic_entry = container_of(rdma_entry, struct ionic_mmap_entry, + rdma_entry); + + ibdev_dbg(&dev->ibdev, "writecombine? 
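ionic_get_dbid() above hands each user context its own doorbell page: the LIF's doorbell pages start at db_phys, and the page for a given context is selected by lif_hw_index * dbid_count + dbid, while the in-page offset reported to userspace is just db_phys modulo the page size. A worked sketch of that address math (the configuration values are illustrative only):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12				/* assumed 4K pages */
#define PAGE_MASK  (~((1ul << PAGE_SHIFT) - 1))

int main(void)
{
	/* illustrative LIF configuration */
	uint64_t db_phys = 0x20000000ull;	/* doorbell region base */
	uint32_t lif_hw_index = 2;
	uint32_t dbid_count = 8;
	uint32_t dbid = 3;			/* id returned by the allocator */

	uint64_t dbpage_num = (uint64_t)lif_hw_index * dbid_count + dbid; /* 19 */
	uint64_t addr = db_phys + (dbpage_num << PAGE_SHIFT);	/* 0x20013000 */
	uint64_t dbell_offset = db_phys & ~PAGE_MASK;		/* 0 here */

	printf("doorbell page %llu at %#llx, in-page offset %#llx\n",
	       (unsigned long long)dbpage_num, (unsigned long long)addr,
	       (unsigned long long)dbell_offset);
	return 0;
}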
%d\n", + ionic_entry->mmap_flags & IONIC_MMAP_WC); + if (ionic_entry->mmap_flags & IONIC_MMAP_WC) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + else + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + ibdev_dbg(&dev->ibdev, "remap st %#lx pf %#lx sz %#lx\n", + vma->vm_start, ionic_entry->pfn, ionic_entry->size); + rc = rdma_user_mmap_io(&ctx->ibctx, vma, ionic_entry->pfn, + ionic_entry->size, vma->vm_page_prot, + rdma_entry); + if (rc) + ibdev_dbg(&dev->ibdev, "remap failed %d\n", rc); + + rdma_user_mmap_entry_put(rdma_entry); + return rc; +} + +void ionic_mmap_free(struct rdma_user_mmap_entry *rdma_entry) +{ + struct ionic_mmap_entry *ionic_entry; + + ionic_entry = container_of(rdma_entry, struct ionic_mmap_entry, + rdma_entry); + kfree(ionic_entry); +} + +int ionic_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device); + struct ionic_pd *pd = to_ionic_pd(ibpd); + + return ionic_get_pdid(dev, &pd->pdid); +} + +int ionic_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device); + struct ionic_pd *pd = to_ionic_pd(ibpd); + + ionic_put_pdid(dev, pd->pdid); + + return 0; +} + +static int ionic_build_hdr(struct ionic_ibdev *dev, + struct ib_ud_header *hdr, + const struct rdma_ah_attr *attr, + u16 sport, bool want_ecn) +{ + const struct ib_global_route *grh; + enum rdma_network_type net; + u16 vlan; + int rc; + + if (attr->ah_flags != IB_AH_GRH) + return -EINVAL; + if (attr->type != RDMA_AH_ATTR_TYPE_ROCE) + return -EINVAL; + + grh = rdma_ah_read_grh(attr); + + rc = rdma_read_gid_l2_fields(grh->sgid_attr, &vlan, &hdr->eth.smac_h[0]); + if (rc) + return rc; + + net = rdma_gid_attr_network_type(grh->sgid_attr); + + rc = ib_ud_header_init(0, /* no payload */ + 0, /* no lrh */ + 1, /* yes eth */ + vlan != 0xffff, + 0, /* no grh */ + net == RDMA_NETWORK_IPV4 ? 
4 : 6, + 1, /* yes udp */ + 0, /* no imm */ + hdr); + if (rc) + return rc; + + ether_addr_copy(hdr->eth.dmac_h, attr->roce.dmac); + + if (net == RDMA_NETWORK_IPV4) { + hdr->eth.type = cpu_to_be16(ETH_P_IP); + hdr->ip4.frag_off = cpu_to_be16(0x4000); /* don't fragment */ + hdr->ip4.ttl = grh->hop_limit; + hdr->ip4.tot_len = cpu_to_be16(0xffff); + hdr->ip4.saddr = + *(const __be32 *)(grh->sgid_attr->gid.raw + 12); + hdr->ip4.daddr = *(const __be32 *)(grh->dgid.raw + 12); + + if (want_ecn) + hdr->ip4.tos = ionic_set_ecn(grh->traffic_class); + else + hdr->ip4.tos = ionic_clear_ecn(grh->traffic_class); + } else { + hdr->eth.type = cpu_to_be16(ETH_P_IPV6); + hdr->grh.flow_label = cpu_to_be32(grh->flow_label); + hdr->grh.hop_limit = grh->hop_limit; + hdr->grh.source_gid = grh->sgid_attr->gid; + hdr->grh.destination_gid = grh->dgid; + + if (want_ecn) + hdr->grh.traffic_class = + ionic_set_ecn(grh->traffic_class); + else + hdr->grh.traffic_class = + ionic_clear_ecn(grh->traffic_class); + } + + if (vlan != 0xffff) { + vlan |= rdma_ah_get_sl(attr) << VLAN_PRIO_SHIFT; + hdr->vlan.tag = cpu_to_be16(vlan); + hdr->vlan.type = hdr->eth.type; + hdr->eth.type = cpu_to_be16(ETH_P_8021Q); + } + + hdr->udp.sport = cpu_to_be16(sport); + hdr->udp.dport = cpu_to_be16(ROCE_V2_UDP_DPORT); + + return 0; +} + +static void ionic_set_ah_attr(struct ionic_ibdev *dev, + struct rdma_ah_attr *ah_attr, + struct ib_ud_header *hdr, + int sgid_index) +{ + u32 flow_label; + u16 vlan = 0; + u8 tos, ttl; + + if (hdr->vlan_present) + vlan = be16_to_cpu(hdr->vlan.tag); + + if (hdr->ipv4_present) { + flow_label = 0; + ttl = hdr->ip4.ttl; + tos = hdr->ip4.tos; + *(__be16 *)(hdr->grh.destination_gid.raw + 10) = cpu_to_be16(0xffff); + *(__be32 *)(hdr->grh.destination_gid.raw + 12) = hdr->ip4.daddr; + } else { + flow_label = be32_to_cpu(hdr->grh.flow_label); + ttl = hdr->grh.hop_limit; + tos = hdr->grh.traffic_class; + } + + memset(ah_attr, 0, sizeof(*ah_attr)); + ah_attr->type = RDMA_AH_ATTR_TYPE_ROCE; + if (hdr->eth_present) + memcpy(&ah_attr->roce.dmac, &hdr->eth.dmac_h, ETH_ALEN); + rdma_ah_set_sl(ah_attr, vlan >> VLAN_PRIO_SHIFT); + rdma_ah_set_port_num(ah_attr, 1); + rdma_ah_set_grh(ah_attr, NULL, flow_label, sgid_index, ttl, tos); + rdma_ah_set_dgid_raw(ah_attr, &hdr->grh.destination_gid); +} + +static int ionic_create_ah_cmd(struct ionic_ibdev *dev, + struct ionic_ah *ah, + struct ionic_pd *pd, + struct rdma_ah_attr *attr, + u32 flags) +{ + struct ionic_admin_wr wr = { + .work = COMPLETION_INITIALIZER_ONSTACK(wr.work), + .wqe = { + .op = IONIC_V1_ADMIN_CREATE_AH, + .len = cpu_to_le16(IONIC_ADMIN_CREATE_AH_IN_V1_LEN), + .cmd.create_ah = { + .pd_id = cpu_to_le32(pd->pdid), + .dbid_flags = cpu_to_le16(dev->lif_cfg.dbid), + .id_ver = cpu_to_le32(ah->ahid), + } + } + }; + enum ionic_admin_flags admin_flags = 0; + dma_addr_t hdr_dma = 0; + void *hdr_buf; + gfp_t gfp = GFP_ATOMIC; + int rc, hdr_len = 0; + + if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_CREATE_AH) + return -EBADRQC; + + if (flags & RDMA_CREATE_AH_SLEEPABLE) + gfp = GFP_KERNEL; + else + admin_flags |= IONIC_ADMIN_F_BUSYWAIT; + + rc = ionic_build_hdr(dev, &ah->hdr, attr, IONIC_ROCE_UDP_SPORT, false); + if (rc) + return rc; + + if (ah->hdr.eth.type == cpu_to_be16(ETH_P_8021Q)) { + if (ah->hdr.vlan.type == cpu_to_be16(ETH_P_IP)) + wr.wqe.cmd.create_ah.csum_profile = + IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP; + else + wr.wqe.cmd.create_ah.csum_profile = + IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV6_UDP; + } else { + if (ah->hdr.eth.type == cpu_to_be16(ETH_P_IP)) + 
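The ionic_set_ecn()/ionic_clear_ecn() macros used in ionic_build_hdr() above only touch the two ECN bits at the bottom of the IPv4 TOS / IPv6 traffic-class byte: (tos | 2) & ~1 forces the ECT(0) codepoint (0b10) and (tos & ~3) marks the packet Not-ECT, leaving the DSCP bits untouched in both cases. A one-line check of that bit math (the sample tos value is illustrative):

#include <stdio.h>

#define ionic_set_ecn(tos)	(((tos) | 2u) & ~1u)
#define ionic_clear_ecn(tos)	((tos) & ~3u)

int main(void)
{
	unsigned int tos = 0xb9;	/* DSCP 46 (EF) with ECN bits 0b01 */

	/* 0xb9 -> 0xba: DSCP untouched, ECN forced to ECT(0) = 0b10 */
	printf("set:   %#x\n", ionic_set_ecn(tos));
	/* 0xb9 -> 0xb8: DSCP untouched, ECN cleared to Not-ECT = 0b00 */
	printf("clear: %#x\n", ionic_clear_ecn(tos));
	return 0;
}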
wr.wqe.cmd.create_ah.csum_profile = + IONIC_TFP_CSUM_PROF_ETH_IPV4_UDP; + else + wr.wqe.cmd.create_ah.csum_profile = + IONIC_TFP_CSUM_PROF_ETH_IPV6_UDP; + } + + ah->sgid_index = rdma_ah_read_grh(attr)->sgid_index; + + hdr_buf = kmalloc(PAGE_SIZE, gfp); + if (!hdr_buf) + return -ENOMEM; + + hdr_len = ib_ud_header_pack(&ah->hdr, hdr_buf); + hdr_len -= IB_BTH_BYTES; + hdr_len -= IB_DETH_BYTES; + ibdev_dbg(&dev->ibdev, "roce packet header template\n"); + print_hex_dump_debug("hdr ", DUMP_PREFIX_OFFSET, 16, 1, + hdr_buf, hdr_len, true); + + hdr_dma = dma_map_single(dev->lif_cfg.hwdev, hdr_buf, hdr_len, + DMA_TO_DEVICE); + + rc = dma_mapping_error(dev->lif_cfg.hwdev, hdr_dma); + if (rc) + goto err_dma; + + wr.wqe.cmd.create_ah.dma_addr = cpu_to_le64(hdr_dma); + wr.wqe.cmd.create_ah.length = cpu_to_le32(hdr_len); + + ionic_admin_post(dev, &wr); + rc = ionic_admin_wait(dev, &wr, admin_flags); + + dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma, hdr_len, + DMA_TO_DEVICE); +err_dma: + kfree(hdr_buf); + + return rc; +} + +static int ionic_destroy_ah_cmd(struct ionic_ibdev *dev, u32 ahid, u32 flags) +{ + struct ionic_admin_wr wr = { + .work = COMPLETION_INITIALIZER_ONSTACK(wr.work), + .wqe = { + .op = IONIC_V1_ADMIN_DESTROY_AH, + .len = cpu_to_le16(IONIC_ADMIN_DESTROY_AH_IN_V1_LEN), + .cmd.destroy_ah = { + .ah_id = cpu_to_le32(ahid), + }, + } + }; + enum ionic_admin_flags admin_flags = IONIC_ADMIN_F_TEARDOWN; + + if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_DESTROY_AH) + return -EBADRQC; + + if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) + admin_flags |= IONIC_ADMIN_F_BUSYWAIT; + + ionic_admin_post(dev, &wr); + ionic_admin_wait(dev, &wr, admin_flags); + + /* No host-memory resource is associated with ah, so it is ok + * to "succeed" and complete this destroy ah on the host. 
+ */ + return 0; +} + +int ionic_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibah->device); + struct rdma_ah_attr *attr = init_attr->ah_attr; + struct ionic_pd *pd = to_ionic_pd(ibah->pd); + struct ionic_ah *ah = to_ionic_ah(ibah); + struct ionic_ah_resp resp = {}; + u32 flags = init_attr->flags; + int rc; + + rc = ionic_get_ahid(dev, &ah->ahid); + if (rc) + return rc; + + rc = ionic_create_ah_cmd(dev, ah, pd, attr, flags); + if (rc) + goto err_cmd; + + if (udata) { + resp.ahid = ah->ahid; + + rc = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (rc) + goto err_resp; + } + + return 0; + +err_resp: + ionic_destroy_ah_cmd(dev, ah->ahid, flags); +err_cmd: + ionic_put_ahid(dev, ah->ahid); + return rc; +} + +int ionic_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibah->device); + struct ionic_ah *ah = to_ionic_ah(ibah); + + ionic_set_ah_attr(dev, ah_attr, &ah->hdr, ah->sgid_index); + + return 0; +} + +int ionic_destroy_ah(struct ib_ah *ibah, u32 flags) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibah->device); + struct ionic_ah *ah = to_ionic_ah(ibah); + int rc; + + rc = ionic_destroy_ah_cmd(dev, ah->ahid, flags); + if (rc) + return rc; + + ionic_put_ahid(dev, ah->ahid); + + return 0; +} + +static int ionic_create_mr_cmd(struct ionic_ibdev *dev, + struct ionic_pd *pd, + struct ionic_mr *mr, + u64 addr, + u64 length) +{ + struct ionic_admin_wr wr = { + .work = COMPLETION_INITIALIZER_ONSTACK(wr.work), + .wqe = { + .op = IONIC_V1_ADMIN_CREATE_MR, + .len = cpu_to_le16(IONIC_ADMIN_CREATE_MR_IN_V1_LEN), + .cmd.create_mr = { + .va = cpu_to_le64(addr), + .length = cpu_to_le64(length), + .pd_id = cpu_to_le32(pd->pdid), + .page_size_log2 = mr->buf.page_size_log2, + .tbl_index = cpu_to_le32(~0), + .map_count = cpu_to_le32(mr->buf.tbl_pages), + .dma_addr = ionic_pgtbl_dma(&mr->buf, addr), + .dbid_flags = cpu_to_le16(mr->flags), + .id_ver = cpu_to_le32(mr->mrid), + } + } + }; + int rc; + + if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_CREATE_MR) + return -EBADRQC; + + ionic_admin_post(dev, &wr); + rc = ionic_admin_wait(dev, &wr, 0); + if (!rc) + mr->created = true; + + return rc; +} + +static int ionic_destroy_mr_cmd(struct ionic_ibdev *dev, u32 mrid) +{ + struct ionic_admin_wr wr = { + .work = COMPLETION_INITIALIZER_ONSTACK(wr.work), + .wqe = { + .op = IONIC_V1_ADMIN_DESTROY_MR, + .len = cpu_to_le16(IONIC_ADMIN_DESTROY_MR_IN_V1_LEN), + .cmd.destroy_mr = { + .mr_id = cpu_to_le32(mrid), + }, + } + }; + + if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_DESTROY_MR) + return -EBADRQC; + + ionic_admin_post(dev, &wr); + + return ionic_admin_wait(dev, &wr, IONIC_ADMIN_F_TEARDOWN); +} + +struct ib_mr *ionic_get_dma_mr(struct ib_pd *ibpd, int access) +{ + struct ionic_pd *pd = to_ionic_pd(ibpd); + struct ionic_mr *mr; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mr->ibmr.lkey = IONIC_DMA_LKEY; + mr->ibmr.rkey = IONIC_DMA_RKEY; + + if (pd) + pd->flags |= IONIC_QPF_PRIVILEGED; + + return &mr->ibmr; +} + +struct ib_mr *ionic_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length, + u64 addr, int access, struct ib_dmah *dmah, + struct ib_udata *udata) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device); + struct ionic_pd *pd = to_ionic_pd(ibpd); + struct ionic_mr *mr; + unsigned long pg_sz; + int rc; + + if (dmah) + return ERR_PTR(-EOPNOTSUPP); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return 
ERR_PTR(-ENOMEM); + + rc = ionic_get_mrid(dev, &mr->mrid); + if (rc) + goto err_mrid; + + mr->ibmr.lkey = mr->mrid; + mr->ibmr.rkey = mr->mrid; + mr->ibmr.iova = addr; + mr->ibmr.length = length; + + mr->flags = IONIC_MRF_USER_MR | to_ionic_mr_flags(access); + + mr->umem = ib_umem_get(&dev->ibdev, start, length, access); + if (IS_ERR(mr->umem)) { + rc = PTR_ERR(mr->umem); + goto err_umem; + } + + pg_sz = ib_umem_find_best_pgsz(mr->umem, + dev->lif_cfg.page_size_supported, + addr); + if (!pg_sz) { + rc = -EINVAL; + goto err_pgtbl; + } + + rc = ionic_pgtbl_init(dev, &mr->buf, mr->umem, 0, 1, pg_sz); + if (rc) + goto err_pgtbl; + + rc = ionic_create_mr_cmd(dev, pd, mr, addr, length); + if (rc) + goto err_cmd; + + ionic_pgtbl_unbuf(dev, &mr->buf); + + return &mr->ibmr; + +err_cmd: + ionic_pgtbl_unbuf(dev, &mr->buf); +err_pgtbl: + ib_umem_release(mr->umem); +err_umem: + ionic_put_mrid(dev, mr->mrid); +err_mrid: + kfree(mr); + return ERR_PTR(rc); +} + +struct ib_mr *ionic_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 offset, + u64 length, u64 addr, int fd, int access, + struct ib_dmah *dmah, + struct uverbs_attr_bundle *attrs) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device); + struct ionic_pd *pd = to_ionic_pd(ibpd); + struct ib_umem_dmabuf *umem_dmabuf; + struct ionic_mr *mr; + u64 pg_sz; + int rc; + + if (dmah) + return ERR_PTR(-EOPNOTSUPP); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + rc = ionic_get_mrid(dev, &mr->mrid); + if (rc) + goto err_mrid; + + mr->ibmr.lkey = mr->mrid; + mr->ibmr.rkey = mr->mrid; + mr->ibmr.iova = addr; + mr->ibmr.length = length; + + mr->flags = IONIC_MRF_USER_MR | to_ionic_mr_flags(access); + + umem_dmabuf = ib_umem_dmabuf_get_pinned(&dev->ibdev, offset, length, + fd, access); + if (IS_ERR(umem_dmabuf)) { + rc = PTR_ERR(umem_dmabuf); + goto err_umem; + } + + mr->umem = &umem_dmabuf->umem; + + pg_sz = ib_umem_find_best_pgsz(mr->umem, + dev->lif_cfg.page_size_supported, + addr); + if (!pg_sz) { + rc = -EINVAL; + goto err_pgtbl; + } + + rc = ionic_pgtbl_init(dev, &mr->buf, mr->umem, 0, 1, pg_sz); + if (rc) + goto err_pgtbl; + + rc = ionic_create_mr_cmd(dev, pd, mr, addr, length); + if (rc) + goto err_cmd; + + ionic_pgtbl_unbuf(dev, &mr->buf); + + return &mr->ibmr; + +err_cmd: + ionic_pgtbl_unbuf(dev, &mr->buf); +err_pgtbl: + ib_umem_release(mr->umem); +err_umem: + ionic_put_mrid(dev, mr->mrid); +err_mrid: + kfree(mr); + return ERR_PTR(rc); +} + +int ionic_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibmr->device); + struct ionic_mr *mr = to_ionic_mr(ibmr); + int rc; + + if (!mr->ibmr.lkey) + goto out; + + if (mr->created) { + rc = ionic_destroy_mr_cmd(dev, mr->mrid); + if (rc) + return rc; + } + + ionic_pgtbl_unbuf(dev, &mr->buf); + + if (mr->umem) + ib_umem_release(mr->umem); + + ionic_put_mrid(dev, mr->mrid); + +out: + kfree(mr); + + return 0; +} + +struct ib_mr *ionic_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type type, + u32 max_sg) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device); + struct ionic_pd *pd = to_ionic_pd(ibpd); + struct ionic_mr *mr; + int rc; + + if (type != IB_MR_TYPE_MEM_REG) + return ERR_PTR(-EINVAL); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + rc = ionic_get_mrid(dev, &mr->mrid); + if (rc) + goto err_mrid; + + mr->ibmr.lkey = mr->mrid; + mr->ibmr.rkey = mr->mrid; + + mr->flags = IONIC_MRF_PHYS_MR; + + rc = ionic_pgtbl_init(dev, &mr->buf, mr->umem, 0, max_sg, PAGE_SIZE); + if (rc) + goto err_pgtbl; 
+ + mr->buf.tbl_pages = 0; + + rc = ionic_create_mr_cmd(dev, pd, mr, 0, 0); + if (rc) + goto err_cmd; + + return &mr->ibmr; + +err_cmd: + ionic_pgtbl_unbuf(dev, &mr->buf); +err_pgtbl: + ionic_put_mrid(dev, mr->mrid); +err_mrid: + kfree(mr); + return ERR_PTR(rc); +} + +static int ionic_map_mr_page(struct ib_mr *ibmr, u64 dma) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibmr->device); + struct ionic_mr *mr = to_ionic_mr(ibmr); + + ibdev_dbg(&dev->ibdev, "dma %p\n", (void *)dma); + return ionic_pgtbl_page(&mr->buf, dma); +} + +int ionic_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, + unsigned int *sg_offset) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibmr->device); + struct ionic_mr *mr = to_ionic_mr(ibmr); + int rc; + + /* mr must be allocated using ib_alloc_mr() */ + if (unlikely(!mr->buf.tbl_limit)) + return -EINVAL; + + mr->buf.tbl_pages = 0; + + if (mr->buf.tbl_buf) + dma_sync_single_for_cpu(dev->lif_cfg.hwdev, mr->buf.tbl_dma, + mr->buf.tbl_size, DMA_TO_DEVICE); + + ibdev_dbg(&dev->ibdev, "sg %p nent %d\n", sg, sg_nents); + rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, ionic_map_mr_page); + + mr->buf.page_size_log2 = order_base_2(ibmr->page_size); + + if (mr->buf.tbl_buf) + dma_sync_single_for_device(dev->lif_cfg.hwdev, mr->buf.tbl_dma, + mr->buf.tbl_size, DMA_TO_DEVICE); + + return rc; +} + +int ionic_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibmw->device); + struct ionic_pd *pd = to_ionic_pd(ibmw->pd); + struct ionic_mr *mr = to_ionic_mw(ibmw); + int rc; + + rc = ionic_get_mrid(dev, &mr->mrid); + if (rc) + return rc; + + mr->ibmw.rkey = mr->mrid; + + if (mr->ibmw.type == IB_MW_TYPE_1) + mr->flags = IONIC_MRF_MW_1; + else + mr->flags = IONIC_MRF_MW_2; + + rc = ionic_create_mr_cmd(dev, pd, mr, 0, 0); + if (rc) + goto err_cmd; + + return 0; + +err_cmd: + ionic_put_mrid(dev, mr->mrid); + return rc; +} + +int ionic_dealloc_mw(struct ib_mw *ibmw) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibmw->device); + struct ionic_mr *mr = to_ionic_mw(ibmw); + int rc; + + rc = ionic_destroy_mr_cmd(dev, mr->mrid); + if (rc) + return rc; + + ionic_put_mrid(dev, mr->mrid); + + return 0; +} + +static int ionic_create_cq_cmd(struct ionic_ibdev *dev, + struct ionic_ctx *ctx, + struct ionic_cq *cq, + struct ionic_tbl_buf *buf) +{ + const u16 dbid = ionic_ctx_dbid(dev, ctx); + struct ionic_admin_wr wr = { + .work = COMPLETION_INITIALIZER_ONSTACK(wr.work), + .wqe = { + .op = IONIC_V1_ADMIN_CREATE_CQ, + .len = cpu_to_le16(IONIC_ADMIN_CREATE_CQ_IN_V1_LEN), + .cmd.create_cq = { + .eq_id = cpu_to_le32(cq->eqid), + .depth_log2 = cq->q.depth_log2, + .stride_log2 = cq->q.stride_log2, + .page_size_log2 = buf->page_size_log2, + .tbl_index = cpu_to_le32(~0), + .map_count = cpu_to_le32(buf->tbl_pages), + .dma_addr = ionic_pgtbl_dma(buf, 0), + .dbid_flags = cpu_to_le16(dbid), + .id_ver = cpu_to_le32(cq->cqid), + } + } + }; + + if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_CREATE_CQ) + return -EBADRQC; + + ionic_admin_post(dev, &wr); + + return ionic_admin_wait(dev, &wr, 0); +} + +static int ionic_destroy_cq_cmd(struct ionic_ibdev *dev, u32 cqid) +{ + struct ionic_admin_wr wr = { + .work = COMPLETION_INITIALIZER_ONSTACK(wr.work), + .wqe = { + .op = IONIC_V1_ADMIN_DESTROY_CQ, + .len = cpu_to_le16(IONIC_ADMIN_DESTROY_CQ_IN_V1_LEN), + .cmd.destroy_cq = { + .cq_id = cpu_to_le32(cqid), + }, + } + }; + + if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_DESTROY_CQ) + return -EBADRQC; + + ionic_admin_post(dev, &wr); + + return 
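ionic_map_mr_sg() above simply resets buf.tbl_pages and lets ib_sg_to_pages() call ionic_map_mr_page() once per DMA page, so the page table buffer ends up holding one little-endian 64-bit address per page, capped at tbl_limit. A user-space sketch of that accumulation; the struct and helper mirror ionic_pgtbl_page(), but calloc/htole64 stand in for the DMA-mapped table and every name here is illustrative:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct tbl_buf {
	uint32_t tbl_limit;
	uint32_t tbl_pages;
	uint64_t *tbl_buf;	/* one little-endian address per page */
	uint64_t tbl_dma;	/* single address when no table is needed */
};

static int pgtbl_page(struct tbl_buf *buf, uint64_t dma)
{
	if (buf->tbl_pages == buf->tbl_limit)
		return -1;	/* table full, like -ENOMEM in the driver */

	if (buf->tbl_buf)
		buf->tbl_buf[buf->tbl_pages] = htole64(dma);
	else
		buf->tbl_dma = dma;	/* contiguous case: keep it directly */

	++buf->tbl_pages;
	return 0;
}

int main(void)
{
	struct tbl_buf buf = { .tbl_limit = 4 };
	uint64_t dma;

	buf.tbl_buf = calloc(buf.tbl_limit, sizeof(*buf.tbl_buf));
	if (!buf.tbl_buf)
		return 1;

	/* as if ib_sg_to_pages() walked a 4-page region at 0x100000 */
	for (dma = 0x100000; dma < 0x104000; dma += 0x1000)
		if (pgtbl_page(&buf, dma))
			break;

	printf("collected %u pages\n", buf.tbl_pages);	/* 4 */
	free(buf.tbl_buf);
	return 0;
}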
ionic_admin_wait(dev, &wr, IONIC_ADMIN_F_TEARDOWN); +} + +int ionic_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct uverbs_attr_bundle *attrs) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibcq->device); + struct ib_udata *udata = &attrs->driver_udata; + struct ionic_ctx *ctx = + rdma_udata_to_drv_context(udata, struct ionic_ctx, ibctx); + struct ionic_vcq *vcq = to_ionic_vcq(ibcq); + struct ionic_tbl_buf buf = {}; + struct ionic_cq_resp resp; + struct ionic_cq_req req; + int udma_idx = 0, rc; + + if (udata) { + rc = ib_copy_from_udata(&req, udata, sizeof(req)); + if (rc) + return rc; + } + + vcq->udma_mask = BIT(dev->lif_cfg.udma_count) - 1; + + if (udata) + vcq->udma_mask &= req.udma_mask; + + if (!vcq->udma_mask) { + rc = -EINVAL; + goto err_init; + } + + for (; udma_idx < dev->lif_cfg.udma_count; ++udma_idx) { + if (!(vcq->udma_mask & BIT(udma_idx))) + continue; + + rc = ionic_create_cq_common(vcq, &buf, attr, ctx, udata, + &req.cq[udma_idx], + &resp.cqid[udma_idx], + udma_idx); + if (rc) + goto err_init; + + rc = ionic_create_cq_cmd(dev, ctx, &vcq->cq[udma_idx], &buf); + if (rc) + goto err_cmd; + + ionic_pgtbl_unbuf(dev, &buf); + } + + vcq->ibcq.cqe = attr->cqe; + + if (udata) { + resp.udma_mask = vcq->udma_mask; + + rc = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (rc) + goto err_resp; + } + + return 0; + +err_resp: + while (udma_idx) { + --udma_idx; + if (!(vcq->udma_mask & BIT(udma_idx))) + continue; + ionic_destroy_cq_cmd(dev, vcq->cq[udma_idx].cqid); +err_cmd: + ionic_pgtbl_unbuf(dev, &buf); + ionic_destroy_cq_common(dev, &vcq->cq[udma_idx]); +err_init: + ; + } + + return rc; +} + +int ionic_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibcq->device); + struct ionic_vcq *vcq = to_ionic_vcq(ibcq); + int udma_idx, rc_tmp, rc = 0; + + for (udma_idx = dev->lif_cfg.udma_count; udma_idx; ) { + --udma_idx; + + if (!(vcq->udma_mask & BIT(udma_idx))) + continue; + + rc_tmp = ionic_destroy_cq_cmd(dev, vcq->cq[udma_idx].cqid); + if (rc_tmp) { + if (!rc) + rc = rc_tmp; + + continue; + } + + ionic_destroy_cq_common(dev, &vcq->cq[udma_idx]); + } + + return rc; +} + +static bool pd_remote_privileged(struct ib_pd *pd) +{ + return pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY; +} + +static int ionic_create_qp_cmd(struct ionic_ibdev *dev, + struct ionic_pd *pd, + struct ionic_cq *send_cq, + struct ionic_cq *recv_cq, + struct ionic_qp *qp, + struct ionic_tbl_buf *sq_buf, + struct ionic_tbl_buf *rq_buf, + struct ib_qp_init_attr *attr) +{ + const u16 dbid = ionic_obj_dbid(dev, pd->ibpd.uobject); + const u32 flags = to_ionic_qp_flags(0, 0, + qp->sq_cmb & IONIC_CMB_ENABLE, + qp->rq_cmb & IONIC_CMB_ENABLE, + qp->sq_spec, qp->rq_spec, + pd->flags & IONIC_QPF_PRIVILEGED, + pd_remote_privileged(&pd->ibpd)); + struct ionic_admin_wr wr = { + .work = COMPLETION_INITIALIZER_ONSTACK(wr.work), + .wqe = { + .op = IONIC_V1_ADMIN_CREATE_QP, + .len = cpu_to_le16(IONIC_ADMIN_CREATE_QP_IN_V1_LEN), + .cmd.create_qp = { + .pd_id = cpu_to_le32(pd->pdid), + .priv_flags = cpu_to_be32(flags), + .type_state = to_ionic_qp_type(attr->qp_type), + .dbid_flags = cpu_to_le16(dbid), + .id_ver = cpu_to_le32(qp->qpid), + } + } + }; + + if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_CREATE_QP) + return -EBADRQC; + + if (qp->has_sq) { + wr.wqe.cmd.create_qp.sq_cq_id = cpu_to_le32(send_cq->cqid); + wr.wqe.cmd.create_qp.sq_depth_log2 = qp->sq.depth_log2; + wr.wqe.cmd.create_qp.sq_stride_log2 = qp->sq.stride_log2; + wr.wqe.cmd.create_qp.sq_page_size_log2 = 
sq_buf->page_size_log2; + wr.wqe.cmd.create_qp.sq_tbl_index_xrcd_id = cpu_to_le32(~0); + wr.wqe.cmd.create_qp.sq_map_count = + cpu_to_le32(sq_buf->tbl_pages); + wr.wqe.cmd.create_qp.sq_dma_addr = ionic_pgtbl_dma(sq_buf, 0); + } + + if (qp->has_rq) { + wr.wqe.cmd.create_qp.rq_cq_id = cpu_to_le32(recv_cq->cqid); + wr.wqe.cmd.create_qp.rq_depth_log2 = qp->rq.depth_log2; + wr.wqe.cmd.create_qp.rq_stride_log2 = qp->rq.stride_log2; + wr.wqe.cmd.create_qp.rq_page_size_log2 = rq_buf->page_size_log2; + wr.wqe.cmd.create_qp.rq_tbl_index_srq_id = cpu_to_le32(~0); + wr.wqe.cmd.create_qp.rq_map_count = + cpu_to_le32(rq_buf->tbl_pages); + wr.wqe.cmd.create_qp.rq_dma_addr = ionic_pgtbl_dma(rq_buf, 0); + } + + ionic_admin_post(dev, &wr); + + return ionic_admin_wait(dev, &wr, 0); +} + +static int ionic_modify_qp_cmd(struct ionic_ibdev *dev, + struct ionic_pd *pd, + struct ionic_qp *qp, + struct ib_qp_attr *attr, + int mask) +{ + const u32 flags = to_ionic_qp_flags(attr->qp_access_flags, + attr->en_sqd_async_notify, + qp->sq_cmb & IONIC_CMB_ENABLE, + qp->rq_cmb & IONIC_CMB_ENABLE, + qp->sq_spec, qp->rq_spec, + pd->flags & IONIC_QPF_PRIVILEGED, + pd_remote_privileged(qp->ibqp.pd)); + const u8 state = to_ionic_qp_modify_state(attr->qp_state, + attr->cur_qp_state); + struct ionic_admin_wr wr = { + .work = COMPLETION_INITIALIZER_ONSTACK(wr.work), + .wqe = { + .op = IONIC_V1_ADMIN_MODIFY_QP, + .len = cpu_to_le16(IONIC_ADMIN_MODIFY_QP_IN_V1_LEN), + .cmd.mod_qp = { + .attr_mask = cpu_to_be32(mask), + .access_flags = cpu_to_be16(flags), + .rq_psn = cpu_to_le32(attr->rq_psn), + .sq_psn = cpu_to_le32(attr->sq_psn), + .rate_limit_kbps = + cpu_to_le32(attr->rate_limit), + .pmtu = (attr->path_mtu + 7), + .retry = (attr->retry_cnt | + (attr->rnr_retry << 4)), + .rnr_timer = attr->min_rnr_timer, + .retry_timeout = attr->timeout, + .type_state = state, + .id_ver = cpu_to_le32(qp->qpid), + } + } + }; + const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); + void *hdr_buf = NULL; + dma_addr_t hdr_dma = 0; + int rc, hdr_len = 0; + u16 sport; + + if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_MODIFY_QP) + return -EBADRQC; + + if ((mask & IB_QP_MAX_DEST_RD_ATOMIC) && attr->max_dest_rd_atomic) { + /* Note, round up/down was already done for allocating + * resources on the device. The allocation order is in cache + * line size. We can't use the order of the resource + * allocation to determine the order wqes here, because for + * queue length <= one cache line it is not distinct. + * + * Therefore, order wqes is computed again here. + * + * Account for hole and round up to the next order. 
+ */ + wr.wqe.cmd.mod_qp.rsq_depth = + order_base_2(attr->max_dest_rd_atomic + 1); + wr.wqe.cmd.mod_qp.rsq_index = cpu_to_le32(~0); + } + + if ((mask & IB_QP_MAX_QP_RD_ATOMIC) && attr->max_rd_atomic) { + /* Account for hole and round down to the next order */ + wr.wqe.cmd.mod_qp.rrq_depth = + order_base_2(attr->max_rd_atomic + 2) - 1; + wr.wqe.cmd.mod_qp.rrq_index = cpu_to_le32(~0); + } + + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) + wr.wqe.cmd.mod_qp.qkey_dest_qpn = + cpu_to_le32(attr->dest_qp_num); + else + wr.wqe.cmd.mod_qp.qkey_dest_qpn = cpu_to_le32(attr->qkey); + + if (mask & IB_QP_AV) { + if (!qp->hdr) + return -ENOMEM; + + sport = rdma_get_udp_sport(grh->flow_label, + qp->qpid, + attr->dest_qp_num); + + rc = ionic_build_hdr(dev, qp->hdr, &attr->ah_attr, sport, true); + if (rc) + return rc; + + qp->sgid_index = grh->sgid_index; + + hdr_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!hdr_buf) + return -ENOMEM; + + hdr_len = ib_ud_header_pack(qp->hdr, hdr_buf); + hdr_len -= IB_BTH_BYTES; + hdr_len -= IB_DETH_BYTES; + ibdev_dbg(&dev->ibdev, "roce packet header template\n"); + print_hex_dump_debug("hdr ", DUMP_PREFIX_OFFSET, 16, 1, + hdr_buf, hdr_len, true); + + hdr_dma = dma_map_single(dev->lif_cfg.hwdev, hdr_buf, hdr_len, + DMA_TO_DEVICE); + + rc = dma_mapping_error(dev->lif_cfg.hwdev, hdr_dma); + if (rc) + goto err_dma; + + if (qp->hdr->ipv4_present) { + wr.wqe.cmd.mod_qp.tfp_csum_profile = + qp->hdr->vlan_present ? + IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP : + IONIC_TFP_CSUM_PROF_ETH_IPV4_UDP; + } else { + wr.wqe.cmd.mod_qp.tfp_csum_profile = + qp->hdr->vlan_present ? + IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV6_UDP : + IONIC_TFP_CSUM_PROF_ETH_IPV6_UDP; + } + + wr.wqe.cmd.mod_qp.ah_id_len = + cpu_to_le32(qp->ahid | (hdr_len << 24)); + wr.wqe.cmd.mod_qp.dma_addr = cpu_to_le64(hdr_dma); + + wr.wqe.cmd.mod_qp.en_pcp = attr->ah_attr.sl; + wr.wqe.cmd.mod_qp.ip_dscp = grh->traffic_class >> 2; + } + + ionic_admin_post(dev, &wr); + + rc = ionic_admin_wait(dev, &wr, 0); + + if (mask & IB_QP_AV) + dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma, hdr_len, + DMA_TO_DEVICE); +err_dma: + if (mask & IB_QP_AV) + kfree(hdr_buf); + + return rc; +} + +static int ionic_query_qp_cmd(struct ionic_ibdev *dev, + struct ionic_qp *qp, + struct ib_qp_attr *attr, + int mask) +{ + struct ionic_admin_wr wr = { + .work = COMPLETION_INITIALIZER_ONSTACK(wr.work), + .wqe = { + .op = IONIC_V1_ADMIN_QUERY_QP, + .len = cpu_to_le16(IONIC_ADMIN_QUERY_QP_IN_V1_LEN), + .cmd.query_qp = { + .id_ver = cpu_to_le32(qp->qpid), + }, + } + }; + struct ionic_v1_admin_query_qp_sq *query_sqbuf; + struct ionic_v1_admin_query_qp_rq *query_rqbuf; + dma_addr_t query_sqdma; + dma_addr_t query_rqdma; + dma_addr_t hdr_dma = 0; + void *hdr_buf = NULL; + int flags, rc; + + if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_QUERY_QP) + return -EBADRQC; + + if (qp->has_sq) { + bool expdb = !!(qp->sq_cmb & IONIC_CMB_EXPDB); + + attr->cap.max_send_sge = + ionic_v1_send_wqe_max_sge(qp->sq.stride_log2, + qp->sq_spec, + expdb); + attr->cap.max_inline_data = + ionic_v1_send_wqe_max_data(qp->sq.stride_log2, expdb); + } + + if (qp->has_rq) { + attr->cap.max_recv_sge = + ionic_v1_recv_wqe_max_sge(qp->rq.stride_log2, + qp->rq_spec, + qp->rq_cmb & IONIC_CMB_EXPDB); + } + + query_sqbuf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!query_sqbuf) + return -ENOMEM; + + query_rqbuf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!query_rqbuf) { + rc = -ENOMEM; + goto err_rqbuf; + } + + query_sqdma = dma_map_single(dev->lif_cfg.hwdev, query_sqbuf, PAGE_SIZE, + 
DMA_FROM_DEVICE); + rc = dma_mapping_error(dev->lif_cfg.hwdev, query_sqdma); + if (rc) + goto err_sqdma; + + query_rqdma = dma_map_single(dev->lif_cfg.hwdev, query_rqbuf, PAGE_SIZE, + DMA_FROM_DEVICE); + rc = dma_mapping_error(dev->lif_cfg.hwdev, query_rqdma); + if (rc) + goto err_rqdma; + + if (mask & IB_QP_AV) { + hdr_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!hdr_buf) { + rc = -ENOMEM; + goto err_hdrbuf; + } + + hdr_dma = dma_map_single(dev->lif_cfg.hwdev, hdr_buf, + PAGE_SIZE, DMA_FROM_DEVICE); + rc = dma_mapping_error(dev->lif_cfg.hwdev, hdr_dma); + if (rc) + goto err_hdrdma; + } + + wr.wqe.cmd.query_qp.sq_dma_addr = cpu_to_le64(query_sqdma); + wr.wqe.cmd.query_qp.rq_dma_addr = cpu_to_le64(query_rqdma); + wr.wqe.cmd.query_qp.hdr_dma_addr = cpu_to_le64(hdr_dma); + wr.wqe.cmd.query_qp.ah_id = cpu_to_le32(qp->ahid); + + ionic_admin_post(dev, &wr); + + rc = ionic_admin_wait(dev, &wr, 0); + + if (rc) + goto err_hdrdma; + + flags = be16_to_cpu(query_sqbuf->access_perms_flags | + query_rqbuf->access_perms_flags); + + print_hex_dump_debug("sqbuf ", DUMP_PREFIX_OFFSET, 16, 1, + query_sqbuf, sizeof(*query_sqbuf), true); + print_hex_dump_debug("rqbuf ", DUMP_PREFIX_OFFSET, 16, 1, + query_rqbuf, sizeof(*query_rqbuf), true); + ibdev_dbg(&dev->ibdev, "query qp %u state_pmtu %#x flags %#x", + qp->qpid, query_rqbuf->state_pmtu, flags); + + attr->qp_state = from_ionic_qp_state(query_rqbuf->state_pmtu >> 4); + attr->cur_qp_state = attr->qp_state; + attr->path_mtu = (query_rqbuf->state_pmtu & 0xf) - 7; + attr->path_mig_state = IB_MIG_MIGRATED; + attr->qkey = be32_to_cpu(query_sqbuf->qkey_dest_qpn); + attr->rq_psn = be32_to_cpu(query_sqbuf->rq_psn); + attr->sq_psn = be32_to_cpu(query_rqbuf->sq_psn); + attr->dest_qp_num = attr->qkey; + attr->qp_access_flags = from_ionic_qp_flags(flags); + attr->pkey_index = 0; + attr->alt_pkey_index = 0; + attr->en_sqd_async_notify = !!(flags & IONIC_QPF_SQD_NOTIFY); + attr->sq_draining = !!(flags & IONIC_QPF_SQ_DRAINING); + attr->max_rd_atomic = BIT(query_rqbuf->rrq_depth) - 1; + attr->max_dest_rd_atomic = BIT(query_rqbuf->rsq_depth) - 1; + attr->min_rnr_timer = query_sqbuf->rnr_timer; + attr->port_num = 0; + attr->timeout = query_sqbuf->retry_timeout; + attr->retry_cnt = query_rqbuf->retry_rnrtry & 0xf; + attr->rnr_retry = query_rqbuf->retry_rnrtry >> 4; + attr->alt_port_num = 0; + attr->alt_timeout = 0; + attr->rate_limit = be32_to_cpu(query_sqbuf->rate_limit_kbps); + + if (mask & IB_QP_AV) + ionic_set_ah_attr(dev, &attr->ah_attr, + qp->hdr, qp->sgid_index); + +err_hdrdma: + if (mask & IB_QP_AV) { + dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma, + PAGE_SIZE, DMA_FROM_DEVICE); + kfree(hdr_buf); + } +err_hdrbuf: + dma_unmap_single(dev->lif_cfg.hwdev, query_rqdma, sizeof(*query_rqbuf), + DMA_FROM_DEVICE); +err_rqdma: + dma_unmap_single(dev->lif_cfg.hwdev, query_sqdma, sizeof(*query_sqbuf), + DMA_FROM_DEVICE); +err_sqdma: + kfree(query_rqbuf); +err_rqbuf: + kfree(query_sqbuf); + + return rc; +} + +static int ionic_destroy_qp_cmd(struct ionic_ibdev *dev, u32 qpid) +{ + struct ionic_admin_wr wr = { + .work = COMPLETION_INITIALIZER_ONSTACK(wr.work), + .wqe = { + .op = IONIC_V1_ADMIN_DESTROY_QP, + .len = cpu_to_le16(IONIC_ADMIN_DESTROY_QP_IN_V1_LEN), + .cmd.destroy_qp = { + .qp_id = cpu_to_le32(qpid), + }, + } + }; + + if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_DESTROY_QP) + return -EBADRQC; + + ionic_admin_post(dev, &wr); + + return ionic_admin_wait(dev, &wr, IONIC_ADMIN_F_TEARDOWN); +} + +static bool ionic_expdb_wqe_size_supported(struct ionic_ibdev *dev, + uint32_t 
wqe_size) +{ + switch (wqe_size) { + case 64: return dev->lif_cfg.expdb_mask & IONIC_EXPDB_64; + case 128: return dev->lif_cfg.expdb_mask & IONIC_EXPDB_128; + case 256: return dev->lif_cfg.expdb_mask & IONIC_EXPDB_256; + case 512: return dev->lif_cfg.expdb_mask & IONIC_EXPDB_512; + } + + return false; +} + +static void ionic_qp_sq_init_cmb(struct ionic_ibdev *dev, + struct ionic_qp *qp, + struct ib_udata *udata, + int max_data) +{ + u8 expdb_stride_log2 = 0; + bool expdb; + int rc; + + if (!(qp->sq_cmb & IONIC_CMB_ENABLE)) + goto not_in_cmb; + + if (qp->sq_cmb & ~IONIC_CMB_SUPPORTED) { + if (qp->sq_cmb & IONIC_CMB_REQUIRE) + goto not_in_cmb; + + qp->sq_cmb &= IONIC_CMB_SUPPORTED; + } + + if ((qp->sq_cmb & IONIC_CMB_EXPDB) && !dev->lif_cfg.sq_expdb) { + if (qp->sq_cmb & IONIC_CMB_REQUIRE) + goto not_in_cmb; + + qp->sq_cmb &= ~IONIC_CMB_EXPDB; + } + + qp->sq_cmb_order = order_base_2(qp->sq.size / PAGE_SIZE); + + if (qp->sq_cmb_order >= IONIC_SQCMB_ORDER) + goto not_in_cmb; + + if (qp->sq_cmb & IONIC_CMB_EXPDB) + expdb_stride_log2 = qp->sq.stride_log2; + + rc = ionic_get_cmb(dev->lif_cfg.lif, &qp->sq_cmb_pgid, + &qp->sq_cmb_addr, qp->sq_cmb_order, + expdb_stride_log2, &expdb); + if (rc) + goto not_in_cmb; + + if ((qp->sq_cmb & IONIC_CMB_EXPDB) && !expdb) { + if (qp->sq_cmb & IONIC_CMB_REQUIRE) + goto err_map; + + qp->sq_cmb &= ~IONIC_CMB_EXPDB; + } + + return; + +err_map: + ionic_put_cmb(dev->lif_cfg.lif, qp->sq_cmb_pgid, qp->sq_cmb_order); +not_in_cmb: + if (qp->sq_cmb & IONIC_CMB_REQUIRE) + ibdev_dbg(&dev->ibdev, "could not place sq in cmb as required\n"); + + qp->sq_cmb = 0; + qp->sq_cmb_order = IONIC_RES_INVALID; + qp->sq_cmb_pgid = 0; + qp->sq_cmb_addr = 0; +} + +static void ionic_qp_sq_destroy_cmb(struct ionic_ibdev *dev, + struct ionic_ctx *ctx, + struct ionic_qp *qp) +{ + if (!(qp->sq_cmb & IONIC_CMB_ENABLE)) + return; + + if (ctx) + rdma_user_mmap_entry_remove(qp->mmap_sq_cmb); + + ionic_put_cmb(dev->lif_cfg.lif, qp->sq_cmb_pgid, qp->sq_cmb_order); +} + +static int ionic_qp_sq_init(struct ionic_ibdev *dev, struct ionic_ctx *ctx, + struct ionic_qp *qp, struct ionic_qdesc *sq, + struct ionic_tbl_buf *buf, int max_wr, int max_sge, + int max_data, int sq_spec, struct ib_udata *udata) +{ + u32 wqe_size; + int rc = 0; + + qp->sq_msn_prod = 0; + qp->sq_msn_cons = 0; + + if (!qp->has_sq) { + if (buf) { + buf->tbl_buf = NULL; + buf->tbl_limit = 0; + buf->tbl_pages = 0; + } + if (udata) + rc = ionic_validate_qdesc_zero(sq); + + return rc; + } + + rc = -EINVAL; + + if (max_wr < 0 || max_wr > 0xffff) + return rc; + + if (max_sge < 1) + return rc; + + if (max_sge > min(ionic_v1_send_wqe_max_sge(dev->lif_cfg.max_stride, 0, + qp->sq_cmb & + IONIC_CMB_EXPDB), + IONIC_SPEC_HIGH)) + return rc; + + if (max_data < 0) + return rc; + + if (max_data > ionic_v1_send_wqe_max_data(dev->lif_cfg.max_stride, + qp->sq_cmb & IONIC_CMB_EXPDB)) + return rc; + + if (udata) { + rc = ionic_validate_qdesc(sq); + if (rc) + return rc; + + qp->sq_spec = sq_spec; + + qp->sq.ptr = NULL; + qp->sq.size = sq->size; + qp->sq.mask = sq->mask; + qp->sq.depth_log2 = sq->depth_log2; + qp->sq.stride_log2 = sq->stride_log2; + + qp->sq_meta = NULL; + qp->sq_msn_idx = NULL; + + qp->sq_umem = ib_umem_get(&dev->ibdev, sq->addr, sq->size, 0); + if (IS_ERR(qp->sq_umem)) + return PTR_ERR(qp->sq_umem); + } else { + qp->sq_umem = NULL; + + qp->sq_spec = ionic_v1_use_spec_sge(max_sge, sq_spec); + if (sq_spec && !qp->sq_spec) + ibdev_dbg(&dev->ibdev, + "init sq: max_sge %u disables spec\n", + max_sge); + + if (qp->sq_cmb & IONIC_CMB_EXPDB) { + 
wqe_size = ionic_v1_send_wqe_min_size(max_sge, max_data, + qp->sq_spec, + true); + + if (!ionic_expdb_wqe_size_supported(dev, wqe_size)) + qp->sq_cmb &= ~IONIC_CMB_EXPDB; + } + + if (!(qp->sq_cmb & IONIC_CMB_EXPDB)) + wqe_size = ionic_v1_send_wqe_min_size(max_sge, max_data, + qp->sq_spec, + false); + + rc = ionic_queue_init(&qp->sq, dev->lif_cfg.hwdev, + max_wr, wqe_size); + if (rc) + return rc; + + ionic_queue_dbell_init(&qp->sq, qp->qpid); + + qp->sq_meta = kmalloc_array((u32)qp->sq.mask + 1, + sizeof(*qp->sq_meta), + GFP_KERNEL); + if (!qp->sq_meta) { + rc = -ENOMEM; + goto err_sq_meta; + } + + qp->sq_msn_idx = kmalloc_array((u32)qp->sq.mask + 1, + sizeof(*qp->sq_msn_idx), + GFP_KERNEL); + if (!qp->sq_msn_idx) { + rc = -ENOMEM; + goto err_sq_msn; + } + } + + ionic_qp_sq_init_cmb(dev, qp, udata, max_data); + + if (qp->sq_cmb & IONIC_CMB_ENABLE) + rc = ionic_pgtbl_init(dev, buf, NULL, + (u64)qp->sq_cmb_pgid << PAGE_SHIFT, + 1, PAGE_SIZE); + else + rc = ionic_pgtbl_init(dev, buf, + qp->sq_umem, qp->sq.dma, 1, PAGE_SIZE); + if (rc) + goto err_sq_tbl; + + return 0; + +err_sq_tbl: + ionic_qp_sq_destroy_cmb(dev, ctx, qp); + kfree(qp->sq_msn_idx); +err_sq_msn: + kfree(qp->sq_meta); +err_sq_meta: + if (qp->sq_umem) + ib_umem_release(qp->sq_umem); + else + ionic_queue_destroy(&qp->sq, dev->lif_cfg.hwdev); + return rc; +} + +static void ionic_qp_sq_destroy(struct ionic_ibdev *dev, + struct ionic_ctx *ctx, + struct ionic_qp *qp) +{ + if (!qp->has_sq) + return; + + ionic_qp_sq_destroy_cmb(dev, ctx, qp); + + kfree(qp->sq_msn_idx); + kfree(qp->sq_meta); + + if (qp->sq_umem) + ib_umem_release(qp->sq_umem); + else + ionic_queue_destroy(&qp->sq, dev->lif_cfg.hwdev); +} + +static void ionic_qp_rq_init_cmb(struct ionic_ibdev *dev, + struct ionic_qp *qp, + struct ib_udata *udata) +{ + u8 expdb_stride_log2 = 0; + bool expdb; + int rc; + + if (!(qp->rq_cmb & IONIC_CMB_ENABLE)) + goto not_in_cmb; + + if (qp->rq_cmb & ~IONIC_CMB_SUPPORTED) { + if (qp->rq_cmb & IONIC_CMB_REQUIRE) + goto not_in_cmb; + + qp->rq_cmb &= IONIC_CMB_SUPPORTED; + } + + if ((qp->rq_cmb & IONIC_CMB_EXPDB) && !dev->lif_cfg.rq_expdb) { + if (qp->rq_cmb & IONIC_CMB_REQUIRE) + goto not_in_cmb; + + qp->rq_cmb &= ~IONIC_CMB_EXPDB; + } + + qp->rq_cmb_order = order_base_2(qp->rq.size / PAGE_SIZE); + + if (qp->rq_cmb_order >= IONIC_RQCMB_ORDER) + goto not_in_cmb; + + if (qp->rq_cmb & IONIC_CMB_EXPDB) + expdb_stride_log2 = qp->rq.stride_log2; + + rc = ionic_get_cmb(dev->lif_cfg.lif, &qp->rq_cmb_pgid, + &qp->rq_cmb_addr, qp->rq_cmb_order, + expdb_stride_log2, &expdb); + if (rc) + goto not_in_cmb; + + if ((qp->rq_cmb & IONIC_CMB_EXPDB) && !expdb) { + if (qp->rq_cmb & IONIC_CMB_REQUIRE) + goto err_map; + + qp->rq_cmb &= ~IONIC_CMB_EXPDB; + } + + return; + +err_map: + ionic_put_cmb(dev->lif_cfg.lif, qp->rq_cmb_pgid, qp->rq_cmb_order); +not_in_cmb: + if (qp->rq_cmb & IONIC_CMB_REQUIRE) + ibdev_dbg(&dev->ibdev, "could not place rq in cmb as required\n"); + + qp->rq_cmb = 0; + qp->rq_cmb_order = IONIC_RES_INVALID; + qp->rq_cmb_pgid = 0; + qp->rq_cmb_addr = 0; +} + +static void ionic_qp_rq_destroy_cmb(struct ionic_ibdev *dev, + struct ionic_ctx *ctx, + struct ionic_qp *qp) +{ + if (!(qp->rq_cmb & IONIC_CMB_ENABLE)) + return; + + if (ctx) + rdma_user_mmap_entry_remove(qp->mmap_rq_cmb); + + ionic_put_cmb(dev->lif_cfg.lif, qp->rq_cmb_pgid, qp->rq_cmb_order); +} + +static int ionic_qp_rq_init(struct ionic_ibdev *dev, struct ionic_ctx *ctx, + struct ionic_qp *qp, struct ionic_qdesc *rq, + struct ionic_tbl_buf *buf, int max_wr, int max_sge, + int rq_spec, struct 
ib_udata *udata) +{ + int rc = 0, i; + u32 wqe_size; + + if (!qp->has_rq) { + if (buf) { + buf->tbl_buf = NULL; + buf->tbl_limit = 0; + buf->tbl_pages = 0; + } + if (udata) + rc = ionic_validate_qdesc_zero(rq); + + return rc; + } + + rc = -EINVAL; + + if (max_wr < 0 || max_wr > 0xffff) + return rc; + + if (max_sge < 1) + return rc; + + if (max_sge > min(ionic_v1_recv_wqe_max_sge(dev->lif_cfg.max_stride, 0, false), + IONIC_SPEC_HIGH)) + return rc; + + if (udata) { + rc = ionic_validate_qdesc(rq); + if (rc) + return rc; + + qp->rq_spec = rq_spec; + + qp->rq.ptr = NULL; + qp->rq.size = rq->size; + qp->rq.mask = rq->mask; + qp->rq.depth_log2 = rq->depth_log2; + qp->rq.stride_log2 = rq->stride_log2; + + qp->rq_meta = NULL; + + qp->rq_umem = ib_umem_get(&dev->ibdev, rq->addr, rq->size, 0); + if (IS_ERR(qp->rq_umem)) + return PTR_ERR(qp->rq_umem); + } else { + qp->rq_umem = NULL; + + qp->rq_spec = ionic_v1_use_spec_sge(max_sge, rq_spec); + if (rq_spec && !qp->rq_spec) + ibdev_dbg(&dev->ibdev, + "init rq: max_sge %u disables spec\n", + max_sge); + + if (qp->rq_cmb & IONIC_CMB_EXPDB) { + wqe_size = ionic_v1_recv_wqe_min_size(max_sge, + qp->rq_spec, + true); + + if (!ionic_expdb_wqe_size_supported(dev, wqe_size)) + qp->rq_cmb &= ~IONIC_CMB_EXPDB; + } + + if (!(qp->rq_cmb & IONIC_CMB_EXPDB)) + wqe_size = ionic_v1_recv_wqe_min_size(max_sge, + qp->rq_spec, + false); + + rc = ionic_queue_init(&qp->rq, dev->lif_cfg.hwdev, + max_wr, wqe_size); + if (rc) + return rc; + + ionic_queue_dbell_init(&qp->rq, qp->qpid); + + qp->rq_meta = kmalloc_array((u32)qp->rq.mask + 1, + sizeof(*qp->rq_meta), + GFP_KERNEL); + if (!qp->rq_meta) { + rc = -ENOMEM; + goto err_rq_meta; + } + + for (i = 0; i < qp->rq.mask; ++i) + qp->rq_meta[i].next = &qp->rq_meta[i + 1]; + qp->rq_meta[i].next = IONIC_META_LAST; + qp->rq_meta_head = &qp->rq_meta[0]; + } + + ionic_qp_rq_init_cmb(dev, qp, udata); + + if (qp->rq_cmb & IONIC_CMB_ENABLE) + rc = ionic_pgtbl_init(dev, buf, NULL, + (u64)qp->rq_cmb_pgid << PAGE_SHIFT, + 1, PAGE_SIZE); + else + rc = ionic_pgtbl_init(dev, buf, + qp->rq_umem, qp->rq.dma, 1, PAGE_SIZE); + if (rc) + goto err_rq_tbl; + + return 0; + +err_rq_tbl: + ionic_qp_rq_destroy_cmb(dev, ctx, qp); + kfree(qp->rq_meta); +err_rq_meta: + if (qp->rq_umem) + ib_umem_release(qp->rq_umem); + else + ionic_queue_destroy(&qp->rq, dev->lif_cfg.hwdev); + return rc; +} + +static void ionic_qp_rq_destroy(struct ionic_ibdev *dev, + struct ionic_ctx *ctx, + struct ionic_qp *qp) +{ + if (!qp->has_rq) + return; + + ionic_qp_rq_destroy_cmb(dev, ctx, qp); + + kfree(qp->rq_meta); + + if (qp->rq_umem) + ib_umem_release(qp->rq_umem); + else + ionic_queue_destroy(&qp->rq, dev->lif_cfg.hwdev); +} + +int ionic_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr, + struct ib_udata *udata) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device); + struct ionic_tbl_buf sq_buf = {}, rq_buf = {}; + struct ionic_pd *pd = to_ionic_pd(ibqp->pd); + struct ionic_qp *qp = to_ionic_qp(ibqp); + struct ionic_ctx *ctx = + rdma_udata_to_drv_context(udata, struct ionic_ctx, ibctx); + struct ionic_qp_resp resp = {}; + struct ionic_qp_req req = {}; + struct ionic_cq *cq; + u8 udma_mask; + void *entry; + int rc; + + if (udata) { + rc = ib_copy_from_udata(&req, udata, sizeof(req)); + if (rc) + return rc; + } else { + req.sq_spec = IONIC_SPEC_HIGH; + req.rq_spec = IONIC_SPEC_HIGH; + } + + if (attr->qp_type == IB_QPT_SMI || attr->qp_type > IB_QPT_UD) + return -EOPNOTSUPP; + + qp->state = IB_QPS_RESET; + + INIT_LIST_HEAD(&qp->cq_poll_sq); + 
INIT_LIST_HEAD(&qp->cq_flush_sq); + INIT_LIST_HEAD(&qp->cq_flush_rq); + + spin_lock_init(&qp->sq_lock); + spin_lock_init(&qp->rq_lock); + + qp->has_sq = 1; + qp->has_rq = 1; + + if (attr->qp_type == IB_QPT_GSI) { + rc = ionic_get_gsi_qpid(dev, &qp->qpid); + } else { + udma_mask = BIT(dev->lif_cfg.udma_count) - 1; + + if (qp->has_sq) + udma_mask &= to_ionic_vcq(attr->send_cq)->udma_mask; + + if (qp->has_rq) + udma_mask &= to_ionic_vcq(attr->recv_cq)->udma_mask; + + if (udata && req.udma_mask) + udma_mask &= req.udma_mask; + + if (!udma_mask) + return -EINVAL; + + rc = ionic_get_qpid(dev, &qp->qpid, &qp->udma_idx, udma_mask); + } + if (rc) + return rc; + + qp->sig_all = attr->sq_sig_type == IB_SIGNAL_ALL_WR; + qp->has_ah = attr->qp_type == IB_QPT_RC; + + if (qp->has_ah) { + qp->hdr = kzalloc(sizeof(*qp->hdr), GFP_KERNEL); + if (!qp->hdr) { + rc = -ENOMEM; + goto err_ah_alloc; + } + + rc = ionic_get_ahid(dev, &qp->ahid); + if (rc) + goto err_ahid; + } + + if (udata) { + if (req.rq_cmb & IONIC_CMB_ENABLE) + qp->rq_cmb = req.rq_cmb; + + if (req.sq_cmb & IONIC_CMB_ENABLE) + qp->sq_cmb = req.sq_cmb; + } + + rc = ionic_qp_sq_init(dev, ctx, qp, &req.sq, &sq_buf, + attr->cap.max_send_wr, attr->cap.max_send_sge, + attr->cap.max_inline_data, req.sq_spec, udata); + if (rc) + goto err_sq; + + rc = ionic_qp_rq_init(dev, ctx, qp, &req.rq, &rq_buf, + attr->cap.max_recv_wr, attr->cap.max_recv_sge, + req.rq_spec, udata); + if (rc) + goto err_rq; + + rc = ionic_create_qp_cmd(dev, pd, + to_ionic_vcq_cq(attr->send_cq, qp->udma_idx), + to_ionic_vcq_cq(attr->recv_cq, qp->udma_idx), + qp, &sq_buf, &rq_buf, attr); + if (rc) + goto err_cmd; + + if (udata) { + resp.qpid = qp->qpid; + resp.udma_idx = qp->udma_idx; + + if (qp->sq_cmb & IONIC_CMB_ENABLE) { + bool wc; + + if ((qp->sq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC)) == + (IONIC_CMB_WC | IONIC_CMB_UC)) { + ibdev_dbg(&dev->ibdev, + "Both sq_cmb flags IONIC_CMB_WC and IONIC_CMB_UC are set, using default driver mapping\n"); + qp->sq_cmb &= ~(IONIC_CMB_WC | IONIC_CMB_UC); + } + + wc = (qp->sq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC)) + != IONIC_CMB_UC; + + /* let userspace know the mapping */ + if (wc) + qp->sq_cmb |= IONIC_CMB_WC; + else + qp->sq_cmb |= IONIC_CMB_UC; + + qp->mmap_sq_cmb = + ionic_mmap_entry_insert(ctx, + qp->sq.size, + PHYS_PFN(qp->sq_cmb_addr), + wc ? IONIC_MMAP_WC : 0, + &resp.sq_cmb_offset); + if (!qp->mmap_sq_cmb) { + rc = -ENOMEM; + goto err_mmap_sq; + } + + resp.sq_cmb = qp->sq_cmb; + } + + if (qp->rq_cmb & IONIC_CMB_ENABLE) { + bool wc; + + if ((qp->rq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC)) == + (IONIC_CMB_WC | IONIC_CMB_UC)) { + ibdev_dbg(&dev->ibdev, + "Both rq_cmb flags IONIC_CMB_WC and IONIC_CMB_UC are set, using default driver mapping\n"); + qp->rq_cmb &= ~(IONIC_CMB_WC | IONIC_CMB_UC); + } + + if (qp->rq_cmb & IONIC_CMB_EXPDB) + wc = (qp->rq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC)) + == IONIC_CMB_WC; + else + wc = (qp->rq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC)) + != IONIC_CMB_UC; + + /* let userspace know the mapping */ + if (wc) + qp->rq_cmb |= IONIC_CMB_WC; + else + qp->rq_cmb |= IONIC_CMB_UC; + + qp->mmap_rq_cmb = + ionic_mmap_entry_insert(ctx, + qp->rq.size, + PHYS_PFN(qp->rq_cmb_addr), + wc ? 
IONIC_MMAP_WC : 0, + &resp.rq_cmb_offset); + if (!qp->mmap_rq_cmb) { + rc = -ENOMEM; + goto err_mmap_rq; + } + + resp.rq_cmb = qp->rq_cmb; + } + + rc = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (rc) + goto err_resp; + } + + ionic_pgtbl_unbuf(dev, &rq_buf); + ionic_pgtbl_unbuf(dev, &sq_buf); + + qp->ibqp.qp_num = qp->qpid; + + init_completion(&qp->qp_rel_comp); + kref_init(&qp->qp_kref); + + entry = xa_store_irq(&dev->qp_tbl, qp->qpid, qp, GFP_KERNEL); + if (entry) { + if (!xa_is_err(entry)) + rc = -EINVAL; + else + rc = xa_err(entry); + + goto err_resp; + } + + if (qp->has_sq) { + cq = to_ionic_vcq_cq(attr->send_cq, qp->udma_idx); + + attr->cap.max_send_wr = qp->sq.mask; + attr->cap.max_send_sge = + ionic_v1_send_wqe_max_sge(qp->sq.stride_log2, + qp->sq_spec, + qp->sq_cmb & IONIC_CMB_EXPDB); + attr->cap.max_inline_data = + ionic_v1_send_wqe_max_data(qp->sq.stride_log2, + qp->sq_cmb & + IONIC_CMB_EXPDB); + qp->sq_cqid = cq->cqid; + } + + if (qp->has_rq) { + cq = to_ionic_vcq_cq(attr->recv_cq, qp->udma_idx); + + attr->cap.max_recv_wr = qp->rq.mask; + attr->cap.max_recv_sge = + ionic_v1_recv_wqe_max_sge(qp->rq.stride_log2, + qp->rq_spec, + qp->rq_cmb & IONIC_CMB_EXPDB); + qp->rq_cqid = cq->cqid; + } + + return 0; + +err_resp: + if (udata && (qp->rq_cmb & IONIC_CMB_ENABLE)) + rdma_user_mmap_entry_remove(qp->mmap_rq_cmb); +err_mmap_rq: + if (udata && (qp->sq_cmb & IONIC_CMB_ENABLE)) + rdma_user_mmap_entry_remove(qp->mmap_sq_cmb); +err_mmap_sq: + ionic_destroy_qp_cmd(dev, qp->qpid); +err_cmd: + ionic_pgtbl_unbuf(dev, &rq_buf); + ionic_qp_rq_destroy(dev, ctx, qp); +err_rq: + ionic_pgtbl_unbuf(dev, &sq_buf); + ionic_qp_sq_destroy(dev, ctx, qp); +err_sq: + if (qp->has_ah) + ionic_put_ahid(dev, qp->ahid); +err_ahid: + kfree(qp->hdr); +err_ah_alloc: + ionic_put_qpid(dev, qp->qpid); + return rc; +} + +void ionic_notify_flush_cq(struct ionic_cq *cq) +{ + if (cq->flush && cq->vcq->ibcq.comp_handler) + cq->vcq->ibcq.comp_handler(&cq->vcq->ibcq, + cq->vcq->ibcq.cq_context); +} + +static void ionic_notify_qp_cqs(struct ionic_ibdev *dev, struct ionic_qp *qp) +{ + if (qp->ibqp.send_cq) + ionic_notify_flush_cq(to_ionic_vcq_cq(qp->ibqp.send_cq, + qp->udma_idx)); + if (qp->ibqp.recv_cq && qp->ibqp.recv_cq != qp->ibqp.send_cq) + ionic_notify_flush_cq(to_ionic_vcq_cq(qp->ibqp.recv_cq, + qp->udma_idx)); +} + +void ionic_flush_qp(struct ionic_ibdev *dev, struct ionic_qp *qp) +{ + unsigned long irqflags; + struct ionic_cq *cq; + + if (qp->ibqp.send_cq) { + cq = to_ionic_vcq_cq(qp->ibqp.send_cq, qp->udma_idx); + + /* Hold the CQ lock and QP sq_lock to set up flush */ + spin_lock_irqsave(&cq->lock, irqflags); + spin_lock(&qp->sq_lock); + qp->sq_flush = true; + if (!ionic_queue_empty(&qp->sq)) { + cq->flush = true; + list_move_tail(&qp->cq_flush_sq, &cq->flush_sq); + } + spin_unlock(&qp->sq_lock); + spin_unlock_irqrestore(&cq->lock, irqflags); + } + + if (qp->ibqp.recv_cq) { + cq = to_ionic_vcq_cq(qp->ibqp.recv_cq, qp->udma_idx); + + /* Hold the CQ lock and QP rq_lock to set up flush */ + spin_lock_irqsave(&cq->lock, irqflags); + spin_lock(&qp->rq_lock); + qp->rq_flush = true; + if (!ionic_queue_empty(&qp->rq)) { + cq->flush = true; + list_move_tail(&qp->cq_flush_rq, &cq->flush_rq); + } + spin_unlock(&qp->rq_lock); + spin_unlock_irqrestore(&cq->lock, irqflags); + } +} + +static void ionic_clean_cq(struct ionic_cq *cq, u32 qpid) +{ + struct ionic_v1_cqe *qcqe; + int prod, qtf, qid, type; + bool color; + + if (!cq->q.ptr) + return; + + color = cq->color; + prod = cq->q.prod; + qcqe = ionic_queue_at(&cq->q, 
prod); + + while (color == ionic_v1_cqe_color(qcqe)) { + qtf = ionic_v1_cqe_qtf(qcqe); + qid = ionic_v1_cqe_qtf_qid(qtf); + type = ionic_v1_cqe_qtf_type(qtf); + + if (qid == qpid && type != IONIC_V1_CQE_TYPE_ADMIN) + ionic_v1_cqe_clean(qcqe); + + prod = ionic_queue_next(&cq->q, prod); + qcqe = ionic_queue_at(&cq->q, prod); + color = ionic_color_wrap(prod, color); + } +} + +static void ionic_reset_qp(struct ionic_ibdev *dev, struct ionic_qp *qp) +{ + unsigned long irqflags; + struct ionic_cq *cq; + int i; + + local_irq_save(irqflags); + + if (qp->ibqp.send_cq) { + cq = to_ionic_vcq_cq(qp->ibqp.send_cq, qp->udma_idx); + spin_lock(&cq->lock); + ionic_clean_cq(cq, qp->qpid); + spin_unlock(&cq->lock); + } + + if (qp->ibqp.recv_cq) { + cq = to_ionic_vcq_cq(qp->ibqp.recv_cq, qp->udma_idx); + spin_lock(&cq->lock); + ionic_clean_cq(cq, qp->qpid); + spin_unlock(&cq->lock); + } + + if (qp->has_sq) { + spin_lock(&qp->sq_lock); + qp->sq_flush = false; + qp->sq_flush_rcvd = false; + qp->sq_msn_prod = 0; + qp->sq_msn_cons = 0; + qp->sq.prod = 0; + qp->sq.cons = 0; + spin_unlock(&qp->sq_lock); + } + + if (qp->has_rq) { + spin_lock(&qp->rq_lock); + qp->rq_flush = false; + qp->rq.prod = 0; + qp->rq.cons = 0; + if (qp->rq_meta) { + for (i = 0; i < qp->rq.mask; ++i) + qp->rq_meta[i].next = &qp->rq_meta[i + 1]; + qp->rq_meta[i].next = IONIC_META_LAST; + } + qp->rq_meta_head = &qp->rq_meta[0]; + spin_unlock(&qp->rq_lock); + } + + local_irq_restore(irqflags); +} + +static bool ionic_qp_cur_state_is_ok(enum ib_qp_state q_state, + enum ib_qp_state attr_state) +{ + if (q_state == attr_state) + return true; + + if (attr_state == IB_QPS_ERR) + return true; + + if (attr_state == IB_QPS_SQE) + return q_state == IB_QPS_RTS || q_state == IB_QPS_SQD; + + return false; +} + +static int ionic_check_modify_qp(struct ionic_qp *qp, struct ib_qp_attr *attr, + int mask) +{ + enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ? + attr->cur_qp_state : qp->state; + enum ib_qp_state next_state = (mask & IB_QP_STATE) ? 
+ attr->qp_state : cur_state; + + if ((mask & IB_QP_CUR_STATE) && + !ionic_qp_cur_state_is_ok(qp->state, attr->cur_qp_state)) + return -EINVAL; + + if (!ib_modify_qp_is_ok(cur_state, next_state, qp->ibqp.qp_type, mask)) + return -EINVAL; + + /* unprivileged qp not allowed privileged qkey */ + if ((mask & IB_QP_QKEY) && (attr->qkey & 0x80000000) && + qp->ibqp.uobject) + return -EPERM; + + return 0; +} + +int ionic_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask, + struct ib_udata *udata) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device); + struct ionic_pd *pd = to_ionic_pd(ibqp->pd); + struct ionic_qp *qp = to_ionic_qp(ibqp); + int rc; + + rc = ionic_check_modify_qp(qp, attr, mask); + if (rc) + return rc; + + if (mask & IB_QP_CAP) + return -EINVAL; + + rc = ionic_modify_qp_cmd(dev, pd, qp, attr, mask); + if (rc) + return rc; + + if (mask & IB_QP_STATE) { + qp->state = attr->qp_state; + + if (attr->qp_state == IB_QPS_ERR) { + ionic_flush_qp(dev, qp); + ionic_notify_qp_cqs(dev, qp); + } else if (attr->qp_state == IB_QPS_RESET) { + ionic_reset_qp(dev, qp); + } + } + + return 0; +} + +int ionic_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int mask, struct ib_qp_init_attr *init_attr) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device); + struct ionic_qp *qp = to_ionic_qp(ibqp); + int rc; + + memset(attr, 0, sizeof(*attr)); + memset(init_attr, 0, sizeof(*init_attr)); + + rc = ionic_query_qp_cmd(dev, qp, attr, mask); + if (rc) + return rc; + + if (qp->has_sq) + attr->cap.max_send_wr = qp->sq.mask; + + if (qp->has_rq) + attr->cap.max_recv_wr = qp->rq.mask; + + init_attr->event_handler = ibqp->event_handler; + init_attr->qp_context = ibqp->qp_context; + init_attr->send_cq = ibqp->send_cq; + init_attr->recv_cq = ibqp->recv_cq; + init_attr->srq = ibqp->srq; + init_attr->xrcd = ibqp->xrcd; + init_attr->cap = attr->cap; + init_attr->sq_sig_type = qp->sig_all ? 
+ IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; + init_attr->qp_type = ibqp->qp_type; + init_attr->create_flags = 0; + init_attr->port_num = 0; + init_attr->rwq_ind_tbl = ibqp->rwq_ind_tbl; + init_attr->source_qpn = 0; + + return rc; +} + +int ionic_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) +{ + struct ionic_ctx *ctx = + rdma_udata_to_drv_context(udata, struct ionic_ctx, ibctx); + struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device); + struct ionic_qp *qp = to_ionic_qp(ibqp); + unsigned long irqflags; + struct ionic_cq *cq; + int rc; + + rc = ionic_destroy_qp_cmd(dev, qp->qpid); + if (rc) + return rc; + + xa_erase_irq(&dev->qp_tbl, qp->qpid); + + kref_put(&qp->qp_kref, ionic_qp_complete); + wait_for_completion(&qp->qp_rel_comp); + + if (qp->ibqp.send_cq) { + cq = to_ionic_vcq_cq(qp->ibqp.send_cq, qp->udma_idx); + spin_lock_irqsave(&cq->lock, irqflags); + ionic_clean_cq(cq, qp->qpid); + list_del(&qp->cq_poll_sq); + list_del(&qp->cq_flush_sq); + spin_unlock_irqrestore(&cq->lock, irqflags); + } + + if (qp->ibqp.recv_cq) { + cq = to_ionic_vcq_cq(qp->ibqp.recv_cq, qp->udma_idx); + spin_lock_irqsave(&cq->lock, irqflags); + ionic_clean_cq(cq, qp->qpid); + list_del(&qp->cq_flush_rq); + spin_unlock_irqrestore(&cq->lock, irqflags); + } + + ionic_qp_rq_destroy(dev, ctx, qp); + ionic_qp_sq_destroy(dev, ctx, qp); + if (qp->has_ah) { + ionic_put_ahid(dev, qp->ahid); + kfree(qp->hdr); + } + ionic_put_qpid(dev, qp->qpid); + + return 0; +} diff --git a/drivers/infiniband/hw/ionic/ionic_fw.h b/drivers/infiniband/hw/ionic/ionic_fw.h index 44ec69487519..62afe9220717 100644 --- a/drivers/infiniband/hw/ionic/ionic_fw.h +++ b/drivers/infiniband/hw/ionic/ionic_fw.h @@ -5,6 +5,266 @@ #define _IONIC_FW_H_ #include +#include + +/* common for ib spec */ + +#define IONIC_EXP_DBELL_SZ 8 + +enum ionic_mrid_bits { + IONIC_MRID_INDEX_SHIFT = 8, +}; + +static inline u32 ionic_mrid(u32 index, u8 key) +{ + return (index << IONIC_MRID_INDEX_SHIFT) | key; +} + +static inline u32 ionic_mrid_index(u32 lrkey) +{ + return lrkey >> IONIC_MRID_INDEX_SHIFT; +} + +/* common to all versions */ + +/* wqe scatter gather element */ +struct ionic_sge { + __be64 va; + __be32 len; + __be32 lkey; +}; + +/* admin queue mr type */ +enum ionic_mr_flags { + /* bits that determine mr access */ + IONIC_MRF_LOCAL_WRITE = BIT(0), + IONIC_MRF_REMOTE_WRITE = BIT(1), + IONIC_MRF_REMOTE_READ = BIT(2), + IONIC_MRF_REMOTE_ATOMIC = BIT(3), + IONIC_MRF_MW_BIND = BIT(4), + IONIC_MRF_ZERO_BASED = BIT(5), + IONIC_MRF_ON_DEMAND = BIT(6), + IONIC_MRF_PB = BIT(7), + IONIC_MRF_ACCESS_MASK = BIT(12) - 1, + + /* bits that determine mr type */ + IONIC_MRF_UKEY_EN = BIT(13), + IONIC_MRF_IS_MW = BIT(14), + IONIC_MRF_INV_EN = BIT(15), + + /* base flags combinations for mr types */ + IONIC_MRF_USER_MR = 0, + IONIC_MRF_PHYS_MR = (IONIC_MRF_UKEY_EN | + IONIC_MRF_INV_EN), + IONIC_MRF_MW_1 = (IONIC_MRF_UKEY_EN | + IONIC_MRF_IS_MW), + IONIC_MRF_MW_2 = (IONIC_MRF_UKEY_EN | + IONIC_MRF_IS_MW | + IONIC_MRF_INV_EN), +}; + +static inline int to_ionic_mr_flags(int access) +{ + int flags = 0; + + if (access & IB_ACCESS_LOCAL_WRITE) + flags |= IONIC_MRF_LOCAL_WRITE; + + if (access & IB_ACCESS_REMOTE_READ) + flags |= IONIC_MRF_REMOTE_READ; + + if (access & IB_ACCESS_REMOTE_WRITE) + flags |= IONIC_MRF_REMOTE_WRITE; + + if (access & IB_ACCESS_REMOTE_ATOMIC) + flags |= IONIC_MRF_REMOTE_ATOMIC; + + if (access & IB_ACCESS_MW_BIND) + flags |= IONIC_MRF_MW_BIND; + + if (access & IB_ZERO_BASED) + flags |= IONIC_MRF_ZERO_BASED; + + return flags; +} + +enum ionic_qp_flags { + /* bits that 
determine qp access */ + IONIC_QPF_REMOTE_WRITE = BIT(0), + IONIC_QPF_REMOTE_READ = BIT(1), + IONIC_QPF_REMOTE_ATOMIC = BIT(2), + + /* bits that determine other qp behavior */ + IONIC_QPF_SQ_PB = BIT(6), + IONIC_QPF_RQ_PB = BIT(7), + IONIC_QPF_SQ_SPEC = BIT(8), + IONIC_QPF_RQ_SPEC = BIT(9), + IONIC_QPF_REMOTE_PRIVILEGED = BIT(10), + IONIC_QPF_SQ_DRAINING = BIT(11), + IONIC_QPF_SQD_NOTIFY = BIT(12), + IONIC_QPF_SQ_CMB = BIT(13), + IONIC_QPF_RQ_CMB = BIT(14), + IONIC_QPF_PRIVILEGED = BIT(15), +}; + +static inline int from_ionic_qp_flags(int flags) +{ + int access_flags = 0; + + if (flags & IONIC_QPF_REMOTE_WRITE) + access_flags |= IB_ACCESS_REMOTE_WRITE; + + if (flags & IONIC_QPF_REMOTE_READ) + access_flags |= IB_ACCESS_REMOTE_READ; + + if (flags & IONIC_QPF_REMOTE_ATOMIC) + access_flags |= IB_ACCESS_REMOTE_ATOMIC; + + return access_flags; +} + +static inline int to_ionic_qp_flags(int access, bool sqd_notify, + bool sq_is_cmb, bool rq_is_cmb, + bool sq_spec, bool rq_spec, + bool privileged, bool remote_privileged) +{ + int flags = 0; + + if (access & IB_ACCESS_REMOTE_WRITE) + flags |= IONIC_QPF_REMOTE_WRITE; + + if (access & IB_ACCESS_REMOTE_READ) + flags |= IONIC_QPF_REMOTE_READ; + + if (access & IB_ACCESS_REMOTE_ATOMIC) + flags |= IONIC_QPF_REMOTE_ATOMIC; + + if (sqd_notify) + flags |= IONIC_QPF_SQD_NOTIFY; + + if (sq_is_cmb) + flags |= IONIC_QPF_SQ_CMB; + + if (rq_is_cmb) + flags |= IONIC_QPF_RQ_CMB; + + if (sq_spec) + flags |= IONIC_QPF_SQ_SPEC; + + if (rq_spec) + flags |= IONIC_QPF_RQ_SPEC; + + if (privileged) + flags |= IONIC_QPF_PRIVILEGED; + + if (remote_privileged) + flags |= IONIC_QPF_REMOTE_PRIVILEGED; + + return flags; +} + +/* admin queue qp type */ +enum ionic_qp_type { + IONIC_QPT_RC, + IONIC_QPT_UC, + IONIC_QPT_RD, + IONIC_QPT_UD, + IONIC_QPT_SRQ, + IONIC_QPT_XRC_INI, + IONIC_QPT_XRC_TGT, + IONIC_QPT_XRC_SRQ, +}; + +static inline int to_ionic_qp_type(enum ib_qp_type type) +{ + switch (type) { + case IB_QPT_GSI: + case IB_QPT_UD: + return IONIC_QPT_UD; + case IB_QPT_RC: + return IONIC_QPT_RC; + case IB_QPT_UC: + return IONIC_QPT_UC; + case IB_QPT_XRC_INI: + return IONIC_QPT_XRC_INI; + case IB_QPT_XRC_TGT: + return IONIC_QPT_XRC_TGT; + default: + return -EINVAL; + } +} + +/* admin queue qp state */ +enum ionic_qp_state { + IONIC_QPS_RESET, + IONIC_QPS_INIT, + IONIC_QPS_RTR, + IONIC_QPS_RTS, + IONIC_QPS_SQD, + IONIC_QPS_SQE, + IONIC_QPS_ERR, +}; + +static inline int from_ionic_qp_state(enum ionic_qp_state state) +{ + switch (state) { + case IONIC_QPS_RESET: + return IB_QPS_RESET; + case IONIC_QPS_INIT: + return IB_QPS_INIT; + case IONIC_QPS_RTR: + return IB_QPS_RTR; + case IONIC_QPS_RTS: + return IB_QPS_RTS; + case IONIC_QPS_SQD: + return IB_QPS_SQD; + case IONIC_QPS_SQE: + return IB_QPS_SQE; + case IONIC_QPS_ERR: + return IB_QPS_ERR; + default: + return -EINVAL; + } +} + +static inline int to_ionic_qp_state(enum ib_qp_state state) +{ + switch (state) { + case IB_QPS_RESET: + return IONIC_QPS_RESET; + case IB_QPS_INIT: + return IONIC_QPS_INIT; + case IB_QPS_RTR: + return IONIC_QPS_RTR; + case IB_QPS_RTS: + return IONIC_QPS_RTS; + case IB_QPS_SQD: + return IONIC_QPS_SQD; + case IB_QPS_SQE: + return IONIC_QPS_SQE; + case IB_QPS_ERR: + return IONIC_QPS_ERR; + default: + return 0; + } +} + +static inline int to_ionic_qp_modify_state(enum ib_qp_state to_state, + enum ib_qp_state from_state) +{ + return to_ionic_qp_state(to_state) | + (to_ionic_qp_state(from_state) << 4); +} + +/* fw abi v1 */ + +/* data payload part of v1 wqe */ +union ionic_v1_pld { + struct ionic_sge sgl[2]; + 
__be32 spec32[8]; + __be16 spec16[16]; + __u8 data[32]; +}; /* completion queue v1 cqe */ struct ionic_v1_cqe { @@ -78,6 +338,390 @@ static inline u32 ionic_v1_cqe_qtf_qid(u32 qtf) return qtf >> IONIC_V1_CQE_QID_SHIFT; } +/* v1 base wqe header */ +struct ionic_v1_base_hdr { + __u64 wqe_id; + __u8 op; + __u8 num_sge_key; + __be16 flags; + __be32 imm_data_key; +}; + +/* v1 receive wqe body */ +struct ionic_v1_recv_bdy { + __u8 rsvd[16]; + union ionic_v1_pld pld; +}; + +/* v1 send/rdma wqe body (common, has sgl) */ +struct ionic_v1_common_bdy { + union { + struct { + __be32 ah_id; + __be32 dest_qpn; + __be32 dest_qkey; + } send; + struct { + __be32 remote_va_high; + __be32 remote_va_low; + __be32 remote_rkey; + } rdma; + }; + __be32 length; + union ionic_v1_pld pld; +}; + +/* v1 atomic wqe body */ +struct ionic_v1_atomic_bdy { + __be32 remote_va_high; + __be32 remote_va_low; + __be32 remote_rkey; + __be32 swap_add_high; + __be32 swap_add_low; + __be32 compare_high; + __be32 compare_low; + __u8 rsvd[4]; + struct ionic_sge sge; +}; + +/* v1 reg mr wqe body */ +struct ionic_v1_reg_mr_bdy { + __be64 va; + __be64 length; + __be64 offset; + __be64 dma_addr; + __be32 map_count; + __be16 flags; + __u8 dir_size_log2; + __u8 page_size_log2; + __u8 rsvd[8]; +}; + +/* v1 bind mw wqe body */ +struct ionic_v1_bind_mw_bdy { + __be64 va; + __be64 length; + __be32 lkey; + __be16 flags; + __u8 rsvd[26]; +}; + +/* v1 send/recv wqe */ +struct ionic_v1_wqe { + struct ionic_v1_base_hdr base; + union { + struct ionic_v1_recv_bdy recv; + struct ionic_v1_common_bdy common; + struct ionic_v1_atomic_bdy atomic; + struct ionic_v1_reg_mr_bdy reg_mr; + struct ionic_v1_bind_mw_bdy bind_mw; + }; +}; + +/* queue pair v1 send opcodes */ +enum ionic_v1_op { + IONIC_V1_OP_SEND, + IONIC_V1_OP_SEND_INV, + IONIC_V1_OP_SEND_IMM, + IONIC_V1_OP_RDMA_READ, + IONIC_V1_OP_RDMA_WRITE, + IONIC_V1_OP_RDMA_WRITE_IMM, + IONIC_V1_OP_ATOMIC_CS, + IONIC_V1_OP_ATOMIC_FA, + IONIC_V1_OP_REG_MR, + IONIC_V1_OP_LOCAL_INV, + IONIC_V1_OP_BIND_MW, + + /* flags */ + IONIC_V1_FLAG_FENCE = BIT(0), + IONIC_V1_FLAG_SOL = BIT(1), + IONIC_V1_FLAG_INL = BIT(2), + IONIC_V1_FLAG_SIG = BIT(3), + + /* flags last four bits for sgl spec format */ + IONIC_V1_FLAG_SPEC32 = (1u << 12), + IONIC_V1_FLAG_SPEC16 = (2u << 12), + IONIC_V1_SPEC_FIRST_SGE = 2, +}; + +static inline size_t ionic_v1_send_wqe_min_size(int min_sge, int min_data, + int spec, bool expdb) +{ + size_t sz_wqe, sz_sgl, sz_data; + + if (spec > IONIC_V1_SPEC_FIRST_SGE) + min_sge += IONIC_V1_SPEC_FIRST_SGE; + + if (expdb) { + min_sge += 1; + min_data += IONIC_EXP_DBELL_SZ; + } + + sz_wqe = sizeof(struct ionic_v1_wqe); + sz_sgl = offsetof(struct ionic_v1_wqe, common.pld.sgl[min_sge]); + sz_data = offsetof(struct ionic_v1_wqe, common.pld.data[min_data]); + + if (sz_sgl > sz_wqe) + sz_wqe = sz_sgl; + + if (sz_data > sz_wqe) + sz_wqe = sz_data; + + return sz_wqe; +} + +static inline int ionic_v1_send_wqe_max_sge(u8 stride_log2, int spec, + bool expdb) +{ + struct ionic_sge *sge = (void *)(1ull << stride_log2); + struct ionic_v1_wqe *wqe = (void *)0; + int num_sge = 0; + + if (expdb) + sge -= 1; + + if (spec > IONIC_V1_SPEC_FIRST_SGE) + num_sge = IONIC_V1_SPEC_FIRST_SGE; + + num_sge = sge - &wqe->common.pld.sgl[num_sge]; + + if (spec && num_sge > spec) + num_sge = spec; + + return num_sge; +} + +static inline int ionic_v1_send_wqe_max_data(u8 stride_log2, bool expdb) +{ + struct ionic_v1_wqe *wqe = (void *)0; + __u8 *data = (void *)(1ull << stride_log2); + + if (expdb) + data -= IONIC_EXP_DBELL_SZ; + + return 
data - wqe->common.pld.data; +} + +static inline size_t ionic_v1_recv_wqe_min_size(int min_sge, int spec, + bool expdb) +{ + size_t sz_wqe, sz_sgl; + + if (spec > IONIC_V1_SPEC_FIRST_SGE) + min_sge += IONIC_V1_SPEC_FIRST_SGE; + + if (expdb) + min_sge += 1; + + sz_wqe = sizeof(struct ionic_v1_wqe); + sz_sgl = offsetof(struct ionic_v1_wqe, recv.pld.sgl[min_sge]); + + if (sz_sgl > sz_wqe) + sz_wqe = sz_sgl; + + return sz_wqe; +} + +static inline int ionic_v1_recv_wqe_max_sge(u8 stride_log2, int spec, + bool expdb) +{ + struct ionic_sge *sge = (void *)(1ull << stride_log2); + struct ionic_v1_wqe *wqe = (void *)0; + int num_sge = 0; + + if (expdb) + sge -= 1; + + if (spec > IONIC_V1_SPEC_FIRST_SGE) + num_sge = IONIC_V1_SPEC_FIRST_SGE; + + num_sge = sge - &wqe->recv.pld.sgl[num_sge]; + + if (spec && num_sge > spec) + num_sge = spec; + + return num_sge; +} + +static inline int ionic_v1_use_spec_sge(int min_sge, int spec) +{ + if (!spec || min_sge > spec) + return 0; + + if (min_sge <= IONIC_V1_SPEC_FIRST_SGE) + return IONIC_V1_SPEC_FIRST_SGE; + + return spec; +} + +struct ionic_admin_create_ah { + __le64 dma_addr; + __le32 length; + __le32 pd_id; + __le32 id_ver; + __le16 dbid_flags; + __u8 csum_profile; + __u8 crypto; +} __packed; + +#define IONIC_ADMIN_CREATE_AH_IN_V1_LEN 24 +static_assert(sizeof(struct ionic_admin_create_ah) == + IONIC_ADMIN_CREATE_AH_IN_V1_LEN); + +struct ionic_admin_destroy_ah { + __le32 ah_id; +} __packed; + +#define IONIC_ADMIN_DESTROY_AH_IN_V1_LEN 4 +static_assert(sizeof(struct ionic_admin_destroy_ah) == + IONIC_ADMIN_DESTROY_AH_IN_V1_LEN); + +struct ionic_admin_query_ah { + __le64 dma_addr; +} __packed; + +#define IONIC_ADMIN_QUERY_AH_IN_V1_LEN 8 +static_assert(sizeof(struct ionic_admin_query_ah) == + IONIC_ADMIN_QUERY_AH_IN_V1_LEN); + +struct ionic_admin_create_mr { + __le64 va; + __le64 length; + __le32 pd_id; + __le32 id_ver; + __le32 tbl_index; + __le32 map_count; + __le64 dma_addr; + __le16 dbid_flags; + __u8 pt_type; + __u8 dir_size_log2; + __u8 page_size_log2; +} __packed; + +#define IONIC_ADMIN_CREATE_MR_IN_V1_LEN 45 +static_assert(sizeof(struct ionic_admin_create_mr) == + IONIC_ADMIN_CREATE_MR_IN_V1_LEN); + +struct ionic_admin_destroy_mr { + __le32 mr_id; +} __packed; + +#define IONIC_ADMIN_DESTROY_MR_IN_V1_LEN 4 +static_assert(sizeof(struct ionic_admin_destroy_mr) == + IONIC_ADMIN_DESTROY_MR_IN_V1_LEN); + +struct ionic_admin_create_cq { + __le32 eq_id; + __u8 depth_log2; + __u8 stride_log2; + __u8 dir_size_log2_rsvd; + __u8 page_size_log2; + __le32 cq_flags; + __le32 id_ver; + __le32 tbl_index; + __le32 map_count; + __le64 dma_addr; + __le16 dbid_flags; +} __packed; + +#define IONIC_ADMIN_CREATE_CQ_IN_V1_LEN 34 +static_assert(sizeof(struct ionic_admin_create_cq) == + IONIC_ADMIN_CREATE_CQ_IN_V1_LEN); + +struct ionic_admin_destroy_cq { + __le32 cq_id; +} __packed; + +#define IONIC_ADMIN_DESTROY_CQ_IN_V1_LEN 4 +static_assert(sizeof(struct ionic_admin_destroy_cq) == + IONIC_ADMIN_DESTROY_CQ_IN_V1_LEN); + +struct ionic_admin_create_qp { + __le32 pd_id; + __be32 priv_flags; + __le32 sq_cq_id; + __u8 sq_depth_log2; + __u8 sq_stride_log2; + __u8 sq_dir_size_log2_rsvd; + __u8 sq_page_size_log2; + __le32 sq_tbl_index_xrcd_id; + __le32 sq_map_count; + __le64 sq_dma_addr; + __le32 rq_cq_id; + __u8 rq_depth_log2; + __u8 rq_stride_log2; + __u8 rq_dir_size_log2_rsvd; + __u8 rq_page_size_log2; + __le32 rq_tbl_index_srq_id; + __le32 rq_map_count; + __le64 rq_dma_addr; + __le32 id_ver; + __le16 dbid_flags; + __u8 type_state; + __u8 rsvd; +} __packed; + +#define 
IONIC_ADMIN_CREATE_QP_IN_V1_LEN 64 +static_assert(sizeof(struct ionic_admin_create_qp) == + IONIC_ADMIN_CREATE_QP_IN_V1_LEN); + +struct ionic_admin_destroy_qp { + __le32 qp_id; +} __packed; + +#define IONIC_ADMIN_DESTROY_QP_IN_V1_LEN 4 +static_assert(sizeof(struct ionic_admin_destroy_qp) == + IONIC_ADMIN_DESTROY_QP_IN_V1_LEN); + +struct ionic_admin_mod_qp { + __be32 attr_mask; + __u8 dcqcn_profile; + __u8 tfp_csum_profile; + __be16 access_flags; + __le32 rq_psn; + __le32 sq_psn; + __le32 qkey_dest_qpn; + __le32 rate_limit_kbps; + __u8 pmtu; + __u8 retry; + __u8 rnr_timer; + __u8 retry_timeout; + __u8 rsq_depth; + __u8 rrq_depth; + __le16 pkey_id; + __le32 ah_id_len; + __u8 en_pcp; + __u8 ip_dscp; + __u8 rsvd2; + __u8 type_state; + union { + struct { + __le16 rsvd1; + }; + __le32 rrq_index; + }; + __le32 rsq_index; + __le64 dma_addr; + __le32 id_ver; +} __packed; + +#define IONIC_ADMIN_MODIFY_QP_IN_V1_LEN 60 +static_assert(sizeof(struct ionic_admin_mod_qp) == + IONIC_ADMIN_MODIFY_QP_IN_V1_LEN); + +struct ionic_admin_query_qp { + __le64 hdr_dma_addr; + __le64 sq_dma_addr; + __le64 rq_dma_addr; + __le32 ah_id; + __le32 id_ver; + __le16 dbid_flags; +} __packed; + +#define IONIC_ADMIN_QUERY_QP_IN_V1_LEN 34 +static_assert(sizeof(struct ionic_admin_query_qp) == + IONIC_ADMIN_QUERY_QP_IN_V1_LEN); + #define ADMIN_WQE_STRIDE 64 #define ADMIN_WQE_HDR_LEN 4 @@ -88,9 +732,66 @@ struct ionic_v1_admin_wqe { __le16 len; union { + struct ionic_admin_create_ah create_ah; + struct ionic_admin_destroy_ah destroy_ah; + struct ionic_admin_query_ah query_ah; + struct ionic_admin_create_mr create_mr; + struct ionic_admin_destroy_mr destroy_mr; + struct ionic_admin_create_cq create_cq; + struct ionic_admin_destroy_cq destroy_cq; + struct ionic_admin_create_qp create_qp; + struct ionic_admin_destroy_qp destroy_qp; + struct ionic_admin_mod_qp mod_qp; + struct ionic_admin_query_qp query_qp; } cmd; }; +/* side data for query qp */ +struct ionic_v1_admin_query_qp_sq { + __u8 rnr_timer; + __u8 retry_timeout; + __be16 access_perms_flags; + __be16 rsvd; + __be16 pkey_id; + __be32 qkey_dest_qpn; + __be32 rate_limit_kbps; + __be32 rq_psn; +}; + +struct ionic_v1_admin_query_qp_rq { + __u8 state_pmtu; + __u8 retry_rnrtry; + __u8 rrq_depth; + __u8 rsq_depth; + __be32 sq_psn; + __be16 access_perms_flags; + __be16 rsvd; +}; + +/* admin queue v1 opcodes */ +enum ionic_v1_admin_op { + IONIC_V1_ADMIN_NOOP, + IONIC_V1_ADMIN_CREATE_CQ, + IONIC_V1_ADMIN_CREATE_QP, + IONIC_V1_ADMIN_CREATE_MR, + IONIC_V1_ADMIN_STATS_HDRS, + IONIC_V1_ADMIN_STATS_VALS, + IONIC_V1_ADMIN_DESTROY_MR, + IONIC_V1_ADMIN_RSVD_7, /* RESIZE_CQ */ + IONIC_V1_ADMIN_DESTROY_CQ, + IONIC_V1_ADMIN_MODIFY_QP, + IONIC_V1_ADMIN_QUERY_QP, + IONIC_V1_ADMIN_DESTROY_QP, + IONIC_V1_ADMIN_DEBUG, + IONIC_V1_ADMIN_CREATE_AH, + IONIC_V1_ADMIN_QUERY_AH, + IONIC_V1_ADMIN_MODIFY_DCQCN, + IONIC_V1_ADMIN_DESTROY_AH, + IONIC_V1_ADMIN_QP_STATS_HDRS, + IONIC_V1_ADMIN_QP_STATS_VALS, + IONIC_V1_ADMIN_OPCODES_MAX, +}; + /* admin queue v1 cqe status */ enum ionic_v1_admin_status { IONIC_V1_ASTS_OK, @@ -136,6 +837,22 @@ enum ionic_v1_eqe_evt_bits { IONIC_V1_EQE_QP_ERR_ACCESS = 10, }; +enum ionic_tfp_csum_profiles { + IONIC_TFP_CSUM_PROF_ETH_IPV4_UDP = 0, + IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP = 1, + IONIC_TFP_CSUM_PROF_ETH_IPV6_UDP = 2, + IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV6_UDP = 3, + IONIC_TFP_CSUM_PROF_IPV4_UDP_VXLAN_ETH_QTAG_IPV4_UDP = 4, + IONIC_TFP_CSUM_PROF_IPV4_UDP_VXLAN_ETH_QTAG_IPV6_UDP = 5, + IONIC_TFP_CSUM_PROF_QTAG_IPV4_UDP_VXLAN_ETH_QTAG_IPV4_UDP = 6, + 
IONIC_TFP_CSUM_PROF_QTAG_IPV4_UDP_VXLAN_ETH_QTAG_IPV6_UDP = 7, + IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP_ESP_IPV4_UDP = 8, + IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_ESP_UDP = 9, + IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP_ESP_UDP = 10, + IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV6_ESP_UDP = 11, + IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP_CSUM = 12, +}; + static inline bool ionic_v1_eqe_color(struct ionic_v1_eqe *eqe) { return eqe->evt & cpu_to_be32(IONIC_V1_EQE_COLOR); diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c index 7710190ff65f..6833abbfb1dc 100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.c +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c @@ -15,6 +15,44 @@ MODULE_DESCRIPTION(DRIVER_DESCRIPTION); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS("NET_IONIC"); +static const struct ib_device_ops ionic_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_IONIC, + .uverbs_abi_ver = IONIC_ABI_VERSION, + + .alloc_ucontext = ionic_alloc_ucontext, + .dealloc_ucontext = ionic_dealloc_ucontext, + .mmap = ionic_mmap, + .mmap_free = ionic_mmap_free, + .alloc_pd = ionic_alloc_pd, + .dealloc_pd = ionic_dealloc_pd, + .create_ah = ionic_create_ah, + .query_ah = ionic_query_ah, + .destroy_ah = ionic_destroy_ah, + .create_user_ah = ionic_create_ah, + .get_dma_mr = ionic_get_dma_mr, + .reg_user_mr = ionic_reg_user_mr, + .reg_user_mr_dmabuf = ionic_reg_user_mr_dmabuf, + .dereg_mr = ionic_dereg_mr, + .alloc_mr = ionic_alloc_mr, + .map_mr_sg = ionic_map_mr_sg, + .alloc_mw = ionic_alloc_mw, + .dealloc_mw = ionic_dealloc_mw, + .create_cq = ionic_create_cq, + .destroy_cq = ionic_destroy_cq, + .create_qp = ionic_create_qp, + .modify_qp = ionic_modify_qp, + .query_qp = ionic_query_qp, + .destroy_qp = ionic_destroy_qp, + + INIT_RDMA_OBJ_SIZE(ib_ucontext, ionic_ctx, ibctx), + INIT_RDMA_OBJ_SIZE(ib_pd, ionic_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ah, ionic_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_cq, ionic_vcq, ibcq), + INIT_RDMA_OBJ_SIZE(ib_qp, ionic_qp, ibqp), + INIT_RDMA_OBJ_SIZE(ib_mw, ionic_mr, ibmw), +}; + static void ionic_init_resids(struct ionic_ibdev *dev) { ionic_resid_init(&dev->inuse_cqid, dev->lif_cfg.cq_count); @@ -48,6 +86,8 @@ static void ionic_destroy_ibdev(struct ionic_ibdev *dev) ib_unregister_device(&dev->ibdev); ionic_destroy_rdma_admin(dev); ionic_destroy_resids(dev); + WARN_ON(!xa_empty(&dev->qp_tbl)); + xa_destroy(&dev->qp_tbl); WARN_ON(!xa_empty(&dev->cq_tbl)); xa_destroy(&dev->cq_tbl); ib_dealloc_device(&dev->ibdev); @@ -66,6 +106,7 @@ static struct ionic_ibdev *ionic_create_ibdev(struct ionic_aux_dev *ionic_adev) ionic_fill_lif_cfg(ionic_adev->lif, &dev->lif_cfg); + xa_init_flags(&dev->qp_tbl, GFP_ATOMIC); xa_init_flags(&dev->cq_tbl, GFP_ATOMIC); ionic_init_resids(dev); @@ -98,6 +139,8 @@ static struct ionic_ibdev *ionic_create_ibdev(struct ionic_aux_dev *ionic_adev) if (rc) goto err_admin; + ib_set_device_ops(&dev->ibdev, &ionic_dev_ops); + rc = ib_register_device(ibdev, "ionic_%d", ibdev->dev.parent); if (rc) goto err_register; @@ -110,6 +153,7 @@ err_admin: ionic_destroy_rdma_admin(dev); err_reset: ionic_destroy_resids(dev); + xa_destroy(&dev->qp_tbl); xa_destroy(&dev->cq_tbl); ib_dealloc_device(&dev->ibdev); @@ -161,7 +205,7 @@ static int __init ionic_mod_init(void) { int rc; - ionic_evt_workq = create_workqueue(DRIVER_NAME "-evt"); + ionic_evt_workq = create_workqueue(KBUILD_MODNAME "-evt"); if (!ionic_evt_workq) return -ENOMEM; diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.h b/drivers/infiniband/hw/ionic/ionic_ibdev.h index 490897628f41..94ef76aaca43 
100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.h +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.h @@ -6,7 +6,10 @@ #include #include +#include +#include +#include #include #include @@ -24,9 +27,26 @@ #define IONIC_EQ_ISR_BUDGET 10 #define IONIC_EQ_WORK_BUDGET 1000 #define IONIC_MAX_PD 1024 +#define IONIC_SPEC_HIGH 8 +#define IONIC_SQCMB_ORDER 5 +#define IONIC_RQCMB_ORDER 0 + +#define IONIC_META_LAST ((void *)1ul) +#define IONIC_META_POSTED ((void *)2ul) #define IONIC_CQ_GRACE 100 +#define IONIC_ROCE_UDP_SPORT 28272 +#define IONIC_DMA_LKEY 0 +#define IONIC_DMA_RKEY IONIC_DMA_LKEY + +#define IONIC_CMB_SUPPORTED \ + (IONIC_CMB_ENABLE | IONIC_CMB_REQUIRE | IONIC_CMB_EXPDB | \ + IONIC_CMB_WC | IONIC_CMB_UC) + +/* resource is not reserved on the device, indicated in tbl_order */ +#define IONIC_RES_INVALID -1 + struct ionic_aq; struct ionic_cq; struct ionic_eq; @@ -44,14 +64,6 @@ enum ionic_admin_flags { IONIC_ADMIN_F_INTERRUPT = BIT(2), /* Interruptible w/timeout */ }; -struct ionic_qdesc { - __aligned_u64 addr; - __u32 size; - __u16 mask; - __u8 depth_log2; - __u8 stride_log2; -}; - enum ionic_mmap_flag { IONIC_MMAP_WC = BIT(0), }; @@ -160,6 +172,13 @@ struct ionic_tbl_buf { u8 page_size_log2; }; +struct ionic_pd { + struct ib_pd ibpd; + + u32 pdid; + u32 flags; +}; + struct ionic_cq { struct ionic_vcq *vcq; @@ -193,11 +212,188 @@ struct ionic_vcq { u8 poll_idx; }; +struct ionic_sq_meta { + u64 wrid; + u32 len; + u16 seq; + u8 ibop; + u8 ibsts; + u8 remote:1; + u8 signal:1; + u8 local_comp:1; +}; + +struct ionic_rq_meta { + struct ionic_rq_meta *next; + u64 wrid; +}; + +struct ionic_qp { + struct ib_qp ibqp; + enum ib_qp_state state; + + u32 qpid; + u32 ahid; + u32 sq_cqid; + u32 rq_cqid; + u8 udma_idx; + u8 has_ah:1; + u8 has_sq:1; + u8 has_rq:1; + u8 sig_all:1; + + struct list_head qp_list_counter; + + struct list_head cq_poll_sq; + struct list_head cq_flush_sq; + struct list_head cq_flush_rq; + struct list_head ibkill_flush_ent; + + spinlock_t sq_lock; /* for posting and polling */ + struct ionic_queue sq; + struct ionic_sq_meta *sq_meta; + u16 *sq_msn_idx; + int sq_spec; + u16 sq_old_prod; + u16 sq_msn_prod; + u16 sq_msn_cons; + u8 sq_cmb; + bool sq_flush; + bool sq_flush_rcvd; + + spinlock_t rq_lock; /* for posting and polling */ + struct ionic_queue rq; + struct ionic_rq_meta *rq_meta; + struct ionic_rq_meta *rq_meta_head; + int rq_spec; + u16 rq_old_prod; + u8 rq_cmb; + bool rq_flush; + + struct kref qp_kref; + struct completion qp_rel_comp; + + /* infrequently accessed, keep at end */ + int sgid_index; + int sq_cmb_order; + u32 sq_cmb_pgid; + phys_addr_t sq_cmb_addr; + struct rdma_user_mmap_entry *mmap_sq_cmb; + + struct ib_umem *sq_umem; + + int rq_cmb_order; + u32 rq_cmb_pgid; + phys_addr_t rq_cmb_addr; + struct rdma_user_mmap_entry *mmap_rq_cmb; + + struct ib_umem *rq_umem; + + int dcqcn_profile; + + struct ib_ud_header *hdr; +}; + +struct ionic_ah { + struct ib_ah ibah; + u32 ahid; + int sgid_index; + struct ib_ud_header hdr; +}; + +struct ionic_mr { + union { + struct ib_mr ibmr; + struct ib_mw ibmw; + }; + + u32 mrid; + int flags; + + struct ib_umem *umem; + struct ionic_tbl_buf buf; + bool created; +}; + static inline struct ionic_ibdev *to_ionic_ibdev(struct ib_device *ibdev) { return container_of(ibdev, struct ionic_ibdev, ibdev); } +static inline struct ionic_ctx *to_ionic_ctx(struct ib_ucontext *ibctx) +{ + return container_of(ibctx, struct ionic_ctx, ibctx); +} + +static inline struct ionic_ctx *to_ionic_ctx_uobj(struct ib_uobject *uobj) +{ + if (!uobj) + return NULL; + 
+ if (!uobj->context) + return NULL; + + return to_ionic_ctx(uobj->context); +} + +static inline struct ionic_pd *to_ionic_pd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct ionic_pd, ibpd); +} + +static inline struct ionic_mr *to_ionic_mr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct ionic_mr, ibmr); +} + +static inline struct ionic_mr *to_ionic_mw(struct ib_mw *ibmw) +{ + return container_of(ibmw, struct ionic_mr, ibmw); +} + +static inline struct ionic_vcq *to_ionic_vcq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct ionic_vcq, ibcq); +} + +static inline struct ionic_cq *to_ionic_vcq_cq(struct ib_cq *ibcq, + uint8_t udma_idx) +{ + return &to_ionic_vcq(ibcq)->cq[udma_idx]; +} + +static inline struct ionic_qp *to_ionic_qp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct ionic_qp, ibqp); +} + +static inline struct ionic_ah *to_ionic_ah(struct ib_ah *ibah) +{ + return container_of(ibah, struct ionic_ah, ibah); +} + +static inline u32 ionic_ctx_dbid(struct ionic_ibdev *dev, + struct ionic_ctx *ctx) +{ + if (!ctx) + return dev->lif_cfg.dbid; + + return ctx->dbid; +} + +static inline u32 ionic_obj_dbid(struct ionic_ibdev *dev, + struct ib_uobject *uobj) +{ + return ionic_ctx_dbid(dev, to_ionic_ctx_uobj(uobj)); +} + +static inline void ionic_qp_complete(struct kref *kref) +{ + struct ionic_qp *qp = container_of(kref, struct ionic_qp, qp_kref); + + complete(&qp->qp_rel_comp); +} + static inline void ionic_cq_complete(struct kref *kref) { struct ionic_cq *cq = container_of(kref, struct ionic_cq, cq_kref); @@ -227,8 +423,47 @@ int ionic_create_cq_common(struct ionic_vcq *vcq, __u32 *resp_cqid, int udma_idx); void ionic_destroy_cq_common(struct ionic_ibdev *dev, struct ionic_cq *cq); +void ionic_flush_qp(struct ionic_ibdev *dev, struct ionic_qp *qp); +void ionic_notify_flush_cq(struct ionic_cq *cq); + +int ionic_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata); +void ionic_dealloc_ucontext(struct ib_ucontext *ibctx); +int ionic_mmap(struct ib_ucontext *ibctx, struct vm_area_struct *vma); +void ionic_mmap_free(struct rdma_user_mmap_entry *rdma_entry); +int ionic_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +int ionic_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +int ionic_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata); +int ionic_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); +int ionic_destroy_ah(struct ib_ah *ibah, u32 flags); +struct ib_mr *ionic_get_dma_mr(struct ib_pd *ibpd, int access); +struct ib_mr *ionic_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length, + u64 addr, int access, struct ib_dmah *dmah, + struct ib_udata *udata); +struct ib_mr *ionic_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 offset, + u64 length, u64 addr, int fd, int access, + struct ib_dmah *dmah, + struct uverbs_attr_bundle *attrs); +int ionic_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); +struct ib_mr *ionic_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type type, + u32 max_sg); +int ionic_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, + unsigned int *sg_offset); +int ionic_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata); +int ionic_dealloc_mw(struct ib_mw *ibmw); +int ionic_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct uverbs_attr_bundle *attrs); +int ionic_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +int ionic_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr, + struct ib_udata *udata); +int 
ionic_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask, + struct ib_udata *udata); +int ionic_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask, + struct ib_qp_init_attr *init_attr); +int ionic_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); /* ionic_pgtbl.c */ +__le64 ionic_pgtbl_dma(struct ionic_tbl_buf *buf, u64 va); int ionic_pgtbl_page(struct ionic_tbl_buf *buf, u64 dma); int ionic_pgtbl_init(struct ionic_ibdev *dev, struct ionic_tbl_buf *buf, diff --git a/drivers/infiniband/hw/ionic/ionic_pgtbl.c b/drivers/infiniband/hw/ionic/ionic_pgtbl.c index 11461f7642bc..a8eb73be6f86 100644 --- a/drivers/infiniband/hw/ionic/ionic_pgtbl.c +++ b/drivers/infiniband/hw/ionic/ionic_pgtbl.c @@ -7,6 +7,25 @@ #include "ionic_fw.h" #include "ionic_ibdev.h" +__le64 ionic_pgtbl_dma(struct ionic_tbl_buf *buf, u64 va) +{ + u64 pg_mask = BIT_ULL(buf->page_size_log2) - 1; + u64 dma; + + if (!buf->tbl_pages) + return cpu_to_le64(0); + + if (buf->tbl_pages > 1) + return cpu_to_le64(buf->tbl_dma); + + if (buf->tbl_buf) + dma = le64_to_cpu(buf->tbl_buf[0]); + else + dma = buf->tbl_dma; + + return cpu_to_le64(dma + (va & pg_mask)); +} + int ionic_pgtbl_page(struct ionic_tbl_buf *buf, u64 dma) { if (unlikely(buf->tbl_pages == buf->tbl_limit)) diff --git a/include/uapi/rdma/ionic-abi.h b/include/uapi/rdma/ionic-abi.h new file mode 100644 index 000000000000..7b589d3e9728 --- /dev/null +++ b/include/uapi/rdma/ionic-abi.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc */ + +#ifndef IONIC_ABI_H +#define IONIC_ABI_H + +#include + +#define IONIC_ABI_VERSION 1 + +#define IONIC_EXPDB_64 1 +#define IONIC_EXPDB_128 2 +#define IONIC_EXPDB_256 4 +#define IONIC_EXPDB_512 8 + +#define IONIC_EXPDB_SQ 1 +#define IONIC_EXPDB_RQ 2 + +#define IONIC_CMB_ENABLE 1 +#define IONIC_CMB_REQUIRE 2 +#define IONIC_CMB_EXPDB 4 +#define IONIC_CMB_WC 8 +#define IONIC_CMB_UC 16 + +struct ionic_ctx_req { + __u32 rsvd[2]; +}; + +struct ionic_ctx_resp { + __u32 rsvd; + __u32 page_shift; + + __aligned_u64 dbell_offset; + + __u16 version; + __u8 qp_opcodes; + __u8 admin_opcodes; + + __u8 sq_qtype; + __u8 rq_qtype; + __u8 cq_qtype; + __u8 admin_qtype; + + __u8 max_stride; + __u8 max_spec; + __u8 udma_count; + __u8 expdb_mask; + __u8 expdb_qtypes; + + __u8 rsvd2[3]; +}; + +struct ionic_qdesc { + __aligned_u64 addr; + __u32 size; + __u16 mask; + __u8 depth_log2; + __u8 stride_log2; +}; + +struct ionic_ah_resp { + __u32 ahid; + __u32 pad; +}; + +struct ionic_cq_req { + struct ionic_qdesc cq[2]; + __u8 udma_mask; + __u8 rsvd[7]; +}; + +struct ionic_cq_resp { + __u32 cqid[2]; + __u8 udma_mask; + __u8 rsvd[7]; +}; + +struct ionic_qp_req { + struct ionic_qdesc sq; + struct ionic_qdesc rq; + __u8 sq_spec; + __u8 rq_spec; + __u8 sq_cmb; + __u8 rq_cmb; + __u8 udma_mask; + __u8 rsvd[3]; +}; + +struct ionic_qp_resp { + __u32 qpid; + __u8 sq_cmb; + __u8 rq_cmb; + __u8 udma_idx; + __u8 rsvd[1]; + __aligned_u64 sq_cmb_offset; + __aligned_u64 rq_cmb_offset; +}; + +struct ionic_srq_req { + struct ionic_qdesc rq; + __u8 rq_spec; + __u8 rq_cmb; + __u8 udma_mask; + __u8 rsvd[5]; +}; + +struct ionic_srq_resp { + __u32 qpid; + __u8 rq_cmb; + __u8 udma_idx; + __u8 rsvd[2]; + __aligned_u64 rq_cmb_offset; +}; + +#endif /* IONIC_ABI_H */ From b83c62055b6faabb444b2f8f3355420927cc39fd Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Wed, 3 Sep 2025 11:46:03 +0530 Subject: [PATCH 45/85] RDMA/ionic: Register device ops for datapath Implement device 
supported verb APIs for datapath. Co-developed-by: Andrew Boyer Signed-off-by: Andrew Boyer Co-developed-by: Allen Hubbe Signed-off-by: Allen Hubbe Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20250903061606.4139957-12-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/ionic/ionic_datapath.c | 1399 ++++++++++++++++++ drivers/infiniband/hw/ionic/ionic_fw.h | 105 ++ drivers/infiniband/hw/ionic/ionic_ibdev.c | 5 + drivers/infiniband/hw/ionic/ionic_ibdev.h | 14 + drivers/infiniband/hw/ionic/ionic_pgtbl.c | 11 + 5 files changed, 1534 insertions(+) create mode 100644 drivers/infiniband/hw/ionic/ionic_datapath.c diff --git a/drivers/infiniband/hw/ionic/ionic_datapath.c b/drivers/infiniband/hw/ionic/ionic_datapath.c new file mode 100644 index 000000000000..aa2944887f23 --- /dev/null +++ b/drivers/infiniband/hw/ionic/ionic_datapath.c @@ -0,0 +1,1399 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */ + +#include +#include +#include +#include + +#include "ionic_fw.h" +#include "ionic_ibdev.h" + +#define IONIC_OP(version, opname) \ + ((version) < 2 ? IONIC_V1_OP_##opname : IONIC_V2_OP_##opname) + +static bool ionic_next_cqe(struct ionic_ibdev *dev, struct ionic_cq *cq, + struct ionic_v1_cqe **cqe) +{ + struct ionic_v1_cqe *qcqe = ionic_queue_at_prod(&cq->q); + + if (unlikely(cq->color != ionic_v1_cqe_color(qcqe))) + return false; + + /* Prevent out-of-order reads of the CQE */ + dma_rmb(); + + *cqe = qcqe; + + return true; +} + +static int ionic_flush_recv(struct ionic_qp *qp, struct ib_wc *wc) +{ + struct ionic_rq_meta *meta; + struct ionic_v1_wqe *wqe; + + if (!qp->rq_flush) + return 0; + + if (ionic_queue_empty(&qp->rq)) + return 0; + + wqe = ionic_queue_at_cons(&qp->rq); + + /* wqe_id must be a valid queue index */ + if (unlikely(wqe->base.wqe_id >> qp->rq.depth_log2)) { + ibdev_warn(qp->ibqp.device, + "flush qp %u recv index %llu invalid\n", + qp->qpid, (unsigned long long)wqe->base.wqe_id); + return -EIO; + } + + /* wqe_id must indicate a request that is outstanding */ + meta = &qp->rq_meta[wqe->base.wqe_id]; + if (unlikely(meta->next != IONIC_META_POSTED)) { + ibdev_warn(qp->ibqp.device, + "flush qp %u recv index %llu not posted\n", + qp->qpid, (unsigned long long)wqe->base.wqe_id); + return -EIO; + } + + ionic_queue_consume(&qp->rq); + + memset(wc, 0, sizeof(*wc)); + + wc->status = IB_WC_WR_FLUSH_ERR; + wc->wr_id = meta->wrid; + wc->qp = &qp->ibqp; + + meta->next = qp->rq_meta_head; + qp->rq_meta_head = meta; + + return 1; +} + +static int ionic_flush_recv_many(struct ionic_qp *qp, + struct ib_wc *wc, int nwc) +{ + int rc = 0, npolled = 0; + + while (npolled < nwc) { + rc = ionic_flush_recv(qp, wc + npolled); + if (rc <= 0) + break; + + npolled += rc; + } + + return npolled ?: rc; +} + +static int ionic_flush_send(struct ionic_qp *qp, struct ib_wc *wc) +{ + struct ionic_sq_meta *meta; + + if (!qp->sq_flush) + return 0; + + if (ionic_queue_empty(&qp->sq)) + return 0; + + meta = &qp->sq_meta[qp->sq.cons]; + + ionic_queue_consume(&qp->sq); + + memset(wc, 0, sizeof(*wc)); + + wc->status = IB_WC_WR_FLUSH_ERR; + wc->wr_id = meta->wrid; + wc->qp = &qp->ibqp; + + return 1; +} + +static int ionic_flush_send_many(struct ionic_qp *qp, + struct ib_wc *wc, int nwc) +{ + int rc = 0, npolled = 0; + + while (npolled < nwc) { + rc = ionic_flush_send(qp, wc + npolled); + if (rc <= 0) + break; + + npolled += rc; + } + + return npolled ?: rc; +} + +static int ionic_poll_recv(struct ionic_ibdev *dev, struct ionic_cq 
*cq, + struct ionic_qp *cqe_qp, struct ionic_v1_cqe *cqe, + struct ib_wc *wc) +{ + struct ionic_qp *qp = NULL; + struct ionic_rq_meta *meta; + u32 src_qpn, st_len; + u16 vlan_tag; + u8 op; + + if (cqe_qp->rq_flush) + return 0; + + qp = cqe_qp; + + st_len = be32_to_cpu(cqe->status_length); + + /* ignore wqe_id in case of flush error */ + if (ionic_v1_cqe_error(cqe) && st_len == IONIC_STS_WQE_FLUSHED_ERR) { + cqe_qp->rq_flush = true; + cq->flush = true; + list_move_tail(&qp->cq_flush_rq, &cq->flush_rq); + + /* posted recvs (if any) flushed by ionic_flush_recv */ + return 0; + } + + /* there had better be something in the recv queue to complete */ + if (ionic_queue_empty(&qp->rq)) { + ibdev_warn(&dev->ibdev, "qp %u is empty\n", qp->qpid); + return -EIO; + } + + /* wqe_id must be a valid queue index */ + if (unlikely(cqe->recv.wqe_id >> qp->rq.depth_log2)) { + ibdev_warn(&dev->ibdev, + "qp %u recv index %llu invalid\n", + qp->qpid, (unsigned long long)cqe->recv.wqe_id); + return -EIO; + } + + /* wqe_id must indicate a request that is outstanding */ + meta = &qp->rq_meta[cqe->recv.wqe_id]; + if (unlikely(meta->next != IONIC_META_POSTED)) { + ibdev_warn(&dev->ibdev, + "qp %u recv index %llu not posted\n", + qp->qpid, (unsigned long long)cqe->recv.wqe_id); + return -EIO; + } + + meta->next = qp->rq_meta_head; + qp->rq_meta_head = meta; + + memset(wc, 0, sizeof(*wc)); + + wc->wr_id = meta->wrid; + + wc->qp = &cqe_qp->ibqp; + + if (ionic_v1_cqe_error(cqe)) { + wc->vendor_err = st_len; + wc->status = ionic_to_ib_status(st_len); + + cqe_qp->rq_flush = true; + cq->flush = true; + list_move_tail(&qp->cq_flush_rq, &cq->flush_rq); + + ibdev_warn(&dev->ibdev, + "qp %d recv cqe with error\n", qp->qpid); + print_hex_dump(KERN_WARNING, "cqe ", DUMP_PREFIX_OFFSET, 16, 1, + cqe, BIT(cq->q.stride_log2), true); + goto out; + } + + wc->vendor_err = 0; + wc->status = IB_WC_SUCCESS; + + src_qpn = be32_to_cpu(cqe->recv.src_qpn_op); + op = src_qpn >> IONIC_V1_CQE_RECV_OP_SHIFT; + + src_qpn &= IONIC_V1_CQE_RECV_QPN_MASK; + op &= IONIC_V1_CQE_RECV_OP_MASK; + + wc->opcode = IB_WC_RECV; + switch (op) { + case IONIC_V1_CQE_RECV_OP_RDMA_IMM: + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + wc->wc_flags |= IB_WC_WITH_IMM; + wc->ex.imm_data = cqe->recv.imm_data_rkey; /* be32 in wc */ + break; + case IONIC_V1_CQE_RECV_OP_SEND_IMM: + wc->wc_flags |= IB_WC_WITH_IMM; + wc->ex.imm_data = cqe->recv.imm_data_rkey; /* be32 in wc */ + break; + case IONIC_V1_CQE_RECV_OP_SEND_INV: + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + wc->ex.invalidate_rkey = be32_to_cpu(cqe->recv.imm_data_rkey); + break; + } + + wc->byte_len = st_len; + wc->src_qp = src_qpn; + + if (qp->ibqp.qp_type == IB_QPT_UD || + qp->ibqp.qp_type == IB_QPT_GSI) { + wc->wc_flags |= IB_WC_GRH | IB_WC_WITH_SMAC; + ether_addr_copy(wc->smac, cqe->recv.src_mac); + + wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE; + if (ionic_v1_cqe_recv_is_ipv4(cqe)) + wc->network_hdr_type = RDMA_NETWORK_IPV4; + else + wc->network_hdr_type = RDMA_NETWORK_IPV6; + + if (ionic_v1_cqe_recv_is_vlan(cqe)) + wc->wc_flags |= IB_WC_WITH_VLAN; + + /* vlan_tag in cqe will be valid from dpath even if no vlan */ + vlan_tag = be16_to_cpu(cqe->recv.vlan_tag); + wc->vlan_id = vlan_tag & 0xfff; /* 802.1q VID */ + wc->sl = vlan_tag >> VLAN_PRIO_SHIFT; /* 802.1q PCP */ + } + + wc->pkey_index = 0; + wc->port_num = 1; + +out: + ionic_queue_consume(&qp->rq); + + return 1; +} + +static bool ionic_peek_send(struct ionic_qp *qp) +{ + struct ionic_sq_meta *meta; + + if (qp->sq_flush) + return false; + + /* completed all send queue 
requests */ + if (ionic_queue_empty(&qp->sq)) + return false; + + meta = &qp->sq_meta[qp->sq.cons]; + + /* waiting for remote completion */ + if (meta->remote && meta->seq == qp->sq_msn_cons) + return false; + + /* waiting for local completion */ + if (!meta->remote && !meta->local_comp) + return false; + + return true; +} + +static int ionic_poll_send(struct ionic_ibdev *dev, struct ionic_cq *cq, + struct ionic_qp *qp, struct ib_wc *wc) +{ + struct ionic_sq_meta *meta; + + if (qp->sq_flush) + return 0; + + do { + /* completed all send queue requests */ + if (ionic_queue_empty(&qp->sq)) + goto out_empty; + + meta = &qp->sq_meta[qp->sq.cons]; + + /* waiting for remote completion */ + if (meta->remote && meta->seq == qp->sq_msn_cons) + goto out_empty; + + /* waiting for local completion */ + if (!meta->remote && !meta->local_comp) + goto out_empty; + + ionic_queue_consume(&qp->sq); + + /* produce wc only if signaled or error status */ + } while (!meta->signal && meta->ibsts == IB_WC_SUCCESS); + + memset(wc, 0, sizeof(*wc)); + + wc->status = meta->ibsts; + wc->wr_id = meta->wrid; + wc->qp = &qp->ibqp; + + if (meta->ibsts == IB_WC_SUCCESS) { + wc->byte_len = meta->len; + wc->opcode = meta->ibop; + } else { + wc->vendor_err = meta->len; + + qp->sq_flush = true; + cq->flush = true; + list_move_tail(&qp->cq_flush_sq, &cq->flush_sq); + } + + return 1; + +out_empty: + if (qp->sq_flush_rcvd) { + qp->sq_flush = true; + cq->flush = true; + list_move_tail(&qp->cq_flush_sq, &cq->flush_sq); + } + return 0; +} + +static int ionic_poll_send_many(struct ionic_ibdev *dev, struct ionic_cq *cq, + struct ionic_qp *qp, struct ib_wc *wc, int nwc) +{ + int rc = 0, npolled = 0; + + while (npolled < nwc) { + rc = ionic_poll_send(dev, cq, qp, wc + npolled); + if (rc <= 0) + break; + + npolled += rc; + } + + return npolled ?: rc; +} + +static int ionic_validate_cons(u16 prod, u16 cons, + u16 comp, u16 mask) +{ + if (((prod - cons) & mask) <= ((comp - cons) & mask)) + return -EIO; + + return 0; +} + +static int ionic_comp_msn(struct ionic_qp *qp, struct ionic_v1_cqe *cqe) +{ + struct ionic_sq_meta *meta; + u16 cqe_seq, cqe_idx; + int rc; + + if (qp->sq_flush) + return 0; + + cqe_seq = be32_to_cpu(cqe->send.msg_msn) & qp->sq.mask; + + rc = ionic_validate_cons(qp->sq_msn_prod, + qp->sq_msn_cons, + cqe_seq - 1, + qp->sq.mask); + if (rc) { + ibdev_warn(qp->ibqp.device, + "qp %u bad msn %#x seq %u for prod %u cons %u\n", + qp->qpid, be32_to_cpu(cqe->send.msg_msn), + cqe_seq, qp->sq_msn_prod, qp->sq_msn_cons); + return rc; + } + + qp->sq_msn_cons = cqe_seq; + + if (ionic_v1_cqe_error(cqe)) { + cqe_idx = qp->sq_msn_idx[(cqe_seq - 1) & qp->sq.mask]; + + meta = &qp->sq_meta[cqe_idx]; + meta->len = be32_to_cpu(cqe->status_length); + meta->ibsts = ionic_to_ib_status(meta->len); + + ibdev_warn(qp->ibqp.device, + "qp %d msn cqe with error\n", qp->qpid); + print_hex_dump(KERN_WARNING, "cqe ", DUMP_PREFIX_OFFSET, 16, 1, + cqe, sizeof(*cqe), true); + } + + return 0; +} + +static int ionic_comp_npg(struct ionic_qp *qp, struct ionic_v1_cqe *cqe) +{ + struct ionic_sq_meta *meta; + u16 cqe_idx; + u32 st_len; + + if (qp->sq_flush) + return 0; + + st_len = be32_to_cpu(cqe->status_length); + + if (ionic_v1_cqe_error(cqe) && st_len == IONIC_STS_WQE_FLUSHED_ERR) { + /* + * Flush cqe does not consume a wqe on the device, and maybe + * no such work request is posted. + * + * The driver should begin flushing after the last indicated + * normal or error completion. Here, only set a hint that the + * flush request was indicated. 
In poll_send, if nothing more + * can be polled normally, then begin flushing. + */ + qp->sq_flush_rcvd = true; + return 0; + } + + cqe_idx = cqe->send.npg_wqe_id & qp->sq.mask; + meta = &qp->sq_meta[cqe_idx]; + meta->local_comp = true; + + if (ionic_v1_cqe_error(cqe)) { + meta->len = st_len; + meta->ibsts = ionic_to_ib_status(st_len); + meta->remote = false; + ibdev_warn(qp->ibqp.device, + "qp %d npg cqe with error\n", qp->qpid); + print_hex_dump(KERN_WARNING, "cqe ", DUMP_PREFIX_OFFSET, 16, 1, + cqe, sizeof(*cqe), true); + } + + return 0; +} + +static void ionic_reserve_sync_cq(struct ionic_ibdev *dev, struct ionic_cq *cq) +{ + if (!ionic_queue_empty(&cq->q)) { + cq->credit += ionic_queue_length(&cq->q); + cq->q.cons = cq->q.prod; + + ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.cq_qtype, + ionic_queue_dbell_val(&cq->q)); + } +} + +static void ionic_reserve_cq(struct ionic_ibdev *dev, struct ionic_cq *cq, + int spend) +{ + cq->credit -= spend; + + if (cq->credit <= 0) + ionic_reserve_sync_cq(dev, cq); +} + +static int ionic_poll_vcq_cq(struct ionic_ibdev *dev, + struct ionic_cq *cq, + int nwc, struct ib_wc *wc) +{ + struct ionic_qp *qp, *qp_next; + struct ionic_v1_cqe *cqe; + int rc = 0, npolled = 0; + unsigned long irqflags; + u32 qtf, qid; + bool peek; + u8 type; + + if (nwc < 1) + return 0; + + spin_lock_irqsave(&cq->lock, irqflags); + + /* poll already indicated work completions for send queue */ + list_for_each_entry_safe(qp, qp_next, &cq->poll_sq, cq_poll_sq) { + if (npolled == nwc) + goto out; + + spin_lock(&qp->sq_lock); + rc = ionic_poll_send_many(dev, cq, qp, wc + npolled, + nwc - npolled); + spin_unlock(&qp->sq_lock); + + if (rc > 0) + npolled += rc; + + if (npolled < nwc) + list_del_init(&qp->cq_poll_sq); + } + + /* poll for more work completions */ + while (likely(ionic_next_cqe(dev, cq, &cqe))) { + if (npolled == nwc) + goto out; + + qtf = ionic_v1_cqe_qtf(cqe); + qid = ionic_v1_cqe_qtf_qid(qtf); + type = ionic_v1_cqe_qtf_type(qtf); + + /* + * Safe to access QP without additional reference here as, + * 1. We hold cq->lock throughout + * 2. ionic_destroy_qp() acquires the same cq->lock before cleanup + * 3. QP is removed from qp_tbl before any cleanup begins + * This ensures no concurrent access between polling and destruction. 
+ */ + qp = xa_load(&dev->qp_tbl, qid); + if (unlikely(!qp)) { + ibdev_dbg(&dev->ibdev, "missing qp for qid %u\n", qid); + goto cq_next; + } + + switch (type) { + case IONIC_V1_CQE_TYPE_RECV: + spin_lock(&qp->rq_lock); + rc = ionic_poll_recv(dev, cq, qp, cqe, wc + npolled); + spin_unlock(&qp->rq_lock); + + if (rc < 0) + goto out; + + npolled += rc; + + break; + + case IONIC_V1_CQE_TYPE_SEND_MSN: + spin_lock(&qp->sq_lock); + rc = ionic_comp_msn(qp, cqe); + if (!rc) { + rc = ionic_poll_send_many(dev, cq, qp, + wc + npolled, + nwc - npolled); + peek = ionic_peek_send(qp); + } + spin_unlock(&qp->sq_lock); + + if (rc < 0) + goto out; + + npolled += rc; + + if (peek) + list_move_tail(&qp->cq_poll_sq, &cq->poll_sq); + break; + + case IONIC_V1_CQE_TYPE_SEND_NPG: + spin_lock(&qp->sq_lock); + rc = ionic_comp_npg(qp, cqe); + if (!rc) { + rc = ionic_poll_send_many(dev, cq, qp, + wc + npolled, + nwc - npolled); + peek = ionic_peek_send(qp); + } + spin_unlock(&qp->sq_lock); + + if (rc < 0) + goto out; + + npolled += rc; + + if (peek) + list_move_tail(&qp->cq_poll_sq, &cq->poll_sq); + break; + + default: + ibdev_warn(&dev->ibdev, + "unexpected cqe type %u\n", type); + rc = -EIO; + goto out; + } + +cq_next: + ionic_queue_produce(&cq->q); + cq->color = ionic_color_wrap(cq->q.prod, cq->color); + } + + /* lastly, flush send and recv queues */ + if (likely(!cq->flush)) + goto out; + + cq->flush = false; + + list_for_each_entry_safe(qp, qp_next, &cq->flush_sq, cq_flush_sq) { + if (npolled == nwc) + goto out; + + spin_lock(&qp->sq_lock); + rc = ionic_flush_send_many(qp, wc + npolled, nwc - npolled); + spin_unlock(&qp->sq_lock); + + if (rc > 0) + npolled += rc; + + if (npolled < nwc) + list_del_init(&qp->cq_flush_sq); + else + cq->flush = true; + } + + list_for_each_entry_safe(qp, qp_next, &cq->flush_rq, cq_flush_rq) { + if (npolled == nwc) + goto out; + + spin_lock(&qp->rq_lock); + rc = ionic_flush_recv_many(qp, wc + npolled, nwc - npolled); + spin_unlock(&qp->rq_lock); + + if (rc > 0) + npolled += rc; + + if (npolled < nwc) + list_del_init(&qp->cq_flush_rq); + else + cq->flush = true; + } + +out: + /* in case credit was depleted (more work posted than cq depth) */ + if (cq->credit <= 0) + ionic_reserve_sync_cq(dev, cq); + + spin_unlock_irqrestore(&cq->lock, irqflags); + + return npolled ?: rc; +} + +int ionic_poll_cq(struct ib_cq *ibcq, int nwc, struct ib_wc *wc) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibcq->device); + struct ionic_vcq *vcq = to_ionic_vcq(ibcq); + int rc_tmp, rc = 0, npolled = 0; + int cq_i, cq_x, cq_ix; + + cq_x = vcq->poll_idx; + vcq->poll_idx ^= dev->lif_cfg.udma_count - 1; + + for (cq_i = 0; npolled < nwc && cq_i < dev->lif_cfg.udma_count; ++cq_i) { + cq_ix = cq_i ^ cq_x; + + if (!(vcq->udma_mask & BIT(cq_ix))) + continue; + + rc_tmp = ionic_poll_vcq_cq(dev, &vcq->cq[cq_ix], + nwc - npolled, + wc + npolled); + + if (rc_tmp >= 0) + npolled += rc_tmp; + else if (!rc) + rc = rc_tmp; + } + + return npolled ?: rc; +} + +static int ionic_req_notify_vcq_cq(struct ionic_ibdev *dev, struct ionic_cq *cq, + enum ib_cq_notify_flags flags) +{ + u64 dbell_val = cq->q.dbell; + + if (flags & IB_CQ_SOLICITED) { + cq->arm_sol_prod = ionic_queue_next(&cq->q, cq->arm_sol_prod); + dbell_val |= cq->arm_sol_prod | IONIC_CQ_RING_SOL; + } else { + cq->arm_any_prod = ionic_queue_next(&cq->q, cq->arm_any_prod); + dbell_val |= cq->arm_any_prod | IONIC_CQ_RING_ARM; + } + + ionic_reserve_sync_cq(dev, cq); + + ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.cq_qtype, dbell_val); + + /* + * 
IB_CQ_REPORT_MISSED_EVENTS: + * + * The queue index in ring zero guarantees no missed events. + * + * Here, we check if the color bit in the next cqe is flipped. If it + * is flipped, then progress can be made by immediately polling the cq. + * Still, the cq will be armed, and an event will be generated. The cq + * may be empty when polled after the event, because the next poll + * after arming the cq can empty it. + */ + return (flags & IB_CQ_REPORT_MISSED_EVENTS) && + cq->color == ionic_v1_cqe_color(ionic_queue_at_prod(&cq->q)); +} + +int ionic_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibcq->device); + struct ionic_vcq *vcq = to_ionic_vcq(ibcq); + int rc = 0, cq_i; + + for (cq_i = 0; cq_i < dev->lif_cfg.udma_count; ++cq_i) { + if (!(vcq->udma_mask & BIT(cq_i))) + continue; + + if (ionic_req_notify_vcq_cq(dev, &vcq->cq[cq_i], flags)) + rc = 1; + } + + return rc; +} + +static s64 ionic_prep_inline(void *data, u32 max_data, + const struct ib_sge *ib_sgl, int num_sge) +{ + static const s64 bit_31 = 1u << 31; + s64 len = 0, sg_len; + int sg_i; + + for (sg_i = 0; sg_i < num_sge; ++sg_i) { + sg_len = ib_sgl[sg_i].length; + + /* sge length zero means 2GB */ + if (unlikely(sg_len == 0)) + sg_len = bit_31; + + /* greater than max inline data is invalid */ + if (unlikely(len + sg_len > max_data)) + return -EINVAL; + + memcpy(data + len, (void *)ib_sgl[sg_i].addr, sg_len); + + len += sg_len; + } + + return len; +} + +static s64 ionic_prep_pld(struct ionic_v1_wqe *wqe, + union ionic_v1_pld *pld, + int spec, u32 max_sge, + const struct ib_sge *ib_sgl, + int num_sge) +{ + static const s64 bit_31 = 1l << 31; + struct ionic_sge *sgl; + __be32 *spec32 = NULL; + __be16 *spec16 = NULL; + s64 len = 0, sg_len; + int sg_i = 0; + + if (unlikely(num_sge < 0 || (u32)num_sge > max_sge)) + return -EINVAL; + + if (spec && num_sge > IONIC_V1_SPEC_FIRST_SGE) { + sg_i = IONIC_V1_SPEC_FIRST_SGE; + + if (num_sge > 8) { + wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_SPEC16); + spec16 = pld->spec16; + } else { + wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_SPEC32); + spec32 = pld->spec32; + } + } + + sgl = &pld->sgl[sg_i]; + + for (sg_i = 0; sg_i < num_sge; ++sg_i) { + sg_len = ib_sgl[sg_i].length; + + /* sge length zero means 2GB */ + if (unlikely(sg_len == 0)) + sg_len = bit_31; + + /* greater than 2GB data is invalid */ + if (unlikely(len + sg_len > bit_31)) + return -EINVAL; + + sgl[sg_i].va = cpu_to_be64(ib_sgl[sg_i].addr); + sgl[sg_i].len = cpu_to_be32(sg_len); + sgl[sg_i].lkey = cpu_to_be32(ib_sgl[sg_i].lkey); + + if (spec32) { + spec32[sg_i] = sgl[sg_i].len; + } else if (spec16) { + if (unlikely(sg_len > U16_MAX)) + return -EINVAL; + spec16[sg_i] = cpu_to_be16(sg_len); + } + + len += sg_len; + } + + return len; +} + +static void ionic_prep_base(struct ionic_qp *qp, + const struct ib_send_wr *wr, + struct ionic_sq_meta *meta, + struct ionic_v1_wqe *wqe) +{ + meta->wrid = wr->wr_id; + meta->ibsts = IB_WC_SUCCESS; + meta->signal = false; + meta->local_comp = false; + + wqe->base.wqe_id = qp->sq.prod; + + if (wr->send_flags & IB_SEND_FENCE) + wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_FENCE); + + if (wr->send_flags & IB_SEND_SOLICITED) + wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_SOL); + + if (qp->sig_all || wr->send_flags & IB_SEND_SIGNALED) { + wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_SIG); + meta->signal = true; + } + + meta->seq = qp->sq_msn_prod; + meta->remote = + qp->ibqp.qp_type != IB_QPT_UD && + qp->ibqp.qp_type != IB_QPT_GSI && + 
!ionic_ibop_is_local(wr->opcode); + + if (meta->remote) { + qp->sq_msn_idx[meta->seq] = qp->sq.prod; + qp->sq_msn_prod = ionic_queue_next(&qp->sq, qp->sq_msn_prod); + } + + ionic_queue_produce(&qp->sq); +} + +static int ionic_prep_common(struct ionic_qp *qp, + const struct ib_send_wr *wr, + struct ionic_sq_meta *meta, + struct ionic_v1_wqe *wqe) +{ + s64 signed_len; + u32 mval; + + if (wr->send_flags & IB_SEND_INLINE) { + wqe->base.num_sge_key = 0; + wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_INL); + mval = ionic_v1_send_wqe_max_data(qp->sq.stride_log2, false); + signed_len = ionic_prep_inline(wqe->common.pld.data, mval, + wr->sg_list, wr->num_sge); + } else { + wqe->base.num_sge_key = wr->num_sge; + mval = ionic_v1_send_wqe_max_sge(qp->sq.stride_log2, + qp->sq_spec, + false); + signed_len = ionic_prep_pld(wqe, &wqe->common.pld, + qp->sq_spec, mval, + wr->sg_list, wr->num_sge); + } + + if (unlikely(signed_len < 0)) + return signed_len; + + meta->len = signed_len; + wqe->common.length = cpu_to_be32(signed_len); + + ionic_prep_base(qp, wr, meta, wqe); + + return 0; +} + +static void ionic_prep_sq_wqe(struct ionic_qp *qp, void *wqe) +{ + memset(wqe, 0, 1u << qp->sq.stride_log2); +} + +static void ionic_prep_rq_wqe(struct ionic_qp *qp, void *wqe) +{ + memset(wqe, 0, 1u << qp->rq.stride_log2); +} + +static int ionic_prep_send(struct ionic_qp *qp, + const struct ib_send_wr *wr) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device); + struct ionic_sq_meta *meta; + struct ionic_v1_wqe *wqe; + + meta = &qp->sq_meta[qp->sq.prod]; + wqe = ionic_queue_at_prod(&qp->sq); + + ionic_prep_sq_wqe(qp, wqe); + + meta->ibop = IB_WC_SEND; + + switch (wr->opcode) { + case IB_WR_SEND: + wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND); + break; + case IB_WR_SEND_WITH_IMM: + wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND_IMM); + wqe->base.imm_data_key = wr->ex.imm_data; + break; + case IB_WR_SEND_WITH_INV: + wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND_INV); + wqe->base.imm_data_key = + cpu_to_be32(wr->ex.invalidate_rkey); + break; + default: + return -EINVAL; + } + + return ionic_prep_common(qp, wr, meta, wqe); +} + +static int ionic_prep_send_ud(struct ionic_qp *qp, + const struct ib_ud_wr *wr) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device); + struct ionic_sq_meta *meta; + struct ionic_v1_wqe *wqe; + struct ionic_ah *ah; + + if (unlikely(!wr->ah)) + return -EINVAL; + + ah = to_ionic_ah(wr->ah); + + meta = &qp->sq_meta[qp->sq.prod]; + wqe = ionic_queue_at_prod(&qp->sq); + + ionic_prep_sq_wqe(qp, wqe); + + wqe->common.send.ah_id = cpu_to_be32(ah->ahid); + wqe->common.send.dest_qpn = cpu_to_be32(wr->remote_qpn); + wqe->common.send.dest_qkey = cpu_to_be32(wr->remote_qkey); + + meta->ibop = IB_WC_SEND; + + switch (wr->wr.opcode) { + case IB_WR_SEND: + wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND); + break; + case IB_WR_SEND_WITH_IMM: + wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND_IMM); + wqe->base.imm_data_key = wr->wr.ex.imm_data; + break; + default: + return -EINVAL; + } + + return ionic_prep_common(qp, &wr->wr, meta, wqe); +} + +static int ionic_prep_rdma(struct ionic_qp *qp, + const struct ib_rdma_wr *wr) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device); + struct ionic_sq_meta *meta; + struct ionic_v1_wqe *wqe; + + meta = &qp->sq_meta[qp->sq.prod]; + wqe = ionic_queue_at_prod(&qp->sq); + + ionic_prep_sq_wqe(qp, wqe); + + meta->ibop = IB_WC_RDMA_WRITE; + + switch (wr->wr.opcode) { + case IB_WR_RDMA_READ: + if 
(wr->wr.send_flags & (IB_SEND_SOLICITED | IB_SEND_INLINE)) + return -EINVAL; + meta->ibop = IB_WC_RDMA_READ; + wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, RDMA_READ); + break; + case IB_WR_RDMA_WRITE: + if (wr->wr.send_flags & IB_SEND_SOLICITED) + return -EINVAL; + wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, RDMA_WRITE); + break; + case IB_WR_RDMA_WRITE_WITH_IMM: + wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, RDMA_WRITE_IMM); + wqe->base.imm_data_key = wr->wr.ex.imm_data; + break; + default: + return -EINVAL; + } + + wqe->common.rdma.remote_va_high = cpu_to_be32(wr->remote_addr >> 32); + wqe->common.rdma.remote_va_low = cpu_to_be32(wr->remote_addr); + wqe->common.rdma.remote_rkey = cpu_to_be32(wr->rkey); + + return ionic_prep_common(qp, &wr->wr, meta, wqe); +} + +static int ionic_prep_atomic(struct ionic_qp *qp, + const struct ib_atomic_wr *wr) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device); + struct ionic_sq_meta *meta; + struct ionic_v1_wqe *wqe; + + if (wr->wr.num_sge != 1 || wr->wr.sg_list[0].length != 8) + return -EINVAL; + + if (wr->wr.send_flags & (IB_SEND_SOLICITED | IB_SEND_INLINE)) + return -EINVAL; + + meta = &qp->sq_meta[qp->sq.prod]; + wqe = ionic_queue_at_prod(&qp->sq); + + ionic_prep_sq_wqe(qp, wqe); + + meta->ibop = IB_WC_RDMA_WRITE; + + switch (wr->wr.opcode) { + case IB_WR_ATOMIC_CMP_AND_SWP: + meta->ibop = IB_WC_COMP_SWAP; + wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, ATOMIC_CS); + wqe->atomic.swap_add_high = cpu_to_be32(wr->swap >> 32); + wqe->atomic.swap_add_low = cpu_to_be32(wr->swap); + wqe->atomic.compare_high = cpu_to_be32(wr->compare_add >> 32); + wqe->atomic.compare_low = cpu_to_be32(wr->compare_add); + break; + case IB_WR_ATOMIC_FETCH_AND_ADD: + meta->ibop = IB_WC_FETCH_ADD; + wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, ATOMIC_FA); + wqe->atomic.swap_add_high = cpu_to_be32(wr->compare_add >> 32); + wqe->atomic.swap_add_low = cpu_to_be32(wr->compare_add); + break; + default: + return -EINVAL; + } + + wqe->atomic.remote_va_high = cpu_to_be32(wr->remote_addr >> 32); + wqe->atomic.remote_va_low = cpu_to_be32(wr->remote_addr); + wqe->atomic.remote_rkey = cpu_to_be32(wr->rkey); + + wqe->base.num_sge_key = 1; + wqe->atomic.sge.va = cpu_to_be64(wr->wr.sg_list[0].addr); + wqe->atomic.sge.len = cpu_to_be32(8); + wqe->atomic.sge.lkey = cpu_to_be32(wr->wr.sg_list[0].lkey); + + return ionic_prep_common(qp, &wr->wr, meta, wqe); +} + +static int ionic_prep_inv(struct ionic_qp *qp, + const struct ib_send_wr *wr) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device); + struct ionic_sq_meta *meta; + struct ionic_v1_wqe *wqe; + + if (wr->send_flags & (IB_SEND_SOLICITED | IB_SEND_INLINE)) + return -EINVAL; + + meta = &qp->sq_meta[qp->sq.prod]; + wqe = ionic_queue_at_prod(&qp->sq); + + ionic_prep_sq_wqe(qp, wqe); + + wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, LOCAL_INV); + wqe->base.imm_data_key = cpu_to_be32(wr->ex.invalidate_rkey); + + meta->len = 0; + meta->ibop = IB_WC_LOCAL_INV; + + ionic_prep_base(qp, wr, meta, wqe); + + return 0; +} + +static int ionic_prep_reg(struct ionic_qp *qp, + const struct ib_reg_wr *wr) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device); + struct ionic_mr *mr = to_ionic_mr(wr->mr); + struct ionic_sq_meta *meta; + struct ionic_v1_wqe *wqe; + __le64 dma_addr; + int flags; + + if (wr->wr.send_flags & (IB_SEND_SOLICITED | IB_SEND_INLINE)) + return -EINVAL; + + /* must call ib_map_mr_sg before posting reg wr */ + if (!mr->buf.tbl_pages) + return -EINVAL; + + meta = 
&qp->sq_meta[qp->sq.prod]; + wqe = ionic_queue_at_prod(&qp->sq); + + ionic_prep_sq_wqe(qp, wqe); + + flags = to_ionic_mr_flags(wr->access); + + wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, REG_MR); + wqe->base.num_sge_key = wr->key; + wqe->base.imm_data_key = cpu_to_be32(mr->ibmr.lkey); + wqe->reg_mr.va = cpu_to_be64(mr->ibmr.iova); + wqe->reg_mr.length = cpu_to_be64(mr->ibmr.length); + wqe->reg_mr.offset = ionic_pgtbl_off(&mr->buf, mr->ibmr.iova); + dma_addr = ionic_pgtbl_dma(&mr->buf, mr->ibmr.iova); + wqe->reg_mr.dma_addr = cpu_to_be64(le64_to_cpu(dma_addr)); + + wqe->reg_mr.map_count = cpu_to_be32(mr->buf.tbl_pages); + wqe->reg_mr.flags = cpu_to_be16(flags); + wqe->reg_mr.dir_size_log2 = 0; + wqe->reg_mr.page_size_log2 = order_base_2(mr->ibmr.page_size); + + meta->len = 0; + meta->ibop = IB_WC_REG_MR; + + ionic_prep_base(qp, &wr->wr, meta, wqe); + + return 0; +} + +static int ionic_prep_one_rc(struct ionic_qp *qp, + const struct ib_send_wr *wr) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device); + int rc = 0; + + switch (wr->opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + case IB_WR_SEND_WITH_INV: + rc = ionic_prep_send(qp, wr); + break; + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + rc = ionic_prep_rdma(qp, rdma_wr(wr)); + break; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + rc = ionic_prep_atomic(qp, atomic_wr(wr)); + break; + case IB_WR_LOCAL_INV: + rc = ionic_prep_inv(qp, wr); + break; + case IB_WR_REG_MR: + rc = ionic_prep_reg(qp, reg_wr(wr)); + break; + default: + ibdev_dbg(&dev->ibdev, "invalid opcode %d\n", wr->opcode); + rc = -EINVAL; + } + + return rc; +} + +static int ionic_prep_one_ud(struct ionic_qp *qp, + const struct ib_send_wr *wr) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device); + int rc = 0; + + switch (wr->opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + rc = ionic_prep_send_ud(qp, ud_wr(wr)); + break; + default: + ibdev_dbg(&dev->ibdev, "invalid opcode %d\n", wr->opcode); + rc = -EINVAL; + } + + return rc; +} + +static int ionic_prep_recv(struct ionic_qp *qp, + const struct ib_recv_wr *wr) +{ + struct ionic_rq_meta *meta; + struct ionic_v1_wqe *wqe; + s64 signed_len; + u32 mval; + + wqe = ionic_queue_at_prod(&qp->rq); + + /* if wqe is owned by device, caller can try posting again soon */ + if (wqe->base.flags & cpu_to_be16(IONIC_V1_FLAG_FENCE)) + return -EAGAIN; + + meta = qp->rq_meta_head; + if (unlikely(meta == IONIC_META_LAST) || + unlikely(meta == IONIC_META_POSTED)) + return -EIO; + + ionic_prep_rq_wqe(qp, wqe); + + mval = ionic_v1_recv_wqe_max_sge(qp->rq.stride_log2, qp->rq_spec, + false); + signed_len = ionic_prep_pld(wqe, &wqe->recv.pld, + qp->rq_spec, mval, + wr->sg_list, wr->num_sge); + if (signed_len < 0) + return signed_len; + + meta->wrid = wr->wr_id; + + wqe->base.wqe_id = meta - qp->rq_meta; + wqe->base.num_sge_key = wr->num_sge; + + /* total length for recv goes in base imm_data_key */ + wqe->base.imm_data_key = cpu_to_be32(signed_len); + + ionic_queue_produce(&qp->rq); + + qp->rq_meta_head = meta->next; + meta->next = IONIC_META_POSTED; + + return 0; +} + +static int ionic_post_send_common(struct ionic_ibdev *dev, + struct ionic_vcq *vcq, + struct ionic_cq *cq, + struct ionic_qp *qp, + const struct ib_send_wr *wr, + const struct ib_send_wr **bad) +{ + unsigned long irqflags; + bool notify = false; + int spend, rc = 0; + + if (!bad) + return -EINVAL; + + if (!qp->has_sq) { + *bad = wr; + return -EINVAL; + } + + if (qp->state < 
IB_QPS_RTS) { + *bad = wr; + return -EINVAL; + } + + spin_lock_irqsave(&qp->sq_lock, irqflags); + + while (wr) { + if (ionic_queue_full(&qp->sq)) { + ibdev_dbg(&dev->ibdev, "queue full"); + rc = -ENOMEM; + goto out; + } + + if (qp->ibqp.qp_type == IB_QPT_UD || + qp->ibqp.qp_type == IB_QPT_GSI) + rc = ionic_prep_one_ud(qp, wr); + else + rc = ionic_prep_one_rc(qp, wr); + if (rc) + goto out; + + wr = wr->next; + } + +out: + spin_unlock_irqrestore(&qp->sq_lock, irqflags); + + spin_lock_irqsave(&cq->lock, irqflags); + spin_lock(&qp->sq_lock); + + if (likely(qp->sq.prod != qp->sq_old_prod)) { + /* ring cq doorbell just in time */ + spend = (qp->sq.prod - qp->sq_old_prod) & qp->sq.mask; + ionic_reserve_cq(dev, cq, spend); + + qp->sq_old_prod = qp->sq.prod; + + ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.sq_qtype, + ionic_queue_dbell_val(&qp->sq)); + } + + if (qp->sq_flush) { + notify = true; + cq->flush = true; + list_move_tail(&qp->cq_flush_sq, &cq->flush_sq); + } + + spin_unlock(&qp->sq_lock); + spin_unlock_irqrestore(&cq->lock, irqflags); + + if (notify && vcq->ibcq.comp_handler) + vcq->ibcq.comp_handler(&vcq->ibcq, vcq->ibcq.cq_context); + + *bad = wr; + return rc; +} + +static int ionic_post_recv_common(struct ionic_ibdev *dev, + struct ionic_vcq *vcq, + struct ionic_cq *cq, + struct ionic_qp *qp, + const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad) +{ + unsigned long irqflags; + bool notify = false; + int spend, rc = 0; + + if (!bad) + return -EINVAL; + + if (!qp->has_rq) { + *bad = wr; + return -EINVAL; + } + + if (qp->state < IB_QPS_INIT) { + *bad = wr; + return -EINVAL; + } + + spin_lock_irqsave(&qp->rq_lock, irqflags); + + while (wr) { + if (ionic_queue_full(&qp->rq)) { + ibdev_dbg(&dev->ibdev, "queue full"); + rc = -ENOMEM; + goto out; + } + + rc = ionic_prep_recv(qp, wr); + if (rc) + goto out; + + wr = wr->next; + } + +out: + if (!cq) { + spin_unlock_irqrestore(&qp->rq_lock, irqflags); + goto out_unlocked; + } + spin_unlock_irqrestore(&qp->rq_lock, irqflags); + + spin_lock_irqsave(&cq->lock, irqflags); + spin_lock(&qp->rq_lock); + + if (likely(qp->rq.prod != qp->rq_old_prod)) { + /* ring cq doorbell just in time */ + spend = (qp->rq.prod - qp->rq_old_prod) & qp->rq.mask; + ionic_reserve_cq(dev, cq, spend); + + qp->rq_old_prod = qp->rq.prod; + + ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.rq_qtype, + ionic_queue_dbell_val(&qp->rq)); + } + + if (qp->rq_flush) { + notify = true; + cq->flush = true; + list_move_tail(&qp->cq_flush_rq, &cq->flush_rq); + } + + spin_unlock(&qp->rq_lock); + spin_unlock_irqrestore(&cq->lock, irqflags); + + if (notify && vcq->ibcq.comp_handler) + vcq->ibcq.comp_handler(&vcq->ibcq, vcq->ibcq.cq_context); + +out_unlocked: + *bad = wr; + return rc; +} + +int ionic_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device); + struct ionic_vcq *vcq = to_ionic_vcq(ibqp->send_cq); + struct ionic_qp *qp = to_ionic_qp(ibqp); + struct ionic_cq *cq = + to_ionic_vcq_cq(ibqp->send_cq, qp->udma_idx); + + return ionic_post_send_common(dev, vcq, cq, qp, wr, bad); +} + +int ionic_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device); + struct ionic_vcq *vcq = to_ionic_vcq(ibqp->recv_cq); + struct ionic_qp *qp = to_ionic_qp(ibqp); + struct ionic_cq *cq = + to_ionic_vcq_cq(ibqp->recv_cq, qp->udma_idx); + + return ionic_post_recv_common(dev, vcq, cq, qp, wr, bad); +} 
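For reference, the completion polling above relies on a CQE "color" (phase) bit: ionic_next_cqe() accepts an entry only while its color matches the color the consumer currently expects, and that expected color flips on every wrap of the ring (ionic_color_wrap()). A minimal standalone sketch of that ownership scheme follows; it is illustration only, with simplified names that are not the driver's, and it omits details such as the dma_rmb() the real code issues after the color check.

#include <stdbool.h>
#include <stdint.h>

#define DEMO_CQ_DEPTH 16

struct demo_cqe {
	uint32_t data;
	bool color;		/* written by the producer (device) */
};

struct demo_cq {
	struct demo_cqe ring[DEMO_CQ_DEPTH];
	uint16_t cons;		/* next entry the consumer will inspect */
	bool color;		/* color the consumer expects for a new entry */
};

/* Return true and consume one entry if the producer has published it. */
bool demo_cq_poll(struct demo_cq *cq, uint32_t *data)
{
	struct demo_cqe *cqe = &cq->ring[cq->cons];

	if (cqe->color != cq->color)
		return false;	/* entry still owned by the producer */

	/* the driver additionally orders reads with dma_rmb() here */
	*data = cqe->data;

	cq->cons = (cq->cons + 1) % DEMO_CQ_DEPTH;
	if (cq->cons == 0)
		cq->color = !cq->color;	/* expected color flips on wrap */

	return true;
}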
diff --git a/drivers/infiniband/hw/ionic/ionic_fw.h b/drivers/infiniband/hw/ionic/ionic_fw.h index 62afe9220717..1065a7c4faa8 100644 --- a/drivers/infiniband/hw/ionic/ionic_fw.h +++ b/drivers/infiniband/hw/ionic/ionic_fw.h @@ -163,6 +163,61 @@ static inline int to_ionic_qp_flags(int access, bool sqd_notify, return flags; } +/* cqe non-admin status indicated in status_length field when err bit is set */ +enum ionic_status { + IONIC_STS_OK, + IONIC_STS_LOCAL_LEN_ERR, + IONIC_STS_LOCAL_QP_OPER_ERR, + IONIC_STS_LOCAL_PROT_ERR, + IONIC_STS_WQE_FLUSHED_ERR, + IONIC_STS_MEM_MGMT_OPER_ERR, + IONIC_STS_BAD_RESP_ERR, + IONIC_STS_LOCAL_ACC_ERR, + IONIC_STS_REMOTE_INV_REQ_ERR, + IONIC_STS_REMOTE_ACC_ERR, + IONIC_STS_REMOTE_OPER_ERR, + IONIC_STS_RETRY_EXCEEDED, + IONIC_STS_RNR_RETRY_EXCEEDED, + IONIC_STS_XRC_VIO_ERR, + IONIC_STS_LOCAL_SGL_INV_ERR, +}; + +static inline int ionic_to_ib_status(int sts) +{ + switch (sts) { + case IONIC_STS_OK: + return IB_WC_SUCCESS; + case IONIC_STS_LOCAL_LEN_ERR: + return IB_WC_LOC_LEN_ERR; + case IONIC_STS_LOCAL_QP_OPER_ERR: + case IONIC_STS_LOCAL_SGL_INV_ERR: + return IB_WC_LOC_QP_OP_ERR; + case IONIC_STS_LOCAL_PROT_ERR: + return IB_WC_LOC_PROT_ERR; + case IONIC_STS_WQE_FLUSHED_ERR: + return IB_WC_WR_FLUSH_ERR; + case IONIC_STS_MEM_MGMT_OPER_ERR: + return IB_WC_MW_BIND_ERR; + case IONIC_STS_BAD_RESP_ERR: + return IB_WC_BAD_RESP_ERR; + case IONIC_STS_LOCAL_ACC_ERR: + return IB_WC_LOC_ACCESS_ERR; + case IONIC_STS_REMOTE_INV_REQ_ERR: + return IB_WC_REM_INV_REQ_ERR; + case IONIC_STS_REMOTE_ACC_ERR: + return IB_WC_REM_ACCESS_ERR; + case IONIC_STS_REMOTE_OPER_ERR: + return IB_WC_REM_OP_ERR; + case IONIC_STS_RETRY_EXCEEDED: + return IB_WC_RETRY_EXC_ERR; + case IONIC_STS_RNR_RETRY_EXCEEDED: + return IB_WC_RNR_RETRY_EXC_ERR; + case IONIC_STS_XRC_VIO_ERR: + default: + return IB_WC_GENERAL_ERR; + } +} + /* admin queue qp type */ enum ionic_qp_type { IONIC_QPT_RC, @@ -294,6 +349,24 @@ struct ionic_v1_cqe { __be32 qid_type_flags; }; +/* bits for cqe recv */ +enum ionic_v1_cqe_src_qpn_bits { + IONIC_V1_CQE_RECV_QPN_MASK = 0xffffff, + IONIC_V1_CQE_RECV_OP_SHIFT = 24, + + /* MASK could be 0x3, but need 0x1f for makeshift values: + * OP_TYPE_RDMA_OPER_WITH_IMM, OP_TYPE_SEND_RCVD + */ + IONIC_V1_CQE_RECV_OP_MASK = 0x1f, + IONIC_V1_CQE_RECV_OP_SEND = 0, + IONIC_V1_CQE_RECV_OP_SEND_INV = 1, + IONIC_V1_CQE_RECV_OP_SEND_IMM = 2, + IONIC_V1_CQE_RECV_OP_RDMA_IMM = 3, + + IONIC_V1_CQE_RECV_IS_IPV4 = BIT(7 + IONIC_V1_CQE_RECV_OP_SHIFT), + IONIC_V1_CQE_RECV_IS_VLAN = BIT(6 + IONIC_V1_CQE_RECV_OP_SHIFT), +}; + /* bits for cqe qid_type_flags */ enum ionic_v1_cqe_qtf_bits { IONIC_V1_CQE_COLOR = BIT(0), @@ -318,6 +391,16 @@ static inline bool ionic_v1_cqe_error(struct ionic_v1_cqe *cqe) return cqe->qid_type_flags & cpu_to_be32(IONIC_V1_CQE_ERROR); } +static inline bool ionic_v1_cqe_recv_is_ipv4(struct ionic_v1_cqe *cqe) +{ + return cqe->recv.src_qpn_op & cpu_to_be32(IONIC_V1_CQE_RECV_IS_IPV4); +} + +static inline bool ionic_v1_cqe_recv_is_vlan(struct ionic_v1_cqe *cqe) +{ + return cqe->recv.src_qpn_op & cpu_to_be32(IONIC_V1_CQE_RECV_IS_VLAN); +} + static inline void ionic_v1_cqe_clean(struct ionic_v1_cqe *cqe) { cqe->qid_type_flags |= cpu_to_be32(~0u << IONIC_V1_CQE_QID_SHIFT); @@ -444,6 +527,28 @@ enum ionic_v1_op { IONIC_V1_SPEC_FIRST_SGE = 2, }; +/* queue pair v2 send opcodes */ +enum ionic_v2_op { + IONIC_V2_OPSL_OUT = 0x20, + IONIC_V2_OPSL_IMM = 0x40, + IONIC_V2_OPSL_INV = 0x80, + + IONIC_V2_OP_SEND = 0x0 | IONIC_V2_OPSL_OUT, + IONIC_V2_OP_SEND_IMM = IONIC_V2_OP_SEND | IONIC_V2_OPSL_IMM, + 
IONIC_V2_OP_SEND_INV = IONIC_V2_OP_SEND | IONIC_V2_OPSL_INV, + + IONIC_V2_OP_RDMA_WRITE = 0x1 | IONIC_V2_OPSL_OUT, + IONIC_V2_OP_RDMA_WRITE_IMM = IONIC_V2_OP_RDMA_WRITE | IONIC_V2_OPSL_IMM, + + IONIC_V2_OP_RDMA_READ = 0x2, + + IONIC_V2_OP_ATOMIC_CS = 0x4, + IONIC_V2_OP_ATOMIC_FA = 0x5, + IONIC_V2_OP_REG_MR = 0x6, + IONIC_V2_OP_LOCAL_INV = 0x7, + IONIC_V2_OP_BIND_MW = 0x8, +}; + static inline size_t ionic_v1_send_wqe_min_size(int min_sge, int min_data, int spec, bool expdb) { diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c index 6833abbfb1dc..ab080d945a13 100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.c +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c @@ -45,6 +45,11 @@ static const struct ib_device_ops ionic_dev_ops = { .query_qp = ionic_query_qp, .destroy_qp = ionic_destroy_qp, + .post_send = ionic_post_send, + .post_recv = ionic_post_recv, + .poll_cq = ionic_poll_cq, + .req_notify_cq = ionic_req_notify_cq, + INIT_RDMA_OBJ_SIZE(ib_ucontext, ionic_ctx, ibctx), INIT_RDMA_OBJ_SIZE(ib_pd, ionic_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ah, ionic_ah, ibah), diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.h b/drivers/infiniband/hw/ionic/ionic_ibdev.h index 94ef76aaca43..bbec041b378b 100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.h +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.h @@ -387,6 +387,11 @@ static inline u32 ionic_obj_dbid(struct ionic_ibdev *dev, return ionic_ctx_dbid(dev, to_ionic_ctx_uobj(uobj)); } +static inline bool ionic_ibop_is_local(enum ib_wr_opcode op) +{ + return op == IB_WR_LOCAL_INV || op == IB_WR_REG_MR; +} + static inline void ionic_qp_complete(struct kref *kref) { struct ionic_qp *qp = container_of(kref, struct ionic_qp, qp_kref); @@ -462,8 +467,17 @@ int ionic_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask, struct ib_qp_init_attr *init_attr); int ionic_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); +/* ionic_datapath.c */ +int ionic_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad); +int ionic_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad); +int ionic_poll_cq(struct ib_cq *ibcq, int nwc, struct ib_wc *wc); +int ionic_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); + /* ionic_pgtbl.c */ __le64 ionic_pgtbl_dma(struct ionic_tbl_buf *buf, u64 va); +__be64 ionic_pgtbl_off(struct ionic_tbl_buf *buf, u64 va); int ionic_pgtbl_page(struct ionic_tbl_buf *buf, u64 dma); int ionic_pgtbl_init(struct ionic_ibdev *dev, struct ionic_tbl_buf *buf, diff --git a/drivers/infiniband/hw/ionic/ionic_pgtbl.c b/drivers/infiniband/hw/ionic/ionic_pgtbl.c index a8eb73be6f86..e74db73c9246 100644 --- a/drivers/infiniband/hw/ionic/ionic_pgtbl.c +++ b/drivers/infiniband/hw/ionic/ionic_pgtbl.c @@ -26,6 +26,17 @@ __le64 ionic_pgtbl_dma(struct ionic_tbl_buf *buf, u64 va) return cpu_to_le64(dma + (va & pg_mask)); } +__be64 ionic_pgtbl_off(struct ionic_tbl_buf *buf, u64 va) +{ + if (buf->tbl_pages > 1) { + u64 pg_mask = BIT_ULL(buf->page_size_log2) - 1; + + return cpu_to_be64(va & pg_mask); + } + + return 0; +} + int ionic_pgtbl_page(struct ionic_tbl_buf *buf, u64 dma) { if (unlikely(buf->tbl_pages == buf->tbl_limit)) From 2075bbe8ef03914aa2211035eec45d1d3a5c4ff2 Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Wed, 3 Sep 2025 11:46:04 +0530 Subject: [PATCH 46/85] RDMA/ionic: Register device ops for miscellaneous functionality Implement idbdev ops for device and port information. 
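The values reported by these ops are what a verbs consumer observes through the standard query calls. A usage sketch with stock libibverbs, under the assumption that the ionic device is the first device enumerated (error handling omitted):

#include <stdio.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **list = ibv_get_device_list(NULL);
	struct ibv_context *ctx = ibv_open_device(list[0]);	/* assumes ionic is device 0 */
	struct ibv_device_attr dev_attr;
	struct ibv_port_attr port_attr;

	ibv_query_device(ctx, &dev_attr);
	ibv_query_port(ctx, 1, &port_attr);	/* the device exposes a single port */

	printf("max_qp=%d max_cqe=%d port_state=%d active_mtu=%d\n",
	       dev_attr.max_qp, dev_attr.max_cqe,
	       port_attr.state, port_attr.active_mtu);

	ibv_close_device(ctx);
	ibv_free_device_list(list);
	return 0;
}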
Co-developed-by: Andrew Boyer Signed-off-by: Andrew Boyer Co-developed-by: Allen Hubbe Signed-off-by: Allen Hubbe Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20250903061606.4139957-13-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/ionic/ionic_ibdev.c | 200 ++++++++++++++++++++ drivers/infiniband/hw/ionic/ionic_ibdev.h | 5 + drivers/infiniband/hw/ionic/ionic_lif_cfg.c | 10 + drivers/infiniband/hw/ionic/ionic_lif_cfg.h | 2 + 4 files changed, 217 insertions(+) diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c index ab080d945a13..5f51873af350 100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.c +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c @@ -3,7 +3,11 @@ #include #include +#include +#include #include +#include +#include #include "ionic_ibdev.h" @@ -15,6 +19,192 @@ MODULE_DESCRIPTION(DRIVER_DESCRIPTION); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS("NET_IONIC"); +static int ionic_query_device(struct ib_device *ibdev, + struct ib_device_attr *attr, + struct ib_udata *udata) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibdev); + struct net_device *ndev; + + ndev = ib_device_get_netdev(ibdev, 1); + addrconf_ifid_eui48((u8 *)&attr->sys_image_guid, ndev); + dev_put(ndev); + attr->max_mr_size = dev->lif_cfg.npts_per_lif * PAGE_SIZE / 2; + attr->page_size_cap = dev->lif_cfg.page_size_supported; + + attr->vendor_id = to_pci_dev(dev->lif_cfg.hwdev)->vendor; + attr->vendor_part_id = to_pci_dev(dev->lif_cfg.hwdev)->device; + + attr->hw_ver = ionic_lif_asic_rev(dev->lif_cfg.lif); + attr->fw_ver = 0; + attr->max_qp = dev->lif_cfg.qp_count; + attr->max_qp_wr = IONIC_MAX_DEPTH; + attr->device_cap_flags = + IB_DEVICE_MEM_WINDOW | + IB_DEVICE_MEM_MGT_EXTENSIONS | + IB_DEVICE_MEM_WINDOW_TYPE_2B | + 0; + attr->max_send_sge = + min(ionic_v1_send_wqe_max_sge(dev->lif_cfg.max_stride, 0, false), + IONIC_SPEC_HIGH); + attr->max_recv_sge = + min(ionic_v1_recv_wqe_max_sge(dev->lif_cfg.max_stride, 0, false), + IONIC_SPEC_HIGH); + attr->max_sge_rd = attr->max_send_sge; + attr->max_cq = dev->lif_cfg.cq_count / dev->lif_cfg.udma_count; + attr->max_cqe = IONIC_MAX_CQ_DEPTH - IONIC_CQ_GRACE; + attr->max_mr = dev->lif_cfg.nmrs_per_lif; + attr->max_pd = IONIC_MAX_PD; + attr->max_qp_rd_atom = IONIC_MAX_RD_ATOM; + attr->max_ee_rd_atom = 0; + attr->max_res_rd_atom = IONIC_MAX_RD_ATOM; + attr->max_qp_init_rd_atom = IONIC_MAX_RD_ATOM; + attr->max_ee_init_rd_atom = 0; + attr->atomic_cap = IB_ATOMIC_GLOB; + attr->masked_atomic_cap = IB_ATOMIC_GLOB; + attr->max_mw = dev->lif_cfg.nmrs_per_lif; + attr->max_mcast_grp = 0; + attr->max_mcast_qp_attach = 0; + attr->max_ah = dev->lif_cfg.nahs_per_lif; + attr->max_fast_reg_page_list_len = dev->lif_cfg.npts_per_lif / 2; + attr->max_pkeys = IONIC_PKEY_TBL_LEN; + + return 0; +} + +static int ionic_query_port(struct ib_device *ibdev, u32 port, + struct ib_port_attr *attr) +{ + struct net_device *ndev; + + if (port != 1) + return -EINVAL; + + ndev = ib_device_get_netdev(ibdev, port); + + if (netif_running(ndev) && netif_carrier_ok(ndev)) { + attr->state = IB_PORT_ACTIVE; + attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; + } else if (netif_running(ndev)) { + attr->state = IB_PORT_DOWN; + attr->phys_state = IB_PORT_PHYS_STATE_POLLING; + } else { + attr->state = IB_PORT_DOWN; + attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; + } + + attr->max_mtu = iboe_get_mtu(ndev->max_mtu); + attr->active_mtu = min(attr->max_mtu, iboe_get_mtu(ndev->mtu)); + attr->gid_tbl_len = IONIC_GID_TBL_LEN; + attr->ip_gids = 
true; + attr->port_cap_flags = 0; + attr->max_msg_sz = 0x80000000; + attr->pkey_tbl_len = IONIC_PKEY_TBL_LEN; + attr->max_vl_num = 1; + attr->subnet_prefix = 0xfe80000000000000ull; + + dev_put(ndev); + + return ib_get_eth_speed(ibdev, port, + &attr->active_speed, + &attr->active_width); +} + +static enum rdma_link_layer ionic_get_link_layer(struct ib_device *ibdev, + u32 port) +{ + return IB_LINK_LAYER_ETHERNET; +} + +static int ionic_query_pkey(struct ib_device *ibdev, u32 port, u16 index, + u16 *pkey) +{ + if (port != 1) + return -EINVAL; + + if (index != 0) + return -EINVAL; + + *pkey = IB_DEFAULT_PKEY_FULL; + + return 0; +} + +static int ionic_modify_device(struct ib_device *ibdev, int mask, + struct ib_device_modify *attr) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibdev); + + if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) + return -EOPNOTSUPP; + + if (mask & IB_DEVICE_MODIFY_NODE_DESC) + memcpy(dev->ibdev.node_desc, attr->node_desc, + IB_DEVICE_NODE_DESC_MAX); + + return 0; +} + +static int ionic_get_port_immutable(struct ib_device *ibdev, u32 port, + struct ib_port_immutable *attr) +{ + if (port != 1) + return -EINVAL; + + attr->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + + attr->pkey_tbl_len = IONIC_PKEY_TBL_LEN; + attr->gid_tbl_len = IONIC_GID_TBL_LEN; + attr->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + +static void ionic_get_dev_fw_str(struct ib_device *ibdev, char *str) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibdev); + + ionic_lif_fw_version(dev->lif_cfg.lif, str, IB_FW_VERSION_NAME_MAX); +} + +static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct ionic_ibdev *dev = + rdma_device_to_drv_device(device, struct ionic_ibdev, ibdev); + + return sysfs_emit(buf, "0x%x\n", ionic_lif_asic_rev(dev->lif_cfg.lif)); +} +static DEVICE_ATTR_RO(hw_rev); + +static ssize_t hca_type_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct ionic_ibdev *dev = + rdma_device_to_drv_device(device, struct ionic_ibdev, ibdev); + + return sysfs_emit(buf, "%s\n", dev->ibdev.node_desc); +} +static DEVICE_ATTR_RO(hca_type); + +static struct attribute *ionic_rdma_attributes[] = { + &dev_attr_hw_rev.attr, + &dev_attr_hca_type.attr, + NULL +}; + +static const struct attribute_group ionic_rdma_attr_group = { + .attrs = ionic_rdma_attributes, +}; + +static void ionic_disassociate_ucontext(struct ib_ucontext *ibcontext) +{ + /* + * Dummy define disassociate_ucontext so that it does not + * wait for user context before cleaning up hw resources. 
+ */ +} + static const struct ib_device_ops ionic_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_IONIC, @@ -50,6 +240,16 @@ static const struct ib_device_ops ionic_dev_ops = { .poll_cq = ionic_poll_cq, .req_notify_cq = ionic_req_notify_cq, + .query_device = ionic_query_device, + .query_port = ionic_query_port, + .get_link_layer = ionic_get_link_layer, + .query_pkey = ionic_query_pkey, + .modify_device = ionic_modify_device, + .get_port_immutable = ionic_get_port_immutable, + .get_dev_fw_str = ionic_get_dev_fw_str, + .device_group = &ionic_rdma_attr_group, + .disassociate_ucontext = ionic_disassociate_ucontext, + INIT_RDMA_OBJ_SIZE(ib_ucontext, ionic_ctx, ibctx), INIT_RDMA_OBJ_SIZE(ib_pd, ionic_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ah, ionic_ah, ibah), diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.h b/drivers/infiniband/hw/ionic/ionic_ibdev.h index bbec041b378b..c750e049fecc 100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.h +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.h @@ -26,6 +26,11 @@ #define IONIC_AQ_COUNT 4 #define IONIC_EQ_ISR_BUDGET 10 #define IONIC_EQ_WORK_BUDGET 1000 +#define IONIC_MAX_RD_ATOM 16 +#define IONIC_PKEY_TBL_LEN 1 +#define IONIC_GID_TBL_LEN 256 + +#define IONIC_SPEC_HIGH 8 #define IONIC_MAX_PD 1024 #define IONIC_SPEC_HIGH 8 #define IONIC_SQCMB_ORDER 5 diff --git a/drivers/infiniband/hw/ionic/ionic_lif_cfg.c b/drivers/infiniband/hw/ionic/ionic_lif_cfg.c index 8d0d209227e9..f3cd281c3a2f 100644 --- a/drivers/infiniband/hw/ionic/ionic_lif_cfg.c +++ b/drivers/infiniband/hw/ionic/ionic_lif_cfg.c @@ -99,3 +99,13 @@ struct net_device *ionic_lif_netdev(struct ionic_lif *lif) dev_hold(netdev); return netdev; } + +void ionic_lif_fw_version(struct ionic_lif *lif, char *str, size_t len) +{ + strscpy(str, lif->ionic->idev.dev_info.fw_version, len); +} + +u8 ionic_lif_asic_rev(struct ionic_lif *lif) +{ + return lif->ionic->idev.dev_info.asic_rev; +} diff --git a/drivers/infiniband/hw/ionic/ionic_lif_cfg.h b/drivers/infiniband/hw/ionic/ionic_lif_cfg.h index 5b04b8a9937e..20853429f623 100644 --- a/drivers/infiniband/hw/ionic/ionic_lif_cfg.h +++ b/drivers/infiniband/hw/ionic/ionic_lif_cfg.h @@ -60,5 +60,7 @@ struct ionic_lif_cfg { void ionic_fill_lif_cfg(struct ionic_lif *lif, struct ionic_lif_cfg *cfg); struct net_device *ionic_lif_netdev(struct ionic_lif *lif); +void ionic_lif_fw_version(struct ionic_lif *lif, char *str, size_t len); +u8 ionic_lif_asic_rev(struct ionic_lif *lif); #endif /* _IONIC_LIF_CFG_H_ */ From ea4c399642b85dc30f44d90ee805c6a18fa03062 Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Wed, 3 Sep 2025 11:46:05 +0530 Subject: [PATCH 47/85] RDMA/ionic: Implement device stats ops Implement device stats operations for hw stats and qp stats. 
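Each stat descriptor returned by firmware packs a value type into the top four bits of type_off and a byte offset into the values buffer into the low 28 bits, matching the ionic_v1_stat_type()/ionic_v1_stat_off() helpers added below. A small standalone decoding example (illustration only; the constants mirror the patch):

#include <stdint.h>
#include <stdio.h>

#define STAT_TYPE_SHIFT	28
#define STAT_OFF_MASK	((1u << STAT_TYPE_SHIFT) - 1)

int main(void)
{
	/* type 4 (IONIC_V1_STAT_TYPE_LE64) at byte offset 0x40 */
	uint32_t type_off = (4u << STAT_TYPE_SHIFT) | 0x40;

	printf("type=%u offset=%u\n",
	       type_off >> STAT_TYPE_SHIFT, type_off & STAT_OFF_MASK);
	return 0;
}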
Co-developed-by: Allen Hubbe Signed-off-by: Allen Hubbe Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20250903061606.4139957-14-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/ionic/ionic_fw.h | 43 ++ drivers/infiniband/hw/ionic/ionic_hw_stats.c | 484 +++++++++++++++++++ drivers/infiniband/hw/ionic/ionic_ibdev.c | 4 + drivers/infiniband/hw/ionic/ionic_ibdev.h | 23 + 4 files changed, 554 insertions(+) create mode 100644 drivers/infiniband/hw/ionic/ionic_hw_stats.c diff --git a/drivers/infiniband/hw/ionic/ionic_fw.h b/drivers/infiniband/hw/ionic/ionic_fw.h index 1065a7c4faa8..adfbb89d856c 100644 --- a/drivers/infiniband/hw/ionic/ionic_fw.h +++ b/drivers/infiniband/hw/ionic/ionic_fw.h @@ -659,6 +659,17 @@ static inline int ionic_v1_use_spec_sge(int min_sge, int spec) return spec; } +struct ionic_admin_stats_hdr { + __le64 dma_addr; + __le32 length; + __le32 id_ver; + __u8 type_state; +} __packed; + +#define IONIC_ADMIN_STATS_HDRS_IN_V1_LEN 17 +static_assert(sizeof(struct ionic_admin_stats_hdr) == + IONIC_ADMIN_STATS_HDRS_IN_V1_LEN); + struct ionic_admin_create_ah { __le64 dma_addr; __le32 length; @@ -837,6 +848,7 @@ struct ionic_v1_admin_wqe { __le16 len; union { + struct ionic_admin_stats_hdr stats; struct ionic_admin_create_ah create_ah; struct ionic_admin_destroy_ah destroy_ah; struct ionic_admin_query_ah query_ah; @@ -983,4 +995,35 @@ static inline u32 ionic_v1_eqe_evt_qid(u32 evt) return evt >> IONIC_V1_EQE_QID_SHIFT; } +enum ionic_v1_stat_bits { + IONIC_V1_STAT_TYPE_SHIFT = 28, + IONIC_V1_STAT_TYPE_NONE = 0, + IONIC_V1_STAT_TYPE_8 = 1, + IONIC_V1_STAT_TYPE_LE16 = 2, + IONIC_V1_STAT_TYPE_LE32 = 3, + IONIC_V1_STAT_TYPE_LE64 = 4, + IONIC_V1_STAT_TYPE_BE16 = 5, + IONIC_V1_STAT_TYPE_BE32 = 6, + IONIC_V1_STAT_TYPE_BE64 = 7, + IONIC_V1_STAT_OFF_MASK = BIT(IONIC_V1_STAT_TYPE_SHIFT) - 1, +}; + +struct ionic_v1_stat { + union { + __be32 be_type_off; + u32 type_off; + }; + char name[28]; +}; + +static inline int ionic_v1_stat_type(struct ionic_v1_stat *hdr) +{ + return hdr->type_off >> IONIC_V1_STAT_TYPE_SHIFT; +} + +static inline unsigned int ionic_v1_stat_off(struct ionic_v1_stat *hdr) +{ + return hdr->type_off & IONIC_V1_STAT_OFF_MASK; +} + #endif /* _IONIC_FW_H_ */ diff --git a/drivers/infiniband/hw/ionic/ionic_hw_stats.c b/drivers/infiniband/hw/ionic/ionic_hw_stats.c new file mode 100644 index 000000000000..244a80dde08f --- /dev/null +++ b/drivers/infiniband/hw/ionic/ionic_hw_stats.c @@ -0,0 +1,484 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. 
*/ + +#include + +#include "ionic_fw.h" +#include "ionic_ibdev.h" + +static int ionic_v1_stat_normalize(struct ionic_v1_stat *hw_stats, + int hw_stats_count) +{ + int hw_stat_i; + + for (hw_stat_i = 0; hw_stat_i < hw_stats_count; ++hw_stat_i) { + struct ionic_v1_stat *stat = &hw_stats[hw_stat_i]; + + stat->type_off = be32_to_cpu(stat->be_type_off); + stat->name[sizeof(stat->name) - 1] = 0; + if (ionic_v1_stat_type(stat) == IONIC_V1_STAT_TYPE_NONE) + break; + } + + return hw_stat_i; +} + +static void ionic_fill_stats_desc(struct rdma_stat_desc *hw_stats_hdrs, + struct ionic_v1_stat *hw_stats, + int hw_stats_count) +{ + int hw_stat_i; + + for (hw_stat_i = 0; hw_stat_i < hw_stats_count; ++hw_stat_i) { + struct ionic_v1_stat *stat = &hw_stats[hw_stat_i]; + + hw_stats_hdrs[hw_stat_i].name = stat->name; + } +} + +static u64 ionic_v1_stat_val(struct ionic_v1_stat *stat, + void *vals_buf, size_t vals_len) +{ + unsigned int off = ionic_v1_stat_off(stat); + int type = ionic_v1_stat_type(stat); + +#define __ionic_v1_stat_validate(__type) \ + ((off + sizeof(__type) <= vals_len) && \ + (IS_ALIGNED(off, sizeof(__type)))) + + switch (type) { + case IONIC_V1_STAT_TYPE_8: + if (__ionic_v1_stat_validate(u8)) + return *(u8 *)(vals_buf + off); + break; + case IONIC_V1_STAT_TYPE_LE16: + if (__ionic_v1_stat_validate(__le16)) + return le16_to_cpu(*(__le16 *)(vals_buf + off)); + break; + case IONIC_V1_STAT_TYPE_LE32: + if (__ionic_v1_stat_validate(__le32)) + return le32_to_cpu(*(__le32 *)(vals_buf + off)); + break; + case IONIC_V1_STAT_TYPE_LE64: + if (__ionic_v1_stat_validate(__le64)) + return le64_to_cpu(*(__le64 *)(vals_buf + off)); + break; + case IONIC_V1_STAT_TYPE_BE16: + if (__ionic_v1_stat_validate(__be16)) + return be16_to_cpu(*(__be16 *)(vals_buf + off)); + break; + case IONIC_V1_STAT_TYPE_BE32: + if (__ionic_v1_stat_validate(__be32)) + return be32_to_cpu(*(__be32 *)(vals_buf + off)); + break; + case IONIC_V1_STAT_TYPE_BE64: + if (__ionic_v1_stat_validate(__be64)) + return be64_to_cpu(*(__be64 *)(vals_buf + off)); + break; + } + + return ~0ull; +#undef __ionic_v1_stat_validate +} + +static int ionic_hw_stats_cmd(struct ionic_ibdev *dev, + dma_addr_t dma, size_t len, int qid, int op) +{ + struct ionic_admin_wr wr = { + .work = COMPLETION_INITIALIZER_ONSTACK(wr.work), + .wqe = { + .op = op, + .len = cpu_to_le16(IONIC_ADMIN_STATS_HDRS_IN_V1_LEN), + .cmd.stats = { + .dma_addr = cpu_to_le64(dma), + .length = cpu_to_le32(len), + .id_ver = cpu_to_le32(qid), + }, + } + }; + + if (dev->lif_cfg.admin_opcodes <= op) + return -EBADRQC; + + ionic_admin_post(dev, &wr); + + return ionic_admin_wait(dev, &wr, IONIC_ADMIN_F_INTERRUPT); +} + +static int ionic_init_hw_stats(struct ionic_ibdev *dev) +{ + dma_addr_t hw_stats_dma; + int rc, hw_stats_count; + + if (dev->hw_stats_hdrs) + return 0; + + dev->hw_stats_count = 0; + + /* buffer for current values from the device */ + dev->hw_stats_buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!dev->hw_stats_buf) { + rc = -ENOMEM; + goto err_buf; + } + + /* buffer for names, sizes, offsets of values */ + dev->hw_stats = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!dev->hw_stats) { + rc = -ENOMEM; + goto err_hw_stats; + } + + /* request the names, sizes, offsets */ + hw_stats_dma = dma_map_single(dev->lif_cfg.hwdev, dev->hw_stats, + PAGE_SIZE, DMA_FROM_DEVICE); + rc = dma_mapping_error(dev->lif_cfg.hwdev, hw_stats_dma); + if (rc) + goto err_dma; + + rc = ionic_hw_stats_cmd(dev, hw_stats_dma, PAGE_SIZE, 0, + IONIC_V1_ADMIN_STATS_HDRS); + if (rc) + goto err_cmd; + + 
dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma, PAGE_SIZE, DMA_FROM_DEVICE); + + /* normalize and count the number of hw_stats */ + hw_stats_count = + ionic_v1_stat_normalize(dev->hw_stats, + PAGE_SIZE / sizeof(*dev->hw_stats)); + if (!hw_stats_count) { + rc = -ENODATA; + goto err_dma; + } + + dev->hw_stats_count = hw_stats_count; + + /* alloc and init array of names, for alloc_hw_stats */ + dev->hw_stats_hdrs = kcalloc(hw_stats_count, + sizeof(*dev->hw_stats_hdrs), + GFP_KERNEL); + if (!dev->hw_stats_hdrs) { + rc = -ENOMEM; + goto err_dma; + } + + ionic_fill_stats_desc(dev->hw_stats_hdrs, dev->hw_stats, + hw_stats_count); + + return 0; + +err_cmd: + dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma, PAGE_SIZE, DMA_FROM_DEVICE); +err_dma: + kfree(dev->hw_stats); +err_hw_stats: + kfree(dev->hw_stats_buf); +err_buf: + dev->hw_stats_count = 0; + dev->hw_stats = NULL; + dev->hw_stats_buf = NULL; + dev->hw_stats_hdrs = NULL; + return rc; +} + +static struct rdma_hw_stats *ionic_alloc_hw_stats(struct ib_device *ibdev, + u32 port) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibdev); + + if (port != 1) + return NULL; + + return rdma_alloc_hw_stats_struct(dev->hw_stats_hdrs, + dev->hw_stats_count, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int ionic_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *hw_stats, + u32 port, int index) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibdev); + dma_addr_t hw_stats_dma; + int rc, hw_stat_i; + + if (port != 1) + return -EINVAL; + + hw_stats_dma = dma_map_single(dev->lif_cfg.hwdev, dev->hw_stats_buf, + PAGE_SIZE, DMA_FROM_DEVICE); + rc = dma_mapping_error(dev->lif_cfg.hwdev, hw_stats_dma); + if (rc) + goto err_dma; + + rc = ionic_hw_stats_cmd(dev, hw_stats_dma, PAGE_SIZE, + 0, IONIC_V1_ADMIN_STATS_VALS); + if (rc) + goto err_cmd; + + dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma, + PAGE_SIZE, DMA_FROM_DEVICE); + + for (hw_stat_i = 0; hw_stat_i < dev->hw_stats_count; ++hw_stat_i) + hw_stats->value[hw_stat_i] = + ionic_v1_stat_val(&dev->hw_stats[hw_stat_i], + dev->hw_stats_buf, PAGE_SIZE); + + return hw_stat_i; + +err_cmd: + dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma, + PAGE_SIZE, DMA_FROM_DEVICE); +err_dma: + return rc; +} + +static struct rdma_hw_stats * +ionic_counter_alloc_stats(struct rdma_counter *counter) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(counter->device); + struct ionic_counter *cntr; + int err; + + cntr = kzalloc(sizeof(*cntr), GFP_KERNEL); + if (!cntr) + return NULL; + + /* buffer for current values from the device */ + cntr->vals = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!cntr->vals) + goto err_vals; + + err = xa_alloc(&dev->counter_stats->xa_counters, &counter->id, + cntr, + XA_LIMIT(0, IONIC_MAX_QPID), + GFP_KERNEL); + if (err) + goto err_xa; + + INIT_LIST_HEAD(&cntr->qp_list); + + return rdma_alloc_hw_stats_struct(dev->counter_stats->stats_hdrs, + dev->counter_stats->queue_stats_count, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +err_xa: + kfree(cntr->vals); +err_vals: + kfree(cntr); + + return NULL; +} + +static int ionic_counter_dealloc(struct rdma_counter *counter) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(counter->device); + struct ionic_counter *cntr; + + cntr = xa_erase(&dev->counter_stats->xa_counters, counter->id); + if (!cntr) + return -EINVAL; + + kfree(cntr->vals); + kfree(cntr); + + return 0; +} + +static int ionic_counter_bind_qp(struct rdma_counter *counter, + struct ib_qp *ibqp, + u32 port) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(counter->device); + struct ionic_qp *qp = to_ionic_qp(ibqp); + 
struct ionic_counter *cntr; + + cntr = xa_load(&dev->counter_stats->xa_counters, counter->id); + if (!cntr) + return -EINVAL; + + list_add_tail(&qp->qp_list_counter, &cntr->qp_list); + ibqp->counter = counter; + + return 0; +} + +static int ionic_counter_unbind_qp(struct ib_qp *ibqp, u32 port) +{ + struct ionic_qp *qp = to_ionic_qp(ibqp); + + if (ibqp->counter) { + list_del(&qp->qp_list_counter); + ibqp->counter = NULL; + } + + return 0; +} + +static int ionic_get_qp_stats(struct ib_device *ibdev, + struct rdma_hw_stats *hw_stats, + u32 counter_id) +{ + struct ionic_ibdev *dev = to_ionic_ibdev(ibdev); + struct ionic_counter_stats *cs; + struct ionic_counter *cntr; + dma_addr_t hw_stats_dma; + struct ionic_qp *qp; + int rc, stat_i = 0; + + cs = dev->counter_stats; + cntr = xa_load(&cs->xa_counters, counter_id); + if (!cntr) + return -EINVAL; + + hw_stats_dma = dma_map_single(dev->lif_cfg.hwdev, cntr->vals, + PAGE_SIZE, DMA_FROM_DEVICE); + rc = dma_mapping_error(dev->lif_cfg.hwdev, hw_stats_dma); + if (rc) + return rc; + + memset(hw_stats->value, 0, sizeof(u64) * hw_stats->num_counters); + + list_for_each_entry(qp, &cntr->qp_list, qp_list_counter) { + rc = ionic_hw_stats_cmd(dev, hw_stats_dma, PAGE_SIZE, + qp->qpid, + IONIC_V1_ADMIN_QP_STATS_VALS); + if (rc) + goto err_cmd; + + for (stat_i = 0; stat_i < cs->queue_stats_count; ++stat_i) + hw_stats->value[stat_i] += + ionic_v1_stat_val(&cs->hdr[stat_i], + cntr->vals, + PAGE_SIZE); + } + + dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma, PAGE_SIZE, DMA_FROM_DEVICE); + return stat_i; + +err_cmd: + dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma, PAGE_SIZE, DMA_FROM_DEVICE); + + return rc; +} + +static int ionic_counter_update_stats(struct rdma_counter *counter) +{ + return ionic_get_qp_stats(counter->device, counter->stats, counter->id); +} + +static int ionic_alloc_counters(struct ionic_ibdev *dev) +{ + struct ionic_counter_stats *cs = dev->counter_stats; + int rc, hw_stats_count; + dma_addr_t hdr_dma; + + /* buffer for names, sizes, offsets of values */ + cs->hdr = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!cs->hdr) + return -ENOMEM; + + hdr_dma = dma_map_single(dev->lif_cfg.hwdev, cs->hdr, + PAGE_SIZE, DMA_FROM_DEVICE); + rc = dma_mapping_error(dev->lif_cfg.hwdev, hdr_dma); + if (rc) + goto err_dma; + + rc = ionic_hw_stats_cmd(dev, hdr_dma, PAGE_SIZE, 0, + IONIC_V1_ADMIN_QP_STATS_HDRS); + if (rc) + goto err_cmd; + + dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma, PAGE_SIZE, DMA_FROM_DEVICE); + + /* normalize and count the number of hw_stats */ + hw_stats_count = ionic_v1_stat_normalize(cs->hdr, + PAGE_SIZE / sizeof(*cs->hdr)); + if (!hw_stats_count) { + rc = -ENODATA; + goto err_dma; + } + + cs->queue_stats_count = hw_stats_count; + + /* alloc and init array of names */ + cs->stats_hdrs = kcalloc(hw_stats_count, sizeof(*cs->stats_hdrs), + GFP_KERNEL); + if (!cs->stats_hdrs) { + rc = -ENOMEM; + goto err_dma; + } + + ionic_fill_stats_desc(cs->stats_hdrs, cs->hdr, hw_stats_count); + + return 0; + +err_cmd: + dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma, PAGE_SIZE, DMA_FROM_DEVICE); +err_dma: + kfree(cs->hdr); + + return rc; +} + +static const struct ib_device_ops ionic_hw_stats_ops = { + .driver_id = RDMA_DRIVER_IONIC, + .alloc_hw_port_stats = ionic_alloc_hw_stats, + .get_hw_stats = ionic_get_hw_stats, +}; + +static const struct ib_device_ops ionic_counter_stats_ops = { + .counter_alloc_stats = ionic_counter_alloc_stats, + .counter_dealloc = ionic_counter_dealloc, + .counter_bind_qp = ionic_counter_bind_qp, + .counter_unbind_qp = 
ionic_counter_unbind_qp, + .counter_update_stats = ionic_counter_update_stats, +}; + +void ionic_stats_init(struct ionic_ibdev *dev) +{ + u16 stats_type = dev->lif_cfg.stats_type; + int rc; + + if (stats_type & IONIC_LIF_RDMA_STAT_GLOBAL) { + rc = ionic_init_hw_stats(dev); + if (rc) + ibdev_dbg(&dev->ibdev, "Failed to init hw stats\n"); + else + ib_set_device_ops(&dev->ibdev, &ionic_hw_stats_ops); + } + + if (stats_type & IONIC_LIF_RDMA_STAT_QP) { + dev->counter_stats = kzalloc(sizeof(*dev->counter_stats), + GFP_KERNEL); + if (!dev->counter_stats) + return; + + rc = ionic_alloc_counters(dev); + if (rc) { + ibdev_dbg(&dev->ibdev, "Failed to init counter stats\n"); + kfree(dev->counter_stats); + dev->counter_stats = NULL; + return; + } + + xa_init_flags(&dev->counter_stats->xa_counters, XA_FLAGS_ALLOC); + + ib_set_device_ops(&dev->ibdev, &ionic_counter_stats_ops); + } +} + +void ionic_stats_cleanup(struct ionic_ibdev *dev) +{ + if (dev->counter_stats) { + xa_destroy(&dev->counter_stats->xa_counters); + kfree(dev->counter_stats->hdr); + kfree(dev->counter_stats->stats_hdrs); + kfree(dev->counter_stats); + dev->counter_stats = NULL; + } + + kfree(dev->hw_stats); + kfree(dev->hw_stats_buf); + kfree(dev->hw_stats_hdrs); +} diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c index 5f51873af350..164046d00e5d 100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.c +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c @@ -289,6 +289,7 @@ static void ionic_destroy_ibdev(struct ionic_ibdev *dev) { ionic_kill_rdma_admin(dev, false); ib_unregister_device(&dev->ibdev); + ionic_stats_cleanup(dev); ionic_destroy_rdma_admin(dev); ionic_destroy_resids(dev); WARN_ON(!xa_empty(&dev->qp_tbl)); @@ -346,6 +347,8 @@ static struct ionic_ibdev *ionic_create_ibdev(struct ionic_aux_dev *ionic_adev) ib_set_device_ops(&dev->ibdev, &ionic_dev_ops); + ionic_stats_init(dev); + rc = ib_register_device(ibdev, "ionic_%d", ibdev->dev.parent); if (rc) goto err_register; @@ -353,6 +356,7 @@ static struct ionic_ibdev *ionic_create_ibdev(struct ionic_aux_dev *ionic_adev) return dev; err_register: + ionic_stats_cleanup(dev); err_admin: ionic_kill_rdma_admin(dev, false); ionic_destroy_rdma_admin(dev); diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.h b/drivers/infiniband/hw/ionic/ionic_ibdev.h index c750e049fecc..b7a1a57bae03 100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.h +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.h @@ -30,6 +30,7 @@ #define IONIC_PKEY_TBL_LEN 1 #define IONIC_GID_TBL_LEN 256 +#define IONIC_MAX_QPID 0xffffff #define IONIC_SPEC_HIGH 8 #define IONIC_MAX_PD 1024 #define IONIC_SPEC_HIGH 8 @@ -109,6 +110,12 @@ struct ionic_ibdev { atomic_t admin_state; struct ionic_eq **eq_vec; + + struct ionic_v1_stat *hw_stats; + void *hw_stats_buf; + struct rdma_stat_desc *hw_stats_hdrs; + struct ionic_counter_stats *counter_stats; + int hw_stats_count; }; struct ionic_eq { @@ -320,6 +327,18 @@ struct ionic_mr { bool created; }; +struct ionic_counter_stats { + int queue_stats_count; + struct ionic_v1_stat *hdr; + struct rdma_stat_desc *stats_hdrs; + struct xarray xa_counters; +}; + +struct ionic_counter { + void *vals; + struct list_head qp_list; +}; + static inline struct ionic_ibdev *to_ionic_ibdev(struct ib_device *ibdev) { return container_of(ibdev, struct ionic_ibdev, ibdev); @@ -480,6 +499,10 @@ int ionic_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, int ionic_poll_cq(struct ib_cq *ibcq, int nwc, struct ib_wc *wc); int ionic_req_notify_cq(struct ib_cq *ibcq, 
enum ib_cq_notify_flags flags); +/* ionic_hw_stats.c */ +void ionic_stats_init(struct ionic_ibdev *dev); +void ionic_stats_cleanup(struct ionic_ibdev *dev); + /* ionic_pgtbl.c */ __le64 ionic_pgtbl_dma(struct ionic_tbl_buf *buf, u64 va); __be64 ionic_pgtbl_off(struct ionic_tbl_buf *buf, u64 va); From 6603fbf158e9a34eb0961579becee377c1efa1ee Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Wed, 3 Sep 2025 11:46:06 +0530 Subject: [PATCH 48/85] RDMA/ionic: Add Makefile/Kconfig to kernel build environment Add ionic to the kernel build environment. Co-developed-by: Allen Hubbe Signed-off-by: Allen Hubbe Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20250903061606.4139957-15-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- .../device_drivers/ethernet/index.rst | 1 + .../ethernet/pensando/ionic_rdma.rst | 52 +++++++++++++++++++ MAINTAINERS | 9 ++++ drivers/infiniband/Kconfig | 1 + drivers/infiniband/hw/Makefile | 1 + drivers/infiniband/hw/ionic/Kconfig | 15 ++++++ drivers/infiniband/hw/ionic/Makefile | 9 ++++ 7 files changed, 88 insertions(+) create mode 100644 Documentation/networking/device_drivers/ethernet/pensando/ionic_rdma.rst create mode 100644 drivers/infiniband/hw/ionic/Kconfig create mode 100644 drivers/infiniband/hw/ionic/Makefile diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst index 40ac552641a3..1fabfe02eb12 100644 --- a/Documentation/networking/device_drivers/ethernet/index.rst +++ b/Documentation/networking/device_drivers/ethernet/index.rst @@ -50,6 +50,7 @@ Contents: neterion/s2io netronome/nfp pensando/ionic + pensando/ionic_rdma smsc/smc9 stmicro/stmmac ti/cpsw diff --git a/Documentation/networking/device_drivers/ethernet/pensando/ionic_rdma.rst b/Documentation/networking/device_drivers/ethernet/pensando/ionic_rdma.rst new file mode 100644 index 000000000000..42eb461d5f85 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/pensando/ionic_rdma.rst @@ -0,0 +1,52 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +=========================================================== +RDMA Driver for the AMD Pensando(R) Ethernet adapter family +=========================================================== + +AMD Pensando RDMA driver. +Copyright (C) 2018-2025, Advanced Micro Devices, Inc. + +Overview +======== + +The ionic_rdma driver provides Remote Direct Memory Access functionality +for AMD Pensando DSC (Distributed Services Card) devices. This driver +implements RDMA capabilities as an auxiliary driver that operates in +conjunction with the ionic ethernet driver. + +The ionic ethernet driver detects RDMA capability during device +initialization and creates auxiliary devices that the ionic_rdma driver +binds to, establishing the RDMA data path and control interfaces. + +Identifying the Adapter +======================= + +See Documentation/networking/device_drivers/ethernet/pensando/ionic.rst +for more information on identifying the adapter. + +Enabling the driver +=================== + +The ionic_rdma driver depends on the ionic ethernet driver. +See Documentation/networking/device_drivers/ethernet/pensando/ionic.rst +for detailed information on enabling and configuring the ionic driver. + +The ionic_rdma driver is enabled via the standard kernel configuration system, +using the make command:: + + make oldconfig/menuconfig/etc. 
+ +The driver is located in the menu structure at: + + -> Device Drivers + -> InfiniBand support + -> AMD Pensando DSC RDMA/RoCE Support + +Support +======= + +For general Linux RDMA support, please use the RDMA mailing +list, which is monitored by AMD Pensando personnel:: + + linux-rdma@vger.kernel.org diff --git a/MAINTAINERS b/MAINTAINERS index fe168477caa4..088558cc8b18 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1176,6 +1176,15 @@ F: Documentation/networking/device_drivers/ethernet/amd/pds_core.rst F: drivers/net/ethernet/amd/pds_core/ F: include/linux/pds/ +AMD PENSANDO RDMA DRIVER +M: Abhijit Gangurde +M: Allen Hubbe +L: linux-rdma@vger.kernel.org +S: Maintained +F: Documentation/networking/device_drivers/ethernet/pensando/ionic_rdma.rst +F: drivers/infiniband/hw/ionic/ +F: include/uapi/rdma/ionic-abi.h + AMD PMC DRIVER M: Shyam Sundar S K L: platform-driver-x86@vger.kernel.org diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 3a394cd772f6..f0323f1d6f01 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -85,6 +85,7 @@ source "drivers/infiniband/hw/efa/Kconfig" source "drivers/infiniband/hw/erdma/Kconfig" source "drivers/infiniband/hw/hfi1/Kconfig" source "drivers/infiniband/hw/hns/Kconfig" +source "drivers/infiniband/hw/ionic/Kconfig" source "drivers/infiniband/hw/irdma/Kconfig" source "drivers/infiniband/hw/mana/Kconfig" source "drivers/infiniband/hw/mlx4/Kconfig" diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index df61b2299ec0..b706dc0d0263 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -14,3 +14,4 @@ obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns/ obj-$(CONFIG_INFINIBAND_QEDR) += qedr/ obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re/ obj-$(CONFIG_INFINIBAND_ERDMA) += erdma/ +obj-$(CONFIG_INFINIBAND_IONIC) += ionic/ diff --git a/drivers/infiniband/hw/ionic/Kconfig b/drivers/infiniband/hw/ionic/Kconfig new file mode 100644 index 000000000000..de6f10e9b6e9 --- /dev/null +++ b/drivers/infiniband/hw/ionic/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2018-2025, Advanced Micro Devices, Inc. + +config INFINIBAND_IONIC + tristate "AMD Pensando DSC RDMA/RoCE Support" + depends on NETDEVICES && ETHERNET && PCI && INET && IONIC + help + This enables RDMA/RoCE support for the AMD Pensando family of + Distributed Services Cards (DSCs). + + To learn more, visit our website at + . + + To compile this driver as a module, choose M here. The module + will be called ionic_rdma. diff --git a/drivers/infiniband/hw/ionic/Makefile b/drivers/infiniband/hw/ionic/Makefile new file mode 100644 index 000000000000..957973742820 --- /dev/null +++ b/drivers/infiniband/hw/ionic/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 + +ccflags-y := -I $(srctree)/drivers/net/ethernet/pensando/ionic + +obj-$(CONFIG_INFINIBAND_IONIC) += ionic_rdma.o + +ionic_rdma-y := \ + ionic_ibdev.o ionic_lif_cfg.o ionic_queue.o ionic_pgtbl.o ionic_admin.o \ + ionic_controlpath.o ionic_datapath.o ionic_hw_stats.o From 72d0b87e1e7c34ebb89deae75f5306264578b01f Mon Sep 17 00:00:00 2001 From: Anantha Prabhu Date: Mon, 8 Sep 2025 15:15:15 +0530 Subject: [PATCH 49/85] RDMA/bnxt_re: Update sysfs entries with appropriate data Updated the existing sysfs entries with correct data. This change is to align the behavior with our OOB driver. Added "board_id" sysfs entry which will provide the VPD Part number, if exists. 
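For reference, the new attribute appears next to hw_rev and hca_type under the RDMA device's sysfs directory. A minimal userspace sketch that reads it could look like the snippet below; the device name "bnxt_re0" is an assumption, substitute the name reported by ibv_devices on the target system.

    /* Hedged example, not part of the patch: read the new board_id attribute. */
    #include <stdio.h>

    int main(void)
    {
        char buf[64];
        /* "bnxt_re0" is a placeholder device name */
        FILE *f = fopen("/sys/class/infiniband/bnxt_re0/board_id", "r");

        if (!f)
            return 1;
        if (fgets(buf, sizeof(buf), f))
            /* VPD part number on a PF, "0x<device-id>-VF" on a VF */
            printf("board_id: %s", buf);
        fclose(f);
        return 0;
    }
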
Signed-off-by: Anantha Prabhu Signed-off-by: Saravanan Vajravel Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250908094516.18222-2-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 2 + drivers/infiniband/hw/bnxt_re/main.c | 50 ++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 297e0b79e885..ac0944b28ca0 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -228,6 +228,8 @@ struct bnxt_re_dev { struct workqueue_struct *dcb_wq; struct dentry *cc_config; struct bnxt_re_dbg_cc_config_params *cc_config_params; +#define BNXT_VPD_FLD_LEN 32 + char board_partno[BNXT_VPD_FLD_LEN]; /* RoCE mirror */ u16 mirror_vnic_id; union ib_gid ugid; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index e5a6f36e1656..bdc9022c19e1 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1031,7 +1031,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct bnxt_re_dev *rdev = rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); - return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->vendor); + return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->revision); } static DEVICE_ATTR_RO(hw_rev); @@ -1041,13 +1041,31 @@ static ssize_t hca_type_show(struct device *device, struct bnxt_re_dev *rdev = rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); - return sysfs_emit(buf, "%s\n", rdev->ibdev.node_desc); + return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->device); } static DEVICE_ATTR_RO(hca_type); +static ssize_t board_id_show(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct bnxt_re_dev *rdev = rdma_device_to_drv_device(device, + struct bnxt_re_dev, ibdev); + char buffer[BNXT_VPD_FLD_LEN] = {}; + + if (!rdev->is_virtfn) + memcpy(buffer, rdev->board_partno, BNXT_VPD_FLD_LEN - 1); + else + scnprintf(buffer, BNXT_VPD_FLD_LEN, "0x%x-VF", + rdev->en_dev->pdev->device); + + return sysfs_emit(buf, "%s\n", buffer); +} +static DEVICE_ATTR_RO(board_id); + static struct attribute *bnxt_re_attributes[] = { &dev_attr_hw_rev.attr, &dev_attr_hca_type.attr, + &dev_attr_board_id.attr, NULL }; @@ -2004,6 +2022,31 @@ static void bnxt_re_net_register_async_event(struct bnxt_re_dev *rdev) ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE); } +static void bnxt_re_read_vpd_info(struct bnxt_re_dev *rdev) +{ + struct pci_dev *pdev = rdev->en_dev->pdev; + unsigned int vpd_size, kw_len; + int pos, size; + u8 *vpd_data; + + vpd_data = pci_vpd_alloc(pdev, &vpd_size); + if (IS_ERR(vpd_data)) { + pci_warn(pdev, "Unable to read VPD, err=%ld\n", + PTR_ERR(vpd_data)); + return; + } + + pos = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, + PCI_VPD_RO_KEYWORD_PARTNO, &kw_len); + if (pos < 0) + goto free; + + size = min_t(int, kw_len, BNXT_VPD_FLD_LEN - 1); + memcpy(rdev->board_partno, &vpd_data[pos], size); +free: + kfree(vpd_data); +} + static int bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev = rdev->en_dev; @@ -2377,6 +2420,9 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) bnxt_re_init_dcb_wq(rdev); bnxt_re_net_register_async_event(rdev); + if (!rdev->is_virtfn) + bnxt_re_read_vpd_info(rdev); + rc = bnxt_re_get_stats3_ctx(rdev); if (rc) goto fail; From 
eba165b2909716c690351d21d9b6b6d95b9e1f59 Mon Sep 17 00:00:00 2001 From: Shravya KN Date: Mon, 8 Sep 2025 15:15:16 +0530 Subject: [PATCH 50/85] RDMA/bnxt_re: Avoid GID level QoS update from the driver The driver inserts a VLAN header into RoCE packets when the traffic was untagged by modifying the existing GID entries. This has caused the firmware to enforce only VLAN-based priority mappings, ignoring other valid priority configurations set via APP TLVs (e.g., DSCP selectors). Driver now has support for selecting the service level (vlan id) and traffic class (dscp) during modify_qp. So no need to override the priority update using the update gid method. Hence removing the code that handles the above operation. Signed-off-by: Shravya KN Link: https://patch.msgid.link/20250908094516.18222-3-kalesh-anakkur.purayil@broadcom.com Reviewed-by: Saravanan Vajravel Reviewed-by: Selvin Xavier Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 4 - drivers/infiniband/hw/bnxt_re/main.c | 97 ------------------------ drivers/infiniband/hw/bnxt_re/qplib_sp.c | 40 ---------- 3 files changed, 141 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index ac0944b28ca0..3485e495ac6a 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -175,7 +175,6 @@ struct bnxt_re_dev { #define BNXT_RE_FLAG_STATS_CTX3_ALLOC 1 #define BNXT_RE_FLAG_HAVE_L2_REF 3 #define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4 -#define BNXT_RE_FLAG_QOS_WORK_REG 5 #define BNXT_RE_FLAG_RESOURCES_ALLOCATED 7 #define BNXT_RE_FLAG_RESOURCES_INITIALIZED 8 #define BNXT_RE_FLAG_ERR_DEVICE_DETACHED 17 @@ -188,9 +187,6 @@ struct bnxt_re_dev { int id; - struct delayed_work worker; - u8 cur_prio_map; - /* RCFW Channel */ struct bnxt_qplib_rcfw rcfw; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index bdc9022c19e1..d8d3999d329e 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1927,81 +1927,6 @@ static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev) mutex_unlock(&rdev->qp_lock); } -static int bnxt_re_update_gid(struct bnxt_re_dev *rdev) -{ - struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl; - struct bnxt_qplib_gid gid; - u16 gid_idx, index; - int rc = 0; - - if (!ib_device_try_get(&rdev->ibdev)) - return 0; - - for (index = 0; index < sgid_tbl->active; index++) { - gid_idx = sgid_tbl->hw_id[index]; - - if (!memcmp(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero, - sizeof(bnxt_qplib_gid_zero))) - continue; - /* need to modify the VLAN enable setting of non VLAN GID only - * as setting is done for VLAN GID while adding GID - */ - if (sgid_tbl->vlan[index]) - continue; - - memcpy(&gid, &sgid_tbl->tbl[index], sizeof(gid)); - - rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx, - rdev->qplib_res.netdev->dev_addr); - } - - ib_device_put(&rdev->ibdev); - return rc; -} - -static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev) -{ - u32 prio_map = 0, tmp_map = 0; - struct net_device *netdev; - struct dcb_app app = {}; - - netdev = rdev->netdev; - - app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE; - app.protocol = ETH_P_IBOE; - tmp_map = dcb_ieee_getapp_mask(netdev, &app); - prio_map = tmp_map; - - app.selector = IEEE_8021QAZ_APP_SEL_DGRAM; - app.protocol = ROCE_V2_UDP_DPORT; - tmp_map = dcb_ieee_getapp_mask(netdev, &app); - prio_map |= tmp_map; - - return prio_map; -} - -static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) -{ - u8 prio_map 
= 0; - - /* Get priority for roce */ - prio_map = bnxt_re_get_priority_mask(rdev); - - if (prio_map == rdev->cur_prio_map) - return 0; - rdev->cur_prio_map = prio_map; - /* Actual priorities are not programmed as they are already - * done by L2 driver; just enable or disable priority vlan tagging - */ - if ((prio_map == 0 && rdev->qplib_res.prio) || - (prio_map != 0 && !rdev->qplib_res.prio)) { - rdev->qplib_res.prio = prio_map; - bnxt_re_update_gid(rdev); - } - - return 0; -} - static void bnxt_re_net_unregister_async_event(struct bnxt_re_dev *rdev) { if (rdev->is_virtfn) @@ -2197,9 +2122,6 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) bnxt_re_net_unregister_async_event(rdev); bnxt_re_uninit_dcb_wq(rdev); - if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags)) - cancel_delayed_work_sync(&rdev->worker); - bnxt_re_put_stats3_ctx(rdev); if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, @@ -2234,16 +2156,6 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) } } -/* worker thread for polling periodic events. Now used for QoS programming*/ -static void bnxt_re_worker(struct work_struct *work) -{ - struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev, - worker.work); - - bnxt_re_setup_qos(rdev); - schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); -} - static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) { struct bnxt_re_ring_attr rattr = {}; @@ -2399,15 +2311,6 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) if (rc) ibdev_warn(&rdev->ibdev, "Failed to query CC defaults\n"); - rc = bnxt_re_setup_qos(rdev); - if (rc) - ibdev_info(&rdev->ibdev, - "RoCE priority not yet configured\n"); - - INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker); - set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags); - schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); - if (!(rdev->qplib_res.en_dev->flags & BNXT_EN_FLAG_ROCE_VF_RES_MGMT)) bnxt_re_vf_res_config(rdev); } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 698fcad9f034..9ef581ed785c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -400,46 +400,6 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, return 0; } -int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, - struct bnxt_qplib_gid *gid, u16 gid_idx, - const u8 *smac) -{ - struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl, - struct bnxt_qplib_res, - sgid_tbl); - struct bnxt_qplib_rcfw *rcfw = res->rcfw; - struct creq_modify_gid_resp resp = {}; - struct bnxt_qplib_cmdqmsg msg = {}; - struct cmdq_modify_gid req = {}; - int rc; - - bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, - CMDQ_BASE_OPCODE_MODIFY_GID, - sizeof(req)); - - req.gid[0] = cpu_to_be32(((u32 *)gid->data)[3]); - req.gid[1] = cpu_to_be32(((u32 *)gid->data)[2]); - req.gid[2] = cpu_to_be32(((u32 *)gid->data)[1]); - req.gid[3] = cpu_to_be32(((u32 *)gid->data)[0]); - if (res->prio) { - req.vlan |= cpu_to_le16 - (CMDQ_ADD_GID_VLAN_TPID_TPID_8100 | - CMDQ_ADD_GID_VLAN_VLAN_EN); - } - - /* MAC in network format */ - req.src_mac[0] = cpu_to_be16(((u16 *)smac)[0]); - req.src_mac[1] = cpu_to_be16(((u16 *)smac)[1]); - req.src_mac[2] = cpu_to_be16(((u16 *)smac)[2]); - - req.gid_index = cpu_to_le16(gid_idx); - - bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), - sizeof(resp), 0); - rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); - return rc; -} - /* AH */ int 
bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, bool block) From 2bbe1255fcf19c5eb300efb6cb5ad98d66fdae2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Bugge?= Date: Fri, 12 Sep 2025 12:05:20 +0200 Subject: [PATCH 51/85] RDMA/cm: Rate limit destroy CM ID timeout error message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the destroy CM ID timeout kicks in, you typically get a storm of them which creates a log flooding. Hence, change pr_err() to pr_err_ratelimited() in cm_destroy_id_wait_timeout(). Fixes: 96d9cbe2f2ff ("RDMA/cm: add timeout to cm_destroy_id wait") Signed-off-by: Håkon Bugge Link: https://patch.msgid.link/20250912100525.531102-1-haakon.bugge@oracle.com Reviewed-by: Zhu Yanjun Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 92678e438ff4..01bede8ba105 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1049,8 +1049,8 @@ static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id, struct cm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct cm_id_private, id); - pr_err("%s: cm_id=%p timed out. state %d -> %d, refcnt=%d\n", __func__, - cm_id, old_state, cm_id->state, refcount_read(&cm_id_priv->refcount)); + pr_err_ratelimited("%s: cm_id=%p timed out. state %d -> %d, refcnt=%d\n", __func__, + cm_id, old_state, cm_id->state, refcount_read(&cm_id_priv->refcount)); } static void cm_destroy_id(struct ib_cm_id *cm_id, int err) From 2bd7dd383609f11330814ecc0d3c10b67073a6be Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 15 Sep 2025 00:59:32 -0700 Subject: [PATCH 52/85] RDMA/mana_ib: Extend modify QP Extend modify QP to support further attributes: local_ack_timeout, UD qkey, rate_limit, qp_access_flags, flow_label, max_rd_atomic. 
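For illustration, a kernel ULP reaches these fields through the standard ib_modify_qp() path. A minimal sketch, assuming an already-connected RC QP, with error handling and the usual state transitions elided:

    /* Hedged sketch, not part of the patch: exercise a few of the newly
     * plumbed attributes via ib_modify_qp().
     */
    #include <rdma/ib_verbs.h>

    static int example_tune_qp(struct ib_qp *qp)
    {
        struct ib_qp_attr attr = {
            .timeout         = 14, /* local ACK timeout = 4.096us * 2^14 */
            .max_rd_atomic   = 4,  /* outstanding RDMA reads/atomics as initiator */
            .qp_access_flags = IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE,
        };

        return ib_modify_qp(qp, &attr,
                            IB_QP_TIMEOUT | IB_QP_MAX_QP_RD_ATOMIC |
                            IB_QP_ACCESS_FLAGS);
    }
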
Signed-off-by: Shiraz Saleem Signed-off-by: Konstantin Taranov Link: https://patch.msgid.link/1757923172-4475-1-git-send-email-kotaranov@linux.microsoft.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mana/mana_ib.h | 11 +++++++++-- drivers/infiniband/hw/mana/qp.c | 9 +++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h index af09a3e6ccb7..9d36232ed880 100644 --- a/drivers/infiniband/hw/mana/mana_ib.h +++ b/drivers/infiniband/hw/mana/mana_ib.h @@ -412,7 +412,7 @@ struct mana_ib_ah_attr { u8 traffic_class; u16 src_port; u16 dest_port; - u32 reserved; + u32 flow_label; }; struct mana_rnic_set_qp_state_req { @@ -429,8 +429,15 @@ struct mana_rnic_set_qp_state_req { u32 retry_cnt; u32 rnr_retry; u32 min_rnr_timer; - u32 reserved; + u32 rate_limit; struct mana_ib_ah_attr ah_attr; + u64 reserved1; + u32 qkey; + u32 qp_access_flags; + u8 local_ack_timeout; + u8 max_rd_atomic; + u16 reserved2; + u32 reserved3; }; /* HW Data */ struct mana_rnic_set_qp_state_resp { diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index a6bf4d539e67..48c1f4977f21 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -735,6 +735,8 @@ static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int err; mana_gd_init_req_hdr(&req.hdr, MANA_IB_SET_QP_STATE, sizeof(req), sizeof(resp)); + + req.hdr.req.msg_version = GDMA_MESSAGE_V3; req.hdr.dev_id = mdev->gdma_dev->dev_id; req.adapter = mdev->adapter_handle; req.qp_handle = qp->qp_handle; @@ -748,6 +750,12 @@ static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, req.retry_cnt = attr->retry_cnt; req.rnr_retry = attr->rnr_retry; req.min_rnr_timer = attr->min_rnr_timer; + req.rate_limit = attr->rate_limit; + req.qkey = attr->qkey; + req.local_ack_timeout = attr->timeout; + req.qp_access_flags = attr->qp_access_flags; + req.max_rd_atomic = attr->max_rd_atomic; + if (attr_mask & IB_QP_AV) { ndev = mana_ib_get_netdev(&mdev->ib_dev, ibqp->port); if (!ndev) { @@ -774,6 +782,7 @@ static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ibqp->qp_num, attr->dest_qp_num); req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class >> 2; req.ah_attr.hop_limit = attr->ah_attr.grh.hop_limit; + req.ah_attr.flow_label = attr->ah_attr.grh.flow_label; } err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); From 0c2b80cac96e199674de464a4b619bebab3d7761 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 27 Aug 2025 10:25:30 -0500 Subject: [PATCH 53/85] RDMA/irdma: Refactor GEN2 auxiliary driver Refactor the irdma auxiliary driver and associated interfaces out of main.c and into a standalone GEN2-specific source file and rename as gen_2 driver. This is in preparation for adding GEN3 auxiliary drivers. Each HW generation will have its own gen-specific interface file. Additionally, move the Address Handle hash table and associated locks under rf struct. This will allow GEN3 code to migrate to use it easily. 
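The shape this refactor establishes is one auxiliary driver per HW generation, each in its own gen-specific interface file. A purely illustrative sketch of that pattern follows; the names "example.roce" and "gen_x" are placeholders, not the real GEN3 identifiers:

    #include <linux/auxiliary_bus.h>
    #include <linux/module.h>

    static int genx_probe(struct auxiliary_device *aux_dev,
                          const struct auxiliary_device_id *id)
    {
        /* gen-specific fill_device_info(), interrupt setup and HW init */
        return 0;
    }

    static void genx_remove(struct auxiliary_device *aux_dev)
    {
        /* gen-specific teardown, mirroring probe in reverse */
    }

    static const struct auxiliary_device_id genx_id_table[] = {
        { .name = "example.roce", },
        {}
    };
    MODULE_DEVICE_TABLE(auxiliary, genx_id_table);

    static struct auxiliary_driver genx_auxiliary_drv = {
        .name = "gen_x",
        .id_table = genx_id_table,
        .probe = genx_probe,
        .remove = genx_remove,
    };

Registration would then go through the same auxiliary_driver_register() call that main.c now issues for the gen_2 driver.
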
Signed-off-by: Mustafa Ismail Co-developed-by: Tatyana Nikolova Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250827152545.2056-2-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/Makefile | 1 + drivers/infiniband/hw/irdma/i40iw_if.c | 2 + drivers/infiniband/hw/irdma/icrdma_if.c | 341 +++++++++++++++++++++++ drivers/infiniband/hw/irdma/main.c | 347 +----------------------- drivers/infiniband/hw/irdma/main.h | 7 +- drivers/infiniband/hw/irdma/verbs.c | 16 +- 6 files changed, 360 insertions(+), 354 deletions(-) create mode 100644 drivers/infiniband/hw/irdma/icrdma_if.c diff --git a/drivers/infiniband/hw/irdma/Makefile b/drivers/infiniband/hw/irdma/Makefile index 48c3854235a0..2522e4ca650b 100644 --- a/drivers/infiniband/hw/irdma/Makefile +++ b/drivers/infiniband/hw/irdma/Makefile @@ -13,6 +13,7 @@ irdma-objs := cm.o \ hw.o \ i40iw_hw.o \ i40iw_if.o \ + icrdma_if.o \ icrdma_hw.o \ main.o \ pble.o \ diff --git a/drivers/infiniband/hw/irdma/i40iw_if.c b/drivers/infiniband/hw/irdma/i40iw_if.c index cc50a7070371..6fa807ef4545 100644 --- a/drivers/infiniband/hw/irdma/i40iw_if.c +++ b/drivers/infiniband/hw/irdma/i40iw_if.c @@ -75,6 +75,8 @@ static void i40iw_fill_device_info(struct irdma_device *iwdev, struct i40e_info struct irdma_pci_f *rf = iwdev->rf; rf->rdma_ver = IRDMA_GEN_1; + rf->sc_dev.hw = &rf->hw; + rf->sc_dev.hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_1; rf->gen_ops.request_reset = i40iw_request_reset; rf->pcidev = cdev_info->pcidev; rf->pf_id = cdev_info->fid; diff --git a/drivers/infiniband/hw/irdma/icrdma_if.c b/drivers/infiniband/hw/irdma/icrdma_if.c new file mode 100644 index 000000000000..db7c50b63b1d --- /dev/null +++ b/drivers/infiniband/hw/irdma/icrdma_if.c @@ -0,0 +1,341 @@ +// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB +/* Copyright (c) 2015 - 2024 Intel Corporation */ + +#include "main.h" +#include + +static void icrdma_prep_tc_change(struct irdma_device *iwdev) +{ + iwdev->vsi.tc_change_pending = true; + irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_SUSPEND); + + /* Wait for all qp's to suspend */ + wait_event_timeout(iwdev->suspend_wq, + !atomic_read(&iwdev->vsi.qp_suspend_reqs), + msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS)); + irdma_ws_reset(&iwdev->vsi); +} + +static void icrdma_fill_qos_info(struct irdma_l2params *l2params, + struct iidc_rdma_qos_params *qos_info) +{ + int i; + + l2params->num_tc = qos_info->num_tc; + l2params->vsi_prio_type = qos_info->vport_priority_type; + l2params->vsi_rel_bw = qos_info->vport_relative_bw; + for (i = 0; i < l2params->num_tc; i++) { + l2params->tc_info[i].egress_virt_up = + qos_info->tc_info[i].egress_virt_up; + l2params->tc_info[i].ingress_virt_up = + qos_info->tc_info[i].ingress_virt_up; + l2params->tc_info[i].prio_type = qos_info->tc_info[i].prio_type; + l2params->tc_info[i].rel_bw = qos_info->tc_info[i].rel_bw; + l2params->tc_info[i].tc_ctx = qos_info->tc_info[i].tc_ctx; + } + for (i = 0; i < IIDC_MAX_USER_PRIORITY; i++) + l2params->up2tc[i] = qos_info->up2tc[i]; + if (qos_info->pfc_mode == IIDC_DSCP_PFC_MODE) { + l2params->dscp_mode = true; + memcpy(l2params->dscp_map, qos_info->dscp_map, sizeof(l2params->dscp_map)); + } +} + +static void icrdma_iidc_event_handler(struct iidc_rdma_core_dev_info *cdev_info, + struct iidc_rdma_event *event) +{ + struct irdma_device *iwdev = dev_get_drvdata(&cdev_info->adev->dev); + struct irdma_l2params l2params = {}; + + if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_MTU_CHANGE)) { + ibdev_dbg(&iwdev->ibdev, 
"CLNT: new MTU = %d\n", iwdev->netdev->mtu); + if (iwdev->vsi.mtu != iwdev->netdev->mtu) { + l2params.mtu = iwdev->netdev->mtu; + l2params.mtu_changed = true; + irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev); + irdma_change_l2params(&iwdev->vsi, &l2params); + } + } else if (*event->type & BIT(IIDC_RDMA_EVENT_BEFORE_TC_CHANGE)) { + if (iwdev->vsi.tc_change_pending) + return; + + icrdma_prep_tc_change(iwdev); + } else if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_TC_CHANGE)) { + struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->iidc_priv; + + if (!iwdev->vsi.tc_change_pending) + return; + + l2params.tc_changed = true; + ibdev_dbg(&iwdev->ibdev, "CLNT: TC Change\n"); + + icrdma_fill_qos_info(&l2params, &idc_priv->qos_info); + if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) + iwdev->dcb_vlan_mode = + l2params.num_tc > 1 && !l2params.dscp_mode; + irdma_change_l2params(&iwdev->vsi, &l2params); + } else if (*event->type & BIT(IIDC_RDMA_EVENT_CRIT_ERR)) { + ibdev_warn(&iwdev->ibdev, "ICE OICR event notification: oicr = 0x%08x\n", + event->reg); + if (event->reg & IRDMAPFINT_OICR_PE_CRITERR_M) { + u32 pe_criterr; + + pe_criterr = readl(iwdev->rf->sc_dev.hw_regs[IRDMA_GLPE_CRITERR]); +#define IRDMA_Q1_RESOURCE_ERR 0x0001024d + if (pe_criterr != IRDMA_Q1_RESOURCE_ERR) { + ibdev_err(&iwdev->ibdev, "critical PE Error, GLPE_CRITERR=0x%08x\n", + pe_criterr); + iwdev->rf->reset = true; + } else { + ibdev_warn(&iwdev->ibdev, "Q1 Resource Check\n"); + } + } + if (event->reg & IRDMAPFINT_OICR_HMC_ERR_M) { + ibdev_err(&iwdev->ibdev, "HMC Error\n"); + iwdev->rf->reset = true; + } + if (event->reg & IRDMAPFINT_OICR_PE_PUSH_M) { + ibdev_err(&iwdev->ibdev, "PE Push Error\n"); + iwdev->rf->reset = true; + } + if (iwdev->rf->reset) + iwdev->rf->gen_ops.request_reset(iwdev->rf); + } +} + +/** + * icrdma_lan_register_qset - Register qset with LAN driver + * @vsi: vsi structure + * @tc_node: Traffic class node + */ +static int icrdma_lan_register_qset(struct irdma_sc_vsi *vsi, + struct irdma_ws_node *tc_node) +{ + struct irdma_device *iwdev = vsi->back_vsi; + struct iidc_rdma_core_dev_info *cdev_info = iwdev->rf->cdev; + struct iidc_rdma_qset_params qset = {}; + int ret; + + qset.qs_handle = tc_node->qs_handle; + qset.tc = tc_node->traffic_class; + qset.vport_id = vsi->vsi_idx; + ret = ice_add_rdma_qset(cdev_info, &qset); + if (ret) { + ibdev_dbg(&iwdev->ibdev, "WS: LAN alloc_res for rdma qset failed.\n"); + return ret; + } + + tc_node->l2_sched_node_id = qset.teid; + vsi->qos[tc_node->user_pri].l2_sched_node_id = qset.teid; + + return 0; +} + +/** + * icrdma_lan_unregister_qset - Unregister qset with LAN driver + * @vsi: vsi structure + * @tc_node: Traffic class node + */ +static void icrdma_lan_unregister_qset(struct irdma_sc_vsi *vsi, + struct irdma_ws_node *tc_node) +{ + struct irdma_device *iwdev = vsi->back_vsi; + struct iidc_rdma_core_dev_info *cdev_info = iwdev->rf->cdev; + struct iidc_rdma_qset_params qset = {}; + + qset.qs_handle = tc_node->qs_handle; + qset.tc = tc_node->traffic_class; + qset.vport_id = vsi->vsi_idx; + qset.teid = tc_node->l2_sched_node_id; + + if (ice_del_rdma_qset(cdev_info, &qset)) + ibdev_dbg(&iwdev->ibdev, "WS: LAN free_res for rdma qset failed.\n"); +} + +/** + * icrdma_request_reset - Request a reset + * @rf: RDMA PCI function + */ +static void icrdma_request_reset(struct irdma_pci_f *rf) +{ + ibdev_warn(&rf->iwdev->ibdev, "Requesting a reset\n"); + ice_rdma_request_reset(rf->cdev, IIDC_FUNC_RESET); +} + +static int icrdma_init_interrupts(struct irdma_pci_f 
*rf, struct iidc_rdma_core_dev_info *cdev) +{ + int i; + + rf->msix_count = num_online_cpus() + IRDMA_NUM_AEQ_MSIX; + rf->msix_entries = kcalloc(rf->msix_count, sizeof(*rf->msix_entries), + GFP_KERNEL); + if (!rf->msix_entries) + return -ENOMEM; + + for (i = 0; i < rf->msix_count; i++) + if (ice_alloc_rdma_qvector(cdev, &rf->msix_entries[i])) + break; + + if (i < IRDMA_MIN_MSIX) { + while (--i >= 0) + ice_free_rdma_qvector(cdev, &rf->msix_entries[i]); + + kfree(rf->msix_entries); + return -ENOMEM; + } + + rf->msix_count = i; + + return 0; +} + +static void icrdma_deinit_interrupts(struct irdma_pci_f *rf, struct iidc_rdma_core_dev_info *cdev) +{ + int i; + + for (i = 0; i < rf->msix_count; i++) + ice_free_rdma_qvector(cdev, &rf->msix_entries[i]); + + kfree(rf->msix_entries); +} + +static void icrdma_fill_device_info(struct irdma_device *iwdev, + struct iidc_rdma_core_dev_info *cdev_info) +{ + struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->iidc_priv; + struct irdma_pci_f *rf = iwdev->rf; + + rf->sc_dev.hw = &rf->hw; + rf->iwdev = iwdev; + rf->cdev = cdev_info; + rf->hw.hw_addr = idc_priv->hw_addr; + rf->pcidev = cdev_info->pdev; + rf->hw.device = &rf->pcidev->dev; + rf->pf_id = idc_priv->pf_id; + rf->rdma_ver = IRDMA_GEN_2; + rf->sc_dev.hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_2; + + rf->gen_ops.register_qset = icrdma_lan_register_qset; + rf->gen_ops.unregister_qset = icrdma_lan_unregister_qset; + + rf->default_vsi.vsi_idx = idc_priv->vport_id; + rf->protocol_used = + cdev_info->rdma_protocol == IIDC_RDMA_PROTOCOL_ROCEV2 ? + IRDMA_ROCE_PROTOCOL_ONLY : IRDMA_IWARP_PROTOCOL_ONLY; + rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT; + rf->rst_to = IRDMA_RST_TIMEOUT_HZ; + rf->gen_ops.request_reset = icrdma_request_reset; + rf->limits_sel = 7; + mutex_init(&rf->ah_tbl_lock); + + iwdev->netdev = idc_priv->netdev; + iwdev->vsi_num = idc_priv->vport_id; + iwdev->init_state = INITIAL_STATE; + iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT; + iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT; + iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED; + iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE; + if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) + iwdev->roce_mode = true; +} + +static int icrdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_device_id *id) +{ + struct iidc_rdma_core_auxiliary_dev *iidc_adev; + struct iidc_rdma_core_dev_info *cdev_info; + struct iidc_rdma_priv_dev_info *idc_priv; + struct irdma_l2params l2params = {}; + struct irdma_device *iwdev; + struct irdma_pci_f *rf; + int err; + + iidc_adev = container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev); + cdev_info = iidc_adev->cdev_info; + idc_priv = cdev_info->iidc_priv; + + iwdev = ib_alloc_device(irdma_device, ibdev); + if (!iwdev) + return -ENOMEM; + iwdev->rf = kzalloc(sizeof(*rf), GFP_KERNEL); + if (!iwdev->rf) { + ib_dealloc_device(&iwdev->ibdev); + return -ENOMEM; + } + + icrdma_fill_device_info(iwdev, cdev_info); + rf = iwdev->rf; + + err = icrdma_init_interrupts(rf, cdev_info); + if (err) + goto err_init_interrupts; + + err = irdma_ctrl_init_hw(rf); + if (err) + goto err_ctrl_init; + + l2params.mtu = iwdev->netdev->mtu; + icrdma_fill_qos_info(&l2params, &idc_priv->qos_info); + if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) + iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode; + + err = irdma_rt_init_hw(iwdev, &l2params); + if (err) + goto err_rt_init; + + err = irdma_ib_register_device(iwdev); + if (err) + goto err_ibreg; + + ice_rdma_update_vsi_filter(cdev_info, 
iwdev->vsi_num, true); + + ibdev_dbg(&iwdev->ibdev, "INIT: Gen2 PF[%d] device probe success\n", PCI_FUNC(rf->pcidev->devfn)); + auxiliary_set_drvdata(aux_dev, iwdev); + + return 0; + +err_ibreg: + irdma_rt_deinit_hw(iwdev); +err_rt_init: + irdma_ctrl_deinit_hw(rf); +err_ctrl_init: + icrdma_deinit_interrupts(rf, cdev_info); +err_init_interrupts: + kfree(iwdev->rf); + ib_dealloc_device(&iwdev->ibdev); + + return err; +} + +static void icrdma_remove(struct auxiliary_device *aux_dev) +{ + struct iidc_rdma_core_auxiliary_dev *idc_adev = + container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev); + struct iidc_rdma_core_dev_info *cdev_info = idc_adev->cdev_info; + struct irdma_device *iwdev = auxiliary_get_drvdata(aux_dev); + u8 rdma_ver = iwdev->rf->rdma_ver; + + ice_rdma_update_vsi_filter(cdev_info, iwdev->vsi_num, false); + irdma_ib_unregister_device(iwdev); + icrdma_deinit_interrupts(iwdev->rf, cdev_info); + + pr_debug("INIT: Gen[%d] func[%d] device remove success\n", + rdma_ver, PCI_FUNC(cdev_info->pdev->devfn)); +} + +static const struct auxiliary_device_id icrdma_auxiliary_id_table[] = { + {.name = "ice.iwarp", }, + {.name = "ice.roce", }, + {}, +}; + +MODULE_DEVICE_TABLE(auxiliary, icrdma_auxiliary_id_table); + +struct iidc_rdma_core_auxiliary_drv icrdma_core_auxiliary_drv = { + .adrv = { + .name = "gen_2", + .id_table = icrdma_auxiliary_id_table, + .probe = icrdma_probe, + .remove = icrdma_remove, + }, + .event_handler = icrdma_iidc_event_handler, +}; diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index 1e840bbd619d..f703f489a0bf 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -38,19 +38,7 @@ static void irdma_unregister_notifiers(void) unregister_netdevice_notifier(&irdma_netdevice_notifier); } -static void irdma_prep_tc_change(struct irdma_device *iwdev) -{ - iwdev->vsi.tc_change_pending = true; - irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_SUSPEND); - - /* Wait for all qp's to suspend */ - wait_event_timeout(iwdev->suspend_wq, - !atomic_read(&iwdev->vsi.qp_suspend_reqs), - msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS)); - irdma_ws_reset(&iwdev->vsi); -} - -static void irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev) +void irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev) { if (mtu < IRDMA_MIN_MTU_IPV4) ibdev_warn(to_ibdev(dev), "MTU setting [%d] too low for RDMA traffic. Minimum MTU is 576 for IPv4\n", mtu); @@ -58,333 +46,6 @@ static void irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev) ibdev_warn(to_ibdev(dev), "MTU setting [%d] too low for RDMA traffic. 
Minimum MTU is 1280 for IPv6\\n", mtu); } -static void irdma_fill_qos_info(struct irdma_l2params *l2params, - struct iidc_rdma_qos_params *qos_info) -{ - int i; - - l2params->num_tc = qos_info->num_tc; - l2params->vsi_prio_type = qos_info->vport_priority_type; - l2params->vsi_rel_bw = qos_info->vport_relative_bw; - for (i = 0; i < l2params->num_tc; i++) { - l2params->tc_info[i].egress_virt_up = - qos_info->tc_info[i].egress_virt_up; - l2params->tc_info[i].ingress_virt_up = - qos_info->tc_info[i].ingress_virt_up; - l2params->tc_info[i].prio_type = qos_info->tc_info[i].prio_type; - l2params->tc_info[i].rel_bw = qos_info->tc_info[i].rel_bw; - l2params->tc_info[i].tc_ctx = qos_info->tc_info[i].tc_ctx; - } - for (i = 0; i < IIDC_MAX_USER_PRIORITY; i++) - l2params->up2tc[i] = qos_info->up2tc[i]; - if (qos_info->pfc_mode == IIDC_DSCP_PFC_MODE) { - l2params->dscp_mode = true; - memcpy(l2params->dscp_map, qos_info->dscp_map, sizeof(l2params->dscp_map)); - } -} - -static void irdma_iidc_event_handler(struct iidc_rdma_core_dev_info *cdev_info, - struct iidc_rdma_event *event) -{ - struct irdma_device *iwdev = dev_get_drvdata(&cdev_info->adev->dev); - struct irdma_l2params l2params = {}; - - if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_MTU_CHANGE)) { - ibdev_dbg(&iwdev->ibdev, "CLNT: new MTU = %d\n", iwdev->netdev->mtu); - if (iwdev->vsi.mtu != iwdev->netdev->mtu) { - l2params.mtu = iwdev->netdev->mtu; - l2params.mtu_changed = true; - irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev); - irdma_change_l2params(&iwdev->vsi, &l2params); - } - } else if (*event->type & BIT(IIDC_RDMA_EVENT_BEFORE_TC_CHANGE)) { - if (iwdev->vsi.tc_change_pending) - return; - - irdma_prep_tc_change(iwdev); - } else if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_TC_CHANGE)) { - struct iidc_rdma_priv_dev_info *iidc_priv = cdev_info->iidc_priv; - - if (!iwdev->vsi.tc_change_pending) - return; - - l2params.tc_changed = true; - ibdev_dbg(&iwdev->ibdev, "CLNT: TC Change\n"); - - irdma_fill_qos_info(&l2params, &iidc_priv->qos_info); - if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) - iwdev->dcb_vlan_mode = - l2params.num_tc > 1 && !l2params.dscp_mode; - irdma_change_l2params(&iwdev->vsi, &l2params); - } else if (*event->type & BIT(IIDC_RDMA_EVENT_CRIT_ERR)) { - ibdev_warn(&iwdev->ibdev, "ICE OICR event notification: oicr = 0x%08x\n", - event->reg); - if (event->reg & IRDMAPFINT_OICR_PE_CRITERR_M) { - u32 pe_criterr; - - pe_criterr = readl(iwdev->rf->sc_dev.hw_regs[IRDMA_GLPE_CRITERR]); -#define IRDMA_Q1_RESOURCE_ERR 0x0001024d - if (pe_criterr != IRDMA_Q1_RESOURCE_ERR) { - ibdev_err(&iwdev->ibdev, "critical PE Error, GLPE_CRITERR=0x%08x\n", - pe_criterr); - iwdev->rf->reset = true; - } else { - ibdev_warn(&iwdev->ibdev, "Q1 Resource Check\n"); - } - } - if (event->reg & IRDMAPFINT_OICR_HMC_ERR_M) { - ibdev_err(&iwdev->ibdev, "HMC Error\n"); - iwdev->rf->reset = true; - } - if (event->reg & IRDMAPFINT_OICR_PE_PUSH_M) { - ibdev_err(&iwdev->ibdev, "PE Push Error\n"); - iwdev->rf->reset = true; - } - if (iwdev->rf->reset) - iwdev->rf->gen_ops.request_reset(iwdev->rf); - } -} - -/** - * irdma_request_reset - Request a reset - * @rf: RDMA PCI function - */ -static void irdma_request_reset(struct irdma_pci_f *rf) -{ - ibdev_warn(&rf->iwdev->ibdev, "Requesting a reset\n"); - ice_rdma_request_reset(rf->cdev, IIDC_FUNC_RESET); -} - -/** - * irdma_lan_register_qset - Register qset with LAN driver - * @vsi: vsi structure - * @tc_node: Traffic class node - */ -static int irdma_lan_register_qset(struct irdma_sc_vsi *vsi, - struct 
irdma_ws_node *tc_node) -{ - struct irdma_device *iwdev = vsi->back_vsi; - struct iidc_rdma_core_dev_info *cdev_info; - struct iidc_rdma_qset_params qset = {}; - int ret; - - cdev_info = iwdev->rf->cdev; - qset.qs_handle = tc_node->qs_handle; - qset.tc = tc_node->traffic_class; - qset.vport_id = vsi->vsi_idx; - ret = ice_add_rdma_qset(cdev_info, &qset); - if (ret) { - ibdev_dbg(&iwdev->ibdev, "WS: LAN alloc_res for rdma qset failed.\n"); - return ret; - } - - tc_node->l2_sched_node_id = qset.teid; - vsi->qos[tc_node->user_pri].l2_sched_node_id = qset.teid; - - return 0; -} - -/** - * irdma_lan_unregister_qset - Unregister qset with LAN driver - * @vsi: vsi structure - * @tc_node: Traffic class node - */ -static void irdma_lan_unregister_qset(struct irdma_sc_vsi *vsi, - struct irdma_ws_node *tc_node) -{ - struct irdma_device *iwdev = vsi->back_vsi; - struct iidc_rdma_core_dev_info *cdev_info; - struct iidc_rdma_qset_params qset = {}; - - cdev_info = iwdev->rf->cdev; - qset.qs_handle = tc_node->qs_handle; - qset.tc = tc_node->traffic_class; - qset.vport_id = vsi->vsi_idx; - qset.teid = tc_node->l2_sched_node_id; - - if (ice_del_rdma_qset(cdev_info, &qset)) - ibdev_dbg(&iwdev->ibdev, "WS: LAN free_res for rdma qset failed.\n"); -} - -static int irdma_init_interrupts(struct irdma_pci_f *rf, struct iidc_rdma_core_dev_info *cdev) -{ - int i; - - rf->msix_count = num_online_cpus() + IRDMA_NUM_AEQ_MSIX; - rf->msix_entries = kcalloc(rf->msix_count, sizeof(*rf->msix_entries), - GFP_KERNEL); - if (!rf->msix_entries) - return -ENOMEM; - - for (i = 0; i < rf->msix_count; i++) - if (ice_alloc_rdma_qvector(cdev, &rf->msix_entries[i])) - break; - - if (i < IRDMA_MIN_MSIX) { - while (--i >= 0) - ice_free_rdma_qvector(cdev, &rf->msix_entries[i]); - - kfree(rf->msix_entries); - return -ENOMEM; - } - - rf->msix_count = i; - - return 0; -} - -static void irdma_deinit_interrupts(struct irdma_pci_f *rf, struct iidc_rdma_core_dev_info *cdev) -{ - int i; - - for (i = 0; i < rf->msix_count; i++) - ice_free_rdma_qvector(cdev, &rf->msix_entries[i]); - - kfree(rf->msix_entries); -} - -static void irdma_remove(struct auxiliary_device *aux_dev) -{ - struct irdma_device *iwdev = auxiliary_get_drvdata(aux_dev); - struct iidc_rdma_core_auxiliary_dev *iidc_adev; - struct iidc_rdma_core_dev_info *cdev_info; - - iidc_adev = container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev); - cdev_info = iidc_adev->cdev_info; - - ice_rdma_update_vsi_filter(cdev_info, iwdev->vsi_num, false); - irdma_ib_unregister_device(iwdev); - irdma_deinit_interrupts(iwdev->rf, cdev_info); - - kfree(iwdev->rf); - - pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(cdev_info->pdev->devfn)); -} - -static void irdma_fill_device_info(struct irdma_device *iwdev, - struct iidc_rdma_core_dev_info *cdev_info) -{ - struct iidc_rdma_priv_dev_info *iidc_priv = cdev_info->iidc_priv; - struct irdma_pci_f *rf = iwdev->rf; - - rf->sc_dev.hw = &rf->hw; - rf->iwdev = iwdev; - rf->cdev = cdev_info; - rf->hw.hw_addr = iidc_priv->hw_addr; - rf->pcidev = cdev_info->pdev; - rf->hw.device = &rf->pcidev->dev; - rf->pf_id = iidc_priv->pf_id; - rf->gen_ops.register_qset = irdma_lan_register_qset; - rf->gen_ops.unregister_qset = irdma_lan_unregister_qset; - - rf->default_vsi.vsi_idx = iidc_priv->vport_id; - rf->protocol_used = - cdev_info->rdma_protocol == IIDC_RDMA_PROTOCOL_ROCEV2 ? 
- IRDMA_ROCE_PROTOCOL_ONLY : IRDMA_IWARP_PROTOCOL_ONLY; - rf->rdma_ver = IRDMA_GEN_2; - rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT; - rf->rst_to = IRDMA_RST_TIMEOUT_HZ; - rf->gen_ops.request_reset = irdma_request_reset; - rf->limits_sel = 7; - rf->iwdev = iwdev; - - mutex_init(&iwdev->ah_tbl_lock); - - iwdev->netdev = iidc_priv->netdev; - iwdev->vsi_num = iidc_priv->vport_id; - iwdev->init_state = INITIAL_STATE; - iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT; - iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT; - iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED; - iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE; - if (rf->protocol_used == IRDMA_ROCE_PROTOCOL_ONLY) - iwdev->roce_mode = true; -} - -static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_device_id *id) -{ - struct iidc_rdma_core_auxiliary_dev *iidc_adev; - struct iidc_rdma_core_dev_info *cdev_info; - struct iidc_rdma_priv_dev_info *iidc_priv; - struct irdma_l2params l2params = {}; - struct irdma_device *iwdev; - struct irdma_pci_f *rf; - int err; - - iidc_adev = container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev); - cdev_info = iidc_adev->cdev_info; - iidc_priv = cdev_info->iidc_priv; - - iwdev = ib_alloc_device(irdma_device, ibdev); - if (!iwdev) - return -ENOMEM; - iwdev->rf = kzalloc(sizeof(*rf), GFP_KERNEL); - if (!iwdev->rf) { - ib_dealloc_device(&iwdev->ibdev); - return -ENOMEM; - } - - irdma_fill_device_info(iwdev, cdev_info); - rf = iwdev->rf; - - err = irdma_init_interrupts(rf, cdev_info); - if (err) - goto err_init_interrupts; - - err = irdma_ctrl_init_hw(rf); - if (err) - goto err_ctrl_init; - - l2params.mtu = iwdev->netdev->mtu; - irdma_fill_qos_info(&l2params, &iidc_priv->qos_info); - if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) - iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode; - - err = irdma_rt_init_hw(iwdev, &l2params); - if (err) - goto err_rt_init; - - err = irdma_ib_register_device(iwdev); - if (err) - goto err_ibreg; - - ice_rdma_update_vsi_filter(cdev_info, iwdev->vsi_num, true); - - ibdev_dbg(&iwdev->ibdev, "INIT: Gen2 PF[%d] device probe success\n", PCI_FUNC(rf->pcidev->devfn)); - auxiliary_set_drvdata(aux_dev, iwdev); - - return 0; - -err_ibreg: - irdma_rt_deinit_hw(iwdev); -err_rt_init: - irdma_ctrl_deinit_hw(rf); -err_ctrl_init: - irdma_deinit_interrupts(rf, cdev_info); -err_init_interrupts: - kfree(iwdev->rf); - ib_dealloc_device(&iwdev->ibdev); - - return err; -} - -static const struct auxiliary_device_id irdma_auxiliary_id_table[] = { - {.name = "ice.iwarp", }, - {.name = "ice.roce", }, - {}, -}; - -MODULE_DEVICE_TABLE(auxiliary, irdma_auxiliary_id_table); - -static struct iidc_rdma_core_auxiliary_drv irdma_auxiliary_drv = { - .adrv = { - .id_table = irdma_auxiliary_id_table, - .probe = irdma_probe, - .remove = irdma_remove, - }, - .event_handler = irdma_iidc_event_handler, -}; - static int __init irdma_init_module(void) { int ret; @@ -396,10 +57,10 @@ static int __init irdma_init_module(void) return ret; } - ret = auxiliary_driver_register(&irdma_auxiliary_drv.adrv); + ret = auxiliary_driver_register(&icrdma_core_auxiliary_drv.adrv); if (ret) { auxiliary_driver_unregister(&i40iw_auxiliary_drv); - pr_err("Failed irdma auxiliary_driver_register() ret=%d\n", + pr_err("Failed icrdma(gen_2) auxiliary_driver_register() ret=%d\n", ret); return ret; } @@ -412,7 +73,7 @@ static int __init irdma_init_module(void) static void __exit irdma_exit_module(void) { irdma_unregister_notifiers(); - 
auxiliary_driver_unregister(&irdma_auxiliary_drv.adrv); + auxiliary_driver_unregister(&icrdma_core_auxiliary_drv.adrv); auxiliary_driver_unregister(&i40iw_auxiliary_drv); } diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index 674acc952168..ca568ccf8a5a 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -30,7 +30,6 @@ #endif #include #include -#include #include #include #include @@ -54,6 +53,7 @@ #include "puda.h" extern struct auxiliary_driver i40iw_auxiliary_drv; +extern struct iidc_rdma_core_auxiliary_drv icrdma_core_auxiliary_drv; #define IRDMA_FW_VER_DEFAULT 2 #define IRDMA_HW_VER 2 @@ -331,6 +331,8 @@ struct irdma_pci_f { void *back_fcn; struct irdma_gen_ops gen_ops; struct irdma_device *iwdev; + DECLARE_HASHTABLE(ah_hash_tbl, 8); + struct mutex ah_tbl_lock; /* protect AH hash table access */ }; struct irdma_device { @@ -340,8 +342,6 @@ struct irdma_device { struct workqueue_struct *cleanup_wq; struct irdma_sc_vsi vsi; struct irdma_cm_core cm_core; - DECLARE_HASHTABLE(ah_hash_tbl, 8); - struct mutex ah_tbl_lock; /* protect AH hash table access */ u32 roce_cwnd; u32 roce_ackcreds; u32 vendor_id; @@ -557,4 +557,5 @@ int irdma_netdevice_event(struct notifier_block *notifier, unsigned long event, void *ptr); void irdma_add_ip(struct irdma_device *iwdev); void cqp_compl_worker(struct work_struct *work); +void irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev); #endif /* IRDMA_MAIN_H */ diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index da5a41b275d8..1bee1cbf7a4d 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -4539,7 +4539,7 @@ static bool irdma_ah_exists(struct irdma_device *iwdev, new_ah->sc_ah.ah_info.dest_ip_addr[2] ^ new_ah->sc_ah.ah_info.dest_ip_addr[3]; - hash_for_each_possible(iwdev->ah_hash_tbl, ah, list, key) { + hash_for_each_possible(iwdev->rf->ah_hash_tbl, ah, list, key) { /* Set ah_valid and ah_id the same so memcmp can work */ new_ah->sc_ah.ah_info.ah_idx = ah->sc_ah.ah_info.ah_idx; new_ah->sc_ah.ah_info.ah_valid = ah->sc_ah.ah_info.ah_valid; @@ -4565,14 +4565,14 @@ static int irdma_destroy_ah(struct ib_ah *ibah, u32 ah_flags) struct irdma_ah *ah = to_iwah(ibah); if ((ah_flags & RDMA_DESTROY_AH_SLEEPABLE) && ah->parent_ah) { - mutex_lock(&iwdev->ah_tbl_lock); + mutex_lock(&iwdev->rf->ah_tbl_lock); if (!refcount_dec_and_test(&ah->parent_ah->refcnt)) { - mutex_unlock(&iwdev->ah_tbl_lock); + mutex_unlock(&iwdev->rf->ah_tbl_lock); return 0; } hash_del(&ah->parent_ah->list); kfree(ah->parent_ah); - mutex_unlock(&iwdev->ah_tbl_lock); + mutex_unlock(&iwdev->rf->ah_tbl_lock); } irdma_ah_cqp_op(iwdev->rf, &ah->sc_ah, IRDMA_OP_AH_DESTROY, @@ -4609,11 +4609,11 @@ static int irdma_create_user_ah(struct ib_ah *ibah, err = irdma_setup_ah(ibah, attr); if (err) return err; - mutex_lock(&iwdev->ah_tbl_lock); + mutex_lock(&iwdev->rf->ah_tbl_lock); if (!irdma_ah_exists(iwdev, ah)) { err = irdma_create_hw_ah(iwdev, ah, true); if (err) { - mutex_unlock(&iwdev->ah_tbl_lock); + mutex_unlock(&iwdev->rf->ah_tbl_lock); return err; } /* Add new AH to list */ @@ -4625,11 +4625,11 @@ static int irdma_create_user_ah(struct ib_ah *ibah, parent_ah->sc_ah.ah_info.dest_ip_addr[3]; ah->parent_ah = parent_ah; - hash_add(iwdev->ah_hash_tbl, &parent_ah->list, key); + hash_add(iwdev->rf->ah_hash_tbl, &parent_ah->list, key); refcount_set(&parent_ah->refcnt, 1); } } - mutex_unlock(&iwdev->ah_tbl_lock); + mutex_unlock(&iwdev->rf->ah_tbl_lock); 
uresp.ah_id = ah->sc_ah.ah_info.ah_idx; err = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen)); From d5edd33364a59795a3e3ba83bcdabe591a4b9931 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 27 Aug 2025 10:25:31 -0500 Subject: [PATCH 54/85] RDMA/irdma: Add GEN3 core driver support Introduce support for the GEN3 auxiliary core driver, which is responsible for initializing PCI-level RDMA resources. Facilitate host-driver communication with the device's Control Plane (CP) to discover capabilities and perform privileged operations through an RDMA-specific messaging interface built atop the IDPF mailbox and virtual channel protocol. Establish the RDMA virtual channel message interface and incorporate operations to retrieve the hardware version and discover capabilities from the CP. Additionally, set up the RDMA MMIO regions and initialize the RF structure. Signed-off-by: Mustafa Ismail Co-developed-by: Tatyana Nikolova Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250827152545.2056-3-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/Makefile | 2 + drivers/infiniband/hw/irdma/ctrl.c | 484 +++++++++++++++++++---- drivers/infiniband/hw/irdma/defs.h | 50 ++- drivers/infiniband/hw/irdma/hmc.c | 18 +- drivers/infiniband/hw/irdma/hmc.h | 19 +- drivers/infiniband/hw/irdma/hw.c | 18 +- drivers/infiniband/hw/irdma/i40iw_if.c | 1 + drivers/infiniband/hw/irdma/icrdma_if.c | 2 + drivers/infiniband/hw/irdma/ig3rdma_hw.h | 14 + drivers/infiniband/hw/irdma/ig3rdma_if.c | 232 +++++++++++ drivers/infiniband/hw/irdma/irdma.h | 5 +- drivers/infiniband/hw/irdma/main.c | 10 + drivers/infiniband/hw/irdma/main.h | 2 + drivers/infiniband/hw/irdma/pble.c | 20 +- drivers/infiniband/hw/irdma/puda.h | 4 +- drivers/infiniband/hw/irdma/type.h | 67 +++- drivers/infiniband/hw/irdma/user.h | 5 +- drivers/infiniband/hw/irdma/virtchnl.c | 304 ++++++++++++++ drivers/infiniband/hw/irdma/virtchnl.h | 94 +++++ 19 files changed, 1224 insertions(+), 127 deletions(-) create mode 100644 drivers/infiniband/hw/irdma/ig3rdma_hw.h create mode 100644 drivers/infiniband/hw/irdma/ig3rdma_if.c create mode 100644 drivers/infiniband/hw/irdma/virtchnl.c create mode 100644 drivers/infiniband/hw/irdma/virtchnl.h diff --git a/drivers/infiniband/hw/irdma/Makefile b/drivers/infiniband/hw/irdma/Makefile index 2522e4ca650b..3aa63b913377 100644 --- a/drivers/infiniband/hw/irdma/Makefile +++ b/drivers/infiniband/hw/irdma/Makefile @@ -13,6 +13,7 @@ irdma-objs := cm.o \ hw.o \ i40iw_hw.o \ i40iw_if.o \ + ig3rdma_if.o\ icrdma_if.o \ icrdma_hw.o \ main.o \ @@ -23,6 +24,7 @@ irdma-objs := cm.o \ uk.o \ utils.o \ verbs.o \ + virtchnl.o \ ws.o \ CFLAGS_trace.o = -I$(src) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 99a7f1a6c0b5..7b3afb9577f9 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -1080,7 +1080,8 @@ static int irdma_sc_alloc_stag(struct irdma_sc_dev *dev, FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID) | FIELD_PREP(IRDMA_CQPSQ_STAG_STAGLEN, info->total_len)); set_64bit_val(wqe, 16, - FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx)); + FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx) | + FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18)); set_64bit_val(wqe, 40, FIELD_PREP(IRDMA_CQPSQ_STAG_HMCFNIDX, info->hmc_fcn_index)); @@ -1165,6 +1166,7 @@ static int irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, FLD_LS_64(dev, info->pd_id, 
IRDMA_CQPSQ_STAG_PDID)); set_64bit_val(wqe, 16, FIELD_PREP(IRDMA_CQPSQ_STAG_KEY, info->stag_key) | + FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18) | FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx)); if (!info->chunk_size) { set_64bit_val(wqe, 32, info->reg_addr_pa); @@ -1223,7 +1225,8 @@ static int irdma_sc_dealloc_stag(struct irdma_sc_dev *dev, set_64bit_val(wqe, 8, FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID)); set_64bit_val(wqe, 16, - FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx)); + FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx) | + FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18)); hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DEALLOC_STAG) | FIELD_PREP(IRDMA_CQPSQ_STAG_MR, info->mr) | @@ -1263,7 +1266,8 @@ static int irdma_sc_mw_alloc(struct irdma_sc_dev *dev, set_64bit_val(wqe, 8, FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID)); set_64bit_val(wqe, 16, - FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->mw_stag_index)); + FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->mw_stag_index) | + FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18)); hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_ALLOC_STAG) | FIELD_PREP(IRDMA_CQPSQ_STAG_MWTYPE, info->mw_wide) | @@ -1888,7 +1892,7 @@ void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi, * irdma_get_stats_idx - Return stats index * @vsi: pointer to the vsi */ -static u8 irdma_get_stats_idx(struct irdma_sc_vsi *vsi) +static u16 irdma_get_stats_idx(struct irdma_sc_vsi *vsi) { struct irdma_stats_inst_info stats_info = {}; struct irdma_sc_dev *dev = vsi->dev; @@ -1969,7 +1973,7 @@ int irdma_vsi_stats_init(struct irdma_sc_vsi *vsi, /* when stat allocation is not required default to fcn_id. */ vsi->stats_idx = info->fcn_id; if (info->alloc_stats_inst) { - u8 stats_idx = irdma_get_stats_idx(vsi); + u16 stats_idx = irdma_get_stats_idx(vsi); if (stats_idx != IRDMA_INVALID_STATS_IDX) { vsi->stats_inst_alloc = true; @@ -1993,7 +1997,7 @@ void irdma_vsi_stats_free(struct irdma_sc_vsi *vsi) { struct irdma_stats_inst_info stats_info = {}; struct irdma_sc_dev *dev = vsi->dev; - u8 stats_idx = vsi->stats_idx; + u16 stats_idx = vsi->stats_idx; if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { if (vsi->stats_inst_alloc) { @@ -2794,7 +2798,10 @@ static u64 irdma_sc_decode_fpm_commit(struct irdma_sc_dev *dev, __le64 *buf, obj_info[rsrc_idx].cnt = (u32)FLD_RS_64(dev, temp, IRDMA_COMMIT_FPM_CQCNT); break; case IRDMA_HMC_IW_APBVT_ENTRY: - obj_info[rsrc_idx].cnt = 1; + if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) + obj_info[rsrc_idx].cnt = 1; + else + obj_info[rsrc_idx].cnt = 0; break; default: obj_info[rsrc_idx].cnt = (u32)temp; @@ -2829,7 +2836,8 @@ irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf, IRDMA_HMC_IW_QP); irdma_sc_decode_fpm_commit(dev, buf, 8, info, IRDMA_HMC_IW_CQ); - /* skiping RSRVD */ + irdma_sc_decode_fpm_commit(dev, buf, 16, info, + IRDMA_HMC_IW_SRQ); irdma_sc_decode_fpm_commit(dev, buf, 24, info, IRDMA_HMC_IW_HTE); irdma_sc_decode_fpm_commit(dev, buf, 32, info, @@ -2864,15 +2872,17 @@ irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf, IRDMA_HMC_IW_HDR); irdma_sc_decode_fpm_commit(dev, buf, 152, info, IRDMA_HMC_IW_MD); - irdma_sc_decode_fpm_commit(dev, buf, 160, info, - IRDMA_HMC_IW_OOISC); - irdma_sc_decode_fpm_commit(dev, buf, 168, info, - IRDMA_HMC_IW_OOISCFFL); + if (dev->cqp->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) { + irdma_sc_decode_fpm_commit(dev, buf, 160, info, + IRDMA_HMC_IW_OOISC); + irdma_sc_decode_fpm_commit(dev, buf, 168, info, + IRDMA_HMC_IW_OOISCFFL); + } } /* 
searching for the last object in HMC to find the size of the HMC area. */ for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++) { - if (info[i].base > max_base) { + if (info[i].base > max_base && info[i].cnt) { max_base = info[i].base; last_hmc_obj = i; } @@ -2935,7 +2945,19 @@ static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, get_64bit_val(buf, 0, &temp); hmc_info->first_sd_index = (u16)FIELD_GET(IRDMA_QUERY_FPM_FIRST_PE_SD_INDEX, temp); - max_pe_sds = (u16)FIELD_GET(IRDMA_QUERY_FPM_MAX_PE_SDS, temp); + + if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) + max_pe_sds = (u16)FIELD_GET(IRDMA_QUERY_FPM_MAX_PE_SDS_GEN3, temp); + else + max_pe_sds = (u16)FIELD_GET(IRDMA_QUERY_FPM_MAX_PE_SDS, temp); + + /* Reduce SD count for unprivleged functions by 1 to account for PBLE + * backing page rounding + */ + if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2 && + (hmc_info->hmc_fn_id >= dev->hw_attrs.first_hw_vf_fpm_id || + !dev->privileged)) + max_pe_sds--; hmc_fpm_misc->max_sds = max_pe_sds; hmc_info->sd_table.sd_cnt = max_pe_sds + hmc_info->first_sd_index; @@ -2949,11 +2971,17 @@ static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, size = (u32)(temp >> 32); obj_info[IRDMA_HMC_IW_CQ].size = BIT_ULL(size); + irdma_sc_decode_fpm_query(buf, 24, obj_info, IRDMA_HMC_IW_SRQ); irdma_sc_decode_fpm_query(buf, 32, obj_info, IRDMA_HMC_IW_HTE); irdma_sc_decode_fpm_query(buf, 40, obj_info, IRDMA_HMC_IW_ARP); - obj_info[IRDMA_HMC_IW_APBVT_ENTRY].size = 8192; - obj_info[IRDMA_HMC_IW_APBVT_ENTRY].max_cnt = 1; + if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) { + obj_info[IRDMA_HMC_IW_APBVT_ENTRY].size = 0; + obj_info[IRDMA_HMC_IW_APBVT_ENTRY].max_cnt = 0; + } else { + obj_info[IRDMA_HMC_IW_APBVT_ENTRY].size = 8192; + obj_info[IRDMA_HMC_IW_APBVT_ENTRY].max_cnt = 1; + } irdma_sc_decode_fpm_query(buf, 48, obj_info, IRDMA_HMC_IW_MR); irdma_sc_decode_fpm_query(buf, 56, obj_info, IRDMA_HMC_IW_XF); @@ -2962,7 +2990,7 @@ static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, obj_info[IRDMA_HMC_IW_XFFL].max_cnt = (u32)temp; obj_info[IRDMA_HMC_IW_XFFL].size = 4; hmc_fpm_misc->xf_block_size = FIELD_GET(IRDMA_QUERY_FPM_XFBLOCKSIZE, temp); - if (!hmc_fpm_misc->xf_block_size) + if (obj_info[IRDMA_HMC_IW_XF].max_cnt && !hmc_fpm_misc->xf_block_size) return -EINVAL; irdma_sc_decode_fpm_query(buf, 72, obj_info, IRDMA_HMC_IW_Q1); @@ -3000,15 +3028,25 @@ static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, irdma_sc_decode_fpm_query(buf, 144, obj_info, IRDMA_HMC_IW_HDR); irdma_sc_decode_fpm_query(buf, 152, obj_info, IRDMA_HMC_IW_MD); - irdma_sc_decode_fpm_query(buf, 160, obj_info, IRDMA_HMC_IW_OOISC); - get_64bit_val(buf, 168, &temp); - obj_info[IRDMA_HMC_IW_OOISCFFL].max_cnt = (u32)temp; - obj_info[IRDMA_HMC_IW_OOISCFFL].size = 4; - hmc_fpm_misc->ooiscf_block_size = FIELD_GET(IRDMA_QUERY_FPM_OOISCFBLOCKSIZE, temp); - if (!hmc_fpm_misc->ooiscf_block_size && - obj_info[IRDMA_HMC_IW_OOISCFFL].max_cnt) - return -EINVAL; + if (dev->cqp->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) { + irdma_sc_decode_fpm_query(buf, 160, obj_info, IRDMA_HMC_IW_OOISC); + + get_64bit_val(buf, 168, &temp); + obj_info[IRDMA_HMC_IW_OOISCFFL].max_cnt = (u32)temp; + obj_info[IRDMA_HMC_IW_OOISCFFL].size = 4; + hmc_fpm_misc->ooiscf_block_size = FIELD_GET(IRDMA_QUERY_FPM_OOISCFBLOCKSIZE, temp); + if (!hmc_fpm_misc->ooiscf_block_size && + obj_info[IRDMA_HMC_IW_OOISCFFL].max_cnt) + return -EINVAL; + } + + if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) { + 
get_64bit_val(buf, 176, &temp); + hmc_fpm_misc->loc_mem_pages = (u32)FIELD_GET(IRDMA_QUERY_FPM_LOC_MEM_PAGES, temp); + if (!hmc_fpm_misc->loc_mem_pages) + return -EINVAL; + } return 0; } @@ -4335,6 +4373,26 @@ int irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, u8 hmc_fn_id) return ret_code; } +/** + * irdma_set_loc_mem() - set a local memory bit field + * @buf: ptr to a buffer where local memory gets enabled + */ +static void irdma_set_loc_mem(__le64 *buf) +{ + u64 loc_mem_en = BIT_ULL(ENABLE_LOC_MEM); + u32 offset; + u64 temp; + + for (offset = 0; offset < IRDMA_COMMIT_FPM_BUF_SIZE; + offset += sizeof(__le64)) { + if (offset == IRDMA_PBLE_COMMIT_OFFSET) + continue; + get_64bit_val(buf, offset, &temp); + if (temp) + set_64bit_val(buf, offset, temp | loc_mem_en); + } +} + /** * irdma_sc_cfg_iw_fpm() - commits hmc obj cnt values using cqp * command and populates fpm base address in hmc_info @@ -4356,7 +4414,7 @@ static int irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev, u8 hmc_fn_id) set_64bit_val(buf, 0, (u64)obj_info[IRDMA_HMC_IW_QP].cnt); set_64bit_val(buf, 8, (u64)obj_info[IRDMA_HMC_IW_CQ].cnt); - set_64bit_val(buf, 16, (u64)0); /* RSRVD */ + set_64bit_val(buf, 16, (u64)obj_info[IRDMA_HMC_IW_SRQ].cnt); set_64bit_val(buf, 24, (u64)obj_info[IRDMA_HMC_IW_HTE].cnt); set_64bit_val(buf, 32, (u64)obj_info[IRDMA_HMC_IW_ARP].cnt); set_64bit_val(buf, 40, (u64)0); /* RSVD */ @@ -4383,7 +4441,9 @@ static int irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev, u8 hmc_fn_id) (u64)obj_info[IRDMA_HMC_IW_OOISC].cnt); set_64bit_val(buf, 168, (u64)obj_info[IRDMA_HMC_IW_OOISCFFL].cnt); - + if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3 && + dev->hmc_fpm_misc.loc_mem_pages) + irdma_set_loc_mem(buf); commit_fpm_mem.pa = dev->fpm_commit_buf_pa; commit_fpm_mem.va = dev->fpm_commit_buf; @@ -4592,6 +4652,7 @@ static bool irdma_cqp_ring_full(struct irdma_sc_cqp *cqp) static u32 irdma_est_sd(struct irdma_sc_dev *dev, struct irdma_hmc_info *hmc_info) { + struct irdma_hmc_obj_info *pble_info; int i; u64 size = 0; u64 sd; @@ -4600,12 +4661,22 @@ static u32 irdma_est_sd(struct irdma_sc_dev *dev, if (i != IRDMA_HMC_IW_PBLE) size += round_up(hmc_info->hmc_obj[i].cnt * hmc_info->hmc_obj[i].size, 512); - size += round_up(hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt * - hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].size, 512); + + pble_info = &hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE]; + if (dev->privileged) + size += round_up(pble_info->cnt * pble_info->size, 512); if (size & 0x1FFFFF) sd = (size >> 21) + 1; /* add 1 for remainder */ else sd = size >> 21; + if (!dev->privileged && !dev->hmc_fpm_misc.loc_mem_pages) { + /* 2MB alignment for VF PBLE HMC */ + size = pble_info->cnt * pble_info->size; + if (size & 0x1FFFFF) + sd += (size >> 21) + 1; /* add 1 for remainder */ + else + sd += size >> 21; + } if (sd > 0xFFFFFFFF) { ibdev_dbg(to_ibdev(dev), "HMC: sd overflow[%lld]\n", sd); sd = 0xFFFFFFFF - 1; @@ -4614,17 +4685,6 @@ static u32 irdma_est_sd(struct irdma_sc_dev *dev, return (u32)sd; } -/** - * irdma_sc_query_rdma_features_done - poll cqp for query features done - * @cqp: struct for cqp hw - */ -static int irdma_sc_query_rdma_features_done(struct irdma_sc_cqp *cqp) -{ - return irdma_sc_poll_for_cqp_op_done(cqp, - IRDMA_CQP_OP_QUERY_RDMA_FEATURES, - NULL); -} - /** * irdma_sc_query_rdma_features - query RDMA features and FW ver * @cqp: struct for cqp hw @@ -4634,7 +4694,9 @@ static int irdma_sc_query_rdma_features_done(struct irdma_sc_cqp *cqp) static int irdma_sc_query_rdma_features(struct irdma_sc_cqp *cqp, struct irdma_dma_mem *buf, u64 
scratch) { + u32 tail, val, error; __le64 *wqe; + int status; u64 temp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); @@ -4654,9 +4716,15 @@ static int irdma_sc_query_rdma_features(struct irdma_sc_cqp *cqp, print_hex_dump_debug("WQE: QUERY RDMA FEATURES", DUMP_PREFIX_OFFSET, 16, 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false); - irdma_sc_cqp_post_sq(cqp); + irdma_get_cqp_reg_info(cqp, &val, &tail, &error); - return 0; + irdma_sc_cqp_post_sq(cqp); + status = irdma_cqp_poll_registers(cqp, tail, + cqp->dev->hw_attrs.max_done_count); + if (error || status) + status = -EINVAL; + + return status; } /** @@ -4678,8 +4746,6 @@ int irdma_get_rdma_features(struct irdma_sc_dev *dev) return -ENOMEM; ret_code = irdma_sc_query_rdma_features(dev->cqp, &feat_buf, 0); - if (!ret_code) - ret_code = irdma_sc_query_rdma_features_done(dev->cqp); if (ret_code) goto exit; @@ -4703,8 +4769,6 @@ int irdma_get_rdma_features(struct irdma_sc_dev *dev) return -ENOMEM; ret_code = irdma_sc_query_rdma_features(dev->cqp, &feat_buf, 0); - if (!ret_code) - ret_code = irdma_sc_query_rdma_features_done(dev->cqp); if (ret_code) goto exit; @@ -4785,6 +4849,269 @@ static void cfg_fpm_value_gen_2(struct irdma_sc_dev *dev, hmc_fpm_misc->ooiscf_block_size; } +/** + * irdma_get_rsrc_mem_config - configure resources if local memory or host + * @dev: sc device struct + * @is_mrte_loc_mem: if true, MR's to be in local memory because sd=loc pages + * + * Only mr can be configured host or local memory if qp's are in local memory. + * If qp is in local memory, then all resource object will be in local memory + * except mr which can be either host or local memory. The only exception + * is pble's which are always in host memory. + */ +static void irdma_get_rsrc_mem_config(struct irdma_sc_dev *dev, bool is_mrte_loc_mem) +{ + struct irdma_hmc_info *hmc_info = dev->hmc_info; + int i; + + for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++) + hmc_info->hmc_obj[i].mem_loc = IRDMA_LOC_MEM; + + if (dev->feature_info[IRDMA_OBJ_1] && !is_mrte_loc_mem) { + u8 mem_type; + + mem_type = (u8)FIELD_GET(IRDMA_MR_MEM_LOC, dev->feature_info[IRDMA_OBJ_1]); + + hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc = + (mem_type & IRDMA_OBJ_LOC_MEM_BIT) ? 
+ IRDMA_LOC_MEM : IRDMA_HOST_MEM; + } else { + hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc = IRDMA_LOC_MEM; + } + + hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].mem_loc = IRDMA_HOST_MEM; + + ibdev_dbg(to_ibdev(dev), "HMC: INFO: mrte_mem_loc = %d pble = %d\n", + hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc, + hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].mem_loc); +} + +/** + * irdma_cfg_sd_mem - allocate sd memory + * @dev: sc device struct + * @hmc_info: ptr to irdma_hmc_obj_info struct + */ +static int irdma_cfg_sd_mem(struct irdma_sc_dev *dev, + struct irdma_hmc_info *hmc_info) +{ + struct irdma_virt_mem virt_mem; + u32 mem_size; + + mem_size = sizeof(struct irdma_hmc_sd_entry) * hmc_info->sd_table.sd_cnt; + virt_mem.size = mem_size; + virt_mem.va = kzalloc(virt_mem.size, GFP_KERNEL); + if (!virt_mem.va) + return -ENOMEM; + hmc_info->sd_table.sd_entry = virt_mem.va; + + return 0; +} + +/** + * irdma_get_objs_pages - get number of 2M pages needed + * @dev: sc device struct + * @hmc_info: pointer to the HMC configuration information struct + * @mem_loc: pages for local or host memory + */ +static u32 irdma_get_objs_pages(struct irdma_sc_dev *dev, + struct irdma_hmc_info *hmc_info, + enum irdma_hmc_obj_mem mem_loc) +{ + u64 size = 0; + int i; + + for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++) { + if (hmc_info->hmc_obj[i].mem_loc == mem_loc) { + size += round_up(hmc_info->hmc_obj[i].cnt * + hmc_info->hmc_obj[i].size, 512); + } + } + + return DIV_ROUND_UP(size, IRDMA_HMC_PAGE_SIZE); +} + +/** + * irdma_set_host_hmc_rsrc_gen_3 - calculate host hmc resources for gen 3 + * @dev: sc device struct + */ +static void irdma_set_host_hmc_rsrc_gen_3(struct irdma_sc_dev *dev) +{ + struct irdma_hmc_fpm_misc *hmc_fpm_misc; + struct irdma_hmc_info *hmc_info; + enum irdma_hmc_obj_mem mrte_loc; + u32 mrwanted, pblewanted; + u32 avail_sds, mr_sds; + + hmc_info = dev->hmc_info; + hmc_fpm_misc = &dev->hmc_fpm_misc; + avail_sds = hmc_fpm_misc->max_sds; + mrte_loc = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc; + mrwanted = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt; + pblewanted = hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].max_cnt; + + if (mrte_loc == IRDMA_HOST_MEM && avail_sds > IRDMA_MIN_PBLE_PAGES) { + mr_sds = avail_sds - IRDMA_MIN_PBLE_PAGES; + mrwanted = min(mrwanted, mr_sds * MAX_MR_PER_SD); + hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt = mrwanted; + avail_sds -= DIV_ROUND_UP(mrwanted, MAX_MR_PER_SD); + } + + pblewanted = min(pblewanted, avail_sds * MAX_PBLE_PER_SD); + hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt = pblewanted; +} + +/** + * irdma_set_loc_hmc_rsrc_gen_3 - calculate hmc resources for gen 3 + * @dev: sc device struct + * @max_pages: max local memory available + * @qpwanted: number of qp's wanted + */ +static int irdma_set_loc_hmc_rsrc_gen_3(struct irdma_sc_dev *dev, + u32 max_pages, + u32 qpwanted) +{ + struct irdma_hmc_fpm_misc *hmc_fpm_misc; + u32 rrf_cnt, xf_cnt, timer_cnt, pages_needed; + struct irdma_hmc_info *hmc_info; + u32 ird, ord; + + hmc_info = dev->hmc_info; + hmc_fpm_misc = &dev->hmc_fpm_misc; + ird = dev->hw_attrs.max_hw_ird; + ord = dev->hw_attrs.max_hw_ord; + + hmc_info->hmc_obj[IRDMA_HMC_IW_HDR].cnt = qpwanted; + hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt = qpwanted; + + hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt = + min(hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt, qpwanted * 2); + + hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt = + min(qpwanted * 8, hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt); + + rrf_cnt = roundup_pow_of_two(IRDMA_RRF_MULTIPLIER * qpwanted); + hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt = + 
min(hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].max_cnt, rrf_cnt); + + if (hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].max_cnt) + hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].cnt = + hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt / + hmc_fpm_misc->rrf_block_size; + + xf_cnt = roundup_pow_of_two(IRDMA_XF_MULTIPLIER * qpwanted); + hmc_info->hmc_obj[IRDMA_HMC_IW_XF].cnt = + min(hmc_info->hmc_obj[IRDMA_HMC_IW_XF].max_cnt, xf_cnt); + hmc_info->hmc_obj[IRDMA_HMC_IW_XFFL].cnt = + xf_cnt / hmc_fpm_misc->xf_block_size; + + timer_cnt = (round_up(qpwanted, 512) / 512 + 1) * + hmc_fpm_misc->timer_bucket; + hmc_info->hmc_obj[IRDMA_HMC_IW_TIMER].cnt = + min(timer_cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_TIMER].cnt); + + do { + hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt = roundup_pow_of_two(ird * 2 * qpwanted); + hmc_info->hmc_obj[IRDMA_HMC_IW_Q1FL].cnt = + hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt / hmc_fpm_misc->q1_block_size; + + pages_needed = irdma_get_objs_pages(dev, hmc_info, IRDMA_LOC_MEM); + if (pages_needed <= max_pages) + break; + + ird /= 2; + ord /= 2; + } while (ird >= IRDMA_MIN_IRD); + + if (ird < IRDMA_MIN_IRD) { + ibdev_dbg(to_ibdev(dev), "HMC: FAIL: IRD=%u Q1 CNT = %u\n", + ird, hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt); + return -EINVAL; + } + + dev->hw_attrs.max_hw_ird = ird; + dev->hw_attrs.max_hw_ord = ord; + hmc_fpm_misc->max_sds -= pages_needed; + + return 0; +} + +/** + * cfg_fpm_value_gen_3 - configure fpm for gen 3 + * @dev: sc device struct + * @hmc_info: ptr to irdma_hmc_obj_info struct + * @hmc_fpm_misc: ptr to fpm data + */ +static int cfg_fpm_value_gen_3(struct irdma_sc_dev *dev, + struct irdma_hmc_info *hmc_info, + struct irdma_hmc_fpm_misc *hmc_fpm_misc) +{ + enum irdma_hmc_obj_mem mrte_loc; + u32 mrwanted, qpwanted; + int i, ret_code = 0; + u32 loc_mem_pages; + bool is_mrte_loc_mem; + + loc_mem_pages = hmc_fpm_misc->loc_mem_pages; + is_mrte_loc_mem = hmc_fpm_misc->loc_mem_pages == hmc_fpm_misc->max_sds ? 
+ true : false; + + irdma_get_rsrc_mem_config(dev, is_mrte_loc_mem); + mrte_loc = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc; + + if (is_mrte_loc_mem) + loc_mem_pages -= IRDMA_MIN_PBLE_PAGES; + + ibdev_dbg(to_ibdev(dev), + "HMC: mrte_loc %d loc_mem %u fpm max sds %u host_obj %d\n", + hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc, + hmc_fpm_misc->loc_mem_pages, hmc_fpm_misc->max_sds, + is_mrte_loc_mem); + + mrwanted = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].max_cnt; + qpwanted = hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt; + hmc_info->hmc_obj[IRDMA_HMC_IW_HDR].cnt = qpwanted; + + hmc_info->hmc_obj[IRDMA_HMC_IW_OOISC].max_cnt = 0; + hmc_info->hmc_obj[IRDMA_HMC_IW_OOISCFFL].max_cnt = 0; + hmc_info->hmc_obj[IRDMA_HMC_IW_HTE].max_cnt = 0; + hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].max_cnt = 0; + hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt = + min(hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt, + (u32)IRDMA_FSIAV_CNT_MAX); + for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++) + hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt; + + while (qpwanted >= IRDMA_MIN_QP_CNT) { + if (!irdma_set_loc_hmc_rsrc_gen_3(dev, loc_mem_pages, qpwanted)) + break; + + qpwanted /= 2; + if (mrte_loc == IRDMA_LOC_MEM) { + mrwanted = qpwanted * IRDMA_MIN_MR_PER_QP; + hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt = + min(hmc_info->hmc_obj[IRDMA_HMC_IW_MR].max_cnt, mrwanted); + } + } + + if (qpwanted < IRDMA_MIN_QP_CNT) { + ibdev_dbg(to_ibdev(dev), + "HMC: ERROR: could not allocate fpm resources\n"); + return -EINVAL; + } + + irdma_set_host_hmc_rsrc_gen_3(dev); + ret_code = irdma_sc_cfg_iw_fpm(dev, dev->hmc_fn_id); + if (ret_code) { + ibdev_dbg(to_ibdev(dev), + "HMC: cfg_iw_fpm returned error_code[x%08X]\n", + readl(dev->hw_regs[IRDMA_CQPERRCODES])); + + return ret_code; + } + + return irdma_cfg_sd_mem(dev, hmc_info); +} + /** * irdma_cfg_fpm_val - configure HMC objects * @dev: sc device struct @@ -4792,16 +5119,15 @@ static void cfg_fpm_value_gen_2(struct irdma_sc_dev *dev, */ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) { - struct irdma_virt_mem virt_mem; - u32 i, mem_size; u32 qpwanted, mrwanted, pblewanted; - u32 powerof2, hte; + u32 powerof2, hte, i; u32 sd_needed; u32 sd_diff; u32 loop_count = 0; struct irdma_hmc_info *hmc_info; struct irdma_hmc_fpm_misc *hmc_fpm_misc; int ret_code = 0; + u32 max_sds; hmc_info = dev->hmc_info; hmc_fpm_misc = &dev->hmc_fpm_misc; @@ -4814,14 +5140,16 @@ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) return ret_code; } + max_sds = hmc_fpm_misc->max_sds; + + if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) + return cfg_fpm_value_gen_3(dev, hmc_info, hmc_fpm_misc); + for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++) hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt; sd_needed = irdma_est_sd(dev, hmc_info); - ibdev_dbg(to_ibdev(dev), - "HMC: FW max resources sd_needed[%08d] first_sd_index[%04d]\n", - sd_needed, hmc_info->first_sd_index); - ibdev_dbg(to_ibdev(dev), "HMC: sd count %d where max sd is %d\n", - hmc_info->sd_table.sd_cnt, hmc_fpm_misc->max_sds); + ibdev_dbg(to_ibdev(dev), "HMC: sd count %u where max sd is %u\n", + hmc_info->sd_table.sd_cnt, max_sds); qpwanted = min(qp_count, hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt); @@ -4835,8 +5163,8 @@ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) pblewanted = hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].max_cnt; ibdev_dbg(to_ibdev(dev), - "HMC: req_qp=%d max_sd=%d, max_qp = %d, max_cq=%d, max_mr=%d, max_pble=%d, mc=%d, av=%d\n", - qp_count, hmc_fpm_misc->max_sds, + "HMC: req_qp=%d max_sd=%u, max_qp 
= %u, max_cq=%u, max_mr=%u, max_pble=%u, mc=%d, av=%u\n", + qp_count, max_sds, hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_MR].max_cnt, @@ -4849,7 +5177,6 @@ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt; hmc_info->hmc_obj[IRDMA_HMC_IW_ARP].cnt = hmc_info->hmc_obj[IRDMA_HMC_IW_ARP].max_cnt; - hmc_info->hmc_obj[IRDMA_HMC_IW_APBVT_ENTRY].cnt = 1; while (irdma_q1_cnt(dev, hmc_info, qpwanted) > hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].max_cnt) @@ -4860,7 +5187,7 @@ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt = qpwanted; hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt = min(2 * qpwanted, hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt); - hmc_info->hmc_obj[IRDMA_HMC_IW_RESERVED].cnt = 0; /* Reserved */ + hmc_info->hmc_obj[IRDMA_HMC_IW_SRQ].cnt = 0; /* Reserved */ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt = mrwanted; hte = round_up(qpwanted + hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt, 512); @@ -4898,11 +5225,12 @@ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) if (!(loop_count % 2) && qpwanted > 128) { qpwanted /= 2; } else { - mrwanted /= 2; pblewanted /= 2; + mrwanted /= 2; } continue; } + if (dev->cqp->hmc_profile != IRDMA_HMC_PROFILE_FAVOR_VF && pblewanted > (512 * FPM_MULTIPLIER * sd_diff)) { pblewanted -= 256 * FPM_MULTIPLIER * sd_diff; @@ -4928,14 +5256,13 @@ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) if (sd_needed > hmc_fpm_misc->max_sds) { ibdev_dbg(to_ibdev(dev), - "HMC: cfg_fpm failed loop_cnt=%d, sd_needed=%d, max sd count %d\n", + "HMC: cfg_fpm failed loop_cnt=%u, sd_needed=%u, max sd count %u\n", loop_count, sd_needed, hmc_info->sd_table.sd_cnt); return -EINVAL; } - if (loop_count > 1 && sd_needed < hmc_fpm_misc->max_sds) { - pblewanted += (hmc_fpm_misc->max_sds - sd_needed) * 256 * - FPM_MULTIPLIER; + if (loop_count > 1 && sd_needed < max_sds) { + pblewanted += (max_sds - sd_needed) * 256 * FPM_MULTIPLIER; hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt = pblewanted; sd_needed = irdma_est_sd(dev, hmc_info); } @@ -4959,18 +5286,7 @@ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) return ret_code; } - mem_size = sizeof(struct irdma_hmc_sd_entry) * - (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index + 1); - virt_mem.size = mem_size; - virt_mem.va = kzalloc(virt_mem.size, GFP_KERNEL); - if (!virt_mem.va) { - ibdev_dbg(to_ibdev(dev), - "HMC: failed to allocate memory for sd_entry buffer\n"); - return -ENOMEM; - } - hmc_info->sd_table.sd_entry = virt_mem.va; - - return ret_code; + return irdma_cfg_sd_mem(dev, hmc_info); } /** @@ -5381,10 +5697,14 @@ int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev, dev->fpm_commit_buf = info->fpm_commit_buf; dev->hw = info->hw; dev->hw->hw_addr = info->bar0; + dev->protocol_used = info->protocol_used; /* Setup the hardware limits, hmc may limit further */ dev->hw_attrs.min_hw_qp_id = IRDMA_MIN_IW_QP_ID; dev->hw_attrs.min_hw_aeq_size = IRDMA_MIN_AEQ_ENTRIES; - dev->hw_attrs.max_hw_aeq_size = IRDMA_MAX_AEQ_ENTRIES; + if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) + dev->hw_attrs.max_hw_aeq_size = IRDMA_MAX_AEQ_ENTRIES_GEN_3; + else + dev->hw_attrs.max_hw_aeq_size = IRDMA_MAX_AEQ_ENTRIES; dev->hw_attrs.min_hw_ceq_size = IRDMA_MIN_CEQ_ENTRIES; dev->hw_attrs.max_hw_ceq_size = IRDMA_MAX_CEQ_ENTRIES; dev->hw_attrs.uk_attrs.min_hw_cq_size = IRDMA_MIN_CQ_SIZE; @@ -5409,7 +5729,17 @@ int irdma_sc_dev_init(enum irdma_vers ver, struct 
irdma_sc_dev *dev, dev->hw_attrs.max_sleep_count = IRDMA_SLEEP_COUNT; dev->hw_attrs.max_cqp_compl_wait_time_ms = CQP_COMPL_WAIT_TIME_MS; - dev->hw_attrs.uk_attrs.hw_rev = ver; + if (!dev->privileged) { + ret_code = irdma_vchnl_req_get_hmc_fcn(dev); + if (ret_code) { + ibdev_dbg(to_ibdev(dev), + "DEV: Get HMC function ret = %d\n", + ret_code); + + return ret_code; + } + } + irdma_sc_init_hw(dev); if (irdma_wait_pe_ready(dev)) diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index 2cb4b96db721..7d363088b5c3 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -114,6 +114,12 @@ enum irdma_protocol_used { #define IRDMA_UPDATE_SD_BUFF_SIZE 128 #define IRDMA_FEATURE_BUF_SIZE (8 * IRDMA_MAX_FEATURES) +#define ENABLE_LOC_MEM 63 +#define MAX_PBLE_PER_SD 0x40000 +#define MAX_PBLE_SD_PER_FCN 0x400 +#define MAX_MR_PER_SD 0x8000 +#define MAX_MR_SD_PER_FCN 0x80 +#define IRDMA_PBLE_COMMIT_OFFSET 112 #define IRDMA_MAX_QUANTA_PER_WR 8 #define IRDMA_QP_SW_MAX_WQ_QUANTA 32768 @@ -395,7 +401,11 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_STATS_INST_INDEX GENMASK_ULL(6, 0) #define IRDMA_CQPSQ_STATS_HMC_FCN_INDEX GENMASK_ULL(5, 0) #define IRDMA_CQPSQ_WS_WQEVALID BIT_ULL(63) -#define IRDMA_CQPSQ_WS_NODEOP GENMASK_ULL(53, 52) +#define IRDMA_CQPSQ_WS_NODEOP GENMASK_ULL(55, 52) +#define IRDMA_SD_MAX GENMASK_ULL(15, 0) +#define IRDMA_MEM_MAX GENMASK_ULL(15, 0) +#define IRDMA_QP_MEM_LOC GENMASK_ULL(47, 44) +#define IRDMA_MR_MEM_LOC GENMASK_ULL(27, 24) #define IRDMA_CQPSQ_WS_ENABLENODE BIT_ULL(62) #define IRDMA_CQPSQ_WS_NODETYPE BIT_ULL(61) @@ -404,16 +414,16 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_WS_VMVFTYPE GENMASK_ULL(55, 54) #define IRDMA_CQPSQ_WS_VMVFNUM GENMASK_ULL(51, 42) #define IRDMA_CQPSQ_WS_OP GENMASK_ULL(37, 32) -#define IRDMA_CQPSQ_WS_PARENTID GENMASK_ULL(25, 16) -#define IRDMA_CQPSQ_WS_NODEID GENMASK_ULL(9, 0) -#define IRDMA_CQPSQ_WS_VSI GENMASK_ULL(57, 48) +#define IRDMA_CQPSQ_WS_PARENTID GENMASK_ULL(29, 16) +#define IRDMA_CQPSQ_WS_NODEID GENMASK_ULL(13, 0) +#define IRDMA_CQPSQ_WS_VSI GENMASK_ULL(63, 48) #define IRDMA_CQPSQ_WS_WEIGHT GENMASK_ULL(38, 32) #define IRDMA_CQPSQ_UP_WQEVALID BIT_ULL(63) #define IRDMA_CQPSQ_UP_USEVLAN BIT_ULL(62) #define IRDMA_CQPSQ_UP_USEOVERRIDE BIT_ULL(61) #define IRDMA_CQPSQ_UP_OP GENMASK_ULL(37, 32) -#define IRDMA_CQPSQ_UP_HMCFCNIDX GENMASK_ULL(5, 0) +#define IRDMA_CQPSQ_UP_HMCFCNIDX GENMASK_ULL(15, 0) #define IRDMA_CQPSQ_UP_CNPOVERRIDE GENMASK_ULL(37, 32) #define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_WQEVALID BIT_ULL(63) #define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_BUF_LEN GENMASK_ULL(31, 0) @@ -476,8 +486,6 @@ enum irdma_cqp_op_type { #define IRDMA_CQ_UDSMAC GENMASK_ULL(47, 0) #define IRDMA_CQ_UDVLAN GENMASK_ULL(63, 48) -#define IRDMA_CQ_IMMDATA_S 0 -#define IRDMA_CQ_IMMDATA_M (0xffffffffffffffffULL << IRDMA_CQ_IMMVALID_S) #define IRDMA_CQ_IMMDATALOW32 GENMASK_ULL(31, 0) #define IRDMA_CQ_IMMDATAUP32 GENMASK_ULL(63, 32) #define IRDMACQ_PAYLDLEN GENMASK_ULL(31, 0) @@ -590,6 +598,7 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_STAG_MR BIT_ULL(43) #define IRDMA_CQPSQ_STAG_MWTYPE BIT_ULL(42) #define IRDMA_CQPSQ_STAG_MW1_BIND_DONT_VLDT_KEY BIT_ULL(58) +#define IRDMA_CQPSQ_STAG_PDID_HI GENMASK_ULL(59, 54) #define IRDMA_CQPSQ_STAG_LPBLSIZE IRDMA_CQPSQ_CQ_LPBLSIZE #define IRDMA_CQPSQ_STAG_HPAGESIZE GENMASK_ULL(47, 46) @@ -628,11 +637,8 @@ enum irdma_cqp_op_type { /* Manage Push Page - MPP */ #define IRDMA_INVALID_PUSH_PAGE_INDEX_GEN_1 0xffff #define IRDMA_INVALID_PUSH_PAGE_INDEX 0xffffffff - -#define 
IRDMA_CQPSQ_MPP_QS_HANDLE GENMASK_ULL(9, 0) -#define IRDMA_CQPSQ_MPP_PPIDX GENMASK_ULL(9, 0) +#define IRDMA_CQPSQ_MPP_PPIDX GENMASK_ULL(31, 0) #define IRDMA_CQPSQ_MPP_PPTYPE GENMASK_ULL(61, 60) - #define IRDMA_CQPSQ_MPP_FREE_PAGE BIT_ULL(62) /* Upload Context - UCTX */ @@ -660,10 +666,10 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_AEQ_VMAP BIT_ULL(47) #define IRDMA_CQPSQ_AEQ_FIRSTPMPBLIDX GENMASK_ULL(27, 0) -#define IRDMA_COMMIT_FPM_QPCNT GENMASK_ULL(18, 0) - +#define IRDMA_COMMIT_FPM_QPCNT GENMASK_ULL(20, 0) #define IRDMA_COMMIT_FPM_BASE_S 32 -#define IRDMA_CQPSQ_CFPM_HMCFNID GENMASK_ULL(5, 0) +#define IRDMA_CQPSQ_CFPM_HMCFNID GENMASK_ULL(15, 0) + #define IRDMA_CQPSQ_FWQE_AECODE GENMASK_ULL(15, 0) #define IRDMA_CQPSQ_FWQE_AESOURCE GENMASK_ULL(19, 16) #define IRDMA_CQPSQ_FWQE_RQMNERR GENMASK_ULL(15, 0) @@ -787,8 +793,8 @@ enum irdma_cqp_op_type { #define IRDMAQPC_MAXSNDWND GENMASK_ULL(31, 0) #define IRDMAQPC_REXMIT_THRESH GENMASK_ULL(53, 48) #define IRDMAQPC_RNRNAK_THRESH GENMASK_ULL(56, 54) -#define IRDMAQPC_TXCQNUM GENMASK_ULL(18, 0) -#define IRDMAQPC_RXCQNUM GENMASK_ULL(50, 32) +#define IRDMAQPC_TXCQNUM GENMASK_ULL(24, 0) +#define IRDMAQPC_RXCQNUM GENMASK_ULL(56, 32) #define IRDMAQPC_STAT_INDEX GENMASK_ULL(6, 0) #define IRDMAQPC_Q2ADDR GENMASK_ULL(63, 8) #define IRDMAQPC_LASTBYTESENT GENMASK_ULL(7, 0) @@ -856,7 +862,7 @@ enum irdma_cqp_op_type { #define IRDMAQPSQ_REMSTAGINV GENMASK_ULL(31, 0) #define IRDMAQPSQ_DESTQKEY GENMASK_ULL(31, 0) #define IRDMAQPSQ_DESTQPN GENMASK_ULL(55, 32) -#define IRDMAQPSQ_AHID GENMASK_ULL(16, 0) +#define IRDMAQPSQ_AHID GENMASK_ULL(24, 0) #define IRDMAQPSQ_INLINEDATAFLAG BIT_ULL(57) #define IRDMA_INLINE_VALID_S 7 @@ -903,10 +909,12 @@ enum irdma_cqp_op_type { #define IRDMAPFINT_OICR_PE_PUSH_M BIT(27) #define IRDMAPFINT_OICR_PE_CRITERR_M BIT(28) -#define IRDMA_QUERY_FPM_MAX_QPS GENMASK_ULL(18, 0) -#define IRDMA_QUERY_FPM_MAX_CQS GENMASK_ULL(19, 0) +#define IRDMA_QUERY_FPM_LOC_MEM_PAGES GENMASK_ULL(63, 32) +#define IRDMA_QUERY_FPM_MAX_QPS GENMASK_ULL(31, 0) +#define IRDMA_QUERY_FPM_MAX_CQS GENMASK_ULL(31, 0) #define IRDMA_QUERY_FPM_FIRST_PE_SD_INDEX GENMASK_ULL(13, 0) -#define IRDMA_QUERY_FPM_MAX_PE_SDS GENMASK_ULL(45, 32) +#define IRDMA_QUERY_FPM_MAX_PE_SDS GENMASK_ULL(44, 32) +#define IRDMA_QUERY_FPM_MAX_PE_SDS_GEN3 GENMASK_ULL(47, 32) #define IRDMA_QUERY_FPM_MAX_CEQS GENMASK_ULL(9, 0) #define IRDMA_QUERY_FPM_XFBLOCKSIZE GENMASK_ULL(63, 32) #define IRDMA_QUERY_FPM_Q1BLOCKSIZE GENMASK_ULL(63, 32) @@ -1103,7 +1111,7 @@ enum irdma_alignment { IRDMA_CEQ_ALIGNMENT = 0x100, IRDMA_CQ0_ALIGNMENT = 0x100, IRDMA_SD_BUF_ALIGNMENT = 0x80, - IRDMA_FEATURE_BUF_ALIGNMENT = 0x8, + IRDMA_FEATURE_BUF_ALIGNMENT = 0x10, }; enum icrdma_protocol_used { diff --git a/drivers/infiniband/hw/irdma/hmc.c b/drivers/infiniband/hw/irdma/hmc.c index ac58088a8e41..da18add141da 100644 --- a/drivers/infiniband/hw/irdma/hmc.c +++ b/drivers/infiniband/hw/irdma/hmc.c @@ -5,6 +5,7 @@ #include "defs.h" #include "type.h" #include "protos.h" +#include "virtchnl.h" /** * irdma_find_sd_index_limit - finds segment descriptor index limit @@ -228,6 +229,10 @@ int irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev, bool pd_error = false; int ret_code = 0; + if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3 && + dev->hmc_info->hmc_obj[info->rsrc_type].mem_loc == IRDMA_LOC_MEM) + return 0; + if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) return -EINVAL; @@ -330,7 +335,7 @@ static int irdma_finish_del_sd_reg(struct irdma_sc_dev *dev, u32 i, sd_idx; struct irdma_dma_mem *mem; - if 
(!reset) + if (dev->privileged && !reset) ret_code = irdma_hmc_sd_grp(dev, info->hmc_info, info->hmc_info->sd_indexes[0], info->del_sd_cnt, false); @@ -376,6 +381,9 @@ int irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev, u32 i, j; int ret_code = 0; + if (dev->hmc_info->hmc_obj[info->rsrc_type].mem_loc == IRDMA_LOC_MEM) + return 0; + if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) { ibdev_dbg(to_ibdev(dev), "HMC: error start_idx[%04d] >= [type %04d].cnt[%04d]\n", @@ -589,7 +597,10 @@ int irdma_add_pd_table_entry(struct irdma_sc_dev *dev, pd_entry->sd_index = sd_idx; pd_entry->valid = true; pd_table->use_cnt++; - irdma_invalidate_pf_hmc_pd(dev, sd_idx, rel_pd_idx); + + if (hmc_info->hmc_fn_id < dev->hw_attrs.first_hw_vf_fpm_id && + dev->privileged) + irdma_invalidate_pf_hmc_pd(dev, sd_idx, rel_pd_idx); } pd_entry->bp.use_cnt++; @@ -640,7 +651,8 @@ int irdma_remove_pd_bp(struct irdma_sc_dev *dev, pd_addr = pd_table->pd_page_addr.va; pd_addr += rel_pd_idx; memset(pd_addr, 0, sizeof(u64)); - irdma_invalidate_pf_hmc_pd(dev, sd_idx, idx); + if (dev->privileged && dev->hmc_fn_id == hmc_info->hmc_fn_id) + irdma_invalidate_pf_hmc_pd(dev, sd_idx, idx); if (!pd_entry->rsrc_pg) { mem = &pd_entry->bp.addr; diff --git a/drivers/infiniband/hw/irdma/hmc.h b/drivers/infiniband/hw/irdma/hmc.h index 415f9e23bbf6..257a5d22aa96 100644 --- a/drivers/infiniband/hw/irdma/hmc.h +++ b/drivers/infiniband/hw/irdma/hmc.h @@ -16,11 +16,21 @@ #define IRDMA_HMC_PD_BP_BUF_ALIGNMENT 4096 #define IRDMA_FIRST_VF_FPM_ID 8 #define FPM_MULTIPLIER 1024 +#define IRDMA_OBJ_LOC_MEM_BIT 0x4 +#define IRDMA_XF_MULTIPLIER 16 +#define IRDMA_RRF_MULTIPLIER 8 +#define IRDMA_MIN_PBLE_PAGES 3 +#define IRDMA_HMC_PAGE_SIZE 2097152 +#define IRDMA_MIN_MR_PER_QP 4 +#define IRDMA_MIN_QP_CNT 64 +#define IRDMA_FSIAV_CNT_MAX 1048576 +#define IRDMA_MIN_IRD 8 +#define IRDMA_HMC_MIN_RRF 16 enum irdma_hmc_rsrc_type { IRDMA_HMC_IW_QP = 0, IRDMA_HMC_IW_CQ = 1, - IRDMA_HMC_IW_RESERVED = 2, + IRDMA_HMC_IW_SRQ = 2, IRDMA_HMC_IW_HTE = 3, IRDMA_HMC_IW_ARP = 4, IRDMA_HMC_IW_APBVT_ENTRY = 5, @@ -48,11 +58,17 @@ enum irdma_sd_entry_type { IRDMA_SD_TYPE_DIRECT = 2, }; +enum irdma_hmc_obj_mem { + IRDMA_HOST_MEM = 0, + IRDMA_LOC_MEM = 1, +}; + struct irdma_hmc_obj_info { u64 base; u32 max_cnt; u32 cnt; u64 size; + enum irdma_hmc_obj_mem mem_loc; }; struct irdma_hmc_bp { @@ -117,6 +133,7 @@ struct irdma_update_sds_info { struct irdma_ccq_cqe_info; struct irdma_hmc_fcn_info { u32 vf_id; + u8 protocol_used; u8 free_fcn; }; diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 69ce1862eabe..2aa5f53a46a7 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -33,6 +33,7 @@ static struct irdma_rsrc_limits rsrc_limits_table[] = { static enum irdma_hmc_rsrc_type iw_hmc_obj_types[] = { IRDMA_HMC_IW_QP, IRDMA_HMC_IW_CQ, + IRDMA_HMC_IW_SRQ, IRDMA_HMC_IW_HTE, IRDMA_HMC_IW_ARP, IRDMA_HMC_IW_APBVT_ENTRY, @@ -1569,6 +1570,8 @@ static void irdma_del_init_mem(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; + if (!rf->sc_dev.privileged) + irdma_vchnl_req_put_hmc_fcn(&rf->sc_dev); kfree(dev->hmc_info->sd_table.sd_entry); dev->hmc_info->sd_table.sd_entry = NULL; vfree(rf->mem_rsrc); @@ -1635,6 +1638,7 @@ static int irdma_initialize_dev(struct irdma_pci_f *rf) info.bar0 = rf->hw.hw_addr; info.hmc_fn_id = rf->pf_id; + info.protocol_used = rf->protocol_used; info.hw = &rf->hw; status = irdma_sc_dev_init(rf->rdma_ver, &rf->sc_dev, &info); if (status) @@ -1907,6 +1911,13 @@ int 
irdma_ctrl_init_hw(struct irdma_pci_f *rf) break; rf->init_state = CQP_CREATED; + dev->feature_info[IRDMA_FEATURE_FW_INFO] = IRDMA_FW_VER_DEFAULT; + if (rf->rdma_ver != IRDMA_GEN_1) { + status = irdma_get_rdma_features(dev); + if (status) + break; + } + status = irdma_hmc_setup(rf); if (status) break; @@ -1922,13 +1933,6 @@ int irdma_ctrl_init_hw(struct irdma_pci_f *rf) break; rf->init_state = CCQ_CREATED; - dev->feature_info[IRDMA_FEATURE_FW_INFO] = IRDMA_FW_VER_DEFAULT; - if (rf->rdma_ver != IRDMA_GEN_1) { - status = irdma_get_rdma_features(dev); - if (status) - break; - } - status = irdma_setup_ceq_0(rf); if (status) break; diff --git a/drivers/infiniband/hw/irdma/i40iw_if.c b/drivers/infiniband/hw/irdma/i40iw_if.c index 6fa807ef4545..15e036ddaffb 100644 --- a/drivers/infiniband/hw/irdma/i40iw_if.c +++ b/drivers/infiniband/hw/irdma/i40iw_if.c @@ -77,6 +77,7 @@ static void i40iw_fill_device_info(struct irdma_device *iwdev, struct i40e_info rf->rdma_ver = IRDMA_GEN_1; rf->sc_dev.hw = &rf->hw; rf->sc_dev.hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_1; + rf->sc_dev.privileged = true; rf->gen_ops.request_reset = i40iw_request_reset; rf->pcidev = cdev_info->pcidev; rf->pf_id = cdev_info->fid; diff --git a/drivers/infiniband/hw/irdma/icrdma_if.c b/drivers/infiniband/hw/irdma/icrdma_if.c index db7c50b63b1d..27b191f61caf 100644 --- a/drivers/infiniband/hw/irdma/icrdma_if.c +++ b/drivers/infiniband/hw/irdma/icrdma_if.c @@ -214,6 +214,8 @@ static void icrdma_fill_device_info(struct irdma_device *iwdev, rf->pf_id = idc_priv->pf_id; rf->rdma_ver = IRDMA_GEN_2; rf->sc_dev.hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_2; + rf->sc_dev.is_pf = true; + rf->sc_dev.privileged = true; rf->gen_ops.register_qset = icrdma_lan_register_qset; rf->gen_ops.unregister_qset = icrdma_lan_unregister_qset; diff --git a/drivers/infiniband/hw/irdma/ig3rdma_hw.h b/drivers/infiniband/hw/irdma/ig3rdma_hw.h new file mode 100644 index 000000000000..c0997c54e5e3 --- /dev/null +++ b/drivers/infiniband/hw/irdma/ig3rdma_hw.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */ +/* Copyright (c) 2021 - 2024 Intel Corporation */ +#ifndef IG3RDMA_HW_H +#define IG3RDMA_HW_H + +#define IG3_PF_RDMA_REGION_OFFSET 0xBC00000 +#define IG3_PF_RDMA_REGION_LEN 0x401000 +#define IG3_VF_RDMA_REGION_OFFSET 0x8C00 +#define IG3_VF_RDMA_REGION_LEN 0x8400 + +int ig3rdma_vchnl_send_sync(struct irdma_sc_dev *dev, u8 *msg, u16 len, + u8 *recv_msg, u16 *recv_len); + +#endif /* IG3RDMA_HW_H*/ diff --git a/drivers/infiniband/hw/irdma/ig3rdma_if.c b/drivers/infiniband/hw/irdma/ig3rdma_if.c new file mode 100644 index 000000000000..1bb42eb298ba --- /dev/null +++ b/drivers/infiniband/hw/irdma/ig3rdma_if.c @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB +/* Copyright (c) 2023 - 2024 Intel Corporation */ + +#include "main.h" +#include +#include "ig3rdma_hw.h" + +static void ig3rdma_idc_core_event_handler(struct iidc_rdma_core_dev_info *cdev_info, + struct iidc_rdma_event *event) +{ + struct irdma_pci_f *rf = auxiliary_get_drvdata(cdev_info->adev); + + if (*event->type & BIT(IIDC_RDMA_EVENT_WARN_RESET)) { + rf->reset = true; + rf->sc_dev.vchnl_up = false; + } +} + +int ig3rdma_vchnl_send_sync(struct irdma_sc_dev *dev, u8 *msg, u16 len, + u8 *recv_msg, u16 *recv_len) +{ + struct iidc_rdma_core_dev_info *cdev_info = dev_to_rf(dev)->cdev; + int ret; + + ret = idpf_idc_rdma_vc_send_sync(cdev_info, msg, len, recv_msg, + recv_len); + if (ret == -ETIMEDOUT) { + ibdev_err(&(dev_to_rf(dev)->iwdev->ibdev), + "Virtual channel Req <-> Resp 
completion timeout\n"); + dev->vchnl_up = false; + } + + return ret; +} + +static int ig3rdma_vchnl_init(struct irdma_pci_f *rf, + struct iidc_rdma_core_dev_info *cdev_info, + u8 *rdma_ver) +{ + struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->iidc_priv; + struct irdma_vchnl_init_info virt_info; + u8 gen = rf->rdma_ver; + int ret; + + rf->vchnl_wq = alloc_ordered_workqueue("irdma-virtchnl-wq", 0); + if (!rf->vchnl_wq) + return -ENOMEM; + + mutex_init(&rf->sc_dev.vchnl_mutex); + + virt_info.is_pf = !idc_priv->ftype; + virt_info.hw_rev = gen; + virt_info.privileged = gen == IRDMA_GEN_2; + virt_info.vchnl_wq = rf->vchnl_wq; + ret = irdma_sc_vchnl_init(&rf->sc_dev, &virt_info); + if (ret) { + destroy_workqueue(rf->vchnl_wq); + return ret; + } + + *rdma_ver = rf->sc_dev.hw_attrs.uk_attrs.hw_rev; + + return 0; +} + +/** + * ig3rdma_request_reset - Request a reset + * @rf: RDMA PCI function + */ +static void ig3rdma_request_reset(struct irdma_pci_f *rf) +{ + ibdev_warn(&rf->iwdev->ibdev, "Requesting a reset\n"); + idpf_idc_request_reset(rf->cdev, IIDC_FUNC_RESET); +} + +static int ig3rdma_cfg_regions(struct irdma_hw *hw, + struct iidc_rdma_core_dev_info *cdev_info) +{ + struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->iidc_priv; + struct pci_dev *pdev = cdev_info->pdev; + int i; + + switch (idc_priv->ftype) { + case IIDC_FUNCTION_TYPE_PF: + hw->rdma_reg.len = IG3_PF_RDMA_REGION_LEN; + hw->rdma_reg.offset = IG3_PF_RDMA_REGION_OFFSET; + break; + case IIDC_FUNCTION_TYPE_VF: + hw->rdma_reg.len = IG3_VF_RDMA_REGION_LEN; + hw->rdma_reg.offset = IG3_VF_RDMA_REGION_OFFSET; + break; + default: + return -ENODEV; + } + + hw->rdma_reg.addr = ioremap(pci_resource_start(pdev, 0) + hw->rdma_reg.offset, + hw->rdma_reg.len); + + if (!hw->rdma_reg.addr) + return -ENOMEM; + + hw->num_io_regions = le16_to_cpu(idc_priv->num_memory_regions); + hw->io_regs = kcalloc(hw->num_io_regions, + sizeof(struct irdma_mmio_region), GFP_KERNEL); + + if (!hw->io_regs) { + iounmap(hw->rdma_reg.addr); + return -ENOMEM; + } + + for (i = 0; i < hw->num_io_regions; i++) { + hw->io_regs[i].addr = + idc_priv->mapped_mem_regions[i].region_addr; + hw->io_regs[i].len = + le64_to_cpu(idc_priv->mapped_mem_regions[i].size); + hw->io_regs[i].offset = + le64_to_cpu(idc_priv->mapped_mem_regions[i].start_offset); + } + + return 0; +} + +static void ig3rdma_decfg_rf(struct irdma_pci_f *rf) +{ + struct irdma_hw *hw = &rf->hw; + + destroy_workqueue(rf->vchnl_wq); + kfree(hw->io_regs); + iounmap(hw->rdma_reg.addr); +} + +static int ig3rdma_cfg_rf(struct irdma_pci_f *rf, + struct iidc_rdma_core_dev_info *cdev_info) +{ + struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->iidc_priv; + int err; + + rf->sc_dev.hw = &rf->hw; + rf->cdev = cdev_info; + rf->pcidev = cdev_info->pdev; + rf->hw.device = &rf->pcidev->dev; + rf->msix_count = idc_priv->msix_count; + rf->msix_entries = idc_priv->msix_entries; + + err = ig3rdma_vchnl_init(rf, cdev_info, &rf->rdma_ver); + if (err) + return err; + + err = ig3rdma_cfg_regions(&rf->hw, cdev_info); + if (err) { + destroy_workqueue(rf->vchnl_wq); + return err; + } + + rf->protocol_used = IRDMA_ROCE_PROTOCOL_ONLY; + rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT; + rf->rst_to = IRDMA_RST_TIMEOUT_HZ; + rf->gen_ops.request_reset = ig3rdma_request_reset; + rf->limits_sel = 7; + mutex_init(&rf->ah_tbl_lock); + + return 0; +} + +static int ig3rdma_core_probe(struct auxiliary_device *aux_dev, + const struct auxiliary_device_id *id) +{ + struct iidc_rdma_core_auxiliary_dev *idc_adev = + container_of(aux_dev, struct 
iidc_rdma_core_auxiliary_dev, adev); + struct iidc_rdma_core_dev_info *cdev_info = idc_adev->cdev_info; + struct irdma_pci_f *rf; + int err; + + rf = kzalloc(sizeof(*rf), GFP_KERNEL); + if (!rf) + return -ENOMEM; + + err = ig3rdma_cfg_rf(rf, cdev_info); + if (err) + goto err_cfg_rf; + + err = irdma_ctrl_init_hw(rf); + if (err) + goto err_ctrl_init; + + auxiliary_set_drvdata(aux_dev, rf); + + err = idpf_idc_vport_dev_ctrl(cdev_info, true); + if (err) + goto err_vport_ctrl; + + return 0; + +err_vport_ctrl: + irdma_ctrl_deinit_hw(rf); +err_ctrl_init: + ig3rdma_decfg_rf(rf); +err_cfg_rf: + kfree(rf); + + return err; +} + +static void ig3rdma_core_remove(struct auxiliary_device *aux_dev) +{ + struct iidc_rdma_core_auxiliary_dev *idc_adev = + container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev); + struct iidc_rdma_core_dev_info *cdev_info = idc_adev->cdev_info; + struct irdma_pci_f *rf = auxiliary_get_drvdata(aux_dev); + + idpf_idc_vport_dev_ctrl(cdev_info, false); + irdma_ctrl_deinit_hw(rf); + ig3rdma_decfg_rf(rf); + kfree(rf); +} + +static const struct auxiliary_device_id ig3rdma_core_auxiliary_id_table[] = { + {.name = "idpf.8086.rdma.core", }, + {}, +}; + +MODULE_DEVICE_TABLE(auxiliary, ig3rdma_core_auxiliary_id_table); + +struct iidc_rdma_core_auxiliary_drv ig3rdma_core_auxiliary_drv = { + .adrv = { + .name = "core", + .id_table = ig3rdma_core_auxiliary_id_table, + .probe = ig3rdma_core_probe, + .remove = ig3rdma_core_remove, + }, + .event_handler = ig3rdma_idc_core_event_handler, +}; diff --git a/drivers/infiniband/hw/irdma/irdma.h b/drivers/infiniband/hw/irdma/irdma.h index 20d2e7393e3d..df7f6124570c 100644 --- a/drivers/infiniband/hw/irdma/irdma.h +++ b/drivers/infiniband/hw/irdma/irdma.h @@ -92,7 +92,7 @@ struct irdma_mcast_grp_ctx_entry_info { struct irdma_mcast_grp_info { u8 dest_mac_addr[ETH_ALEN]; u16 vlan_id; - u8 hmc_fcn_id; + u16 hmc_fcn_id; bool ipv4_valid:1; bool vlan_valid:1; u16 mg_id; @@ -107,6 +107,9 @@ enum irdma_vers { IRDMA_GEN_RSVD, IRDMA_GEN_1, IRDMA_GEN_2, + IRDMA_GEN_3, + IRDMA_GEN_NEXT, + IRDMA_GEN_MAX = IRDMA_GEN_NEXT-1 }; struct irdma_uk_attrs { diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index f703f489a0bf..162f1fab32b5 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -65,6 +65,15 @@ static int __init irdma_init_module(void) return ret; } + ret = auxiliary_driver_register(&ig3rdma_core_auxiliary_drv.adrv); + if (ret) { + auxiliary_driver_unregister(&icrdma_core_auxiliary_drv.adrv); + auxiliary_driver_unregister(&i40iw_auxiliary_drv); + pr_err("Failed ig3rdma(gen_3) core auxiliary_driver_register() ret=%d\n", + ret); + + return ret; + } irdma_register_notifiers(); return 0; @@ -75,6 +84,7 @@ static void __exit irdma_exit_module(void) irdma_unregister_notifiers(); auxiliary_driver_unregister(&icrdma_core_auxiliary_drv.adrv); auxiliary_driver_unregister(&i40iw_auxiliary_drv); + auxiliary_driver_unregister(&ig3rdma_core_auxiliary_drv.adrv); } module_init(irdma_init_module); diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index ca568ccf8a5a..efc7c13e29df 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -54,6 +54,7 @@ extern struct auxiliary_driver i40iw_auxiliary_drv; extern struct iidc_rdma_core_auxiliary_drv icrdma_core_auxiliary_drv; +extern struct iidc_rdma_core_auxiliary_drv ig3rdma_core_auxiliary_drv; #define IRDMA_FW_VER_DEFAULT 2 #define IRDMA_HW_VER 2 @@ -327,6 +328,7 @@ struct 
irdma_pci_f { wait_queue_head_t vchnl_waitq; struct workqueue_struct *cqp_cmpl_wq; struct work_struct cqp_cmpl_work; + struct workqueue_struct *vchnl_wq; struct irdma_sc_vsi default_vsi; void *back_fcn; struct irdma_gen_ops gen_ops; diff --git a/drivers/infiniband/hw/irdma/pble.c b/drivers/infiniband/hw/irdma/pble.c index 37ce35cb10e7..3091f9345f12 100644 --- a/drivers/infiniband/hw/irdma/pble.c +++ b/drivers/infiniband/hw/irdma/pble.c @@ -193,8 +193,15 @@ static enum irdma_sd_entry_type irdma_get_type(struct irdma_sc_dev *dev, { enum irdma_sd_entry_type sd_entry_type; - sd_entry_type = !idx->rel_pd_idx && pages == IRDMA_HMC_PD_CNT_IN_SD ? - IRDMA_SD_TYPE_DIRECT : IRDMA_SD_TYPE_PAGED; + if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) + sd_entry_type = (!idx->rel_pd_idx && + pages == IRDMA_HMC_PD_CNT_IN_SD) ? + IRDMA_SD_TYPE_DIRECT : IRDMA_SD_TYPE_PAGED; + else + sd_entry_type = (!idx->rel_pd_idx && + pages == IRDMA_HMC_PD_CNT_IN_SD && + dev->privileged) ? + IRDMA_SD_TYPE_DIRECT : IRDMA_SD_TYPE_PAGED; return sd_entry_type; } @@ -279,10 +286,11 @@ static int add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc) sd_reg_val = (sd_entry_type == IRDMA_SD_TYPE_PAGED) ? sd_entry->u.pd_table.pd_page_addr.pa : sd_entry->u.bp.addr.pa; - - if (!sd_entry->valid) { - ret_code = irdma_hmc_sd_one(dev, hmc_info->hmc_fn_id, sd_reg_val, - idx->sd_idx, sd_entry->entry_type, true); + if ((dev->privileged && !sd_entry->valid) || + dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) { + ret_code = irdma_hmc_sd_one(dev, hmc_info->hmc_fn_id, + sd_reg_val, idx->sd_idx, + sd_entry->entry_type, true); if (ret_code) goto error; } diff --git a/drivers/infiniband/hw/irdma/puda.h b/drivers/infiniband/hw/irdma/puda.h index 2fc638f2b143..d65041bee667 100644 --- a/drivers/infiniband/hw/irdma/puda.h +++ b/drivers/infiniband/hw/irdma/puda.h @@ -91,7 +91,7 @@ struct irdma_puda_rsrc_info { u32 rq_size; u32 tx_buf_cnt; /* total bufs allocated will be rq_size + tx_buf_cnt */ u16 buf_size; - u8 stats_idx; + u16 stats_idx; bool stats_idx_valid:1; int abi_ver; }; @@ -140,7 +140,7 @@ struct irdma_puda_rsrc { u64 crc_err; u64 pmode_count; u64 partials_handled; - u8 stats_idx; + u16 stats_idx; bool check_crc:1; bool stats_idx_valid:1; }; diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 527c6da2c1ac..bb241a4ff5f9 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -8,6 +8,8 @@ #include "hmc.h" #include "uda.h" #include "ws.h" +#include "virtchnl.h" + #define IRDMA_DEBUG_ERR "ERR" #define IRDMA_DEBUG_INIT "INIT" #define IRDMA_DEBUG_DEV "DEV" @@ -159,7 +161,34 @@ enum irdma_hw_stats_index { enum irdma_feature_type { IRDMA_FEATURE_FW_INFO = 0, IRDMA_HW_VERSION_INFO = 1, + IRDMA_QP_MAX_INCR = 2, + IRDMA_CQ_MAX_INCR = 3, + IRDMA_CEQ_MAX_INCR = 4, + IRDMA_SD_MAX_INCR = 5, + IRDMA_QP_SMALL = 6, + IRDMA_QP_MEDIUM = 7, + IRDMA_QP_LARGE = 8, + IRDMA_QP_XLARGE = 9, + IRDMA_CQ_SMALL = 10, + IRDMA_CQ_MEDIUM = 11, + IRDMA_CQ_LARGE = 12, + IRDMA_CQ_XLARGE = 13, + IRDMA_CEQ_SMALL = 14, + IRDMA_CEQ_MEDIUM = 15, + IRDMA_CEQ_LARGE = 16, + IRDMA_CEQ_XLARGE = 17, + IRDMA_SD_SMALL = 18, + IRDMA_SD_MEDIUM = 19, + IRDMA_SD_LARGE = 20, + IRDMA_SD_XLARGE = 21, + IRDMA_OBJ_1 = 22, + IRDMA_OBJ_2 = 23, + IRDMA_ENDPT_TRK = 24, + IRDMA_FTN_INLINE_MAX = 25, IRDMA_QSETS_MAX = 26, + IRDMA_ASO = 27, + IRDMA_FTN_FLAGS = 32, + IRDMA_FTN_NOP = 33, IRDMA_MAX_FEATURES, /* Must be last entry */ }; @@ -310,9 +339,21 @@ struct irdma_vsi_pestat { spinlock_t lock; /* rdma stats lock */ }; +struct 
irdma_mmio_region { + u8 __iomem *addr; + resource_size_t len; + resource_size_t offset; +}; + struct irdma_hw { - u8 __iomem *hw_addr; - u8 __iomem *priv_hw_addr; + union { + u8 __iomem *hw_addr; + struct { + struct irdma_mmio_region rdma_reg; /* RDMA region */ + struct irdma_mmio_region *io_regs; /* Non-RDMA MMIO regions */ + u16 num_io_regions; /* Number of Non-RDMA MMIO regions */ + }; + }; struct device *device; struct irdma_hmc_info hmc; }; @@ -495,7 +536,7 @@ struct irdma_stats_inst_info { struct irdma_up_info { u8 map[8]; u8 cnp_up_override; - u8 hmc_fcn_idx; + u16 hmc_fcn_idx; bool use_vlan:1; bool use_cnp_up_override:1; }; @@ -518,6 +559,7 @@ struct irdma_ws_node_info { struct irdma_hmc_fpm_misc { u32 max_ceqs; u32 max_sds; + u32 loc_mem_pages; u32 xf_block_size; u32 q1_block_size; u32 ht_multiplier; @@ -526,6 +568,7 @@ struct irdma_hmc_fpm_misc { u32 ooiscf_block_size; }; +#define IRDMA_VCHNL_MAX_MSG_SIZE 512 #define IRDMA_LEAF_DEFAULT_REL_BW 64 #define IRDMA_PARENT_DEFAULT_REL_BW 1 @@ -601,19 +644,28 @@ struct irdma_sc_dev { u64 cqp_cmd_stats[IRDMA_MAX_CQP_OPS]; struct irdma_hw_attrs hw_attrs; struct irdma_hmc_info *hmc_info; + struct irdma_vchnl_rdma_caps vc_caps; + u8 vc_recv_buf[IRDMA_VCHNL_MAX_MSG_SIZE]; + u16 vc_recv_len; struct irdma_sc_cqp *cqp; struct irdma_sc_aeq *aeq; struct irdma_sc_ceq *ceq[IRDMA_CEQ_MAX_COUNT]; struct irdma_sc_cq *ccq; const struct irdma_irq_ops *irq_ops; + struct irdma_qos qos[IRDMA_MAX_USER_PRIORITY]; struct irdma_hmc_fpm_misc hmc_fpm_misc; struct irdma_ws_node *ws_tree_root; struct mutex ws_mutex; /* ws tree mutex */ + u32 vchnl_ver; u16 num_vfs; - u8 hmc_fn_id; + u16 hmc_fn_id; u8 vf_id; + bool privileged:1; bool vchnl_up:1; bool ceq_valid:1; + bool is_pf:1; + u8 protocol_used; + struct mutex vchnl_mutex; /* mutex to synchronize RDMA virtual channel messages */ u8 pci_rev; int (*ws_add)(struct irdma_sc_vsi *vsi, u8 user_pri); void (*ws_remove)(struct irdma_sc_vsi *vsi, u8 user_pri); @@ -720,7 +772,7 @@ struct irdma_vsi_init_info { struct irdma_vsi_stats_info { struct irdma_vsi_pestat *pestat; - u8 fcn_id; + u16 fcn_id; bool alloc_stats_inst; }; @@ -731,7 +783,8 @@ struct irdma_device_init_info { __le64 *fpm_commit_buf; struct irdma_hw *hw; void __iomem *bar0; - u8 hmc_fn_id; + enum irdma_protocol_used protocol_used; + u16 hmc_fn_id; }; struct irdma_ceq_init_info { @@ -972,7 +1025,7 @@ struct irdma_allocate_stag_info { bool use_hmc_fcn_index:1; bool use_pf_rid:1; bool all_memory:1; - u8 hmc_fcn_index; + u16 hmc_fcn_index; }; struct irdma_mw_alloc_info { diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index 380e4a47aede..5f489feda32c 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -55,8 +55,8 @@ enum irdma_device_caps_const { IRDMA_CEQE_SIZE = 1, IRDMA_CQP_CTX_SIZE = 8, IRDMA_SHADOW_AREA_SIZE = 8, - IRDMA_QUERY_FPM_BUF_SIZE = 176, - IRDMA_COMMIT_FPM_BUF_SIZE = 176, + IRDMA_QUERY_FPM_BUF_SIZE = 192, + IRDMA_COMMIT_FPM_BUF_SIZE = 192, IRDMA_GATHER_STATS_BUF_SIZE = 1024, IRDMA_MIN_IW_QP_ID = 0, IRDMA_MAX_IW_QP_ID = 262143, @@ -67,6 +67,7 @@ enum irdma_device_caps_const { IRDMA_MAX_CQID = 524287, IRDMA_MIN_AEQ_ENTRIES = 1, IRDMA_MAX_AEQ_ENTRIES = 524287, + IRDMA_MAX_AEQ_ENTRIES_GEN_3 = 262144, IRDMA_MIN_CEQ_ENTRIES = 1, IRDMA_MAX_CEQ_ENTRIES = 262143, IRDMA_MIN_CQ_SIZE = 1, diff --git a/drivers/infiniband/hw/irdma/virtchnl.c b/drivers/infiniband/hw/irdma/virtchnl.c new file mode 100644 index 000000000000..5beaecd7dbb5 --- /dev/null +++ 
b/drivers/infiniband/hw/irdma/virtchnl.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB +/* Copyright (c) 2015 - 2024 Intel Corporation */ + +#include "osdep.h" +#include "hmc.h" +#include "defs.h" +#include "type.h" +#include "protos.h" +#include "virtchnl.h" +#include "ws.h" +#include "i40iw_hw.h" +#include "ig3rdma_hw.h" + +/** + * irdma_sc_vchnl_init - Initialize dev virtchannel and get hw_rev + * @dev: dev structure to update + * @info: virtchannel info parameters to fill into the dev structure + */ +int irdma_sc_vchnl_init(struct irdma_sc_dev *dev, + struct irdma_vchnl_init_info *info) +{ + dev->vchnl_up = true; + dev->privileged = info->privileged; + dev->is_pf = info->is_pf; + dev->hw_attrs.uk_attrs.hw_rev = info->hw_rev; + + if (!dev->privileged) { + int ret = irdma_vchnl_req_get_ver(dev, IRDMA_VCHNL_CHNL_VER_MAX, + &dev->vchnl_ver); + + ibdev_dbg(to_ibdev(dev), + "DEV: Get Channel version ret = %d, version is %u\n", + ret, dev->vchnl_ver); + + if (ret) + return ret; + + ret = irdma_vchnl_req_get_caps(dev); + if (ret) + return ret; + + dev->hw_attrs.uk_attrs.hw_rev = dev->vc_caps.hw_rev; + } + + return 0; +} + +/** + * irdma_vchnl_req_verify_resp - Verify requested response size + * @vchnl_req: vchnl message requested + * @resp_len: response length sent from vchnl peer + */ +static int irdma_vchnl_req_verify_resp(struct irdma_vchnl_req *vchnl_req, + u16 resp_len) +{ + switch (vchnl_req->vchnl_msg->op_code) { + case IRDMA_VCHNL_OP_GET_VER: + case IRDMA_VCHNL_OP_GET_HMC_FCN: + case IRDMA_VCHNL_OP_PUT_HMC_FCN: + if (resp_len != vchnl_req->parm_len) + return -EBADMSG; + break; + case IRDMA_VCHNL_OP_GET_RDMA_CAPS: + if (resp_len < IRDMA_VCHNL_OP_GET_RDMA_CAPS_MIN_SIZE) + return -EBADMSG; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static void irdma_free_vchnl_req_msg(struct irdma_vchnl_req *vchnl_req) +{ + kfree(vchnl_req->vchnl_msg); +} + +static int irdma_alloc_vchnl_req_msg(struct irdma_vchnl_req *vchnl_req, + struct irdma_vchnl_req_init_info *info) +{ + struct irdma_vchnl_op_buf *vchnl_msg; + + vchnl_msg = kzalloc(IRDMA_VCHNL_MAX_MSG_SIZE, GFP_KERNEL); + + if (!vchnl_msg) + return -ENOMEM; + + vchnl_msg->op_ctx = (uintptr_t)vchnl_req; + vchnl_msg->buf_len = sizeof(*vchnl_msg) + info->req_parm_len; + if (info->req_parm_len) + memcpy(vchnl_msg->buf, info->req_parm, info->req_parm_len); + vchnl_msg->op_code = info->op_code; + vchnl_msg->op_ver = info->op_ver; + + vchnl_req->vchnl_msg = vchnl_msg; + vchnl_req->parm = info->resp_parm; + vchnl_req->parm_len = info->resp_parm_len; + + return 0; +} + +static int irdma_vchnl_req_send_sync(struct irdma_sc_dev *dev, + struct irdma_vchnl_req_init_info *info) +{ + u16 resp_len = sizeof(dev->vc_recv_buf); + struct irdma_vchnl_req vchnl_req = {}; + u16 msg_len; + u8 *msg; + int ret; + + ret = irdma_alloc_vchnl_req_msg(&vchnl_req, info); + if (ret) + return ret; + + msg_len = vchnl_req.vchnl_msg->buf_len; + msg = (u8 *)vchnl_req.vchnl_msg; + + mutex_lock(&dev->vchnl_mutex); + ret = ig3rdma_vchnl_send_sync(dev, msg, msg_len, dev->vc_recv_buf, + &resp_len); + dev->vc_recv_len = resp_len; + if (ret) + goto exit; + + ret = irdma_vchnl_req_get_resp(dev, &vchnl_req); +exit: + mutex_unlock(&dev->vchnl_mutex); + ibdev_dbg(to_ibdev(dev), + "VIRT: virtual channel send %s caller: %pS ret=%d op=%u op_ver=%u req_len=%u parm_len=%u resp_len=%u\n", + !ret ? 
"SUCCEEDS" : "FAILS", __builtin_return_address(0), + ret, vchnl_req.vchnl_msg->op_code, + vchnl_req.vchnl_msg->op_ver, vchnl_req.vchnl_msg->buf_len, + vchnl_req.parm_len, vchnl_req.resp_len); + irdma_free_vchnl_req_msg(&vchnl_req); + + return ret; +} + +/** + * irdma_vchnl_req_get_ver - Request Channel version + * @dev: RDMA device pointer + * @ver_req: Virtual channel version requested + * @ver_res: Virtual channel version response + */ +int irdma_vchnl_req_get_ver(struct irdma_sc_dev *dev, u16 ver_req, u32 *ver_res) +{ + struct irdma_vchnl_req_init_info info = {}; + int ret; + + if (!dev->vchnl_up) + return -EBUSY; + + info.op_code = IRDMA_VCHNL_OP_GET_VER; + info.op_ver = ver_req; + info.resp_parm = ver_res; + info.resp_parm_len = sizeof(*ver_res); + + ret = irdma_vchnl_req_send_sync(dev, &info); + if (ret) + return ret; + + if (*ver_res < IRDMA_VCHNL_CHNL_VER_MIN) { + ibdev_dbg(to_ibdev(dev), + "VIRT: %s unsupported vchnl version 0x%0x\n", + __func__, *ver_res); + return -EOPNOTSUPP; + } + + return 0; +} + +/** + * irdma_vchnl_req_get_hmc_fcn - Request VF HMC Function + * @dev: RDMA device pointer + */ +int irdma_vchnl_req_get_hmc_fcn(struct irdma_sc_dev *dev) +{ + struct irdma_vchnl_req_hmc_info req_hmc = {}; + struct irdma_vchnl_resp_hmc_info resp_hmc = {}; + struct irdma_vchnl_req_init_info info = {}; + int ret; + + if (!dev->vchnl_up) + return -EBUSY; + + info.op_code = IRDMA_VCHNL_OP_GET_HMC_FCN; + if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) { + info.op_ver = IRDMA_VCHNL_OP_GET_HMC_FCN_V2; + req_hmc.protocol_used = dev->protocol_used; + info.req_parm_len = sizeof(req_hmc); + info.req_parm = &req_hmc; + info.resp_parm = &resp_hmc; + info.resp_parm_len = sizeof(resp_hmc); + } + + ret = irdma_vchnl_req_send_sync(dev, &info); + + if (ret) + return ret; + + if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) { + int i; + + dev->hmc_fn_id = resp_hmc.hmc_func; + + for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { + dev->qos[i].qs_handle = resp_hmc.qs_handle[i]; + dev->qos[i].valid = true; + } + } + return 0; +} + +/** + * irdma_vchnl_req_put_hmc_fcn - Free VF HMC Function + * @dev: RDMA device pointer + */ +int irdma_vchnl_req_put_hmc_fcn(struct irdma_sc_dev *dev) +{ + struct irdma_vchnl_req_init_info info = {}; + + if (!dev->vchnl_up) + return -EBUSY; + + info.op_code = IRDMA_VCHNL_OP_PUT_HMC_FCN; + info.op_ver = IRDMA_VCHNL_OP_PUT_HMC_FCN_V0; + + return irdma_vchnl_req_send_sync(dev, &info); +} + +/** + * irdma_vchnl_req_get_caps - Request RDMA capabilities + * @dev: RDMA device pointer + */ +int irdma_vchnl_req_get_caps(struct irdma_sc_dev *dev) +{ + struct irdma_vchnl_req_init_info info = {}; + int ret; + + if (!dev->vchnl_up) + return -EBUSY; + + info.op_code = IRDMA_VCHNL_OP_GET_RDMA_CAPS; + info.op_ver = IRDMA_VCHNL_OP_GET_RDMA_CAPS_V0; + info.resp_parm = &dev->vc_caps; + info.resp_parm_len = sizeof(dev->vc_caps); + + ret = irdma_vchnl_req_send_sync(dev, &info); + + if (ret) + return ret; + + if (dev->vc_caps.hw_rev > IRDMA_GEN_MAX || + dev->vc_caps.hw_rev < IRDMA_GEN_2) { + ibdev_dbg(to_ibdev(dev), + "ERR: %s unsupported hw_rev version 0x%0x\n", + __func__, dev->vc_caps.hw_rev); + return -EOPNOTSUPP; + } + + return 0; +} + +/** + * irdma_vchnl_req_get_resp - Receive the inbound vchnl response. 
+ * @dev: Dev pointer + * @vchnl_req: Vchannel request + */ +int irdma_vchnl_req_get_resp(struct irdma_sc_dev *dev, + struct irdma_vchnl_req *vchnl_req) +{ + struct irdma_vchnl_resp_buf *vchnl_msg_resp = + (struct irdma_vchnl_resp_buf *)dev->vc_recv_buf; + u16 resp_len; + int ret; + + if ((uintptr_t)vchnl_req != (uintptr_t)vchnl_msg_resp->op_ctx) { + ibdev_dbg(to_ibdev(dev), + "VIRT: error vchnl context value does not match\n"); + return -EBADMSG; + } + + resp_len = dev->vc_recv_len - sizeof(*vchnl_msg_resp); + resp_len = min(resp_len, vchnl_req->parm_len); + + ret = irdma_vchnl_req_verify_resp(vchnl_req, resp_len); + if (ret) + return ret; + + ret = (int)vchnl_msg_resp->op_ret; + if (ret) + return ret; + + vchnl_req->resp_len = 0; + if (vchnl_req->parm_len && vchnl_req->parm && resp_len) { + memcpy(vchnl_req->parm, vchnl_msg_resp->buf, resp_len); + vchnl_req->resp_len = resp_len; + ibdev_dbg(to_ibdev(dev), "VIRT: Got response, data size %u\n", + resp_len); + } + + return 0; +} diff --git a/drivers/infiniband/hw/irdma/virtchnl.h b/drivers/infiniband/hw/irdma/virtchnl.h new file mode 100644 index 000000000000..d1e64666208d --- /dev/null +++ b/drivers/infiniband/hw/irdma/virtchnl.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */ +/* Copyright (c) 2015 - 2024 Intel Corporation */ +#ifndef IRDMA_VIRTCHNL_H +#define IRDMA_VIRTCHNL_H + +#include "hmc.h" +#include "irdma.h" + +/* IRDMA_VCHNL_CHNL_VER_V0 is for legacy hw, no longer supported. */ +#define IRDMA_VCHNL_CHNL_VER_V2 2 +#define IRDMA_VCHNL_CHNL_VER_MIN IRDMA_VCHNL_CHNL_VER_V2 +#define IRDMA_VCHNL_CHNL_VER_MAX IRDMA_VCHNL_CHNL_VER_V2 +#define IRDMA_VCHNL_OP_GET_HMC_FCN_V0 0 +#define IRDMA_VCHNL_OP_GET_HMC_FCN_V1 1 +#define IRDMA_VCHNL_OP_GET_HMC_FCN_V2 2 +#define IRDMA_VCHNL_OP_PUT_HMC_FCN_V0 0 +#define IRDMA_VCHNL_OP_GET_RDMA_CAPS_V0 0 +#define IRDMA_VCHNL_OP_GET_RDMA_CAPS_MIN_SIZE 1 + +enum irdma_vchnl_ops { + IRDMA_VCHNL_OP_GET_VER = 0, + IRDMA_VCHNL_OP_GET_HMC_FCN = 1, + IRDMA_VCHNL_OP_PUT_HMC_FCN = 2, + IRDMA_VCHNL_OP_GET_RDMA_CAPS = 13, +}; + +struct irdma_vchnl_req_hmc_info { + u8 protocol_used; + u8 disable_qos; +} __packed; + +struct irdma_vchnl_resp_hmc_info { + u16 hmc_func; + u16 qs_handle[IRDMA_MAX_USER_PRIORITY]; +} __packed; + +struct irdma_vchnl_op_buf { + u16 op_code; + u16 op_ver; + u16 buf_len; + u16 rsvd; + u64 op_ctx; + u8 buf[]; +} __packed; + +struct irdma_vchnl_resp_buf { + u64 op_ctx; + u16 buf_len; + s16 op_ret; + u16 rsvd[2]; + u8 buf[]; +} __packed; + +struct irdma_vchnl_rdma_caps { + u8 hw_rev; + u16 cqp_timeout_s; + u16 cqp_def_timeout_s; + u16 max_hw_push_len; +} __packed; + +struct irdma_vchnl_init_info { + struct workqueue_struct *vchnl_wq; + enum irdma_vers hw_rev; + bool privileged; + bool is_pf; +}; + +struct irdma_vchnl_req { + struct irdma_vchnl_op_buf *vchnl_msg; + void *parm; + u32 vf_id; + u16 parm_len; + u16 resp_len; +}; + +struct irdma_vchnl_req_init_info { + void *req_parm; + void *resp_parm; + u16 req_parm_len; + u16 resp_parm_len; + u16 op_code; + u16 op_ver; +} __packed; + +int irdma_sc_vchnl_init(struct irdma_sc_dev *dev, + struct irdma_vchnl_init_info *info); +int irdma_vchnl_req_get_ver(struct irdma_sc_dev *dev, u16 ver_req, + u32 *ver_res); +int irdma_vchnl_req_get_hmc_fcn(struct irdma_sc_dev *dev); +int irdma_vchnl_req_put_hmc_fcn(struct irdma_sc_dev *dev); +int irdma_vchnl_req_get_caps(struct irdma_sc_dev *dev); +int irdma_vchnl_req_get_resp(struct irdma_sc_dev *dev, + struct irdma_vchnl_req *vc_req); +#endif /* IRDMA_VIRTCHNL_H */ From 
7d5a7cc7b9989d7f4507b89cbff35df75959e0d9 Mon Sep 17 00:00:00 2001 From: Christopher Bednarz Date: Wed, 27 Aug 2025 10:25:32 -0500 Subject: [PATCH 55/85] RDMA/irdma: Discover and set up GEN3 hardware register layout Discover the hardware register layout for GEN3 devices through an RDMA virtual channel operation with the Control Plane (CP). Set up the corresponding hardware attributes specific to GEN3 devices. Signed-off-by: Christopher Bednarz Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250827152545.2056-4-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/Makefile | 1 + drivers/infiniband/hw/irdma/ctrl.c | 31 ++-- drivers/infiniband/hw/irdma/defs.h | 12 +- drivers/infiniband/hw/irdma/i40iw_hw.c | 2 + drivers/infiniband/hw/irdma/i40iw_hw.h | 2 + drivers/infiniband/hw/irdma/icrdma_hw.c | 3 + drivers/infiniband/hw/irdma/icrdma_hw.h | 5 +- drivers/infiniband/hw/irdma/ig3rdma_hw.c | 65 +++++++++ drivers/infiniband/hw/irdma/ig3rdma_hw.h | 18 +++ drivers/infiniband/hw/irdma/irdma.h | 5 + drivers/infiniband/hw/irdma/virtchnl.c | 178 +++++++++++++++++++++++ drivers/infiniband/hw/irdma/virtchnl.h | 44 ++++++ 12 files changed, 351 insertions(+), 15 deletions(-) create mode 100644 drivers/infiniband/hw/irdma/ig3rdma_hw.c diff --git a/drivers/infiniband/hw/irdma/Makefile b/drivers/infiniband/hw/irdma/Makefile index 3aa63b913377..03ceb9e5475f 100644 --- a/drivers/infiniband/hw/irdma/Makefile +++ b/drivers/infiniband/hw/irdma/Makefile @@ -16,6 +16,7 @@ irdma-objs := cm.o \ ig3rdma_if.o\ icrdma_if.o \ icrdma_hw.o \ + ig3rdma_hw.o\ main.o \ pble.o \ puda.o \ diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 7b3afb9577f9..dd174e771786 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -5672,6 +5672,9 @@ static inline void irdma_sc_init_hw(struct irdma_sc_dev *dev) case IRDMA_GEN_2: icrdma_init_hw(dev); break; + case IRDMA_GEN_3: + ig3rdma_init_hw(dev); + break; } } @@ -5742,18 +5745,26 @@ int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev, irdma_sc_init_hw(dev); - if (irdma_wait_pe_ready(dev)) - return -ETIMEDOUT; + if (dev->privileged) { + if (irdma_wait_pe_ready(dev)) + return -ETIMEDOUT; - val = readl(dev->hw_regs[IRDMA_GLPCI_LBARCTRL]); - db_size = (u8)FIELD_GET(IRDMA_GLPCI_LBARCTRL_PE_DB_SIZE, val); - if (db_size != IRDMA_PE_DB_SIZE_4M && db_size != IRDMA_PE_DB_SIZE_8M) { - ibdev_dbg(to_ibdev(dev), - "DEV: RDMA PE doorbell is not enabled in CSR val 0x%x db_size=%d\n", - val, db_size); - return -ENODEV; + val = readl(dev->hw_regs[IRDMA_GLPCI_LBARCTRL]); + db_size = (u8)FIELD_GET(IRDMA_GLPCI_LBARCTRL_PE_DB_SIZE, val); + if (db_size != IRDMA_PE_DB_SIZE_4M && + db_size != IRDMA_PE_DB_SIZE_8M) { + ibdev_dbg(to_ibdev(dev), + "DEV: RDMA PE doorbell is not enabled in CSR val 0x%x db_size=%d\n", + val, db_size); + return -ENODEV; + } + } else { + ret_code = irdma_vchnl_req_get_reg_layout(dev); + if (ret_code) + ibdev_dbg(to_ibdev(dev), + "DEV: Get Register layout failed ret = %d\n", + ret_code); } - dev->db_addr = dev->hw->hw_addr + (uintptr_t)dev->hw_regs[IRDMA_DB_ADDR_OFFSET]; return ret_code; } diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index 7d363088b5c3..425bcd17abe9 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -115,6 +115,7 @@ enum irdma_protocol_used { #define IRDMA_FEATURE_BUF_SIZE (8 * IRDMA_MAX_FEATURES) #define ENABLE_LOC_MEM 63 
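/*
 * For context on how the bit added below is consumed: ig3rdma_init_hw(),
 * introduced by this patch, gates the atomic-ops capability on it:
 *
 *	if (dev->feature_info[IRDMA_FTN_FLAGS] & IRDMA_ATOMICS_ALLOWED_BIT)
 *		dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_ATOMIC_OPS;
 */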
+#define IRDMA_ATOMICS_ALLOWED_BIT 1 #define MAX_PBLE_PER_SD 0x40000 #define MAX_PBLE_SD_PER_FCN 0x400 #define MAX_MR_PER_SD 0x8000 @@ -127,7 +128,7 @@ enum irdma_protocol_used { #define IRDMA_QP_SW_MAX_RQ_QUANTA 32768 #define IRDMA_MAX_QP_WRS(max_quanta_per_wr) \ ((IRDMA_QP_SW_MAX_WQ_QUANTA - IRDMA_SQ_RSVD) / (max_quanta_per_wr)) - +#define IRDMA_SRQ_MAX_QUANTA 262144 #define IRDMAQP_TERM_SEND_TERM_AND_FIN 0 #define IRDMAQP_TERM_SEND_TERM_ONLY 1 #define IRDMAQP_TERM_SEND_FIN_ONLY 2 @@ -153,8 +154,13 @@ enum irdma_protocol_used { #define IRDMA_SQ_RSVD 258 #define IRDMA_RQ_RSVD 1 -#define IRDMA_FEATURE_RTS_AE 1ULL -#define IRDMA_FEATURE_CQ_RESIZE 2ULL +#define IRDMA_FEATURE_RTS_AE BIT_ULL(0) +#define IRDMA_FEATURE_CQ_RESIZE BIT_ULL(1) +#define IRDMA_FEATURE_64_BYTE_CQE BIT_ULL(5) +#define IRDMA_FEATURE_ATOMIC_OPS BIT_ULL(6) +#define IRDMA_FEATURE_SRQ BIT_ULL(7) +#define IRDMA_FEATURE_CQE_TIMESTAMPING BIT_ULL(8) + #define IRDMAQP_OP_RDMA_WRITE 0x00 #define IRDMAQP_OP_RDMA_READ 0x01 #define IRDMAQP_OP_RDMA_SEND 0x03 diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.c b/drivers/infiniband/hw/irdma/i40iw_hw.c index ce61a27cb1f6..60c1f2b1811d 100644 --- a/drivers/infiniband/hw/irdma/i40iw_hw.c +++ b/drivers/infiniband/hw/irdma/i40iw_hw.c @@ -85,6 +85,7 @@ static u64 i40iw_masks[IRDMA_MAX_MASKS] = { I40E_CQPSQ_CQ_CEQID, I40E_CQPSQ_CQ_CQID, I40E_COMMIT_FPM_CQCNT, + I40E_CQPSQ_UPESD_HMCFNID, }; static u64 i40iw_shifts[IRDMA_MAX_SHIFTS] = { @@ -94,6 +95,7 @@ static u64 i40iw_shifts[IRDMA_MAX_SHIFTS] = { I40E_CQPSQ_CQ_CEQID_S, I40E_CQPSQ_CQ_CQID_S, I40E_COMMIT_FPM_CQCNT_S, + I40E_CQPSQ_UPESD_HMCFNID_S, }; /** diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.h b/drivers/infiniband/hw/irdma/i40iw_hw.h index e1db84d8a62c..0095b327afcc 100644 --- a/drivers/infiniband/hw/irdma/i40iw_hw.h +++ b/drivers/infiniband/hw/irdma/i40iw_hw.h @@ -123,6 +123,8 @@ #define I40E_CQPSQ_CQ_CQID GENMASK_ULL(15, 0) #define I40E_COMMIT_FPM_CQCNT_S 0 #define I40E_COMMIT_FPM_CQCNT GENMASK_ULL(17, 0) +#define I40E_CQPSQ_UPESD_HMCFNID_S 0 +#define I40E_CQPSQ_UPESD_HMCFNID GENMASK_ULL(5, 0) #define I40E_VSIQF_CTL(_VSI) (0x0020D800 + ((_VSI) * 4)) diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.c b/drivers/infiniband/hw/irdma/icrdma_hw.c index 941d3edffadb..32f26284a788 100644 --- a/drivers/infiniband/hw/irdma/icrdma_hw.c +++ b/drivers/infiniband/hw/irdma/icrdma_hw.c @@ -38,6 +38,7 @@ static u64 icrdma_masks[IRDMA_MAX_MASKS] = { ICRDMA_CQPSQ_CQ_CEQID, ICRDMA_CQPSQ_CQ_CQID, ICRDMA_COMMIT_FPM_CQCNT, + ICRDMA_CQPSQ_UPESD_HMCFNID, }; static u64 icrdma_shifts[IRDMA_MAX_SHIFTS] = { @@ -47,6 +48,7 @@ static u64 icrdma_shifts[IRDMA_MAX_SHIFTS] = { ICRDMA_CQPSQ_CQ_CEQID_S, ICRDMA_CQPSQ_CQ_CQID_S, ICRDMA_COMMIT_FPM_CQCNT_S, + ICRDMA_CQPSQ_UPESD_HMCFNID_S, }; /** @@ -194,6 +196,7 @@ void icrdma_init_hw(struct irdma_sc_dev *dev) dev->hw_attrs.max_hw_ord = ICRDMA_MAX_ORD_SIZE; dev->hw_attrs.max_stat_inst = ICRDMA_MAX_STATS_COUNT; dev->hw_attrs.max_stat_idx = IRDMA_HW_STAT_INDEX_MAX_GEN_2; + dev->hw_attrs.max_hw_device_pages = ICRDMA_MAX_PUSH_PAGE_COUNT; dev->hw_attrs.uk_attrs.min_hw_wq_size = ICRDMA_MIN_WQ_SIZE; dev->hw_attrs.uk_attrs.max_hw_sq_chunk = IRDMA_MAX_QUANTA_PER_WR; diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.h b/drivers/infiniband/hw/irdma/icrdma_hw.h index 697b9572b5c6..d97944ab45da 100644 --- a/drivers/infiniband/hw/irdma/icrdma_hw.h +++ b/drivers/infiniband/hw/irdma/icrdma_hw.h @@ -58,14 +58,15 @@ #define ICRDMA_CQPSQ_CQ_CQID GENMASK_ULL(18, 0) #define ICRDMA_COMMIT_FPM_CQCNT_S 0 #define ICRDMA_COMMIT_FPM_CQCNT 
GENMASK_ULL(19, 0) - +#define ICRDMA_CQPSQ_UPESD_HMCFNID_S 0 +#define ICRDMA_CQPSQ_UPESD_HMCFNID GENMASK_ULL(5, 0) enum icrdma_device_caps_const { ICRDMA_MAX_STATS_COUNT = 128, ICRDMA_MAX_IRD_SIZE = 127, ICRDMA_MAX_ORD_SIZE = 255, ICRDMA_MIN_WQ_SIZE = 8 /* WQEs */, - + ICRDMA_MAX_PUSH_PAGE_COUNT = 256, }; void icrdma_init_hw(struct irdma_sc_dev *dev); diff --git a/drivers/infiniband/hw/irdma/ig3rdma_hw.c b/drivers/infiniband/hw/irdma/ig3rdma_hw.c new file mode 100644 index 000000000000..83ef6af82a8f --- /dev/null +++ b/drivers/infiniband/hw/irdma/ig3rdma_hw.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB +/* Copyright (c) 2018 - 2024 Intel Corporation */ +#include "osdep.h" +#include "type.h" +#include "protos.h" +#include "ig3rdma_hw.h" + +void ig3rdma_init_hw(struct irdma_sc_dev *dev) +{ + dev->hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_3; + dev->hw_attrs.uk_attrs.max_hw_wq_frags = IG3RDMA_MAX_WQ_FRAGMENT_COUNT; + dev->hw_attrs.uk_attrs.max_hw_read_sges = IG3RDMA_MAX_SGE_RD; + dev->hw_attrs.uk_attrs.max_hw_sq_chunk = IRDMA_MAX_QUANTA_PER_WR; + dev->hw_attrs.first_hw_vf_fpm_id = 0; + dev->hw_attrs.max_hw_vf_fpm_id = IG3_MAX_APFS + IG3_MAX_AVFS; + dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_64_BYTE_CQE; + if (dev->feature_info[IRDMA_FTN_FLAGS] & IRDMA_ATOMICS_ALLOWED_BIT) + dev->hw_attrs.uk_attrs.feature_flags |= + IRDMA_FEATURE_ATOMIC_OPS; + dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_CQE_TIMESTAMPING; + + dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_SRQ; + dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_RTS_AE | + IRDMA_FEATURE_CQ_RESIZE; + dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M | SZ_1G; + dev->hw_attrs.max_hw_ird = IG3RDMA_MAX_IRD_SIZE; + dev->hw_attrs.max_hw_ord = IG3RDMA_MAX_ORD_SIZE; + dev->hw_attrs.uk_attrs.min_hw_wq_size = IG3RDMA_MIN_WQ_SIZE; + dev->hw_attrs.uk_attrs.max_hw_srq_quanta = IRDMA_SRQ_MAX_QUANTA; + dev->hw_attrs.uk_attrs.max_hw_inline = IG3RDMA_MAX_INLINE_DATA_SIZE; + dev->hw_attrs.max_hw_device_pages = + dev->is_pf ? 
IG3RDMA_MAX_PF_PUSH_PAGE_COUNT : IG3RDMA_MAX_VF_PUSH_PAGE_COUNT; +} + +static void __iomem *__ig3rdma_get_reg_addr(struct irdma_mmio_region *region, u64 reg_offset) +{ + if (reg_offset >= region->offset && + reg_offset < (region->offset + region->len)) { + reg_offset -= region->offset; + + return region->addr + reg_offset; + } + + return NULL; +} + +void __iomem *ig3rdma_get_reg_addr(struct irdma_hw *hw, u64 reg_offset) +{ + u8 __iomem *reg_addr; + int i; + + reg_addr = __ig3rdma_get_reg_addr(&hw->rdma_reg, reg_offset); + if (reg_addr) + return reg_addr; + + for (i = 0; i < hw->num_io_regions; i++) { + reg_addr = __ig3rdma_get_reg_addr(&hw->io_regs[i], reg_offset); + if (reg_addr) + return reg_addr; + } + + WARN_ON_ONCE(1); + + return NULL; +} diff --git a/drivers/infiniband/hw/irdma/ig3rdma_hw.h b/drivers/infiniband/hw/irdma/ig3rdma_hw.h index c0997c54e5e3..03d5f1188789 100644 --- a/drivers/infiniband/hw/irdma/ig3rdma_hw.h +++ b/drivers/infiniband/hw/irdma/ig3rdma_hw.h @@ -3,11 +3,29 @@ #ifndef IG3RDMA_HW_H #define IG3RDMA_HW_H +#define IG3_MAX_APFS 1 +#define IG3_MAX_AVFS 0 + #define IG3_PF_RDMA_REGION_OFFSET 0xBC00000 #define IG3_PF_RDMA_REGION_LEN 0x401000 #define IG3_VF_RDMA_REGION_OFFSET 0x8C00 #define IG3_VF_RDMA_REGION_LEN 0x8400 +enum ig3rdma_device_caps_const { + IG3RDMA_MAX_WQ_FRAGMENT_COUNT = 14, + IG3RDMA_MAX_SGE_RD = 14, + + IG3RDMA_MAX_STATS_COUNT = 128, + + IG3RDMA_MAX_IRD_SIZE = 64, + IG3RDMA_MAX_ORD_SIZE = 64, + IG3RDMA_MIN_WQ_SIZE = 16 /* WQEs */, + IG3RDMA_MAX_INLINE_DATA_SIZE = 216, + IG3RDMA_MAX_PF_PUSH_PAGE_COUNT = 8192, + IG3RDMA_MAX_VF_PUSH_PAGE_COUNT = 16, +}; + +void __iomem *ig3rdma_get_reg_addr(struct irdma_hw *hw, u64 reg_offset); int ig3rdma_vchnl_send_sync(struct irdma_sc_dev *dev, u8 *msg, u16 len, u8 *recv_msg, u16 *recv_len); diff --git a/drivers/infiniband/hw/irdma/irdma.h b/drivers/infiniband/hw/irdma/irdma.h index df7f6124570c..7e3214904488 100644 --- a/drivers/infiniband/hw/irdma/irdma.h +++ b/drivers/infiniband/hw/irdma/irdma.h @@ -67,6 +67,7 @@ enum irdma_shifts { IRDMA_CQPSQ_CQ_CEQID_S, IRDMA_CQPSQ_CQ_CQID_S, IRDMA_COMMIT_FPM_CQCNT_S, + IRDMA_CQPSQ_UPESD_HMCFNID_S, IRDMA_MAX_SHIFTS, }; @@ -77,6 +78,7 @@ enum irdma_masks { IRDMA_CQPSQ_CQ_CEQID_M, IRDMA_CQPSQ_CQ_CQID_M, IRDMA_COMMIT_FPM_CQCNT_M, + IRDMA_CQPSQ_UPESD_HMCFNID_M, IRDMA_MAX_MASKS, /* Must be last entry */ }; @@ -121,6 +123,7 @@ struct irdma_uk_attrs { u32 max_hw_wq_quanta; u32 min_hw_cq_size; u32 max_hw_cq_size; + u32 max_hw_srq_quanta; u16 max_hw_sq_chunk; u16 min_hw_wq_size; u8 hw_rev; @@ -156,4 +159,6 @@ struct irdma_hw_attrs { void i40iw_init_hw(struct irdma_sc_dev *dev); void icrdma_init_hw(struct irdma_sc_dev *dev); +void ig3rdma_init_hw(struct irdma_sc_dev *dev); +void __iomem *ig3rdma_get_reg_addr(struct irdma_hw *hw, u64 reg_offset); #endif /* IRDMA_H*/ diff --git a/drivers/infiniband/hw/irdma/virtchnl.c b/drivers/infiniband/hw/irdma/virtchnl.c index 5beaecd7dbb5..ea071ffc0204 100644 --- a/drivers/infiniband/hw/irdma/virtchnl.c +++ b/drivers/infiniband/hw/irdma/virtchnl.c @@ -11,6 +11,51 @@ #include "i40iw_hw.h" #include "ig3rdma_hw.h" +struct vchnl_reg_map_elem { + u16 reg_id; + u16 reg_idx; + bool pg_rel; +}; + +struct vchnl_regfld_map_elem { + u16 regfld_id; + u16 regfld_idx; +}; + +static struct vchnl_reg_map_elem vchnl_reg_map[] = { + {IRDMA_VCHNL_REG_ID_CQPTAIL, IRDMA_CQPTAIL, false}, + {IRDMA_VCHNL_REG_ID_CQPDB, IRDMA_CQPDB, false}, + {IRDMA_VCHNL_REG_ID_CCQPSTATUS, IRDMA_CCQPSTATUS, false}, + {IRDMA_VCHNL_REG_ID_CCQPHIGH, IRDMA_CCQPHIGH, false}, + 
{IRDMA_VCHNL_REG_ID_CCQPLOW, IRDMA_CCQPLOW, false}, + {IRDMA_VCHNL_REG_ID_CQARM, IRDMA_CQARM, false}, + {IRDMA_VCHNL_REG_ID_CQACK, IRDMA_CQACK, false}, + {IRDMA_VCHNL_REG_ID_AEQALLOC, IRDMA_AEQALLOC, false}, + {IRDMA_VCHNL_REG_ID_CQPERRCODES, IRDMA_CQPERRCODES, false}, + {IRDMA_VCHNL_REG_ID_WQEALLOC, IRDMA_WQEALLOC, false}, + {IRDMA_VCHNL_REG_ID_DB_ADDR_OFFSET, IRDMA_DB_ADDR_OFFSET, false }, + {IRDMA_VCHNL_REG_ID_DYN_CTL, IRDMA_GLINT_DYN_CTL, false }, + {IRDMA_VCHNL_REG_INV_ID, IRDMA_VCHNL_REG_INV_ID, false } +}; + +static struct vchnl_regfld_map_elem vchnl_regfld_map[] = { + {IRDMA_VCHNL_REGFLD_ID_CCQPSTATUS_CQP_OP_ERR, IRDMA_CCQPSTATUS_CCQP_ERR_M}, + {IRDMA_VCHNL_REGFLD_ID_CCQPSTATUS_CCQP_DONE, IRDMA_CCQPSTATUS_CCQP_DONE_M}, + {IRDMA_VCHNL_REGFLD_ID_CQPSQ_STAG_PDID, IRDMA_CQPSQ_STAG_PDID_M}, + {IRDMA_VCHNL_REGFLD_ID_CQPSQ_CQ_CEQID, IRDMA_CQPSQ_CQ_CEQID_M}, + {IRDMA_VCHNL_REGFLD_ID_CQPSQ_CQ_CQID, IRDMA_CQPSQ_CQ_CQID_M}, + {IRDMA_VCHNL_REGFLD_ID_COMMIT_FPM_CQCNT, IRDMA_COMMIT_FPM_CQCNT_M}, + {IRDMA_VCHNL_REGFLD_ID_UPESD_HMCN_ID, IRDMA_CQPSQ_UPESD_HMCFNID_M}, + {IRDMA_VCHNL_REGFLD_INV_ID, IRDMA_VCHNL_REGFLD_INV_ID} +}; + +#define IRDMA_VCHNL_REG_COUNT ARRAY_SIZE(vchnl_reg_map) +#define IRDMA_VCHNL_REGFLD_COUNT ARRAY_SIZE(vchnl_regfld_map) +#define IRDMA_VCHNL_REGFLD_BUF_SIZE \ + (IRDMA_VCHNL_REG_COUNT * sizeof(struct irdma_vchnl_reg_info) + \ + IRDMA_VCHNL_REGFLD_COUNT * sizeof(struct irdma_vchnl_reg_field_info)) +#define IRDMA_REGMAP_RESP_BUF_SIZE (IRDMA_VCHNL_RESP_MIN_SIZE + IRDMA_VCHNL_REGFLD_BUF_SIZE) + /** * irdma_sc_vchnl_init - Initialize dev virtchannel and get hw_rev * @dev: dev structure to update @@ -64,6 +109,8 @@ static int irdma_vchnl_req_verify_resp(struct irdma_vchnl_req *vchnl_req, if (resp_len < IRDMA_VCHNL_OP_GET_RDMA_CAPS_MIN_SIZE) return -EBADMSG; break; + case IRDMA_VCHNL_OP_GET_REG_LAYOUT: + break; default: return -EOPNOTSUPP; } @@ -137,6 +184,137 @@ exit: return ret; } +/** + * irdma_vchnl_req_get_reg_layout - Get Register Layout + * @dev: RDMA device pointer + */ +int irdma_vchnl_req_get_reg_layout(struct irdma_sc_dev *dev) +{ + u16 reg_idx, reg_id, tmp_reg_id, regfld_idx, regfld_id, tmp_regfld_id; + struct irdma_vchnl_reg_field_info *regfld_array = NULL; + u8 resp_buffer[IRDMA_REGMAP_RESP_BUF_SIZE] = {}; + struct vchnl_regfld_map_elem *regfld_map_array; + struct irdma_vchnl_req_init_info info = {}; + struct vchnl_reg_map_elem *reg_map_array; + struct irdma_vchnl_reg_info *reg_array; + u8 num_bits, shift_cnt; + u16 buf_len = 0; + u64 bitmask; + u32 rindex; + int ret; + + if (!dev->vchnl_up) + return -EBUSY; + + info.op_code = IRDMA_VCHNL_OP_GET_REG_LAYOUT; + info.op_ver = IRDMA_VCHNL_OP_GET_REG_LAYOUT_V0; + info.resp_parm = resp_buffer; + info.resp_parm_len = sizeof(resp_buffer); + + ret = irdma_vchnl_req_send_sync(dev, &info); + + if (ret) + return ret; + + /* parse the response buffer and update reg info*/ + /* Parse registers till invalid */ + /* Parse register fields till invalid */ + reg_array = (struct irdma_vchnl_reg_info *)resp_buffer; + for (rindex = 0; rindex < IRDMA_VCHNL_REG_COUNT; rindex++) { + buf_len += sizeof(struct irdma_vchnl_reg_info); + if (buf_len >= sizeof(resp_buffer)) + return -ENOMEM; + + regfld_array = + (struct irdma_vchnl_reg_field_info *)®_array[rindex + 1]; + reg_id = reg_array[rindex].reg_id; + if (reg_id == IRDMA_VCHNL_REG_INV_ID) + break; + + reg_id &= ~IRDMA_VCHNL_REG_PAGE_REL; + if (reg_id >= IRDMA_VCHNL_REG_COUNT) + return -EINVAL; + + /* search regmap for register index in hw_regs.*/ + reg_map_array = vchnl_reg_map; + do { + 
tmp_reg_id = reg_map_array->reg_id; + if (tmp_reg_id == reg_id) + break; + + reg_map_array++; + } while (tmp_reg_id != IRDMA_VCHNL_REG_INV_ID); + if (tmp_reg_id != reg_id) + continue; + + reg_idx = reg_map_array->reg_idx; + + /* Page relative, DB Offset do not need bar offset */ + if (reg_idx == IRDMA_DB_ADDR_OFFSET || + (reg_array[rindex].reg_id & IRDMA_VCHNL_REG_PAGE_REL)) { + dev->hw_regs[reg_idx] = + (u32 __iomem *)(uintptr_t)reg_array[rindex].reg_offset; + continue; + } + + /* Update the local HW struct */ + dev->hw_regs[reg_idx] = ig3rdma_get_reg_addr(dev->hw, + reg_array[rindex].reg_offset); + if (!dev->hw_regs[reg_idx]) + return -EINVAL; + } + + if (!regfld_array) + return -ENOMEM; + + /* set up doorbell variables using mapped DB page */ + dev->wqe_alloc_db = dev->hw_regs[IRDMA_WQEALLOC]; + dev->cq_arm_db = dev->hw_regs[IRDMA_CQARM]; + dev->aeq_alloc_db = dev->hw_regs[IRDMA_AEQALLOC]; + dev->cqp_db = dev->hw_regs[IRDMA_CQPDB]; + dev->cq_ack_db = dev->hw_regs[IRDMA_CQACK]; + + for (rindex = 0; rindex < IRDMA_VCHNL_REGFLD_COUNT; rindex++) { + buf_len += sizeof(struct irdma_vchnl_reg_field_info); + if ((buf_len - 1) > sizeof(resp_buffer)) + break; + + if (regfld_array[rindex].fld_id == IRDMA_VCHNL_REGFLD_INV_ID) + break; + + regfld_id = regfld_array[rindex].fld_id; + regfld_map_array = vchnl_regfld_map; + do { + tmp_regfld_id = regfld_map_array->regfld_id; + if (tmp_regfld_id == regfld_id) + break; + + regfld_map_array++; + } while (tmp_regfld_id != IRDMA_VCHNL_REGFLD_INV_ID); + + if (tmp_regfld_id != regfld_id) + continue; + + regfld_idx = regfld_map_array->regfld_idx; + + num_bits = regfld_array[rindex].fld_bits; + shift_cnt = regfld_array[rindex].fld_shift; + if ((num_bits + shift_cnt > 64) || !num_bits) { + ibdev_dbg(to_ibdev(dev), + "ERR: Invalid field mask id %d bits %d shift %d", + regfld_id, num_bits, shift_cnt); + + continue; + } + + bitmask = (1ULL << num_bits) - 1; + dev->hw_masks[regfld_idx] = bitmask << shift_cnt; + dev->hw_shifts[regfld_idx] = shift_cnt; + } + + return 0; +} + /** * irdma_vchnl_req_get_ver - Request Channel version * @dev: RDMA device pointer diff --git a/drivers/infiniband/hw/irdma/virtchnl.h b/drivers/infiniband/hw/irdma/virtchnl.h index d1e64666208d..ba551dc70d19 100644 --- a/drivers/infiniband/hw/irdma/virtchnl.h +++ b/drivers/infiniband/hw/irdma/virtchnl.h @@ -14,13 +14,44 @@ #define IRDMA_VCHNL_OP_GET_HMC_FCN_V1 1 #define IRDMA_VCHNL_OP_GET_HMC_FCN_V2 2 #define IRDMA_VCHNL_OP_PUT_HMC_FCN_V0 0 +#define IRDMA_VCHNL_OP_GET_REG_LAYOUT_V0 0 #define IRDMA_VCHNL_OP_GET_RDMA_CAPS_V0 0 #define IRDMA_VCHNL_OP_GET_RDMA_CAPS_MIN_SIZE 1 +#define IRDMA_VCHNL_REG_ID_CQPTAIL 0 +#define IRDMA_VCHNL_REG_ID_CQPDB 1 +#define IRDMA_VCHNL_REG_ID_CCQPSTATUS 2 +#define IRDMA_VCHNL_REG_ID_CCQPHIGH 3 +#define IRDMA_VCHNL_REG_ID_CCQPLOW 4 +#define IRDMA_VCHNL_REG_ID_CQARM 5 +#define IRDMA_VCHNL_REG_ID_CQACK 6 +#define IRDMA_VCHNL_REG_ID_AEQALLOC 7 +#define IRDMA_VCHNL_REG_ID_CQPERRCODES 8 +#define IRDMA_VCHNL_REG_ID_WQEALLOC 9 +#define IRDMA_VCHNL_REG_ID_IPCONFIG0 10 +#define IRDMA_VCHNL_REG_ID_DB_ADDR_OFFSET 11 +#define IRDMA_VCHNL_REG_ID_DYN_CTL 12 +#define IRDMA_VCHNL_REG_ID_AEQITRMASK 13 +#define IRDMA_VCHNL_REG_ID_CEQITRMASK 14 +#define IRDMA_VCHNL_REG_INV_ID 0xFFFF +#define IRDMA_VCHNL_REG_PAGE_REL 0x8000 + +#define IRDMA_VCHNL_REGFLD_ID_CCQPSTATUS_CQP_OP_ERR 2 +#define IRDMA_VCHNL_REGFLD_ID_CCQPSTATUS_CCQP_DONE 5 +#define IRDMA_VCHNL_REGFLD_ID_CQPSQ_STAG_PDID 6 +#define IRDMA_VCHNL_REGFLD_ID_CQPSQ_CQ_CEQID 7 +#define IRDMA_VCHNL_REGFLD_ID_CQPSQ_CQ_CQID 8 +#define 
IRDMA_VCHNL_REGFLD_ID_COMMIT_FPM_CQCNT 9 +#define IRDMA_VCHNL_REGFLD_ID_UPESD_HMCN_ID 10 +#define IRDMA_VCHNL_REGFLD_INV_ID 0xFFFF + +#define IRDMA_VCHNL_RESP_MIN_SIZE (sizeof(struct irdma_vchnl_resp_buf)) + enum irdma_vchnl_ops { IRDMA_VCHNL_OP_GET_VER = 0, IRDMA_VCHNL_OP_GET_HMC_FCN = 1, IRDMA_VCHNL_OP_PUT_HMC_FCN = 2, + IRDMA_VCHNL_OP_GET_REG_LAYOUT = 11, IRDMA_VCHNL_OP_GET_RDMA_CAPS = 13, }; @@ -65,6 +96,18 @@ struct irdma_vchnl_init_info { bool is_pf; }; +struct irdma_vchnl_reg_info { + u32 reg_offset; + u16 field_cnt; + u16 reg_id; /* High bit of reg_id: bar or page relative */ +}; + +struct irdma_vchnl_reg_field_info { + u8 fld_shift; + u8 fld_bits; + u16 fld_id; +}; + struct irdma_vchnl_req { struct irdma_vchnl_op_buf *vchnl_msg; void *parm; @@ -91,4 +134,5 @@ int irdma_vchnl_req_put_hmc_fcn(struct irdma_sc_dev *dev); int irdma_vchnl_req_get_caps(struct irdma_sc_dev *dev); int irdma_vchnl_req_get_resp(struct irdma_sc_dev *dev, struct irdma_vchnl_req *vc_req); +int irdma_vchnl_req_get_reg_layout(struct irdma_sc_dev *dev); #endif /* IRDMA_VIRTCHNL_H */ From c7db0abe5f2bbfa99a080b344e6f70a3d6d0ea38 Mon Sep 17 00:00:00 2001 From: Krzysztof Czurylo Date: Wed, 27 Aug 2025 10:25:33 -0500 Subject: [PATCH 56/85] RDMA/irdma: Add GEN3 CQP support with deferred completions GEN3 introduces asynchronous handling of Control QP (CQP) operations to minimize head-of-line blocking. Create the CQP using the updated GEN3- specific descriptor fields and implement the necessary support for this deferred completion mechanism. Signed-off-by: Krzysztof Czurylo Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250827152545.2056-5-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/ctrl.c | 253 ++++++++++++++++++++++++++- drivers/infiniband/hw/irdma/defs.h | 15 ++ drivers/infiniband/hw/irdma/hw.c | 89 ++++++++-- drivers/infiniband/hw/irdma/main.h | 2 + drivers/infiniband/hw/irdma/protos.h | 1 + drivers/infiniband/hw/irdma/type.h | 43 ++++- drivers/infiniband/hw/irdma/utils.c | 50 +++++- 7 files changed, 438 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index dd174e771786..0fd2b6869ecd 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -2741,6 +2741,89 @@ static inline void irdma_get_cqp_reg_info(struct irdma_sc_cqp *cqp, u32 *val, *error = FIELD_GET(IRDMA_CQPTAIL_CQP_OP_ERR, *val); } +/** + * irdma_sc_cqp_def_cmpl_ae_handler - remove completed requests from pending list + * @dev: sc device struct + * @info: AE entry info + * @first: true if this is the first call to this handler for given AEQE + * @scratch: (out) scratch entry pointer + * @sw_def_info: (in/out) SW ticket value for this AE + * + * In case of AE_DEF_CMPL event, this function should be called in a loop + * until it returns NULL-ptr via scratch. + * For each call, it looks for a matching CQP request on pending list, + * removes it from the list and returns the pointer to the associated scratch + * entry. + * If this is the first call to this function for given AEQE, sw_def_info + * value is not used to find matching requests. Instead, it is populated + * with the value from the first matching cqp_request on the list. + * For subsequent calls, ooo_op->sw_def_info need to match the value passed + * by a caller. + * + * Return: scratch entry pointer for cqp_request to be released or NULL + * if no matching request is found. 
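 *
 * A minimal caller sketch (it mirrors irdma_process_ae_def_cmpl() added to
 * hw.c by this patch; release_request() is only a placeholder for whatever
 * completes and frees the matched cqp_request):
 *
 *	u64 scratch;
 *	u32 sw_def_info;
 *
 *	irdma_sc_cqp_def_cmpl_ae_handler(dev, info, true, &scratch, &sw_def_info);
 *	while (scratch) {
 *		release_request((void *)(uintptr_t)scratch);
 *		irdma_sc_cqp_def_cmpl_ae_handler(dev, info, false,
 *						 &scratch, &sw_def_info);
 *	}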
+ */ +void irdma_sc_cqp_def_cmpl_ae_handler(struct irdma_sc_dev *dev, + struct irdma_aeqe_info *info, + bool first, u64 *scratch, + u32 *sw_def_info) +{ + struct irdma_ooo_cqp_op *ooo_op; + unsigned long flags; + + *scratch = 0; + + spin_lock_irqsave(&dev->cqp->ooo_list_lock, flags); + list_for_each_entry(ooo_op, &dev->cqp->ooo_pnd, list_entry) { + if (ooo_op->deferred && + ((first && ooo_op->def_info == info->def_info) || + (!first && ooo_op->sw_def_info == *sw_def_info))) { + *sw_def_info = ooo_op->sw_def_info; + *scratch = ooo_op->scratch; + + list_move(&ooo_op->list_entry, &dev->cqp->ooo_avail); + atomic64_inc(&dev->cqp->completed_ops); + + break; + } + } + spin_unlock_irqrestore(&dev->cqp->ooo_list_lock, flags); + + if (first && !*scratch) + ibdev_dbg(to_ibdev(dev), + "AEQ: deferred completion with unknown ticket: def_info 0x%x\n", + info->def_info); +} + +/** + * irdma_sc_cqp_cleanup_handler - remove requests from pending list + * @dev: sc device struct + * + * This function should be called in a loop from irdma_cleanup_pending_cqp_op. + * For each call, it returns first CQP request on pending list, removes it + * from the list and returns the pointer to the associated scratch entry. + * + * Return: scratch entry pointer for cqp_request to be released or NULL + * if pending list is empty. + */ +u64 irdma_sc_cqp_cleanup_handler(struct irdma_sc_dev *dev) +{ + struct irdma_ooo_cqp_op *ooo_op; + u64 scratch = 0; + + list_for_each_entry(ooo_op, &dev->cqp->ooo_pnd, list_entry) { + scratch = ooo_op->scratch; + + list_del(&ooo_op->list_entry); + list_add(&ooo_op->list_entry, &dev->cqp->ooo_avail); + atomic64_inc(&dev->cqp->completed_ops); + + break; + } + + return scratch; +} + /** * irdma_cqp_poll_registers - poll cqp registers * @cqp: struct for cqp hw @@ -3126,6 +3209,8 @@ exit: int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, struct irdma_cqp_init_info *info) { + struct irdma_ooo_cqp_op *ooo_op; + u32 num_ooo_ops; u8 hw_sq_size; if (info->sq_size > IRDMA_CQP_SW_SQSIZE_2048 || @@ -3156,17 +3241,43 @@ int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, cqp->rocev2_rto_policy = info->rocev2_rto_policy; cqp->protocol_used = info->protocol_used; memcpy(&cqp->dcqcn_params, &info->dcqcn_params, sizeof(cqp->dcqcn_params)); + if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) { + cqp->ooisc_blksize = info->ooisc_blksize; + cqp->rrsp_blksize = info->rrsp_blksize; + cqp->q1_blksize = info->q1_blksize; + cqp->xmit_blksize = info->xmit_blksize; + cqp->blksizes_valid = info->blksizes_valid; + cqp->ts_shift = info->ts_shift; + cqp->ts_override = info->ts_override; + cqp->en_fine_grained_timers = info->en_fine_grained_timers; + cqp->pe_en_vf_cnt = info->pe_en_vf_cnt; + cqp->ooo_op_array = info->ooo_op_array; + /* initialize the OOO lists */ + INIT_LIST_HEAD(&cqp->ooo_avail); + INIT_LIST_HEAD(&cqp->ooo_pnd); + if (cqp->ooo_op_array) { + /* Populate avail list entries */ + for (num_ooo_ops = 0, ooo_op = info->ooo_op_array; + num_ooo_ops < cqp->sq_size; + num_ooo_ops++, ooo_op++) + list_add(&ooo_op->list_entry, &cqp->ooo_avail); + } + } info->dev->cqp = cqp; IRDMA_RING_INIT(cqp->sq_ring, cqp->sq_size); + cqp->last_def_cmpl_ticket = 0; + cqp->sw_def_cmpl_ticket = 0; cqp->requested_ops = 0; atomic64_set(&cqp->completed_ops, 0); /* for the cqp commands backlog. 
*/ INIT_LIST_HEAD(&cqp->dev->cqp_cmd_head); writel(0, cqp->dev->hw_regs[IRDMA_CQPTAIL]); - writel(0, cqp->dev->hw_regs[IRDMA_CQPDB]); - writel(0, cqp->dev->hw_regs[IRDMA_CCQPSTATUS]); + if (cqp->dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) { + writel(0, cqp->dev->hw_regs[IRDMA_CQPDB]); + writel(0, cqp->dev->hw_regs[IRDMA_CCQPSTATUS]); + } ibdev_dbg(to_ibdev(cqp->dev), "WQE: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%p] cqp[%p] polarity[x%04x]\n", @@ -3198,6 +3309,7 @@ int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err) return -ENOMEM; spin_lock_init(&cqp->dev->cqp_lock); + spin_lock_init(&cqp->ooo_list_lock); temp = FIELD_PREP(IRDMA_CQPHC_SQSIZE, cqp->hw_sq_size) | FIELD_PREP(IRDMA_CQPHC_SVER, cqp->struct_ver) | @@ -3209,12 +3321,29 @@ int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err) FIELD_PREP(IRDMA_CQPHC_PROTOCOL_USED, cqp->protocol_used); } + if (hw_rev >= IRDMA_GEN_3) + temp |= FIELD_PREP(IRDMA_CQPHC_EN_FINE_GRAINED_TIMERS, + cqp->en_fine_grained_timers); set_64bit_val(cqp->host_ctx, 0, temp); set_64bit_val(cqp->host_ctx, 8, cqp->sq_pa); temp = FIELD_PREP(IRDMA_CQPHC_ENABLED_VFS, cqp->ena_vf_count) | FIELD_PREP(IRDMA_CQPHC_HMC_PROFILE, cqp->hmc_profile); + + if (hw_rev >= IRDMA_GEN_3) + temp |= FIELD_PREP(IRDMA_CQPHC_OOISC_BLKSIZE, + cqp->ooisc_blksize) | + FIELD_PREP(IRDMA_CQPHC_RRSP_BLKSIZE, + cqp->rrsp_blksize) | + FIELD_PREP(IRDMA_CQPHC_Q1_BLKSIZE, cqp->q1_blksize) | + FIELD_PREP(IRDMA_CQPHC_XMIT_BLKSIZE, + cqp->xmit_blksize) | + FIELD_PREP(IRDMA_CQPHC_BLKSIZES_VALID, + cqp->blksizes_valid) | + FIELD_PREP(IRDMA_CQPHC_TIMESTAMP_OVERRIDE, + cqp->ts_override) | + FIELD_PREP(IRDMA_CQPHC_TS_SHIFT, cqp->ts_shift); set_64bit_val(cqp->host_ctx, 16, temp); set_64bit_val(cqp->host_ctx, 24, (uintptr_t)cqp); temp = FIELD_PREP(IRDMA_CQPHC_HW_MAJVER, cqp->hw_maj_ver) | @@ -3375,6 +3504,87 @@ void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq) writel(ccq->cq_uk.cq_id, ccq->dev->cq_arm_db); } +/** + * irdma_sc_process_def_cmpl - process deferred or pending completion + * @cqp: CQP sc struct + * @info: CQP CQE info + * @wqe_idx: CQP WQE descriptor index + * @def_info: deferred op ticket value or out-of-order completion id + * @def_cmpl: true for deferred completion, false for pending (RCA) + */ +static void irdma_sc_process_def_cmpl(struct irdma_sc_cqp *cqp, + struct irdma_ccq_cqe_info *info, + u32 wqe_idx, u32 def_info, bool def_cmpl) +{ + struct irdma_ooo_cqp_op *ooo_op; + unsigned long flags; + + /* Deferred and out-of-order completions share the same list of pending + * completions. Since the list can be also accessed from AE handler, + * it must be protected by a lock. + */ + spin_lock_irqsave(&cqp->ooo_list_lock, flags); + + /* For deferred completions bump up SW completion ticket value. */ + if (def_cmpl) { + cqp->last_def_cmpl_ticket = def_info; + cqp->sw_def_cmpl_ticket++; + } + if (!list_empty(&cqp->ooo_avail)) { + ooo_op = (struct irdma_ooo_cqp_op *) + list_entry(cqp->ooo_avail.next, + struct irdma_ooo_cqp_op, list_entry); + + list_del(&ooo_op->list_entry); + ooo_op->scratch = info->scratch; + ooo_op->def_info = def_info; + ooo_op->sw_def_info = cqp->sw_def_cmpl_ticket; + ooo_op->deferred = def_cmpl; + ooo_op->wqe_idx = wqe_idx; + /* Pending completions must be chronologically ordered, + * so adding at the end of list. 
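 * (irdma_sc_cqp_cleanup_handler() drains this list from the head, i.e.
 * oldest request first.)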
+ */ + list_add_tail(&ooo_op->list_entry, &cqp->ooo_pnd); + } + spin_unlock_irqrestore(&cqp->ooo_list_lock, flags); + + info->pending = true; +} + +/** + * irdma_sc_process_ooo_cmpl - process out-of-order (final) completion + * @cqp: CQP sc struct + * @info: CQP CQE info + * @def_info: out-of-order completion id + */ +static void irdma_sc_process_ooo_cmpl(struct irdma_sc_cqp *cqp, + struct irdma_ccq_cqe_info *info, + u32 def_info) +{ + struct irdma_ooo_cqp_op *ooo_op_tmp; + struct irdma_ooo_cqp_op *ooo_op; + unsigned long flags; + + info->scratch = 0; + + spin_lock_irqsave(&cqp->ooo_list_lock, flags); + list_for_each_entry_safe(ooo_op, ooo_op_tmp, &cqp->ooo_pnd, + list_entry) { + if (!ooo_op->deferred && ooo_op->def_info == def_info) { + list_del(&ooo_op->list_entry); + info->scratch = ooo_op->scratch; + list_add(&ooo_op->list_entry, &cqp->ooo_avail); + break; + } + } + spin_unlock_irqrestore(&cqp->ooo_list_lock, flags); + + if (!info->scratch) + ibdev_dbg(to_ibdev(cqp->dev), + "CQP: DEBUG_FW_OOO out-of-order completion with unknown def_info = 0x%x\n", + def_info); +} + /** * irdma_sc_ccq_get_cqe_info - get ccq's cq entry * @ccq: ccq sc struct @@ -3383,6 +3593,10 @@ void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq) int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, struct irdma_ccq_cqe_info *info) { + u32 def_info; + bool def_cmpl = false; + bool pend_cmpl = false; + bool ooo_final_cmpl = false; u64 qp_ctx, temp, temp1; __le64 *cqe; struct irdma_sc_cqp *cqp; @@ -3390,6 +3604,7 @@ int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, u32 error; u8 polarity; int ret_code = 0; + unsigned long flags; if (ccq->cq_uk.avoid_mem_cflct) cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(&ccq->cq_uk); @@ -3421,6 +3636,25 @@ int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, get_64bit_val(cqe, 16, &temp1); info->op_ret_val = (u32)FIELD_GET(IRDMA_CCQ_OPRETVAL, temp1); + if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) { + def_cmpl = info->maj_err_code == IRDMA_CQPSQ_MAJ_NO_ERROR && + info->min_err_code == IRDMA_CQPSQ_MIN_DEF_CMPL; + def_info = (u32)FIELD_GET(IRDMA_CCQ_DEFINFO, temp1); + + pend_cmpl = info->maj_err_code == IRDMA_CQPSQ_MAJ_NO_ERROR && + info->min_err_code == IRDMA_CQPSQ_MIN_OOO_CMPL; + + ooo_final_cmpl = (bool)FIELD_GET(IRDMA_OOO_CMPL, temp); + + if (def_cmpl || pend_cmpl || ooo_final_cmpl) { + if (ooo_final_cmpl) + irdma_sc_process_ooo_cmpl(cqp, info, def_info); + else + irdma_sc_process_def_cmpl(cqp, info, wqe_idx, + def_info, def_cmpl); + } + } + get_64bit_val(cqp->sq_base[wqe_idx].elem, 24, &temp1); info->op_code = (u8)FIELD_GET(IRDMA_CQPSQ_OPCODE, temp1); info->cqp = cqp; @@ -3437,7 +3671,16 @@ int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, dma_wmb(); /* make sure shadow area is updated before moving tail */ - IRDMA_RING_MOVE_TAIL(cqp->sq_ring); + spin_lock_irqsave(&cqp->dev->cqp_lock, flags); + if (!ooo_final_cmpl) + IRDMA_RING_MOVE_TAIL(cqp->sq_ring); + spin_unlock_irqrestore(&cqp->dev->cqp_lock, flags); + + /* Do not increment completed_ops counter on pending or deferred + * completions. 
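 * The counter is bumped later instead: for deferred completions when the
 * entry is taken off the pending list (irdma_sc_cqp_def_cmpl_ae_handler()
 * or irdma_sc_cqp_cleanup_handler()), and for pending ones when the final
 * out-of-order completion arrives and falls through to the increment below.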
+ */ + if (pend_cmpl || def_cmpl) + return ret_code; atomic64_inc(&cqp->completed_ops); return ret_code; @@ -4123,6 +4366,10 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, info->compl_ctx = compl_ctx << 1; ae_src = IRDMA_AE_SOURCE_RSVD; break; + case IRDMA_AE_CQP_DEFERRED_COMPLETE: + info->def_info = info->wqe_idx; + ae_src = IRDMA_AE_SOURCE_RSVD; + break; case IRDMA_AE_ROCE_EMPTY_MCG: case IRDMA_AE_ROCE_BAD_MC_IP_ADDR: case IRDMA_AE_ROCE_BAD_MC_QPID: diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index 425bcd17abe9..1239776c6ba3 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -367,6 +367,7 @@ enum irdma_cqp_op_type { #define IRDMA_AE_LCE_FUNCTION_CATASTROPHIC 0x0701 #define IRDMA_AE_LCE_CQ_CATASTROPHIC 0x0702 #define IRDMA_AE_QP_SUSPEND_COMPLETE 0x0900 +#define IRDMA_AE_CQP_DEFERRED_COMPLETE 0x0901 #define FLD_LS_64(dev, val, field) \ (((u64)(val) << (dev)->hw_shifts[field ## _S]) & (dev)->hw_masks[field ## _M]) @@ -464,6 +465,16 @@ enum irdma_cqp_op_type { #define IRDMA_CQPHC_SVER GENMASK_ULL(31, 24) #define IRDMA_CQPHC_SQBASE GENMASK_ULL(63, 9) +#define IRDMA_CQPHC_TIMESTAMP_OVERRIDE BIT_ULL(5) +#define IRDMA_CQPHC_TS_SHIFT GENMASK_ULL(12, 8) +#define IRDMA_CQPHC_EN_FINE_GRAINED_TIMERS BIT_ULL(0) + +#define IRDMA_CQPHC_OOISC_BLKSIZE GENMASK_ULL(63, 60) +#define IRDMA_CQPHC_RRSP_BLKSIZE GENMASK_ULL(59, 56) +#define IRDMA_CQPHC_Q1_BLKSIZE GENMASK_ULL(55, 52) +#define IRDMA_CQPHC_XMIT_BLKSIZE GENMASK_ULL(51, 48) +#define IRDMA_CQPHC_BLKSIZES_VALID BIT_ULL(4) + #define IRDMA_CQPHC_QPCTX GENMASK_ULL(63, 0) #define IRDMA_QP_DBSA_HW_SQ_TAIL GENMASK_ULL(14, 0) #define IRDMA_CQ_DBSA_CQEIDX GENMASK_ULL(19, 0) @@ -477,6 +488,8 @@ enum irdma_cqp_op_type { #define IRDMA_CCQ_OPRETVAL GENMASK_ULL(31, 0) +#define IRDMA_CCQ_DEFINFO GENMASK_ULL(63, 32) + #define IRDMA_CQ_MINERR GENMASK_ULL(15, 0) #define IRDMA_CQ_MAJERR GENMASK_ULL(31, 16) #define IRDMA_CQ_WQEIDX GENMASK_ULL(46, 32) @@ -708,6 +721,8 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_MIN_STAG_INVALID 0x0001 #define IRDMA_CQPSQ_MIN_SUSPEND_PND 0x0005 +#define IRDMA_CQPSQ_MIN_DEF_CMPL 0x0006 +#define IRDMA_CQPSQ_MIN_OOO_CMPL 0x0007 #define IRDMA_CQPSQ_MAJ_NO_ERROR 0x0000 #define IRDMA_CQPSQ_MAJ_OBJCACHE_ERROR 0xF000 diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 2aa5f53a46a7..caad1b1f2ee3 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -207,6 +207,51 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp, } } +/** + * irdma_complete_cqp_request - perform post-completion cleanup + * @cqp: device CQP + * @cqp_request: CQP request + * + * Mark CQP request as done, wake up waiting thread or invoke + * callback function and release/free CQP request. 
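 * This helper is shared by the regular CCQ completion path
 * (irdma_cqp_ce_handler()) and the deferred-completion AE path
 * (irdma_process_ae_def_cmpl()).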
+ */ +static void irdma_complete_cqp_request(struct irdma_cqp *cqp, + struct irdma_cqp_request *cqp_request) +{ + if (cqp_request->waiting) { + WRITE_ONCE(cqp_request->request_done, true); + wake_up(&cqp_request->waitq); + } else if (cqp_request->callback_fcn) { + cqp_request->callback_fcn(cqp_request); + } + irdma_put_cqp_request(cqp, cqp_request); +} + +/** + * irdma_process_ae_def_cmpl - handle IRDMA_AE_CQP_DEFERRED_COMPLETE event + * @rf: RDMA PCI function + * @info: AEQ entry info + */ +static void irdma_process_ae_def_cmpl(struct irdma_pci_f *rf, + struct irdma_aeqe_info *info) +{ + u32 sw_def_info; + u64 scratch; + + irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq); + + irdma_sc_cqp_def_cmpl_ae_handler(&rf->sc_dev, info, true, + &scratch, &sw_def_info); + while (scratch) { + struct irdma_cqp_request *cqp_request = + (struct irdma_cqp_request *)(uintptr_t)scratch; + + irdma_complete_cqp_request(&rf->cqp, cqp_request); + irdma_sc_cqp_def_cmpl_ae_handler(&rf->sc_dev, info, false, + &scratch, &sw_def_info); + } +} + /** * irdma_process_aeq - handle aeq events * @rf: RDMA PCI function @@ -269,7 +314,8 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) spin_unlock_irqrestore(&iwqp->lock, flags); ctx_info = &iwqp->ctx_info; } else { - if (info->ae_id != IRDMA_AE_CQ_OPERATION_ERROR) + if (info->ae_id != IRDMA_AE_CQ_OPERATION_ERROR && + info->ae_id != IRDMA_AE_CQP_DEFERRED_COMPLETE) continue; } @@ -364,6 +410,12 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) } irdma_cq_rem_ref(&iwcq->ibcq); break; + case IRDMA_AE_CQP_DEFERRED_COMPLETE: + /* Remove completed CQP requests from pending list + * and notify about those CQP ops completion. + */ + irdma_process_ae_def_cmpl(rf, info); + break; case IRDMA_AE_RESET_NOT_SENT: case IRDMA_AE_LLP_DOUBT_REACHABILITY: case IRDMA_AE_RESOURCE_EXHAUSTION: @@ -600,6 +652,8 @@ static void irdma_destroy_cqp(struct irdma_pci_f *rf) dma_free_coherent(dev->hw->device, cqp->sq.size, cqp->sq.va, cqp->sq.pa); cqp->sq.va = NULL; + kfree(cqp->oop_op_array); + cqp->oop_op_array = NULL; kfree(cqp->scratch_array); cqp->scratch_array = NULL; kfree(cqp->cqp_requests); @@ -943,6 +997,13 @@ static int irdma_create_cqp(struct irdma_pci_f *rf) goto err_scratch; } + cqp->oop_op_array = kcalloc(sqsize, sizeof(*cqp->oop_op_array), + GFP_KERNEL); + if (!cqp->oop_op_array) { + status = -ENOMEM; + goto err_oop; + } + cqp_init_info.ooo_op_array = cqp->oop_op_array; dev->cqp = &cqp->sc_cqp; dev->cqp->dev = dev; cqp->sq.size = ALIGN(sizeof(struct irdma_cqp_sq_wqe) * sqsize, @@ -979,6 +1040,10 @@ static int irdma_create_cqp(struct irdma_pci_f *rf) case IRDMA_GEN_2: cqp_init_info.hw_maj_ver = IRDMA_CQPHC_HW_MAJVER_GEN_2; break; + case IRDMA_GEN_3: + cqp_init_info.hw_maj_ver = IRDMA_CQPHC_HW_MAJVER_GEN_3; + cqp_init_info.ts_override = 1; + break; } status = irdma_sc_cqp_init(dev->cqp, &cqp_init_info); if (status) { @@ -1013,6 +1078,9 @@ err_ctx: cqp->sq.va, cqp->sq.pa); cqp->sq.va = NULL; err_sq: + kfree(cqp->oop_op_array); + cqp->oop_op_array = NULL; +err_oop: kfree(cqp->scratch_array); cqp->scratch_array = NULL; err_scratch: @@ -2104,15 +2172,16 @@ void irdma_cqp_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq) cqp_request->compl_info.op_ret_val = info.op_ret_val; cqp_request->compl_info.error = info.error; - if (cqp_request->waiting) { - WRITE_ONCE(cqp_request->request_done, true); - wake_up(&cqp_request->waitq); - irdma_put_cqp_request(&rf->cqp, cqp_request); - } else { - if (cqp_request->callback_fcn) - cqp_request->callback_fcn(cqp_request); - 
irdma_put_cqp_request(&rf->cqp, cqp_request); - } + /* + * If this is deferred or pending completion, then mark + * CQP request as pending to not block the CQ, but don't + * release CQP request, as it is still on the OOO list. + */ + if (info.pending) + cqp_request->pending = true; + else + irdma_complete_cqp_request(&rf->cqp, + cqp_request); } cqe_count++; diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index efc7c13e29df..7414e0ec6306 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -168,6 +168,7 @@ struct irdma_cqp_request { bool request_done; /* READ/WRITE_ONCE macros operate on it */ bool waiting:1; bool dynamic:1; + bool pending:1; }; struct irdma_cqp { @@ -180,6 +181,7 @@ struct irdma_cqp { struct irdma_dma_mem host_ctx; u64 *scratch_array; struct irdma_cqp_request *cqp_requests; + struct irdma_ooo_cqp_op *oop_op_array; struct list_head cqp_avail_reqs; struct list_head cqp_pending_reqs; }; diff --git a/drivers/infiniband/hw/irdma/protos.h b/drivers/infiniband/hw/irdma/protos.h index c0c9441885d3..324cfbf21764 100644 --- a/drivers/infiniband/hw/irdma/protos.h +++ b/drivers/infiniband/hw/irdma/protos.h @@ -10,6 +10,7 @@ #define ALL_TC2PFC 0xff #define CQP_COMPL_WAIT_TIME_MS 10 #define CQP_TIMEOUT_THRESHOLD 500 +#define CQP_DEF_CMPL_TIMEOUT_THRESHOLD 2500 /* init operations */ int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev, diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index bb241a4ff5f9..e779ad8dcba5 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -262,12 +262,22 @@ struct irdma_cqp_init_info { __le64 *host_ctx; u64 *scratch_array; u32 sq_size; + struct irdma_ooo_cqp_op *ooo_op_array; + u32 pe_en_vf_cnt; u16 hw_maj_ver; u16 hw_min_ver; u8 struct_ver; u8 hmc_profile; u8 ena_vf_count; u8 ceqs_per_vf; + u8 ooisc_blksize; + u8 rrsp_blksize; + u8 q1_blksize; + u8 xmit_blksize; + u8 ts_override; + u8 ts_shift; + u8 en_fine_grained_timers; + u8 blksizes_valid; bool en_datacenter_tcp:1; bool disable_packed:1; bool rocev2_rto_policy:1; @@ -392,7 +402,21 @@ struct irdma_cqp_quanta { __le64 elem[IRDMA_CQP_WQE_SIZE]; }; +struct irdma_ooo_cqp_op { + struct list_head list_entry; + u64 scratch; + u32 def_info; + u32 sw_def_info; + u32 wqe_idx; + bool deferred:1; +}; + struct irdma_sc_cqp { + spinlock_t ooo_list_lock; /* protects list of pending completions */ + struct list_head ooo_avail; + struct list_head ooo_pnd; + u32 last_def_cmpl_ticket; + u32 sw_def_cmpl_ticket; u32 size; u64 sq_pa; u64 host_ctx_pa; @@ -408,8 +432,10 @@ struct irdma_sc_cqp { u64 *scratch_array; u64 requested_ops; atomic64_t completed_ops; + struct irdma_ooo_cqp_op *ooo_op_array; u32 cqp_id; u32 sq_size; + u32 pe_en_vf_cnt; u32 hw_sq_size; u16 hw_maj_ver; u16 hw_min_ver; @@ -419,6 +445,14 @@ struct irdma_sc_cqp { u8 ena_vf_count; u8 timeout_count; u8 ceqs_per_vf; + u8 ooisc_blksize; + u8 rrsp_blksize; + u8 q1_blksize; + u8 xmit_blksize; + u8 ts_override; + u8 ts_shift; + u8 en_fine_grained_timers; + u8 blksizes_valid; bool en_datacenter_tcp:1; bool disable_packed:1; bool rocev2_rto_policy:1; @@ -723,7 +757,8 @@ struct irdma_ccq_cqe_info { u16 maj_err_code; u16 min_err_code; u8 op_code; - bool error; + bool error:1; + bool pending:1; }; struct irdma_dcb_app_info { @@ -998,6 +1033,7 @@ struct irdma_qp_host_ctx_info { struct irdma_aeqe_info { u64 compl_ctx; u32 qp_cq_id; + u32 def_info; /* only valid for DEF_CMPL */ u16 ae_id; u16 wqe_idx; u8 
tcp_state; @@ -1242,6 +1278,11 @@ void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_i void irdma_cfg_aeq(struct irdma_sc_dev *dev, u32 idx, bool enable); void irdma_check_cqp_progress(struct irdma_cqp_timeout *cqp_timeout, struct irdma_sc_dev *dev); +void irdma_sc_cqp_def_cmpl_ae_handler(struct irdma_sc_dev *dev, + struct irdma_aeqe_info *info, + bool first, u64 *scratch, + u32 *sw_def_info); +u64 irdma_sc_cqp_cleanup_handler(struct irdma_sc_dev *dev); int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err); int irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp); int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index b510ef747399..d4c51b56ed22 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -481,6 +481,7 @@ void irdma_free_cqp_request(struct irdma_cqp *cqp, WRITE_ONCE(cqp_request->request_done, false); cqp_request->callback_fcn = NULL; cqp_request->waiting = false; + cqp_request->pending = false; spin_lock_irqsave(&cqp->req_lock, flags); list_add_tail(&cqp_request->list, &cqp->cqp_avail_reqs); @@ -520,6 +521,22 @@ irdma_free_pending_cqp_request(struct irdma_cqp *cqp, irdma_put_cqp_request(cqp, cqp_request); } +/** + * irdma_cleanup_deferred_cqp_ops - clean-up cqp with no completions + * @dev: sc_dev + * @cqp: cqp + */ +static void irdma_cleanup_deferred_cqp_ops(struct irdma_sc_dev *dev, + struct irdma_cqp *cqp) +{ + u64 scratch; + + /* process all CQP requests with deferred/pending completions */ + while ((scratch = irdma_sc_cqp_cleanup_handler(dev))) + irdma_free_pending_cqp_request(cqp, (struct irdma_cqp_request *) + (uintptr_t)scratch); +} + /** * irdma_cleanup_pending_cqp_op - clean-up cqp with no * completions @@ -533,6 +550,8 @@ void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf) struct cqp_cmds_info *pcmdinfo = NULL; u32 i, pending_work, wqe_idx; + if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) + irdma_cleanup_deferred_cqp_ops(dev, cqp); pending_work = IRDMA_RING_USED_QUANTA(cqp->sc_cqp.sq_ring); wqe_idx = IRDMA_RING_CURRENT_TAIL(cqp->sc_cqp.sq_ring); for (i = 0; i < pending_work; i++) { @@ -552,6 +571,26 @@ void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf) } } +static int irdma_get_timeout_threshold(struct irdma_sc_dev *dev) +{ + u16 time_s = dev->vc_caps.cqp_timeout_s; + + if (!time_s) + return CQP_TIMEOUT_THRESHOLD; + + return time_s * 1000 / dev->hw_attrs.max_cqp_compl_wait_time_ms; +} + +static int irdma_get_def_timeout_threshold(struct irdma_sc_dev *dev) +{ + u16 time_s = dev->vc_caps.cqp_def_timeout_s; + + if (!time_s) + return CQP_DEF_CMPL_TIMEOUT_THRESHOLD; + + return time_s * 1000 / dev->hw_attrs.max_cqp_compl_wait_time_ms; +} + /** * irdma_wait_event - wait for completion * @rf: RDMA PCI function @@ -561,6 +600,7 @@ static int irdma_wait_event(struct irdma_pci_f *rf, struct irdma_cqp_request *cqp_request) { struct irdma_cqp_timeout cqp_timeout = {}; + int timeout_threshold = irdma_get_timeout_threshold(&rf->sc_dev); bool cqp_error = false; int err_code = 0; @@ -572,9 +612,17 @@ static int irdma_wait_event(struct irdma_pci_f *rf, msecs_to_jiffies(CQP_COMPL_WAIT_TIME_MS))) break; + if (cqp_request->pending) + /* There was a deferred or pending completion + * received for this CQP request, so we need + * to wait longer than usual. 
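 * With the fallback constants this raises the budget from
 * CQP_TIMEOUT_THRESHOLD (500) to CQP_DEF_CMPL_TIMEOUT_THRESHOLD (2500)
 * no-progress checks spaced CQP_COMPL_WAIT_TIME_MS apart; a
 * cqp_def_timeout_s value advertised by the CP overrides the default.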
+ */ + timeout_threshold = + irdma_get_def_timeout_threshold(&rf->sc_dev); + irdma_check_cqp_progress(&cqp_timeout, &rf->sc_dev); - if (cqp_timeout.count < CQP_TIMEOUT_THRESHOLD) + if (cqp_timeout.count < timeout_threshold) continue; if (!rf->reset) { From b800e82feba7bd758f6564975e9f9995866162e3 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 27 Aug 2025 10:25:34 -0500 Subject: [PATCH 57/85] RDMA/irdma: Add GEN3 support for AEQ and CEQ Extend support for GEN3 devices by programming the necessary hardware IRQ registers and the updated descriptor fields for the Asynchronous Event Queue (AEQ) and Completion Event Queue (CEQ). Introduce a RDMA virtual channel operation with the Control Plane (CP) to associate interrupt vectors appropriately with AEQ and CEQ. Add new Asynchronous Event (AE) definitions specific to GEN3. Additionally, refactor the AEQ and CEQ setup into the irdma_ctrl_init_hw device control initialization routine. This completes the PCI device level initialization for RDMA in the core driver. Signed-off-by: Shiraz Saleem Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250827152545.2056-6-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/ctrl.c | 76 +++++++++++-- drivers/infiniband/hw/irdma/defs.h | 29 ++++- drivers/infiniband/hw/irdma/hw.c | 134 +++++++++++++---------- drivers/infiniband/hw/irdma/ig3rdma_hw.c | 45 ++++++++ drivers/infiniband/hw/irdma/irdma.h | 11 +- drivers/infiniband/hw/irdma/main.h | 6 +- drivers/infiniband/hw/irdma/type.h | 11 +- drivers/infiniband/hw/irdma/virtchnl.c | 84 ++++++++++++++ drivers/infiniband/hw/irdma/virtchnl.h | 19 ++++ 9 files changed, 340 insertions(+), 75 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 0fd2b6869ecd..3f83ffaf448d 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -2566,6 +2566,9 @@ static int irdma_sc_cq_create(struct irdma_sc_cq *cq, u64 scratch, FIELD_PREP(IRDMA_CQPSQ_CQ_LPBLSIZE, cq->pbl_chunk_size) | FIELD_PREP(IRDMA_CQPSQ_CQ_CHKOVERFLOW, check_overflow) | FIELD_PREP(IRDMA_CQPSQ_CQ_VIRTMAP, cq->virtual_map) | + FIELD_PREP(IRDMA_CQPSQ_CQ_CQID_HIGH, cq->cq_uk.cq_id >> 22) | + FIELD_PREP(IRDMA_CQPSQ_CQ_CEQID_HIGH, + (cq->ceq_id_valid ? cq->ceq_id : 0) >> 10) | FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, cq->ceqe_mask) | FIELD_PREP(IRDMA_CQPSQ_CQ_CEQIDVALID, cq->ceq_id_valid) | FIELD_PREP(IRDMA_CQPSQ_TPHEN, cq->tph_en) | @@ -3928,7 +3931,7 @@ int irdma_sc_ceq_init(struct irdma_sc_ceq *ceq, ceq->pbl_list = (ceq->virtual_map ? info->pbl_list : NULL); ceq->tph_en = info->tph_en; ceq->tph_val = info->tph_val; - ceq->vsi = info->vsi; + ceq->vsi_idx = info->vsi_idx; ceq->polarity = 1; IRDMA_RING_INIT(ceq->ceq_ring, ceq->elem_cnt); ceq->dev->ceq[info->ceq_id] = ceq; @@ -3961,13 +3964,16 @@ static int irdma_sc_ceq_create(struct irdma_sc_ceq *ceq, u64 scratch, (ceq->virtual_map ? 
ceq->first_pm_pbl_idx : 0)); set_64bit_val(wqe, 56, FIELD_PREP(IRDMA_CQPSQ_TPHVAL, ceq->tph_val) | - FIELD_PREP(IRDMA_CQPSQ_VSIIDX, ceq->vsi->vsi_idx)); + FIELD_PREP(IRDMA_CQPSQ_PASID, ceq->pasid) | + FIELD_PREP(IRDMA_CQPSQ_VSIIDX, ceq->vsi_idx)); hdr = FIELD_PREP(IRDMA_CQPSQ_CEQ_CEQID, ceq->ceq_id) | + FIELD_PREP(IRDMA_CQPSQ_CEQ_CEQID_HIGH, ceq->ceq_id >> 10) | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_CEQ) | FIELD_PREP(IRDMA_CQPSQ_CEQ_LPBLSIZE, ceq->pbl_chunk_size) | FIELD_PREP(IRDMA_CQPSQ_CEQ_VMAP, ceq->virtual_map) | FIELD_PREP(IRDMA_CQPSQ_CEQ_ITRNOEXPIRE, ceq->itr_no_expire) | FIELD_PREP(IRDMA_CQPSQ_TPHEN, ceq->tph_en) | + FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, ceq->pasid_valid) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); dma_wmb(); /* make sure WQE is written before valid bit is set */ @@ -4022,7 +4028,7 @@ int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch) int ret_code; struct irdma_sc_dev *dev = ceq->dev; - dev->ccq->vsi = ceq->vsi; + dev->ccq->vsi_idx = ceq->vsi_idx; if (ceq->reg_cq) { ret_code = irdma_sc_add_cq_ctx(ceq, ceq->dev->ccq); if (ret_code) @@ -4055,11 +4061,14 @@ int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq) set_64bit_val(wqe, 16, ceq->elem_cnt); set_64bit_val(wqe, 48, ceq->first_pm_pbl_idx); + set_64bit_val(wqe, 56, + FIELD_PREP(IRDMA_CQPSQ_PASID, ceq->pasid)); hdr = ceq->ceq_id | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_CEQ) | FIELD_PREP(IRDMA_CQPSQ_CEQ_LPBLSIZE, ceq->pbl_chunk_size) | FIELD_PREP(IRDMA_CQPSQ_CEQ_VMAP, ceq->virtual_map) | FIELD_PREP(IRDMA_CQPSQ_TPHEN, ceq->tph_en) | + FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, ceq->pasid_valid) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); dma_wmb(); /* make sure WQE is written before valid bit is set */ @@ -4223,10 +4232,13 @@ static int irdma_sc_aeq_create(struct irdma_sc_aeq *aeq, u64 scratch, (aeq->virtual_map ? 0 : aeq->aeq_elem_pa)); set_64bit_val(wqe, 48, (aeq->virtual_map ? 
aeq->first_pm_pbl_idx : 0)); + set_64bit_val(wqe, 56, + FIELD_PREP(IRDMA_CQPSQ_PASID, aeq->pasid)); hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_AEQ) | FIELD_PREP(IRDMA_CQPSQ_AEQ_LPBLSIZE, aeq->pbl_chunk_size) | FIELD_PREP(IRDMA_CQPSQ_AEQ_VMAP, aeq->virtual_map) | + FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, aeq->pasid_valid) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); dma_wmb(); /* make sure WQE is written before valid bit is set */ @@ -4255,7 +4267,8 @@ static int irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, u64 scratch, u64 hdr; dev = aeq->dev; - writel(0, dev->hw_regs[IRDMA_PFINT_AEQCTL]); + if (dev->privileged) + writel(0, dev->hw_regs[IRDMA_PFINT_AEQCTL]); cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); @@ -4263,9 +4276,12 @@ static int irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, u64 scratch, return -ENOMEM; set_64bit_val(wqe, 16, aeq->elem_cnt); set_64bit_val(wqe, 48, aeq->first_pm_pbl_idx); + set_64bit_val(wqe, 56, + FIELD_PREP(IRDMA_CQPSQ_PASID, aeq->pasid)); hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_AEQ) | FIELD_PREP(IRDMA_CQPSQ_AEQ_LPBLSIZE, aeq->pbl_chunk_size) | FIELD_PREP(IRDMA_CQPSQ_AEQ_VMAP, aeq->virtual_map) | + FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, aeq->pasid_valid) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); dma_wmb(); /* make sure WQE is written before valid bit is set */ @@ -4306,18 +4322,39 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, print_hex_dump_debug("WQE: AEQ_ENTRY WQE", DUMP_PREFIX_OFFSET, 16, 8, aeqe, 16, false); - ae_src = (u8)FIELD_GET(IRDMA_AEQE_AESRC, temp); - info->wqe_idx = (u16)FIELD_GET(IRDMA_AEQE_WQDESCIDX, temp); - info->qp_cq_id = (u32)FIELD_GET(IRDMA_AEQE_QPCQID_LOW, temp) | + if (aeq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) { + ae_src = (u8)FIELD_GET(IRDMA_AEQE_AESRC_GEN_3, temp); + info->wqe_idx = (u16)FIELD_GET(IRDMA_AEQE_WQDESCIDX_GEN_3, + temp); + info->qp_cq_id = (u32)FIELD_GET(IRDMA_AEQE_QPCQID_GEN_3, temp); + info->ae_id = (u16)FIELD_GET(IRDMA_AEQE_AECODE_GEN_3, temp); + info->tcp_state = (u8)FIELD_GET(IRDMA_AEQE_TCPSTATE_GEN_3, compl_ctx); + info->iwarp_state = (u8)FIELD_GET(IRDMA_AEQE_IWSTATE_GEN_3, temp); + info->q2_data_written = (u8)FIELD_GET(IRDMA_AEQE_Q2DATA_GEN_3, compl_ctx); + info->aeqe_overflow = (bool)FIELD_GET(IRDMA_AEQE_OVERFLOW_GEN_3, temp); + info->compl_ctx = FIELD_GET(IRDMA_AEQE_CMPL_CTXT, compl_ctx); + compl_ctx = FIELD_GET(IRDMA_AEQE_CMPL_CTXT, compl_ctx) << IRDMA_AEQE_CMPL_CTXT_S; + } else { + ae_src = (u8)FIELD_GET(IRDMA_AEQE_AESRC, temp); + info->wqe_idx = (u16)FIELD_GET(IRDMA_AEQE_WQDESCIDX, temp); + info->qp_cq_id = (u32)FIELD_GET(IRDMA_AEQE_QPCQID_LOW, temp) | ((u32)FIELD_GET(IRDMA_AEQE_QPCQID_HI, temp) << 18); - info->ae_id = (u16)FIELD_GET(IRDMA_AEQE_AECODE, temp); - info->tcp_state = (u8)FIELD_GET(IRDMA_AEQE_TCPSTATE, temp); - info->iwarp_state = (u8)FIELD_GET(IRDMA_AEQE_IWSTATE, temp); - info->q2_data_written = (u8)FIELD_GET(IRDMA_AEQE_Q2DATA, temp); - info->aeqe_overflow = (bool)FIELD_GET(IRDMA_AEQE_OVERFLOW, temp); + info->ae_id = (u16)FIELD_GET(IRDMA_AEQE_AECODE, temp); + info->tcp_state = (u8)FIELD_GET(IRDMA_AEQE_TCPSTATE, temp); + info->iwarp_state = (u8)FIELD_GET(IRDMA_AEQE_IWSTATE, temp); + info->q2_data_written = (u8)FIELD_GET(IRDMA_AEQE_Q2DATA, temp); + info->aeqe_overflow = (bool)FIELD_GET(IRDMA_AEQE_OVERFLOW, + temp); + } info->ae_src = ae_src; switch (info->ae_id) { + case IRDMA_AE_SRQ_LIMIT: + info->srq = true; + /* [63:6] from CMPL_CTXT, [5:0] from WQDESCIDX. 
*/ + info->compl_ctx = compl_ctx | info->wqe_idx; + ae_src = IRDMA_AE_SOURCE_RSVD; + break; case IRDMA_AE_PRIV_OPERATION_DENIED: case IRDMA_AE_AMP_INVALIDATE_TYPE1_MW: case IRDMA_AE_AMP_MWBIND_ZERO_BASED_TYPE1_MW: @@ -4350,6 +4387,10 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR: case IRDMA_AE_LLP_SEGMENT_TOO_SMALL: case IRDMA_AE_LLP_TOO_MANY_RETRIES: + case IRDMA_AE_LLP_TOO_MANY_RNRS: + case IRDMA_AE_REMOTE_QP_CATASTROPHIC: + case IRDMA_AE_LOCAL_QP_CATASTROPHIC: + case IRDMA_AE_RCE_QP_CATASTROPHIC: case IRDMA_AE_LLP_DOUBT_REACHABILITY: case IRDMA_AE_LLP_CONNECTION_ESTABLISHED: case IRDMA_AE_RESET_SENT: @@ -4395,6 +4436,7 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, info->qp = true; info->rq = true; info->compl_ctx = compl_ctx; + info->err_rq_idx_valid = true; break; case IRDMA_AE_SOURCE_CQ: case IRDMA_AE_SOURCE_CQ_0110: @@ -4410,8 +4452,18 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, info->compl_ctx = compl_ctx; break; case IRDMA_AE_SOURCE_IN_RR_WR: + info->qp = true; + if (aeq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) + info->err_rq_idx_valid = true; + info->compl_ctx = compl_ctx; + info->in_rdrsp_wr = true; + break; case IRDMA_AE_SOURCE_IN_RR_WR_1011: info->qp = true; + if (aeq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) { + info->sq = true; + info->err_rq_idx_valid = true; + } info->compl_ctx = compl_ctx; info->in_rdrsp_wr = true; break; diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index 1239776c6ba3..52ace06912eb 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -319,13 +319,18 @@ enum irdma_cqp_op_type { #define IRDMA_AE_STAG_ZERO_INVALID 0x0206 #define IRDMA_AE_IB_RREQ_AND_Q1_FULL 0x0207 #define IRDMA_AE_IB_INVALID_REQUEST 0x0208 +#define IRDMA_AE_SRQ_LIMIT 0x0209 #define IRDMA_AE_WQE_UNEXPECTED_OPCODE 0x020a #define IRDMA_AE_WQE_INVALID_PARAMETER 0x020b #define IRDMA_AE_WQE_INVALID_FRAG_DATA 0x020c #define IRDMA_AE_IB_REMOTE_ACCESS_ERROR 0x020d #define IRDMA_AE_IB_REMOTE_OP_ERROR 0x020e +#define IRDMA_AE_SRQ_CATASTROPHIC_ERROR 0x020f #define IRDMA_AE_WQE_LSMM_TOO_LONG 0x0220 +#define IRDMA_AE_ATOMIC_ALIGNMENT 0x0221 +#define IRDMA_AE_ATOMIC_MASK 0x0222 #define IRDMA_AE_INVALID_REQUEST 0x0223 +#define IRDMA_AE_PCIE_ATOMIC_DISABLE 0x0224 #define IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301 #define IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303 #define IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304 @@ -366,8 +371,12 @@ enum irdma_cqp_op_type { #define IRDMA_AE_LCE_QP_CATASTROPHIC 0x0700 #define IRDMA_AE_LCE_FUNCTION_CATASTROPHIC 0x0701 #define IRDMA_AE_LCE_CQ_CATASTROPHIC 0x0702 +#define IRDMA_AE_REMOTE_QP_CATASTROPHIC 0x0703 +#define IRDMA_AE_LOCAL_QP_CATASTROPHIC 0x0704 +#define IRDMA_AE_RCE_QP_CATASTROPHIC 0x0705 #define IRDMA_AE_QP_SUSPEND_COMPLETE 0x0900 #define IRDMA_AE_CQP_DEFERRED_COMPLETE 0x0901 +#define IRDMA_AE_ADAPTER_CATASTROPHIC 0x0B0B #define FLD_LS_64(dev, val, field) \ (((u64)(val) << (dev)->hw_shifts[field ## _S]) & (dev)->hw_masks[field ## _M]) @@ -535,6 +544,17 @@ enum irdma_cqp_op_type { #define IRDMA_AEQE_Q2DATA GENMASK_ULL(62, 61) #define IRDMA_AEQE_VALID BIT_ULL(63) +#define IRDMA_AEQE_Q2DATA_GEN_3 GENMASK_ULL(5, 4) +#define IRDMA_AEQE_TCPSTATE_GEN_3 GENMASK_ULL(3, 0) +#define IRDMA_AEQE_QPCQID_GEN_3 GENMASK_ULL(24, 0) +#define IRDMA_AEQE_AECODE_GEN_3 GENMASK_ULL(61, 50) +#define IRDMA_AEQE_OVERFLOW_GEN_3 BIT_ULL(62) +#define IRDMA_AEQE_WQDESCIDX_GEN_3 GENMASK_ULL(49, 32) +#define 
IRDMA_AEQE_IWSTATE_GEN_3 GENMASK_ULL(31, 29) +#define IRDMA_AEQE_AESRC_GEN_3 GENMASK_ULL(28, 25) +#define IRDMA_AEQE_CMPL_CTXT_S 6 +#define IRDMA_AEQE_CMPL_CTXT GENMASK_ULL(63, 6) + #define IRDMA_UDA_QPSQ_NEXT_HDR GENMASK_ULL(23, 16) #define IRDMA_UDA_QPSQ_OPCODE GENMASK_ULL(37, 32) #define IRDMA_UDA_QPSQ_L4LEN GENMASK_ULL(45, 42) @@ -557,11 +577,14 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_WQEVALID BIT_ULL(63) #define IRDMA_CQPSQ_TPHVAL GENMASK_ULL(7, 0) -#define IRDMA_CQPSQ_VSIIDX GENMASK_ULL(17, 8) +#define IRDMA_CQPSQ_VSIIDX GENMASK_ULL(23, 8) #define IRDMA_CQPSQ_TPHEN BIT_ULL(60) #define IRDMA_CQPSQ_PBUFADDR IRDMA_CQPHC_QPCTX +#define IRDMA_CQPSQ_PASID GENMASK_ULL(51, 32) +#define IRDMA_CQPSQ_PASID_VALID BIT_ULL(62) + /* Create/Modify/Destroy QP */ #define IRDMA_CQPSQ_QP_NEWMSS GENMASK_ULL(45, 32) @@ -597,6 +620,8 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_CQ_CQCTX GENMASK_ULL(62, 0) #define IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD GENMASK(17, 0) +#define IRDMA_CQPSQ_CQ_CQID_HIGH GENMASK_ULL(52, 50) +#define IRDMA_CQPSQ_CQ_CEQID_HIGH GENMASK_ULL(59, 54) #define IRDMA_CQPSQ_CQ_OP GENMASK_ULL(37, 32) #define IRDMA_CQPSQ_CQ_CQRESIZE BIT_ULL(43) #define IRDMA_CQPSQ_CQ_LPBLSIZE GENMASK_ULL(45, 44) @@ -676,6 +701,8 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_CEQ_CEQSIZE GENMASK_ULL(21, 0) #define IRDMA_CQPSQ_CEQ_CEQID GENMASK_ULL(9, 0) +#define IRDMA_CQPSQ_CEQ_CEQID_HIGH GENMASK_ULL(15, 10) + #define IRDMA_CQPSQ_CEQ_LPBLSIZE IRDMA_CQPSQ_CQ_LPBLSIZE #define IRDMA_CQPSQ_CEQ_VMAP BIT_ULL(47) #define IRDMA_CQPSQ_CEQ_ITRNOEXPIRE BIT_ULL(46) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index caad1b1f2ee3..459343ef72b9 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -282,6 +282,13 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) if (ret) break; + if (info->aeqe_overflow) { + ibdev_err(&iwdev->ibdev, "AEQ has overflowed\n"); + rf->reset = true; + rf->gen_ops.request_reset(rf); + return; + } + aeqcnt++; ibdev_dbg(&iwdev->ibdev, "AEQ: ae_id = 0x%x bool qp=%d qp_id = %d tcp_state=%d iwarp_state=%d ae_src=%d\n", @@ -442,6 +449,9 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC: case IRDMA_AE_LLP_TOO_MANY_RNRS: case IRDMA_AE_LCE_CQ_CATASTROPHIC: + case IRDMA_AE_REMOTE_QP_CATASTROPHIC: + case IRDMA_AE_LOCAL_QP_CATASTROPHIC: + case IRDMA_AE_RCE_QP_CATASTROPHIC: case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: default: ibdev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d, ae_src=%d\n", @@ -686,7 +696,9 @@ static void irdma_destroy_aeq(struct irdma_pci_f *rf) int status = -EBUSY; if (!rf->msix_shared) { - rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, rf->iw_msixtbl->idx, false); + if (rf->sc_dev.privileged) + rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, + rf->iw_msixtbl->idx, false); irdma_destroy_irq(rf, rf->iw_msixtbl, rf); } if (rf->reset) @@ -752,9 +764,10 @@ static void irdma_del_ceq_0(struct irdma_pci_f *rf) if (rf->msix_shared) { msix_vec = &rf->iw_msixtbl[0]; - rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev, - msix_vec->ceq_id, - msix_vec->idx, false); + if (rf->sc_dev.privileged) + rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev, + msix_vec->ceq_id, + msix_vec->idx, false); irdma_destroy_irq(rf, msix_vec, rf); } else { msix_vec = &rf->iw_msixtbl[1]; @@ -785,8 +798,10 @@ static void irdma_del_ceqs(struct irdma_pci_f *rf) msix_vec = &rf->iw_msixtbl[2]; for (i = 1; i < rf->ceqs_count; i++, msix_vec++, iwceq++) { - 
rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev, msix_vec->ceq_id, - msix_vec->idx, false); + if (rf->sc_dev.privileged) + rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev, + msix_vec->ceq_id, + msix_vec->idx, false); irdma_destroy_irq(rf, msix_vec, iwceq); irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq, IRDMA_OP_CEQ_DESTROY); @@ -1209,9 +1224,13 @@ static int irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, } msix_vec->ceq_id = ceq_id; - rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev, ceq_id, msix_vec->idx, true); - - return 0; + if (rf->sc_dev.privileged) + rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev, ceq_id, + msix_vec->idx, true); + else + status = irdma_vchnl_req_ceq_vec_map(&rf->sc_dev, ceq_id, + msix_vec->idx); + return status; } /** @@ -1224,7 +1243,7 @@ static int irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, static int irdma_cfg_aeq_vector(struct irdma_pci_f *rf) { struct irdma_msix_vector *msix_vec = rf->iw_msixtbl; - u32 ret = 0; + int ret = 0; if (!rf->msix_shared) { snprintf(msix_vec->name, sizeof(msix_vec->name) - 1, @@ -1235,12 +1254,16 @@ static int irdma_cfg_aeq_vector(struct irdma_pci_f *rf) } if (ret) { ibdev_dbg(&rf->iwdev->ibdev, "ERR: aeq irq config fail\n"); - return -EINVAL; + return ret; } - rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, msix_vec->idx, true); + if (rf->sc_dev.privileged) + rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, msix_vec->idx, + true); + else + ret = irdma_vchnl_req_aeq_vec_map(&rf->sc_dev, msix_vec->idx); - return 0; + return ret; } /** @@ -1248,13 +1271,13 @@ static int irdma_cfg_aeq_vector(struct irdma_pci_f *rf) * @rf: RDMA PCI function * @iwceq: pointer to the ceq resources to be created * @ceq_id: the id number of the iwceq - * @vsi: SC vsi struct + * @vsi_idx: vsi idx * * Return 0, if the ceq and the resources associated with it * are successfully created, otherwise return error */ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, - u32 ceq_id, struct irdma_sc_vsi *vsi) + u32 ceq_id, u16 vsi_idx) { int status; struct irdma_ceq_init_info info = {}; @@ -1278,7 +1301,7 @@ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, info.elem_cnt = ceq_size; iwceq->sc_ceq.ceq_id = ceq_id; info.dev = dev; - info.vsi = vsi; + info.vsi_idx = vsi_idx; status = irdma_sc_ceq_init(&iwceq->sc_ceq, &info); if (!status) { if (dev->ceq_valid) @@ -1321,7 +1344,7 @@ static int irdma_setup_ceq_0(struct irdma_pci_f *rf) } iwceq = &rf->ceqlist[0]; - status = irdma_create_ceq(rf, iwceq, 0, &rf->default_vsi); + status = irdma_create_ceq(rf, iwceq, 0, rf->default_vsi.vsi_idx); if (status) { ibdev_dbg(&rf->iwdev->ibdev, "ERR: create ceq status = %d\n", status); @@ -1356,13 +1379,13 @@ exit: /** * irdma_setup_ceqs - manage the device ceq's and their interrupt resources * @rf: RDMA PCI function - * @vsi: VSI structure for this CEQ + * @vsi_idx: vsi_idx for this CEQ * * Allocate a list for all device completion event queues * Create the ceq's and configure their msix interrupt vectors * Return 0, if ceqs are successfully set up, otherwise return error */ -static int irdma_setup_ceqs(struct irdma_pci_f *rf, struct irdma_sc_vsi *vsi) +static int irdma_setup_ceqs(struct irdma_pci_f *rf, u16 vsi_idx) { u32 i; u32 ceq_id; @@ -1375,7 +1398,7 @@ static int irdma_setup_ceqs(struct irdma_pci_f *rf, struct irdma_sc_vsi *vsi) i = (rf->msix_shared) ? 
1 : 2; for (ceq_id = 1; i < num_ceqs; i++, ceq_id++) { iwceq = &rf->ceqlist[ceq_id]; - status = irdma_create_ceq(rf, iwceq, ceq_id, vsi); + status = irdma_create_ceq(rf, iwceq, ceq_id, vsi_idx); if (status) { ibdev_dbg(&rf->iwdev->ibdev, "ERR: create ceq status = %d\n", status); @@ -1456,7 +1479,10 @@ static int irdma_create_aeq(struct irdma_pci_f *rf) aeq_size = multiplier * hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt + hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt; aeq_size = min(aeq_size, dev->hw_attrs.max_hw_aeq_size); - + /* GEN_3 does not support virtual AEQ. Cap at max Kernel alloc size */ + if (rf->rdma_ver == IRDMA_GEN_3) + aeq_size = min(aeq_size, (u32)((PAGE_SIZE << MAX_PAGE_ORDER) / + sizeof(struct irdma_sc_aeqe))); aeq->mem.size = ALIGN(sizeof(struct irdma_sc_aeqe) * aeq_size, IRDMA_AEQ_ALIGNMENT); aeq->mem.va = dma_alloc_coherent(dev->hw->device, aeq->mem.size, @@ -1464,6 +1490,8 @@ static int irdma_create_aeq(struct irdma_pci_f *rf) GFP_KERNEL | __GFP_NOWARN); if (aeq->mem.va) goto skip_virt_aeq; + else if (rf->rdma_ver == IRDMA_GEN_3) + return -ENOMEM; /* physically mapped aeq failed. setup virtual aeq */ status = irdma_create_virt_aeq(rf, aeq_size); @@ -1737,9 +1765,6 @@ void irdma_rt_deinit_hw(struct irdma_device *iwdev) irdma_del_local_mac_entry(iwdev->rf, (u8)iwdev->mac_ip_table_idx); fallthrough; - case AEQ_CREATED: - case PBLE_CHUNK_MEM: - case CEQS_CREATED: case IEQ_CREATED: if (!iwdev->roce_mode) irdma_puda_dele_rsrc(&iwdev->vsi, IRDMA_PUDA_RSRC_TYPE_IEQ, @@ -1822,13 +1847,17 @@ void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf) enum init_completion_state state = rf->init_state; rf->init_state = INVALID_STATE; - if (rf->rsrc_created) { - irdma_destroy_aeq(rf); - irdma_destroy_pble_prm(rf->pble_rsrc); - irdma_del_ceqs(rf); - rf->rsrc_created = false; - } + switch (state) { + case AEQ_CREATED: + irdma_destroy_aeq(rf); + fallthrough; + case PBLE_CHUNK_MEM: + irdma_destroy_pble_prm(rf->pble_rsrc); + fallthrough; + case CEQS_CREATED: + irdma_del_ceqs(rf); + fallthrough; case CEQ0_CREATED: irdma_del_ceq_0(rf); fallthrough; @@ -1907,32 +1936,6 @@ int irdma_rt_init_hw(struct irdma_device *iwdev, break; iwdev->init_state = IEQ_CREATED; } - if (!rf->rsrc_created) { - status = irdma_setup_ceqs(rf, &iwdev->vsi); - if (status) - break; - - iwdev->init_state = CEQS_CREATED; - - status = irdma_hmc_init_pble(&rf->sc_dev, - rf->pble_rsrc); - if (status) { - irdma_del_ceqs(rf); - break; - } - - iwdev->init_state = PBLE_CHUNK_MEM; - - status = irdma_setup_aeq(rf); - if (status) { - irdma_destroy_pble_prm(rf->pble_rsrc); - irdma_del_ceqs(rf); - break; - } - iwdev->init_state = AEQ_CREATED; - rf->rsrc_created = true; - } - if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) irdma_alloc_set_mac(iwdev); irdma_add_ip(iwdev); @@ -2014,6 +2017,25 @@ int irdma_ctrl_init_hw(struct irdma_pci_f *rf) } INIT_WORK(&rf->cqp_cmpl_work, cqp_compl_worker); irdma_sc_ccq_arm(dev->ccq); + + status = irdma_setup_ceqs(rf, rf->iwdev ? 
rf->iwdev->vsi_num : 0); + if (status) + break; + + rf->init_state = CEQS_CREATED; + + status = irdma_hmc_init_pble(&rf->sc_dev, + rf->pble_rsrc); + if (status) + break; + + rf->init_state = PBLE_CHUNK_MEM; + + status = irdma_setup_aeq(rf); + if (status) + break; + rf->init_state = AEQ_CREATED; + return 0; } while (0); diff --git a/drivers/infiniband/hw/irdma/ig3rdma_hw.c b/drivers/infiniband/hw/irdma/ig3rdma_hw.c index 83ef6af82a8f..1d582c50e4d2 100644 --- a/drivers/infiniband/hw/irdma/ig3rdma_hw.c +++ b/drivers/infiniband/hw/irdma/ig3rdma_hw.c @@ -5,8 +5,53 @@ #include "protos.h" #include "ig3rdma_hw.h" +/** + * ig3rdma_ena_irq - Enable interrupt + * @dev: pointer to the device structure + * @idx: vector index + */ +static void ig3rdma_ena_irq(struct irdma_sc_dev *dev, u32 idx) +{ + u32 val; + u32 int_stride = 1; /* one u32 per register */ + + if (dev->is_pf) + int_stride = 0x400; + else + idx--; /* VFs use DYN_CTL_N */ + + val = FIELD_PREP(IRDMA_GLINT_DYN_CTL_INTENA, 1) | + FIELD_PREP(IRDMA_GLINT_DYN_CTL_CLEARPBA, 1); + + writel(val, dev->hw_regs[IRDMA_GLINT_DYN_CTL] + (idx * int_stride)); +} + +/** + * ig3rdma_disable_irq - Disable interrupt + * @dev: pointer to the device structure + * @idx: vector index + */ +static void ig3rdma_disable_irq(struct irdma_sc_dev *dev, u32 idx) +{ + u32 int_stride = 1; /* one u32 per register */ + + if (dev->is_pf) + int_stride = 0x400; + else + idx--; /* VFs use DYN_CTL_N */ + + writel(0, dev->hw_regs[IRDMA_GLINT_DYN_CTL] + (idx * int_stride)); +} + +static const struct irdma_irq_ops ig3rdma_irq_ops = { + .irdma_dis_irq = ig3rdma_disable_irq, + .irdma_en_irq = ig3rdma_ena_irq, +}; + void ig3rdma_init_hw(struct irdma_sc_dev *dev) { + dev->irq_ops = &ig3rdma_irq_ops; + dev->hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_3; dev->hw_attrs.uk_attrs.max_hw_wq_frags = IG3RDMA_MAX_WQ_FRAGMENT_COUNT; dev->hw_attrs.uk_attrs.max_hw_read_sges = IG3RDMA_MAX_SGE_RD; diff --git a/drivers/infiniband/hw/irdma/irdma.h b/drivers/infiniband/hw/irdma/irdma.h index 7e3214904488..e012f795bce8 100644 --- a/drivers/infiniband/hw/irdma/irdma.h +++ b/drivers/infiniband/hw/irdma/irdma.h @@ -32,7 +32,16 @@ #define IRDMA_PFHMC_SDDATALOW_PMSDDATALOW GENMASK(31, 12) #define IRDMA_PFHMC_SDCMD_PMSDWR BIT(31) -#define IRDMA_INVALID_CQ_IDX 0xffffffff +#define IRDMA_INVALID_CQ_IDX 0xffffffff +#define IRDMA_Q_INVALID_IDX 0xffff + +enum irdma_dyn_idx_t { + IRDMA_IDX_ITR0 = 0, + IRDMA_IDX_ITR1 = 1, + IRDMA_IDX_ITR2 = 2, + IRDMA_IDX_NOITR = 3, +}; + enum irdma_registers { IRDMA_CQPTAIL, IRDMA_CQPDB, diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index 7414e0ec6306..7300f8ab49ca 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -128,12 +128,12 @@ enum init_completion_state { HMC_OBJS_CREATED, HW_RSRC_INITIALIZED, CCQ_CREATED, - CEQ0_CREATED, /* Last state of probe */ - ILQ_CREATED, - IEQ_CREATED, + CEQ0_CREATED, CEQS_CREATED, PBLE_CHUNK_MEM, AEQ_CREATED, + ILQ_CREATED, + IEQ_CREATED, /* Last state of probe */ IP_ADDR_REGISTERED, /* Last state of open */ }; diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index e779ad8dcba5..875e3756aa20 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -472,6 +472,8 @@ struct irdma_sc_aeq { u32 msix_idx; u8 polarity; bool virtual_map:1; + bool pasid_valid:1; + u32 pasid; }; struct irdma_sc_ceq { @@ -487,13 +489,15 @@ struct irdma_sc_ceq { u8 tph_val; u32 first_pm_pbl_idx; u8 polarity; - struct irdma_sc_vsi *vsi; + 
u16 vsi_idx; struct irdma_sc_cq **reg_cq; u32 reg_cq_size; spinlock_t req_cq_lock; /* protect access to reg_cq array */ bool virtual_map:1; bool tph_en:1; bool itr_no_expire:1; + bool pasid_valid:1; + u32 pasid; }; struct irdma_sc_cq { @@ -501,6 +505,7 @@ struct irdma_sc_cq { u64 cq_pa; u64 shadow_area_pa; struct irdma_sc_dev *dev; + u16 vsi_idx; struct irdma_sc_vsi *vsi; void *pbl_list; void *back_cq; @@ -834,8 +839,8 @@ struct irdma_ceq_init_info { bool itr_no_expire:1; u8 pbl_chunk_size; u8 tph_val; + u16 vsi_idx; u32 first_pm_pbl_idx; - struct irdma_sc_vsi *vsi; struct irdma_sc_cq **reg_cq; u32 reg_cq_idx; }; @@ -1042,9 +1047,11 @@ struct irdma_aeqe_info { bool cq:1; bool sq:1; bool rq:1; + bool srq:1; bool in_rdrsp_wr:1; bool out_rdrsp:1; bool aeqe_overflow:1; + bool err_rq_idx_valid:1; u8 q2_data_written; u8 ae_src; }; diff --git a/drivers/infiniband/hw/irdma/virtchnl.c b/drivers/infiniband/hw/irdma/virtchnl.c index ea071ffc0204..8f4a2189f905 100644 --- a/drivers/infiniband/hw/irdma/virtchnl.c +++ b/drivers/infiniband/hw/irdma/virtchnl.c @@ -110,6 +110,8 @@ static int irdma_vchnl_req_verify_resp(struct irdma_vchnl_req *vchnl_req, return -EBADMSG; break; case IRDMA_VCHNL_OP_GET_REG_LAYOUT: + case IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP: + case IRDMA_VCHNL_OP_QUEUE_VECTOR_UNMAP: break; default: return -EOPNOTSUPP; @@ -315,6 +317,88 @@ int irdma_vchnl_req_get_reg_layout(struct irdma_sc_dev *dev) return 0; } +/** + * irdma_vchnl_req_aeq_vec_map - Map AEQ to vector on this function + * @dev: RDMA device pointer + * @v_idx: vector index + */ +int irdma_vchnl_req_aeq_vec_map(struct irdma_sc_dev *dev, u32 v_idx) +{ + struct irdma_vchnl_req_init_info info = {}; + struct irdma_vchnl_qvlist_info *qvl; + struct irdma_vchnl_qv_info *qv; + u16 qvl_size, num_vectors = 1; + int ret; + + if (!dev->vchnl_up) + return -EBUSY; + + qvl_size = struct_size(qvl, qv_info, num_vectors); + + qvl = kzalloc(qvl_size, GFP_KERNEL); + if (!qvl) + return -ENOMEM; + + qvl->num_vectors = 1; + qv = qvl->qv_info; + + qv->ceq_idx = IRDMA_Q_INVALID_IDX; + qv->v_idx = v_idx; + qv->itr_idx = IRDMA_IDX_ITR0; + + info.op_code = IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP; + info.op_ver = IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP_V0; + info.req_parm = qvl; + info.req_parm_len = qvl_size; + + ret = irdma_vchnl_req_send_sync(dev, &info); + kfree(qvl); + + return ret; +} + +/** + * irdma_vchnl_req_ceq_vec_map - Map CEQ to vector on this function + * @dev: RDMA device pointer + * @ceq_id: CEQ index + * @v_idx: vector index + */ +int irdma_vchnl_req_ceq_vec_map(struct irdma_sc_dev *dev, u16 ceq_id, u32 v_idx) +{ + struct irdma_vchnl_req_init_info info = {}; + struct irdma_vchnl_qvlist_info *qvl; + struct irdma_vchnl_qv_info *qv; + u16 qvl_size, num_vectors = 1; + int ret; + + if (!dev->vchnl_up) + return -EBUSY; + + qvl_size = struct_size(qvl, qv_info, num_vectors); + + qvl = kzalloc(qvl_size, GFP_KERNEL); + if (!qvl) + return -ENOMEM; + + qvl->num_vectors = num_vectors; + qv = qvl->qv_info; + + qv->aeq_idx = IRDMA_Q_INVALID_IDX; + qv->ceq_idx = ceq_id; + qv->v_idx = v_idx; + qv->itr_idx = IRDMA_IDX_ITR0; + + info.op_code = IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP; + info.op_ver = IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP_V0; + info.req_parm = qvl; + info.req_parm_len = qvl_size; + + ret = irdma_vchnl_req_send_sync(dev, &info); + kfree(qvl); + + return ret; +} + /** * irdma_vchnl_req_get_ver - Request Channel version * @dev: RDMA device pointer diff --git a/drivers/infiniband/hw/irdma/virtchnl.h b/drivers/infiniband/hw/irdma/virtchnl.h index ba551dc70d19..6acd698620d6 100644 --- 
a/drivers/infiniband/hw/irdma/virtchnl.h +++ b/drivers/infiniband/hw/irdma/virtchnl.h @@ -15,6 +15,8 @@ #define IRDMA_VCHNL_OP_GET_HMC_FCN_V2 2 #define IRDMA_VCHNL_OP_PUT_HMC_FCN_V0 0 #define IRDMA_VCHNL_OP_GET_REG_LAYOUT_V0 0 +#define IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP_V0 0 +#define IRDMA_VCHNL_OP_QUEUE_VECTOR_UNMAP_V0 0 #define IRDMA_VCHNL_OP_GET_RDMA_CAPS_V0 0 #define IRDMA_VCHNL_OP_GET_RDMA_CAPS_MIN_SIZE 1 @@ -53,6 +55,8 @@ enum irdma_vchnl_ops { IRDMA_VCHNL_OP_PUT_HMC_FCN = 2, IRDMA_VCHNL_OP_GET_REG_LAYOUT = 11, IRDMA_VCHNL_OP_GET_RDMA_CAPS = 13, + IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP = 14, + IRDMA_VCHNL_OP_QUEUE_VECTOR_UNMAP = 15, }; struct irdma_vchnl_req_hmc_info { @@ -65,6 +69,18 @@ struct irdma_vchnl_resp_hmc_info { u16 qs_handle[IRDMA_MAX_USER_PRIORITY]; } __packed; +struct irdma_vchnl_qv_info { + u32 v_idx; + u16 ceq_idx; + u16 aeq_idx; + u8 itr_idx; +}; + +struct irdma_vchnl_qvlist_info { + u32 num_vectors; + struct irdma_vchnl_qv_info qv_info[]; +}; + struct irdma_vchnl_op_buf { u16 op_code; u16 op_ver; @@ -135,4 +151,7 @@ int irdma_vchnl_req_get_caps(struct irdma_sc_dev *dev); int irdma_vchnl_req_get_resp(struct irdma_sc_dev *dev, struct irdma_vchnl_req *vc_req); int irdma_vchnl_req_get_reg_layout(struct irdma_sc_dev *dev); +int irdma_vchnl_req_aeq_vec_map(struct irdma_sc_dev *dev, u32 v_idx); +int irdma_vchnl_req_ceq_vec_map(struct irdma_sc_dev *dev, u16 ceq_id, + u32 v_idx); #endif /* IRDMA_VIRTCHNL_H */ From da278cb29c41dc2d8344d62238de339db6695132 Mon Sep 17 00:00:00 2001 From: Krzysztof Czurylo Date: Wed, 27 Aug 2025 10:25:35 -0500 Subject: [PATCH 58/85] RDMA/irdma: Add GEN3 HW statistics support Plug into the unified HW statistics framework by adding a hardware statistics map array for GEN3, defining the HW-specific width and location for each counter in the statistics buffer. Signed-off-by: Krzysztof Czurylo Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250827152545.2056-7-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/ctrl.c | 33 ++++-- drivers/infiniband/hw/irdma/defs.h | 2 +- drivers/infiniband/hw/irdma/ig3rdma_hw.c | 63 ++++++++++++ drivers/infiniband/hw/irdma/type.h | 19 +++- drivers/infiniband/hw/irdma/verbs.c | 124 +++++++++++++---------- 5 files changed, 173 insertions(+), 68 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 3f83ffaf448d..26b8905dbc03 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -1968,7 +1968,8 @@ int irdma_vsi_stats_init(struct irdma_sc_vsi *vsi, (void *)((uintptr_t)stats_buff_mem->va + IRDMA_GATHER_STATS_BUF_SIZE); - irdma_hw_stats_start_timer(vsi); + if (vsi->dev->hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_3) + irdma_hw_stats_start_timer(vsi); /* when stat allocation is not required default to fcn_id. 
*/ vsi->stats_idx = info->fcn_id; @@ -2013,7 +2014,9 @@ void irdma_vsi_stats_free(struct irdma_sc_vsi *vsi) if (!vsi->pestat) return; - irdma_hw_stats_stop_timer(vsi); + + if (dev->hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_3) + irdma_hw_stats_stop_timer(vsi); dma_free_coherent(vsi->pestat->hw->device, vsi->pestat->gather_info.stats_buff_mem.size, vsi->pestat->gather_info.stats_buff_mem.va, @@ -5929,14 +5932,26 @@ void irdma_cfg_aeq(struct irdma_sc_dev *dev, u32 idx, bool enable) */ void sc_vsi_update_stats(struct irdma_sc_vsi *vsi) { - struct irdma_gather_stats *gather_stats; - struct irdma_gather_stats *last_gather_stats; + struct irdma_dev_hw_stats *hw_stats = &vsi->pestat->hw_stats; + struct irdma_gather_stats *gather_stats = + vsi->pestat->gather_info.gather_stats_va; + struct irdma_gather_stats *last_gather_stats = + vsi->pestat->gather_info.last_gather_stats_va; + const struct irdma_hw_stat_map *map = vsi->dev->hw_stats_map; + u16 max_stat_idx = vsi->dev->hw_attrs.max_stat_idx; + u16 i; - gather_stats = vsi->pestat->gather_info.gather_stats_va; - last_gather_stats = vsi->pestat->gather_info.last_gather_stats_va; - irdma_update_stats(&vsi->pestat->hw_stats, gather_stats, - last_gather_stats, vsi->dev->hw_stats_map, - vsi->dev->hw_attrs.max_stat_idx); + if (vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) { + for (i = 0; i < max_stat_idx; i++) { + u16 idx = map[i].byteoff / sizeof(u64); + + hw_stats->stats_val[i] = gather_stats->val[idx]; + } + return; + } + + irdma_update_stats(hw_stats, gather_stats, last_gather_stats, + map, max_stat_idx); } /** diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index 52ace06912eb..2fc8e3cf4395 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -415,7 +415,7 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_STATS_USE_INST BIT_ULL(61) #define IRDMA_CQPSQ_STATS_OP GENMASK_ULL(37, 32) #define IRDMA_CQPSQ_STATS_INST_INDEX GENMASK_ULL(6, 0) -#define IRDMA_CQPSQ_STATS_HMC_FCN_INDEX GENMASK_ULL(5, 0) +#define IRDMA_CQPSQ_STATS_HMC_FCN_INDEX GENMASK_ULL(15, 0) #define IRDMA_CQPSQ_WS_WQEVALID BIT_ULL(63) #define IRDMA_CQPSQ_WS_NODEOP GENMASK_ULL(55, 52) #define IRDMA_SD_MAX GENMASK_ULL(15, 0) diff --git a/drivers/infiniband/hw/irdma/ig3rdma_hw.c b/drivers/infiniband/hw/irdma/ig3rdma_hw.c index 1d582c50e4d2..2a3d7144c771 100644 --- a/drivers/infiniband/hw/irdma/ig3rdma_hw.c +++ b/drivers/infiniband/hw/irdma/ig3rdma_hw.c @@ -48,9 +48,70 @@ static const struct irdma_irq_ops ig3rdma_irq_ops = { .irdma_en_irq = ig3rdma_ena_irq, }; +static const struct irdma_hw_stat_map ig3rdma_hw_stat_map[] = { + [IRDMA_HW_STAT_INDEX_RXVLANERR] = { 0, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4RXOCTS] = { 8, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4RXPKTS] = { 16, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4RXDISCARD] = { 24, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4RXTRUNC] = { 32, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4RXFRAGS] = { 40, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS] = { 48, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS] = { 56, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP6RXOCTS] = { 64, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP6RXPKTS] = { 72, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP6RXDISCARD] = { 80, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP6RXTRUNC] = { 88, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP6RXFRAGS] = { 96, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS] = { 104, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS] = { 112, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4TXOCTS] = { 120, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4TXPKTS] = { 128, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4TXFRAGS] = { 136, 0, 
0 }, + [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS] = { 144, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS] = { 152, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP6TXOCTS] = { 160, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP6TXPKTS] = { 168, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP6TXFRAGS] = { 176, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS] = { 184, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS] = { 192, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE] = { 200, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE] = { 208, 0, 0 }, + [IRDMA_HW_STAT_INDEX_TCPRTXSEG] = { 216, 0, 0 }, + [IRDMA_HW_STAT_INDEX_TCPRXOPTERR] = { 224, 0, 0 }, + [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR] = { 232, 0, 0 }, + [IRDMA_HW_STAT_INDEX_TCPTXSEG] = { 240, 0, 0 }, + [IRDMA_HW_STAT_INDEX_TCPRXSEGS] = { 248, 0, 0 }, + [IRDMA_HW_STAT_INDEX_UDPRXPKTS] = { 256, 0, 0 }, + [IRDMA_HW_STAT_INDEX_UDPTXPKTS] = { 264, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RDMARXWRS] = { 272, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RDMARXRDS] = { 280, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RDMARXSNDS] = { 288, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RDMATXWRS] = { 296, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RDMATXRDS] = { 304, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RDMATXSNDS] = { 312, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RDMAVBND] = { 320, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RDMAVINV] = { 328, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS] = { 336, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED] = { 344, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED] = { 352, 0, 0 }, + [IRDMA_HW_STAT_INDEX_TXNPCNPSENT] = { 360, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RNR_SENT] = { 368, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RNR_RCVD] = { 376, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RDMAORDLMTCNT] = { 384, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RDMAIRDLMTCNT] = { 392, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RDMARXATS] = { 408, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RDMATXATS] = { 416, 0, 0 }, + [IRDMA_HW_STAT_INDEX_NAKSEQERR] = { 424, 0, 0 }, + [IRDMA_HW_STAT_INDEX_NAKSEQERR_IMPLIED] = { 432, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RTO] = { 440, 0, 0 }, + [IRDMA_HW_STAT_INDEX_RXOOOPKTS] = { 448, 0, 0 }, + [IRDMA_HW_STAT_INDEX_ICRCERR] = { 456, 0, 0 }, +}; + void ig3rdma_init_hw(struct irdma_sc_dev *dev) { dev->irq_ops = &ig3rdma_irq_ops; + dev->hw_stats_map = ig3rdma_hw_stat_map; dev->hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_3; dev->hw_attrs.uk_attrs.max_hw_wq_frags = IG3RDMA_MAX_WQ_FRAGMENT_COUNT; @@ -70,6 +131,8 @@ void ig3rdma_init_hw(struct irdma_sc_dev *dev) dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M | SZ_1G; dev->hw_attrs.max_hw_ird = IG3RDMA_MAX_IRD_SIZE; dev->hw_attrs.max_hw_ord = IG3RDMA_MAX_ORD_SIZE; + dev->hw_attrs.max_stat_inst = IG3RDMA_MAX_STATS_COUNT; + dev->hw_attrs.max_stat_idx = IRDMA_HW_STAT_INDEX_MAX_GEN_3; dev->hw_attrs.uk_attrs.min_hw_wq_size = IG3RDMA_MIN_WQ_SIZE; dev->hw_attrs.uk_attrs.max_hw_srq_quanta = IRDMA_SRQ_MAX_QUANTA; dev->hw_attrs.uk_attrs.max_hw_inline = IG3RDMA_MAX_INLINE_DATA_SIZE; diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 875e3756aa20..5eeb50f5defc 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -156,6 +156,21 @@ enum irdma_hw_stats_index { IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED = 44, IRDMA_HW_STAT_INDEX_TXNPCNPSENT = 45, IRDMA_HW_STAT_INDEX_MAX_GEN_2 = 46, + + /* gen3 */ + IRDMA_HW_STAT_INDEX_RNR_SENT = 46, + IRDMA_HW_STAT_INDEX_RNR_RCVD = 47, + IRDMA_HW_STAT_INDEX_RDMAORDLMTCNT = 48, + IRDMA_HW_STAT_INDEX_RDMAIRDLMTCNT = 49, + IRDMA_HW_STAT_INDEX_RDMARXATS = 50, + IRDMA_HW_STAT_INDEX_RDMATXATS = 51, + IRDMA_HW_STAT_INDEX_NAKSEQERR = 52, + IRDMA_HW_STAT_INDEX_NAKSEQERR_IMPLIED = 53, + IRDMA_HW_STAT_INDEX_RTO = 54, 
+ IRDMA_HW_STAT_INDEX_RXOOOPKTS = 55, + IRDMA_HW_STAT_INDEX_ICRCERR = 56, + + IRDMA_HW_STAT_INDEX_MAX_GEN_3 = 57, }; enum irdma_feature_type { @@ -569,7 +584,7 @@ struct irdma_sc_qp { struct irdma_stats_inst_info { bool use_hmc_fcn_index; u8 hmc_fn_id; - u8 stats_idx; + u16 stats_idx; }; struct irdma_up_info { @@ -1027,7 +1042,7 @@ struct irdma_qp_host_ctx_info { u32 send_cq_num; u32 rcv_cq_num; u32 rem_endpoint_idx; - u8 stats_idx; + u16 stats_idx; bool srq_valid:1; bool tcp_info_valid:1; bool iwarp_info_valid:1; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 1bee1cbf7a4d..894c1f7bcb43 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -3923,6 +3923,73 @@ static int irdma_req_notify_cq(struct ib_cq *ibcq, return ret; } +static const struct rdma_stat_desc irdma_hw_stat_descs[] = { + /* gen1 - 32-bit */ + [IRDMA_HW_STAT_INDEX_IP4RXDISCARD].name = "ip4InDiscards", + [IRDMA_HW_STAT_INDEX_IP4RXTRUNC].name = "ip4InTruncatedPkts", + [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE].name = "ip4OutNoRoutes", + [IRDMA_HW_STAT_INDEX_IP6RXDISCARD].name = "ip6InDiscards", + [IRDMA_HW_STAT_INDEX_IP6RXTRUNC].name = "ip6InTruncatedPkts", + [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE].name = "ip6OutNoRoutes", + [IRDMA_HW_STAT_INDEX_RXVLANERR].name = "rxVlanErrors", + /* gen1 - 64-bit */ + [IRDMA_HW_STAT_INDEX_IP4RXOCTS].name = "ip4InOctets", + [IRDMA_HW_STAT_INDEX_IP4RXPKTS].name = "ip4InPkts", + [IRDMA_HW_STAT_INDEX_IP4RXFRAGS].name = "ip4InReasmRqd", + [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS].name = "ip4InMcastPkts", + [IRDMA_HW_STAT_INDEX_IP4TXOCTS].name = "ip4OutOctets", + [IRDMA_HW_STAT_INDEX_IP4TXPKTS].name = "ip4OutPkts", + [IRDMA_HW_STAT_INDEX_IP4TXFRAGS].name = "ip4OutSegRqd", + [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS].name = "ip4OutMcastPkts", + [IRDMA_HW_STAT_INDEX_IP6RXOCTS].name = "ip6InOctets", + [IRDMA_HW_STAT_INDEX_IP6RXPKTS].name = "ip6InPkts", + [IRDMA_HW_STAT_INDEX_IP6RXFRAGS].name = "ip6InReasmRqd", + [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS].name = "ip6InMcastPkts", + [IRDMA_HW_STAT_INDEX_IP6TXOCTS].name = "ip6OutOctets", + [IRDMA_HW_STAT_INDEX_IP6TXPKTS].name = "ip6OutPkts", + [IRDMA_HW_STAT_INDEX_IP6TXFRAGS].name = "ip6OutSegRqd", + [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS].name = "ip6OutMcastPkts", + [IRDMA_HW_STAT_INDEX_RDMARXRDS].name = "InRdmaReads", + [IRDMA_HW_STAT_INDEX_RDMARXSNDS].name = "InRdmaSends", + [IRDMA_HW_STAT_INDEX_RDMARXWRS].name = "InRdmaWrites", + [IRDMA_HW_STAT_INDEX_RDMATXRDS].name = "OutRdmaReads", + [IRDMA_HW_STAT_INDEX_RDMATXSNDS].name = "OutRdmaSends", + [IRDMA_HW_STAT_INDEX_RDMATXWRS].name = "OutRdmaWrites", + [IRDMA_HW_STAT_INDEX_RDMAVBND].name = "RdmaBnd", + [IRDMA_HW_STAT_INDEX_RDMAVINV].name = "RdmaInv", + + /* gen2 - 32-bit */ + [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED].name = "cnpHandled", + [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED].name = "cnpIgnored", + [IRDMA_HW_STAT_INDEX_TXNPCNPSENT].name = "cnpSent", + /* gen2 - 64-bit */ + [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS].name = "ip4InMcastOctets", + [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS].name = "ip4OutMcastOctets", + [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS].name = "ip6InMcastOctets", + [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS].name = "ip6OutMcastOctets", + [IRDMA_HW_STAT_INDEX_UDPRXPKTS].name = "RxUDP", + [IRDMA_HW_STAT_INDEX_UDPTXPKTS].name = "TxUDP", + [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS].name = "RxECNMrkd", + [IRDMA_HW_STAT_INDEX_TCPRTXSEG].name = "RetransSegs", + [IRDMA_HW_STAT_INDEX_TCPRXOPTERR].name = "InOptErrors", + [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR].name = "InProtoErrors", + 
[IRDMA_HW_STAT_INDEX_TCPRXSEGS].name = "InSegs", + [IRDMA_HW_STAT_INDEX_TCPTXSEG].name = "OutSegs", + + /* gen3 */ + [IRDMA_HW_STAT_INDEX_RNR_SENT].name = "RNR sent", + [IRDMA_HW_STAT_INDEX_RNR_RCVD].name = "RNR received", + [IRDMA_HW_STAT_INDEX_RDMAORDLMTCNT].name = "ord limit count", + [IRDMA_HW_STAT_INDEX_RDMAIRDLMTCNT].name = "ird limit count", + [IRDMA_HW_STAT_INDEX_RDMARXATS].name = "Rx atomics", + [IRDMA_HW_STAT_INDEX_RDMATXATS].name = "Tx atomics", + [IRDMA_HW_STAT_INDEX_NAKSEQERR].name = "Nak Sequence Error", + [IRDMA_HW_STAT_INDEX_NAKSEQERR_IMPLIED].name = "Nak Sequence Error Implied", + [IRDMA_HW_STAT_INDEX_RTO].name = "RTO", + [IRDMA_HW_STAT_INDEX_RXOOOPKTS].name = "Rcvd Out of order packets", + [IRDMA_HW_STAT_INDEX_ICRCERR].name = "CRC errors", +}; + static int irdma_roce_port_immutable(struct ib_device *ibdev, u32 port_num, struct ib_port_immutable *immutable) { @@ -3956,61 +4023,6 @@ static int irdma_iw_port_immutable(struct ib_device *ibdev, u32 port_num, return 0; } -static const struct rdma_stat_desc irdma_hw_stat_names[] = { - /* gen1 - 32-bit */ - [IRDMA_HW_STAT_INDEX_IP4RXDISCARD].name = "ip4InDiscards", - [IRDMA_HW_STAT_INDEX_IP4RXTRUNC].name = "ip4InTruncatedPkts", - [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE].name = "ip4OutNoRoutes", - [IRDMA_HW_STAT_INDEX_IP6RXDISCARD].name = "ip6InDiscards", - [IRDMA_HW_STAT_INDEX_IP6RXTRUNC].name = "ip6InTruncatedPkts", - [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE].name = "ip6OutNoRoutes", - [IRDMA_HW_STAT_INDEX_TCPRTXSEG].name = "tcpRetransSegs", - [IRDMA_HW_STAT_INDEX_TCPRXOPTERR].name = "tcpInOptErrors", - [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR].name = "tcpInProtoErrors", - [IRDMA_HW_STAT_INDEX_RXVLANERR].name = "rxVlanErrors", - /* gen1 - 64-bit */ - [IRDMA_HW_STAT_INDEX_IP4RXOCTS].name = "ip4InOctets", - [IRDMA_HW_STAT_INDEX_IP4RXPKTS].name = "ip4InPkts", - [IRDMA_HW_STAT_INDEX_IP4RXFRAGS].name = "ip4InReasmRqd", - [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS].name = "ip4InMcastPkts", - [IRDMA_HW_STAT_INDEX_IP4TXOCTS].name = "ip4OutOctets", - [IRDMA_HW_STAT_INDEX_IP4TXPKTS].name = "ip4OutPkts", - [IRDMA_HW_STAT_INDEX_IP4TXFRAGS].name = "ip4OutSegRqd", - [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS].name = "ip4OutMcastPkts", - [IRDMA_HW_STAT_INDEX_IP6RXOCTS].name = "ip6InOctets", - [IRDMA_HW_STAT_INDEX_IP6RXPKTS].name = "ip6InPkts", - [IRDMA_HW_STAT_INDEX_IP6RXFRAGS].name = "ip6InReasmRqd", - [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS].name = "ip6InMcastPkts", - [IRDMA_HW_STAT_INDEX_IP6TXOCTS].name = "ip6OutOctets", - [IRDMA_HW_STAT_INDEX_IP6TXPKTS].name = "ip6OutPkts", - [IRDMA_HW_STAT_INDEX_IP6TXFRAGS].name = "ip6OutSegRqd", - [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS].name = "ip6OutMcastPkts", - [IRDMA_HW_STAT_INDEX_TCPRXSEGS].name = "tcpInSegs", - [IRDMA_HW_STAT_INDEX_TCPTXSEG].name = "tcpOutSegs", - [IRDMA_HW_STAT_INDEX_RDMARXRDS].name = "iwInRdmaReads", - [IRDMA_HW_STAT_INDEX_RDMARXSNDS].name = "iwInRdmaSends", - [IRDMA_HW_STAT_INDEX_RDMARXWRS].name = "iwInRdmaWrites", - [IRDMA_HW_STAT_INDEX_RDMATXRDS].name = "iwOutRdmaReads", - [IRDMA_HW_STAT_INDEX_RDMATXSNDS].name = "iwOutRdmaSends", - [IRDMA_HW_STAT_INDEX_RDMATXWRS].name = "iwOutRdmaWrites", - [IRDMA_HW_STAT_INDEX_RDMAVBND].name = "iwRdmaBnd", - [IRDMA_HW_STAT_INDEX_RDMAVINV].name = "iwRdmaInv", - - /* gen2 - 32-bit */ - [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED].name = "cnpHandled", - [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED].name = "cnpIgnored", - [IRDMA_HW_STAT_INDEX_TXNPCNPSENT].name = "cnpSent", - /* gen2 - 64-bit */ - [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS].name = "ip4InMcastOctets", - [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS].name = 
"ip4OutMcastOctets", - [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS].name = "ip6InMcastOctets", - [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS].name = "ip6OutMcastOctets", - [IRDMA_HW_STAT_INDEX_UDPRXPKTS].name = "RxUDP", - [IRDMA_HW_STAT_INDEX_UDPTXPKTS].name = "TxUDP", - [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS].name = "RxECNMrkd", - -}; - static void irdma_get_dev_fw_str(struct ib_device *dev, char *str) { struct irdma_device *iwdev = to_iwdev(dev); @@ -4034,7 +4046,7 @@ static struct rdma_hw_stats *irdma_alloc_hw_port_stats(struct ib_device *ibdev, int num_counters = dev->hw_attrs.max_stat_idx; unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN; - return rdma_alloc_hw_stats_struct(irdma_hw_stat_names, num_counters, + return rdma_alloc_hw_stats_struct(irdma_hw_stat_descs, num_counters, lifespan); } From 2ad49ae7330b8a456edf639c92241a343641a763 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 27 Aug 2025 10:25:36 -0500 Subject: [PATCH 59/85] RDMA/irdma: Introduce GEN3 vPort driver support In the IPU model, a function can host one or more logical network endpoints called vPorts. Each vPort may be associated with either a physical or an internal communication port, and can be RDMA capable. A vPort features a netdev and, if RDMA capable, must have an associated ib_dev. This change introduces a GEN3 auxiliary vPort driver responsible for registering a verbs device for every RDMA-capable vPort. Additionally, the UAPI is updated to prevent the binding of GEN3 devices to older user-space providers. Signed-off-by: Mustafa Ismail Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250827152545.2056-8-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/main.c | 120 ++++++++++++++++++++++++++++ drivers/infiniband/hw/irdma/main.h | 2 + drivers/infiniband/hw/irdma/verbs.c | 10 ++- include/uapi/rdma/irdma-abi.h | 1 + 4 files changed, 132 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index 162f1fab32b5..95957d52883d 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2015 - 2021 Intel Corporation */ #include "main.h" +#include MODULE_ALIAS("i40iw"); MODULE_DESCRIPTION("Intel(R) Ethernet Protocol Driver for RDMA"); @@ -46,6 +47,113 @@ void irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev) ibdev_warn(to_ibdev(dev), "MTU setting [%d] too low for RDMA traffic. 
Minimum MTU is 1280 for IPv6\\n", mtu); } +static void ig3rdma_idc_vport_event_handler(struct iidc_rdma_vport_dev_info *cdev_info, + struct iidc_rdma_event *event) +{ + struct irdma_device *iwdev = auxiliary_get_drvdata(cdev_info->adev); + struct irdma_l2params l2params = {}; + + if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_MTU_CHANGE)) { + ibdev_dbg(&iwdev->ibdev, "CLNT: new MTU = %d\n", iwdev->netdev->mtu); + if (iwdev->vsi.mtu != iwdev->netdev->mtu) { + l2params.mtu = iwdev->netdev->mtu; + l2params.mtu_changed = true; + irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev); + irdma_change_l2params(&iwdev->vsi, &l2params); + } + } +} + +static int ig3rdma_vport_probe(struct auxiliary_device *aux_dev, + const struct auxiliary_device_id *id) +{ + struct iidc_rdma_vport_auxiliary_dev *idc_adev = + container_of(aux_dev, struct iidc_rdma_vport_auxiliary_dev, adev); + struct auxiliary_device *aux_core_dev = idc_adev->vdev_info->core_adev; + struct irdma_pci_f *rf = auxiliary_get_drvdata(aux_core_dev); + struct irdma_l2params l2params = {}; + struct irdma_device *iwdev; + int err; + + if (!rf) { + WARN_ON_ONCE(1); + return -ENOMEM; + } + iwdev = ib_alloc_device(irdma_device, ibdev); + /* Fill iwdev info */ + iwdev->is_vport = true; + iwdev->rf = rf; + iwdev->vport_id = idc_adev->vdev_info->vport_id; + iwdev->netdev = idc_adev->vdev_info->netdev; + iwdev->init_state = INITIAL_STATE; + iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT; + iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT; + iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED; + iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE; + iwdev->roce_mode = true; + iwdev->push_mode = false; + + l2params.mtu = iwdev->netdev->mtu; + + err = irdma_rt_init_hw(iwdev, &l2params); + if (err) + goto err_rt_init; + + err = irdma_ib_register_device(iwdev); + if (err) + goto err_ibreg; + + auxiliary_set_drvdata(aux_dev, iwdev); + + ibdev_dbg(&iwdev->ibdev, + "INIT: Gen[%d] vport[%d] probe success. 
dev_name = %s, core_dev_name = %s, netdev=%s\n", + rf->rdma_ver, idc_adev->vdev_info->vport_id, + dev_name(&aux_dev->dev), + dev_name(&idc_adev->vdev_info->core_adev->dev), + netdev_name(idc_adev->vdev_info->netdev)); + + return 0; +err_ibreg: + irdma_rt_deinit_hw(iwdev); +err_rt_init: + ib_dealloc_device(&iwdev->ibdev); + + return err; +} + +static void ig3rdma_vport_remove(struct auxiliary_device *aux_dev) +{ + struct iidc_rdma_vport_auxiliary_dev *idc_adev = + container_of(aux_dev, struct iidc_rdma_vport_auxiliary_dev, adev); + struct irdma_device *iwdev = auxiliary_get_drvdata(aux_dev); + + ibdev_dbg(&iwdev->ibdev, + "INIT: Gen[%d] dev_name = %s, core_dev_name = %s, netdev=%s\n", + iwdev->rf->rdma_ver, dev_name(&aux_dev->dev), + dev_name(&idc_adev->vdev_info->core_adev->dev), + netdev_name(idc_adev->vdev_info->netdev)); + + irdma_ib_unregister_device(iwdev); +} + +static const struct auxiliary_device_id ig3rdma_vport_auxiliary_id_table[] = { + {.name = "idpf.8086.rdma.vdev", }, + {}, +}; + +MODULE_DEVICE_TABLE(auxiliary, ig3rdma_vport_auxiliary_id_table); + +static struct iidc_rdma_vport_auxiliary_drv ig3rdma_vport_auxiliary_drv = { + .adrv = { + .name = "vdev", + .id_table = ig3rdma_vport_auxiliary_id_table, + .probe = ig3rdma_vport_probe, + .remove = ig3rdma_vport_remove, + }, + .event_handler = ig3rdma_idc_vport_event_handler, +}; + + static int __init irdma_init_module(void) { int ret; @@ -74,6 +182,17 @@ static int __init irdma_init_module(void) return ret; } + + ret = auxiliary_driver_register(&ig3rdma_vport_auxiliary_drv.adrv); + if (ret) { + auxiliary_driver_unregister(&ig3rdma_core_auxiliary_drv.adrv); + auxiliary_driver_unregister(&icrdma_core_auxiliary_drv.adrv); + auxiliary_driver_unregister(&i40iw_auxiliary_drv); + pr_err("Failed ig3rdma vport auxiliary_driver_register() ret=%d\n", + ret); + + return ret; + } irdma_register_notifiers(); return 0; @@ -85,6 +204,7 @@ static void __exit irdma_exit_module(void) auxiliary_driver_unregister(&icrdma_core_auxiliary_drv.adrv); auxiliary_driver_unregister(&i40iw_auxiliary_drv); auxiliary_driver_unregister(&ig3rdma_core_auxiliary_drv.adrv); + auxiliary_driver_unregister(&ig3rdma_vport_auxiliary_drv.adrv); } module_init(irdma_init_module); diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index 7300f8ab49ca..c64d772dc644 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -354,12 +354,14 @@ struct irdma_device { u32 rcv_wnd; u16 mac_ip_table_idx; u16 vsi_num; + u16 vport_id; u8 rcv_wscale; u8 iw_status; bool roce_mode:1; bool roce_dcqcn_en:1; bool dcb_vlan_mode:1; bool iw_ooo:1; + bool is_vport:1; enum init_completion_state init_state; wait_queue_head_t suspend_wq; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 894c1f7bcb43..7f59bdd85da0 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -292,6 +292,10 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, ucontext->iwdev = iwdev; ucontext->abi_ver = req.userspace_ver; + if (!(req.comp_mask & IRDMA_SUPPORT_WQE_FORMAT_V2) && + uk_attrs->hw_rev >= IRDMA_GEN_3) + return -EOPNOTSUPP; + if (req.comp_mask & IRDMA_ALLOC_UCTX_USE_RAW_ATTR) ucontext->use_raw_attrs = true; @@ -4891,5 +4895,9 @@ void irdma_ib_dealloc_device(struct ib_device *ibdev) struct irdma_device *iwdev = to_iwdev(ibdev); irdma_rt_deinit_hw(iwdev); - irdma_ctrl_deinit_hw(iwdev->rf); + if (!iwdev->is_vport) { + irdma_ctrl_deinit_hw(iwdev->rf); + if 
(iwdev->rf->vchnl_wq) + destroy_workqueue(iwdev->rf->vchnl_wq); + } } diff --git a/include/uapi/rdma/irdma-abi.h b/include/uapi/rdma/irdma-abi.h index bb18f15489e3..4e42054cca33 100644 --- a/include/uapi/rdma/irdma-abi.h +++ b/include/uapi/rdma/irdma-abi.h @@ -25,6 +25,7 @@ enum irdma_memreg_type { enum { IRDMA_ALLOC_UCTX_USE_RAW_ATTR = 1 << 0, IRDMA_ALLOC_UCTX_MIN_HW_WQ_SIZE = 1 << 1, + IRDMA_SUPPORT_WQE_FORMAT_V2 = 1 << 3, }; struct irdma_alloc_ucontext_req { From d6ed4b69b8ea756200432099b9a525f23d2a4c56 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 27 Aug 2025 10:25:37 -0500 Subject: [PATCH 60/85] RDMA/irdma: Add GEN3 virtual QP1 support Add a new RDMA virtual channel op during QP1 creation that allow the Control Plane (CP) to virtualize a regular QP as QP1 on non-default RDMA capable vPorts. Additionally, the CP will return the Qsets to use on the ib_device of the vPort. Signed-off-by: Shiraz Saleem Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250827152545.2056-9-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/ctrl.c | 10 ++- drivers/infiniband/hw/irdma/main.h | 1 + drivers/infiniband/hw/irdma/utils.c | 30 ++++++++- drivers/infiniband/hw/irdma/verbs.c | 84 ++++++++++++++++++++------ drivers/infiniband/hw/irdma/virtchnl.c | 52 ++++++++++++++++ drivers/infiniband/hw/irdma/virtchnl.h | 19 ++++++ 6 files changed, 174 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 26b8905dbc03..f347e75a18b3 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -74,6 +74,14 @@ static void irdma_set_qos_info(struct irdma_sc_vsi *vsi, { u8 i; + if (vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) { + for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { + vsi->qos[i].qs_handle = vsi->dev->qos[i].qs_handle; + vsi->qos[i].valid = true; + } + + return; + } vsi->qos_rel_bw = l2p->vsi_rel_bw; vsi->qos_prio_type = l2p->vsi_prio_type; vsi->dscp_mode = l2p->dscp_mode; @@ -1877,7 +1885,7 @@ void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi, mutex_init(&vsi->qos[i].qos_mutex); INIT_LIST_HEAD(&vsi->qos[i].qplist); } - if (vsi->register_qset) { + if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) { vsi->dev->ws_add = irdma_ws_add; vsi->dev->ws_remove = irdma_ws_remove; vsi->dev->ws_reset = irdma_ws_reset; diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index c64d772dc644..3d37cd12e9f6 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -260,6 +260,7 @@ struct irdma_pci_f { bool reset:1; bool rsrc_created:1; bool msix_shared:1; + bool hwqp1_rsvd:1; u8 rsrc_profile; u8 *hmc_info_mem; u8 *mem_rsrc; diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index d4c51b56ed22..1fd09e287e6f 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -1113,6 +1113,26 @@ static void irdma_dealloc_push_page(struct irdma_pci_f *rf, irdma_put_cqp_request(&rf->cqp, cqp_request); } +static void irdma_free_gsi_qp_rsrc(struct irdma_qp *iwqp, u32 qp_num) +{ + struct irdma_device *iwdev = iwqp->iwdev; + struct irdma_pci_f *rf = iwdev->rf; + unsigned long flags; + + if (rf->sc_dev.hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_3) + return; + + irdma_vchnl_req_del_vport(&rf->sc_dev, iwdev->vport_id, qp_num); + + if (qp_num == 1) { + spin_lock_irqsave(&rf->rsrc_lock, flags); + rf->hwqp1_rsvd = false; + 
spin_unlock_irqrestore(&rf->rsrc_lock, flags); + } else if (qp_num > 2) { + irdma_free_rsrc(rf, rf->allocated_qps, qp_num); + } +} + /** * irdma_free_qp_rsrc - free up memory resources for qp * @iwqp: qp ptr (user or kernel) @@ -1121,7 +1141,7 @@ void irdma_free_qp_rsrc(struct irdma_qp *iwqp) { struct irdma_device *iwdev = iwqp->iwdev; struct irdma_pci_f *rf = iwdev->rf; - u32 qp_num = iwqp->ibqp.qp_num; + u32 qp_num = iwqp->sc_qp.qp_uk.qp_id; irdma_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp); irdma_dealloc_push_page(rf, &iwqp->sc_qp); @@ -1131,8 +1151,12 @@ void irdma_free_qp_rsrc(struct irdma_qp *iwqp) iwqp->sc_qp.user_pri); } - if (qp_num > 2) - irdma_free_rsrc(rf, rf->allocated_qps, qp_num); + if (iwqp->ibqp.qp_type == IB_QPT_GSI) { + irdma_free_gsi_qp_rsrc(iwqp, qp_num); + } else { + if (qp_num > 2) + irdma_free_rsrc(rf, rf->allocated_qps, qp_num); + } dma_free_coherent(rf->sc_dev.hw->device, iwqp->q2_ctx_mem.size, iwqp->q2_ctx_mem.va, iwqp->q2_ctx_mem.pa); iwqp->q2_ctx_mem.va = NULL; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 7f59bdd85da0..8a7f2f4c8a54 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -545,6 +545,9 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp); irdma_remove_push_mmap_entries(iwqp); + + if (iwqp->sc_qp.qp_uk.qp_id == 1) + iwdev->rf->hwqp1_rsvd = false; irdma_free_qp_rsrc(iwqp); return 0; @@ -723,6 +726,7 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, info->rq_pa + (ukinfo->rq_depth * IRDMA_QP_WQE_MIN_SIZE); ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift; ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift; + ukinfo->qp_id = info->qp_uk_init_info.qp_id; iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift; iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift; @@ -779,6 +783,8 @@ static void irdma_roce_fill_and_set_qpctx_info(struct irdma_qp *iwqp, roce_info = &iwqp->roce_info; ether_addr_copy(roce_info->mac_addr, iwdev->netdev->dev_addr); + if (iwqp->ibqp.qp_type == IB_QPT_GSI && iwqp->ibqp.qp_num != 1) + roce_info->is_qp1 = true; roce_info->rd_en = true; roce_info->wr_rdresp_en = true; roce_info->bind_en = true; @@ -868,6 +874,47 @@ static void irdma_flush_worker(struct work_struct *work) irdma_generate_flush_completions(iwqp); } +static int irdma_setup_gsi_qp_rsrc(struct irdma_qp *iwqp, u32 *qp_num) +{ + struct irdma_device *iwdev = iwqp->iwdev; + struct irdma_pci_f *rf = iwdev->rf; + unsigned long flags; + int ret; + + if (rf->rdma_ver <= IRDMA_GEN_2) { + *qp_num = 1; + return 0; + } + + spin_lock_irqsave(&rf->rsrc_lock, flags); + if (!rf->hwqp1_rsvd) { + *qp_num = 1; + rf->hwqp1_rsvd = true; + spin_unlock_irqrestore(&rf->rsrc_lock, flags); + } else { + spin_unlock_irqrestore(&rf->rsrc_lock, flags); + ret = irdma_alloc_rsrc(rf, rf->allocated_qps, rf->max_qp, + qp_num, &rf->next_qp); + if (ret) + return ret; + } + + ret = irdma_vchnl_req_add_vport(&rf->sc_dev, iwdev->vport_id, *qp_num, + (&iwdev->vsi)->qos); + if (ret) { + if (*qp_num != 1) { + irdma_free_rsrc(rf, rf->allocated_qps, *qp_num); + } else { + spin_lock_irqsave(&rf->rsrc_lock, flags); + rf->hwqp1_rsvd = false; + spin_unlock_irqrestore(&rf->rsrc_lock, flags); + } + return ret; + } + + return 0; +} + /** * irdma_create_qp - create qp * @ibqp: ptr of qp @@ -929,16 +976,20 @@ static int irdma_create_qp(struct ib_qp *ibqp, init_info.host_ctx = (__le64 
*)(init_info.q2 + IRDMA_Q2_BUF_SIZE); init_info.host_ctx_pa = init_info.q2_pa + IRDMA_Q2_BUF_SIZE; - if (init_attr->qp_type == IB_QPT_GSI) - qp_num = 1; - else + if (init_attr->qp_type == IB_QPT_GSI) { + err_code = irdma_setup_gsi_qp_rsrc(iwqp, &qp_num); + if (err_code) + goto error; + iwqp->ibqp.qp_num = 1; + } else { err_code = irdma_alloc_rsrc(rf, rf->allocated_qps, rf->max_qp, &qp_num, &rf->next_qp); - if (err_code) - goto error; + if (err_code) + goto error; + iwqp->ibqp.qp_num = qp_num; + } iwqp->iwpd = iwpd; - iwqp->ibqp.qp_num = qp_num; qp = &iwqp->sc_qp; iwqp->iwscq = to_iwcq(init_attr->send_cq); iwqp->iwrcq = to_iwcq(init_attr->recv_cq); @@ -998,10 +1049,17 @@ static int irdma_create_qp(struct ib_qp *ibqp, ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id; ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id; - if (rdma_protocol_roce(&iwdev->ibdev, 1)) + if (rdma_protocol_roce(&iwdev->ibdev, 1)) { + if (dev->ws_add(&iwdev->vsi, 0)) { + irdma_cqp_qp_destroy_cmd(&rf->sc_dev, &iwqp->sc_qp); + err_code = -EINVAL; + goto error; + } + irdma_qp_add_qos(&iwqp->sc_qp); irdma_roce_fill_and_set_qpctx_info(iwqp, ctx_info); - else + } else { irdma_iw_fill_and_set_qpctx_info(iwqp, ctx_info); + } err_code = irdma_cqp_create_qp_cmd(iwqp); if (err_code) @@ -1013,16 +1071,6 @@ static int irdma_create_qp(struct ib_qp *ibqp, iwqp->sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; rf->qp_table[qp_num] = iwqp; - if (rdma_protocol_roce(&iwdev->ibdev, 1)) { - if (dev->ws_add(&iwdev->vsi, 0)) { - irdma_cqp_qp_destroy_cmd(&rf->sc_dev, &iwqp->sc_qp); - err_code = -EINVAL; - goto error; - } - - irdma_qp_add_qos(&iwqp->sc_qp); - } - if (udata) { /* GEN_1 legacy support with libi40iw does not have expanded uresp struct */ if (udata->outlen < sizeof(uresp)) { diff --git a/drivers/infiniband/hw/irdma/virtchnl.c b/drivers/infiniband/hw/irdma/virtchnl.c index 8f4a2189f905..16ad27247527 100644 --- a/drivers/infiniband/hw/irdma/virtchnl.c +++ b/drivers/infiniband/hw/irdma/virtchnl.c @@ -112,6 +112,8 @@ static int irdma_vchnl_req_verify_resp(struct irdma_vchnl_req *vchnl_req, case IRDMA_VCHNL_OP_GET_REG_LAYOUT: case IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP: case IRDMA_VCHNL_OP_QUEUE_VECTOR_UNMAP: + case IRDMA_VCHNL_OP_ADD_VPORT: + case IRDMA_VCHNL_OP_DEL_VPORT: break; default: return -EOPNOTSUPP; @@ -317,6 +319,56 @@ int irdma_vchnl_req_get_reg_layout(struct irdma_sc_dev *dev) return 0; } +int irdma_vchnl_req_add_vport(struct irdma_sc_dev *dev, u16 vport_id, + u32 qp1_id, struct irdma_qos *qos) +{ + struct irdma_vchnl_resp_vport_info resp_vport = { 0 }; + struct irdma_vchnl_req_vport_info req_vport = { 0 }; + struct irdma_vchnl_req_init_info info = { 0 }; + int ret, i; + + if (!dev->vchnl_up) + return -EBUSY; + + info.op_code = IRDMA_VCHNL_OP_ADD_VPORT; + info.op_ver = IRDMA_VCHNL_OP_ADD_VPORT_V0; + req_vport.vport_id = vport_id; + req_vport.qp1_id = qp1_id; + info.req_parm_len = sizeof(req_vport); + info.req_parm = &req_vport; + info.resp_parm = &resp_vport; + info.resp_parm_len = sizeof(resp_vport); + + ret = irdma_vchnl_req_send_sync(dev, &info); + if (ret) + return ret; + + for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { + qos[i].qs_handle = resp_vport.qs_handle[i]; + qos[i].valid = true; + } + + return 0; +} + +int irdma_vchnl_req_del_vport(struct irdma_sc_dev *dev, u16 vport_id, u32 qp1_id) +{ + struct irdma_vchnl_req_init_info info = { 0 }; + struct irdma_vchnl_req_vport_info req_vport = { 0 }; + + if (!dev->vchnl_up) + return -EBUSY; + + info.op_code = IRDMA_VCHNL_OP_DEL_VPORT; + info.op_ver = 
IRDMA_VCHNL_OP_DEL_VPORT_V0; + req_vport.vport_id = vport_id; + req_vport.qp1_id = qp1_id; + info.req_parm_len = sizeof(req_vport); + info.req_parm = &req_vport; + + return irdma_vchnl_req_send_sync(dev, &info); +} + /** * irdma_vchnl_req_aeq_vec_map - Map AEQ to vector on this function * @dev: RDMA device pointer diff --git a/drivers/infiniband/hw/irdma/virtchnl.h b/drivers/infiniband/hw/irdma/virtchnl.h index 6acd698620d6..aa955a9125bd 100644 --- a/drivers/infiniband/hw/irdma/virtchnl.h +++ b/drivers/infiniband/hw/irdma/virtchnl.h @@ -17,6 +17,8 @@ #define IRDMA_VCHNL_OP_GET_REG_LAYOUT_V0 0 #define IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP_V0 0 #define IRDMA_VCHNL_OP_QUEUE_VECTOR_UNMAP_V0 0 +#define IRDMA_VCHNL_OP_ADD_VPORT_V0 0 +#define IRDMA_VCHNL_OP_DEL_VPORT_V0 0 #define IRDMA_VCHNL_OP_GET_RDMA_CAPS_V0 0 #define IRDMA_VCHNL_OP_GET_RDMA_CAPS_MIN_SIZE 1 @@ -57,6 +59,8 @@ enum irdma_vchnl_ops { IRDMA_VCHNL_OP_GET_RDMA_CAPS = 13, IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP = 14, IRDMA_VCHNL_OP_QUEUE_VECTOR_UNMAP = 15, + IRDMA_VCHNL_OP_ADD_VPORT = 16, + IRDMA_VCHNL_OP_DEL_VPORT = 17, }; struct irdma_vchnl_req_hmc_info { @@ -81,6 +85,15 @@ struct irdma_vchnl_qvlist_info { struct irdma_vchnl_qv_info qv_info[]; }; +struct irdma_vchnl_req_vport_info { + u16 vport_id; + u32 qp1_id; +}; + +struct irdma_vchnl_resp_vport_info { + u16 qs_handle[IRDMA_MAX_USER_PRIORITY]; +}; + struct irdma_vchnl_op_buf { u16 op_code; u16 op_ver; @@ -141,6 +154,8 @@ struct irdma_vchnl_req_init_info { u16 op_ver; } __packed; +struct irdma_qos; + int irdma_sc_vchnl_init(struct irdma_sc_dev *dev, struct irdma_vchnl_init_info *info); int irdma_vchnl_req_get_ver(struct irdma_sc_dev *dev, u16 ver_req, @@ -154,4 +169,8 @@ int irdma_vchnl_req_get_reg_layout(struct irdma_sc_dev *dev); int irdma_vchnl_req_aeq_vec_map(struct irdma_sc_dev *dev, u32 v_idx); int irdma_vchnl_req_ceq_vec_map(struct irdma_sc_dev *dev, u16 ceq_id, u32 v_idx); +int irdma_vchnl_req_add_vport(struct irdma_sc_dev *dev, u16 vport_id, + u32 qp1_id, struct irdma_qos *qos); +int irdma_vchnl_req_del_vport(struct irdma_sc_dev *dev, u16 vport_id, + u32 qp1_id); #endif /* IRDMA_VIRTCHNL_H */ From 87f413b6c930bec82a3831b499d013d59c83eb19 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 27 Aug 2025 10:25:38 -0500 Subject: [PATCH 61/85] RDMA/irdma: Extend QP context programming for GEN3 Extend the QP context structure with support for new fields specific to GEN3 hardware capabilities. 
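For illustration only (not part of this change): one of the newly programmed
fields, the minimum RNR NAK timer, is driven from the standard verbs
attribute and is now carried into the GEN3 QP context (IRDMAQPC_MINRNR_TIMER).
A minimal libibverbs sketch, where qp is a placeholder for an RC QP that is
already connected (RTS); error handling is omitted:

    #include <infiniband/verbs.h>

    /* Request a 0.64 ms minimum RNR NAK timer (IBTA encoding 12) */
    struct ibv_qp_attr attr = { .min_rnr_timer = 12 };
    int ret = ibv_modify_qp(qp, &attr, IBV_QP_MIN_RNR_TIMER);

    /* Read the value back; on GEN3 the driver now reports it from udp_info */
    struct ibv_qp_init_attr init_attr;
    if (!ret)
            ret = ibv_query_qp(qp, &attr, IBV_QP_MIN_RNR_TIMER, &init_attr);
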
Signed-off-by: Shiraz Saleem Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250827152545.2056-10-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/ctrl.c | 184 +++++++++++++++++++++++++++- drivers/infiniband/hw/irdma/defs.h | 24 +++- drivers/infiniband/hw/irdma/type.h | 4 + drivers/infiniband/hw/irdma/uda_d.h | 5 +- drivers/infiniband/hw/irdma/verbs.c | 5 + 5 files changed, 215 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index f347e75a18b3..1ab47b1b6fa9 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -637,13 +637,14 @@ static u8 irdma_sc_get_encoded_ird_size(u16 ird_size) } /** - * irdma_sc_qp_setctx_roce - set qp's context + * irdma_sc_qp_setctx_roce_gen_2 - set qp's context * @qp: sc qp * @qp_ctx: context ptr * @info: ctx info */ -void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx, - struct irdma_qp_host_ctx_info *info) +static void irdma_sc_qp_setctx_roce_gen_2(struct irdma_sc_qp *qp, + __le64 *qp_ctx, + struct irdma_qp_host_ctx_info *info) { struct irdma_roce_offload_info *roce_info; struct irdma_udp_offload_info *udp; @@ -761,6 +762,183 @@ void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx, 8, qp_ctx, IRDMA_QP_CTX_SIZE, false); } +/** + * irdma_sc_get_encoded_ird_size_gen_3 - get encoded IRD size for GEN 3 + * @ird_size: IRD size + * The ird from the connection is rounded to a supported HW setting and then encoded + * for ird_size field of qp_ctx. Consumers are expected to provide valid ird size based + * on hardware attributes. IRD size defaults to a value of 4 in case of invalid input. + */ +static u8 irdma_sc_get_encoded_ird_size_gen_3(u16 ird_size) +{ + switch (ird_size ? 
+ roundup_pow_of_two(2 * ird_size) : 4) { + case 4096: + return IRDMA_IRD_HW_SIZE_4096_GEN3; + case 2048: + return IRDMA_IRD_HW_SIZE_2048_GEN3; + case 1024: + return IRDMA_IRD_HW_SIZE_1024_GEN3; + case 512: + return IRDMA_IRD_HW_SIZE_512_GEN3; + case 256: + return IRDMA_IRD_HW_SIZE_256_GEN3; + case 128: + return IRDMA_IRD_HW_SIZE_128_GEN3; + case 64: + return IRDMA_IRD_HW_SIZE_64_GEN3; + case 32: + return IRDMA_IRD_HW_SIZE_32_GEN3; + case 16: + return IRDMA_IRD_HW_SIZE_16_GEN3; + case 8: + return IRDMA_IRD_HW_SIZE_8_GEN3; + case 4: + default: + break; + } + + return IRDMA_IRD_HW_SIZE_4_GEN3; +} + +/** + * irdma_sc_qp_setctx_roce_gen_3 - set qp's context + * @qp: sc qp + * @qp_ctx: context ptr + * @info: ctx info + */ +static void irdma_sc_qp_setctx_roce_gen_3(struct irdma_sc_qp *qp, + __le64 *qp_ctx, + struct irdma_qp_host_ctx_info *info) +{ + struct irdma_roce_offload_info *roce_info = info->roce_info; + struct irdma_udp_offload_info *udp = info->udp_info; + u64 qw0, qw3, qw7 = 0, qw8 = 0; + u8 push_mode_en; + u32 push_idx; + + qp->user_pri = info->user_pri; + if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) { + push_mode_en = 0; + push_idx = 0; + } else { + push_mode_en = 1; + push_idx = qp->push_idx; + } + + qw0 = FIELD_PREP(IRDMAQPC_RQWQESIZE, qp->qp_uk.rq_wqe_size) | + FIELD_PREP(IRDMAQPC_RCVTPHEN, qp->rcv_tph_en) | + FIELD_PREP(IRDMAQPC_XMITTPHEN, qp->xmit_tph_en) | + FIELD_PREP(IRDMAQPC_RQTPHEN, qp->rq_tph_en) | + FIELD_PREP(IRDMAQPC_SQTPHEN, qp->sq_tph_en) | + FIELD_PREP(IRDMAQPC_PPIDX, push_idx) | + FIELD_PREP(IRDMAQPC_PMENA, push_mode_en) | + FIELD_PREP(IRDMAQPC_DC_TCP_EN, roce_info->dctcp_en) | + FIELD_PREP(IRDMAQPC_ISQP1, roce_info->is_qp1) | + FIELD_PREP(IRDMAQPC_ROCE_TVER, roce_info->roce_tver) | + FIELD_PREP(IRDMAQPC_IPV4, udp->ipv4) | + FIELD_PREP(IRDMAQPC_INSERTVLANTAG, udp->insert_vlan_tag); + set_64bit_val(qp_ctx, 0, qw0); + set_64bit_val(qp_ctx, 8, qp->sq_pa); + set_64bit_val(qp_ctx, 16, qp->rq_pa); + qw3 = FIELD_PREP(IRDMAQPC_RQSIZE, qp->hw_rq_size) | + FIELD_PREP(IRDMAQPC_SQSIZE, qp->hw_sq_size) | + FIELD_PREP(IRDMAQPC_TTL, udp->ttl) | + FIELD_PREP(IRDMAQPC_TOS, udp->tos) | + FIELD_PREP(IRDMAQPC_SRCPORTNUM, udp->src_port) | + FIELD_PREP(IRDMAQPC_DESTPORTNUM, udp->dst_port); + set_64bit_val(qp_ctx, 24, qw3); + set_64bit_val(qp_ctx, 32, + FIELD_PREP(IRDMAQPC_DESTIPADDR2, udp->dest_ip_addr[2]) | + FIELD_PREP(IRDMAQPC_DESTIPADDR3, udp->dest_ip_addr[3])); + set_64bit_val(qp_ctx, 40, + FIELD_PREP(IRDMAQPC_DESTIPADDR0, udp->dest_ip_addr[0]) | + FIELD_PREP(IRDMAQPC_DESTIPADDR1, udp->dest_ip_addr[1])); + set_64bit_val(qp_ctx, 48, + FIELD_PREP(IRDMAQPC_SNDMSS, udp->snd_mss) | + FIELD_PREP(IRDMAQPC_VLANTAG, udp->vlan_tag) | + FIELD_PREP(IRDMAQPC_ARPIDX, udp->arp_idx)); + qw7 = FIELD_PREP(IRDMAQPC_PKEY, roce_info->p_key) | + FIELD_PREP(IRDMAQPC_ACKCREDITS, roce_info->ack_credits) | + FIELD_PREP(IRDMAQPC_FLOWLABEL, udp->flow_label); + set_64bit_val(qp_ctx, 56, qw7); + qw8 = FIELD_PREP(IRDMAQPC_QKEY, roce_info->qkey) | + FIELD_PREP(IRDMAQPC_DESTQP, roce_info->dest_qp); + set_64bit_val(qp_ctx, 64, qw8); + set_64bit_val(qp_ctx, 80, + FIELD_PREP(IRDMAQPC_PSNNXT, udp->psn_nxt) | + FIELD_PREP(IRDMAQPC_LSN, udp->lsn)); + set_64bit_val(qp_ctx, 88, + FIELD_PREP(IRDMAQPC_EPSN, udp->epsn)); + set_64bit_val(qp_ctx, 96, + FIELD_PREP(IRDMAQPC_PSNMAX, udp->psn_max) | + FIELD_PREP(IRDMAQPC_PSNUNA, udp->psn_una)); + set_64bit_val(qp_ctx, 112, + FIELD_PREP(IRDMAQPC_CWNDROCE, udp->cwnd)); + set_64bit_val(qp_ctx, 128, + FIELD_PREP(IRDMAQPC_MINRNR_TIMER, udp->min_rnr_timer) | + 
FIELD_PREP(IRDMAQPC_RNRNAK_THRESH, udp->rnr_nak_thresh) | + FIELD_PREP(IRDMAQPC_REXMIT_THRESH, udp->rexmit_thresh) | + FIELD_PREP(IRDMAQPC_RNRNAK_TMR, udp->rnr_nak_tmr) | + FIELD_PREP(IRDMAQPC_RTOMIN, roce_info->rtomin)); + set_64bit_val(qp_ctx, 136, + FIELD_PREP(IRDMAQPC_TXCQNUM, info->send_cq_num) | + FIELD_PREP(IRDMAQPC_RXCQNUM, info->rcv_cq_num)); + set_64bit_val(qp_ctx, 152, + FIELD_PREP(IRDMAQPC_MACADDRESS, + ether_addr_to_u64(roce_info->mac_addr)) | + FIELD_PREP(IRDMAQPC_LOCALACKTIMEOUT, + roce_info->local_ack_timeout)); + set_64bit_val(qp_ctx, 160, + FIELD_PREP(IRDMAQPC_ORDSIZE_GEN3, roce_info->ord_size) | + FIELD_PREP(IRDMAQPC_IRDSIZE_GEN3, + irdma_sc_get_encoded_ird_size_gen_3(roce_info->ird_size)) | + FIELD_PREP(IRDMAQPC_WRRDRSPOK, roce_info->wr_rdresp_en) | + FIELD_PREP(IRDMAQPC_RDOK, roce_info->rd_en) | + FIELD_PREP(IRDMAQPC_USESTATSINSTANCE, + info->stats_idx_valid) | + FIELD_PREP(IRDMAQPC_BINDEN, roce_info->bind_en) | + FIELD_PREP(IRDMAQPC_FASTREGEN, roce_info->fast_reg_en) | + FIELD_PREP(IRDMAQPC_DCQCNENABLE, roce_info->dcqcn_en) | + FIELD_PREP(IRDMAQPC_RCVNOICRC, roce_info->rcv_no_icrc) | + FIELD_PREP(IRDMAQPC_FW_CC_ENABLE, + roce_info->fw_cc_enable) | + FIELD_PREP(IRDMAQPC_UDPRIVCQENABLE, + roce_info->udprivcq_en) | + FIELD_PREP(IRDMAQPC_PRIVEN, roce_info->priv_mode_en) | + FIELD_PREP(IRDMAQPC_TIMELYENABLE, roce_info->timely_en)); + set_64bit_val(qp_ctx, 168, + FIELD_PREP(IRDMAQPC_QPCOMPCTX, info->qp_compl_ctx)); + set_64bit_val(qp_ctx, 176, + FIELD_PREP(IRDMAQPC_SQTPHVAL, qp->sq_tph_val) | + FIELD_PREP(IRDMAQPC_RQTPHVAL, qp->rq_tph_val) | + FIELD_PREP(IRDMAQPC_QSHANDLE, qp->qs_handle)); + set_64bit_val(qp_ctx, 184, + FIELD_PREP(IRDMAQPC_LOCAL_IPADDR3, udp->local_ipaddr[3]) | + FIELD_PREP(IRDMAQPC_LOCAL_IPADDR2, udp->local_ipaddr[2])); + set_64bit_val(qp_ctx, 192, + FIELD_PREP(IRDMAQPC_LOCAL_IPADDR1, udp->local_ipaddr[1]) | + FIELD_PREP(IRDMAQPC_LOCAL_IPADDR0, udp->local_ipaddr[0])); + set_64bit_val(qp_ctx, 200, + FIELD_PREP(IRDMAQPC_THIGH, roce_info->t_high) | + FIELD_PREP(IRDMAQPC_TLOW, roce_info->t_low)); + set_64bit_val(qp_ctx, 208, roce_info->pd_id | + FIELD_PREP(IRDMAQPC_STAT_INDEX_GEN3, info->stats_idx) | + FIELD_PREP(IRDMAQPC_PKT_LIMIT, qp->pkt_limit)); + + print_hex_dump_debug("WQE: QP_HOST ROCE CTX WQE", DUMP_PREFIX_OFFSET, + 16, 8, qp_ctx, IRDMA_QP_CTX_SIZE, false); +} + +void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx, + struct irdma_qp_host_ctx_info *info) +{ + if (qp->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) + irdma_sc_qp_setctx_roce_gen_2(qp, qp_ctx, info); + else + irdma_sc_qp_setctx_roce_gen_3(qp, qp_ctx, info); +} + /* irdma_sc_alloc_local_mac_entry - allocate a mac entry * @cqp: struct for cqp hw * @scratch: u64 saved to be used during cqp completion diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index 2fc8e3cf4395..1eff7d8d8f15 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -14,6 +14,18 @@ #define IRDMA_PE_DB_SIZE_4M 1 #define IRDMA_PE_DB_SIZE_8M 2 +#define IRDMA_IRD_HW_SIZE_4_GEN3 0 +#define IRDMA_IRD_HW_SIZE_8_GEN3 1 +#define IRDMA_IRD_HW_SIZE_16_GEN3 2 +#define IRDMA_IRD_HW_SIZE_32_GEN3 3 +#define IRDMA_IRD_HW_SIZE_64_GEN3 4 +#define IRDMA_IRD_HW_SIZE_128_GEN3 5 +#define IRDMA_IRD_HW_SIZE_256_GEN3 6 +#define IRDMA_IRD_HW_SIZE_512_GEN3 7 +#define IRDMA_IRD_HW_SIZE_1024_GEN3 8 +#define IRDMA_IRD_HW_SIZE_2048_GEN3 9 +#define IRDMA_IRD_HW_SIZE_4096_GEN3 10 + #define IRDMA_IRD_HW_SIZE_4 0 #define IRDMA_IRD_HW_SIZE_16 1 #define IRDMA_IRD_HW_SIZE_64 
2 @@ -836,7 +848,8 @@ enum irdma_cqp_op_type { #define IRDMAQPC_CWNDROCE GENMASK_ULL(55, 32) #define IRDMAQPC_SNDWL1 GENMASK_ULL(31, 0) #define IRDMAQPC_SNDWL2 GENMASK_ULL(63, 32) -#define IRDMAQPC_ERR_RQ_IDX GENMASK_ULL(45, 32) +#define IRDMAQPC_MINRNR_TIMER GENMASK_ULL(4, 0) +#define IRDMAQPC_ERR_RQ_IDX GENMASK_ULL(46, 32) #define IRDMAQPC_RTOMIN GENMASK_ULL(63, 57) #define IRDMAQPC_MAXSNDWND GENMASK_ULL(31, 0) #define IRDMAQPC_REXMIT_THRESH GENMASK_ULL(53, 48) @@ -849,8 +862,17 @@ enum irdma_cqp_op_type { #define IRDMAQPC_MACADDRESS GENMASK_ULL(63, 16) #define IRDMAQPC_ORDSIZE GENMASK_ULL(7, 0) +#define IRDMAQPC_LOCALACKTIMEOUT GENMASK_ULL(12, 8) +#define IRDMAQPC_RNRNAK_TMR GENMASK_ULL(4, 0) +#define IRDMAQPC_ORDSIZE_GEN3 GENMASK_ULL(10, 0) +#define IRDMAQPC_REMOTE_ATOMIC_EN BIT_ULL(18) +#define IRDMAQPC_STAT_INDEX_GEN3 GENMASK_ULL(47, 32) +#define IRDMAQPC_PKT_LIMIT GENMASK_ULL(55, 48) + #define IRDMAQPC_IRDSIZE GENMASK_ULL(18, 16) +#define IRDMAQPC_IRDSIZE_GEN3 GENMASK_ULL(17, 14) + #define IRDMAQPC_UDPRIVCQENABLE BIT_ULL(19) #define IRDMAQPC_WRRDRSPOK BIT_ULL(20) #define IRDMAQPC_RDOK BIT_ULL(21) diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 5eeb50f5defc..699d7678c626 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -574,6 +574,7 @@ struct irdma_sc_qp { bool flush_rq:1; bool sq_flush_code:1; bool rq_flush_code:1; + u32 pkt_limit; enum irdma_flush_opcode flush_code; enum irdma_qp_event_type event_type; u8 term_flags; @@ -915,6 +916,8 @@ struct irdma_udp_offload_info { u32 cwnd; u8 rexmit_thresh; u8 rnr_nak_thresh; + u8 rnr_nak_tmr; + u8 min_rnr_timer; }; struct irdma_roce_offload_info { @@ -941,6 +944,7 @@ struct irdma_roce_offload_info { bool dctcp_en:1; bool fw_cc_enable:1; bool use_stats_inst:1; + u8 local_ack_timeout; u16 t_high; u16 t_low; u8 last_byte_sent; diff --git a/drivers/infiniband/hw/irdma/uda_d.h b/drivers/infiniband/hw/irdma/uda_d.h index 5a9e6eabf032..4fb4daa20722 100644 --- a/drivers/infiniband/hw/irdma/uda_d.h +++ b/drivers/infiniband/hw/irdma/uda_d.h @@ -78,8 +78,7 @@ #define IRDMA_UDAQPC_IPID GENMASK_ULL(47, 32) #define IRDMA_UDAQPC_SNDMSS GENMASK_ULL(29, 16) #define IRDMA_UDAQPC_VLANTAG GENMASK_ULL(15, 0) - -#define IRDMA_UDA_CQPSQ_MAV_PDINDEXHI GENMASK_ULL(21, 20) +#define IRDMA_UDA_CQPSQ_MAV_PDINDEXHI GENMASK_ULL(27, 20) #define IRDMA_UDA_CQPSQ_MAV_PDINDEXLO GENMASK_ULL(63, 48) #define IRDMA_UDA_CQPSQ_MAV_SRCMACADDRINDEX GENMASK_ULL(29, 24) #define IRDMA_UDA_CQPSQ_MAV_ARPINDEX GENMASK_ULL(63, 48) @@ -94,7 +93,7 @@ #define IRDMA_UDA_CQPSQ_MAV_OPCODE GENMASK_ULL(37, 32) #define IRDMA_UDA_CQPSQ_MAV_DOLOOPBACKK BIT_ULL(62) #define IRDMA_UDA_CQPSQ_MAV_IPV4VALID BIT_ULL(59) -#define IRDMA_UDA_CQPSQ_MAV_AVIDX GENMASK_ULL(16, 0) +#define IRDMA_UDA_CQPSQ_MAV_AVIDX GENMASK_ULL(23, 0) #define IRDMA_UDA_CQPSQ_MAV_INSERTVLANTAG BIT_ULL(60) #define IRDMA_UDA_MGCTX_VFFLAG BIT_ULL(29) #define IRDMA_UDA_MGCTX_DESTPORT GENMASK_ULL(47, 32) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 8a7f2f4c8a54..2857631543b7 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1162,6 +1162,7 @@ static int irdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->pkey_index = iwqp->roce_info.p_key; attr->retry_cnt = iwqp->udp_info.rexmit_thresh; attr->rnr_retry = iwqp->udp_info.rnr_nak_thresh; + attr->min_rnr_timer = iwqp->udp_info.min_rnr_timer; attr->max_rd_atomic = iwqp->roce_info.ord_size; 
attr->max_dest_rd_atomic = iwqp->roce_info.ird_size; } @@ -1294,6 +1295,10 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_RNR_RETRY) udp_info->rnr_nak_thresh = attr->rnr_retry; + if (attr_mask & IB_QP_MIN_RNR_TIMER && + dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) + udp_info->min_rnr_timer = attr->min_rnr_timer; + if (attr_mask & IB_QP_RETRY_CNT) udp_info->rexmit_thresh = attr->retry_cnt; From 419afdd122ea39a0a98401b2688eed0678b67000 Mon Sep 17 00:00:00 2001 From: Vinoth Kumar Chandra Mohan Date: Wed, 27 Aug 2025 10:25:39 -0500 Subject: [PATCH 62/85] RDMA/irdma: Add support for V2 HMC resource management scheme HMC resource initialization is updated to support V1 or V2 approach based on the FW capability. In the V2 approach, driver receives the assigned HMC resources count and verifies if it will fit in the given local memory. If it doesn't fit, the driver load fails. Signed-off-by: Vinoth Kumar Chandra Mohan Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250827152545.2056-11-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/ctrl.c | 121 ++++++++++++++++++++++++++++- drivers/infiniband/hw/irdma/defs.h | 3 + drivers/infiniband/hw/irdma/type.h | 25 +++--- 3 files changed, 130 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 1ab47b1b6fa9..8cc457214537 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -2901,6 +2901,41 @@ static int irdma_sc_cq_modify(struct irdma_sc_cq *cq, return 0; } +/** + * irdma_sc_get_decoded_ird_size_gen_3 - get decoded IRD size for GEN 3 + * @ird_enc: IRD encoding + * IRD size defaults to a value of 4 in case of invalid input. 
+ */ +static u16 irdma_sc_get_decoded_ird_size_gen_3(u8 ird_enc) +{ + switch (ird_enc) { + case IRDMA_IRD_HW_SIZE_4096_GEN3: + return 4096; + case IRDMA_IRD_HW_SIZE_2048_GEN3: + return 2048; + case IRDMA_IRD_HW_SIZE_1024_GEN3: + return 1024; + case IRDMA_IRD_HW_SIZE_512_GEN3: + return 512; + case IRDMA_IRD_HW_SIZE_256_GEN3: + return 256; + case IRDMA_IRD_HW_SIZE_128_GEN3: + return 128; + case IRDMA_IRD_HW_SIZE_64_GEN3: + return 64; + case IRDMA_IRD_HW_SIZE_32_GEN3: + return 32; + case IRDMA_IRD_HW_SIZE_16_GEN3: + return 16; + case IRDMA_IRD_HW_SIZE_8_GEN3: + return 8; + case IRDMA_IRD_HW_SIZE_4_GEN3: + return 4; + default: + return 4; + } +} + /** * irdma_check_cqp_progress - check cqp processing progress * @timeout: timeout info struct @@ -3212,6 +3247,7 @@ static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, struct irdma_hmc_fpm_misc *hmc_fpm_misc) { struct irdma_hmc_obj_info *obj_info; + u8 ird_encoding; u64 temp; u32 size; u16 max_pe_sds; @@ -3287,6 +3323,14 @@ static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, hmc_fpm_misc->max_ceqs = FIELD_GET(IRDMA_QUERY_FPM_MAX_CEQS, temp); hmc_fpm_misc->ht_multiplier = FIELD_GET(IRDMA_QUERY_FPM_HTMULTIPLIER, temp); hmc_fpm_misc->timer_bucket = FIELD_GET(IRDMA_QUERY_FPM_TIMERBUCKET, temp); + if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2, + dev->feature_info[IRDMA_FTN_FLAGS])) { + ird_encoding = (u8)FIELD_GET(IRDMA_QUERY_FPM_MAX_IRD, temp); + hmc_fpm_misc->ird = + irdma_sc_get_decoded_ird_size_gen_3(ird_encoding) / 2; + dev->hw_attrs.max_hw_ird = hmc_fpm_misc->ird; + dev->hw_attrs.max_hw_ord = hmc_fpm_misc->ird; + } if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) return 0; irdma_sc_decode_fpm_query(buf, 96, obj_info, IRDMA_HMC_IW_FSIMC); @@ -5444,10 +5488,71 @@ static void irdma_set_host_hmc_rsrc_gen_3(struct irdma_sc_dev *dev) avail_sds -= DIV_ROUND_UP(mrwanted, MAX_MR_PER_SD); } + if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2, dev->feature_info[IRDMA_FTN_FLAGS]) && + pblewanted > avail_sds * MAX_PBLE_PER_SD) + ibdev_dbg(to_ibdev(dev), + "HMC: Warn: Resource version 2: pble wanted = 0x%x available = 0x%x\n", + pblewanted, avail_sds * MAX_PBLE_PER_SD); + pblewanted = min(pblewanted, avail_sds * MAX_PBLE_PER_SD); hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt = pblewanted; } +/** + * irdma_verify_commit_fpm_gen_3 - verify query fpm values + * @dev: sc device struct + * @max_pages: max local memory available + * @qpwanted: number of qp's wanted + */ +static int irdma_verify_commit_fpm_gen_3(struct irdma_sc_dev *dev, + u32 max_pages, + u32 qpwanted) +{ + struct irdma_hmc_fpm_misc *hmc_fpm_misc; + u32 rrf_cnt, xf_cnt, timer_cnt, pages_needed; + struct irdma_hmc_info *hmc_info; + u32 rrffl_cnt = 0; + u32 xffl_cnt = 0; + u32 q1fl_cnt; + + hmc_info = dev->hmc_info; + hmc_fpm_misc = &dev->hmc_fpm_misc; + + rrf_cnt = roundup_pow_of_two(IRDMA_RRF_MULTIPLIER * qpwanted); + + if (hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].max_cnt) + rrffl_cnt = + hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt / + hmc_fpm_misc->rrf_block_size; + + xf_cnt = roundup_pow_of_two(IRDMA_XF_MULTIPLIER * qpwanted); + + if (xf_cnt) + xffl_cnt = xf_cnt / hmc_fpm_misc->xf_block_size; + + timer_cnt = (round_up(qpwanted, 512) / 512 + 1) * + hmc_fpm_misc->timer_bucket; + + q1fl_cnt = hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt / hmc_fpm_misc->q1_block_size; + + pages_needed = irdma_get_objs_pages(dev, hmc_info, IRDMA_LOC_MEM); + if (pages_needed > max_pages) { + ibdev_dbg(to_ibdev(dev), + "HMC: FAIL: SW counts rrf_cnt = %u rrffl_cnt = %u timer_cnt = %u", + rrf_cnt, rrffl_cnt, 
timer_cnt); + ibdev_dbg(to_ibdev(dev), + "HMC: FAIL: SW counts xf_cnt = %u xffl_cnt = %u q1fl_cnt = %u", + xf_cnt, xffl_cnt, q1fl_cnt); + + return -EINVAL; + } + + hmc_fpm_misc->max_sds -= pages_needed; + hmc_fpm_misc->loc_mem_pages -= pages_needed; + + return 0; +} + /** * irdma_set_loc_hmc_rsrc_gen_3 - calculate hmc resources for gen 3 * @dev: sc device struct @@ -5463,6 +5568,9 @@ static int irdma_set_loc_hmc_rsrc_gen_3(struct irdma_sc_dev *dev, struct irdma_hmc_info *hmc_info; u32 ird, ord; + if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2, dev->feature_info[IRDMA_FTN_FLAGS])) + return irdma_verify_commit_fpm_gen_3(dev, max_pages, qpwanted); + hmc_info = dev->hmc_info; hmc_fpm_misc = &dev->hmc_fpm_misc; ird = dev->hw_attrs.max_hw_ird; @@ -5563,9 +5671,12 @@ static int cfg_fpm_value_gen_3(struct irdma_sc_dev *dev, hmc_info->hmc_obj[IRDMA_HMC_IW_OOISCFFL].max_cnt = 0; hmc_info->hmc_obj[IRDMA_HMC_IW_HTE].max_cnt = 0; hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].max_cnt = 0; - hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt = - min(hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt, - (u32)IRDMA_FSIAV_CNT_MAX); + + if (!FIELD_GET(IRDMA_MANAGE_RSRC_VER2, dev->feature_info[IRDMA_FTN_FLAGS])) + hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt = + min(hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt, + (u32)IRDMA_FSIAV_CNT_MAX); + for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++) hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt; @@ -5573,6 +5684,9 @@ static int cfg_fpm_value_gen_3(struct irdma_sc_dev *dev, if (!irdma_set_loc_hmc_rsrc_gen_3(dev, loc_mem_pages, qpwanted)) break; + if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2, dev->feature_info[IRDMA_FTN_FLAGS])) + return -EINVAL; + qpwanted /= 2; if (mrte_loc == IRDMA_LOC_MEM) { mrwanted = qpwanted * IRDMA_MIN_MR_PER_QP; @@ -5659,6 +5773,7 @@ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].max_cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].max_cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt); + hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt = hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].max_cnt; hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt = diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index 1eff7d8d8f15..d8f5ad23770b 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -757,6 +757,7 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_SUSPENDQP_QPID GENMASK_ULL(23, 0) #define IRDMA_CQPSQ_RESUMEQP_QSHANDLE GENMASK_ULL(31, 0) #define IRDMA_CQPSQ_RESUMEQP_QPID GENMASK(23, 0) +#define IRDMA_MANAGE_RSRC_VER2 BIT_ULL(2) #define IRDMA_CQPSQ_MIN_STAG_INVALID 0x0001 #define IRDMA_CQPSQ_MIN_SUSPEND_PND 0x0005 @@ -909,6 +910,7 @@ enum irdma_cqp_op_type { #define IRDMA_FEATURE_INFO GENMASK_ULL(47, 0) #define IRDMA_FEATURE_CNT GENMASK_ULL(47, 32) #define IRDMA_FEATURE_TYPE GENMASK_ULL(63, 48) +#define IRDMA_FEATURE_RSRC_MAX GENMASK_ULL(31, 0) #define IRDMAQPSQ_OPCODE GENMASK_ULL(37, 32) #define IRDMAQPSQ_COPY_HOST_PBL BIT_ULL(43) @@ -986,6 +988,7 @@ enum irdma_cqp_op_type { #define IRDMA_QUERY_FPM_MAX_PE_SDS GENMASK_ULL(44, 32) #define IRDMA_QUERY_FPM_MAX_PE_SDS_GEN3 GENMASK_ULL(47, 32) #define IRDMA_QUERY_FPM_MAX_CEQS GENMASK_ULL(9, 0) +#define IRDMA_QUERY_FPM_MAX_IRD GENMASK_ULL(53, 50) #define IRDMA_QUERY_FPM_XFBLOCKSIZE GENMASK_ULL(63, 32) #define IRDMA_QUERY_FPM_Q1BLOCKSIZE GENMASK_ULL(63, 32) #define IRDMA_QUERY_FPM_HTMULTIPLIER GENMASK_ULL(19, 16) diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 699d7678c626..f681baedd029 100644 
--- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -180,22 +180,14 @@ enum irdma_feature_type { IRDMA_CQ_MAX_INCR = 3, IRDMA_CEQ_MAX_INCR = 4, IRDMA_SD_MAX_INCR = 5, - IRDMA_QP_SMALL = 6, - IRDMA_QP_MEDIUM = 7, - IRDMA_QP_LARGE = 8, - IRDMA_QP_XLARGE = 9, - IRDMA_CQ_SMALL = 10, - IRDMA_CQ_MEDIUM = 11, - IRDMA_CQ_LARGE = 12, - IRDMA_CQ_XLARGE = 13, - IRDMA_CEQ_SMALL = 14, - IRDMA_CEQ_MEDIUM = 15, - IRDMA_CEQ_LARGE = 16, - IRDMA_CEQ_XLARGE = 17, - IRDMA_SD_SMALL = 18, - IRDMA_SD_MEDIUM = 19, - IRDMA_SD_LARGE = 20, - IRDMA_SD_XLARGE = 21, + IRDMA_MR_MAX_INCR = 6, + IRDMA_Q1_MAX_INCR = 7, + IRDMA_AH_MAX_INCR = 8, + IRDMA_SRQ_MAX_INCR = 9, + IRDMA_TIMER_MAX_INCR = 10, + IRDMA_XF_MAX_INCR = 11, + IRDMA_RRF_MAX_INCR = 12, + IRDMA_PBLE_MAX_INCR = 13, IRDMA_OBJ_1 = 22, IRDMA_OBJ_2 = 23, IRDMA_ENDPT_TRK = 24, @@ -615,6 +607,7 @@ struct irdma_hmc_fpm_misc { u32 max_ceqs; u32 max_sds; u32 loc_mem_pages; + u8 ird; u32 xf_block_size; u32 q1_block_size; u32 ht_multiplier; From 9a1d68786393c4767bf153c73dbcd1ac6c5ecdfe Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 27 Aug 2025 10:25:40 -0500 Subject: [PATCH 63/85] RDMA/irdma: Support 64-byte CQEs and GEN3 CQE opcode decoding Introduce support for 64-byte CQEs in GEN3 devices. Additionally, implement GEN3-specific CQE opcode decoding. Signed-off-by: Shiraz Saleem Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250827152545.2056-12-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/hw.c | 6 ++++-- drivers/infiniband/hw/irdma/main.h | 3 ++- drivers/infiniband/hw/irdma/utils.c | 5 ++++- drivers/infiniband/hw/irdma/verbs.c | 30 ++++++++++++++++++++++++----- drivers/infiniband/hw/irdma/verbs.h | 13 +++++++++++++ 5 files changed, 48 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 459343ef72b9..2931d1a879e9 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -1117,13 +1117,15 @@ static int irdma_create_ccq(struct irdma_pci_f *rf) struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_ccq_init_info info = {}; struct irdma_ccq *ccq = &rf->ccq; + int ccq_size; int status; dev->ccq = &ccq->sc_cq; dev->ccq->dev = dev; info.dev = dev; + ccq_size = (rf->rdma_ver >= IRDMA_GEN_3) ? 
IW_GEN_3_CCQ_SIZE : IW_CCQ_SIZE; ccq->shadow_area.size = sizeof(struct irdma_cq_shadow_area); - ccq->mem_cq.size = ALIGN(sizeof(struct irdma_cqe) * IW_CCQ_SIZE, + ccq->mem_cq.size = ALIGN(sizeof(struct irdma_cqe) * ccq_size, IRDMA_CQ0_ALIGNMENT); ccq->mem_cq.va = dma_alloc_coherent(dev->hw->device, ccq->mem_cq.size, &ccq->mem_cq.pa, GFP_KERNEL); @@ -1140,7 +1142,7 @@ static int irdma_create_ccq(struct irdma_pci_f *rf) /* populate the ccq init info */ info.cq_base = ccq->mem_cq.va; info.cq_pa = ccq->mem_cq.pa; - info.num_elem = IW_CCQ_SIZE; + info.num_elem = ccq_size; info.shadow_area = ccq->shadow_area.va; info.shadow_area_pa = ccq->shadow_area.pa; info.ceqe_mask = false; diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index 3d37cd12e9f6..6922cfaac6d0 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -66,7 +66,8 @@ extern struct iidc_rdma_core_auxiliary_drv ig3rdma_core_auxiliary_drv; #define IRDMA_MACIP_ADD 1 #define IRDMA_MACIP_DELETE 2 -#define IW_CCQ_SIZE (IRDMA_CQP_SW_SQSIZE_2048 + 1) +#define IW_GEN_3_CCQ_SIZE (2 * IRDMA_CQP_SW_SQSIZE_2048 + 2) +#define IW_CCQ_SIZE (IRDMA_CQP_SW_SQSIZE_2048 + 2) #define IW_CEQ_SIZE 2048 #define IW_AEQ_SIZE 2048 diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 1fd09e287e6f..0b12a875dbe9 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -2338,7 +2338,10 @@ bool irdma_cq_empty(struct irdma_cq *iwcq) u8 polarity; ukcq = &iwcq->sc_cq.cq_uk; - cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq); + if (ukcq->avoid_mem_cflct) + cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(ukcq); + else + cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq); get_64bit_val(cqe, 24, &qword3); polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 2857631543b7..da0f56e0c897 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1971,8 +1971,13 @@ static int irdma_resize_cq(struct ib_cq *ibcq, int entries, if (!iwcq->user_mode) { entries++; - if (rf->sc_dev.hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) + + if (!iwcq->sc_cq.cq_uk.avoid_mem_cflct && + dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) entries *= 2; + + if (entries & 1) + entries += 1; /* cq size must be an even number */ } info.cq_size = max(entries, 4); @@ -2115,6 +2120,7 @@ static int irdma_create_cq(struct ib_cq *ibcq, unsigned long flags; int err_code; int entries = attr->cqe; + bool cqe_64byte_ena; err_code = cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev); if (err_code) @@ -2138,6 +2144,9 @@ static int irdma_create_cq(struct ib_cq *ibcq, info.dev = dev; ukinfo->cq_size = max(entries, 4); ukinfo->cq_id = cq_num; + cqe_64byte_ena = dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_64_BYTE_CQE ? 
+ true : false; + ukinfo->avoid_mem_cflct = cqe_64byte_ena; iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size; if (attr->comp_vector < rf->ceqs_count) info.ceq_id = attr->comp_vector; @@ -2213,11 +2222,18 @@ static int irdma_create_cq(struct ib_cq *ibcq, } entries++; - if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) + if (!cqe_64byte_ena && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) entries *= 2; + + if (entries & 1) + entries += 1; /* cq size must be an even number */ + ukinfo->cq_size = entries; - rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_cqe); + if (cqe_64byte_ena) + rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_extended_cqe); + else + rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_cqe); iwcq->kmem.size = ALIGN(round_up(rsize, 256), 256); iwcq->kmem.va = dma_alloc_coherent(dev->hw->device, iwcq->kmem.size, @@ -3784,8 +3800,12 @@ static void irdma_process_cqe(struct ib_wc *entry, if (cq_poll_info->q_type == IRDMA_CQE_QTYPE_SQ) { set_ib_wc_op_sq(cq_poll_info, entry); } else { - set_ib_wc_op_rq(cq_poll_info, entry, - qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM); + if (qp->dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) + set_ib_wc_op_rq(cq_poll_info, entry, + qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM ? + true : false); + else + set_ib_wc_op_rq_gen_3(cq_poll_info, entry); if (qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_UD && cq_poll_info->stag_invalid_set) { entry->ex.invalidate_rkey = cq_poll_info->inv_stag; diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index cfa140b36395..fcb163c45252 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -267,6 +267,19 @@ static inline void set_ib_wc_op_sq(struct irdma_cq_poll_info *cq_poll_info, } } +static inline void set_ib_wc_op_rq_gen_3(struct irdma_cq_poll_info *info, + struct ib_wc *entry) +{ + switch (info->op_type) { + case IRDMA_OP_TYPE_RDMA_WRITE: + case IRDMA_OP_TYPE_RDMA_WRITE_SOL: + entry->opcode = IB_WC_RECV_RDMA_WITH_IMM; + break; + default: + entry->opcode = IB_WC_RECV; + } +} + static inline void set_ib_wc_op_rq(struct irdma_cq_poll_info *cq_poll_info, struct ib_wc *entry, bool send_imm_support) { From 563e1feb5f6ed579acb55850f1bbb831aecf645a Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Wed, 27 Aug 2025 10:25:41 -0500 Subject: [PATCH 64/85] RDMA/irdma: Add SRQ support Implement verb API and UAPI changes to support SRQ functionality in GEN3 devices. 
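For illustration only (not part of this patch): with SRQ support in place, the
standard libibverbs SRQ flow applies on GEN3 devices. A minimal sketch, where
pd, cq, mr, buf and buf_len are placeholders for an existing protection
domain, completion queue and registered receive buffer; error handling is
omitted:

    #include <infiniband/verbs.h>
    #include <stdint.h>

    /* Create a shared receive queue on the protection domain */
    struct ibv_srq_init_attr srq_init = {
            .attr = { .max_wr = 256, .max_sge = 1, .srq_limit = 0 },
    };
    struct ibv_srq *srq = ibv_create_srq(pd, &srq_init);

    /* Post a receive buffer to the SRQ; completions still arrive on the CQ */
    struct ibv_sge sge = {
            .addr   = (uintptr_t)buf,
            .length = buf_len,
            .lkey   = mr->lkey,
    };
    struct ibv_recv_wr wr = { .wr_id = 1, .sg_list = &sge, .num_sge = 1 };
    struct ibv_recv_wr *bad_wr;
    ibv_post_srq_recv(srq, &wr, &bad_wr);

    /* QPs created with .srq set share the SRQ's receive buffers */
    struct ibv_qp_init_attr qp_init = {
            .send_cq = cq, .recv_cq = cq, .srq = srq,
            .qp_type = IBV_QPT_RC,
            .cap = { .max_send_wr = 64, .max_send_sge = 1 },
    };
    struct ibv_qp *qp = ibv_create_qp(pd, &qp_init);

    /* Arm the SRQ limit; the driver handles the resulting
     * IRDMA_AE_SRQ_LIMIT asynchronous event.
     */
    struct ibv_srq_attr lim = { .srq_limit = 16 };
    ibv_modify_srq(srq, &lim, IBV_SRQ_LIMIT);
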
Signed-off-by: Faisal Latif Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250827152545.2056-13-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/ctrl.c | 236 +++++++++++++- drivers/infiniband/hw/irdma/defs.h | 36 ++- drivers/infiniband/hw/irdma/hw.c | 21 +- drivers/infiniband/hw/irdma/irdma.h | 1 + drivers/infiniband/hw/irdma/main.h | 12 +- drivers/infiniband/hw/irdma/type.h | 66 ++++ drivers/infiniband/hw/irdma/uk.c | 162 +++++++++- drivers/infiniband/hw/irdma/user.h | 41 +++ drivers/infiniband/hw/irdma/utils.c | 27 ++ drivers/infiniband/hw/irdma/verbs.c | 475 +++++++++++++++++++++++++++- drivers/infiniband/hw/irdma/verbs.h | 25 ++ include/uapi/rdma/irdma-abi.h | 15 +- 12 files changed, 1103 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 8cc457214537..ef2e46a22c3f 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -412,7 +412,8 @@ int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info) pble_obj_cnt = info->pd->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if ((info->virtual_map && info->sq_pa >= pble_obj_cnt) || - (info->virtual_map && info->rq_pa >= pble_obj_cnt)) + (!info->qp_uk_init_info.srq_uk && + info->virtual_map && info->rq_pa >= pble_obj_cnt)) return -EINVAL; qp->llp_stream_handle = (void *)(-1); @@ -446,6 +447,208 @@ int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info) return 0; } +/** + * irdma_sc_srq_init - init sc_srq structure + * @srq: srq sc struct + * @info: parameters for srq init + */ +int irdma_sc_srq_init(struct irdma_sc_srq *srq, + struct irdma_srq_init_info *info) +{ + u32 srq_size_quanta; + int ret_code; + + ret_code = irdma_uk_srq_init(&srq->srq_uk, &info->srq_uk_init_info); + if (ret_code) + return ret_code; + + srq->dev = info->pd->dev; + srq->pd = info->pd; + srq->vsi = info->vsi; + srq->srq_pa = info->srq_pa; + srq->first_pm_pbl_idx = info->first_pm_pbl_idx; + srq->pasid = info->pasid; + srq->pasid_valid = info->pasid_valid; + srq->srq_limit = info->srq_limit; + srq->leaf_pbl_size = info->leaf_pbl_size; + srq->virtual_map = info->virtual_map; + srq->tph_en = info->tph_en; + srq->arm_limit_event = info->arm_limit_event; + srq->tph_val = info->tph_value; + srq->shadow_area_pa = info->shadow_area_pa; + + /* Smallest SRQ size is 256B i.e. 
8 quanta */ + srq_size_quanta = max((u32)IRDMA_SRQ_MIN_QUANTA, + srq->srq_uk.srq_size * + srq->srq_uk.wqe_size_multiplier); + srq->hw_srq_size = irdma_get_encoded_wqe_size(srq_size_quanta, + IRDMA_QUEUE_TYPE_SRQ); + + return 0; +} + +/** + * irdma_sc_srq_create - send srq create CQP WQE + * @srq: srq sc struct + * @scratch: u64 saved to be used during cqp completion + * @post_sq: flag for cqp db to ring + */ +static int irdma_sc_srq_create(struct irdma_sc_srq *srq, u64 scratch, + bool post_sq) +{ + struct irdma_sc_cqp *cqp; + __le64 *wqe; + u64 hdr; + + cqp = srq->pd->dev->cqp; + if (srq->srq_uk.srq_id < cqp->dev->hw_attrs.min_hw_srq_id || + srq->srq_uk.srq_id > + (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_SRQ].max_cnt - 1)) + return -EINVAL; + + wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); + if (!wqe) + return -ENOMEM; + + set_64bit_val(wqe, 0, + FIELD_PREP(IRDMA_CQPSQ_SRQ_SRQ_LIMIT, srq->srq_limit) | + FIELD_PREP(IRDMA_CQPSQ_SRQ_RQSIZE, srq->hw_srq_size) | + FIELD_PREP(IRDMA_CQPSQ_SRQ_RQ_WQE_SIZE, srq->srq_uk.wqe_size)); + set_64bit_val(wqe, 8, (uintptr_t)srq); + set_64bit_val(wqe, 16, + FIELD_PREP(IRDMA_CQPSQ_SRQ_PD_ID, srq->pd->pd_id)); + set_64bit_val(wqe, 32, + FIELD_PREP(IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR, + srq->srq_pa >> + IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR_S)); + set_64bit_val(wqe, 40, + FIELD_PREP(IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR, + srq->shadow_area_pa >> + IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR_S)); + set_64bit_val(wqe, 48, + FIELD_PREP(IRDMA_CQPSQ_SRQ_FIRST_PM_PBL_IDX, + srq->first_pm_pbl_idx)); + + hdr = srq->srq_uk.srq_id | + FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_SRQ) | + FIELD_PREP(IRDMA_CQPSQ_SRQ_LEAF_PBL_SIZE, srq->leaf_pbl_size) | + FIELD_PREP(IRDMA_CQPSQ_SRQ_VIRTMAP, srq->virtual_map) | + FIELD_PREP(IRDMA_CQPSQ_SRQ_ARM_LIMIT_EVENT, + srq->arm_limit_event) | + FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); + + dma_wmb(); /* make sure WQE is written before valid bit is set */ + + set_64bit_val(wqe, 24, hdr); + + print_hex_dump_debug("WQE: SRQ_CREATE WQE", DUMP_PREFIX_OFFSET, 16, 8, + wqe, IRDMA_CQP_WQE_SIZE * 8, false); + if (post_sq) + irdma_sc_cqp_post_sq(cqp); + + return 0; +} + +/** + * irdma_sc_srq_modify - send modify_srq CQP WQE + * @srq: srq sc struct + * @info: parameters for srq modification + * @scratch: u64 saved to be used during cqp completion + * @post_sq: flag for cqp db to ring + */ +static int irdma_sc_srq_modify(struct irdma_sc_srq *srq, + struct irdma_modify_srq_info *info, u64 scratch, + bool post_sq) +{ + struct irdma_sc_cqp *cqp; + __le64 *wqe; + u64 hdr; + + cqp = srq->dev->cqp; + if (srq->srq_uk.srq_id < cqp->dev->hw_attrs.min_hw_srq_id || + srq->srq_uk.srq_id > + (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_SRQ].max_cnt - 1)) + return -EINVAL; + + wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); + if (!wqe) + return -ENOMEM; + + set_64bit_val(wqe, 0, + FIELD_PREP(IRDMA_CQPSQ_SRQ_SRQ_LIMIT, info->srq_limit) | + FIELD_PREP(IRDMA_CQPSQ_SRQ_RQSIZE, srq->hw_srq_size) | + FIELD_PREP(IRDMA_CQPSQ_SRQ_RQ_WQE_SIZE, srq->srq_uk.wqe_size)); + set_64bit_val(wqe, 8, + FIELD_PREP(IRDMA_CQPSQ_SRQ_SRQCTX, srq->srq_uk.srq_id)); + set_64bit_val(wqe, 16, + FIELD_PREP(IRDMA_CQPSQ_SRQ_PD_ID, srq->pd->pd_id)); + set_64bit_val(wqe, 32, + FIELD_PREP(IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR, + srq->srq_pa >> + IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR_S)); + set_64bit_val(wqe, 40, + FIELD_PREP(IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR, + srq->shadow_area_pa >> + IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR_S)); + set_64bit_val(wqe, 48, + FIELD_PREP(IRDMA_CQPSQ_SRQ_FIRST_PM_PBL_IDX, + 
srq->first_pm_pbl_idx)); + + hdr = srq->srq_uk.srq_id | + FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MODIFY_SRQ) | + FIELD_PREP(IRDMA_CQPSQ_SRQ_LEAF_PBL_SIZE, srq->leaf_pbl_size) | + FIELD_PREP(IRDMA_CQPSQ_SRQ_VIRTMAP, srq->virtual_map) | + FIELD_PREP(IRDMA_CQPSQ_SRQ_ARM_LIMIT_EVENT, + info->arm_limit_event) | + FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); + dma_wmb(); /* make sure WQE is written before valid bit is set */ + + set_64bit_val(wqe, 24, hdr); + + print_hex_dump_debug("WQE: SRQ_MODIFY WQE", DUMP_PREFIX_OFFSET, 16, 8, + wqe, IRDMA_CQP_WQE_SIZE * 8, false); + if (post_sq) + irdma_sc_cqp_post_sq(cqp); + + return 0; +} + +/** + * irdma_sc_srq_destroy - send srq_destroy CQP WQE + * @srq: srq sc struct + * @scratch: u64 saved to be used during cqp completion + * @post_sq: flag for cqp db to ring + */ +static int irdma_sc_srq_destroy(struct irdma_sc_srq *srq, u64 scratch, + bool post_sq) +{ + struct irdma_sc_cqp *cqp; + __le64 *wqe; + u64 hdr; + + cqp = srq->dev->cqp; + + wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); + if (!wqe) + return -ENOMEM; + + set_64bit_val(wqe, 8, (uintptr_t)srq); + + hdr = srq->srq_uk.srq_id | + FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_SRQ) | + FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); + dma_wmb(); /* make sure WQE is written before valid bit is set */ + + set_64bit_val(wqe, 24, hdr); + + print_hex_dump_debug("WQE: SRQ_DESTROY WQE", DUMP_PREFIX_OFFSET, 16, + 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false); + if (post_sq) + irdma_sc_cqp_post_sq(cqp); + + return 0; +} + /** * irdma_sc_qp_create - create qp * @qp: sc qp @@ -837,6 +1040,7 @@ static void irdma_sc_qp_setctx_roce_gen_3(struct irdma_sc_qp *qp, FIELD_PREP(IRDMAQPC_ISQP1, roce_info->is_qp1) | FIELD_PREP(IRDMAQPC_ROCE_TVER, roce_info->roce_tver) | FIELD_PREP(IRDMAQPC_IPV4, udp->ipv4) | + FIELD_PREP(IRDMAQPC_USE_SRQ, !qp->qp_uk.srq_uk ? 0 : 1) | FIELD_PREP(IRDMAQPC_INSERTVLANTAG, udp->insert_vlan_tag); set_64bit_val(qp_ctx, 0, qw0); set_64bit_val(qp_ctx, 8, qp->sq_pa); @@ -921,6 +1125,9 @@ static void irdma_sc_qp_setctx_roce_gen_3(struct irdma_sc_qp *qp, FIELD_PREP(IRDMAQPC_LOCAL_IPADDR0, udp->local_ipaddr[0])); set_64bit_val(qp_ctx, 200, FIELD_PREP(IRDMAQPC_THIGH, roce_info->t_high) | + FIELD_PREP(IRDMAQPC_SRQ_ID, + !qp->qp_uk.srq_uk ? + 0 : qp->qp_uk.srq_uk->srq_id) | FIELD_PREP(IRDMAQPC_TLOW, roce_info->t_low)); set_64bit_val(qp_ctx, 208, roce_info->pd_id | FIELD_PREP(IRDMAQPC_STAT_INDEX_GEN3, info->stats_idx) | @@ -2219,6 +2426,14 @@ u8 irdma_get_encoded_wqe_size(u32 wqsize, enum irdma_queue_type queue_type) { u8 encoded_size = 0; + if (queue_type == IRDMA_QUEUE_TYPE_SRQ) { + /* Smallest SRQ size is 256B (8 quanta) that gets + * encoded to 0. + */ + encoded_size = ilog2(wqsize) - 3; + + return encoded_size; + } /* cqp sq's hw coded value starts from 1 for size of 4 * while it starts from 0 for qp' wq's. */ @@ -4585,7 +4800,7 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, case IRDMA_AE_SRQ_LIMIT: info->srq = true; /* [63:6] from CMPL_CTXT, [5:0] from WQDESCIDX. 
*/ - info->compl_ctx = compl_ctx | info->wqe_idx; + info->compl_ctx = compl_ctx; ae_src = IRDMA_AE_SOURCE_RSVD; break; case IRDMA_AE_PRIV_OPERATION_DENIED: @@ -6161,6 +6376,22 @@ static int irdma_exec_cqp_cmd(struct irdma_sc_dev *dev, &pcmdinfo->in.u.mc_modify.info, pcmdinfo->in.u.mc_modify.scratch); break; + case IRDMA_OP_SRQ_CREATE: + status = irdma_sc_srq_create(pcmdinfo->in.u.srq_create.srq, + pcmdinfo->in.u.srq_create.scratch, + pcmdinfo->post_sq); + break; + case IRDMA_OP_SRQ_MODIFY: + status = irdma_sc_srq_modify(pcmdinfo->in.u.srq_modify.srq, + &pcmdinfo->in.u.srq_modify.info, + pcmdinfo->in.u.srq_modify.scratch, + pcmdinfo->post_sq); + break; + case IRDMA_OP_SRQ_DESTROY: + status = irdma_sc_srq_destroy(pcmdinfo->in.u.srq_destroy.srq, + pcmdinfo->in.u.srq_destroy.scratch, + pcmdinfo->post_sq); + break; default: status = -EOPNOTSUPP; break; @@ -6318,6 +6549,7 @@ int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev, dev->protocol_used = info->protocol_used; /* Setup the hardware limits, hmc may limit further */ dev->hw_attrs.min_hw_qp_id = IRDMA_MIN_IW_QP_ID; + dev->hw_attrs.min_hw_srq_id = IRDMA_MIN_IW_SRQ_ID; dev->hw_attrs.min_hw_aeq_size = IRDMA_MIN_AEQ_ENTRIES; if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) dev->hw_attrs.max_hw_aeq_size = IRDMA_MAX_AEQ_ENTRIES_GEN_3; diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index d8f5ad23770b..408058b6ba55 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -140,7 +140,11 @@ enum irdma_protocol_used { #define IRDMA_QP_SW_MAX_RQ_QUANTA 32768 #define IRDMA_MAX_QP_WRS(max_quanta_per_wr) \ ((IRDMA_QP_SW_MAX_WQ_QUANTA - IRDMA_SQ_RSVD) / (max_quanta_per_wr)) +#define IRDMA_SRQ_MIN_QUANTA 8 #define IRDMA_SRQ_MAX_QUANTA 262144 +#define IRDMA_MAX_SRQ_WRS \ + ((IRDMA_SRQ_MAX_QUANTA - IRDMA_RQ_RSVD) / IRDMA_MAX_QUANTA_PER_WR) + #define IRDMAQP_TERM_SEND_TERM_AND_FIN 0 #define IRDMAQP_TERM_SEND_TERM_ONLY 1 #define IRDMAQP_TERM_SEND_FIN_ONLY 2 @@ -236,9 +240,12 @@ enum irdma_cqp_op_type { IRDMA_OP_ADD_LOCAL_MAC_ENTRY = 46, IRDMA_OP_DELETE_LOCAL_MAC_ENTRY = 47, IRDMA_OP_CQ_MODIFY = 48, + IRDMA_OP_SRQ_CREATE = 49, + IRDMA_OP_SRQ_MODIFY = 50, + IRDMA_OP_SRQ_DESTROY = 51, /* Must be last entry*/ - IRDMA_MAX_CQP_OPS = 49, + IRDMA_MAX_CQP_OPS = 52, }; /* CQP SQ WQES */ @@ -248,6 +255,9 @@ enum irdma_cqp_op_type { #define IRDMA_CQP_OP_CREATE_CQ 0x03 #define IRDMA_CQP_OP_MODIFY_CQ 0x04 #define IRDMA_CQP_OP_DESTROY_CQ 0x05 +#define IRDMA_CQP_OP_CREATE_SRQ 0x06 +#define IRDMA_CQP_OP_MODIFY_SRQ 0x07 +#define IRDMA_CQP_OP_DESTROY_SRQ 0x08 #define IRDMA_CQP_OP_ALLOC_STAG 0x09 #define IRDMA_CQP_OP_REG_MR 0x0a #define IRDMA_CQP_OP_QUERY_STAG 0x0b @@ -519,6 +529,7 @@ enum irdma_cqp_op_type { #define IRDMA_CQ_ERROR BIT_ULL(55) #define IRDMA_CQ_SQ BIT_ULL(62) +#define IRDMA_CQ_SRQ BIT_ULL(52) #define IRDMA_CQ_VALID BIT_ULL(63) #define IRDMA_CQ_IMMVALID BIT_ULL(62) #define IRDMA_CQ_UDSMACVALID BIT_ULL(61) @@ -628,6 +639,24 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_QP_DBSHADOWADDR IRDMA_CQPHC_QPCTX +#define IRDMA_CQPSQ_SRQ_RQSIZE GENMASK_ULL(3, 0) +#define IRDMA_CQPSQ_SRQ_RQ_WQE_SIZE GENMASK_ULL(5, 4) +#define IRDMA_CQPSQ_SRQ_SRQ_LIMIT GENMASK_ULL(43, 32) +#define IRDMA_CQPSQ_SRQ_SRQCTX GENMASK_ULL(63, 6) +#define IRDMA_CQPSQ_SRQ_PD_ID GENMASK_ULL(39, 16) +#define IRDMA_CQPSQ_SRQ_SRQ_ID GENMASK_ULL(15, 0) +#define IRDMA_CQPSQ_SRQ_OP GENMASK_ULL(37, 32) +#define IRDMA_CQPSQ_SRQ_LEAF_PBL_SIZE GENMASK_ULL(45, 44) +#define IRDMA_CQPSQ_SRQ_VIRTMAP BIT_ULL(47) +#define 
IRDMA_CQPSQ_SRQ_TPH_EN BIT_ULL(60) +#define IRDMA_CQPSQ_SRQ_ARM_LIMIT_EVENT BIT_ULL(61) +#define IRDMA_CQPSQ_SRQ_FIRST_PM_PBL_IDX GENMASK_ULL(27, 0) +#define IRDMA_CQPSQ_SRQ_TPH_VALUE GENMASK_ULL(7, 0) +#define IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR_S 8 +#define IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR GENMASK_ULL(63, 8) +#define IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR_S 6 +#define IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR GENMASK_ULL(63, 6) + #define IRDMA_CQPSQ_CQ_CQSIZE GENMASK_ULL(20, 0) #define IRDMA_CQPSQ_CQ_CQCTX GENMASK_ULL(62, 0) #define IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD GENMASK(17, 0) @@ -779,6 +808,11 @@ enum irdma_cqp_op_type { #define IRDMAQPC_INSERTL2TAG2 BIT_ULL(11) #define IRDMAQPC_LIMIT GENMASK_ULL(13, 12) +#define IRDMAQPC_USE_SRQ BIT_ULL(10) +#define IRDMAQPC_SRQ_ID GENMASK_ULL(15, 0) +#define IRDMAQPC_PASID GENMASK_ULL(19, 0) +#define IRDMAQPC_PASID_VALID BIT_ULL(11) + #define IRDMAQPC_ECN_EN BIT_ULL(14) #define IRDMAQPC_DROPOOOSEG BIT_ULL(15) #define IRDMAQPC_DUPACK_THRESH GENMASK_ULL(18, 16) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 2931d1a879e9..27b9623c2b09 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -269,6 +269,7 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) struct irdma_sc_qp *qp = NULL; struct irdma_qp_host_ctx_info *ctx_info = NULL; struct irdma_device *iwdev = rf->iwdev; + struct irdma_sc_srq *srq; unsigned long flags; u32 aeqcnt = 0; @@ -320,6 +321,9 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) iwqp->last_aeq = info->ae_id; spin_unlock_irqrestore(&iwqp->lock, flags); ctx_info = &iwqp->ctx_info; + } else if (info->srq) { + if (info->ae_id != IRDMA_AE_SRQ_LIMIT) + continue; } else { if (info->ae_id != IRDMA_AE_CQ_OPERATION_ERROR && info->ae_id != IRDMA_AE_CQP_DEFERRED_COMPLETE) @@ -417,6 +421,12 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) } irdma_cq_rem_ref(&iwcq->ibcq); break; + case IRDMA_AE_SRQ_LIMIT: + srq = (struct irdma_sc_srq *)(uintptr_t)info->compl_ctx; + irdma_srq_event(srq); + break; + case IRDMA_AE_SRQ_CATASTROPHIC_ERROR: + break; case IRDMA_AE_CQP_DEFERRED_COMPLETE: /* Remove completed CQP requests from pending list * and notify about those CQP ops completion. 
@@ -1839,7 +1849,9 @@ static void irdma_get_used_rsrc(struct irdma_device *iwdev) iwdev->rf->used_qps = find_first_zero_bit(iwdev->rf->allocated_qps, iwdev->rf->max_qp); iwdev->rf->used_cqs = find_first_zero_bit(iwdev->rf->allocated_cqs, - iwdev->rf->max_cq); + iwdev->rf->max_cq); + iwdev->rf->used_srqs = find_first_zero_bit(iwdev->rf->allocated_srqs, + iwdev->rf->max_srq); iwdev->rf->used_mrs = find_first_zero_bit(iwdev->rf->allocated_mrs, iwdev->rf->max_mr); } @@ -2056,7 +2068,8 @@ static void irdma_set_hw_rsrc(struct irdma_pci_f *rf) rf->allocated_qps = (void *)(rf->mem_rsrc + (sizeof(struct irdma_arp_entry) * rf->arp_table_size)); rf->allocated_cqs = &rf->allocated_qps[BITS_TO_LONGS(rf->max_qp)]; - rf->allocated_mrs = &rf->allocated_cqs[BITS_TO_LONGS(rf->max_cq)]; + rf->allocated_srqs = &rf->allocated_cqs[BITS_TO_LONGS(rf->max_cq)]; + rf->allocated_mrs = &rf->allocated_srqs[BITS_TO_LONGS(rf->max_srq)]; rf->allocated_pds = &rf->allocated_mrs[BITS_TO_LONGS(rf->max_mr)]; rf->allocated_ahs = &rf->allocated_pds[BITS_TO_LONGS(rf->max_pd)]; rf->allocated_mcgs = &rf->allocated_ahs[BITS_TO_LONGS(rf->max_ah)]; @@ -2084,12 +2097,14 @@ static u32 irdma_calc_mem_rsrc_size(struct irdma_pci_f *rf) rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_qp); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_mr); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_cq); + rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_srq); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_pd); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->arp_table_size); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_ah); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_mcg); rsrc_size += sizeof(struct irdma_qp **) * rf->max_qp; rsrc_size += sizeof(struct irdma_cq **) * rf->max_cq; + rsrc_size += sizeof(struct irdma_srq **) * rf->max_srq; return rsrc_size; } @@ -2117,6 +2132,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf) rf->max_qp = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt; rf->max_mr = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt; rf->max_cq = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt; + rf->max_srq = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_SRQ].cnt; rf->max_pd = rf->sc_dev.hw_attrs.max_hw_pds; rf->arp_table_size = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_ARP].cnt; rf->max_ah = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt; @@ -2136,6 +2152,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf) set_bit(0, rf->allocated_mrs); set_bit(0, rf->allocated_qps); set_bit(0, rf->allocated_cqs); + set_bit(0, rf->allocated_srqs); set_bit(0, rf->allocated_pds); set_bit(0, rf->allocated_arps); set_bit(0, rf->allocated_ahs); diff --git a/drivers/infiniband/hw/irdma/irdma.h b/drivers/infiniband/hw/irdma/irdma.h index e012f795bce8..ff938a01d70c 100644 --- a/drivers/infiniband/hw/irdma/irdma.h +++ b/drivers/infiniband/hw/irdma/irdma.h @@ -162,6 +162,7 @@ struct irdma_hw_attrs { u32 max_done_count; u32 max_sleep_count; u32 max_cqp_compl_wait_time_ms; + u32 min_hw_srq_id; u16 max_stat_inst; u16 max_stat_idx; }; diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index 6922cfaac6d0..886b30da188a 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -274,6 +274,8 @@ struct irdma_pci_f { u32 max_mr; u32 max_qp; u32 max_cq; + u32 max_srq; + u32 next_srq; u32 max_ah; u32 next_ah; u32 max_mcg; @@ -287,6 +289,7 @@ struct irdma_pci_f { u32 mr_stagmask; u32 used_pds; u32 
used_cqs; + u32 used_srqs; u32 used_mrs; u32 used_qps; u32 arp_table_size; @@ -298,6 +301,7 @@ struct irdma_pci_f { unsigned long *allocated_ws_nodes; unsigned long *allocated_qps; unsigned long *allocated_cqs; + unsigned long *allocated_srqs; unsigned long *allocated_mrs; unsigned long *allocated_pds; unsigned long *allocated_mcgs; @@ -421,6 +425,11 @@ static inline struct irdma_pci_f *dev_to_rf(struct irdma_sc_dev *dev) return container_of(dev, struct irdma_pci_f, sc_dev); } +static inline struct irdma_srq *to_iwsrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct irdma_srq, ibsrq); +} + /** * irdma_alloc_resource - allocate a resource * @iwdev: device pointer @@ -516,7 +525,8 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, void irdma_cq_add_ref(struct ib_cq *ibcq); void irdma_cq_rem_ref(struct ib_cq *ibcq); void irdma_cq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_cq *cq); - +void irdma_srq_event(struct irdma_sc_srq *srq); +void irdma_srq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_srq *srq); void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf); int irdma_hw_modify_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, struct irdma_modify_qp_info *info, bool wait); diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index f681baedd029..43dcdc7b846c 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -242,6 +242,7 @@ enum irdma_syn_rst_handling { enum irdma_queue_type { IRDMA_QUEUE_TYPE_SQ_RQ = 0, IRDMA_QUEUE_TYPE_CQP, + IRDMA_QUEUE_TYPE_SRQ, }; struct irdma_sc_dev; @@ -732,6 +733,51 @@ struct irdma_modify_cq_info { bool cq_resize:1; }; +struct irdma_srq_init_info { + struct irdma_sc_pd *pd; + struct irdma_sc_vsi *vsi; + u64 srq_pa; + u64 shadow_area_pa; + u32 first_pm_pbl_idx; + u32 pasid; + u32 srq_size; + u16 srq_limit; + u8 pasid_valid; + u8 wqe_size; + u8 leaf_pbl_size; + u8 virtual_map; + u8 tph_en; + u8 arm_limit_event; + u8 tph_value; + u8 pbl_chunk_size; + struct irdma_srq_uk_init_info srq_uk_init_info; +}; + +struct irdma_sc_srq { + struct irdma_sc_dev *dev; + struct irdma_sc_vsi *vsi; + struct irdma_sc_pd *pd; + struct irdma_srq_uk srq_uk; + void *back_srq; + u64 srq_pa; + u64 shadow_area_pa; + u32 first_pm_pbl_idx; + u32 pasid; + u32 hw_srq_size; + u16 srq_limit; + u8 pasid_valid; + u8 leaf_pbl_size; + u8 virtual_map; + u8 tph_en; + u8 arm_limit_event; + u8 tph_val; +}; + +struct irdma_modify_srq_info { + u16 srq_limit; + u8 arm_limit_event; +}; + struct irdma_create_qp_info { bool ord_valid:1; bool tcp_ctx_valid:1; @@ -1038,6 +1084,7 @@ struct irdma_qp_host_ctx_info { }; u32 send_cq_num; u32 rcv_cq_num; + u32 srq_id; u32 rem_endpoint_idx; u16 stats_idx; bool srq_valid:1; @@ -1337,6 +1384,8 @@ void irdma_sc_cq_resize(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *inf int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch, u8 hmc_fn_id, bool post_sq, bool poll_registers); +int irdma_sc_srq_init(struct irdma_sc_srq *srq, + struct irdma_srq_init_info *info); void sc_vsi_update_stats(struct irdma_sc_vsi *vsi); struct cqp_info { @@ -1580,6 +1629,23 @@ struct cqp_info { struct irdma_dma_mem query_buff_mem; u64 scratch; } query_rdma; + + struct { + struct irdma_sc_srq *srq; + u64 scratch; + } srq_create; + + struct { + struct irdma_sc_srq *srq; + struct irdma_modify_srq_info info; + u64 scratch; + } srq_modify; + + struct { + struct irdma_sc_srq *srq; + u64 scratch; + } srq_destroy; + } u; }; diff --git 
a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index 38c54e59cc2e..e7ffde792781 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -198,6 +198,26 @@ __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx, return wqe; } +__le64 *irdma_srq_get_next_recv_wqe(struct irdma_srq_uk *srq, u32 *wqe_idx) +{ + int ret_code; + __le64 *wqe; + + if (IRDMA_RING_FULL_ERR(srq->srq_ring)) + return NULL; + + IRDMA_ATOMIC_RING_MOVE_HEAD(srq->srq_ring, *wqe_idx, ret_code); + if (ret_code) + return NULL; + + if (!*wqe_idx) + srq->srwqe_polarity = !srq->srwqe_polarity; + /* rq_wqe_size_multiplier is no of 32 byte quanta in one rq wqe */ + wqe = srq->srq_base[*wqe_idx * (srq->wqe_size_multiplier)].elem; + + return wqe; +} + /** * irdma_qp_get_next_recv_wqe - get next qp's rcv wqe * @qp: hw qp ptr @@ -317,6 +337,58 @@ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, return 0; } +/** + * irdma_uk_srq_post_receive - post a receive wqe to a shared rq + * @srq: shared rq ptr + * @info: post rq information + */ +int irdma_uk_srq_post_receive(struct irdma_srq_uk *srq, + struct irdma_post_rq_info *info) +{ + u32 wqe_idx, i, byte_off; + u32 addl_frag_cnt; + __le64 *wqe; + u64 hdr; + + if (srq->max_srq_frag_cnt < info->num_sges) + return -EINVAL; + + wqe = irdma_srq_get_next_recv_wqe(srq, &wqe_idx); + if (!wqe) + return -ENOMEM; + + addl_frag_cnt = info->num_sges > 1 ? info->num_sges - 1 : 0; + srq->wqe_ops.iw_set_fragment(wqe, 0, info->sg_list, + srq->srwqe_polarity); + + for (i = 1, byte_off = 32; i < info->num_sges; i++) { + srq->wqe_ops.iw_set_fragment(wqe, byte_off, &info->sg_list[i], + srq->srwqe_polarity); + byte_off += 16; + } + + /* if not an odd number set valid bit in next fragment */ + if (srq->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(info->num_sges & 0x01) && + info->num_sges) { + srq->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, + srq->srwqe_polarity); + if (srq->uk_attrs->hw_rev == IRDMA_GEN_2) + ++addl_frag_cnt; + } + + set_64bit_val(wqe, 16, (u64)info->wr_id); + hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | + FIELD_PREP(IRDMAQPSQ_VALID, srq->srwqe_polarity); + + dma_wmb(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, 24, hdr); + + set_64bit_val(srq->shadow_area, 0, (wqe_idx + 1) % srq->srq_ring.size); + + return 0; +} + /** * irdma_uk_rdma_read - rdma read command * @qp: hw qp ptr @@ -973,6 +1045,8 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, u64 comp_ctx, qword0, qword2, qword3; __le64 *cqe; struct irdma_qp_uk *qp; + struct irdma_srq_uk *srq; + u8 is_srq; struct irdma_ring *pring = NULL; u32 wqe_idx; int ret_code; @@ -1046,8 +1120,14 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, } info->q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3); + is_srq = (u8)FIELD_GET(IRDMA_CQ_SRQ, qword3); info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3); info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3); + get_64bit_val(cqe, 8, &comp_ctx); + if (is_srq) + get_64bit_val(cqe, 40, (u64 *)&qp); + else + qp = (struct irdma_qp_uk *)(unsigned long)comp_ctx; if (info->error) { info->major_err = FIELD_GET(IRDMA_CQ_MAJERR, qword3); info->minor_err = FIELD_GET(IRDMA_CQ_MINERR, qword3); @@ -1085,7 +1165,22 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, info->qp_handle = (irdma_qp_handle)(unsigned long)qp; info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3); - if (info->q_type == IRDMA_CQE_QTYPE_RQ) { + if (info->q_type == IRDMA_CQE_QTYPE_RQ && is_srq) { + srq = qp->srq_uk; 
+ + get_64bit_val(cqe, 8, &info->wr_id); + info->bytes_xfered = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0); + + if (qword3 & IRDMACQ_STAG) { + info->stag_invalid_set = true; + info->inv_stag = (u32)FIELD_GET(IRDMACQ_INVSTAG, + qword2); + } else { + info->stag_invalid_set = false; + } + IRDMA_RING_MOVE_TAIL(srq->srq_ring); + pring = &srq->srq_ring; + } else if (info->q_type == IRDMA_CQE_QTYPE_RQ && !is_srq) { u32 array_idx; array_idx = wqe_idx / qp->rq_wqe_size_multiplier; @@ -1210,10 +1305,10 @@ exit: } /** - * irdma_qp_round_up - return round up qp wq depth + * irdma_round_up_wq - return round up qp wq depth * @wqdepth: wq depth in quanta to round up */ -static int irdma_qp_round_up(u32 wqdepth) +static int irdma_round_up_wq(u32 wqdepth) { int scount = 1; @@ -1268,7 +1363,7 @@ int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, { u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift; - *sqdepth = irdma_qp_round_up((sq_size << shift) + IRDMA_SQ_RSVD); + *sqdepth = irdma_round_up_wq((sq_size << shift) + IRDMA_SQ_RSVD); if (*sqdepth < min_size) *sqdepth = min_size; @@ -1290,7 +1385,7 @@ int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, { u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift; - *rqdepth = irdma_qp_round_up((rq_size << shift) + IRDMA_RQ_RSVD); + *rqdepth = irdma_round_up_wq((rq_size << shift) + IRDMA_RQ_RSVD); if (*rqdepth < min_size) *rqdepth = min_size; @@ -1300,6 +1395,26 @@ int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, return 0; } +/* + * irdma_get_srqdepth - get SRQ depth (quanta) + * @uk_attrs: qp HW attributes + * @srq_size: SRQ size + * @shift: shift which determines size of WQE + * @srqdepth: depth of SRQ + */ +int irdma_get_srqdepth(struct irdma_uk_attrs *uk_attrs, u32 srq_size, u8 shift, + u32 *srqdepth) +{ + *srqdepth = irdma_round_up_wq((srq_size << shift) + IRDMA_RQ_RSVD); + + if (*srqdepth < ((u32)uk_attrs->min_hw_wq_size << shift)) + *srqdepth = uk_attrs->min_hw_wq_size << shift; + else if (*srqdepth > uk_attrs->max_hw_srq_quanta) + return -EINVAL; + + return 0; +} + static const struct irdma_wqe_uk_ops iw_wqe_uk_ops = { .iw_copy_inline_data = irdma_copy_inline_data, .iw_inline_data_size_to_quanta = irdma_inline_data_size_to_quanta, @@ -1335,6 +1450,42 @@ static void irdma_setup_connection_wqes(struct irdma_qp_uk *qp, IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->initial_ring, move_cnt); } +/** + * irdma_uk_srq_init - initialize shared qp + * @srq: hw srq (user and kernel) + * @info: srq initialization info + * + * Initializes the vars used in both user and kernel mode. + * The size of the wqe depends on number of max fragments + * allowed. Then size of wqe * the number of wqes should be the + * amount of memory allocated for srq. 
+ */ +int irdma_uk_srq_init(struct irdma_srq_uk *srq, + struct irdma_srq_uk_init_info *info) +{ + u8 rqshift; + + srq->uk_attrs = info->uk_attrs; + if (info->max_srq_frag_cnt > srq->uk_attrs->max_hw_wq_frags) + return -EINVAL; + + irdma_get_wqe_shift(srq->uk_attrs, info->max_srq_frag_cnt, 0, &rqshift); + srq->srq_caps = info->srq_caps; + srq->srq_base = info->srq; + srq->shadow_area = info->shadow_area; + srq->srq_id = info->srq_id; + srq->srwqe_polarity = 0; + srq->srq_size = info->srq_size; + srq->wqe_size = rqshift; + srq->max_srq_frag_cnt = min(srq->uk_attrs->max_hw_wq_frags, + ((u32)2 << rqshift) - 1); + IRDMA_RING_INIT(srq->srq_ring, srq->srq_size); + srq->wqe_size_multiplier = 1 << rqshift; + srq->wqe_ops = iw_wqe_uk_ops; + + return 0; +} + /** * irdma_uk_calc_shift_wq - calculate WQE shift for both SQ and RQ * @ukinfo: qp initialization info @@ -1461,6 +1612,7 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) qp->wqe_ops = iw_wqe_uk_ops_gen_1; else qp->wqe_ops = iw_wqe_uk_ops; + qp->srq_uk = info->srq_uk; return ret_code; } diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index 5f489feda32c..cf324f1c539e 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -60,6 +60,7 @@ enum irdma_device_caps_const { IRDMA_GATHER_STATS_BUF_SIZE = 1024, IRDMA_MIN_IW_QP_ID = 0, IRDMA_MAX_IW_QP_ID = 262143, + IRDMA_MIN_IW_SRQ_ID = 0, IRDMA_MIN_CEQID = 0, IRDMA_MAX_CEQID = 1023, IRDMA_CEQ_MAX_COUNT = IRDMA_MAX_CEQID + 1, @@ -148,6 +149,8 @@ enum irdma_qp_caps { IRDMA_PUSH_MODE = 8, }; +struct irdma_srq_uk; +struct irdma_srq_uk_init_info; struct irdma_qp_uk; struct irdma_cq_uk; struct irdma_qp_uk_init_info; @@ -301,6 +304,39 @@ int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo, u32 *sq_depth, u8 *sq_shift); int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo, u32 *rq_depth, u8 *rq_shift); +int irdma_uk_srq_init(struct irdma_srq_uk *srq, + struct irdma_srq_uk_init_info *info); +int irdma_uk_srq_post_receive(struct irdma_srq_uk *srq, + struct irdma_post_rq_info *info); + +struct irdma_srq_uk { + u32 srq_caps; + struct irdma_qp_quanta *srq_base; + struct irdma_uk_attrs *uk_attrs; + __le64 *shadow_area; + struct irdma_ring srq_ring; + struct irdma_ring initial_ring; + u32 srq_id; + u32 srq_size; + u32 max_srq_frag_cnt; + struct irdma_wqe_uk_ops wqe_ops; + u8 srwqe_polarity; + u8 wqe_size; + u8 wqe_size_multiplier; + u8 deferred_flag; +}; + +struct irdma_srq_uk_init_info { + struct irdma_qp_quanta *srq; + struct irdma_uk_attrs *uk_attrs; + __le64 *shadow_area; + u64 *srq_wrid_array; + u32 srq_id; + u32 srq_caps; + u32 srq_size; + u32 max_srq_frag_cnt; +}; + struct irdma_sq_uk_wr_trk_info { u64 wrid; u32 wr_len; @@ -345,6 +381,7 @@ struct irdma_qp_uk { bool destroy_pending:1; /* Indicates the QP is being destroyed */ void *back_qp; u8 dbg_rq_flushed; + struct irdma_srq_uk *srq_uk; u8 sq_flush_seen; u8 rq_flush_seen; }; @@ -384,6 +421,7 @@ struct irdma_qp_uk_init_info { u8 rq_shift; int abi_ver; bool legacy_mode; + struct irdma_srq_uk *srq_uk; }; struct irdma_cq_uk_init_info { @@ -399,6 +437,7 @@ struct irdma_cq_uk_init_info { __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx, u16 quanta, u32 total_size, struct irdma_post_sq_info *info); +__le64 *irdma_srq_get_next_recv_wqe(struct irdma_srq_uk *srq, u32 *wqe_idx); __le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx); void irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq); 
int irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq); @@ -410,5 +449,7 @@ int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *wqdepth); int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, u32 *wqdepth); +int irdma_get_srqdepth(struct irdma_uk_attrs *uk_attrs, u32 srq_size, u8 shift, + u32 *srqdepth); void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx); #endif /* IRDMA_USER_H */ diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 0b12a875dbe9..8b94d87b0192 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -697,6 +697,9 @@ static const char *const irdma_cqp_cmd_names[IRDMA_MAX_CQP_OPS] = { [IRDMA_OP_ADD_LOCAL_MAC_ENTRY] = "Add Local MAC Entry Cmd", [IRDMA_OP_DELETE_LOCAL_MAC_ENTRY] = "Delete Local MAC Entry Cmd", [IRDMA_OP_CQ_MODIFY] = "CQ Modify Cmd", + [IRDMA_OP_SRQ_CREATE] = "Create SRQ Cmd", + [IRDMA_OP_SRQ_MODIFY] = "Modify SRQ Cmd", + [IRDMA_OP_SRQ_DESTROY] = "Destroy SRQ Cmd", }; static const struct irdma_cqp_err_info irdma_noncrit_err_list[] = { @@ -1167,6 +1170,30 @@ void irdma_free_qp_rsrc(struct irdma_qp *iwqp) kfree(iwqp->kqp.rq_wrid_mem); } +/** + * irdma_srq_wq_destroy - send srq destroy cqp + * @rf: RDMA PCI function + * @srq: hardware control srq + */ +void irdma_srq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_srq *srq) +{ + struct irdma_cqp_request *cqp_request; + struct cqp_cmds_info *cqp_info; + + cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); + if (!cqp_request) + return; + + cqp_info = &cqp_request->info; + cqp_info->cqp_cmd = IRDMA_OP_SRQ_DESTROY; + cqp_info->post_sq = 1; + cqp_info->in.u.srq_destroy.srq = srq; + cqp_info->in.u.srq_destroy.scratch = (uintptr_t)cqp_request; + + irdma_handle_cqp_op(rf, cqp_request); + irdma_put_cqp_request(&rf->cqp, cqp_request); +} + /** * irdma_cq_wq_destroy - send cq destroy cqp * @rf: RDMA PCI function diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index da0f56e0c897..1134a3546d91 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -59,6 +59,9 @@ static int irdma_query_device(struct ib_device *ibdev, #define HCA_CLOCK_TIMESTAMP_MASK 0x1ffff if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_2) props->timestamp_mask = HCA_CLOCK_TIMESTAMP_MASK; + props->max_srq = rf->max_srq - rf->used_srqs; + props->max_srq_wr = IRDMA_MAX_SRQ_WRS; + props->max_srq_sge = hw_attrs->uk_attrs.max_hw_wq_frags; return 0; } @@ -336,6 +339,8 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, uresp.comp_mask |= IRDMA_ALLOC_UCTX_USE_RAW_ATTR; uresp.min_hw_wq_size = uk_attrs->min_hw_wq_size; uresp.comp_mask |= IRDMA_ALLOC_UCTX_MIN_HW_WQ_SIZE; + uresp.max_hw_srq_quanta = uk_attrs->max_hw_srq_quanta; + uresp.comp_mask |= IRDMA_ALLOC_UCTX_MAX_HW_SRQ_QUANTA; if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) { rdma_user_mmap_entry_remove(ucontext->db_mmap_entry); @@ -347,6 +352,8 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, spin_lock_init(&ucontext->cq_reg_mem_list_lock); INIT_LIST_HEAD(&ucontext->qp_reg_mem_list); spin_lock_init(&ucontext->qp_reg_mem_list_lock); + INIT_LIST_HEAD(&ucontext->srq_reg_mem_list); + spin_lock_init(&ucontext->srq_reg_mem_list_lock); return 0; @@ -571,7 +578,11 @@ static void irdma_setup_virt_qp(struct irdma_device *iwdev, if (iwpbl->pbl_allocated) { init_info->virtual_map = true; init_info->sq_pa = qpmr->sq_pbl.idx; - 
init_info->rq_pa = qpmr->rq_pbl.idx; + /* Need to use contiguous buffer for RQ of QP + * in case it is associated with SRQ. + */ + init_info->rq_pa = init_info->qp_uk_init_info.srq_uk ? + qpmr->rq_pa : qpmr->rq_pbl.idx; } else { init_info->sq_pa = qpmr->sq_pbl.addr; init_info->rq_pa = qpmr->rq_pbl.addr; @@ -940,6 +951,18 @@ static int irdma_create_qp(struct ib_qp *ibqp, struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs; struct irdma_qp_init_info init_info = {}; struct irdma_qp_host_ctx_info *ctx_info; + struct irdma_srq *iwsrq; + bool srq_valid = false; + u32 srq_id = 0; + + if (init_attr->srq) { + iwsrq = to_iwsrq(init_attr->srq); + srq_valid = true; + srq_id = iwsrq->srq_num; + init_attr->cap.max_recv_sge = uk_attrs->max_hw_wq_frags; + init_attr->cap.max_recv_wr = 4; + init_info.qp_uk_init_info.srq_uk = &iwsrq->sc_srq.srq_uk; + } err_code = irdma_validate_qp_attrs(init_attr, iwdev); if (err_code) @@ -1046,6 +1069,8 @@ static int irdma_create_qp(struct ib_qp *ibqp, } ctx_info = &iwqp->ctx_info; + ctx_info->srq_valid = srq_valid; + ctx_info->srq_id = srq_id; ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id; ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id; @@ -1171,6 +1196,7 @@ static int irdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->qp_context = iwqp->ibqp.qp_context; init_attr->send_cq = iwqp->ibqp.send_cq; init_attr->recv_cq = iwqp->ibqp.recv_cq; + init_attr->srq = iwqp->ibqp.srq; init_attr->cap = attr->cap; return 0; @@ -1833,6 +1859,24 @@ exit: return err; } +/** + * irdma_srq_free_rsrc - free up resources for srq + * @rf: RDMA PCI function + * @iwsrq: srq ptr + */ +static void irdma_srq_free_rsrc(struct irdma_pci_f *rf, struct irdma_srq *iwsrq) +{ + struct irdma_sc_srq *srq = &iwsrq->sc_srq; + + if (!iwsrq->user_mode) { + dma_free_coherent(rf->sc_dev.hw->device, iwsrq->kmem.size, + iwsrq->kmem.va, iwsrq->kmem.pa); + iwsrq->kmem.va = NULL; + } + + irdma_free_rsrc(rf, rf->allocated_srqs, srq->srq_uk.srq_id); +} + /** * irdma_cq_free_rsrc - free up resources for cq * @rf: RDMA PCI function @@ -1896,6 +1940,22 @@ static int irdma_process_resize_list(struct irdma_cq *iwcq, return cnt; } +/** + * irdma_destroy_srq - destroy srq + * @ibsrq: srq pointer + * @udata: user data + */ +static int irdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +{ + struct irdma_device *iwdev = to_iwdev(ibsrq->device); + struct irdma_srq *iwsrq = to_iwsrq(ibsrq); + struct irdma_sc_srq *srq = &iwsrq->sc_srq; + + irdma_srq_wq_destroy(iwdev->rf, srq); + irdma_srq_free_rsrc(iwdev->rf, iwsrq); + return 0; +} + /** * irdma_destroy_cq - destroy cq * @ib_cq: cq pointer @@ -2084,6 +2144,293 @@ error: return ret; } +/** + * irdma_srq_event - event notification for srq limit + * @srq: shared srq struct + */ +void irdma_srq_event(struct irdma_sc_srq *srq) +{ + struct irdma_srq *iwsrq = container_of(srq, struct irdma_srq, sc_srq); + struct ib_srq *ibsrq = &iwsrq->ibsrq; + struct ib_event event; + + srq->srq_limit = 0; + + if (!ibsrq->event_handler) + return; + + event.device = ibsrq->device; + event.element.port_num = 1; + event.element.srq = ibsrq; + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + ibsrq->event_handler(&event, ibsrq->srq_context); +} + +/** + * irdma_modify_srq - modify srq request + * @ibsrq: srq's pointer for modify + * @attr: access attributes + * @attr_mask: state mask + * @udata: user data + */ +static int irdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, + struct ib_udata *udata) +{ + struct 
irdma_device *iwdev = to_iwdev(ibsrq->device); + struct irdma_srq *iwsrq = to_iwsrq(ibsrq); + struct irdma_cqp_request *cqp_request; + struct irdma_pci_f *rf = iwdev->rf; + struct irdma_modify_srq_info *info; + struct cqp_cmds_info *cqp_info; + int status; + + if (attr_mask & IB_SRQ_MAX_WR) + return -EINVAL; + + if (!(attr_mask & IB_SRQ_LIMIT)) + return 0; + + if (attr->srq_limit > iwsrq->sc_srq.srq_uk.srq_size) + return -EINVAL; + + /* Execute this cqp op synchronously, so we can update srq_limit + * upon successful completion. + */ + cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); + if (!cqp_request) + return -ENOMEM; + + cqp_info = &cqp_request->info; + info = &cqp_info->in.u.srq_modify.info; + info->srq_limit = attr->srq_limit; + if (info->srq_limit > 0xFFF) + info->srq_limit = 0xFFF; + info->arm_limit_event = 1; + + cqp_info->cqp_cmd = IRDMA_OP_SRQ_MODIFY; + cqp_info->post_sq = 1; + cqp_info->in.u.srq_modify.srq = &iwsrq->sc_srq; + cqp_info->in.u.srq_modify.scratch = (uintptr_t)cqp_request; + status = irdma_handle_cqp_op(rf, cqp_request); + irdma_put_cqp_request(&rf->cqp, cqp_request); + if (status) + return status; + + iwsrq->sc_srq.srq_limit = info->srq_limit; + + return 0; +} + +static int irdma_setup_umode_srq(struct irdma_device *iwdev, + struct irdma_srq *iwsrq, + struct irdma_srq_init_info *info, + struct ib_udata *udata) +{ +#define IRDMA_CREATE_SRQ_MIN_REQ_LEN \ + offsetofend(struct irdma_create_srq_req, user_shadow_area) + struct irdma_create_srq_req req = {}; + struct irdma_ucontext *ucontext; + struct irdma_srq_mr *srqmr; + struct irdma_pbl *iwpbl; + unsigned long flags; + + iwsrq->user_mode = true; + ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, + ibucontext); + + if (udata->inlen < IRDMA_CREATE_SRQ_MIN_REQ_LEN) + return -EINVAL; + + if (ib_copy_from_udata(&req, udata, + min(sizeof(req), udata->inlen))) + return -EFAULT; + + spin_lock_irqsave(&ucontext->srq_reg_mem_list_lock, flags); + iwpbl = irdma_get_pbl((unsigned long)req.user_srq_buf, + &ucontext->srq_reg_mem_list); + spin_unlock_irqrestore(&ucontext->srq_reg_mem_list_lock, flags); + if (!iwpbl) + return -EPROTO; + + iwsrq->iwpbl = iwpbl; + srqmr = &iwpbl->srq_mr; + + if (iwpbl->pbl_allocated) { + info->virtual_map = true; + info->pbl_chunk_size = 1; + info->first_pm_pbl_idx = srqmr->srq_pbl.idx; + info->leaf_pbl_size = 1; + } else { + info->srq_pa = srqmr->srq_pbl.addr; + } + info->shadow_area_pa = srqmr->shadow; + + return 0; +} + +static int irdma_setup_kmode_srq(struct irdma_device *iwdev, + struct irdma_srq *iwsrq, + struct irdma_srq_init_info *info, u32 depth, + u8 shift) +{ + struct irdma_srq_uk_init_info *ukinfo = &info->srq_uk_init_info; + struct irdma_dma_mem *mem = &iwsrq->kmem; + u32 size, ring_size; + + ring_size = depth * IRDMA_QP_WQE_MIN_SIZE; + size = ring_size + (IRDMA_SHADOW_AREA_SIZE << 3); + + mem->size = ALIGN(size, 256); + mem->va = dma_alloc_coherent(iwdev->rf->hw.device, mem->size, + &mem->pa, GFP_KERNEL); + if (!mem->va) + return -ENOMEM; + + ukinfo->srq = mem->va; + ukinfo->srq_size = depth >> shift; + ukinfo->shadow_area = mem->va + ring_size; + + info->shadow_area_pa = info->srq_pa + ring_size; + info->srq_pa = mem->pa; + + return 0; +} + +/** + * irdma_create_srq - create srq + * @ibsrq: ib's srq pointer + * @initattrs: attributes for srq + * @udata: user data for create srq + */ +static int irdma_create_srq(struct ib_srq *ibsrq, + struct ib_srq_init_attr *initattrs, + struct ib_udata *udata) +{ + struct irdma_device *iwdev = to_iwdev(ibsrq->device); + 
struct ib_srq_attr *attr = &initattrs->attr; + struct irdma_pd *iwpd = to_iwpd(ibsrq->pd); + struct irdma_srq *iwsrq = to_iwsrq(ibsrq); + struct irdma_srq_uk_init_info *ukinfo; + struct irdma_cqp_request *cqp_request; + struct irdma_srq_init_info info = {}; + struct irdma_pci_f *rf = iwdev->rf; + struct irdma_uk_attrs *uk_attrs; + struct cqp_cmds_info *cqp_info; + int err_code = 0; + u32 depth; + u8 shift; + + uk_attrs = &rf->sc_dev.hw_attrs.uk_attrs; + ukinfo = &info.srq_uk_init_info; + + if (initattrs->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + + if (!(uk_attrs->feature_flags & IRDMA_FEATURE_SRQ) || + attr->max_sge > uk_attrs->max_hw_wq_frags) + return -EINVAL; + + refcount_set(&iwsrq->refcnt, 1); + spin_lock_init(&iwsrq->lock); + err_code = irdma_alloc_rsrc(rf, rf->allocated_srqs, rf->max_srq, + &iwsrq->srq_num, &rf->next_srq); + if (err_code) + return err_code; + + ukinfo->max_srq_frag_cnt = attr->max_sge; + ukinfo->uk_attrs = uk_attrs; + ukinfo->srq_id = iwsrq->srq_num; + + irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_srq_frag_cnt, 0, + &shift); + + err_code = irdma_get_srqdepth(ukinfo->uk_attrs, attr->max_wr, + shift, &depth); + if (err_code) + return err_code; + + /* Actual SRQ size in WRs for ring and HW */ + ukinfo->srq_size = depth >> shift; + + /* Max postable WRs to SRQ */ + iwsrq->max_wr = (depth - IRDMA_RQ_RSVD) >> shift; + attr->max_wr = iwsrq->max_wr; + + if (udata) + err_code = irdma_setup_umode_srq(iwdev, iwsrq, &info, udata); + else + err_code = irdma_setup_kmode_srq(iwdev, iwsrq, &info, depth, + shift); + + if (err_code) + goto free_rsrc; + + info.vsi = &iwdev->vsi; + info.pd = &iwpd->sc_pd; + + err_code = irdma_sc_srq_init(&iwsrq->sc_srq, &info); + if (err_code) + goto free_dmem; + + cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); + if (!cqp_request) { + err_code = -ENOMEM; + goto free_dmem; + } + + cqp_info = &cqp_request->info; + cqp_info->cqp_cmd = IRDMA_OP_SRQ_CREATE; + cqp_info->post_sq = 1; + cqp_info->in.u.srq_create.srq = &iwsrq->sc_srq; + cqp_info->in.u.srq_create.scratch = (uintptr_t)cqp_request; + err_code = irdma_handle_cqp_op(rf, cqp_request); + irdma_put_cqp_request(&rf->cqp, cqp_request); + if (err_code) + goto free_dmem; + + if (udata) { + struct irdma_create_srq_resp resp = {}; + + resp.srq_id = iwsrq->srq_num; + resp.srq_size = ukinfo->srq_size; + if (ib_copy_to_udata(udata, &resp, + min(sizeof(resp), udata->outlen))) { + err_code = -EPROTO; + goto srq_destroy; + } + } + + return 0; + +srq_destroy: + irdma_srq_wq_destroy(rf, &iwsrq->sc_srq); + +free_dmem: + if (!iwsrq->user_mode) + dma_free_coherent(rf->hw.device, iwsrq->kmem.size, + iwsrq->kmem.va, iwsrq->kmem.pa); +free_rsrc: + irdma_free_rsrc(rf, rf->allocated_srqs, iwsrq->srq_num); + return err_code; +} + +/** + * irdma_query_srq - get SRQ attributes + * @ibsrq: the SRQ to query + * @attr: the attributes of the SRQ + */ +static int irdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) +{ + struct irdma_srq *iwsrq = to_iwsrq(ibsrq); + + attr->max_wr = iwsrq->max_wr; + attr->max_sge = iwsrq->sc_srq.srq_uk.max_srq_frag_cnt; + attr->srq_limit = iwsrq->sc_srq.srq_limit; + + return 0; +} + static inline int cq_validate_flags(u32 flags, u8 hw_rev) { /* GEN1 does not support CQ create flags */ @@ -2536,6 +2883,7 @@ static int irdma_handle_q_mem(struct irdma_device *iwdev, struct irdma_mr *iwmr = iwpbl->iwmr; struct irdma_qp_mr *qpmr = &iwpbl->qp_mr; struct irdma_cq_mr *cqmr = &iwpbl->cq_mr; + struct irdma_srq_mr *srqmr = &iwpbl->srq_mr; struct irdma_hmc_pble *hmc_p; 
u64 *arr = iwmr->pgaddrmem; u32 pg_size, total; @@ -2555,7 +2903,10 @@ static int irdma_handle_q_mem(struct irdma_device *iwdev, total = req->sq_pages + req->rq_pages; hmc_p = &qpmr->sq_pbl; qpmr->shadow = (dma_addr_t)arr[total]; - + /* Need to use physical address for RQ of QP + * in case it is associated with SRQ. + */ + qpmr->rq_pa = (dma_addr_t)arr[req->sq_pages]; if (lvl) { ret = irdma_check_mem_contiguous(arr, req->sq_pages, pg_size); @@ -2575,6 +2926,18 @@ static int irdma_handle_q_mem(struct irdma_device *iwdev, hmc_p->addr = arr[req->sq_pages]; } break; + case IRDMA_MEMREG_TYPE_SRQ: + hmc_p = &srqmr->srq_pbl; + srqmr->shadow = (dma_addr_t)arr[req->rq_pages]; + if (lvl) + ret = irdma_check_mem_contiguous(arr, req->rq_pages, + pg_size); + + if (!ret) + hmc_p->idx = palloc->level1.idx; + else + hmc_p->addr = arr[0]; + break; case IRDMA_MEMREG_TYPE_CQ: hmc_p = &cqmr->cq_pbl; @@ -3045,6 +3408,37 @@ static int irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req, return 0; } +static int irdma_reg_user_mr_type_srq(struct irdma_mem_reg_req req, + struct ib_udata *udata, + struct irdma_mr *iwmr) +{ + struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device); + struct irdma_pbl *iwpbl = &iwmr->iwpbl; + struct irdma_ucontext *ucontext; + unsigned long flags; + u32 total; + int err; + u8 lvl; + + total = req.rq_pages + IRDMA_SHADOW_PGCNT; + if (total > iwmr->page_cnt) + return -EINVAL; + + lvl = req.rq_pages > 1 ? PBLE_LEVEL_1 : PBLE_LEVEL_0; + err = irdma_handle_q_mem(iwdev, &req, iwpbl, lvl); + if (err) + return err; + + ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, + ibucontext); + spin_lock_irqsave(&ucontext->srq_reg_mem_list_lock, flags); + list_add_tail(&iwpbl->list, &ucontext->srq_reg_mem_list); + iwpbl->on_list = true; + spin_unlock_irqrestore(&ucontext->srq_reg_mem_list_lock, flags); + + return 0; +} + static int irdma_reg_user_mr_type_cq(struct irdma_mem_reg_req req, struct ib_udata *udata, struct irdma_mr *iwmr) @@ -3135,6 +3529,12 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, if (err) goto error; + break; + case IRDMA_MEMREG_TYPE_SRQ: + err = irdma_reg_user_mr_type_srq(req, udata, iwmr); + if (err) + goto error; + break; case IRDMA_MEMREG_TYPE_CQ: err = irdma_reg_user_mr_type_cq(req, udata, iwmr); @@ -3455,6 +3855,14 @@ static void irdma_del_memlist(struct irdma_mr *iwmr, } spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); break; + case IRDMA_MEMREG_TYPE_SRQ: + spin_lock_irqsave(&ucontext->srq_reg_mem_list_lock, flags); + if (iwpbl->on_list) { + iwpbl->on_list = false; + list_del(&iwpbl->list); + } + spin_unlock_irqrestore(&ucontext->srq_reg_mem_list_lock, flags); + break; default: break; } @@ -3673,6 +4081,47 @@ static int irdma_post_send(struct ib_qp *ibqp, return err; } +/** + * irdma_post_srq_recv - post receive wr for kernel application + * @ibsrq: ib srq pointer + * @ib_wr: work request for receive + * @bad_wr: bad wr caused an error + */ +static int irdma_post_srq_recv(struct ib_srq *ibsrq, + const struct ib_recv_wr *ib_wr, + const struct ib_recv_wr **bad_wr) +{ + struct irdma_srq *iwsrq = to_iwsrq(ibsrq); + struct irdma_srq_uk *uksrq = &iwsrq->sc_srq.srq_uk; + struct irdma_post_rq_info post_recv = {}; + unsigned long flags; + int err = 0; + + spin_lock_irqsave(&iwsrq->lock, flags); + while (ib_wr) { + if (ib_wr->num_sge > uksrq->max_srq_frag_cnt) { + err = -EINVAL; + goto out; + } + post_recv.num_sges = ib_wr->num_sge; + post_recv.wr_id = ib_wr->wr_id; + post_recv.sg_list = ib_wr->sg_list; + err = 
irdma_uk_srq_post_receive(uksrq, &post_recv); + if (err) + goto out; + + ib_wr = ib_wr->next; + } + +out: + spin_unlock_irqrestore(&iwsrq->lock, flags); + + if (err) + *bad_wr = ib_wr; + + return err; +} + /** * irdma_post_recv - post receive wr for kernel application * @ibqp: ib qp pointer @@ -3692,6 +4141,11 @@ static int irdma_post_recv(struct ib_qp *ibqp, iwqp = to_iwqp(ibqp); ukqp = &iwqp->sc_qp.qp_uk; + if (ukqp->srq_uk) { + *bad_wr = ib_wr; + return -EINVAL; + } + spin_lock_irqsave(&iwqp->lock, flags); while (ib_wr) { post_recv.num_sges = ib_wr->num_sge; @@ -4780,6 +5234,18 @@ static enum rdma_link_layer irdma_get_link_layer(struct ib_device *ibdev, return IB_LINK_LAYER_ETHERNET; } +static const struct ib_device_ops irdma_gen1_dev_ops = { + .dealloc_driver = irdma_ib_dealloc_device, +}; + +static const struct ib_device_ops irdma_gen3_dev_ops = { + .create_srq = irdma_create_srq, + .destroy_srq = irdma_destroy_srq, + .modify_srq = irdma_modify_srq, + .post_srq_recv = irdma_post_srq_recv, + .query_srq = irdma_query_srq, +}; + static const struct ib_device_ops irdma_roce_dev_ops = { .attach_mcast = irdma_attach_mcast, .create_ah = irdma_create_ah, @@ -4850,6 +5316,7 @@ static const struct ib_device_ops irdma_dev_ops = { INIT_RDMA_OBJ_SIZE(ib_cq, irdma_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_mw, irdma_mr, ibmw), INIT_RDMA_OBJ_SIZE(ib_qp, irdma_qp, ibqp), + INIT_RDMA_OBJ_SIZE(ib_srq, irdma_srq, ibsrq), }; /** @@ -4897,6 +5364,10 @@ static void irdma_init_rdma_device(struct irdma_device *iwdev) iwdev->ibdev.num_comp_vectors = iwdev->rf->ceqs_count; iwdev->ibdev.dev.parent = &pcidev->dev; ib_set_device_ops(&iwdev->ibdev, &irdma_dev_ops); + if (iwdev->rf->rdma_ver == IRDMA_GEN_1) + ib_set_device_ops(&iwdev->ibdev, &irdma_gen1_dev_ops); + if (iwdev->rf->rdma_ver >= IRDMA_GEN_3) + ib_set_device_ops(&iwdev->ibdev, &irdma_gen3_dev_ops); } /** diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index fcb163c45252..2817122ba989 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -8,6 +8,7 @@ #define IRDMA_PKEY_TBL_SZ 1 #define IRDMA_DEFAULT_PKEY 0xFFFF +#define IRDMA_SHADOW_PGCNT 1 struct irdma_ucontext { struct ib_ucontext ibucontext; @@ -17,6 +18,8 @@ struct irdma_ucontext { spinlock_t cq_reg_mem_list_lock; /* protect CQ memory list */ struct list_head qp_reg_mem_list; spinlock_t qp_reg_mem_list_lock; /* protect QP memory list */ + struct list_head srq_reg_mem_list; + spinlock_t srq_reg_mem_list_lock; /* protect SRQ memory list */ int abi_ver; u8 legacy_mode : 1; u8 use_raw_attrs : 1; @@ -65,10 +68,16 @@ struct irdma_cq_mr { bool split; }; +struct irdma_srq_mr { + struct irdma_hmc_pble srq_pbl; + dma_addr_t shadow; +}; + struct irdma_qp_mr { struct irdma_hmc_pble sq_pbl; struct irdma_hmc_pble rq_pbl; dma_addr_t shadow; + dma_addr_t rq_pa; struct page *sq_page; }; @@ -85,6 +94,7 @@ struct irdma_pbl { union { struct irdma_qp_mr qp_mr; struct irdma_cq_mr cq_mr; + struct irdma_srq_mr srq_mr; }; bool pbl_allocated:1; @@ -112,6 +122,21 @@ struct irdma_mr { struct irdma_pbl iwpbl; }; +struct irdma_srq { + struct ib_srq ibsrq; + struct irdma_sc_srq sc_srq __aligned(64); + struct irdma_dma_mem kmem; + u64 *srq_wrid_mem; + refcount_t refcnt; + spinlock_t lock; /* for poll srq */ + struct irdma_pbl *iwpbl; + struct irdma_sge *sg_list; + u16 srq_head; + u32 srq_num; + u32 max_wr; + bool user_mode:1; +}; + struct irdma_cq { struct ib_cq ibcq; struct irdma_sc_cq sc_cq; diff --git a/include/uapi/rdma/irdma-abi.h 
b/include/uapi/rdma/irdma-abi.h index 4e42054cca33..f7788d33376b 100644 --- a/include/uapi/rdma/irdma-abi.h +++ b/include/uapi/rdma/irdma-abi.h @@ -20,11 +20,13 @@ enum irdma_memreg_type { IRDMA_MEMREG_TYPE_MEM = 0, IRDMA_MEMREG_TYPE_QP = 1, IRDMA_MEMREG_TYPE_CQ = 2, + IRDMA_MEMREG_TYPE_SRQ = 3, }; enum { IRDMA_ALLOC_UCTX_USE_RAW_ATTR = 1 << 0, IRDMA_ALLOC_UCTX_MIN_HW_WQ_SIZE = 1 << 1, + IRDMA_ALLOC_UCTX_MAX_HW_SRQ_QUANTA = 1 << 2, IRDMA_SUPPORT_WQE_FORMAT_V2 = 1 << 3, }; @@ -55,7 +57,8 @@ struct irdma_alloc_ucontext_resp { __u8 rsvd2; __aligned_u64 comp_mask; __u16 min_hw_wq_size; - __u8 rsvd3[6]; + __u32 max_hw_srq_quanta; + __u8 rsvd3[2]; }; struct irdma_alloc_pd_resp { @@ -72,6 +75,16 @@ struct irdma_create_cq_req { __aligned_u64 user_shadow_area; }; +struct irdma_create_srq_req { + __aligned_u64 user_srq_buf; + __aligned_u64 user_shadow_area; +}; + +struct irdma_create_srq_resp { + __u32 srq_id; + __u32 srq_size; +}; + struct irdma_create_qp_req { __aligned_u64 user_wqe_bufs; __aligned_u64 user_compl_ctx; From eb31dfc2b41a1bccd697b57ad6f059534fbd5b71 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 27 Aug 2025 10:25:42 -0500 Subject: [PATCH 65/85] RDMA/irdma: Restrict Memory Window and CQE Timestamping to GEN3 With the deprecation of Memory Window and Timestamping support in GEN2, move these features to be exclusive to GEN3. This iteration supports only Type2 Memory Windows. Additionally, it includes the reporting of the timestamp mask and Host Channel Adapter (HCA) core clock frequency via the query device verb. Signed-off-by: Shiraz Saleem Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250827152545.2056-14-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 42 ++++++++++++++++++----------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 1134a3546d91..78f1db759bab 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -41,7 +41,8 @@ static int irdma_query_device(struct ib_device *ibdev, props->max_cq = rf->max_cq - rf->used_cqs; props->max_cqe = rf->max_cqe - 1; props->max_mr = rf->max_mr - rf->used_mrs; - props->max_mw = props->max_mr; + if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3) + props->max_mw = props->max_mr; props->max_pd = rf->max_pd - rf->used_pds; props->max_sge_rd = hw_attrs->uk_attrs.max_hw_read_sges; props->max_qp_rd_atom = hw_attrs->max_hw_ird; @@ -56,12 +57,16 @@ static int irdma_query_device(struct ib_device *ibdev, props->max_mcast_qp_attach = IRDMA_MAX_MGS_PER_CTX; props->max_total_mcast_qp_attach = rf->max_qp * IRDMA_MAX_MGS_PER_CTX; props->max_fast_reg_page_list_len = IRDMA_MAX_PAGES_PER_FMR; -#define HCA_CLOCK_TIMESTAMP_MASK 0x1ffff - if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_2) - props->timestamp_mask = HCA_CLOCK_TIMESTAMP_MASK; props->max_srq = rf->max_srq - rf->used_srqs; props->max_srq_wr = IRDMA_MAX_SRQ_WRS; props->max_srq_sge = hw_attrs->uk_attrs.max_hw_wq_frags; + if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3) { +#define HCA_CORE_CLOCK_KHZ 1000000UL + props->timestamp_mask = GENMASK(31, 0); + props->hca_core_clock = HCA_CORE_CLOCK_KHZ; + } + if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3) + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B; return 0; } @@ -798,7 +803,8 @@ static void irdma_roce_fill_and_set_qpctx_info(struct irdma_qp *iwqp, roce_info->is_qp1 = true; roce_info->rd_en = true; roce_info->wr_rdresp_en = true; 
- roce_info->bind_en = true; + if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) + roce_info->bind_en = true; roce_info->dcqcn_en = false; roce_info->rtomin = 5; @@ -829,7 +835,6 @@ static void irdma_iw_fill_and_set_qpctx_info(struct irdma_qp *iwqp, ether_addr_copy(iwarp_info->mac_addr, iwdev->netdev->dev_addr); iwarp_info->rd_en = true; iwarp_info->wr_rdresp_en = true; - iwarp_info->bind_en = true; iwarp_info->ecn_en = true; iwarp_info->rtomin = 5; @@ -1147,8 +1152,6 @@ static int irdma_get_ib_acc_flags(struct irdma_qp *iwqp) } if (iwqp->iwarp_info.rd_en) acc_flags |= IB_ACCESS_REMOTE_READ; - if (iwqp->iwarp_info.bind_en) - acc_flags |= IB_ACCESS_MW_BIND; } return acc_flags; } @@ -2433,8 +2436,8 @@ static int irdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) static inline int cq_validate_flags(u32 flags, u8 hw_rev) { - /* GEN1 does not support CQ create flags */ - if (hw_rev == IRDMA_GEN_1) + /* GEN1/2 does not support CQ create flags */ + if (hw_rev <= IRDMA_GEN_2) return flags ? -EOPNOTSUPP : 0; return flags & ~IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION ? -EOPNOTSUPP : 0; @@ -2660,8 +2663,9 @@ cq_free_rsrc: /** * irdma_get_mr_access - get hw MR access permissions from IB access flags * @access: IB access flags + * @hw_rev: Hardware version */ -static inline u16 irdma_get_mr_access(int access) +static inline u16 irdma_get_mr_access(int access, u8 hw_rev) { u16 hw_access = 0; @@ -2671,8 +2675,10 @@ static inline u16 irdma_get_mr_access(int access) IRDMA_ACCESS_FLAGS_REMOTEWRITE : 0; hw_access |= (access & IB_ACCESS_REMOTE_READ) ? IRDMA_ACCESS_FLAGS_REMOTEREAD : 0; - hw_access |= (access & IB_ACCESS_MW_BIND) ? - IRDMA_ACCESS_FLAGS_BIND_WINDOW : 0; + if (hw_rev >= IRDMA_GEN_3) { + hw_access |= (access & IB_ACCESS_MW_BIND) ? + IRDMA_ACCESS_FLAGS_BIND_WINDOW : 0; + } hw_access |= (access & IB_ZERO_BASED) ? 
IRDMA_ACCESS_FLAGS_ZERO_BASED : 0; hw_access |= IRDMA_ACCESS_FLAGS_LOCALREAD; @@ -3242,7 +3248,8 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr, stag_info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S; stag_info->stag_key = (u8)iwmr->stag; stag_info->total_len = iwmr->len; - stag_info->access_rights = irdma_get_mr_access(access); + stag_info->access_rights = irdma_get_mr_access(access, + iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev); stag_info->pd_id = iwpd->sc_pd.pd_id; stag_info->all_memory = pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY; if (stag_info->access_rights & IRDMA_ACCESS_FLAGS_ZERO_BASED) @@ -4036,7 +4043,9 @@ static int irdma_post_send(struct ib_qp *ibqp, stag_info.signaled = info.signaled; stag_info.read_fence = info.read_fence; - stag_info.access_rights = irdma_get_mr_access(reg_wr(ib_wr)->access); + stag_info.access_rights = + irdma_get_mr_access(reg_wr(ib_wr)->access, + dev->hw_attrs.uk_attrs.hw_rev); stag_info.stag_key = reg_wr(ib_wr)->key & 0xff; stag_info.stag_idx = reg_wr(ib_wr)->key >> 8; stag_info.page_size = reg_wr(ib_wr)->mr->page_size; @@ -5239,7 +5248,9 @@ static const struct ib_device_ops irdma_gen1_dev_ops = { }; static const struct ib_device_ops irdma_gen3_dev_ops = { + .alloc_mw = irdma_alloc_mw, .create_srq = irdma_create_srq, + .dealloc_mw = irdma_dealloc_mw, .destroy_srq = irdma_destroy_srq, .modify_srq = irdma_modify_srq, .post_srq_recv = irdma_post_srq_recv, @@ -5280,7 +5291,6 @@ static const struct ib_device_ops irdma_dev_ops = { .alloc_hw_port_stats = irdma_alloc_hw_port_stats, .alloc_mr = irdma_alloc_mr, - .alloc_mw = irdma_alloc_mw, .alloc_pd = irdma_alloc_pd, .alloc_ucontext = irdma_alloc_ucontext, .create_cq = irdma_create_cq, From a24a29c8747f75c4b6967e689c2ca82445ccc9b1 Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Wed, 27 Aug 2025 10:25:43 -0500 Subject: [PATCH 66/85] RDMA/irdma: Add Atomic Operations support Extend irdma to support atomic operations, namely Compare and Swap and Fetch and Add, for GEN3 devices. 
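
For reference, a minimal, illustrative sketch (not part of this patch; the helper
name and its caller are hypothetical) of how a kernel ULP would reach the new
fetch-and-add path through the standard verbs API, assuming a connected RC QP on
a GEN3 device, an 8-byte aligned local buffer covered by lkey, and remote memory
registered with IB_ACCESS_REMOTE_ATOMIC:

  #include <rdma/ib_verbs.h>

  /* Illustrative only: post one fetch-and-add; the original remote value
   * is written back to the local buffer described by the SGE.
   */
  static int post_fetch_add(struct ib_qp *qp, u64 local_addr, u32 lkey,
                            u64 remote_addr, u32 rkey, u64 add_val)
  {
          struct ib_sge sge = {
                  .addr   = local_addr,
                  .length = sizeof(u64),
                  .lkey   = lkey,
          };
          struct ib_atomic_wr wr = {
                  .wr.opcode     = IB_WR_ATOMIC_FETCH_AND_ADD,
                  .wr.send_flags = IB_SEND_SIGNALED,
                  .wr.sg_list    = &sge,
                  .wr.num_sge    = 1,
                  .remote_addr   = remote_addr,
                  .compare_add   = add_val,  /* amount to add */
                  .rkey          = rkey,
          };
          const struct ib_send_wr *bad_wr;

          return ib_post_send(qp, &wr.wr, &bad_wr);
  }

On the completion path, set_ib_wc_op_sq() reports such a WQE as IB_WC_FETCH_ADD
(or IB_WC_COMP_SWAP for the compare-and-swap variant).
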
Signed-off-by: Faisal Latif Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250827152545.2056-15-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/ctrl.c | 11 +++ drivers/infiniband/hw/irdma/defs.h | 10 ++- drivers/infiniband/hw/irdma/ig3rdma_hw.c | 3 - drivers/infiniband/hw/irdma/type.h | 4 + drivers/infiniband/hw/irdma/uk.c | 102 +++++++++++++++++++++++ drivers/infiniband/hw/irdma/user.h | 27 ++++++ drivers/infiniband/hw/irdma/verbs.c | 48 +++++++++++ drivers/infiniband/hw/irdma/verbs.h | 6 ++ 8 files changed, 207 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index ef2e46a22c3f..f2a19a856975 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -1110,6 +1110,8 @@ static void irdma_sc_qp_setctx_roce_gen_3(struct irdma_sc_qp *qp, FIELD_PREP(IRDMAQPC_UDPRIVCQENABLE, roce_info->udprivcq_en) | FIELD_PREP(IRDMAQPC_PRIVEN, roce_info->priv_mode_en) | + FIELD_PREP(IRDMAQPC_REMOTE_ATOMIC_EN, + info->remote_atomics_en) | FIELD_PREP(IRDMAQPC_TIMELYENABLE, roce_info->timely_en)); set_64bit_val(qp_ctx, 168, FIELD_PREP(IRDMAQPC_QPCOMPCTX, info->qp_compl_ctx)); @@ -1490,6 +1492,8 @@ static int irdma_sc_alloc_stag(struct irdma_sc_dev *dev, FIELD_PREP(IRDMA_CQPSQ_STAG_REMACCENABLED, info->remote_access) | FIELD_PREP(IRDMA_CQPSQ_STAG_USEHMCFNIDX, info->use_hmc_fcn_index) | FIELD_PREP(IRDMA_CQPSQ_STAG_USEPFRID, info->use_pf_rid) | + FIELD_PREP(IRDMA_CQPSQ_STAG_REMOTE_ATOMIC_EN, + info->remote_atomics_en) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); dma_wmb(); /* make sure WQE is written before valid bit is set */ @@ -1582,6 +1586,8 @@ static int irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, FIELD_PREP(IRDMA_CQPSQ_STAG_VABASEDTO, addr_type) | FIELD_PREP(IRDMA_CQPSQ_STAG_USEHMCFNIDX, info->use_hmc_fcn_index) | FIELD_PREP(IRDMA_CQPSQ_STAG_USEPFRID, info->use_pf_rid) | + FIELD_PREP(IRDMA_CQPSQ_STAG_REMOTE_ATOMIC_EN, + info->remote_atomics_en) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); dma_wmb(); /* make sure WQE is written before valid bit is set */ @@ -1740,6 +1746,7 @@ int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(IRDMAQPSQ_REMOTE_ATOMICS_EN, info->remote_atomics_en) | FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity); dma_wmb(); /* make sure WQE is written before valid bit is set */ @@ -5542,6 +5549,10 @@ int irdma_get_rdma_features(struct irdma_sc_dev *dev) } dev->feature_info[feat_type] = temp; } + + if (dev->feature_info[IRDMA_FTN_FLAGS] & IRDMA_ATOMICS_ALLOWED_BIT) + dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_ATOMIC_OPS; + exit: dma_free_coherent(dev->hw->device, feat_buf.size, feat_buf.va, feat_buf.pa); diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index 408058b6ba55..3b3680816a65 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -189,6 +189,8 @@ enum irdma_protocol_used { #define IRDMAQP_OP_RDMA_READ_LOC_INV 0x0b #define IRDMAQP_OP_NOP 0x0c #define IRDMAQP_OP_RDMA_WRITE_SOL 0x0d +#define IRDMAQP_OP_ATOMIC_FETCH_ADD 0x0f +#define IRDMAQP_OP_ATOMIC_COMPARE_SWAP_ADD 0x11 #define IRDMAQP_OP_GEN_RTS_AE 0x30 enum irdma_cqp_op_type { @@ -694,7 +696,8 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_STAG_USEPFRID BIT_ULL(61) #define 
IRDMA_CQPSQ_STAG_PBA IRDMA_CQPHC_QPCTX -#define IRDMA_CQPSQ_STAG_HMCFNIDX GENMASK_ULL(5, 0) +#define IRDMA_CQPSQ_STAG_HMCFNIDX GENMASK_ULL(15, 0) +#define IRDMA_CQPSQ_STAG_REMOTE_ATOMIC_EN BIT_ULL(61) #define IRDMA_CQPSQ_STAG_FIRSTPMPBLIDX GENMASK_ULL(27, 0) #define IRDMA_CQPSQ_QUERYSTAG_IDX IRDMA_CQPSQ_STAG_IDX @@ -981,6 +984,9 @@ enum irdma_cqp_op_type { #define IRDMAQPSQ_REMTO IRDMA_CQPHC_QPCTX +#define IRDMAQPSQ_STAG GENMASK_ULL(31, 0) +#define IRDMAQPSQ_REMOTE_STAG GENMASK_ULL(31, 0) + #define IRDMAQPSQ_STAGRIGHTS GENMASK_ULL(52, 48) #define IRDMAQPSQ_VABASEDTO BIT_ULL(53) #define IRDMAQPSQ_MEMWINDOWTYPE BIT_ULL(54) @@ -991,6 +997,8 @@ enum irdma_cqp_op_type { #define IRDMAQPSQ_BASEVA_TO_FBO IRDMA_CQPHC_QPCTX +#define IRDMAQPSQ_REMOTE_ATOMICS_EN BIT_ULL(55) + #define IRDMAQPSQ_LOCSTAG GENMASK_ULL(31, 0) #define IRDMAQPSQ_STAGKEY GENMASK_ULL(7, 0) diff --git a/drivers/infiniband/hw/irdma/ig3rdma_hw.c b/drivers/infiniband/hw/irdma/ig3rdma_hw.c index 2a3d7144c771..2e8bb475e22a 100644 --- a/drivers/infiniband/hw/irdma/ig3rdma_hw.c +++ b/drivers/infiniband/hw/irdma/ig3rdma_hw.c @@ -120,9 +120,6 @@ void ig3rdma_init_hw(struct irdma_sc_dev *dev) dev->hw_attrs.first_hw_vf_fpm_id = 0; dev->hw_attrs.max_hw_vf_fpm_id = IG3_MAX_APFS + IG3_MAX_AVFS; dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_64_BYTE_CQE; - if (dev->feature_info[IRDMA_FTN_FLAGS] & IRDMA_ATOMICS_ALLOWED_BIT) - dev->hw_attrs.uk_attrs.feature_flags |= - IRDMA_FEATURE_ATOMIC_OPS; dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_CQE_TIMESTAMPING; dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_SRQ; diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 43dcdc7b846c..c11b901ff119 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -1087,6 +1087,7 @@ struct irdma_qp_host_ctx_info { u32 srq_id; u32 rem_endpoint_idx; u16 stats_idx; + bool remote_atomics_en:1; bool srq_valid:1; bool tcp_info_valid:1; bool iwarp_info_valid:1; @@ -1127,6 +1128,7 @@ struct irdma_allocate_stag_info { bool use_hmc_fcn_index:1; bool use_pf_rid:1; bool all_memory:1; + bool remote_atomics_en:1; u16 hmc_fcn_index; }; @@ -1155,6 +1157,7 @@ struct irdma_reg_ns_stag_info { u8 hmc_fcn_index; bool use_pf_rid:1; bool all_memory:1; + bool remote_atomics_en:1; }; struct irdma_fast_reg_stag_info { @@ -1178,6 +1181,7 @@ struct irdma_fast_reg_stag_info { u8 hmc_fcn_index; bool use_pf_rid:1; bool defer_flag:1; + bool remote_atomics_en:1; }; struct irdma_dealloc_stag_info { diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index e7ffde792781..fb944c49f864 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -337,6 +337,108 @@ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, return 0; } +/** + * irdma_uk_atomic_fetch_add - atomic fetch and add operation + * @qp: hw qp ptr + * @info: post sq information + * @post_sq: flag to post sq + */ +int irdma_uk_atomic_fetch_add(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq) +{ + struct irdma_atomic_fetch_add *op_info; + u32 total_size = 0; + u16 quanta = 2; + u32 wqe_idx; + __le64 *wqe; + u64 hdr; + + op_info = &info->op.atomic_fetch_add; + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, + info); + if (!wqe) + return -ENOMEM; + + set_64bit_val(wqe, 0, op_info->tagged_offset); + set_64bit_val(wqe, 8, + FIELD_PREP(IRDMAQPSQ_STAG, op_info->stag)); + set_64bit_val(wqe, 16, op_info->remote_tagged_offset); + + 
hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, 1) | + FIELD_PREP(IRDMAQPSQ_REMOTE_STAG, op_info->remote_stag) | + FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_ATOMIC_FETCH_ADD) | + FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | + FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | + FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); + + set_64bit_val(wqe, 32, op_info->fetch_add_data_bytes); + set_64bit_val(wqe, 40, 0); + set_64bit_val(wqe, 48, 0); + set_64bit_val(wqe, 56, + FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity)); + + dma_wmb(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, 24, hdr); + + if (post_sq) + irdma_uk_qp_post_wr(qp); + + return 0; +} + +/** + * irdma_uk_atomic_compare_swap - atomic compare and swap operation + * @qp: hw qp ptr + * @info: post sq information + * @post_sq: flag to post sq + */ +int irdma_uk_atomic_compare_swap(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq) +{ + struct irdma_atomic_compare_swap *op_info; + u32 total_size = 0; + u16 quanta = 2; + u32 wqe_idx; + __le64 *wqe; + u64 hdr; + + op_info = &info->op.atomic_compare_swap; + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, + info); + if (!wqe) + return -ENOMEM; + + set_64bit_val(wqe, 0, op_info->tagged_offset); + set_64bit_val(wqe, 8, + FIELD_PREP(IRDMAQPSQ_STAG, op_info->stag)); + set_64bit_val(wqe, 16, op_info->remote_tagged_offset); + + hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, 1) | + FIELD_PREP(IRDMAQPSQ_REMOTE_STAG, op_info->remote_stag) | + FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_ATOMIC_COMPARE_SWAP_ADD) | + FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | + FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | + FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); + + set_64bit_val(wqe, 32, op_info->swap_data_bytes); + set_64bit_val(wqe, 40, op_info->compare_data_bytes); + set_64bit_val(wqe, 48, 0); + set_64bit_val(wqe, 56, + FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity)); + + dma_wmb(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, 24, hdr); + + if (post_sq) + irdma_uk_qp_post_wr(qp); + + return 0; +} + /** * irdma_uk_srq_post_receive - post a receive wqe to a shared rq * @srq: shared rq ptr diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index cf324f1c539e..ed7ce98e887b 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -41,6 +41,8 @@ #define IRDMA_OP_TYPE_INV_STAG 0x0a #define IRDMA_OP_TYPE_RDMA_READ_INV_STAG 0x0b #define IRDMA_OP_TYPE_NOP 0x0c +#define IRDMA_OP_TYPE_ATOMIC_FETCH_AND_ADD 0x0f +#define IRDMA_OP_TYPE_ATOMIC_COMPARE_AND_SWAP 0x11 #define IRDMA_OP_TYPE_REC 0x3e #define IRDMA_OP_TYPE_REC_IMM 0x3f @@ -205,6 +207,24 @@ struct irdma_bind_window { bool ena_writes:1; irdma_stag mw_stag; bool mem_window_type_1:1; + bool remote_atomics_en:1; +}; + +struct irdma_atomic_fetch_add { + u64 tagged_offset; + u64 remote_tagged_offset; + u64 fetch_add_data_bytes; + u32 stag; + u32 remote_stag; +}; + +struct irdma_atomic_compare_swap { + u64 tagged_offset; + u64 remote_tagged_offset; + u64 swap_data_bytes; + u64 compare_data_bytes; + u32 stag; + u32 remote_stag; }; struct irdma_inv_local_stag { @@ -223,6 +243,7 @@ struct irdma_post_sq_info { bool report_rtt:1; bool udp_hdr:1; bool defer_flag:1; + bool remote_atomic_en:1; u32 imm_data; u32 stag_to_inv; union { @@ -231,6 +252,8 @@ struct irdma_post_sq_info { struct 
irdma_rdma_read rdma_read; struct irdma_bind_window bind_window; struct irdma_inv_local_stag inv_local_stag; + struct irdma_atomic_fetch_add atomic_fetch_add; + struct irdma_atomic_compare_swap atomic_compare_swap; } op; }; @@ -259,6 +282,10 @@ struct irdma_cq_poll_info { bool imm_valid:1; }; +int irdma_uk_atomic_compare_swap(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq); +int irdma_uk_atomic_fetch_add(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq); int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq); int irdma_uk_inline_send(struct irdma_qp_uk *qp, diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 78f1db759bab..167b5bdc668e 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -60,6 +60,11 @@ static int irdma_query_device(struct ib_device *ibdev, props->max_srq = rf->max_srq - rf->used_srqs; props->max_srq_wr = IRDMA_MAX_SRQ_WRS; props->max_srq_sge = hw_attrs->uk_attrs.max_hw_wq_frags; + if (hw_attrs->uk_attrs.feature_flags & IRDMA_FEATURE_ATOMIC_OPS) + props->atomic_cap = IB_ATOMIC_HCA; + else + props->atomic_cap = IB_ATOMIC_NONE; + props->masked_atomic_cap = props->atomic_cap; if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3) { #define HCA_CORE_CLOCK_KHZ 1000000UL props->timestamp_mask = GENMASK(31, 0); @@ -1145,6 +1150,8 @@ static int irdma_get_ib_acc_flags(struct irdma_qp *iwqp) acc_flags |= IB_ACCESS_REMOTE_READ; if (iwqp->roce_info.bind_en) acc_flags |= IB_ACCESS_MW_BIND; + if (iwqp->ctx_info.remote_atomics_en) + acc_flags |= IB_ACCESS_REMOTE_ATOMIC; } else { if (iwqp->iwarp_info.wr_rdresp_en) { acc_flags |= IB_ACCESS_LOCAL_WRITE; @@ -1152,6 +1159,8 @@ static int irdma_get_ib_acc_flags(struct irdma_qp *iwqp) } if (iwqp->iwarp_info.rd_en) acc_flags |= IB_ACCESS_REMOTE_READ; + if (iwqp->ctx_info.remote_atomics_en) + acc_flags |= IB_ACCESS_REMOTE_ATOMIC; } return acc_flags; } @@ -1448,6 +1457,9 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, roce_info->wr_rdresp_en = true; if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) roce_info->rd_en = true; + if (dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_ATOMIC_OPS) + if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) + ctx_info->remote_atomics_en = true; } wait_event(iwqp->mod_qp_waitq, !atomic_read(&iwqp->hw_mod_qp_pend)); @@ -3250,6 +3262,8 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr, stag_info->total_len = iwmr->len; stag_info->access_rights = irdma_get_mr_access(access, iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev); + if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_ATOMIC_OPS) + stag_info->remote_atomics_en = (access & IB_ACCESS_REMOTE_ATOMIC) ? 
1 : 0; stag_info->pd_id = iwpd->sc_pd.pd_id; stag_info->all_memory = pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY; if (stag_info->access_rights & IRDMA_ACCESS_FLAGS_ZERO_BASED) @@ -3949,6 +3963,40 @@ static int irdma_post_send(struct ib_qp *ibqp, if (ib_wr->send_flags & IB_SEND_FENCE) info.read_fence = true; switch (ib_wr->opcode) { + case IB_WR_ATOMIC_CMP_AND_SWP: + if (unlikely(!(dev->hw_attrs.uk_attrs.feature_flags & + IRDMA_FEATURE_ATOMIC_OPS))) { + err = EINVAL; + break; + } + info.op_type = IRDMA_OP_TYPE_ATOMIC_COMPARE_AND_SWAP; + info.op.atomic_compare_swap.tagged_offset = ib_wr->sg_list[0].addr; + info.op.atomic_compare_swap.remote_tagged_offset = + atomic_wr(ib_wr)->remote_addr; + info.op.atomic_compare_swap.swap_data_bytes = atomic_wr(ib_wr)->swap; + info.op.atomic_compare_swap.compare_data_bytes = + atomic_wr(ib_wr)->compare_add; + info.op.atomic_compare_swap.stag = ib_wr->sg_list[0].lkey; + info.op.atomic_compare_swap.remote_stag = atomic_wr(ib_wr)->rkey; + err = irdma_uk_atomic_compare_swap(ukqp, &info, false); + break; + case IB_WR_ATOMIC_FETCH_AND_ADD: + if (unlikely(!(dev->hw_attrs.uk_attrs.feature_flags & + IRDMA_FEATURE_ATOMIC_OPS))) { + err = EINVAL; + break; + } + info.op_type = IRDMA_OP_TYPE_ATOMIC_FETCH_AND_ADD; + info.op.atomic_fetch_add.tagged_offset = ib_wr->sg_list[0].addr; + info.op.atomic_fetch_add.remote_tagged_offset = + atomic_wr(ib_wr)->remote_addr; + info.op.atomic_fetch_add.fetch_add_data_bytes = + atomic_wr(ib_wr)->compare_add; + info.op.atomic_fetch_add.stag = ib_wr->sg_list[0].lkey; + info.op.atomic_fetch_add.remote_stag = + atomic_wr(ib_wr)->rkey; + err = irdma_uk_atomic_fetch_add(ukqp, &info, false); + break; case IB_WR_SEND_WITH_IMM: if (ukqp->qp_caps & IRDMA_SEND_WITH_IMM) { info.imm_data_valid = true; diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index 2817122ba989..49972b0600a3 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -284,6 +284,12 @@ static inline void set_ib_wc_op_sq(struct irdma_cq_poll_info *cq_poll_info, case IRDMA_OP_TYPE_FAST_REG_NSMR: entry->opcode = IB_WC_REG_MR; break; + case IRDMA_OP_TYPE_ATOMIC_COMPARE_AND_SWAP: + entry->opcode = IB_WC_COMP_SWAP; + break; + case IRDMA_OP_TYPE_ATOMIC_FETCH_AND_ADD: + entry->opcode = IB_WC_FETCH_ADD; + break; case IRDMA_OP_TYPE_INV_STAG: entry->opcode = IB_WC_LOCAL_INV; break; From 42f1d099093bc2ad6e6b1d631e1fd9bdbacdddb1 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 27 Aug 2025 10:25:44 -0500 Subject: [PATCH 67/85] RDMA/irdma: Extend CQE Error and Flush Handling for GEN3 Devices Enhance the CQE error and flush handling specific to GEN3 devices. Unlike GEN1/2 devices, which depend on software to generate completions in error, GEN3 devices leverage firmware to generate CQEs in error for all WQEs posted after a QP moves to an error state. Key changes include: - Updating the CQ poll logic to properly advance the CQ head in the event of a flush CQE. - Updating the flush logic for GEN3 to pass error WQE idx for SQ on an AE to flush out unprocessed WQEs in error. - Isolating the decoding of AE to flush codes into a separate routine irdma_ae_to_qp_err_code. This routine can now be leveraged to flush error CQEs on an AE and when error CQE is received for SRQ. 
Signed-off-by: Shiraz Saleem Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250827152545.2056-16-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/ctrl.c | 9 ++ drivers/infiniband/hw/irdma/defs.h | 105 +-------------- drivers/infiniband/hw/irdma/hw.c | 99 ++++---------- drivers/infiniband/hw/irdma/type.h | 14 +- drivers/infiniband/hw/irdma/uk.c | 39 +++++- drivers/infiniband/hw/irdma/user.h | 194 +++++++++++++++++++++++++++- drivers/infiniband/hw/irdma/verbs.c | 5 +- 7 files changed, 277 insertions(+), 188 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index f2a19a856975..4ef1c29032f7 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -2674,6 +2674,12 @@ int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp, info->ae_code | FIELD_PREP(IRDMA_CQPSQ_FWQE_AESOURCE, info->ae_src) : 0; set_64bit_val(wqe, 8, temp); + if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) { + set_64bit_val(wqe, 40, + FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_SQ_IDX, info->err_sq_idx)); + set_64bit_val(wqe, 48, + FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_RQ_IDX, info->err_rq_idx)); + } hdr = qp->qp_uk.qp_id | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_FLUSH_WQES) | @@ -2682,6 +2688,9 @@ int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp, FIELD_PREP(IRDMA_CQPSQ_FWQE_FLUSHSQ, flush_sq) | FIELD_PREP(IRDMA_CQPSQ_FWQE_FLUSHRQ, flush_rq) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); + if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) + hdr |= FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_SQ_IDX_VALID, info->err_sq_idx_valid) | + FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_RQ_IDX_VALID, info->err_rq_idx_valid); dma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, 24, hdr); diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index 3b3680816a65..983b22d7ae23 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -301,107 +301,6 @@ enum irdma_cqp_op_type { #define IRDMA_CQP_OP_GATHER_STATS 0x2e #define IRDMA_CQP_OP_UP_MAP 0x2f -/* Async Events codes */ -#define IRDMA_AE_AMP_UNALLOCATED_STAG 0x0102 -#define IRDMA_AE_AMP_INVALID_STAG 0x0103 -#define IRDMA_AE_AMP_BAD_QP 0x0104 -#define IRDMA_AE_AMP_BAD_PD 0x0105 -#define IRDMA_AE_AMP_BAD_STAG_KEY 0x0106 -#define IRDMA_AE_AMP_BAD_STAG_INDEX 0x0107 -#define IRDMA_AE_AMP_BOUNDS_VIOLATION 0x0108 -#define IRDMA_AE_AMP_RIGHTS_VIOLATION 0x0109 -#define IRDMA_AE_AMP_TO_WRAP 0x010a -#define IRDMA_AE_AMP_FASTREG_VALID_STAG 0x010c -#define IRDMA_AE_AMP_FASTREG_MW_STAG 0x010d -#define IRDMA_AE_AMP_FASTREG_INVALID_RIGHTS 0x010e -#define IRDMA_AE_AMP_FASTREG_INVALID_LENGTH 0x0110 -#define IRDMA_AE_AMP_INVALIDATE_SHARED 0x0111 -#define IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS 0x0112 -#define IRDMA_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS 0x0113 -#define IRDMA_AE_AMP_MWBIND_VALID_STAG 0x0114 -#define IRDMA_AE_AMP_MWBIND_OF_MR_STAG 0x0115 -#define IRDMA_AE_AMP_MWBIND_TO_ZERO_BASED_STAG 0x0116 -#define IRDMA_AE_AMP_MWBIND_TO_MW_STAG 0x0117 -#define IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS 0x0118 -#define IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS 0x0119 -#define IRDMA_AE_AMP_MWBIND_TO_INVALID_PARENT 0x011a -#define IRDMA_AE_AMP_MWBIND_BIND_DISABLED 0x011b -#define IRDMA_AE_PRIV_OPERATION_DENIED 0x011c -#define IRDMA_AE_AMP_INVALIDATE_TYPE1_MW 0x011d -#define IRDMA_AE_AMP_MWBIND_ZERO_BASED_TYPE1_MW 0x011e -#define IRDMA_AE_AMP_FASTREG_INVALID_PBL_HPS_CFG 0x011f -#define 
IRDMA_AE_AMP_MWBIND_WRONG_TYPE 0x0120 -#define IRDMA_AE_AMP_FASTREG_PBLE_MISMATCH 0x0121 -#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG 0x0132 -#define IRDMA_AE_UDA_XMIT_BAD_PD 0x0133 -#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT 0x0134 -#define IRDMA_AE_UDA_L4LEN_INVALID 0x0135 -#define IRDMA_AE_BAD_CLOSE 0x0201 -#define IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE 0x0202 -#define IRDMA_AE_CQ_OPERATION_ERROR 0x0203 -#define IRDMA_AE_RDMA_READ_WHILE_ORD_ZERO 0x0205 -#define IRDMA_AE_STAG_ZERO_INVALID 0x0206 -#define IRDMA_AE_IB_RREQ_AND_Q1_FULL 0x0207 -#define IRDMA_AE_IB_INVALID_REQUEST 0x0208 -#define IRDMA_AE_SRQ_LIMIT 0x0209 -#define IRDMA_AE_WQE_UNEXPECTED_OPCODE 0x020a -#define IRDMA_AE_WQE_INVALID_PARAMETER 0x020b -#define IRDMA_AE_WQE_INVALID_FRAG_DATA 0x020c -#define IRDMA_AE_IB_REMOTE_ACCESS_ERROR 0x020d -#define IRDMA_AE_IB_REMOTE_OP_ERROR 0x020e -#define IRDMA_AE_SRQ_CATASTROPHIC_ERROR 0x020f -#define IRDMA_AE_WQE_LSMM_TOO_LONG 0x0220 -#define IRDMA_AE_ATOMIC_ALIGNMENT 0x0221 -#define IRDMA_AE_ATOMIC_MASK 0x0222 -#define IRDMA_AE_INVALID_REQUEST 0x0223 -#define IRDMA_AE_PCIE_ATOMIC_DISABLE 0x0224 -#define IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301 -#define IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303 -#define IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304 -#define IRDMA_AE_DDP_UBE_INVALID_MO 0x0305 -#define IRDMA_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE 0x0306 -#define IRDMA_AE_DDP_UBE_INVALID_QN 0x0307 -#define IRDMA_AE_DDP_NO_L_BIT 0x0308 -#define IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION 0x0311 -#define IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE 0x0312 -#define IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST 0x0313 -#define IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP 0x0314 -#define IRDMA_AE_ROCE_RSP_LENGTH_ERROR 0x0316 -#define IRDMA_AE_ROCE_EMPTY_MCG 0x0380 -#define IRDMA_AE_ROCE_BAD_MC_IP_ADDR 0x0381 -#define IRDMA_AE_ROCE_BAD_MC_QPID 0x0382 -#define IRDMA_AE_MCG_QP_PROTOCOL_MISMATCH 0x0383 -#define IRDMA_AE_INVALID_ARP_ENTRY 0x0401 -#define IRDMA_AE_INVALID_TCP_OPTION_RCVD 0x0402 -#define IRDMA_AE_STALE_ARP_ENTRY 0x0403 -#define IRDMA_AE_INVALID_AH_ENTRY 0x0406 -#define IRDMA_AE_LLP_CLOSE_COMPLETE 0x0501 -#define IRDMA_AE_LLP_CONNECTION_RESET 0x0502 -#define IRDMA_AE_LLP_FIN_RECEIVED 0x0503 -#define IRDMA_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH 0x0504 -#define IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR 0x0505 -#define IRDMA_AE_LLP_SEGMENT_TOO_SMALL 0x0507 -#define IRDMA_AE_LLP_SYN_RECEIVED 0x0508 -#define IRDMA_AE_LLP_TERMINATE_RECEIVED 0x0509 -#define IRDMA_AE_LLP_TOO_MANY_RETRIES 0x050a -#define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b -#define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c -#define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e -#define IRDMA_AE_LLP_TOO_MANY_RNRS 0x050f -#define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520 -#define IRDMA_AE_RESET_SENT 0x0601 -#define IRDMA_AE_TERMINATE_SENT 0x0602 -#define IRDMA_AE_RESET_NOT_SENT 0x0603 -#define IRDMA_AE_LCE_QP_CATASTROPHIC 0x0700 -#define IRDMA_AE_LCE_FUNCTION_CATASTROPHIC 0x0701 -#define IRDMA_AE_LCE_CQ_CATASTROPHIC 0x0702 -#define IRDMA_AE_REMOTE_QP_CATASTROPHIC 0x0703 -#define IRDMA_AE_LOCAL_QP_CATASTROPHIC 0x0704 -#define IRDMA_AE_RCE_QP_CATASTROPHIC 0x0705 -#define IRDMA_AE_QP_SUSPEND_COMPLETE 0x0900 -#define IRDMA_AE_CQP_DEFERRED_COMPLETE 0x0901 -#define IRDMA_AE_ADAPTER_CATASTROPHIC 0x0B0B - #define FLD_LS_64(dev, val, field) \ (((u64)(val) << (dev)->hw_shifts[field ## _S]) & (dev)->hw_masks[field ## _M]) #define FLD_RS_64(dev, val, field) \ @@ -771,6 +670,10 @@ enum irdma_cqp_op_type { #define 
IRDMA_CQPSQ_FWQE_USERFLCODE BIT_ULL(60) #define IRDMA_CQPSQ_FWQE_FLUSHSQ BIT_ULL(61) #define IRDMA_CQPSQ_FWQE_FLUSHRQ BIT_ULL(62) +#define IRDMA_CQPSQ_FWQE_ERR_SQ_IDX_VALID BIT_ULL(42) +#define IRDMA_CQPSQ_FWQE_ERR_SQ_IDX GENMASK_ULL(49, 32) +#define IRDMA_CQPSQ_FWQE_ERR_RQ_IDX_VALID BIT_ULL(43) +#define IRDMA_CQPSQ_FWQE_ERR_RQ_IDX GENMASK_ULL(46, 32) #define IRDMA_CQPSQ_MAPT_PORT GENMASK_ULL(15, 0) #define IRDMA_CQPSQ_MAPT_ADDPORT BIT_ULL(62) #define IRDMA_CQPSQ_UPESD_SDCMD GENMASK_ULL(31, 0) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 27b9623c2b09..7bad0e38786a 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -135,76 +135,24 @@ static void irdma_process_ceq(struct irdma_pci_f *rf, struct irdma_ceq *ceq) static void irdma_set_flush_fields(struct irdma_sc_qp *qp, struct irdma_aeqe_info *info) { + struct qp_err_code qp_err; + qp->sq_flush_code = info->sq; qp->rq_flush_code = info->rq; - qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; - - switch (info->ae_id) { - case IRDMA_AE_AMP_BOUNDS_VIOLATION: - case IRDMA_AE_AMP_INVALID_STAG: - case IRDMA_AE_AMP_RIGHTS_VIOLATION: - case IRDMA_AE_AMP_UNALLOCATED_STAG: - case IRDMA_AE_AMP_BAD_PD: - case IRDMA_AE_AMP_BAD_QP: - case IRDMA_AE_AMP_BAD_STAG_KEY: - case IRDMA_AE_AMP_BAD_STAG_INDEX: - case IRDMA_AE_AMP_TO_WRAP: - case IRDMA_AE_PRIV_OPERATION_DENIED: - qp->flush_code = FLUSH_PROT_ERR; - qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; - break; - case IRDMA_AE_UDA_XMIT_BAD_PD: - case IRDMA_AE_WQE_UNEXPECTED_OPCODE: - qp->flush_code = FLUSH_LOC_QP_OP_ERR; - qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; - break; - case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: - case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT: - case IRDMA_AE_UDA_L4LEN_INVALID: - case IRDMA_AE_DDP_UBE_INVALID_MO: - case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: - qp->flush_code = FLUSH_LOC_LEN_ERR; - qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; - break; - case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS: - case IRDMA_AE_IB_REMOTE_ACCESS_ERROR: - qp->flush_code = FLUSH_REM_ACCESS_ERR; - qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; - break; - case IRDMA_AE_LLP_SEGMENT_TOO_SMALL: - case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR: - case IRDMA_AE_ROCE_RSP_LENGTH_ERROR: - case IRDMA_AE_IB_REMOTE_OP_ERROR: - qp->flush_code = FLUSH_REM_OP_ERR; - qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; - break; - case IRDMA_AE_LCE_QP_CATASTROPHIC: - qp->flush_code = FLUSH_FATAL_ERR; - qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; - break; - case IRDMA_AE_IB_RREQ_AND_Q1_FULL: - qp->flush_code = FLUSH_GENERAL_ERR; - break; - case IRDMA_AE_LLP_TOO_MANY_RETRIES: - qp->flush_code = FLUSH_RETRY_EXC_ERR; - qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; - break; - case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS: - case IRDMA_AE_AMP_MWBIND_BIND_DISABLED: - case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS: - case IRDMA_AE_AMP_MWBIND_VALID_STAG: - qp->flush_code = FLUSH_MW_BIND_ERR; - qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; - break; - case IRDMA_AE_IB_INVALID_REQUEST: - qp->flush_code = FLUSH_REM_INV_REQ_ERR; - qp->event_type = IRDMA_QP_EVENT_REQ_ERR; - break; - default: - qp->flush_code = FLUSH_GENERAL_ERR; - qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; - break; + if (qp->qp_uk.uk_attrs->hw_rev >= IRDMA_GEN_3) { + if (info->sq) { + qp->err_sq_idx_valid = true; + qp->err_sq_idx = info->wqe_idx; + } + if (info->rq) { + qp->err_rq_idx_valid = true; + qp->err_rq_idx = info->wqe_idx; + } } + + qp_err = irdma_ae_to_qp_err_code(info->ae_id); + qp->flush_code 
= qp_err.flush_code; + qp->event_type = qp_err.event_type; } /** @@ -320,7 +268,6 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) if (info->ae_id != IRDMA_AE_QP_SUSPEND_COMPLETE) iwqp->last_aeq = info->ae_id; spin_unlock_irqrestore(&iwqp->lock, flags); - ctx_info = &iwqp->ctx_info; } else if (info->srq) { if (info->ae_id != IRDMA_AE_SRQ_LIMIT) continue; @@ -466,9 +413,11 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) default: ibdev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d, ae_src=%d\n", info->ae_id, info->qp, info->qp_cq_id, info->ae_src); - if (rdma_protocol_roce(&iwdev->ibdev, 1)) { - ctx_info->roce_info->err_rq_idx_valid = info->rq; - if (info->rq) { + ctx_info = &iwqp->ctx_info; + if (rdma_protocol_roce(&iwqp->iwdev->ibdev, 1)) { + ctx_info->roce_info->err_rq_idx_valid = + ctx_info->srq_valid ? false : info->err_rq_idx_valid; + if (ctx_info->roce_info->err_rq_idx_valid) { ctx_info->roce_info->err_rq_idx = info->wqe_idx; irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info); @@ -2832,7 +2781,9 @@ void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask) struct irdma_pci_f *rf = iwqp->iwdev->rf; u8 flush_code = iwqp->sc_qp.flush_code; - if (!(flush_mask & IRDMA_FLUSH_SQ) && !(flush_mask & IRDMA_FLUSH_RQ)) + if ((!(flush_mask & IRDMA_FLUSH_SQ) && + !(flush_mask & IRDMA_FLUSH_RQ)) || + ((flush_mask & IRDMA_REFLUSH) && rf->rdma_ver >= IRDMA_GEN_3)) return; /* Set flush info fields*/ @@ -2845,6 +2796,10 @@ void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask) info.rq_major_code = IRDMA_FLUSH_MAJOR_ERR; info.rq_minor_code = FLUSH_GENERAL_ERR; info.userflushcode = true; + info.err_sq_idx_valid = iwqp->sc_qp.err_sq_idx_valid; + info.err_sq_idx = iwqp->sc_qp.err_sq_idx; + info.err_rq_idx_valid = iwqp->sc_qp.err_rq_idx_valid; + info.err_rq_idx = iwqp->sc_qp.err_rq_idx; if (flush_mask & IRDMA_REFLUSH) { if (info.sq) diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index c11b901ff119..4ae77cdde9dc 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -97,12 +97,6 @@ enum irdma_term_mpa_errors { MPA_REQ_RSP = 0x04, }; -enum irdma_qp_event_type { - IRDMA_QP_EVENT_CATASTROPHIC, - IRDMA_QP_EVENT_ACCESS_ERR, - IRDMA_QP_EVENT_REQ_ERR, -}; - enum irdma_hw_stats_index { /* gen1 - 32-bit */ IRDMA_HW_STAT_INDEX_IP4RXDISCARD = 0, @@ -565,6 +559,10 @@ struct irdma_sc_qp { bool virtual_map:1; bool flush_sq:1; bool flush_rq:1; + bool err_sq_idx_valid:1; + bool err_rq_idx_valid:1; + u32 err_sq_idx; + u32 err_rq_idx; bool sq_flush_code:1; bool rq_flush_code:1; u32 pkt_limit; @@ -1289,6 +1287,8 @@ struct irdma_cqp_manage_push_page_info { }; struct irdma_qp_flush_info { + u32 err_sq_idx; + u32 err_rq_idx; u16 sq_minor_code; u16 sq_major_code; u16 rq_minor_code; @@ -1299,6 +1299,8 @@ struct irdma_qp_flush_info { bool rq:1; bool userflushcode:1; bool generate_ae:1; + bool err_sq_idx_valid:1; + bool err_rq_idx_valid:1; }; struct irdma_gen_ae_info { diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index fb944c49f864..ce1ae10c30fc 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -1148,6 +1148,7 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, __le64 *cqe; struct irdma_qp_uk *qp; struct irdma_srq_uk *srq; + struct qp_err_code qp_err; u8 is_srq; struct irdma_ring *pring = NULL; u32 wqe_idx; @@ -1233,16 +1234,35 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, if (info->error) { info->major_err = 
FIELD_GET(IRDMA_CQ_MAJERR, qword3); info->minor_err = FIELD_GET(IRDMA_CQ_MINERR, qword3); - if (info->major_err == IRDMA_FLUSH_MAJOR_ERR) { - info->comp_status = IRDMA_COMPL_STATUS_FLUSHED; + switch (info->major_err) { + case IRDMA_SRQFLUSH_RSVD_MAJOR_ERR: + qp_err = irdma_ae_to_qp_err_code(info->minor_err); + info->minor_err = qp_err.flush_code; + fallthrough; + case IRDMA_FLUSH_MAJOR_ERR: /* Set the min error to standard flush error code for remaining cqes */ if (info->minor_err != FLUSH_GENERAL_ERR) { qword3 &= ~IRDMA_CQ_MINERR; qword3 |= FIELD_PREP(IRDMA_CQ_MINERR, FLUSH_GENERAL_ERR); set_64bit_val(cqe, 24, qword3); } - } else { - info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN; + info->comp_status = IRDMA_COMPL_STATUS_FLUSHED; + break; + default: +#define IRDMA_CIE_SIGNATURE 0xE +#define IRDMA_CQMAJERR_HIGH_NIBBLE GENMASK(15, 12) + if (info->q_type == IRDMA_CQE_QTYPE_SQ && + qp->qp_type == IRDMA_QP_TYPE_ROCE_UD && + FIELD_GET(IRDMA_CQMAJERR_HIGH_NIBBLE, info->major_err) + == IRDMA_CIE_SIGNATURE) { + info->error = 0; + info->major_err = 0; + info->minor_err = 0; + info->comp_status = IRDMA_COMPL_STATUS_SUCCESS; + } else { + info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN; + } + break; } } else { info->comp_status = IRDMA_COMPL_STATUS_SUCCESS; @@ -1251,7 +1271,6 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, get_64bit_val(cqe, 0, &qword0); get_64bit_val(cqe, 16, &qword2); - info->tcp_seq_num_rtt = (u32)FIELD_GET(IRDMACQ_TCPSEQNUMRTT, qword0); info->qp_id = (u32)FIELD_GET(IRDMACQ_QPID, qword2); info->ud_src_qpn = (u32)FIELD_GET(IRDMACQ_UDSRCQPN, qword2); @@ -1377,9 +1396,15 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, ret_code = 0; exit: - if (!ret_code && info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) + if (!ret_code && info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) { if (pring && IRDMA_RING_MORE_WORK(*pring)) - move_cq_head = false; + /* Park CQ head during a flush to generate additional CQEs + * from SW for all unprocessed WQEs. For GEN3 and beyond + * FW will generate/flush these CQEs so move to the next CQE + */ + move_cq_head = qp->uk_attrs->hw_rev <= IRDMA_GEN_2 ? 
+ false : true; + } if (move_cq_head) { IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index ed7ce98e887b..ab57f689827a 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -46,7 +46,109 @@ #define IRDMA_OP_TYPE_REC 0x3e #define IRDMA_OP_TYPE_REC_IMM 0x3f -#define IRDMA_FLUSH_MAJOR_ERR 1 +#define IRDMA_FLUSH_MAJOR_ERR 1 +#define IRDMA_SRQFLUSH_RSVD_MAJOR_ERR 0xfffe + +/* Async Events codes */ +#define IRDMA_AE_AMP_UNALLOCATED_STAG 0x0102 +#define IRDMA_AE_AMP_INVALID_STAG 0x0103 +#define IRDMA_AE_AMP_BAD_QP 0x0104 +#define IRDMA_AE_AMP_BAD_PD 0x0105 +#define IRDMA_AE_AMP_BAD_STAG_KEY 0x0106 +#define IRDMA_AE_AMP_BAD_STAG_INDEX 0x0107 +#define IRDMA_AE_AMP_BOUNDS_VIOLATION 0x0108 +#define IRDMA_AE_AMP_RIGHTS_VIOLATION 0x0109 +#define IRDMA_AE_AMP_TO_WRAP 0x010a +#define IRDMA_AE_AMP_FASTREG_VALID_STAG 0x010c +#define IRDMA_AE_AMP_FASTREG_MW_STAG 0x010d +#define IRDMA_AE_AMP_FASTREG_INVALID_RIGHTS 0x010e +#define IRDMA_AE_AMP_FASTREG_INVALID_LENGTH 0x0110 +#define IRDMA_AE_AMP_INVALIDATE_SHARED 0x0111 +#define IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS 0x0112 +#define IRDMA_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS 0x0113 +#define IRDMA_AE_AMP_MWBIND_VALID_STAG 0x0114 +#define IRDMA_AE_AMP_MWBIND_OF_MR_STAG 0x0115 +#define IRDMA_AE_AMP_MWBIND_TO_ZERO_BASED_STAG 0x0116 +#define IRDMA_AE_AMP_MWBIND_TO_MW_STAG 0x0117 +#define IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS 0x0118 +#define IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS 0x0119 +#define IRDMA_AE_AMP_MWBIND_TO_INVALID_PARENT 0x011a +#define IRDMA_AE_AMP_MWBIND_BIND_DISABLED 0x011b +#define IRDMA_AE_PRIV_OPERATION_DENIED 0x011c +#define IRDMA_AE_AMP_INVALIDATE_TYPE1_MW 0x011d +#define IRDMA_AE_AMP_MWBIND_ZERO_BASED_TYPE1_MW 0x011e +#define IRDMA_AE_AMP_FASTREG_INVALID_PBL_HPS_CFG 0x011f +#define IRDMA_AE_AMP_MWBIND_WRONG_TYPE 0x0120 +#define IRDMA_AE_AMP_FASTREG_PBLE_MISMATCH 0x0121 +#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG 0x0132 +#define IRDMA_AE_UDA_XMIT_BAD_PD 0x0133 +#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT 0x0134 +#define IRDMA_AE_UDA_L4LEN_INVALID 0x0135 +#define IRDMA_AE_BAD_CLOSE 0x0201 +#define IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE 0x0202 +#define IRDMA_AE_CQ_OPERATION_ERROR 0x0203 +#define IRDMA_AE_RDMA_READ_WHILE_ORD_ZERO 0x0205 +#define IRDMA_AE_STAG_ZERO_INVALID 0x0206 +#define IRDMA_AE_IB_RREQ_AND_Q1_FULL 0x0207 +#define IRDMA_AE_IB_INVALID_REQUEST 0x0208 +#define IRDMA_AE_SRQ_LIMIT 0x0209 +#define IRDMA_AE_WQE_UNEXPECTED_OPCODE 0x020a +#define IRDMA_AE_WQE_INVALID_PARAMETER 0x020b +#define IRDMA_AE_WQE_INVALID_FRAG_DATA 0x020c +#define IRDMA_AE_IB_REMOTE_ACCESS_ERROR 0x020d +#define IRDMA_AE_IB_REMOTE_OP_ERROR 0x020e +#define IRDMA_AE_SRQ_CATASTROPHIC_ERROR 0x020f +#define IRDMA_AE_WQE_LSMM_TOO_LONG 0x0220 +#define IRDMA_AE_ATOMIC_ALIGNMENT 0x0221 +#define IRDMA_AE_ATOMIC_MASK 0x0222 +#define IRDMA_AE_INVALID_REQUEST 0x0223 +#define IRDMA_AE_PCIE_ATOMIC_DISABLE 0x0224 +#define IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301 +#define IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303 +#define IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304 +#define IRDMA_AE_DDP_UBE_INVALID_MO 0x0305 +#define IRDMA_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE 0x0306 +#define IRDMA_AE_DDP_UBE_INVALID_QN 0x0307 +#define IRDMA_AE_DDP_NO_L_BIT 0x0308 +#define IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION 0x0311 +#define IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE 0x0312 +#define IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST 0x0313 +#define 
IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP 0x0314 +#define IRDMA_AE_ROCE_RSP_LENGTH_ERROR 0x0316 +#define IRDMA_AE_ROCE_EMPTY_MCG 0x0380 +#define IRDMA_AE_ROCE_BAD_MC_IP_ADDR 0x0381 +#define IRDMA_AE_ROCE_BAD_MC_QPID 0x0382 +#define IRDMA_AE_MCG_QP_PROTOCOL_MISMATCH 0x0383 +#define IRDMA_AE_INVALID_ARP_ENTRY 0x0401 +#define IRDMA_AE_INVALID_TCP_OPTION_RCVD 0x0402 +#define IRDMA_AE_STALE_ARP_ENTRY 0x0403 +#define IRDMA_AE_INVALID_AH_ENTRY 0x0406 +#define IRDMA_AE_LLP_CLOSE_COMPLETE 0x0501 +#define IRDMA_AE_LLP_CONNECTION_RESET 0x0502 +#define IRDMA_AE_LLP_FIN_RECEIVED 0x0503 +#define IRDMA_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH 0x0504 +#define IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR 0x0505 +#define IRDMA_AE_LLP_SEGMENT_TOO_SMALL 0x0507 +#define IRDMA_AE_LLP_SYN_RECEIVED 0x0508 +#define IRDMA_AE_LLP_TERMINATE_RECEIVED 0x0509 +#define IRDMA_AE_LLP_TOO_MANY_RETRIES 0x050a +#define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b +#define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c +#define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e +#define IRDMA_AE_LLP_TOO_MANY_RNRS 0x050f +#define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520 +#define IRDMA_AE_RESET_SENT 0x0601 +#define IRDMA_AE_TERMINATE_SENT 0x0602 +#define IRDMA_AE_RESET_NOT_SENT 0x0603 +#define IRDMA_AE_LCE_QP_CATASTROPHIC 0x0700 +#define IRDMA_AE_LCE_FUNCTION_CATASTROPHIC 0x0701 +#define IRDMA_AE_LCE_CQ_CATASTROPHIC 0x0702 +#define IRDMA_AE_REMOTE_QP_CATASTROPHIC 0x0703 +#define IRDMA_AE_LOCAL_QP_CATASTROPHIC 0x0704 +#define IRDMA_AE_RCE_QP_CATASTROPHIC 0x0705 +#define IRDMA_AE_QP_SUSPEND_COMPLETE 0x0900 +#define IRDMA_AE_CQP_DEFERRED_COMPLETE 0x0901 +#define IRDMA_AE_ADAPTER_CATASTROPHIC 0x0B0B enum irdma_device_caps_const { IRDMA_WQE_SIZE = 4, @@ -109,6 +211,13 @@ enum irdma_flush_opcode { FLUSH_RETRY_EXC_ERR, FLUSH_MW_BIND_ERR, FLUSH_REM_INV_REQ_ERR, + FLUSH_RNR_RETRY_EXC_ERR, +}; + +enum irdma_qp_event_type { + IRDMA_QP_EVENT_CATASTROPHIC, + IRDMA_QP_EVENT_ACCESS_ERR, + IRDMA_QP_EVENT_REQ_ERR, }; enum irdma_cmpl_status { @@ -282,6 +391,11 @@ struct irdma_cq_poll_info { bool imm_valid:1; }; +struct qp_err_code { + enum irdma_flush_opcode flush_code; + enum irdma_qp_event_type event_type; +}; + int irdma_uk_atomic_compare_swap(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq); int irdma_uk_atomic_fetch_add(struct irdma_qp_uk *qp, @@ -479,4 +593,82 @@ int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, int irdma_get_srqdepth(struct irdma_uk_attrs *uk_attrs, u32 srq_size, u8 shift, u32 *srqdepth); void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx); + +static inline struct qp_err_code irdma_ae_to_qp_err_code(u16 ae_id) +{ + struct qp_err_code qp_err = {}; + + switch (ae_id) { + case IRDMA_AE_AMP_BOUNDS_VIOLATION: + case IRDMA_AE_AMP_INVALID_STAG: + case IRDMA_AE_AMP_RIGHTS_VIOLATION: + case IRDMA_AE_AMP_UNALLOCATED_STAG: + case IRDMA_AE_AMP_BAD_PD: + case IRDMA_AE_AMP_BAD_QP: + case IRDMA_AE_AMP_BAD_STAG_KEY: + case IRDMA_AE_AMP_BAD_STAG_INDEX: + case IRDMA_AE_AMP_TO_WRAP: + case IRDMA_AE_PRIV_OPERATION_DENIED: + qp_err.flush_code = FLUSH_PROT_ERR; + qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR; + break; + case IRDMA_AE_UDA_XMIT_BAD_PD: + case IRDMA_AE_WQE_UNEXPECTED_OPCODE: + qp_err.flush_code = FLUSH_LOC_QP_OP_ERR; + qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; + break; + case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT: + case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: + case IRDMA_AE_UDA_L4LEN_INVALID: + case IRDMA_AE_DDP_UBE_INVALID_MO: + case 
IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: + qp_err.flush_code = FLUSH_LOC_LEN_ERR; + qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; + break; + case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS: + case IRDMA_AE_IB_REMOTE_ACCESS_ERROR: + qp_err.flush_code = FLUSH_REM_ACCESS_ERR; + qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR; + break; + case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS: + case IRDMA_AE_AMP_MWBIND_BIND_DISABLED: + case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS: + case IRDMA_AE_AMP_MWBIND_VALID_STAG: + qp_err.flush_code = FLUSH_MW_BIND_ERR; + qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR; + break; + case IRDMA_AE_LLP_TOO_MANY_RETRIES: + qp_err.flush_code = FLUSH_RETRY_EXC_ERR; + qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; + break; + case IRDMA_AE_IB_INVALID_REQUEST: + qp_err.flush_code = FLUSH_REM_INV_REQ_ERR; + qp_err.event_type = IRDMA_QP_EVENT_REQ_ERR; + break; + case IRDMA_AE_LLP_SEGMENT_TOO_SMALL: + case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR: + case IRDMA_AE_ROCE_RSP_LENGTH_ERROR: + case IRDMA_AE_IB_REMOTE_OP_ERROR: + qp_err.flush_code = FLUSH_REM_OP_ERR; + qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; + break; + case IRDMA_AE_LLP_TOO_MANY_RNRS: + qp_err.flush_code = FLUSH_RNR_RETRY_EXC_ERR; + qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; + break; + case IRDMA_AE_LCE_QP_CATASTROPHIC: + case IRDMA_AE_REMOTE_QP_CATASTROPHIC: + case IRDMA_AE_LOCAL_QP_CATASTROPHIC: + case IRDMA_AE_RCE_QP_CATASTROPHIC: + qp_err.flush_code = FLUSH_FATAL_ERR; + qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; + break; + default: + qp_err.flush_code = FLUSH_GENERAL_ERR; + qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; + break; + } + + return qp_err; +} #endif /* IRDMA_USER_H */ diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 167b5bdc668e..24f9503f410f 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -542,7 +542,7 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) iwqp->sc_qp.qp_uk.destroy_pending = true; - if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS) + if (iwqp->iwarp_state >= IRDMA_QP_STATE_IDLE) irdma_modify_qp_to_err(&iwqp->sc_qp); if (!iwqp->user_mode) @@ -4132,6 +4132,7 @@ static int irdma_post_send(struct ib_qp *ibqp, mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)); } + if (err) *bad_wr = ib_wr; @@ -4255,6 +4256,8 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode return IB_WC_MW_BIND_ERR; case FLUSH_REM_INV_REQ_ERR: return IB_WC_REM_INV_REQ_ERR; + case FLUSH_RNR_RETRY_EXC_ERR: + return IB_WC_RNR_RETRY_EXC_ERR; case FLUSH_FATAL_ERR: default: return IB_WC_FATAL_ERR; From 060842fed53f77a73824c9147f51dc6746c1267a Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Wed, 27 Aug 2025 10:25:45 -0500 Subject: [PATCH 68/85] RDMA/irdma: Update Kconfig Update Kconfig to add dependency on idpf module and add IPU E2000 to the list of supported devices. 
Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250827152545.2056-17-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/Kconfig | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/irdma/Kconfig b/drivers/infiniband/hw/irdma/Kconfig index 5f49a58590ed..0bd7e3fca1fb 100644 --- a/drivers/infiniband/hw/irdma/Kconfig +++ b/drivers/infiniband/hw/irdma/Kconfig @@ -4,10 +4,11 @@ config INFINIBAND_IRDMA depends on INET depends on IPV6 || !IPV6 depends on PCI - depends on ICE && I40E + depends on IDPF && ICE && I40E select GENERIC_ALLOCATOR select AUXILIARY_BUS select CRC32 help - This is an Intel(R) Ethernet Protocol Driver for RDMA driver - that support E810 (iWARP/RoCE) and X722 (iWARP) network devices. + This is an Intel(R) Ethernet Protocol Driver for RDMA that + supports IPU E2000 (RoCEv2), E810 (iWARP/RoCEv2) and X722 (iWARP) + network devices. From fa29d1e8877bcfb9813efc7d0138e01ca07f1863 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 16 Sep 2025 14:11:00 +0300 Subject: [PATCH 69/85] RDMA/core: Squash a single user static function To reduce dependencies in IFF_LOOPBACK in route and neighbour resolution steps, squash the static function to its single caller and simplify the code. Until now, network field was set even when neighbour resolution failed. With this change, dev_addr output fields are valid only when resolution is successful. Signed-off-by: Parav Pandit Reviewed-by: Vlad Dumitrescu Signed-off-by: Edward Srouji Link: https://patch.msgid.link/20250916111103.84069-2-edwards@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/addr.c | 55 ++++++++++++++-------------------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index be0743dac3ff..594e7ee335f7 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -465,17 +465,32 @@ static int addr_resolve_neigh(const struct dst_entry *dst, return ret; } -static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr, - const struct sockaddr *dst_in, - const struct dst_entry *dst, - const struct net_device *ndev) +static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr, + unsigned int *ndev_flags, + const struct sockaddr *dst_in, + const struct dst_entry *dst) { - int ret = 0; + struct net_device *ndev = READ_ONCE(dst->dev); - if (dst->dev->flags & IFF_LOOPBACK) + *ndev_flags = ndev->flags; + /* A physical device must be the RDMA device to use */ + if (ndev->flags & IFF_LOOPBACK) { + int ret; + /* + * RDMA (IB/RoCE, iWarp) doesn't run on lo interface or + * loopback IP address. So if route is resolved to loopback + * interface, translate that to a real ndev based on non + * loopback IP address. 
+ */ + ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in); + if (IS_ERR(ndev)) + return -ENODEV; ret = rdma_translate_ip(dst_in, dev_addr); - else + if (ret) + return ret; + } else { rdma_copy_src_l2_addr(dev_addr, dst->dev); + } /* * If there's a gateway and type of device not ARPHRD_INFINIBAND, @@ -490,31 +505,7 @@ static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr, else dev_addr->network = RDMA_NETWORK_IB; - return ret; -} - -static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr, - unsigned int *ndev_flags, - const struct sockaddr *dst_in, - const struct dst_entry *dst) -{ - struct net_device *ndev = READ_ONCE(dst->dev); - - *ndev_flags = ndev->flags; - /* A physical device must be the RDMA device to use */ - if (ndev->flags & IFF_LOOPBACK) { - /* - * RDMA (IB/RoCE, iWarp) doesn't run on lo interface or - * loopback IP address. So if route is resolved to loopback - * interface, translate that to a real ndev based on non - * loopback IP address. - */ - ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in); - if (IS_ERR(ndev)) - return -ENODEV; - } - - return copy_src_l2_addr(dev_addr, dst_in, dst, ndev); + return 0; } static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr) From 200651b9b8aadfbbec852f0e5d042d9abe75e2ab Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 16 Sep 2025 14:11:01 +0300 Subject: [PATCH 70/85] RDMA/core: Resolve MAC of next-hop device without ARP support Currently, if the next-hop netdevice does not support ARP resolution, the destination MAC address is silently set to zero without reporting an error. This leads to incorrect behavior and may result in packet transmission failures. Fix this by deferring MAC resolution to the IP stack via neighbour lookup, allowing proper resolution or error reporting as appropriate. Fixes: 7025fcd36bd6 ("IB: address translation to map IP toIB addresses (GIDs)") Signed-off-by: Parav Pandit Reviewed-by: Vlad Dumitrescu Signed-off-by: Edward Srouji Link: https://patch.msgid.link/20250916111103.84069-3-edwards@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/addr.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 594e7ee335f7..ca86c482662f 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -454,14 +454,10 @@ static int addr_resolve_neigh(const struct dst_entry *dst, { int ret = 0; - if (ndev_flags & IFF_LOOPBACK) { + if (ndev_flags & IFF_LOOPBACK) memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); - } else { - if (!(ndev_flags & IFF_NOARP)) { - /* If the device doesn't do ARP internally */ - ret = fetch_ha(dst, addr, dst_in, seq); - } - } + else + ret = fetch_ha(dst, addr, dst_in, seq); return ret; } From c31e4038c97f355967bf906c0b6914edb4f20d75 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 16 Sep 2025 14:11:02 +0300 Subject: [PATCH 71/85] RDMA/core: Use route entry flag to decide on loopback traffic addr_resolve() considers a destination to be local if the next-hop device of the resolved route for the destination is the loopback netdevice. This fails when the source and destination IP addresses belong to a netdev enslaved to a VRF netdev. 
In this case the next-hop device is the VRF itself: $ ip link add name myvrf up type vrf table 100 $ ip link set ens2f0np0 master myvrf up $ ip addr add 192.168.1.1/24 dev ens2f0np0 $ ip route get 192.168.1.1 oif myvrf local 192.168.1.1 dev myvrf table 100 src 192.168.1.1 uid 0 cache This results in packets being generated with an incorrect destination MAC of the VRF netdevice and ib_write_bw failing with timeout. Solve this by determining if a destination is local or not based on the resolved route's type rather than based on its next-hop netdevice loopback flag. This enables to resolve loopback traffic with and without VRF configurations in a uniform way. Signed-off-by: Parav Pandit Reviewed-by: Vlad Dumitrescu Signed-off-by: Edward Srouji Link: https://patch.msgid.link/20250916111103.84069-4-edwards@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/addr.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index ca86c482662f..61596cda2b65 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -446,31 +446,40 @@ static int addr6_resolve(struct sockaddr *src_sock, } #endif +static bool is_dst_local(const struct dst_entry *dst) +{ + if (dst->ops->family == AF_INET) + return !!(dst_rtable(dst)->rt_type & RTN_LOCAL); + else if (dst->ops->family == AF_INET6) + return !!(dst_rt6_info(dst)->rt6i_flags & RTF_LOCAL); + else + return false; +} + static int addr_resolve_neigh(const struct dst_entry *dst, const struct sockaddr *dst_in, struct rdma_dev_addr *addr, - unsigned int ndev_flags, u32 seq) { - int ret = 0; - - if (ndev_flags & IFF_LOOPBACK) + if (is_dst_local(dst)) { + /* When the destination is local entry, source and destination + * are same. Skip the neighbour lookup. + */ memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); - else - ret = fetch_ha(dst, addr, dst_in, seq); - return ret; + return 0; + } + + return fetch_ha(dst, addr, dst_in, seq); } static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr, - unsigned int *ndev_flags, const struct sockaddr *dst_in, const struct dst_entry *dst) { struct net_device *ndev = READ_ONCE(dst->dev); - *ndev_flags = ndev->flags; /* A physical device must be the RDMA device to use */ - if (ndev->flags & IFF_LOOPBACK) { + if (is_dst_local(dst)) { int ret; /* * RDMA (IB/RoCE, iWarp) doesn't run on lo interface or @@ -538,7 +547,6 @@ static int addr_resolve(struct sockaddr *src_in, u32 seq) { struct dst_entry *dst = NULL; - unsigned int ndev_flags = 0; struct rtable *rt = NULL; int ret; @@ -575,7 +583,7 @@ static int addr_resolve(struct sockaddr *src_in, rcu_read_unlock(); goto done; } - ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst); + ret = rdma_set_src_addr_rcu(addr, dst_in, dst); rcu_read_unlock(); /* @@ -583,7 +591,7 @@ static int addr_resolve(struct sockaddr *src_in, * only if src addr translation didn't fail. */ if (!ret && resolve_neigh) - ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq); + ret = addr_resolve_neigh(dst, dst_in, addr, seq); if (src_in->sa_family == AF_INET) ip_rt_put(rt); From 42f993d3439827c4959ea77e60620d7ebfb3a477 Mon Sep 17 00:00:00 2001 From: Vlad Dumitrescu Date: Tue, 16 Sep 2025 14:11:03 +0300 Subject: [PATCH 72/85] IB/ipoib: Ignore L3 master device Currently, all master upper netdevices (e.g., bond, VRF) are treated equally. 
When a VRF netdevice is used over an IPoIB netdevice, the expected netdev resolution is on the lower IPoIB device which has the IP address assigned to it and not the VRF device. The rdma_cm module (CMA) tries to match incoming requests to a particular netdevice. When successful, it also validates that the return path points to the same device by performing a routing table lookup. Currently, the former would resolve to the VRF netdevice, while the latter to the correct lower IPoIB netdevice, leading to failure in rdma_cm. Improve this by ignoring the VRF master netdevice, if it exists, and instead return the lower IPoIB device. Signed-off-by: Vlad Dumitrescu Reviewed-by: Parav Pandit Signed-off-by: Edward Srouji Link: https://patch.msgid.link/20250916111103.84069-5-edwards@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 7acafc5c0e09..5b4d76e97437 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -351,26 +351,27 @@ static bool ipoib_is_dev_match_addr_rcu(const struct sockaddr *addr, } /* - * Find the master net_device on top of the given net_device. + * Find the L2 master net_device on top of the given net_device. * @dev: base IPoIB net_device * - * Returns the master net_device with a reference held, or the same net_device - * if no master exists. + * Returns the L2 master net_device with reference held if the L2 master + * exists (such as bond netdevice), or returns same netdev with reference + * held when master does not exist or when L3 master (such as VRF netdev). */ static struct net_device *ipoib_get_master_net_dev(struct net_device *dev) { struct net_device *master; rcu_read_lock(); + master = netdev_master_upper_dev_get_rcu(dev); + if (!master || netif_is_l3_master(master)) + master = dev; + dev_hold(master); rcu_read_unlock(); - if (master) - return master; - - dev_hold(dev); - return dev; + return master; } struct ipoib_walk_data { @@ -522,7 +523,7 @@ static struct net_device *ipoib_get_net_dev_by_params( if (ret) return NULL; - /* See if we can find a unique device matching the L2 parameters */ + /* See if we can find a unique device matching the pkey and GID */ matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index, gid, NULL, &net_dev); @@ -535,7 +536,7 @@ static struct net_device *ipoib_get_net_dev_by_params( dev_put(net_dev); - /* Couldn't find a unique device with L2 parameters only. Use L3 + /* Couldn't find a unique device with pkey and GID only. Use L3 * address to uniquely match the net device */ matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index, gid, addr, &net_dev); From 1428cd764cd708d53a072a2f208d87014bfe05bc Mon Sep 17 00:00:00 2001 From: Vlad Dumitrescu Date: Tue, 16 Sep 2025 19:31:12 +0300 Subject: [PATCH 73/85] IB/sa: Fix sa_local_svc_timeout_ms read race When computing the delta, the sa_local_svc_timeout_ms is read without ib_nl_request_lock held. Though unlikely in practice, this can cause a race condition if multiple local service threads are managing the timeout. 
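For example, the resulting ordering in ib_nl_handle_set_timeout() looks like this (a condensed sketch, abbreviated from the hunk below; only the lock placement matters here):

    spin_lock_irqsave(&ib_nl_request_lock, flags);
    /* sa_local_svc_timeout_ms is now read under the same lock that
     * protects its writers and the pending-request list
     */
    delta = timeout - sa_local_svc_timeout_ms;
    if (delta != 0) {
            sa_local_svc_timeout_ms = timeout;
            list_for_each_entry(query, &ib_nl_request_list, list) {
                    /* rescale each pending request's timeout by delta */
            }
    }
    spin_unlock_irqrestore(&ib_nl_request_lock, flags);
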
Fixes: 2ca546b92a02 ("IB/sa: Route SA pathrecord query through netlink") Signed-off-by: Vlad Dumitrescu Reviewed-by: Mark Zhang Signed-off-by: Edward Srouji Link: https://patch.msgid.link/20250916163112.98414-1-edwards@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/sa_query.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index c0a7af1b4fe4..c23e9c847314 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1074,6 +1074,8 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb, if (timeout > IB_SA_LOCAL_SVC_TIMEOUT_MAX) timeout = IB_SA_LOCAL_SVC_TIMEOUT_MAX; + spin_lock_irqsave(&ib_nl_request_lock, flags); + delta = timeout - sa_local_svc_timeout_ms; if (delta < 0) abs_delta = -delta; @@ -1081,7 +1083,6 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb, abs_delta = delta; if (delta != 0) { - spin_lock_irqsave(&ib_nl_request_lock, flags); sa_local_svc_timeout_ms = timeout; list_for_each_entry(query, &ib_nl_request_list, list) { if (delta < 0 && abs_delta > query->timeout) @@ -1099,9 +1100,10 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb, if (delay) mod_delayed_work(ib_nl_wq, &ib_nl_timed_work, (unsigned long)delay); - spin_unlock_irqrestore(&ib_nl_request_lock, flags); } + spin_unlock_irqrestore(&ib_nl_request_lock, flags); + settimeout_out: return 0; } From 8ca7eada62fcfabf6ec1dc7468941e791c1d8729 Mon Sep 17 00:00:00 2001 From: Gui-Dong Han Date: Fri, 19 Sep 2025 02:52:12 +0000 Subject: [PATCH 74/85] RDMA/rxe: Fix race in do_task() when draining When do_task() exhausts its iteration budget (!ret), it sets the state to TASK_STATE_IDLE to reschedule, without a secondary check on the current task->state. This can overwrite the TASK_STATE_DRAINING state set by a concurrent call to rxe_cleanup_task() or rxe_disable_task(). While state changes are protected by a spinlock, both rxe_cleanup_task() and rxe_disable_task() release the lock while waiting for the task to finish draining in the while(!is_done(task)) loop. The race occurs if do_task() hits its iteration limit and acquires the lock in this window. The cleanup logic may then proceed while the task incorrectly reschedules itself, leading to a potential use-after-free. This bug was introduced during the migration from tasklets to workqueues, where the special handling for the draining case was lost. Fix this by restoring the original pre-migration behavior. If the state is TASK_STATE_DRAINING when iterations are exhausted, set cont to 1 to force a new loop iteration. This allows the task to finish its work, so that a subsequent iteration can reach the switch statement and correctly transition the state to TASK_STATE_DRAINED, stopping the task as intended. 
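For example, the window described above looks roughly like this (a minimal sketch; names follow rxe_task.c but the bodies are abbreviated and the exact locking is simplified):

    /* rxe_cleanup_task() / rxe_disable_task(), simplified */
    spin_lock_irqsave(&task->lock, flags);
    task->state = TASK_STATE_DRAINING;
    spin_unlock_irqrestore(&task->lock, flags);
    while (!is_done(task))        /* lock released: do_task() can run here and,   */
            cond_resched();       /* before this fix, hit its iteration limit and */
                                  /* overwrite the state with TASK_STATE_IDLE     */
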
Fixes: 9b4b7c1f9f54 ("RDMA/rxe: Add workqueue support for rxe tasks") Reviewed-by: Zhu Yanjun Signed-off-by: Gui-Dong Han Link: https://patch.msgid.link/20250919025212.1682087-1-hanguidong02@gmail.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_task.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 6f8f353e9583..f522820b950c 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -132,8 +132,12 @@ static void do_task(struct rxe_task *task) * yield the cpu and reschedule the task */ if (!ret) { - task->state = TASK_STATE_IDLE; - resched = 1; + if (task->state != TASK_STATE_DRAINING) { + task->state = TASK_STATE_IDLE; + resched = 1; + } else { + cont = 1; + } goto exit; } From ed9836c040bac2823dffd4e10c107206d88ac548 Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Fri, 19 Sep 2025 17:43:00 +0530 Subject: [PATCH 75/85] RDMA/ionic: Fix build failure on SPARC due to xchg() operand size xchg() is used to safely handle the event queue arming. However SPARC xchg operates only 4B of variable. Change variable type from bool to int. Unverified Error/Warning (likely false positive, kindly check if interested): ERROR: modpost: "__xchg_called_with_bad_pointer" [drivers/infiniband/hw/ionic/ionic_rdma.ko] undefined! Error/Warning ids grouped by kconfigs: recent_errors `-- sparc-allmodconfig `-- ERROR:__xchg_called_with_bad_pointer-drivers-infiniband-hw-ionic-ionic_rdma.ko-undefined Fixes: f3bdbd42702c ("RDMA/ionic: Create device queues to support admin operations") Reported-by: Leon Romanovsky Closes: https://lore.kernel.org/lkml/20250918180750.GA135135@unreal/ Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20250919121301.1113759-1-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/ionic/ionic_admin.c | 8 ++++---- drivers/infiniband/hw/ionic/ionic_ibdev.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/ionic/ionic_admin.c b/drivers/infiniband/hw/ionic/ionic_admin.c index 1ba7a8ecc073..d9221ef134c4 100644 --- a/drivers/infiniband/hw/ionic/ionic_admin.c +++ b/drivers/infiniband/hw/ionic/ionic_admin.c @@ -945,7 +945,7 @@ static void ionic_poll_eq_work(struct work_struct *work) npolled, 0); queue_work(ionic_evt_workq, &eq->work); } else { - xchg(&eq->armed, true); + xchg(&eq->armed, 1); ionic_intr_credits(eq->dev->lif_cfg.intr_ctrl, eq->intr, 0, IONIC_INTR_CRED_UNMASK); } @@ -954,10 +954,10 @@ static void ionic_poll_eq_work(struct work_struct *work) static irqreturn_t ionic_poll_eq_isr(int irq, void *eqptr) { struct ionic_eq *eq = eqptr; - bool was_armed; + int was_armed; u32 npolled; - was_armed = xchg(&eq->armed, false); + was_armed = xchg(&eq->armed, 0); if (unlikely(!eq->enable) || !was_armed) return IRQ_HANDLED; @@ -968,7 +968,7 @@ static irqreturn_t ionic_poll_eq_isr(int irq, void *eqptr) npolled, 0); queue_work(ionic_evt_workq, &eq->work); } else { - xchg(&eq->armed, true); + xchg(&eq->armed, 1); ionic_intr_credits(eq->dev->lif_cfg.intr_ctrl, eq->intr, 0, IONIC_INTR_CRED_UNMASK); } diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.h b/drivers/infiniband/hw/ionic/ionic_ibdev.h index b7a1a57bae03..82fda1e3cdb6 100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.h +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.h @@ -126,7 +126,7 @@ struct ionic_eq { struct ionic_queue q; - bool armed; + int armed; bool enable; struct work_struct work; From 
260cce64aaa2828d42956d356c682389aca24b47 Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Fri, 19 Sep 2025 17:43:01 +0530 Subject: [PATCH 76/85] RDMA/ionic: Use ether_addr_copy instead of memcpy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit eth header from ib_ud_header structure packs the mac into 4B high and 2B low parts. But when 4B high is used in memcpy, it sees it as overflow. However, this is safe due to the 4B high and 2B low arrangement in the structure. To avoid the memcpy warning, use ether_addr_copy to copy the mac address. In function ‘fortify_memcpy_chk’, inlined from ‘ionic_set_ah_attr.isra’ at drivers/infiniband/hw/ionic/ionic_controlpath.c:609:3: ./include/linux/fortify-string.h:580:25: error: call to ‘__read_overflow2_field’ declared with attribute warning: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Werror=attribute-warning] 580 | __read_overflow2_field(q_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors make[6]: *** [scripts/Makefile.build:287: drivers/infiniband/hw/ionic/ionic_controlpath.o] Error 1 make[5]: *** [scripts/Makefile.build:556: drivers/infiniband/hw/ionic] Error 2 make[5]: *** Waiting for unfinished jobs.... make[4]: *** [scripts/Makefile.build:556: drivers/infiniband/hw] Error 2 make[3]: *** [scripts/Makefile.build:556: drivers/infiniband] Error 2 make[2]: *** [scripts/Makefile.build:556: drivers] Error 2 make[1]: *** [/tmp/tmp53nb1nwr/Makefile:2011: .] Error 2 make: *** [Makefile:248: __sub-make] Error 2 Fixes: e8521822c733 ("RDMA/ionic: Register device ops for control path") Reported-by: Leon Romanovsky Closes: https://lore.kernel.org/lkml/20250918180750.GA135135@unreal/ Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20250919121301.1113759-2-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/ionic/ionic_controlpath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ionic/ionic_controlpath.c b/drivers/infiniband/hw/ionic/ionic_controlpath.c index 9ce7c2e6d7a8..ea12d9b8e125 100644 --- a/drivers/infiniband/hw/ionic/ionic_controlpath.c +++ b/drivers/infiniband/hw/ionic/ionic_controlpath.c @@ -606,7 +606,7 @@ static void ionic_set_ah_attr(struct ionic_ibdev *dev, memset(ah_attr, 0, sizeof(*ah_attr)); ah_attr->type = RDMA_AH_ATTR_TYPE_ROCE; if (hdr->eth_present) - memcpy(&ah_attr->roce.dmac, &hdr->eth.dmac_h, ETH_ALEN); + ether_addr_copy(ah_attr->roce.dmac, hdr->eth.dmac_h); rdma_ah_set_sl(ah_attr, vlan >> VLAN_PRIO_SHIFT); rdma_ah_set_port_num(ah_attr, 1); rdma_ah_set_grh(ah_attr, NULL, flow_label, sgid_index, ttl, tos); From 4b6b6233f50f72353b54295ba594990b19f33223 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 18 Sep 2025 20:53:41 +0300 Subject: [PATCH 77/85] RDMA: Use %pe format specifier for error pointers Convert error logging throughout the RDMA subsystem to use the %pe format specifier instead of PTR_ERR() with integer format specifiers. 
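For example, the conversion pattern is (a condensed sketch; any IS_ERR()-encoded pointer works, `pd` here is just an illustration):

    /* before: needs PTR_ERR() and prints a raw number, e.g. "err -12" */
    pr_err("ib_alloc_pd failed: %ld\n", PTR_ERR(pd));

    /* after: %pe decodes the error pointer directly, e.g. "err -ENOMEM"
     * (symbolic names require CONFIG_SYMBOLIC_ERRNAME=y; otherwise the
     * numeric value is printed)
     */
    pr_err("ib_alloc_pd failed: %pe\n", pd);
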
Link: https://patch.msgid.link/e81ec02df1e474be20417fb62e779776e3f47a50.1758217936.git.leon@kernel.org Reviewed-by: Zhu Yanjun Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/agent.c | 3 +-- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 4 ++-- drivers/infiniband/hw/bnxt_re/main.c | 3 +-- drivers/infiniband/hw/cxgb4/device.c | 5 ++--- drivers/infiniband/hw/efa/efa_com.c | 4 ++-- drivers/infiniband/hw/efa/efa_verbs.c | 6 ++++-- drivers/infiniband/hw/hfi1/device.c | 4 ++-- drivers/infiniband/hw/hfi1/user_sdma.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_mr.c | 8 ++++---- drivers/infiniband/hw/ionic/ionic_admin.c | 18 +++++++++--------- drivers/infiniband/hw/irdma/verbs.c | 6 +++--- drivers/infiniband/hw/mana/main.c | 5 ++--- drivers/infiniband/hw/mana/mr.c | 6 ++++-- drivers/infiniband/hw/mlx4/mad.c | 8 ++++---- drivers/infiniband/hw/mlx4/qp.c | 3 ++- drivers/infiniband/hw/mlx5/data_direct.c | 2 +- drivers/infiniband/hw/mlx5/gsi.c | 15 +++++++++------ drivers/infiniband/hw/mlx5/main.c | 14 ++++++++++---- drivers/infiniband/hw/mlx5/mr.c | 3 +-- drivers/infiniband/ulp/srpt/ib_srpt.c | 16 +++++++--------- 20 files changed, 72 insertions(+), 65 deletions(-) diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 3bb46696731e..25a060a28301 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -110,8 +110,7 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh * agent = port_priv->agent[qpn]; ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num); if (IS_ERR(ah)) { - dev_err(&device->dev, "ib_create_ah_from_wc error %ld\n", - PTR_ERR(ah)); + dev_err(&device->dev, "ib_create_ah_from_wc error %pe\n", ah); return; } diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index f12d6cd3ae93..2639ca21a027 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3307,9 +3307,9 @@ int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) IB_ACCESS_LOCAL_WRITE); if (IS_ERR(cq->resize_umem)) { rc = PTR_ERR(cq->resize_umem); + ibdev_err(&rdev->ibdev, "%s: ib_umem_get failed! rc = %pe\n", + __func__, cq->resize_umem); cq->resize_umem = NULL; - ibdev_err(&rdev->ibdev, "%s: ib_umem_get failed! 
rc = %d\n", - __func__, rc); goto fail; } cq->resize_cqe = entries; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index d8d3999d329e..d822cdc82e65 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1956,8 +1956,7 @@ static void bnxt_re_read_vpd_info(struct bnxt_re_dev *rdev) vpd_data = pci_vpd_alloc(pdev, &vpd_size); if (IS_ERR(vpd_data)) { - pci_warn(pdev, "Unable to read VPD, err=%ld\n", - PTR_ERR(vpd_data)); + pci_warn(pdev, "Unable to read VPD, err=%pe\n", vpd_data); return; } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index b67747ae6a68..d892f55febe2 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -1228,9 +1228,8 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state) if (!ctx->dev) { ctx->dev = c4iw_alloc(&ctx->lldi); if (IS_ERR(ctx->dev)) { - pr_err("%s: initialization failed: %ld\n", - pci_name(ctx->lldi.pdev), - PTR_ERR(ctx->dev)); + pr_err("%s: initialization failed: %pe\n", + pci_name(ctx->lldi.pdev), ctx->dev); ctx->dev = NULL; break; } diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index e6377602a9c4..0e979ca10d24 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -635,9 +635,9 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, if (IS_ERR(comp_ctx)) { ibdev_err_ratelimited( aq->efa_dev, - "Failed to submit command %s (opcode %u) err %ld\n", + "Failed to submit command %s (opcode %u) err %pe\n", efa_com_cmd_str(cmd->aq_common_descriptor.opcode), - cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx)); + cmd->aq_common_descriptor.opcode, comp_ctx); up(&aq->avail_cmds); atomic64_inc(&aq->stats.cmd_err); diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 886923d5fe50..d9a12681f843 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1788,7 +1788,8 @@ struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, access_flags); if (IS_ERR(umem_dmabuf)) { err = PTR_ERR(umem_dmabuf); - ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%d]\n", err); + ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%pe]\n", + umem_dmabuf); goto err_free; } @@ -1832,7 +1833,8 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); ibdev_dbg(&dev->ibdev, - "Failed to pin and map user space memory[%d]\n", err); + "Failed to pin and map user space memory[%pe]\n", + mr->umem); goto err_free; } diff --git a/drivers/infiniband/hw/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c index 4250d077b06f..a98a4175e53b 100644 --- a/drivers/infiniband/hw/hfi1/device.c +++ b/drivers/infiniband/hw/hfi1/device.c @@ -64,9 +64,9 @@ int hfi1_cdev_init(int minor, const char *name, if (IS_ERR(device)) { ret = PTR_ERR(device); + pr_err("Could not create device for minor %d, %s (err %pe)\n", + minor, name, device); device = NULL; - pr_err("Could not create device for minor %d, %s (err %d)\n", - minor, name, -ret); cdev_del(cdev); } done: diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index b72625283fcf..9b1aece1b080 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -498,8 +498,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, ntids, sizeof(*req->tids)); if 
(IS_ERR(tmp)) { ret = PTR_ERR(tmp); - SDMA_DBG(req, "Failed to copy %d TIDs (%d)", - ntids, ret); + SDMA_DBG(req, "Failed to copy %d TIDs (%pe)", ntids, + tmp); goto free_req; } req->tids = tmp; diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 0f037e545520..31cb8699e198 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -594,8 +594,8 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, mtr->umem = ib_umem_get(ibdev, user_addr, total_size, buf_attr->user_access); if (IS_ERR(mtr->umem)) { - ibdev_err(ibdev, "failed to get umem, ret = %ld.\n", - PTR_ERR(mtr->umem)); + ibdev_err(ibdev, "failed to get umem, ret = %pe.\n", + mtr->umem); return -ENOMEM; } } else { @@ -605,8 +605,8 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, !mtr_has_mtt(buf_attr) ? HNS_ROCE_BUF_DIRECT : 0); if (IS_ERR(mtr->kmem)) { - ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n", - PTR_ERR(mtr->kmem)); + ibdev_err(ibdev, "failed to alloc kmem, ret = %pe.\n", + mtr->kmem); return PTR_ERR(mtr->kmem); } } diff --git a/drivers/infiniband/hw/ionic/ionic_admin.c b/drivers/infiniband/hw/ionic/ionic_admin.c index d9221ef134c4..c2ff21bcd96f 100644 --- a/drivers/infiniband/hw/ionic/ionic_admin.c +++ b/drivers/infiniband/hw/ionic/ionic_admin.c @@ -1108,13 +1108,13 @@ int ionic_create_rdma_admin(struct ionic_ibdev *dev) if (eq_i < IONIC_EQ_COUNT_MIN) { ibdev_err(&dev->ibdev, - "fail create eq %d\n", rc); + "fail create eq %pe\n", eq); goto out; } /* ok, just fewer eq than device supports */ - ibdev_dbg(&dev->ibdev, "eq count %d want %d rc %d\n", - eq_i, dev->lif_cfg.eq_count, rc); + ibdev_dbg(&dev->ibdev, "eq count %d want %d rc %pe\n", + eq_i, dev->lif_cfg.eq_count, eq); rc = 0; break; @@ -1140,13 +1140,13 @@ int ionic_create_rdma_admin(struct ionic_ibdev *dev) if (!aq_i) { ibdev_err(&dev->ibdev, - "failed to create acq %d\n", rc); + "failed to create acq %pe\n", vcq); goto out; } /* ok, just fewer adminq than device supports */ - ibdev_dbg(&dev->ibdev, "acq count %d want %d rc %d\n", - aq_i, dev->lif_cfg.aq_count, rc); + ibdev_dbg(&dev->ibdev, "acq count %d want %d rc %pe\n", + aq_i, dev->lif_cfg.aq_count, vcq); break; } @@ -1161,13 +1161,13 @@ int ionic_create_rdma_admin(struct ionic_ibdev *dev) if (!aq_i) { ibdev_err(&dev->ibdev, - "failed to create aq %d\n", rc); + "failed to create aq %pe\n", aq); goto out; } /* ok, just fewer adminq than device supports */ - ibdev_dbg(&dev->ibdev, "aq count %d want %d rc %d\n", - aq_i, dev->lif_cfg.aq_count, rc); + ibdev_dbg(&dev->ibdev, "aq count %d want %d rc %pe\n", + aq_i, dev->lif_cfg.aq_count, aq); break; } diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 24f9503f410f..a47ccc86e485 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -3600,9 +3600,9 @@ static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start, umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd, access); if (IS_ERR(umem_dmabuf)) { - err = PTR_ERR(umem_dmabuf); - ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%d]\n", err); - return ERR_PTR(err); + ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%pe]\n", + umem_dmabuf); + return ERR_CAST(umem_dmabuf); } iwmr = irdma_alloc_iwmr(&umem_dmabuf->umem, pd, virt, IRDMA_MEMREG_TYPE_MEM); diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index 
6a2471f2e804..fac159f7128d 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -273,9 +273,8 @@ int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size, umem = ib_umem_get(&mdev->ib_dev, addr, size, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) { - err = PTR_ERR(umem); - ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %d\n", err); - return err; + ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %pe\n", umem); + return PTR_ERR(umem); } err = mana_ib_create_zero_offset_dma_region(mdev, umem, &queue->gdma_region); diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c index 55701046ffba..3d0245a4c1ed 100644 --- a/drivers/infiniband/hw/mana/mr.c +++ b/drivers/infiniband/hw/mana/mr.c @@ -138,7 +138,8 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length, if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); ibdev_dbg(ibdev, - "Failed to get umem for register user-mr, %d\n", err); + "Failed to get umem for register user-mr, %pe\n", + mr->umem); goto err_free; } @@ -220,7 +221,8 @@ struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 leng umem_dmabuf = ib_umem_dmabuf_get_pinned(ibdev, start, length, fd, access_flags); if (IS_ERR(umem_dmabuf)) { err = PTR_ERR(umem_dmabuf); - ibdev_dbg(ibdev, "Failed to get dmabuf umem, %d\n", err); + ibdev_dbg(ibdev, "Failed to get dmabuf umem, %pe\n", + umem_dmabuf); goto err_free; } diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index e6e132f10625..91c714f72099 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1836,9 +1836,9 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx, tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr); if (IS_ERR(tun_qp->qp)) { ret = PTR_ERR(tun_qp->qp); + pr_err("Couldn't create %s QP (%pe)\n", + create_tun ? "tunnel" : "special", tun_qp->qp); tun_qp->qp = NULL; - pr_err("Couldn't create %s QP (%d)\n", - create_tun ? 
"tunnel" : "special", ret); return ret; } @@ -2017,14 +2017,14 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, NULL, ctx, &cq_attr); if (IS_ERR(ctx->cq)) { ret = PTR_ERR(ctx->cq); - pr_err("Couldn't create tunnel CQ (%d)\n", ret); + pr_err("Couldn't create tunnel CQ (%pe)\n", ctx->cq); goto err_buf; } ctx->pd = ib_alloc_pd(ctx->ib_dev, 0); if (IS_ERR(ctx->pd)) { ret = PTR_ERR(ctx->pd); - pr_err("Couldn't create tunnel PD (%d)\n", ret); + pr_err("Couldn't create tunnel PD (%pe)\n", ctx->pd); goto err_cq; } diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 50fd407103c7..f2887ae6390e 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1652,7 +1652,8 @@ int mlx4_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, sqp->roce_v2_gsi = ib_create_qp(pd, init_attr); if (IS_ERR(sqp->roce_v2_gsi)) { - pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi)); + pr_err("Failed to create GSI QP for RoCEv2 (%pe)\n", + sqp->roce_v2_gsi); sqp->roce_v2_gsi = NULL; } else { to_mqp(sqp->roce_v2_gsi)->flags |= diff --git a/drivers/infiniband/hw/mlx5/data_direct.c b/drivers/infiniband/hw/mlx5/data_direct.c index b9ba84afaae2..b81ac5709b56 100644 --- a/drivers/infiniband/hw/mlx5/data_direct.c +++ b/drivers/infiniband/hw/mlx5/data_direct.c @@ -35,7 +35,7 @@ static int mlx5_data_direct_vpd_get_vuid(struct mlx5_data_direct_dev *dev) vpd_data = pci_vpd_alloc(pdev, &vpd_size); if (IS_ERR(vpd_data)) { - pci_err(pdev, "Unable to read VPD, err=%ld\n", PTR_ERR(vpd_data)); + pci_err(pdev, "Unable to read VPD, err=%pe\n", vpd_data); return PTR_ERR(vpd_data); } diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index b804f2dd5628..d5487834ed25 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -131,8 +131,9 @@ int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp, gsi->cq = ib_alloc_cq(pd->device, gsi, attr->cap.max_send_wr, 0, IB_POLL_SOFTIRQ); if (IS_ERR(gsi->cq)) { - mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n", - PTR_ERR(gsi->cq)); + mlx5_ib_warn(dev, + "unable to create send CQ for GSI QP. error %pe\n", + gsi->cq); ret = PTR_ERR(gsi->cq); goto err_free_wrs; } @@ -147,8 +148,9 @@ int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp, gsi->rx_qp = ib_create_qp(pd, &hw_init_attr); if (IS_ERR(gsi->rx_qp)) { - mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n", - PTR_ERR(gsi->rx_qp)); + mlx5_ib_warn(dev, + "unable to create hardware GSI QP. 
error %pe\n", + gsi->rx_qp); ret = PTR_ERR(gsi->rx_qp); goto err_destroy_cq; } @@ -294,8 +296,9 @@ static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index) qp = create_gsi_ud_qp(gsi); if (IS_ERR(qp)) { - mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n", - PTR_ERR(qp)); + mlx5_ib_warn(dev, + "unable to create hardware UD QP for GSI: %pe\n", + qp); return; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 1fbc0351063d..fc1e86f6c409 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3051,14 +3051,16 @@ int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev) pd = ib_alloc_pd(ibdev, 0); if (IS_ERR(pd)) { ret = PTR_ERR(pd); - mlx5_ib_err(dev, "Couldn't allocate PD for res init, err=%d\n", ret); + mlx5_ib_err(dev, "Couldn't allocate PD for res init, err=%pe\n", + pd); goto unlock; } cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr); if (IS_ERR(cq)) { ret = PTR_ERR(cq); - mlx5_ib_err(dev, "Couldn't create CQ for res init, err=%d\n", ret); + mlx5_ib_err(dev, "Couldn't create CQ for res init, err=%pe\n", + cq); ib_dealloc_pd(pd); goto unlock; } @@ -3102,7 +3104,9 @@ int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev) s0 = ib_create_srq(devr->p0, &attr); if (IS_ERR(s0)) { ret = PTR_ERR(s0); - mlx5_ib_err(dev, "Couldn't create SRQ 0 for res init, err=%d\n", ret); + mlx5_ib_err(dev, + "Couldn't create SRQ 0 for res init, err=%pe\n", + s0); goto unlock; } @@ -3114,7 +3118,9 @@ int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev) s1 = ib_create_srq(devr->p0, &attr); if (IS_ERR(s1)) { ret = PTR_ERR(s1); - mlx5_ib_err(dev, "Couldn't create SRQ 1 for res init, err=%d\n", ret); + mlx5_ib_err(dev, + "Couldn't create SRQ 1 for res init, err=%pe\n", + s1); ib_destroy_srq(s0); } diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index d3c82ffa300e..325fa04cbe8a 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1652,8 +1652,7 @@ reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device, fd, access_flags); if (IS_ERR(umem_dmabuf)) { - mlx5_ib_dbg(dev, "umem_dmabuf get failed (%ld)\n", - PTR_ERR(umem_dmabuf)); + mlx5_ib_dbg(dev, "umem_dmabuf get failed (%pe)\n", umem_dmabuf); return ERR_CAST(umem_dmabuf); } diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 5dfb4644446b..71269446353d 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -667,9 +667,9 @@ static int srpt_refresh_port(struct srpt_port *sport) srpt_mad_recv_handler, sport, 0); if (IS_ERR(mad_agent)) { - pr_err("%s-%d: MAD agent registration failed (%ld). Note: this is expected if SR-IOV is enabled.\n", + pr_err("%s-%d: MAD agent registration failed (%pe). 
Note: this is expected if SR-IOV is enabled.\n", dev_name(&sport->sdev->device->dev), sport->port, - PTR_ERR(mad_agent)); + mad_agent); sport->mad_agent = NULL; memset(&port_modify, 0, sizeof(port_modify)); port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; @@ -1865,8 +1865,8 @@ retry: IB_POLL_WORKQUEUE); if (IS_ERR(ch->cq)) { ret = PTR_ERR(ch->cq); - pr_err("failed to create CQ cqe= %d ret= %d\n", - ch->rq_size + sq_size, ret); + pr_err("failed to create CQ cqe= %d ret= %pe\n", + ch->rq_size + sq_size, ch->cq); goto out; } ch->cq_size = ch->rq_size + sq_size; @@ -3132,7 +3132,7 @@ static int srpt_alloc_srq(struct srpt_device *sdev) WARN_ON_ONCE(sdev->srq); srq = ib_create_srq(sdev->pd, &srq_attr); if (IS_ERR(srq)) { - pr_debug("ib_create_srq() failed: %ld\n", PTR_ERR(srq)); + pr_debug("ib_create_srq() failed: %pe\n", srq); return PTR_ERR(srq); } @@ -3236,8 +3236,7 @@ static int srpt_add_one(struct ib_device *device) if (rdma_port_get_link_layer(device, 1) == IB_LINK_LAYER_INFINIBAND) sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev); if (IS_ERR(sdev->cm_id)) { - pr_info("ib_create_cm_id() failed: %ld\n", - PTR_ERR(sdev->cm_id)); + pr_info("ib_create_cm_id() failed: %pe\n", sdev->cm_id); ret = PTR_ERR(sdev->cm_id); sdev->cm_id = NULL; if (!rdma_cm_id) @@ -3687,8 +3686,7 @@ static struct rdma_cm_id *srpt_create_rdma_id(struct sockaddr *listen_addr) rdma_cm_id = rdma_create_id(&init_net, srpt_rdma_cm_handler, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(rdma_cm_id)) { - pr_err("RDMA/CM ID creation failed: %ld\n", - PTR_ERR(rdma_cm_id)); + pr_err("RDMA/CM ID creation failed: %pe\n", rdma_cm_id); goto out; } From 9b9e32f75aa3d257e3ee3ab0a0f9ad5fbfb298af Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Sun, 21 Sep 2025 01:18:48 -0700 Subject: [PATCH 78/85] RDMA/bnxt_re: Fix incorrect errno used in function comments The function comments in qplib_rcfw.c mention -ETIMEOUT as a possible return value. However, the correct errno is -ETIMEDOUT. Update the comments to reflect the proper return value to avoid confusion for developers and users referring to the code. Signed-off-by: Alok Tiwari Link: https://patch.msgid.link/20250921081854.1059094-1-alok.a.tiwari@oracle.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 5e34395472c5..295a9610f3e6 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -186,7 +186,7 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) * wait for command completion. Maximum holding interval is 8 second. * * Returns: - * -ETIMEOUT if command is not completed in specific time interval. + * -ETIMEDOUT if command is not completed in specific time interval. * 0 if command is completed by firmware. */ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) @@ -382,7 +382,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, * This function can not be called from non-sleepable context. * * Returns: - * -ETIMEOUT if command is not completed in specific time interval. + * -ETIMEDOUT if command is not completed in specific time interval. * 0 if command is completed by firmware. 
*/ static int __poll_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) From 604f202bc9ebf9d8a96caec3375a7ba3bcf149b2 Mon Sep 17 00:00:00 2001 From: Anantha Prabhu Date: Tue, 23 Sep 2025 11:56:56 +0530 Subject: [PATCH 79/85] RDMA/bnxt_re: Add debugfs info entry for device and resource information Add a new debugfs info entry that displays device information and non-statistics data using the seq_file interface. This entry shows: - Resource watermarks (peak usage tracking) - Operational counters (CQ resize count) - Doorbell pacing information Link: https://patch.msgid.link/r/20250923062657.981487-2-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Anantha Prabhu Signed-off-by: Kalesh AP Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/debugfs.c | 37 +++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.c b/drivers/infiniband/hw/bnxt_re/debugfs.c index e632f1661b92..be5e9b5ca2f0 100644 --- a/drivers/infiniband/hw/bnxt_re/debugfs.c +++ b/drivers/infiniband/hw/bnxt_re/debugfs.c @@ -8,6 +8,7 @@ #include #include +#include #include #include "bnxt_ulp.h" @@ -314,6 +315,40 @@ static const struct file_operations bnxt_re_cc_config_ops = { .write = bnxt_re_cc_config_set, }; +static int info_show(struct seq_file *m, void *unused) +{ + struct bnxt_re_dev *rdev = m->private; + struct bnxt_re_res_cntrs *res_s = &rdev->stats.res; + + seq_puts(m, "Info:\n"); + seq_printf(m, "Device Name\t\t: %s\n", dev_name(&rdev->ibdev.dev)); + seq_printf(m, "PD Watermark\t\t: %llu\n", res_s->pd_watermark); + seq_printf(m, "AH Watermark\t\t: %llu\n", res_s->ah_watermark); + seq_printf(m, "QP Watermark\t\t: %llu\n", res_s->qp_watermark); + seq_printf(m, "RC QP Watermark\t\t: %llu\n", res_s->rc_qp_watermark); + seq_printf(m, "UD QP Watermark\t\t: %llu\n", res_s->ud_qp_watermark); + seq_printf(m, "SRQ Watermark\t\t: %llu\n", res_s->srq_watermark); + seq_printf(m, "CQ Watermark\t\t: %llu\n", res_s->cq_watermark); + seq_printf(m, "MR Watermark\t\t: %llu\n", res_s->mr_watermark); + seq_printf(m, "MW Watermark\t\t: %llu\n", res_s->mw_watermark); + seq_printf(m, "CQ Resize Count\t\t: %d\n", atomic_read(&res_s->resize_count)); + if (rdev->pacing.dbr_pacing) { + seq_printf(m, "DB Pacing Reschedule\t: %llu\n", rdev->stats.pacing.resched); + seq_printf(m, "DB Pacing Complete\t: %llu\n", rdev->stats.pacing.complete); + seq_printf(m, "DB Pacing Alerts\t: %llu\n", rdev->stats.pacing.alerts); + seq_printf(m, "DB FIFO Register\t: 0x%x\n", + readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off)); + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(info); + +static void bnxt_re_debugfs_add_info(struct bnxt_re_dev *rdev) +{ + debugfs_create_file("info", 0400, rdev->dbg_root, rdev, &info_fops); +} + void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev) { struct pci_dev *pdev = rdev->en_dev->pdev; @@ -325,6 +360,8 @@ void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev) rdev->qp_debugfs = debugfs_create_dir("QPs", rdev->dbg_root); rdev->cc_config = debugfs_create_dir("cc_config", rdev->dbg_root); + bnxt_re_debugfs_add_info(rdev); + rdev->cc_config_params = kzalloc(sizeof(*cc_params), GFP_KERNEL); for (i = 0; i < BNXT_RE_CC_PARAM_GEN0; i++) { From 7fcf00bd7f30540cf7096c8f2fadab5d890d4cf2 Mon Sep 17 00:00:00 2001 From: Anantha Prabhu Date: Tue, 23 Sep 2025 11:56:57 +0530 Subject: [PATCH 80/85] RDMA/bnxt_re: Remove non-statistics counters from hw_counters Remove non-statistics counters from the RDMA hw_counters framework. 
The removed data includes: - Active resource counts (ACTIVE_PD, ACTIVE_QP, etc.) - Resource watermarks (WATERMARK_PD, WATERMARK_QP, etc.) - Operational counters (RESIZE_CQ_CNT) - DB pacing metrics (PACING_RESCHED, PACING_CMPL, etc.) This change ensures hw_counters contains only true performance and error statistics. Link: https://patch.msgid.link/r/20250923062657.981487-3-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Anantha Prabhu Signed-off-by: Kalesh AP Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/hw_counters.c | 58 --------------------- drivers/infiniband/hw/bnxt_re/hw_counters.h | 23 -------- 2 files changed, 81 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 32ec67e4d922..651cf9d0e0c7 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -51,25 +51,6 @@ #include "hw_counters.h" static const struct rdma_stat_desc bnxt_re_stat_descs[] = { - [BNXT_RE_ACTIVE_PD].name = "active_pds", - [BNXT_RE_ACTIVE_AH].name = "active_ahs", - [BNXT_RE_ACTIVE_QP].name = "active_qps", - [BNXT_RE_ACTIVE_RC_QP].name = "active_rc_qps", - [BNXT_RE_ACTIVE_UD_QP].name = "active_ud_qps", - [BNXT_RE_ACTIVE_SRQ].name = "active_srqs", - [BNXT_RE_ACTIVE_CQ].name = "active_cqs", - [BNXT_RE_ACTIVE_MR].name = "active_mrs", - [BNXT_RE_ACTIVE_MW].name = "active_mws", - [BNXT_RE_WATERMARK_PD].name = "watermark_pds", - [BNXT_RE_WATERMARK_AH].name = "watermark_ahs", - [BNXT_RE_WATERMARK_QP].name = "watermark_qps", - [BNXT_RE_WATERMARK_RC_QP].name = "watermark_rc_qps", - [BNXT_RE_WATERMARK_UD_QP].name = "watermark_ud_qps", - [BNXT_RE_WATERMARK_SRQ].name = "watermark_srqs", - [BNXT_RE_WATERMARK_CQ].name = "watermark_cqs", - [BNXT_RE_WATERMARK_MR].name = "watermark_mrs", - [BNXT_RE_WATERMARK_MW].name = "watermark_mws", - [BNXT_RE_RESIZE_CQ_CNT].name = "resize_cq_cnt", [BNXT_RE_RX_PKTS].name = "rx_pkts", [BNXT_RE_RX_BYTES].name = "rx_bytes", [BNXT_RE_TX_PKTS].name = "tx_pkts", @@ -139,10 +120,6 @@ static const struct rdma_stat_desc bnxt_re_stat_descs[] = { [BNXT_RE_TX_CNP].name = "np_cnp_pkts", [BNXT_RE_RX_CNP].name = "rp_cnp_handled", [BNXT_RE_RX_ECN].name = "np_ecn_marked_roce_packets", - [BNXT_RE_PACING_RESCHED].name = "pacing_reschedule", - [BNXT_RE_PACING_CMPL].name = "pacing_complete", - [BNXT_RE_PACING_ALERT].name = "pacing_alerts", - [BNXT_RE_DB_FIFO_REG].name = "db_fifo_register", [BNXT_RE_REQ_CQE_ERROR].name = "req_cqe_error", [BNXT_RE_RESP_CQE_ERROR].name = "resp_cqe_error", [BNXT_RE_RESP_REMOTE_ACCESS_ERRS].name = "resp_remote_access_errors", @@ -292,18 +269,6 @@ static void bnxt_re_copy_err_stats(struct bnxt_re_dev *rdev, err_s->res_tx_no_perm; } -static void bnxt_re_copy_db_pacing_stats(struct bnxt_re_dev *rdev, - struct rdma_hw_stats *stats) -{ - struct bnxt_re_db_pacing_stats *pacing_s = &rdev->stats.pacing; - - stats->value[BNXT_RE_PACING_RESCHED] = pacing_s->resched; - stats->value[BNXT_RE_PACING_CMPL] = pacing_s->complete; - stats->value[BNXT_RE_PACING_ALERT] = pacing_s->alerts; - stats->value[BNXT_RE_DB_FIFO_REG] = - readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off); -} - int bnxt_re_assign_pma_port_ext_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad) { struct ib_pma_portcounters_ext *pma_cnt_ext; @@ -399,7 +364,6 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, u32 port, int index) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); - struct bnxt_re_res_cntrs *res_s = &rdev->stats.res; struct bnxt_qplib_roce_stats *err_s 
= NULL; struct ctx_hw_stats *hw_stats = NULL; int rc = 0; @@ -408,26 +372,6 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, if (!port || !stats) return -EINVAL; - stats->value[BNXT_RE_ACTIVE_QP] = atomic_read(&res_s->qp_count); - stats->value[BNXT_RE_ACTIVE_RC_QP] = atomic_read(&res_s->rc_qp_count); - stats->value[BNXT_RE_ACTIVE_UD_QP] = atomic_read(&res_s->ud_qp_count); - stats->value[BNXT_RE_ACTIVE_SRQ] = atomic_read(&res_s->srq_count); - stats->value[BNXT_RE_ACTIVE_CQ] = atomic_read(&res_s->cq_count); - stats->value[BNXT_RE_ACTIVE_MR] = atomic_read(&res_s->mr_count); - stats->value[BNXT_RE_ACTIVE_MW] = atomic_read(&res_s->mw_count); - stats->value[BNXT_RE_ACTIVE_PD] = atomic_read(&res_s->pd_count); - stats->value[BNXT_RE_ACTIVE_AH] = atomic_read(&res_s->ah_count); - stats->value[BNXT_RE_WATERMARK_QP] = res_s->qp_watermark; - stats->value[BNXT_RE_WATERMARK_RC_QP] = res_s->rc_qp_watermark; - stats->value[BNXT_RE_WATERMARK_UD_QP] = res_s->ud_qp_watermark; - stats->value[BNXT_RE_WATERMARK_SRQ] = res_s->srq_watermark; - stats->value[BNXT_RE_WATERMARK_CQ] = res_s->cq_watermark; - stats->value[BNXT_RE_WATERMARK_MR] = res_s->mr_watermark; - stats->value[BNXT_RE_WATERMARK_MW] = res_s->mw_watermark; - stats->value[BNXT_RE_WATERMARK_PD] = res_s->pd_watermark; - stats->value[BNXT_RE_WATERMARK_AH] = res_s->ah_watermark; - stats->value[BNXT_RE_RESIZE_CQ_CNT] = atomic_read(&res_s->resize_count); - if (hw_stats) { stats->value[BNXT_RE_RECOVERABLE_ERRORS] = le64_to_cpu(hw_stats->tx_bcast_pkts); @@ -466,8 +410,6 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, goto done; } } - if (rdev->pacing.dbr_pacing && bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) - bnxt_re_copy_db_pacing_stats(rdev, stats); } done: diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h index be8e69458734..09d371d442aa 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.h +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h @@ -41,25 +41,6 @@ #define __BNXT_RE_HW_STATS_H__ enum bnxt_re_hw_stats { - BNXT_RE_ACTIVE_PD, - BNXT_RE_ACTIVE_AH, - BNXT_RE_ACTIVE_QP, - BNXT_RE_ACTIVE_RC_QP, - BNXT_RE_ACTIVE_UD_QP, - BNXT_RE_ACTIVE_SRQ, - BNXT_RE_ACTIVE_CQ, - BNXT_RE_ACTIVE_MR, - BNXT_RE_ACTIVE_MW, - BNXT_RE_WATERMARK_PD, - BNXT_RE_WATERMARK_AH, - BNXT_RE_WATERMARK_QP, - BNXT_RE_WATERMARK_RC_QP, - BNXT_RE_WATERMARK_UD_QP, - BNXT_RE_WATERMARK_SRQ, - BNXT_RE_WATERMARK_CQ, - BNXT_RE_WATERMARK_MR, - BNXT_RE_WATERMARK_MW, - BNXT_RE_RESIZE_CQ_CNT, BNXT_RE_RX_PKTS, BNXT_RE_RX_BYTES, BNXT_RE_TX_PKTS, @@ -129,10 +110,6 @@ enum bnxt_re_hw_stats { BNXT_RE_TX_CNP, BNXT_RE_RX_CNP, BNXT_RE_RX_ECN, - BNXT_RE_PACING_RESCHED, - BNXT_RE_PACING_CMPL, - BNXT_RE_PACING_ALERT, - BNXT_RE_DB_FIFO_REG, BNXT_RE_REQ_CQE_ERROR, BNXT_RE_RESP_CQE_ERROR, BNXT_RE_RESP_REMOTE_ACCESS_ERRS, From 4bab6d9584497191c449212c85799de4d84a1263 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 23 Sep 2025 14:20:45 +0300 Subject: [PATCH 81/85] RDMA/irdma: Fix positive vs negative error codes in irdma_post_send() This code accidentally returns positive EINVAL instead of negative -EINVAL. Some of the callers treat positive returns as success. Add the missing '-' char. 
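To illustrate why the sign matters, here is a minimal userspace sketch (the post_one()/post_one_buggy() helpers are hypothetical, not irdma code) of the kernel convention that 0 means success and a negative errno means failure: a caller that only checks for negative returns treats a positive EINVAL as success.

#include <errno.h>
#include <stdio.h>

/* Hypothetical post routine following the kernel convention:
 * return 0 on success, a negative errno on failure. */
static int post_one(int supported)
{
	if (!supported)
		return -EINVAL;	/* correct: negative errno */
	return 0;
}

/* Hypothetical buggy variant returning positive EINVAL. */
static int post_one_buggy(int supported)
{
	if (!supported)
		return EINVAL;	/* bug: positive value */
	return 0;
}

int main(void)
{
	/* A caller that treats only negative values as failure silently
	 * accepts the buggy positive return. */
	if (post_one_buggy(0) < 0)
		printf("buggy variant: failure detected\n");
	else
		printf("buggy variant: positive EINVAL looks like success\n");

	if (post_one(0) < 0)
		printf("fixed variant: -EINVAL correctly detected\n");
	return 0;
}
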
Fixes: a24a29c8747f ("RDMA/irdma: Add Atomic Operations support") Link: https://patch.msgid.link/r/aNKCjcD6Nab1jWEV@stanley.mountain Signed-off-by: Dan Carpenter Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index a47ccc86e485..3e5196ee61ef 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -3966,7 +3966,7 @@ static int irdma_post_send(struct ib_qp *ibqp, case IB_WR_ATOMIC_CMP_AND_SWP: if (unlikely(!(dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_ATOMIC_OPS))) { - err = EINVAL; + err = -EINVAL; break; } info.op_type = IRDMA_OP_TYPE_ATOMIC_COMPARE_AND_SWAP; @@ -3983,7 +3983,7 @@ static int irdma_post_send(struct ib_qp *ibqp, case IB_WR_ATOMIC_FETCH_AND_ADD: if (unlikely(!(dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_ATOMIC_OPS))) { - err = EINVAL; + err = -EINVAL; break; } info.op_type = IRDMA_OP_TYPE_ATOMIC_FETCH_AND_ADD; From 880245fd029a8f8ee8fd557c2681d077c1b1a959 Mon Sep 17 00:00:00 2001 From: Jacob Moroni Date: Tue, 23 Sep 2025 14:21:28 +0000 Subject: [PATCH 82/85] RDMA/irdma: Remove unused struct irdma_cq fields These fields were set but not used anywhere, so remove them. Link: https://patch.msgid.link/r/20250923142128.943240-1-jmoroni@google.com Signed-off-by: Jacob Moroni Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/verbs.c | 3 --- drivers/infiniband/hw/irdma/verbs.h | 6 ------ 2 files changed, 9 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 3e5196ee61ef..76ce6137f2ba 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2544,8 +2544,6 @@ static int irdma_create_cq(struct ib_cq *ibcq, goto cq_free_rsrc; } - iwcq->iwpbl = iwpbl; - iwcq->cq_mem_size = 0; cqmr = &iwpbl->cq_mr; if (rf->sc_dev.hw_attrs.uk_attrs.feature_flags & @@ -2560,7 +2558,6 @@ static int irdma_create_cq(struct ib_cq *ibcq, err_code = -EPROTO; goto cq_free_rsrc; } - iwcq->iwpbl_shadow = iwpbl_shadow; cqmr_shadow = &iwpbl_shadow->cq_mr; info.shadow_area_pa = cqmr_shadow->cq_pbl.addr; cqmr->split = true; diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index 49972b0600a3..ed21c1b56e8e 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -140,21 +140,15 @@ struct irdma_srq { struct irdma_cq { struct ib_cq ibcq; struct irdma_sc_cq sc_cq; - u16 cq_head; - u16 cq_size; u16 cq_num; bool user_mode; atomic_t armed; enum irdma_cmpl_notify last_notify; - u32 polled_cmpls; - u32 cq_mem_size; struct irdma_dma_mem kmem; struct irdma_dma_mem kmem_shadow; struct completion free_cq; refcount_t refcnt; spinlock_t lock; /* for poll cq */ - struct irdma_pbl *iwpbl; - struct irdma_pbl *iwpbl_shadow; struct list_head resize_list; struct irdma_cq_poll_info cur_cqe; struct list_head cmpl_generated; From fb0b08297ecb429b60cb2b6ed40632ce5294220c Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Wed, 24 Sep 2025 04:01:27 -0700 Subject: [PATCH 83/85] RDMA/bnxt_re: improve clarity in ALLOC_PAGE handler Update uverbs_copy_to call to use sizeof(dpi) instead of sizeof(length) when copying the device page index (DPI) back to user space. Both dpi and length are declared as u32, so this change has no functional impact but makes the code clearer. 
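As a side note, a minimal sketch (with a hypothetical alloc_page_resp struct, not the bnxt_re uapi) of why sizing a copy by the object actually being copied is the clearer idiom, as the change below does with sizeof(dpi).

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical response layout; not the bnxt_re uapi structures. */
struct alloc_page_resp {
	uint64_t addr;
	uint32_t length;
	uint32_t dpi;
};

int main(void)
{
	struct alloc_page_resp resp = { .addr = 0x1000, .length = 4096, .dpi = 7 };
	uint32_t out_dpi;

	/* Sizing the copy by the copied object keeps the call
	 * self-describing and stays correct even if another field's type
	 * changes later; sizeof(resp.length) only matches by coincidence. */
	memcpy(&out_dpi, &resp.dpi, sizeof(resp.dpi));
	printf("dpi = %u (%zu bytes copied)\n", out_dpi, sizeof(resp.dpi));
	return 0;
}
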
Link: https://patch.msgid.link/r/20250924110130.340195-1-alok.a.tiwari@oracle.com Signed-off-by: Alok Tiwari Reviewed-by: Kalesh AP Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 2639ca21a027..8f2c242c578b 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -4750,7 +4750,7 @@ static int UVERBS_HANDLER(BNXT_RE_METHOD_ALLOC_PAGE)(struct uverbs_attr_bundle * return err; err = uverbs_copy_to(attrs, BNXT_RE_ALLOC_PAGE_DPI, - &dpi, sizeof(length)); + &dpi, sizeof(dpi)); if (err) return err; From fdd0fe94d68649322e391c5c27dd9f436b4e955e Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Tue, 23 Sep 2025 16:45:36 +0200 Subject: [PATCH 84/85] RDMA/siw: Always report immediate post SQ errors In siw_post_send(), any immediate error encountered during processing of the work request list must be reported to the caller, even if previous work requests in that list were just accepted and added to the send queue. Not reporting those errors confuses the caller, which would wait indefinitely for the failing and potentially subsequently aborted work requests completion. This fixes a case where immediate errors were overwritten by subsequent code in siw_post_send(). Fixes: 303ae1cdfdf7 ("rdma/siw: application interface") Link: https://patch.msgid.link/r/20250923144536.103825-1-bernard.metzler@linux.dev Suggested-by: Stefan Metzmacher Signed-off-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_verbs.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 35c3bde0d00a..efa2f097b582 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -769,7 +769,7 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, struct siw_wqe *wqe = tx_wqe(qp); unsigned long flags; - int rv = 0; + int rv = 0, imm_err = 0; if (wr && !rdma_is_kernel_res(&qp->base_qp.res)) { siw_dbg_qp(qp, "wr must be empty for user mapped sq\n"); @@ -955,9 +955,17 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, * Send directly if SQ processing is not in progress. * Eventual immediate errors (rv < 0) do not affect the involved * RI resources (Verbs, 8.3.1) and thus do not prevent from SQ - * processing, if new work is already pending. But rv must be passed - * to caller. + * processing, if new work is already pending. But rv and pointer + * to failed work request must be passed to caller. */ + if (unlikely(rv < 0)) { + /* + * Immediate error + */ + siw_dbg_qp(qp, "Immediate error %d\n", rv); + imm_err = rv; + *bad_wr = wr; + } if (wqe->wr_status != SIW_WR_IDLE) { spin_unlock_irqrestore(&qp->sq_lock, flags); goto skip_direct_sending; @@ -982,15 +990,10 @@ skip_direct_sending: up_read(&qp->state_lock); - if (rv >= 0) - return 0; - /* - * Immediate error - */ - siw_dbg_qp(qp, "error %d\n", rv); + if (unlikely(imm_err)) + return imm_err; - *bad_wr = wr; - return rv; + return (rv >= 0) ? 
0 : rv; } /* From e6d736bd08902ba53460df1b62ee4218bbd17d9b Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Wed, 24 Sep 2025 19:51:23 +0530 Subject: [PATCH 85/85] RDMA/ionic: Fix memory leak of admin q_wr The admin queue work request buffer, aq->q_wr, is allocated via kcalloc in __ionic_create_rdma_adminq. However, it was not being freed in the corresponding teardown function __ionic_destroy_rdma_adminq. This results in a memory leak. Fix this leak by adding the missing kfree(aq->q_wr) in the destruction path. Fixes: f3bdbd42702c ("RDMA/ionic: Create device queues to support admin operations") Link: https://patch.msgid.link/r/20250924142123.18344-1-abhijit.gangurde@amd.com Signed-off-by: Abhijit Gangurde Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ionic/ionic_admin.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/ionic/ionic_admin.c b/drivers/infiniband/hw/ionic/ionic_admin.c index c2ff21bcd96f..2537aa55d12d 100644 --- a/drivers/infiniband/hw/ionic/ionic_admin.c +++ b/drivers/infiniband/hw/ionic/ionic_admin.c @@ -600,6 +600,7 @@ err_q: static void __ionic_destroy_rdma_adminq(struct ionic_ibdev *dev, struct ionic_aq *aq) { + kfree(aq->q_wr); ionic_queue_destroy(&aq->q, dev->lif_cfg.hwdev); kfree(aq); }
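To close out the series, a minimal userspace sketch (hypothetical adminq type, not the ionic structures) of the create/destroy symmetry this fix restores: every buffer the create path allocates needs a matching free in the teardown path.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical admin queue with a queue body and a work-request array,
 * loosely mirroring the aq->q / aq->q_wr pairing in the patch above. */
struct adminq {
	void *q;
	void *q_wr;
};

static struct adminq *adminq_create(size_t depth)
{
	struct adminq *aq = calloc(1, sizeof(*aq));

	if (!aq)
		return NULL;
	aq->q = calloc(depth, 64);
	aq->q_wr = calloc(depth, sizeof(void *));
	if (!aq->q || !aq->q_wr) {
		free(aq->q_wr);
		free(aq->q);
		free(aq);
		return NULL;
	}
	return aq;
}

static void adminq_destroy(struct adminq *aq)
{
	/* Teardown releases everything the create path allocated, in
	 * reverse order; dropping the q_wr free here is exactly the kind
	 * of leak the patch above closes. */
	free(aq->q_wr);
	free(aq->q);
	free(aq);
}

int main(void)
{
	struct adminq *aq = adminq_create(16);

	if (!aq)
		return 1;
	adminq_destroy(aq);
	printf("create/destroy balanced\n");
	return 0;
}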