Fixes for scatterlist allocation, and interrupt race condition #3

Open · wants to merge 1 commit into master
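Summary of the change set: the driver's hand-rolled scatterlist handling (a kmalloc'd struct scatterlist array stored as nvme_prps.sg) is replaced with a struct sg_table embedded in nvme_prps, and a per-IRQ outstanding_cmd_count is added so the interrupt bottom half only latches isr_fired while completions are actually pending. Below is a minimal, illustrative sketch of the sg_table pattern the patch moves to; the function name is hypothetical and the real logic lives in pages_to_sg() in dnvme_cmds.c further down.

#include <linux/scatterlist.h>
#include <linux/mm.h>

/* Illustrative sketch only: build an sg_table over already-pinned pages,
 * the way pages_to_sg() now does, instead of kmalloc()ing a bare
 * scatterlist array.  Teardown becomes a single sg_free_table(). */
static int example_pages_to_sg_table(struct page **pages, int num_pages,
		unsigned int buf_offset, unsigned int len, struct sg_table *tbl)
{
	struct scatterlist *sg, *last = NULL;
	int i, rc;

	rc = sg_alloc_table(tbl, num_pages, GFP_KERNEL);
	if (rc)
		return rc;

	for (i = 0, sg = tbl->sgl; i < num_pages; i++, sg = sg_next(sg)) {
		if (pages[i] == NULL) {
			sg_free_table(tbl);	/* undo the allocation */
			return -EFAULT;
		}
		sg_set_page(sg, pages[i],
			min_t(unsigned int, len, PAGE_SIZE - buf_offset),
			buf_offset);
		len -= (PAGE_SIZE - buf_offset);
		buf_offset = 0;		/* only the first page is offset */
		last = sg;
	}
	if (last)
		sg_mark_end(last);	/* terminate the chain */
	return 0;
}
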
79 changes: 41 additions & 38 deletions dnvme_cmds.c
@@ -36,11 +36,11 @@ static int data_buf_to_prp(struct nvme_device *nvme_dev,
enum data_buf_type data_buf_type, u16 cmd_id);
static int map_user_pg_to_dma(struct nvme_device *nvme_dev,
enum dma_data_direction kernel_dir, unsigned long buf_addr,
unsigned total_buf_len, struct scatterlist **sg_list,
struct nvme_prps *prps, enum data_buf_type data_buf_type);
unsigned total_buf_len, struct nvme_prps *prps,
enum data_buf_type data_buf_type);
static int pages_to_sg(struct page **pages, int num_pages, int buf_offset,
unsigned len, struct scatterlist **sg_list);
static int setup_prps(struct nvme_device *nvme_dev, struct scatterlist *sg,
unsigned len, struct sg_table *sg_tbl);
static int setup_prps(struct nvme_device *nvme_dev,
s32 buf_len, struct nvme_prps *prps, u8 cr_io_q,
enum send_64b_bitmask prp_mask);
static void unmap_user_pg_to_dma(struct nvme_device *nvme_dev,
@@ -182,7 +182,7 @@ static int data_buf_to_prp(struct nvme_device *nvme_dev,
{
int err;
unsigned long addr;
struct scatterlist *sg_list = NULL;

enum dma_data_direction kernel_dir;
#ifdef TEST_PRP_DEBUG
int last_prp, i, j;
@@ -207,12 +207,12 @@ static int data_buf_to_prp(struct nvme_device *nvme_dev,

/* Mapping user pages to dma memory */
err = map_user_pg_to_dma(nvme_dev, kernel_dir, addr,
nvme_64b_send->data_buf_size, &sg_list, prps, data_buf_type);
nvme_64b_send->data_buf_size, prps, data_buf_type);
if (err < 0) {
return err;
}

err = setup_prps(nvme_dev, sg_list, nvme_64b_send->data_buf_size, prps,
err = setup_prps(nvme_dev, nvme_64b_send->data_buf_size, prps,
data_buf_type, nvme_64b_send->bit_mask);
if (err < 0) {
unmap_user_pg_to_dma(nvme_dev, prps);
@@ -278,8 +278,8 @@ static int data_buf_to_prp(struct nvme_device *nvme_dev,

static int map_user_pg_to_dma(struct nvme_device *nvme_dev,
enum dma_data_direction kernel_dir, unsigned long buf_addr,
unsigned total_buf_len, struct scatterlist **sg_list,
struct nvme_prps *prps, enum data_buf_type data_buf_type)
unsigned total_buf_len, struct nvme_prps *prps,
enum data_buf_type data_buf_type)
{
int i, err, buf_pg_offset, buf_pg_count, num_sg_entries;
struct page **pages;
@@ -323,9 +323,9 @@ static int map_user_pg_to_dma(struct nvme_device *nvme_dev,
}
}

/* Generate SG List from pinned down pages */
/* Generate SG Table from pinned down pages */
err = pages_to_sg(pages, buf_pg_count, buf_pg_offset,
total_buf_len, sg_list);
total_buf_len, &prps->st);
if (err < 0) {
LOG_ERR("Generation of sg lists failed");
goto error_unmap;
@@ -334,7 +334,7 @@ static int map_user_pg_to_dma(struct nvme_device *nvme_dev,
/* Mapping SG List to DMA; NOTE: The sg list could be coalesced by either
* an IOMMU or the kernel, so checking whether or not the number of
* mapped entries equate to the number given to the func is not warranted */
num_sg_entries = dma_map_sg(&nvme_dev->private_dev.pdev->dev, *sg_list,
num_sg_entries = dma_map_sg(&nvme_dev->private_dev.pdev->dev, prps->st.sgl,
buf_pg_count, kernel_dir);
LOG_DBG("%d elements mapped out of %d sglist elements",
num_sg_entries, num_sg_entries);
@@ -349,7 +349,7 @@ static int map_user_pg_to_dma(struct nvme_device *nvme_dev,
{
struct scatterlist *work;
LOG_DBG("Dump sg list after DMA mapping");
for (i = 0, work = *sg_list; i < num_sg_entries; i++,
for (i = 0, work = prps->st.sgl; i < num_sg_entries; i++,
work = sg_next(work)) {

LOG_DBG(" sg list: page_link=0x%016lx", work->page_link);
@@ -362,7 +362,6 @@ static int map_user_pg_to_dma(struct nvme_device *nvme_dev,
#endif

/* Fill in nvme_prps */
prps->sg = *sg_list;
prps->num_map_pgs = num_sg_entries;
prps->vir_kern_addr = vir_kern_addr;
prps->data_dir = kernel_dir;
@@ -381,32 +380,32 @@ static int map_user_pg_to_dma(struct nvme_device *nvme_dev,
}

static int pages_to_sg(struct page **pages, int num_pages, int buf_offset,
unsigned len, struct scatterlist **sg_list)
{
unsigned len, struct sg_table *sg_tbl)
{
int i;
struct scatterlist *sg;

*sg_list = NULL;
sg = kmalloc((num_pages * sizeof(struct scatterlist)), GFP_KERNEL);
if (sg == NULL) {
LOG_ERR("Memory alloc for sg list failed");
return -ENOMEM;
}

int rc;
struct scatterlist *sg_cur = NULL, *sg_prv = NULL;

rc = sg_alloc_table(sg_tbl, num_pages, GFP_KERNEL);
if (rc) {
LOG_ERR("Memory alloc for sg table failed rc=0x%X", rc);
return rc;
}
/* Building the SG List */
sg_init_table(sg, num_pages);
for (i = 0; i < num_pages; i++) {
for (i = 0, sg_cur = sg_tbl->sgl; i < num_pages; i++, sg_cur = sg_next(sg_cur)) {
if (pages[i] == NULL) {
kfree(sg);
sg_free_table(sg_tbl);
return -EFAULT;
}
sg_set_page(&sg[i], pages[i],
sg_set_page(sg_cur, pages[i],
min_t(int, len, (PAGE_SIZE - buf_offset)), buf_offset);
len -= (PAGE_SIZE - buf_offset);
buf_offset = 0;
sg_prv = sg_cur;
}
sg_mark_end(&sg[i - 1]);
*sg_list = sg;
sg_mark_end(sg_prv);

return 0;
}

@@ -415,7 +414,7 @@ static int pages_to_sg(struct page **pages, int num_pages, int buf_offset,
* Sets up PRPs from DMA'ed memory
* Returns Error codes
*/
static int setup_prps(struct nvme_device *nvme_dev, struct scatterlist *sg,
static int setup_prps(struct nvme_device *nvme_dev,
s32 buf_len, struct nvme_prps *prps, u8 cr_io_q,
enum send_64b_bitmask prp_mask)
{
@@ -426,7 +425,10 @@ static int setup_prps(struct nvme_device *nvme_dev, struct scatterlist *sg,
u32 num_prps, num_pg, prp_page = 0;
int index, err;
struct dma_pool *prp_page_pool;

struct scatterlist *sg;

sg = prps->st.sgl;

dma_addr = sg_dma_address(sg);
dma_len = sg_dma_len(sg);
offset = offset_in_page(dma_addr);
@@ -593,7 +595,8 @@ static void unmap_user_pg_to_dma(struct nvme_device *nvme_dev,
{
int i;
struct page *pg;

struct scatterlist *sg_cur;

if (!prps) {
return;
}
@@ -604,19 +607,19 @@ static void unmap_user_pg_to_dma(struct nvme_device *nvme_dev,
}

if (prps->type != NO_PRP) {
dma_unmap_sg(&nvme_dev->private_dev.pdev->dev, prps->sg,
dma_unmap_sg(&nvme_dev->private_dev.pdev->dev, prps->st.sgl,
prps->num_map_pgs, prps->data_dir);

for (i = 0; i < prps->num_map_pgs; i++) {
pg = sg_page(&prps->sg[i]);
for (i = 0, sg_cur = prps->st.sgl; i < prps->num_map_pgs; i++, sg_cur = sg_next(sg_cur)) {
pg = sg_page(sg_cur);
if ((prps->data_dir == DMA_FROM_DEVICE) ||
(prps->data_dir == DMA_BIDIRECTIONAL)) {

set_page_dirty_lock(pg);
}
put_page(pg);
}
kfree(prps->sg);
sg_free_table(&prps->st);
}
}

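For the release side, the same table is what gets unmapped and freed. A minimal sketch mirroring the new unmap_user_pg_to_dma() above, assuming the nvme_prps layout from dnvme_ds.h below; the helper name is illustrative, and for_each_sg() is the stock kernel iterator, equivalent to the manual sg_next() loop in the patch.

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <linux/mm.h>

static void example_release_prps(struct device *dev, struct nvme_prps *prps)
{
	struct scatterlist *sg;
	int i;

	/* Unmap using the table's head entry rather than a raw pointer */
	dma_unmap_sg(dev, prps->st.sgl, prps->num_map_pgs, prps->data_dir);

	for_each_sg(prps->st.sgl, sg, prps->num_map_pgs, i) {
		struct page *pg = sg_page(sg);

		if (prps->data_dir == DMA_FROM_DEVICE ||
				prps->data_dir == DMA_BIDIRECTIONAL)
			set_page_dirty_lock(pg);	/* device may have written */
		put_page(pg);				/* drop the pin taken at map time */
	}
	sg_free_table(&prps->st);	/* replaces kfree(prps->sg) */
}
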
6 changes: 3 additions & 3 deletions dnvme_ds.c
@@ -285,7 +285,7 @@ int driver_log(struct nvme_file *n_file)
pmetrics_cq_list->private_cq.prp_persist.data_buf_size);
vfs_write(file, work, strlen(work), &pos);
snprintf(work, SIZE_OF_WORK, IDNT_L4"sq = 0X%llX",
(u64)pmetrics_cq_list->private_cq.prp_persist.sg);
(u64)pmetrics_cq_list->private_cq.prp_persist.st.sgl);
vfs_write(file, work, strlen(work), &pos);
snprintf(work, SIZE_OF_WORK, IDNT_L4"num_map_pgs = 0X%llX",
(u64)pmetrics_cq_list->private_cq.prp_persist.num_map_pgs);
@@ -375,7 +375,7 @@ int driver_log(struct nvme_file *n_file)
pmetrics_sq_list->private_sq.prp_persist.data_buf_size);
vfs_write(file, work, strlen(work), &pos);
snprintf(work, SIZE_OF_WORK, IDNT_L4"sg = 0X%llX",
(u64)pmetrics_sq_list->private_sq.prp_persist.sg);
(u64)pmetrics_sq_list->private_sq.prp_persist.st.sgl);
vfs_write(file, work, strlen(work), &pos);
snprintf(work, SIZE_OF_WORK, IDNT_L4"num_map_pgs = 0X%llX",
(u64)pmetrics_sq_list->private_sq.prp_persist.num_map_pgs);
@@ -436,7 +436,7 @@ int driver_log(struct nvme_file *n_file)
pcmd_track_list->prp_nonpersist.data_buf_size);
vfs_write(file, work, strlen(work), &pos);
snprintf(work, SIZE_OF_WORK, IDNT_L6"sg = 0X%llX",
(u64)pcmd_track_list->prp_nonpersist.sg);
(u64)pcmd_track_list->prp_nonpersist.st.sgl);
vfs_write(file, work, strlen(work), &pos);
snprintf(work, SIZE_OF_WORK, IDNT_L6"num_map_pgs = 0X%llX",
(u64)pcmd_track_list->prp_nonpersist.num_map_pgs);
5 changes: 3 additions & 2 deletions dnvme_ds.h
@@ -48,8 +48,8 @@ struct nvme_prps {
dma_addr_t first_dma; /* First entry in PRP List */
/* Size of data buffer for the specific command */
u32 data_buf_size;
/* Pointer to SG list generated */
struct scatterlist *sg;
/* SG table; walk the entries with sg_next() starting at st.sgl */
struct sg_table st;
/* Number of pages mapped to DMA area */
u32 num_map_pgs;
/* Address of data buffer for the specific command */
@@ -135,6 +135,7 @@ struct irq_track {
u32 int_vec; /* vec number; assigned by OS */
u8 isr_fired; /* flag to indicate if irq has fired */
u32 isr_count; /* total no. of times irq fired */
u32 outstanding_cmd_count; /* count of outstanding cmds */
};

/*
14 changes: 12 additions & 2 deletions dnvme_ioctls.c
@@ -848,7 +848,7 @@ int driver_toxic_dword(struct metrics_device_list *pmetrics_device,
LOG_DBG("After tgt_DW%d = 0x%08X", user_data->dword, *tgt_dword);

dma_sync_sg_for_device(pmetrics_device->metrics_device->
private_dev.dmadev, pmetrics_sq->private_sq.prp_persist.sg,
private_dev.dmadev, pmetrics_sq->private_sq.prp_persist.st.sgl,
pmetrics_sq->private_sq.prp_persist.num_map_pgs,
pmetrics_sq->private_sq.prp_persist.data_dir);

@@ -883,6 +883,8 @@ int driver_send_64b(struct metrics_device_list *pmetrics_device,
struct metrics_sq *p_cmd_sq;
/* Particular CQ (within CMD) from linked list of Q's for device */
struct metrics_cq *p_cmd_cq;
/* CQ tied to this SQ's cq_id, from linked list of Q's for device */
struct metrics_cq *pmetrics_cq;
/* struct describing the meta buf */
struct metrics_meta *meta_buf;
/* Kernel space memory for passed in command */
@@ -1214,11 +1216,19 @@ int driver_send_64b(struct metrics_device_list *pmetrics_device,
nvme_cmd_ker, cmd_buf_size);

dma_sync_sg_for_device(pmetrics_device->metrics_device->
private_dev.dmadev, pmetrics_sq->private_sq.prp_persist.sg,
private_dev.dmadev, pmetrics_sq->private_sq.prp_persist.st.sgl,
pmetrics_sq->private_sq.prp_persist.num_map_pgs,
pmetrics_sq->private_sq.prp_persist.data_dir);
}


pmetrics_cq = find_cq(pmetrics_device, pmetrics_sq->public_sq.cq_id);
if ((pmetrics_cq->public_cq.irq_enabled) && (pmetrics_device->
metrics_device->public_dev.irq_active.irq_type != INT_NONE)) {
incr_outstanding_cmd_count(pmetrics_device, pmetrics_cq->public_cq.irq_no);
}


/* Increment the Tail pointer and handle roll over conditions */
pmetrics_sq->public_sq.tail_ptr_virt =
(u16)(((u32)pmetrics_sq->public_sq.tail_ptr_virt + 1UL) %
46 changes: 42 additions & 4 deletions dnvme_irq.c
@@ -913,10 +913,15 @@ static void inc_isr_count(struct irq_processing *pirq_process,
list_for_each_entry(pirq_node, &pirq_process->irq_track_list,
irq_list_hd) {
if (irq_no == pirq_node->irq_no) {
pirq_node->isr_fired = 1;
pirq_node->isr_count++;
LOG_DBG("BH:isr count = %d for irq no = %d",
pirq_node->isr_count, pirq_node->irq_no);
if (pirq_node->outstanding_cmd_count > 0) {
pirq_node->isr_fired = 1;
pirq_node->isr_count++;
LOG_DBG("BH:isr count = %d for irq no = %d",
pirq_node->isr_count, pirq_node->irq_no);
} else {
/* no outstanding cmds; unmask interrupts */
unmask_interrupts(irq_no, pirq_process);
}
} /* end of if ivec */
} /* end of list for irq */
}
@@ -996,6 +1001,7 @@ static int add_irq_node(struct metrics_device_list *pmetrics_device_elem,
irq_trk_node->irq_no = irq_no; /* irq number assigned */
irq_trk_node->isr_fired = 0;
irq_trk_node->isr_count = 0;
irq_trk_node->outstanding_cmd_count = 0;

/* Init irq cq linked list */
INIT_LIST_HEAD(&irq_trk_node->irq_cq_track);
@@ -1374,3 +1380,35 @@ int reset_isr_flag(struct metrics_device_list *pmetrics_device,
return 0;
}

int incr_outstanding_cmd_count(struct metrics_device_list *pmetrics_device,
u16 irq_no) {
struct irq_track *pirq_node;
pirq_node = find_irq_node(pmetrics_device, irq_no);
if (pirq_node == NULL) {
LOG_ERR("Irq node not found for irq_no: %d", irq_no);
return -EINVAL;
}
LOG_DBG("incr outstanding, before: %d irq_no=%d", pirq_node->outstanding_cmd_count, irq_no);
pirq_node->outstanding_cmd_count++;
return 0;
}

int sub_outstanding_cmd_count(struct metrics_device_list *pmetrics_device,
u16 irq_no, u32 reaped_count) {
struct irq_track *pirq_node;
pirq_node = find_irq_node(pmetrics_device, irq_no);
if (pirq_node == NULL) {
LOG_ERR("Irq node not found for irq_no: %d", irq_no);
return -EINVAL;
}
if (pirq_node->outstanding_cmd_count < reaped_count) {
LOG_ERR("outstanding_cmd_count: %d < reaped_count: %d",pirq_node->outstanding_cmd_count, reaped_count);
pirq_node->outstanding_cmd_count = 0;
} else {
pirq_node->outstanding_cmd_count -= reaped_count;
}

LOG_DBG("sub outstanding, after: %d irq_no=%d reaped= %d", pirq_node->outstanding_cmd_count, irq_no, reaped_count);
return 0;
}

9 changes: 9 additions & 0 deletions dnvme_irq.h
@@ -170,4 +170,13 @@ int reap_inquiry_isr(struct metrics_cq *pmetrics_cq_node,
int reset_isr_flag(struct metrics_device_list *pmetrics_device,
u16 irq_no);

/* Increment the outstanding cmd count tracked for the given irq */
int incr_outstanding_cmd_count(struct metrics_device_list *pmetrics_device,
u16 irq_no);

/* Decrement the outstanding cmd count by the number of CEs reaped */
int sub_outstanding_cmd_count(struct metrics_device_list *pmetrics_device,
u16 irq_no, u32 reaped_count);


#endif
9 changes: 8 additions & 1 deletion dnvme_queue.c
@@ -741,7 +741,7 @@ u32 reap_inquiry(struct metrics_cq *pmetrics_cq_node, struct device *dev)
(comp_entry_size * (u32)pmetrics_cq_node->public_cq.head_ptr);
} else {
/* do sync and update when pointer to discontig Q is reaped inq */
dma_sync_sg_for_cpu(dev, pmetrics_cq_node->private_cq.prp_persist.sg,
dma_sync_sg_for_cpu(dev, pmetrics_cq_node->private_cq.prp_persist.st.sgl,
pmetrics_cq_node->private_cq.prp_persist.num_map_pgs,
pmetrics_cq_node->private_cq.prp_persist.data_dir);
queue_base_addr =
@@ -1431,6 +1431,13 @@ int driver_reap_cq(struct metrics_device_list *pmetrics_device,
writel(pmetrics_cq_node->public_cq.head_ptr, pmetrics_cq_node->
private_cq.dbs);

if ((pmetrics_cq_node->public_cq.irq_enabled == 1) &&
(pmetrics_device->metrics_device->public_dev.irq_active.irq_type != INT_NONE)) {
sub_outstanding_cmd_count(pmetrics_device,
pmetrics_cq_node->public_cq.irq_no, user_data->num_reaped);
}


/* if 0 CE in a given cq, then reset the isr flag. */
if ((pmetrics_cq_node->public_cq.irq_enabled == 1) &&
(user_data->num_remaining == 0) &&
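Taken together, the interrupt-race portion of the patch works in three places: the submit path counts a command against its CQ's IRQ, the bottom half only latches isr_fired while that count is non-zero (otherwise it simply re-unmasks), and the reap path subtracts what was consumed. The condensed sketch below outlines those touch points using the helpers this patch adds (incr_outstanding_cmd_count(), sub_outstanding_cmd_count()) and the driver's existing unmask_interrupts(); it is an outline of the flow, not the literal code.

/* 1. Submit (driver_send_64b): another completion will arrive on this IRQ */
if (pmetrics_cq->public_cq.irq_enabled && irq_type != INT_NONE)
	incr_outstanding_cmd_count(pmetrics_device,
		pmetrics_cq->public_cq.irq_no);

/* 2. Bottom half (inc_isr_count): only record the ISR when work is pending */
if (pirq_node->outstanding_cmd_count > 0) {
	pirq_node->isr_fired = 1;
	pirq_node->isr_count++;
} else {
	unmask_interrupts(irq_no, pirq_process);	/* nothing to reap */
}

/* 3. Reap (driver_reap_cq): account for the completions just consumed */
sub_outstanding_cmd_count(pmetrics_device,
	pmetrics_cq_node->public_cq.irq_no, user_data->num_reaped);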