module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels");
+
+static int ring_avail_percent_lowater = 10;
+module_param(ring_avail_percent_lowater, int, S_IRUGO);
+MODULE_PARM_DESC(ring_avail_percent_lowater,
+ "Select a channel if available ring size > this in percent");
+
/*
* Timeout in seconds for all devices managed by this driver.
*/
* Mask of CPUs bound to subchannels.
*/
struct cpumask alloced_cpus;
+ /*
+ * Pre-allocated struct cpumask for each hardware queue.
+ * struct cpumask is used when selecting outgoing channels. It is a
+ * big structure, defaulting to 1024 bytes when CONFIG_MAXSMP=y.
+ * Pre-allocate it to avoid allocation on the kernel stack.
+ */
+ struct cpumask *cpumask_chns;
/* Used for vsc/vsp channel reset process */
struct storvsc_cmd_request init_request;
struct storvsc_cmd_request reset_request;
if (stor_device->stor_chns == NULL)
return -ENOMEM;
+ stor_device->cpumask_chns = kcalloc(num_possible_cpus(),
+ sizeof(struct cpumask), GFP_KERNEL);
+ if (stor_device->cpumask_chns == NULL) {
+ kfree(stor_device->stor_chns);
+ return -ENOMEM;
+ }
+
stor_device->stor_chns[device->channel->target_cpu] = device->channel;
cpumask_set_cpu(device->channel->target_cpu,
&stor_device->alloced_cpus);
vmbus_close(device->channel);
kfree(stor_device->stor_chns);
+ kfree(stor_device->cpumask_chns);
kfree(stor_device);
return 0;
}
{
u16 slot = 0;
u16 hash_qnum;
- struct cpumask alloced_mask;
+ struct cpumask *alloced_mask = &stor_device->cpumask_chns[q_num];
int num_channels, tgt_cpu;
if (stor_device->num_sc == 0)
* III. Mapping is persistent.
*/
- cpumask_and(&alloced_mask, &stor_device->alloced_cpus,
+ cpumask_and(alloced_mask, &stor_device->alloced_cpus,
cpumask_of_node(cpu_to_node(q_num)));
- num_channels = cpumask_weight(&alloced_mask);
+ num_channels = cpumask_weight(alloced_mask);
if (num_channels == 0)
return stor_device->device->channel;
while (hash_qnum >= num_channels)
hash_qnum -= num_channels;
- for_each_cpu(tgt_cpu, &alloced_mask) {
+ for_each_cpu(tgt_cpu, alloced_mask) {
if (slot == hash_qnum)
break;
slot++;
{
struct storvsc_device *stor_device;
struct vstor_packet *vstor_packet;
- struct vmbus_channel *outgoing_channel;
+ struct vmbus_channel *outgoing_channel, *channel;
int ret = 0;
- struct cpumask alloced_mask;
+ struct cpumask *alloced_mask;
int tgt_cpu;
vstor_packet = &request->vstor_packet;
/*
* Select an appropriate channel to send the request out.
*/
-
if (stor_device->stor_chns[q_num] != NULL) {
outgoing_channel = stor_device->stor_chns[q_num];
- if (outgoing_channel->target_cpu == smp_processor_id()) {
+ if (outgoing_channel->target_cpu == q_num) {
/*
* Ideally, we want to pick a different channel if
* available on the same NUMA node.
*/
- cpumask_and(&alloced_mask, &stor_device->alloced_cpus,
+ alloced_mask = &stor_device->cpumask_chns[q_num];
+ cpumask_and(alloced_mask, &stor_device->alloced_cpus,
cpumask_of_node(cpu_to_node(q_num)));
- for_each_cpu_wrap(tgt_cpu, &alloced_mask,
- outgoing_channel->target_cpu + 1) {
- if (tgt_cpu != outgoing_channel->target_cpu) {
- outgoing_channel =
- stor_device->stor_chns[tgt_cpu];
- break;
+
+ for_each_cpu_wrap(tgt_cpu, alloced_mask, q_num + 1) {
+ if (tgt_cpu == q_num)
+ continue;
+ channel = stor_device->stor_chns[tgt_cpu];
+ if (hv_get_avail_to_write_percent(
+ &channel->outbound)
+ > ring_avail_percent_lowater) {
+ outgoing_channel = channel;
+ goto found_channel;
+ }
+ }
+
+ /*
+ * All the other channels on the same NUMA node are
+ * busy. Try to use the channel on the current CPU
+ */
+ if (hv_get_avail_to_write_percent(
+ &outgoing_channel->outbound)
+ > ring_avail_percent_lowater)
+ goto found_channel;
+
+ /*
+ * If we reach here, all the channels on the current
+ * NUMA node are busy. Try to find a channel in
+ * other NUMA nodes
+ */
+ cpumask_andnot(alloced_mask, &stor_device->alloced_cpus,
+ cpumask_of_node(cpu_to_node(q_num)));
+
+ for_each_cpu(tgt_cpu, alloced_mask) {
+ channel = stor_device->stor_chns[tgt_cpu];
+ if (hv_get_avail_to_write_percent(
+ &channel->outbound)
+ > ring_avail_percent_lowater) {
+ outgoing_channel = channel;
+ goto found_channel;
}
}
}
outgoing_channel = get_og_chn(stor_device, q_num);
}
-
+found_channel:
vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
vstor_packet->vm_srb.length = (sizeof(struct vmscsi_request) -
max_sub_channels = (num_cpus / storvsc_vcpus_per_sub_channel);
}
- scsi_driver.can_queue = (max_outstanding_req_per_channel *
- (max_sub_channels + 1));
+ scsi_driver.can_queue = max_outstanding_req_per_channel *
+ (max_sub_channels + 1) *
+ (100 - ring_avail_percent_lowater) / 100;
host = scsi_host_alloc(&scsi_driver,
sizeof(struct hv_host_device));
err_out1:
kfree(stor_device->stor_chns);
+ kfree(stor_device->cpumask_chns);
kfree(stor_device);
err_out0: