summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpu.c5
-rw-r--r--kernel/irq/Kconfig18
-rw-r--r--kernel/irq/Makefile2
-rw-r--r--kernel/irq/affinity.c76
-rw-r--r--kernel/irq/autoprobe.c4
-rw-r--r--kernel/irq/chip.c195
-rw-r--r--kernel/irq/cpuhotplug.c150
-rw-r--r--kernel/irq/debugfs.c213
-rw-r--r--kernel/irq/devres.c86
-rw-r--r--kernel/irq/generic-chip.c7
-rw-r--r--kernel/irq/handle.c2
-rw-r--r--kernel/irq/internals.h225
-rw-r--r--kernel/irq/irqdesc.c36
-rw-r--r--kernel/irq/irqdomain.c359
-rw-r--r--kernel/irq/manage.c119
-rw-r--r--kernel/irq/migration.c30
-rw-r--r--kernel/irq/msi.c13
-rw-r--r--kernel/irq/proc.c110
-rw-r--r--kernel/irq/timings.c369
19 files changed, 1755 insertions, 264 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index cb5103413bd8..b86b32ebb3b2 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1252,6 +1252,11 @@ static struct cpuhp_step cpuhp_ap_states[] = {
.startup.single = smpboot_unpark_threads,
.teardown.single = NULL,
},
+ [CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
+ .name = "irq/affinity:online",
+ .startup.single = irq_affinity_online_cpu,
+ .teardown.single = NULL,
+ },
[CPUHP_AP_PERF_ONLINE] = {
.name = "perf:online",
.startup.single = perf_event_init_cpu,
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 3bbfd6a9c475..27c4e774071c 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -21,6 +21,10 @@ config GENERIC_IRQ_SHOW
config GENERIC_IRQ_SHOW_LEVEL
bool
+# Supports effective affinity mask
+config GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ bool
+
# Facility to allocate a hardware interrupt. This is legacy support
# and should not be used in new code. Use irq domains instead.
config GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
@@ -81,6 +85,9 @@ config GENERIC_MSI_IRQ_DOMAIN
config HANDLE_DOMAIN_IRQ
bool
+config IRQ_TIMINGS
+ bool
+
config IRQ_DOMAIN_DEBUG
bool "Expose hardware/virtual IRQ mapping via debugfs"
depends on IRQ_DOMAIN && DEBUG_FS
@@ -108,4 +115,15 @@ config SPARSE_IRQ
If you don't know what to do here, say N.
+config GENERIC_IRQ_DEBUGFS
+ bool "Expose irq internals in debugfs"
+ depends on DEBUG_FS
+ default n
+ ---help---
+
+ Exposes internal state information through debugfs. Mostly for
+ developers and debugging of hard to diagnose interrupt problems.
+
+ If you don't know what to do here, say N.
+
endmenu
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 1d3ee3169202..e4aef7351f2b 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -1,5 +1,6 @@
obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o
+obj-$(CONFIG_IRQ_TIMINGS) += timings.o
obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o
obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o
@@ -10,3 +11,4 @@ obj-$(CONFIG_PM_SLEEP) += pm.o
obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o
obj-$(CONFIG_GENERIC_IRQ_IPI) += ipi.o
obj-$(CONFIG_SMP) += affinity.o
+obj-$(CONFIG_GENERIC_IRQ_DEBUGFS) += debugfs.o
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index e2d356dd7581..d2747f9c5707 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -1,4 +1,7 @@
-
+/*
+ * Copyright (C) 2016 Thomas Gleixner.
+ * Copyright (C) 2016-2017 Christoph Hellwig.
+ */
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -35,13 +38,54 @@ static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
}
}
-static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk)
+static cpumask_var_t *alloc_node_to_present_cpumask(void)
+{
+ cpumask_var_t *masks;
+ int node;
+
+ masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL);
+ if (!masks)
+ return NULL;
+
+ for (node = 0; node < nr_node_ids; node++) {
+ if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL))
+ goto out_unwind;
+ }
+
+ return masks;
+
+out_unwind:
+ while (--node >= 0)
+ free_cpumask_var(masks[node]);
+ kfree(masks);
+ return NULL;
+}
+
+static void free_node_to_present_cpumask(cpumask_var_t *masks)
+{
+ int node;
+
+ for (node = 0; node < nr_node_ids; node++)
+ free_cpumask_var(masks[node]);
+ kfree(masks);
+}
+
+static void build_node_to_present_cpumask(cpumask_var_t *masks)
+{
+ int cpu;
+
+ for_each_present_cpu(cpu)
+ cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
+}
+
+static int get_nodes_in_cpumask(cpumask_var_t *node_to_present_cpumask,
+ const struct cpumask *mask, nodemask_t *nodemsk)
{
int n, nodes = 0;
/* Calculate the number of nodes in the supplied affinity mask */
- for_each_online_node(n) {
- if (cpumask_intersects(mask, cpumask_of_node(n))) {
+ for_each_node(n) {
+ if (cpumask_intersects(mask, node_to_present_cpumask[n])) {
node_set(n, *nodemsk);
nodes++;
}
@@ -64,7 +108,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
int last_affv = affv + affd->pre_vectors;
nodemask_t nodemsk = NODE_MASK_NONE;
struct cpumask *masks;
- cpumask_var_t nmsk;
+ cpumask_var_t nmsk, *node_to_present_cpumask;
if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
return NULL;
@@ -73,13 +117,19 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
if (!masks)
goto out;
+ node_to_present_cpumask = alloc_node_to_present_cpumask();
+ if (!node_to_present_cpumask)
+ goto out;
+
/* Fill out vectors at the beginning that don't need affinity */
for (curvec = 0; curvec < affd->pre_vectors; curvec++)
cpumask_copy(masks + curvec, irq_default_affinity);
/* Stabilize the cpumasks */
get_online_cpus();
- nodes = get_nodes_in_cpumask(cpu_online_mask, &nodemsk);
+ build_node_to_present_cpumask(node_to_present_cpumask);
+ nodes = get_nodes_in_cpumask(node_to_present_cpumask, cpu_present_mask,
+ &nodemsk);
/*
* If the number of nodes in the mask is greater than or equal the
@@ -87,7 +137,8 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
*/
if (affv <= nodes) {
for_each_node_mask(n, nodemsk) {
- cpumask_copy(masks + curvec, cpumask_of_node(n));
+ cpumask_copy(masks + curvec,
+ node_to_present_cpumask[n]);
if (++curvec == last_affv)
break;
}
@@ -101,7 +152,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
vecs_per_node = (affv - (curvec - affd->pre_vectors)) / nodes;
/* Get the cpus on this node which are in the mask */
- cpumask_and(nmsk, cpu_online_mask, cpumask_of_node(n));
+ cpumask_and(nmsk, cpu_present_mask, node_to_present_cpumask[n]);
/* Calculate the number of cpus per vector */
ncpus = cpumask_weight(nmsk);
@@ -133,6 +184,7 @@ done:
/* Fill out vectors at the end that don't need affinity */
for (; curvec < nvecs; curvec++)
cpumask_copy(masks + curvec, irq_default_affinity);
+ free_node_to_present_cpumask(node_to_present_cpumask);
out:
free_cpumask_var(nmsk);
return masks;
@@ -147,12 +199,10 @@ int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd)
{
int resv = affd->pre_vectors + affd->post_vectors;
int vecs = maxvec - resv;
- int cpus;
+ int ret;
- /* Stabilize the cpumasks */
get_online_cpus();
- cpus = cpumask_weight(cpu_online_mask);
+ ret = min_t(int, cpumask_weight(cpu_present_mask), vecs) + resv;
put_online_cpus();
-
- return min(cpus, vecs) + resv;
+ return ret;
}
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 0119b9d467ae..d30a0dd5cc02 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -53,7 +53,7 @@ unsigned long probe_irq_on(void)
if (desc->irq_data.chip->irq_set_type)
desc->irq_data.chip->irq_set_type(&desc->irq_data,
IRQ_TYPE_PROBE);
- irq_startup(desc, false);
+ irq_startup(desc, IRQ_NORESEND, IRQ_START_FORCE);
}
raw_spin_unlock_irq(&desc->lock);
}
@@ -70,7 +70,7 @@ unsigned long probe_irq_on(void)
raw_spin_lock_irq(&desc->lock);
if (!desc->action && irq_settings_can_probe(desc)) {
desc->istate |= IRQS_AUTODETECT | IRQS_WAITING;
- if (irq_startup(desc, false))
+ if (irq_startup(desc, IRQ_NORESEND, IRQ_START_FORCE))
desc->istate |= IRQS_PENDING;
}
raw_spin_unlock_irq(&desc->lock);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index c94da688ee9b..2e30d925a40d 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -185,47 +185,162 @@ static void irq_state_set_masked(struct irq_desc *desc)
irqd_set(&desc->irq_data, IRQD_IRQ_MASKED);
}
-int irq_startup(struct irq_desc *desc, bool resend)
+static void irq_state_clr_started(struct irq_desc *desc)
{
- int ret = 0;
+ irqd_clear(&desc->irq_data, IRQD_IRQ_STARTED);
+}
- irq_state_clr_disabled(desc);
- desc->depth = 0;
+static void irq_state_set_started(struct irq_desc *desc)
+{
+ irqd_set(&desc->irq_data, IRQD_IRQ_STARTED);
+}
+
+enum {
+ IRQ_STARTUP_NORMAL,
+ IRQ_STARTUP_MANAGED,
+ IRQ_STARTUP_ABORT,
+};
+
+#ifdef CONFIG_SMP
+static int
+__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
+{
+ struct irq_data *d = irq_desc_get_irq_data(desc);
+
+ if (!irqd_affinity_is_managed(d))
+ return IRQ_STARTUP_NORMAL;
+
+ irqd_clr_managed_shutdown(d);
- irq_domain_activate_irq(&desc->irq_data);
- if (desc->irq_data.chip->irq_startup) {
- ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
+ if (cpumask_any_and(aff, cpu_online_mask) > nr_cpu_ids) {
+ /*
+ * Catch code which fiddles with enable_irq() on a managed
+ * and potentially shutdown IRQ. Chained interrupt
+ * installment or irq auto probing should not happen on
+ * managed irqs either. Emit a warning, break the affinity
+ * and start it up as a normal interrupt.
+ */
+ if (WARN_ON_ONCE(force))
+ return IRQ_STARTUP_NORMAL;
+ /*
+ * The interrupt was requested, but there is no online CPU
+ * in it's affinity mask. Put it into managed shutdown
+ * state and let the cpu hotplug mechanism start it up once
+ * a CPU in the mask becomes available.
+ */
+ irqd_set_managed_shutdown(d);
+ return IRQ_STARTUP_ABORT;
+ }
+ return IRQ_STARTUP_MANAGED;
+}
+#else
+static int
+__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
+{
+ return IRQ_STARTUP_NORMAL;
+}
+#endif
+
+static int __irq_startup(struct irq_desc *desc)
+{
+ struct irq_data *d = irq_desc_get_irq_data(desc);
+ int ret = 0;
+
+ irq_domain_activate_irq(d);
+ if (d->chip->irq_startup) {
+ ret = d->chip->irq_startup(d);
+ irq_state_clr_disabled(desc);
irq_state_clr_masked(desc);
} else {
irq_enable(desc);
}
+ irq_state_set_started(desc);
+ return ret;
+}
+
+int irq_startup(struct irq_desc *desc, bool resend, bool force)
+{
+ struct irq_data *d = irq_desc_get_irq_data(desc);
+ struct cpumask *aff = irq_data_get_affinity_mask(d);
+ int ret = 0;
+
+ desc->depth = 0;
+
+ if (irqd_is_started(d)) {
+ irq_enable(desc);
+ } else {
+ switch (__irq_startup_managed(desc, aff, force)) {
+ case IRQ_STARTUP_NORMAL:
+ ret = __irq_startup(desc);
+ irq_setup_affinity(desc);
+ break;
+ case IRQ_STARTUP_MANAGED:
+ ret = __irq_startup(desc);
+ irq_set_affinity_locked(d, aff, false);
+ break;
+ case IRQ_STARTUP_ABORT:
+ return 0;
+ }
+ }
if (resend)
check_irq_resend(desc);
+
return ret;
}
+static void __irq_disable(struct irq_desc *desc, bool mask);
+
void irq_shutdown(struct irq_desc *desc)
{
- irq_state_set_disabled(desc);
- desc->depth = 1;
- if (desc->irq_data.chip->irq_shutdown)
- desc->irq_data.chip->irq_shutdown(&desc->irq_data);
- else if (desc->irq_data.chip->irq_disable)
- desc->irq_data.chip->irq_disable(&desc->irq_data);
- else
- desc->irq_data.chip->irq_mask(&desc->irq_data);
+ if (irqd_is_started(&desc->irq_data)) {
+ desc->depth = 1;
+ if (desc->irq_data.chip->irq_shutdown) {
+ desc->irq_data.chip->irq_shutdown(&desc->irq_data);
+ irq_state_set_disabled(desc);
+ irq_state_set_masked(desc);
+ } else {
+ __irq_disable(desc, true);
+ }
+ irq_state_clr_started(desc);
+ }
+ /*
+ * This must be called even if the interrupt was never started up,
+ * because the activation can happen before the interrupt is
+ * available for request/startup. It has it's own state tracking so
+ * it's safe to call it unconditionally.
+ */
irq_domain_deactivate_irq(&desc->irq_data);
- irq_state_set_masked(desc);
}
void irq_enable(struct irq_desc *desc)
{
- irq_state_clr_disabled(desc);
- if (desc->irq_data.chip->irq_enable)
- desc->irq_data.chip->irq_enable(&desc->irq_data);
- else
- desc->irq_data.chip->irq_unmask(&desc->irq_data);
- irq_state_clr_masked(desc);
+ if (!irqd_irq_disabled(&desc->irq_data)) {
+ unmask_irq(desc);
+ } else {
+ irq_state_clr_disabled(desc);
+ if (desc->irq_data.chip->irq_enable) {
+ desc->irq_data.chip->irq_enable(&desc->irq_data);
+ irq_state_clr_masked(desc);
+ } else {
+ unmask_irq(desc);
+ }
+ }
+}
+
+static void __irq_disable(struct irq_desc *desc, bool mask)
+{
+ if (irqd_irq_disabled(&desc->irq_data)) {
+ if (mask)
+ mask_irq(desc);
+ } else {
+ irq_state_set_disabled(desc);
+ if (desc->irq_data.chip->irq_disable) {
+ desc->irq_data.chip->irq_disable(&desc->irq_data);
+ irq_state_set_masked(desc);
+ } else if (mask) {
+ mask_irq(desc);
+ }
+ }
}
/**
@@ -250,13 +365,7 @@ void irq_enable(struct irq_desc *desc)
*/
void irq_disable(struct irq_desc *desc)
{
- irq_state_set_disabled(desc);
- if (desc->irq_data.chip->irq_disable) {
- desc->irq_data.chip->irq_disable(&desc->irq_data);
- irq_state_set_masked(desc);
- } else if (irq_settings_disable_unlazy(desc)) {
- mask_irq(desc);
- }
+ __irq_disable(desc, irq_settings_disable_unlazy(desc));
}
void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu)
@@ -279,18 +388,21 @@ void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu)
static inline void mask_ack_irq(struct irq_desc *desc)
{
- if (desc->irq_data.chip->irq_mask_ack)
+ if (desc->irq_data.chip->irq_mask_ack) {
desc->irq_data.chip->irq_mask_ack(&desc->irq_data);
- else {
- desc->irq_data.chip->irq_mask(&desc->irq_data);
+ irq_state_set_masked(desc);
+ } else {
+ mask_irq(desc);
if (desc->irq_data.chip->irq_ack)
desc->irq_data.chip->irq_ack(&desc->irq_data);
}
- irq_state_set_masked(desc);
}
void mask_irq(struct irq_desc *desc)
{
+ if (irqd_irq_masked(&desc->irq_data))
+ return;
+
if (desc->irq_data.chip->irq_mask) {
desc->irq_data.chip->irq_mask(&desc->irq_data);
irq_state_set_masked(desc);
@@ -299,6 +411,9 @@ void mask_irq(struct irq_desc *desc)
void unmask_irq(struct irq_desc *desc)
{
+ if (!irqd_irq_masked(&desc->irq_data))
+ return;
+
if (desc->irq_data.chip->irq_unmask) {
desc->irq_data.chip->irq_unmask(&desc->irq_data);
irq_state_clr_masked(desc);
@@ -312,10 +427,7 @@ void unmask_threaded_irq(struct irq_desc *desc)
if (chip->flags & IRQCHIP_EOI_THREADED)
chip->irq_eoi(&desc->irq_data);
- if (chip->irq_unmask) {
- chip->irq_unmask(&desc->irq_data);
- irq_state_clr_masked(desc);
- }
+ unmask_irq(desc);
}
/*
@@ -851,7 +963,7 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
irq_settings_set_norequest(desc);
irq_settings_set_nothread(desc);
desc->action = &chained_action;
- irq_startup(desc, true);
+ irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE);
}
}
@@ -903,6 +1015,13 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
if (!desc)
return;
+
+ /*
+ * Warn when a driver sets the no autoenable flag on an already
+ * active interrupt.
+ */
+ WARN_ON_ONCE(!desc->depth && (set & _IRQ_NOAUTOEN));
+
irq_settings_clr_and_set(desc, clr, set);
irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU |
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 011f8c4c63da..aee8f7ec40af 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -14,37 +14,99 @@
#include "internals.h"
+/* For !GENERIC_IRQ_EFFECTIVE_AFF_MASK this looks at general affinity mask */
+static inline bool irq_needs_fixup(struct irq_data *d)
+{
+ const struct cpumask *m = irq_data_get_effective_affinity_mask(d);
+
+ return cpumask_test_cpu(smp_processor_id(), m);
+}
+
static bool migrate_one_irq(struct irq_desc *desc)
{
struct irq_data *d = irq_desc_get_irq_data(desc);
- const struct cpumask *affinity = d->common->affinity;
- struct irq_chip *c;
- bool ret = false;
+ struct irq_chip *chip = irq_data_get_irq_chip(d);
+ bool maskchip = !irq_can_move_pcntxt(d) && !irqd_irq_masked(d);
+ const struct cpumask *affinity;
+ bool brokeaff = false;
+ int err;
/*
- * If this is a per-CPU interrupt, or the affinity does not
- * include this CPU, then we have nothing to do.
+ * IRQ chip might be already torn down, but the irq descriptor is
+ * still in the radix tree. Also if the chip has no affinity setter,
+ * nothing can be done here.
*/
- if (irqd_is_per_cpu(d) ||
- !cpumask_test_cpu(smp_processor_id(), affinity))
+ if (!chip || !chip->irq_set_affinity) {
+ pr_debug("IRQ %u: Unable to migrate away\n", d->irq);
return false;
+ }
+
+ /*
+ * No move required, if:
+ * - Interrupt is per cpu
+ * - Interrupt is not started
+ * - Affinity mask does not include this CPU.
+ *
+ * Note: Do not check desc->action as this might be a chained
+ * interrupt.
+ */
+ if (irqd_is_per_cpu(d) || !irqd_is_started(d) || !irq_needs_fixup(d)) {
+ /*
+ * If an irq move is pending, abort it if the dying CPU is
+ * the sole target.
+ */
+ irq_fixup_move_pending(desc, false);
+ return false;
+ }
+
+ /*
+ * Complete an eventually pending irq move cleanup. If this
+ * interrupt was moved in hard irq context, then the vectors need
+ * to be cleaned up. It can't wait until this interrupt actually
+ * happens and this CPU was involved.
+ */
+ irq_force_complete_move(desc);
+
+ /*
+ * If there is a setaffinity pending, then try to reuse the pending
+ * mask, so the last change of the affinity does not get lost. If
+ * there is no move pending or the pending mask does not contain
+ * any online CPU, use the current affinity mask.
+ */
+ if (irq_fixup_move_pending(desc, true))
+ affinity = irq_desc_get_pending_mask(desc);
+ else
+ affinity = irq_data_get_affinity_mask(d);
+
+ /* Mask the chip for interrupts which cannot move in process context */
+ if (maskchip && chip->irq_mask)
+ chip->irq_mask(d);
if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+ /*
+ * If the interrupt is managed, then shut it down and leave
+ * the affinity untouched.
+ */
+ if (irqd_affinity_is_managed(d)) {
+ irqd_set_managed_shutdown(d);
+ irq_shutdown(desc);
+ return false;
+ }
affinity = cpu_online_mask;
- ret = true;
+ brokeaff = true;
}
- c = irq_data_get_irq_chip(d);
- if (!c->irq_set_affinity) {
- pr_debug("IRQ%u: unable to set affinity\n", d->irq);
- } else {
- int r = irq_do_set_affinity(d, affinity, false);
- if (r)
- pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n",
- d->irq, r);
+ err = irq_do_set_affinity(d, affinity, true);
+ if (err) {
+ pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n",
+ d->irq, err);
+ brokeaff = false;
}
- return ret;
+ if (maskchip && chip->irq_unmask)
+ chip->irq_unmask(d);
+
+ return brokeaff;
}
/**
@@ -59,11 +121,8 @@ static bool migrate_one_irq(struct irq_desc *desc)
*/
void irq_migrate_all_off_this_cpu(void)
{
- unsigned int irq;
struct irq_desc *desc;
- unsigned long flags;
-
- local_irq_save(flags);
+ unsigned int irq;
for_each_active_irq(irq) {
bool affinity_broken;
@@ -73,10 +132,53 @@ void irq_migrate_all_off_this_cpu(void)
affinity_broken = migrate_one_irq(desc);
raw_spin_unlock(&desc->lock);
- if (affinity_broken)
- pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n",
+ if (affinity_broken) {
+ pr_warn_ratelimited("IRQ %u: no longer affine to CPU%u\n",
irq, smp_processor_id());
+ }
+ }
+}
+
+static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu)
+{
+ struct irq_data *data = irq_desc_get_irq_data(desc);
+ const struct cpumask *affinity = irq_data_get_affinity_mask(data);
+
+ if (!irqd_affinity_is_managed(data) || !desc->action ||
+ !irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity))
+ return;
+
+ if (irqd_is_managed_and_shutdown(data)) {
+ irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
+ return;
+ }
+
+ /*
+ * If the interrupt can only be directed to a single target
+ * CPU then it is already assigned to a CPU in the affinity
+ * mask. No point in trying to move it around.
+ */
+ if (!irqd_is_single_target(data))
+ irq_set_affinity_locked(data, affinity, false);
+}
+
+/**
+ * irq_affinity_online_cpu - Restore affinity for managed interrupts
+ * @cpu: Upcoming CPU for which interrupts should be restored
+ */
+int irq_affinity_online_cpu(unsigned int cpu)
+{
+ struct irq_desc *desc;
+ unsigned int irq;
+
+ irq_lock_sparse();
+ for_each_active_irq(irq) {
+ desc = irq_to_desc(irq);
+ raw_spin_lock_irq(&desc->lock);
+ irq_restore_affinity_of_irq(desc, cpu);
+ raw_spin_unlock_irq(&desc->lock);
}
+ irq_unlock_sparse();
- local_irq_restore(flags);
+ return 0;
}
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
new file mode 100644
index 000000000000..4d384edc0c64
--- /dev/null
+++ b/kernel/irq/debugfs.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright 2017 Thomas Gleixner <tglx@linutronix.de>
+ *
+ * This file is licensed under the GPL V2.
+ */
+#include <linux/irqdomain.h>
+#include <linux/irq.h>
+
+#include "internals.h"
+
+static struct dentry *irq_dir;
+
+struct irq_bit_descr {
+ unsigned int mask;
+ char *name;
+};
+#define BIT_MASK_DESCR(m) { .mask = m, .name = #m }
+
+static void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state,
+ const struct irq_bit_descr *sd, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++, sd++) {
+ if (state & sd->mask)
+ seq_printf(m, "%*s%s\n", ind + 12, "", sd->name);
+ }
+}
+
+#ifdef CONFIG_SMP
+static void irq_debug_show_masks(struct seq_file *m, struct irq_desc *desc)
+{
+ struct irq_data *data = irq_desc_get_irq_data(desc);
+ struct cpumask *msk;
+
+ msk = irq_data_get_affinity_mask(data);
+ seq_printf(m, "affinity: %*pbl\n", cpumask_pr_args(msk));
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ msk = irq_data_get_effective_affinity_mask(data);
+ seq_printf(m, "effectiv: %*pbl\n", cpumask_pr_args(msk));
+#endif
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+ msk = desc->pending_mask;
+ seq_printf(m, "pending: %*pbl\n", cpumask_pr_args(msk));
+#endif
+}
+#else
+static void irq_debug_show_masks(struct seq_file *m, struct irq_desc *desc) { }
+#endif
+
+static const struct irq_bit_descr irqchip_flags[] = {
+ BIT_MASK_DESCR(IRQCHIP_SET_TYPE_MASKED),
+ BIT_MASK_DESCR(IRQCHIP_EOI_IF_HANDLED),
+ BIT_MASK_DESCR(IRQCHIP_MASK_ON_SUSPEND),
+ BIT_MASK_DESCR(IRQCHIP_ONOFFLINE_ENABLED),
+ BIT_MASK_DESCR(IRQCHIP_SKIP_SET_WAKE),
+ BIT_MASK_DESCR(IRQCHIP_ONESHOT_SAFE),
+ BIT_MASK_DESCR(IRQCHIP_EOI_THREADED),
+};
+
+static void
+irq_debug_show_chip(struct seq_file *m, struct irq_data *data, int ind)
+{
+ struct irq_chip *chip = data->chip;
+
+ if (!chip) {
+ seq_printf(m, "chip: None\n");
+ return;
+ }
+ seq_printf(m, "%*schip: %s\n", ind, "", chip->name);
+ seq_printf(m, "%*sflags: 0x%lx\n", ind + 1, "", chip->flags);
+ irq_debug_show_bits(m, ind, chip->flags, irqchip_flags,
+ ARRAY_SIZE(irqchip_flags));
+}
+
+static void
+irq_debug_show_data(struct seq_file *m, struct irq_data *data, int ind)
+{
+ seq_printf(m, "%*sdomain: %s\n", ind, "",
+ data->domain ? data->domain->name : "");
+ seq_printf(m, "%*shwirq: 0x%lx\n", ind + 1, "", data->hwirq);
+ irq_debug_show_chip(m, data, ind + 1);
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+ if (!data->parent_data)
+ return;
+ seq_printf(m, "%*sparent:\n", ind + 1, "");
+ irq_debug_show_data(m, data->parent_data, ind + 4);
+#endif
+}
+
+static const struct irq_bit_descr irqdata_states[] = {
+ BIT_MASK_DESCR(IRQ_TYPE_EDGE_RISING),
+ BIT_MASK_DESCR(IRQ_TYPE_EDGE_FALLING),
+ BIT_MASK_DESCR(IRQ_TYPE_LEVEL_HIGH),
+ BIT_MASK_DESCR(IRQ_TYPE_LEVEL_LOW),
+ BIT_MASK_DESCR(IRQD_LEVEL),
+
+ BIT_MASK_DESCR(IRQD_ACTIVATED),
+ BIT_MASK_DESCR(IRQD_IRQ_STARTED),
+ BIT_MASK_DESCR(IRQD_IRQ_DISABLED),
+ BIT_MASK_DESCR(IRQD_IRQ_MASKED),
+ BIT_MASK_DESCR(IRQD_IRQ_INPROGRESS),
+
+ BIT_MASK_DESCR(IRQD_PER_CPU),
+ BIT_MASK_DESCR(IRQD_NO_BALANCING),
+
+ BIT_MASK_DESCR(IRQD_SINGLE_TARGET),
+ BIT_MASK_DESCR(IRQD_MOVE_PCNTXT),
+ BIT_MASK_DESCR(IRQD_AFFINITY_SET),
+ BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING),
+ BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED),
+ BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN),
+
+ BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU),
+
+ BIT_MASK_DESCR(IRQD_WAKEUP_STATE),
+ BIT_MASK_DESCR(IRQD_WAKEUP_ARMED),
+};
+
+static const struct irq_bit_descr irqdesc_states[] = {
+ BIT_MASK_DESCR(_IRQ_NOPROBE),
+ BIT_MASK_DESCR(_IRQ_NOREQUEST),
+ BIT_MASK_DESCR(_IRQ_NOTHREAD),
+ BIT_MASK_DESCR(_IRQ_NOAUTOEN),
+ BIT_MASK_DESCR(_IRQ_NESTED_THREAD),
+ BIT_MASK_DESCR(_IRQ_PER_CPU_DEVID),
+ BIT_MASK_DESCR(_IRQ_IS_POLLED),
+ BIT_MASK_DESCR(_IRQ_DISABLE_UNLAZY),
+};
+
+static const struct irq_bit_descr irqdesc_istates[] = {
+ BIT_MASK_DESCR(IRQS_AUTODETECT),
+ BIT_MASK_DESCR(IRQS_SPURIOUS_DISABLED),
+ BIT_MASK_DESCR(IRQS_POLL_INPROGRESS),
+ BIT_MASK_DESCR(IRQS_ONESHOT),
+ BIT_MASK_DESCR(IRQS_REPLAY),
+ BIT_MASK_DESCR(IRQS_WAITING),
+ BIT_MASK_DESCR(IRQS_PENDING),
+ BIT_MASK_DESCR(IRQS_SUSPENDED),
+};
+
+
+static int irq_debug_show(struct seq_file *m, void *p)
+{
+ struct irq_desc *desc = m->private;
+ struct irq_data *data;
+
+ raw_spin_lock_irq(&desc->lock);
+ data = irq_desc_get_irq_data(desc);
+ seq_printf(m, "handler: %pf\n", desc->handle_irq);
+ seq_printf(m, "status: 0x%08x\n", desc->status_use_accessors);
+ irq_debug_show_bits(m, 0, desc->status_use_accessors, irqdesc_states,
+ ARRAY_SIZE(irqdesc_states));
+ seq_printf(m, "istate: 0x%08x\n", desc->istate);
+ irq_debug_show_bits(m, 0, desc->istate, irqdesc_istates,
+ ARRAY_SIZE(irqdesc_istates));
+ seq_printf(m, "ddepth: %u\n", desc->depth);
+ seq_printf(m, "wdepth: %u\n", desc->wake_depth);
+ seq_printf(m, "dstate: 0x%08x\n", irqd_get(data));
+ irq_debug_show_bits(m, 0, irqd_get(data), irqdata_states,
+ ARRAY_SIZE(irqdata_states));
+ seq_printf(m, "node: %d\n", irq_data_get_node(data));
+ irq_debug_show_masks(m, desc);
+ irq_debug_show_data(m, data, 0);
+ raw_spin_unlock_irq(&desc->lock);
+ return 0;
+}
+
+static int irq_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, irq_debug_show, inode->i_private);
+}
+
+static const struct file_operations dfs_irq_ops = {
+ .open = irq_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc)
+{
+ char name [10];
+
+ if (!irq_dir || !desc || desc->debugfs_file)
+ return;
+
+ sprintf(name, "%d", irq);
+ desc->debugfs_file = debugfs_create_file(name, 0444, irq_dir, desc,
+ &dfs_irq_ops);
+}
+
+static int __init irq_debugfs_init(void)
+{
+ struct dentry *root_dir;
+ int irq;
+
+ root_dir = debugfs_create_dir("irq", NULL);
+ if (!root_dir)
+ return -ENOMEM;
+
+ irq_domain_debugfs_init(root_dir);
+
+ irq_dir = debugfs_create_dir("irqs", root_dir);
+
+ irq_lock_sparse();
+ for_each_active_irq(irq)
+ irq_add_debugfs_entry(irq, irq_to_desc(irq));
+ irq_unlock_sparse();
+
+ return 0;
+}
+__initcall(irq_debugfs_init);
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index 1613bfd48365..194c506d9d20 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -4,6 +4,8 @@
#include <linux/gfp.h>
#include <linux/irq.h>
+#include "internals.h"
+
/*
* Device resource management aware IRQ request/free implementation.
*/
@@ -198,3 +200,87 @@ int __devm_irq_alloc_descs(struct device *dev, int irq, unsigned int from,
return base;
}
EXPORT_SYMBOL_GPL(__devm_irq_alloc_descs);
+
+#ifdef CONFIG_GENERIC_IRQ_CHIP
+/**
+ * devm_irq_alloc_generic_chip - Allocate and initialize a generic chip
+ * for a managed device
+ * @dev: Device to allocate the generic chip for
+ * @name: Name of the irq chip
+ * @num_ct: Number of irq_chip_type instances associated with this
+ * @irq_base: Interrupt base nr for this chip
+ * @reg_base: Register base address (virtual)
+ * @handler: Default flow handler associated with this chip
+ *
+ * Returns an initialized irq_chip_generic structure. The chip defaults
+ * to the primary (index 0) irq_chip_type and @handler
+ */
+struct irq_chip_generic *
+devm_irq_alloc_generic_chip(struct device *dev, const char *name, int num_ct,
+ unsigned int irq_base, void __iomem *reg_base,
+ irq_flow_handler_t handler)
+{
+ struct irq_chip_generic *gc;
+ unsigned long sz = sizeof(*gc) + num_ct * sizeof(struct irq_chip_type);
+
+ gc = devm_kzalloc(dev, sz, GFP_KERNEL);
+ if (gc)
+ irq_init_generic_chip(gc, name, num_ct,
+ irq_base, reg_base, handler);
+
+ return gc;
+}
+EXPORT_SYMBOL_GPL(devm_irq_alloc_generic_chip);
+
+struct irq_generic_chip_devres {
+ struct irq_chip_generic *gc;
+ u32 msk;
+ unsigned int clr;
+ unsigned int set;
+};
+
+static void devm_irq_remove_generic_chip(struct device *dev, void *res)
+{
+ struct irq_generic_chip_devres *this = res;
+
+ irq_remove_generic_chip(this->gc, this->msk, this->clr, this->set);
+}
+
+/**
+ * devm_irq_setup_generic_chip - Setup a range of interrupts with a generic
+ * chip for a managed device
+ *
+ * @dev: Device to setup the generic chip for
+ * @gc: Generic irq chip holding all data
+ * @msk: Bitmask holding the irqs to initialize relative to gc->irq_base
+ * @flags: Flags for initialization
+ * @clr: IRQ_* bits to clear
+ * @set: IRQ_* bits to set
+ *
+ * Set up max. 32 interrupts starting from gc->irq_base. Note, this
+ * initializes all interrupts to the primary irq_chip_type and its
+ * associated handler.
+ */
+int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc,
+ u32 msk, enum irq_gc_flags flags,
+ unsigned int clr, unsigned int set)
+{
+ struct irq_generic_chip_devres *dr;
+
+ dr = devres_alloc(devm_irq_remove_generic_chip,
+ sizeof(*dr), GFP_KERNEL);
+ if (!dr)
+ return -ENOMEM;
+
+ irq_setup_generic_chip(gc, msk, flags, clr, set);
+
+ dr->gc = gc;
+ dr->msk = msk;
+ dr->clr = clr;
+ dr->set = set;
+ devres_add(dev, dr);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devm_irq_setup_generic_chip);
+#endif /* CONFIG_GENERIC_IRQ_CHIP */
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index ee32870079c9..f7086b78ad6e 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -201,10 +201,9 @@ static void irq_writel_be(u32 val, void __iomem *addr)
iowrite32be(val, addr);
}
-static void
-irq_init_generic_chip(struct irq_chip_generic *gc, const char *name,
- int num_ct, unsigned int irq_base,
- void __iomem *reg_base, irq_flow_handler_t handler)
+void irq_init_generic_chip(struct irq_chip_generic *gc, const char *name,
+ int num_ct, unsigned int irq_base,
+ void __iomem *reg_base, irq_flow_handler_t handler)
{
raw_spin_lock_init(&gc->lock);
gc->num_ct = num_ct;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index d3f24905852c..eb4d3e8945b8 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -138,6 +138,8 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags
unsigned int irq = desc->irq_data.irq;
struct irqaction *action;
+ record_irq_time(desc);
+
for_each_action_of_desc(desc, action) {
irqreturn_t res;
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index bc226e783bd2..9da14d125df4 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -8,6 +8,7 @@
#include <linux/irqdesc.h>
#include <linux/kernel_stat.h>
#include <linux/pm_runtime.h>
+#include <linux/sched/clock.h>
#ifdef CONFIG_SPARSE_IRQ
# define IRQ_BITMAP_BITS (NR_IRQS + 8196)
@@ -57,6 +58,7 @@ enum {
IRQS_WAITING = 0x00000080,
IRQS_PENDING = 0x00000200,
IRQS_SUSPENDED = 0x00000800,
+ IRQS_TIMINGS = 0x00001000,
};
#include "debug.h"
@@ -66,7 +68,14 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned long flags);
extern void __disable_irq(struct irq_desc *desc);
extern void __enable_irq(struct irq_desc *desc);
-extern int irq_startup(struct irq_desc *desc, bool resend);
+#define IRQ_RESEND true
+#define IRQ_NORESEND false
+
+#define IRQ_START_FORCE true
+#define IRQ_START_COND false
+
+extern int irq_startup(struct irq_desc *desc, bool resend, bool force);
+
extern void irq_shutdown(struct irq_desc *desc);
extern void irq_enable(struct irq_desc *desc);
extern void irq_disable(struct irq_desc *desc);
@@ -109,13 +118,19 @@ static inline void unregister_handler_proc(unsigned int irq,
extern bool irq_can_set_affinity_usr(unsigned int irq);
-extern int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask);
+extern int irq_select_affinity_usr(unsigned int irq);
extern void irq_set_thread_affinity(struct irq_desc *desc);
extern int irq_do_set_affinity(struct irq_data *data,
const struct cpumask *dest, bool force);
+#ifdef CONFIG_SMP
+extern int irq_setup_affinity(struct irq_desc *desc);
+#else
+static inline int irq_setup_affinity(struct irq_desc *desc) { return 0; }
+#endif
+
/* Inline functions for support of irq chips on slow busses */
static inline void chip_bus_lock(struct irq_desc *desc)
{
@@ -169,6 +184,11 @@ irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags)
#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
+static inline unsigned int irqd_get(struct irq_data *d)
+{
+ return __irqd_to_state(d);
+}
+
/*
* Manipulation functions for irq_data.state
*/
@@ -182,6 +202,16 @@ static inline void irqd_clr_move_pending(struct irq_data *d)
__irqd_to_state(d) &= ~IRQD_SETAFFINITY_PENDING;
}
+static inline void irqd_set_managed_shutdown(struct irq_data *d)
+{
+ __irqd_to_state(d) |= IRQD_MANAGED_SHUTDOWN;
+}
+
+static inline void irqd_clr_managed_shutdown(struct irq_data *d)
+{
+ __irqd_to_state(d) &= ~IRQD_MANAGED_SHUTDOWN;
+}
+
static inline void irqd_clear(struct irq_data *d, unsigned int mask)
{
__irqd_to_state(d) &= ~mask;
@@ -226,3 +256,194 @@ irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { }
static inline void
irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) { }
#endif
+
+#ifdef CONFIG_IRQ_TIMINGS
+
+#define IRQ_TIMINGS_SHIFT 5
+#define IRQ_TIMINGS_SIZE (1 << IRQ_TIMINGS_SHIFT)
+#define IRQ_TIMINGS_MASK (IRQ_TIMINGS_SIZE - 1)
+
+/**
+ * struct irq_timings - irq timings storing structure
+ * @values: a circular buffer of u64 encoded <timestamp,irq> values
+ * @count: the number of elements in the array
+ */
+struct irq_timings {
+ u64 values[IRQ_TIMINGS_SIZE];
+ int count;
+};
+
+DECLARE_PER_CPU(struct irq_timings, irq_timings);
+
+extern void irq_timings_free(int irq);
+extern int irq_timings_alloc(int irq);
+
+static inline void irq_remove_timings(struct irq_desc *desc)
+{
+ desc->istate &= ~IRQS_TIMINGS;
+
+ irq_timings_free(irq_desc_get_irq(desc));
+}
+
+static inline void irq_setup_timings(struct irq_desc *desc, struct irqaction *act)
+{
+ int irq = irq_desc_get_irq(desc);
+ int ret;
+
+ /*
+ * We don't need the measurement because the idle code already
+ * knows the next expiry event.
+ */
+ if (act->flags & __IRQF_TIMER)
+ return;
+
+ /*
+ * In case the timing allocation fails, we just want to warn,
+ * not fail, so letting the system boot anyway.
+ */
+ ret = irq_timings_alloc(irq);
+ if (ret) {
+ pr_warn("Failed to allocate irq timing stats for irq%d (%d)",
+ irq, ret);
+ return;
+ }
+
+ desc->istate |= IRQS_TIMINGS;
+}
+
+extern void irq_timings_enable(void);
+extern void irq_timings_disable(void);
+
+DECLARE_STATIC_KEY_FALSE(irq_timing_enabled);
+
+/*
+ * The interrupt number and the timestamp are encoded into a single
+ * u64 variable to optimize the size.
+ * 48 bit time stamp and 16 bit IRQ number is way sufficient.
+ * Who cares an IRQ after 78 hours of idle time?
+ */
+static inline u64 irq_timing_encode(u64 timestamp, int irq)
+{
+ return (timestamp << 16) | irq;
+}
+
+static inline int irq_timing_decode(u64 value, u64 *timestamp)
+{
+ *timestamp = value >> 16;
+ return value & U16_MAX;
+}
+
+/*
+ * The function record_irq_time is only called in one place in the
+ * interrupts handler. We want this function always inline so the code
+ * inside is embedded in the function and the static key branching
+ * code can act at the higher level. Without the explicit
+ * __always_inline we can end up with a function call and a small
+ * overhead in the hotpath for nothing.
+ */
+static __always_inline void record_irq_time(struct irq_desc *desc)
+{
+ if (!static_branch_likely(&irq_timing_enabled))
+ return;
+
+ if (desc->istate & IRQS_TIMINGS) {
+ struct irq_timings *timings = this_cpu_ptr(&irq_timings);
+
+ timings->values[timings->count & IRQ_TIMINGS_MASK] =
+ irq_timing_encode(local_clock(),
+ irq_desc_get_irq(desc));
+
+ timings->count++;
+ }
+}
+#else
+static inline void irq_remove_timings(struct irq_desc *desc) {}
+static inline void irq_setup_timings(struct irq_desc *desc,
+ struct irqaction *act) {};
+static inline void record_irq_time(struct irq_desc *desc) {}
+#endif /* CONFIG_IRQ_TIMINGS */
+
+
+#ifdef CONFIG_GENERIC_IRQ_CHIP
+void irq_init_generic_chip(struct irq_chip_generic *gc, const char *name,
+ int num_ct, unsigned int irq_base,
+ void __iomem *reg_base, irq_flow_handler_t handler);
+#else
+static inline void
+irq_init_generic_chip(struct irq_chip_generic *gc, const char *name,
+ int num_ct, unsigned int irq_base,
+ void __iomem *reg_base, irq_flow_handler_t handler) { }
+#endif /* CONFIG_GENERIC_IRQ_CHIP */
+
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+static inline bool irq_can_move_pcntxt(struct irq_data *data)
+{
+ return irqd_can_move_in_process_context(data);
+}
+static inline bool irq_move_pending(struct irq_data *data)
+{
+ return irqd_is_setaffinity_pending(data);
+}
+static inline void
+irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask)
+{
+ cpumask_copy(desc->pending_mask, mask);
+}
+static inline void
+irq_get_pending(struct cpumask *mask, struct irq_desc *desc)
+{
+ cpumask_copy(mask, desc->pending_mask);
+}
+static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc)
+{
+ return desc->pending_mask;
+}
+bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear);
+#else /* CONFIG_GENERIC_PENDING_IRQ */
+static inline bool irq_can_move_pcntxt(struct irq_data *data)
+{
+ return true;
+}
+static inline bool irq_move_pending(struct irq_data *data)
+{
+ return false;
+}
+static inline void
+irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask)
+{
+}
+static inline void
+irq_get_pending(struct cpumask *mask, struct irq_desc *desc)
+{
+}
+static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc)
+{
+ return NULL;
+}
+static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
+{
+ return false;
+}
+#endif /* !CONFIG_GENERIC_PENDING_IRQ */
+
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+#include <linux/debugfs.h>
+
+void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc);
+static inline void irq_remove_debugfs_entry(struct irq_desc *desc)
+{
+ debugfs_remove(desc->debugfs_file);
+}
+# ifdef CONFIG_IRQ_DOMAIN
+void irq_domain_debugfs_init(struct dentry *root);
+# else
+static inline void irq_domain_debugfs_init(struct dentry *root);
+# endif
+#else /* CONFIG_GENERIC_IRQ_DEBUGFS */
+static inline void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *d)
+{
+}
+static inline void irq_remove_debugfs_entry(struct irq_desc *d)
+{
+}
+#endif /* CONFIG_GENERIC_IRQ_DEBUGFS */
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 00bb0aeea1d0..948b50e78549 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -54,14 +54,25 @@ static void __init init_irq_default_affinity(void)
#endif
#ifdef CONFIG_SMP
-static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node)
+static int alloc_masks(struct irq_desc *desc, int node)
{
if (!zalloc_cpumask_var_node(&desc->irq_common_data.affinity,
- gfp, node))
+ GFP_KERNEL, node))
return -ENOMEM;
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ if (!zalloc_cpumask_var_node(&desc->irq_common_data.effective_affinity,
+ GFP_KERNEL, node)) {
+ free_cpumask_var(desc->irq_common_data.affinity);
+ return -ENOMEM;
+ }
+#endif
+
#ifdef CONFIG_GENERIC_PENDING_IRQ
- if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) {
+ if (!zalloc_cpumask_var_node(&desc->pending_mask, GFP_KERNEL, node)) {
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ free_cpumask_var(desc->irq_common_data.effective_affinity);
+#endif
free_cpumask_var(desc->irq_common_data.affinity);
return -ENOMEM;
}
@@ -86,7 +97,7 @@ static void desc_smp_init(struct irq_desc *desc, int node,
#else
static inline int
-alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) { return 0; }
+alloc_masks(struct irq_desc *desc, int node) { return 0; }
static inline void
desc_smp_init(struct irq_desc *desc, int node, const struct cpumask *affinity) { }
#endif
@@ -105,6 +116,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
desc->irq_data.chip_data = NULL;
irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS);
irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED);
+ irqd_set(&desc->irq_data, IRQD_IRQ_MASKED);
desc->handle_irq = handle_bad_irq;
desc->depth = 1;
desc->irq_count = 0;
@@ -324,6 +336,9 @@ static void free_masks(struct irq_desc *desc)
free_cpumask_var(desc->pending_mask);
#endif
free_cpumask_var(desc->irq_common_data.affinity);
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ free_cpumask_var(desc->irq_common_data.effective_affinity);
+#endif
}
#else
static inline void free_masks(struct irq_desc *desc) { }
@@ -344,9 +359,8 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
struct module *owner)
{
struct irq_desc *desc;
- gfp_t gfp = GFP_KERNEL;
- desc = kzalloc_node(sizeof(*desc), gfp, node);
+ desc = kzalloc_node(sizeof(*desc), GFP_KERNEL, node);
if (!desc)
return NULL;
/* allocate based on nr_cpu_ids */
@@ -354,7 +368,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
if (!desc->kstat_irqs)
goto err_desc;
- if (alloc_masks(desc, gfp, node))
+ if (alloc_masks(desc, node))
goto err_kstat;
raw_spin_lock_init(&desc->lock);
@@ -394,6 +408,7 @@ static void free_desc(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
+ irq_remove_debugfs_entry(desc);
unregister_irq_proc(irq, desc);
/*
@@ -480,7 +495,8 @@ int __init early_irq_init(void)
/* Let arch update nr_irqs and return the nr of preallocated irqs */
initcnt = arch_probe_nr_irqs();
- printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d %d\n", NR_IRQS, nr_irqs, initcnt);
+ printk(KERN_INFO "NR_IRQS: %d, nr_irqs: %d, preallocated irqs: %d\n",
+ NR_IRQS, nr_irqs, initcnt);
if (WARN_ON(nr_irqs > IRQ_BITMAP_BITS))
nr_irqs = IRQ_BITMAP_BITS;
@@ -516,14 +532,14 @@ int __init early_irq_init(void)
init_irq_default_affinity();
- printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS);
+ printk(KERN_INFO "NR_IRQS: %d\n", NR_IRQS);
desc = irq_desc;
count = ARRAY_SIZE(irq_desc);
for (i = 0; i < count; i++) {
desc[i].kstat_irqs = alloc_percpu(unsigned int);
- alloc_masks(&desc[i], GFP_KERNEL, node);
+ alloc_masks(&desc[i], node);
raw_spin_lock_init(&desc[i].lock);
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
desc_set_defaults(i, &desc[i], node, NULL, NULL);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 31805f237396..14fe862aa2e3 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -26,39 +26,69 @@ static struct irq_domain *irq_default_domain;
static void irq_domain_check_hierarchy(struct irq_domain *domain);
struct irqchip_fwid {
- struct fwnode_handle fwnode;
- char *name;
+ struct fwnode_handle fwnode;
+ unsigned int type;
+ char *name;
void *data;
};
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+static void debugfs_add_domain_dir(struct irq_domain *d);
+static void debugfs_remove_domain_dir(struct irq_domain *d);
+#else
+static inline void debugfs_add_domain_dir(struct irq_domain *d) { }
+static inline void debugfs_remove_domain_dir(struct irq_domain *d) { }
+#endif
+
/**
* irq_domain_alloc_fwnode - Allocate a fwnode_handle suitable for
* identifying an irq domain
- * @data: optional user-provided data
+ * @type: Type of irqchip_fwnode. See linux/irqdomain.h
+ * @name: Optional user provided domain name
+ * @id: Optional user provided id if name != NULL
+ * @data: Optional user-provided data
*
- * Allocate a struct device_node, and return a poiner to the embedded
+ * Allocate a struct irqchip_fwid, and return a poiner to the embedded
* fwnode_handle (or NULL on failure).
+ *
+ * Note: The types IRQCHIP_FWNODE_NAMED and IRQCHIP_FWNODE_NAMED_ID are
+ * solely to transport name information to irqdomain creation code. The
+ * node is not stored. For other types the pointer is kept in the irq
+ * domain struct.
*/
-struct fwnode_handle *irq_domain_alloc_fwnode(void *data)
+struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id,
+ const char *name, void *data)
{
struct irqchip_fwid *fwid;
- char *name;
+ char *n;
fwid = kzalloc(sizeof(*fwid), GFP_KERNEL);
- name = kasprintf(GFP_KERNEL, "irqchip@%p", data);
- if (!fwid || !name) {
+ switch (type) {
+ case IRQCHIP_FWNODE_NAMED:
+ n = kasprintf(GFP_KERNEL, "%s", name);
+ break;
+ case IRQCHIP_FWNODE_NAMED_ID:
+ n = kasprintf(GFP_KERNEL, "%s-%d", name, id);
+ break;
+ default:
+ n = kasprintf(GFP_KERNEL, "irqchip@%p", data);
+ break;
+ }
+
+ if (!fwid || !n) {
kfree(fwid);
- kfree(name);
+ kfree(n);
return NULL;
}
- fwid->name = name;
+ fwid->type = type;
+ fwid->name = n;
fwid->data = data;
fwid->fwnode.type = FWNODE_IRQCHIP;
return &fwid->fwnode;
}
-EXPORT_SYMBOL_GPL(irq_domain_alloc_fwnode);
+EXPORT_SYMBOL_GPL(__irq_domain_alloc_fwnode);
/**
* irq_domain_free_fwnode - Free a non-OF-backed fwnode_handle
@@ -97,26 +127,82 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
void *host_data)
{
struct device_node *of_node = to_of_node(fwnode);
+ struct irqchip_fwid *fwid;
struct irq_domain *domain;
+ static atomic_t unknown_domains;
+
domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
GFP_KERNEL, of_node_to_nid(of_node));
if (WARN_ON(!domain))
return NULL;
+ if (fwnode && is_fwnode_irqchip(fwnode)) {
+ fwid = container_of(fwnode, struct irqchip_fwid, fwnode);
+
+ switch (fwid->type) {
+ case IRQCHIP_FWNODE_NAMED:
+ case IRQCHIP_FWNODE_NAMED_ID:
+ domain->name = kstrdup(fwid->name, GFP_KERNEL);
+ if (!domain->name) {
+ kfree(domain);
+ return NULL;
+ }
+ domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
+ break;
+ default:
+ domain->fwnode = fwnode;
+ domain->name = fwid->name;
+ break;
+ }
+ } else if (of_node) {
+ char *name;
+
+ /*
+ * DT paths contain '/', which debugfs is legitimately
+ * unhappy about. Replace them with ':', which does
+ * the trick and is not as offensive as '\'...
+ */
+ name = kstrdup(of_node_full_name(of_node), GFP_KERNEL);
+ if (!name) {
+ kfree(domain);
+ return NULL;
+ }
+
+ strreplace(name, '/', ':');
+
+ domain->name = name;
+ domain->fwnode = fwnode;
+ domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
+ }
+
+ if (!domain->name) {
+ if (fwnode) {
+ pr_err("Invalid fwnode type (%d) for irqdomain\n",
+ fwnode->type);
+ }
+ domain->name = kasprintf(GFP_KERNEL, "unknown-%d",
+ atomic_inc_return(&unknown_domains));
+ if (!domain->name) {
+ kfree(domain);
+ return NULL;
+ }
+ domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
+ }
+
of_node_get(of_node);
/* Fill structure */
INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL);
domain->ops = ops;
domain->host_data = host_data;
- domain->fwnode = fwnode;
domain->hwirq_max = hwirq_max;
domain->revmap_size = size;
domain->revmap_direct_max_irq = direct_max;
irq_domain_check_hierarchy(domain);
mutex_lock(&irq_domain_mutex);
+ debugfs_add_domain_dir(domain);
list_add(&domain->link, &irq_domain_list);
mutex_unlock(&irq_domain_mutex);
@@ -136,6 +222,7 @@ EXPORT_SYMBOL_GPL(__irq_domain_add);
void irq_domain_remove(struct irq_domain *domain)
{
mutex_lock(&irq_domain_mutex);
+ debugfs_remove_domain_dir(domain);
WARN_ON(!radix_tree_empty(&domain->revmap_tree));
@@ -152,10 +239,43 @@ void irq_domain_remove(struct irq_domain *domain)
pr_debug("Removed domain %s\n", domain->name);
of_node_put(irq_domain_get_of_node(domain));
+ if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED)
+ kfree(domain->name);
kfree(domain);
}
EXPORT_SYMBOL_GPL(irq_domain_remove);
+void irq_domain_update_bus_token(struct irq_domain *domain,
+ enum irq_domain_bus_token bus_token)
+{
+ char *name;
+
+ if (domain->bus_token == bus_token)
+ return;
+
+ mutex_lock(&irq_domain_mutex);
+
+ domain->bus_token = bus_token;
+
+ name = kasprintf(GFP_KERNEL, "%s-%d", domain->name, bus_token);
+ if (!name) {
+ mutex_unlock(&irq_domain_mutex);
+ return;
+ }
+
+ debugfs_remove_domain_dir(domain);
+
+ if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED)
+ kfree(domain->name);
+ else
+ domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
+
+ domain->name = name;
+ debugfs_add_domain_dir(domain);
+
+ mutex_unlock(&irq_domain_mutex);
+}
+
/**
* irq_domain_add_simple() - Register an irq_domain and optionally map a range of irqs
* @of_node: pointer to interrupt controller's device tree node.
@@ -344,6 +464,7 @@ void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
irq_data->domain = NULL;
irq_data->hwirq = 0;
+ domain->mapcount--;
/* Clear reverse map for this hwirq */
if (hwirq < domain->revmap_size) {
@@ -395,6 +516,7 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
domain->name = irq_data->chip->name;
}
+ domain->mapcount++;
if (hwirq < domain->revmap_size) {
domain->linear_revmap[hwirq] = virq;
} else {
@@ -746,13 +868,54 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
EXPORT_SYMBOL_GPL(irq_find_mapping);
#ifdef CONFIG_IRQ_DOMAIN_DEBUG
+static void virq_debug_show_one(struct seq_file *m, struct irq_desc *desc)
+{
+ struct irq_domain *domain;
+ struct irq_data *data;
+
+ domain = desc->irq_data.domain;
+ data = &desc->irq_data;
+
+ while (domain) {
+ unsigned int irq = data->irq;
+ unsigned long hwirq = data->hwirq;
+ struct irq_chip *chip;
+ bool direct;
+
+ if (data == &desc->irq_data)
+ seq_printf(m, "%5d ", irq);
+ else
+ seq_printf(m, "%5d+ ", irq);
+ seq_printf(m, "0x%05lx ", hwirq);
+
+ chip = irq_data_get_irq_chip(data);
+ seq_printf(m, "%-15s ", (chip && chip->name) ? chip->name : "none");
+
+ seq_printf(m, data ? "0x%p " : " %p ",
+ irq_data_get_irq_chip_data(data));
+
+ seq_printf(m, " %c ", (desc->action && desc->action->handler) ? '*' : ' ');
+ direct = (irq == hwirq) && (irq < domain->revmap_direct_max_irq);
+ seq_printf(m, "%6s%-8s ",
+ (hwirq < domain->revmap_size) ? "LINEAR" : "RADIX",
+ direct ? "(DIRECT)" : "");
+ seq_printf(m, "%s\n", domain->name);
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+ domain = domain->parent;
+ data = data->parent_data;
+#else
+ domain = NULL;
+#endif
+ }
+}
+
static int virq_debug_show(struct seq_file *m, void *private)
{
unsigned long flags;
struct irq_desc *desc;
struct irq_domain *domain;
struct radix_tree_iter iter;
- void *data, **slot;
+ void **slot;
int i;
seq_printf(m, " %-16s %-6s %-10s %-10s %s\n",
@@ -760,15 +923,26 @@ static int virq_debug_show(struct seq_file *m, void *private)
mutex_lock(&irq_domain_mutex);
list_for_each_entry(domain, &irq_domain_list, link) {
struct device_node *of_node;
+ const char *name;
+
int count = 0;
+
of_node = irq_domain_get_of_node(domain);
+ if (of_node)
+ name = of_node_full_name(of_node);
+ else if (is_fwnode_irqchip(domain->fwnode))
+ name = container_of(domain->fwnode, struct irqchip_fwid,
+ fwnode)->name;
+ else
+ name = "";
+
radix_tree_for_each_slot(slot, &domain->revmap_tree, &iter, 0)
count++;
seq_printf(m, "%c%-16s %6u %10u %10u %s\n",
domain == irq_default_domain ? '*' : ' ', domain->name,
domain->revmap_size + count, domain->revmap_size,
domain->revmap_direct_max_irq,
- of_node ? of_node_full_name(of_node) : "");
+ name);
}
mutex_unlock(&irq_domain_mutex);
@@ -782,30 +956,7 @@ static int virq_debug_show(struct seq_file *m, void *private)
continue;
raw_spin_lock_irqsave(&desc->lock, flags);
- domain = desc->irq_data.domain;
-
- if (domain) {
- struct irq_chip *chip;
- int hwirq = desc->irq_data.hwirq;
- bool direct;
-
- seq_printf(m, "%5d ", i);
- seq_printf(m, "0x%05x ", hwirq);
-
- chip = irq_desc_get_chip(desc);
- seq_printf(m, "%-15s ", (chip && chip->name) ? chip->name : "none");
-
- data = irq_desc_get_chip_data(desc);
- seq_printf(m, data ? "0x%p " : " %p ", data);
-
- seq_printf(m, " %c ", (desc->action && desc->action->handler) ? '*' : ' ');
- direct = (i == hwirq) && (i < domain->revmap_direct_max_irq);
- seq_printf(m, "%6s%-8s ",
- (hwirq < domain->revmap_size) ? "LINEAR" : "RADIX",
- direct ? "(DIRECT)" : "");
- seq_printf(m, "%s\n", desc->irq_data.domain->name);
- }
-
+ virq_debug_show_one(m, desc);
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
@@ -973,6 +1124,7 @@ static void irq_domain_insert_irq(int virq)
struct irq_domain *domain = data->domain;
irq_hw_number_t hwirq = data->hwirq;
+ domain->mapcount++;
if (hwirq < domain->revmap_size) {
domain->linear_revmap[hwirq] = virq;
} else {
@@ -1002,6 +1154,7 @@ static void irq_domain_remove_irq(int virq)
struct irq_domain *domain = data->domain;
irq_hw_number_t hwirq = data->hwirq;
+ domain->mapcount--;
if (hwirq < domain->revmap_size) {
domain->linear_revmap[hwirq] = 0;
} else {
@@ -1189,43 +1342,18 @@ void irq_domain_free_irqs_top(struct irq_domain *domain, unsigned int virq,
irq_domain_free_irqs_common(domain, virq, nr_irqs);
}
-static bool irq_domain_is_auto_recursive(struct irq_domain *domain)
-{
- return domain->flags & IRQ_DOMAIN_FLAG_AUTO_RECURSIVE;
-}
-
-static void irq_domain_free_irqs_recursive(struct irq_domain *domain,
+static void irq_domain_free_irqs_hierarchy(struct irq_domain *domain,
unsigned int irq_base,
unsigned int nr_irqs)
{
domain->ops->free(domain, irq_base, nr_irqs);
- if (irq_domain_is_auto_recursive(domain)) {
- BUG_ON(!domain->parent);
- irq_domain_free_irqs_recursive(domain->parent, irq_base,
- nr_irqs);
- }
}
-int irq_domain_alloc_irqs_recursive(struct irq_domain *domain,
+int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
unsigned int irq_base,
unsigned int nr_irqs, void *arg)
{
- int ret = 0;
- struct irq_domain *parent = domain->parent;
- bool recursive = irq_domain_is_auto_recursive(domain);
-
- BUG_ON(recursive && !parent);
- if (recursive)
- ret = irq_domain_alloc_irqs_recursive(parent, irq_base,
- nr_irqs, arg);
- if (ret < 0)
- return ret;
-
- ret = domain->ops->alloc(domain, irq_base, nr_irqs, arg);
- if (ret < 0 && recursive)
- irq_domain_free_irqs_recursive(parent, irq_base, nr_irqs);
-
- return ret;
+ return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
}
/**
@@ -1286,7 +1414,7 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
}
mutex_lock(&irq_domain_mutex);
- ret = irq_domain_alloc_irqs_recursive(domain, virq, nr_irqs, arg);
+ ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
if (ret < 0) {
mutex_unlock(&irq_domain_mutex);
goto out_free_irq_data;
@@ -1321,7 +1449,7 @@ void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs)
mutex_lock(&irq_domain_mutex);
for (i = 0; i < nr_irqs; i++)
irq_domain_remove_irq(virq + i);
- irq_domain_free_irqs_recursive(data->domain, virq, nr_irqs);
+ irq_domain_free_irqs_hierarchy(data->domain, virq, nr_irqs);
mutex_unlock(&irq_domain_mutex);
irq_domain_free_irq_data(virq, nr_irqs);
@@ -1341,15 +1469,11 @@ int irq_domain_alloc_irqs_parent(struct irq_domain *domain,
unsigned int irq_base, unsigned int nr_irqs,
void *arg)
{
- /* irq_domain_alloc_irqs_recursive() has called parent's alloc() */
- if (irq_domain_is_auto_recursive(domain))
- return 0;
+ if (!domain->parent)
+ return -ENOSYS;
- domain = domain->parent;
- if (domain)
- return irq_domain_alloc_irqs_recursive(domain, irq_base,
- nr_irqs, arg);
- return -ENOSYS;
+ return irq_domain_alloc_irqs_hierarchy(domain->parent, irq_base,
+ nr_irqs, arg);
}
EXPORT_SYMBOL_GPL(irq_domain_alloc_irqs_parent);
@@ -1364,10 +1488,10 @@ EXPORT_SYMBOL_GPL(irq_domain_alloc_irqs_parent);
void irq_domain_free_irqs_parent(struct irq_domain *domain,
unsigned int irq_base, unsigned int nr_irqs)
{
- /* irq_domain_free_irqs_recursive() will call parent's free */
- if (!irq_domain_is_auto_recursive(domain) && domain->parent)
- irq_domain_free_irqs_recursive(domain->parent, irq_base,
- nr_irqs);
+ if (!domain->parent)
+ return;
+
+ irq_domain_free_irqs_hierarchy(domain->parent, irq_base, nr_irqs);
}
EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent);
@@ -1487,3 +1611,78 @@ static void irq_domain_check_hierarchy(struct irq_domain *domain)
{
}
#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
+
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+static struct dentry *domain_dir;
+
+static void
+irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind)
+{
+ seq_printf(m, "%*sname: %s\n", ind, "", d->name);
+ seq_printf(m, "%*ssize: %u\n", ind + 1, "",
+ d->revmap_size + d->revmap_direct_max_irq);
+ seq_printf(m, "%*smapped: %u\n", ind + 1, "", d->mapcount);
+ seq_printf(m, "%*sflags: 0x%08x\n", ind +1 , "", d->flags);
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+ if (!d->parent)
+ return;
+ seq_printf(m, "%*sparent: %s\n", ind + 1, "", d->parent->name);
+ irq_domain_debug_show_one(m, d->parent, ind + 4);
+#endif
+}
+
+static int irq_domain_debug_show(struct seq_file *m, void *p)
+{
+ struct irq_domain *d = m->private;
+
+ /* Default domain? Might be NULL */
+ if (!d) {
+ if (!irq_default_domain)
+ return 0;
+ d = irq_default_domain;
+ }
+ irq_domain_debug_show_one(m, d, 0);
+ return 0;
+}
+
+static int irq_domain_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, irq_domain_debug_show, inode->i_private);
+}
+
+static const struct file_operations dfs_domain_ops = {
+ .open = irq_domain_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void debugfs_add_domain_dir(struct irq_domain *d)
+{
+ if (!d->name || !domain_dir || d->debugfs_file)
+ return;
+ d->debugfs_file = debugfs_create_file(d->name, 0444, domain_dir, d,
+ &dfs_domain_ops);
+}
+
+static void debugfs_remove_domain_dir(struct irq_domain *d)
+{
+ if (d->debugfs_file)
+ debugfs_remove(d->debugfs_file);
+}
+
+void __init irq_domain_debugfs_init(struct dentry *root)
+{
+ struct irq_domain *d;
+
+ domain_dir = debugfs_create_dir("domains", root);
+ if (!domain_dir)
+ return;
+
+ debugfs_create_file("default", 0444, domain_dir, NULL, &dfs_domain_ops);
+ mutex_lock(&irq_domain_mutex);
+ list_for_each_entry(d, &irq_domain_list, link)
+ debugfs_add_domain_dir(d);
+ mutex_unlock(&irq_domain_mutex);
+}
+#endif
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 425170d4439b..5c11c1730ba5 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -168,34 +168,6 @@ void irq_set_thread_affinity(struct irq_desc *desc)
set_bit(IRQTF_AFFINITY, &action->thread_flags);
}
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-static inline bool irq_can_move_pcntxt(struct irq_data *data)
-{
- return irqd_can_move_in_process_context(data);
-}
-static inline bool irq_move_pending(struct irq_data *data)
-{
- return irqd_is_setaffinity_pending(data);
-}
-static inline void
-irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask)
-{
- cpumask_copy(desc->pending_mask, mask);
-}
-static inline void
-irq_get_pending(struct cpumask *mask, struct irq_desc *desc)
-{
- cpumask_copy(mask, desc->pending_mask);
-}
-#else
-static inline bool irq_can_move_pcntxt(struct irq_data *data) { return true; }
-static inline bool irq_move_pending(struct irq_data *data) { return false; }
-static inline void
-irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) { }
-static inline void
-irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
-#endif
-
int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
bool force)
{
@@ -345,15 +317,18 @@ EXPORT_SYMBOL_GPL(irq_set_affinity_notifier);
/*
* Generic version of the affinity autoselector.
*/
-static int setup_affinity(struct irq_desc *desc, struct cpumask *mask)
+int irq_setup_affinity(struct irq_desc *desc)
{
struct cpumask *set = irq_default_affinity;
- int node = irq_desc_get_node(desc);
+ int ret, node = irq_desc_get_node(desc);
+ static DEFINE_RAW_SPINLOCK(mask_lock);
+ static struct cpumask mask;
/* Excludes PER_CPU and NO_BALANCE interrupts */
if (!__irq_can_set_affinity(desc))
return 0;
+ raw_spin_lock(&mask_lock);
/*
* Preserve the managed affinity setting and a userspace affinity
* setup, but make sure that one of the targets is online.
@@ -367,46 +342,40 @@ static int setup_affinity(struct irq_desc *desc, struct cpumask *mask)
irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET);
}
- cpumask_and(mask, cpu_online_mask, set);
+ cpumask_and(&mask, cpu_online_mask, set);
if (node != NUMA_NO_NODE) {
const struct cpumask *nodemask = cpumask_of_node(node);
/* make sure at least one of the cpus in nodemask is online */
- if (cpumask_intersects(mask, nodemask))
- cpumask_and(mask, mask, nodemask);
+ if (cpumask_intersects(&mask, nodemask))
+ cpumask_and(&mask, &mask, nodemask);
}
- irq_do_set_affinity(&desc->irq_data, mask, false);
- return 0;
+ ret = irq_do_set_affinity(&desc->irq_data, &mask, false);
+ raw_spin_unlock(&mask_lock);
+ return ret;
}
#else
/* Wrapper for ALPHA specific affinity selector magic */
-static inline int setup_affinity(struct irq_desc *d, struct cpumask *mask)
+int irq_setup_affinity(struct irq_desc *desc)
{
- return irq_select_affinity(irq_desc_get_irq(d));
+ return irq_select_affinity(irq_desc_get_irq(desc));
}
#endif
/*
- * Called when affinity is set via /proc/irq
+ * Called when a bogus affinity is set via /proc/irq
*/
-int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask)
+int irq_select_affinity_usr(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
int ret;
raw_spin_lock_irqsave(&desc->lock, flags);
- ret = setup_affinity(desc, mask);
+ ret = irq_setup_affinity(desc);
raw_spin_unlock_irqrestore(&desc->lock, flags);
return ret;
}
-
-#else
-static inline int
-setup_affinity(struct irq_desc *desc, struct cpumask *mask)
-{
- return 0;
-}
#endif
/**
@@ -533,9 +502,15 @@ void __enable_irq(struct irq_desc *desc)
goto err_out;
/* Prevent probing on this irq: */
irq_settings_set_noprobe(desc);
- irq_enable(desc);
- check_irq_resend(desc);
- /* fall-through */
+ /*
+ * Call irq_startup() not irq_enable() here because the
+ * interrupt might be marked NOAUTOEN. So irq_startup()
+ * needs to be invoked when it gets enabled the first
+ * time. If it was already started up, then irq_startup()
+ * will invoke irq_enable() under the hood.
+ */
+ irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
+ break;
}
default:
desc->depth--;
@@ -1122,7 +1097,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
struct irqaction *old, **old_ptr;
unsigned long flags, thread_mask = 0;
int ret, nested, shared = 0;
- cpumask_var_t mask;
if (!desc)
return -EINVAL;
@@ -1181,11 +1155,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
}
}
- if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
- ret = -ENOMEM;
- goto out_thread;
- }
-
/*
* Drivers are often written to work w/o knowledge about the
* underlying irq chip implementation, so a request for a
@@ -1250,7 +1219,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
*/
if (thread_mask == ~0UL) {
ret = -EBUSY;
- goto out_mask;
+ goto out_unlock;
}
/*
* The thread_mask for the action is or'ed to
@@ -1294,7 +1263,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
pr_err("Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n",
irq);
ret = -EINVAL;
- goto out_mask;
+ goto out_unlock;
}
if (!shared) {
@@ -1302,7 +1271,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
if (ret) {
pr_err("Failed to request resources for %s (irq %d) on irqchip %s\n",
new->name, irq, desc->irq_data.chip->name);
- goto out_mask;
+ goto out_unlock;
}
init_waitqueue_head(&desc->wait_for_threads);
@@ -1314,7 +1283,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
if (ret) {
irq_release_resources(desc);
- goto out_mask;
+ goto out_unlock;
}
}
@@ -1330,20 +1299,25 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
if (new->flags & IRQF_ONESHOT)
desc->istate |= IRQS_ONESHOT;
- if (irq_settings_can_autoenable(desc))
- irq_startup(desc, true);
- else
- /* Undo nested disables: */
- desc->depth = 1;
-
/* Exclude IRQ from balancing if requested */
if (new->flags & IRQF_NOBALANCING) {
irq_settings_set_no_balancing(desc);
irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
}
- /* Set default affinity mask once everything is setup */
- setup_affinity(desc, mask);
+ if (irq_settings_can_autoenable(desc)) {
+ irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
+ } else {
+ /*
+ * Shared interrupts do not go well with disabling
+ * auto enable. The sharing interrupt might request
+ * it while it's still disabled and then wait for
+ * interrupts forever.
+ */
+ WARN_ON_ONCE(new->flags & IRQF_SHARED);
+ /* Undo nested disables: */
+ desc->depth = 1;
+ }
} else if (new->flags & IRQF_TRIGGER_MASK) {
unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK;
@@ -1374,6 +1348,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
raw_spin_unlock_irqrestore(&desc->lock, flags);
+ irq_setup_timings(desc, new);
+
/*
* Strictly no need to wake it up, but hung_task complains
* when no hard interrupt wakes the thread up.
@@ -1384,10 +1360,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
wake_up_process(new->secondary->thread);
register_irq_proc(irq, desc);
+ irq_add_debugfs_entry(irq, desc);
new->dir = NULL;
register_handler_proc(irq, new);
- free_cpumask_var(mask);
-
return 0;
mismatch:
@@ -1400,9 +1375,8 @@ mismatch:
}
ret = -EBUSY;
-out_mask:
+out_unlock:
raw_spin_unlock_irqrestore(&desc->lock, flags);
- free_cpumask_var(mask);
out_thread:
if (new->thread) {
@@ -1502,6 +1476,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
irq_settings_clr_disable_unlazy(desc);
irq_shutdown(desc);
irq_release_resources(desc);
+ irq_remove_timings(desc);
}
#ifdef CONFIG_SMP
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 37ddb7bda651..6ca054a3f91d 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -4,6 +4,36 @@
#include "internals.h"
+/**
+ * irq_fixup_move_pending - Cleanup irq move pending from a dying CPU
+ * @desc: Interrupt descpriptor to clean up
+ * @force_clear: If set clear the move pending bit unconditionally.
+ * If not set, clear it only when the dying CPU is the
+ * last one in the pending mask.
+ *
+ * Returns true if the pending bit was set and the pending mask contains an
+ * online CPU other than the dying CPU.
+ */
+bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear)
+{
+ struct irq_data *data = irq_desc_get_irq_data(desc);
+
+ if (!irqd_is_setaffinity_pending(data))
+ return false;
+
+ /*
+ * The outgoing CPU might be the last online target in a pending
+ * interrupt move. If that's the case clear the pending move bit.
+ */
+ if (cpumask_any_and(desc->pending_mask, cpu_online_mask) >= nr_cpu_ids) {
+ irqd_clr_move_pending(data);
+ return false;
+ }
+ if (force_clear)
+ irqd_clr_move_pending(data);
+ return true;
+}
+
void irq_move_masked_irq(struct irq_data *idata)
{
struct irq_desc *desc = irq_data_to_desc(idata);
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index ddc2f5427f75..48eadf416c24 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -265,13 +265,20 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
struct msi_domain_info *info,
struct irq_domain *parent)
{
+ struct irq_domain *domain;
+
if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS)
msi_domain_update_dom_ops(info);
if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
msi_domain_update_chip_ops(info);
- return irq_domain_create_hierarchy(parent, IRQ_DOMAIN_FLAG_MSI, 0,
- fwnode, &msi_domain_ops, info);
+ domain = irq_domain_create_hierarchy(parent, IRQ_DOMAIN_FLAG_MSI, 0,
+ fwnode, &msi_domain_ops, info);
+
+ if (domain && !domain->name && info->chip)
+ domain->name = info->chip->name;
+
+ return domain;
}
int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
@@ -308,7 +315,7 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
ops->set_desc(arg, desc);
/* Assumes the domain mutex is held! */
- ret = irq_domain_alloc_irqs_recursive(domain, virq, 1, arg);
+ ret = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg);
if (ret)
break;
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index c53edad7b459..7f9642a1e267 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -37,19 +37,47 @@ static struct proc_dir_entry *root_irq_dir;
#ifdef CONFIG_SMP
-static int show_irq_affinity(int type, struct seq_file *m, void *v)
+enum {
+ AFFINITY,
+ AFFINITY_LIST,
+ EFFECTIVE,
+ EFFECTIVE_LIST,
+};
+
+static int show_irq_affinity(int type, struct seq_file *m)
{
struct irq_desc *desc = irq_to_desc((long)m->private);
- const struct cpumask *mask = desc->irq_common_data.affinity;
+ const struct cpumask *mask;
+ switch (type) {
+ case AFFINITY:
+ case AFFINITY_LIST:
+ mask = desc->irq_common_data.affinity;
#ifdef CONFIG_GENERIC_PENDING_IRQ
- if (irqd_is_setaffinity_pending(&desc->irq_data))
- mask = desc->pending_mask;
+ if (irqd_is_setaffinity_pending(&desc->irq_data))
+ mask = desc->pending_mask;
#endif
- if (type)
+ break;
+ case EFFECTIVE:
+ case EFFECTIVE_LIST:
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ mask = desc->irq_common_data.effective_affinity;
+ break;
+#else
+ return -EINVAL;
+#endif
+ };
+
+ switch (type) {
+ case AFFINITY_LIST:
+ case EFFECTIVE_LIST:
seq_printf(m, "%*pbl\n", cpumask_pr_args(mask));
- else
+ break;
+ case AFFINITY:
+ case EFFECTIVE:
seq_printf(m, "%*pb\n", cpumask_pr_args(mask));
+ break;
+ }
return 0;
}
@@ -80,12 +108,12 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
int no_irq_affinity;
static int irq_affinity_proc_show(struct seq_file *m, void *v)
{
- return show_irq_affinity(0, m, v);
+ return show_irq_affinity(AFFINITY, m);
}
static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
{
- return show_irq_affinity(1, m, v);
+ return show_irq_affinity(AFFINITY_LIST, m);
}
@@ -120,9 +148,11 @@ static ssize_t write_irq_affinity(int type, struct file *file,
* one online CPU still has to be targeted.
*/
if (!cpumask_intersects(new_value, cpu_online_mask)) {
- /* Special case for empty set - allow the architecture
- code to set default SMP affinity. */
- err = irq_select_affinity_usr(irq, new_value) ? -EINVAL : count;
+ /*
+ * Special case for empty set - allow the architecture code
+ * to set default SMP affinity.
+ */
+ err = irq_select_affinity_usr(irq) ? -EINVAL : count;
} else {
irq_set_affinity(irq, new_value);
err = count;
@@ -183,6 +213,44 @@ static const struct file_operations irq_affinity_list_proc_fops = {
.write = irq_affinity_list_proc_write,
};
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+static int irq_effective_aff_proc_show(struct seq_file *m, void *v)
+{
+ return show_irq_affinity(EFFECTIVE, m);
+}
+
+static int irq_effective_aff_list_proc_show(struct seq_file *m, void *v)
+{
+ return show_irq_affinity(EFFECTIVE_LIST, m);
+}
+
+static int irq_effective_aff_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, irq_effective_aff_proc_show, PDE_DATA(inode));
+}
+
+static int irq_effective_aff_list_proc_open(struct inode *inode,
+ struct file *file)
+{
+ return single_open(file, irq_effective_aff_list_proc_show,
+ PDE_DATA(inode));
+}
+
+static const struct file_operations irq_effective_aff_proc_fops = {
+ .open = irq_effective_aff_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations irq_effective_aff_list_proc_fops = {
+ .open = irq_effective_aff_list_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif
+
static int default_affinity_show(struct seq_file *m, void *v)
{
seq_printf(m, "%*pb\n", cpumask_pr_args(irq_default_affinity));
@@ -324,6 +392,7 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
void register_irq_proc(unsigned int irq, struct irq_desc *desc)
{
static DEFINE_MUTEX(register_lock);
+ void __maybe_unused *irqp = (void *)(unsigned long) irq;
char name [MAX_NAMELEN];
if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip))
@@ -349,20 +418,25 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
#ifdef CONFIG_SMP
/* create /proc/irq/<irq>/smp_affinity */
proc_create_data("smp_affinity", 0644, desc->dir,
- &irq_affinity_proc_fops, (void *)(long)irq);
+ &irq_affinity_proc_fops, irqp);
/* create /proc/irq/<irq>/affinity_hint */
proc_create_data("affinity_hint", 0444, desc->dir,
- &irq_affinity_hint_proc_fops, (void *)(long)irq);
+ &irq_affinity_hint_proc_fops, irqp);
/* create /proc/irq/<irq>/smp_affinity_list */
proc_create_data("smp_affinity_list", 0644, desc->dir,
- &irq_affinity_list_proc_fops, (void *)(long)irq);
+ &irq_affinity_list_proc_fops, irqp);
proc_create_data("node", 0444, desc->dir,
- &irq_node_proc_fops, (void *)(long)irq);
+ &irq_node_proc_fops, irqp);
+# ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ proc_create_data("effective_affinity", 0444, desc->dir,
+ &irq_effective_aff_proc_fops, irqp);
+ proc_create_data("effective_affinity_list", 0444, desc->dir,
+ &irq_effective_aff_list_proc_fops, irqp);
+# endif
#endif
-
proc_create_data("spurious", 0444, desc->dir,
&irq_spurious_proc_fops, (void *)(long)irq);
@@ -381,6 +455,10 @@ void unregister_irq_proc(unsigned int irq, struct irq_desc *desc)
remove_proc_entry("affinity_hint", desc->dir);
remove_proc_entry("smp_affinity_list", desc->dir);
remove_proc_entry("node", desc->dir);
+# ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ remove_proc_entry("effective_affinity", desc->dir);
+ remove_proc_entry("effective_affinity_list", desc->dir);
+# endif
#endif
remove_proc_entry("spurious", desc->dir);
diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c
new file mode 100644
index 000000000000..c8c1d073fbf1
--- /dev/null
+++ b/kernel/irq/timings.c
@@ -0,0 +1,369 @@
+/*
+ * linux/kernel/irq/timings.c
+ *
+ * Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <linux/static_key.h>
+#include <linux/interrupt.h>
+#include <linux/idr.h>
+#include <linux/irq.h>
+#include <linux/math64.h>
+
+#include <trace/events/irq.h>
+
+#include "internals.h"
+
+DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);
+
+DEFINE_PER_CPU(struct irq_timings, irq_timings);
+
+struct irqt_stat {
+ u64 next_evt;
+ u64 last_ts;
+ u64 variance;
+ u32 avg;
+ u32 nr_samples;
+ int anomalies;
+ int valid;
+};
+
+static DEFINE_IDR(irqt_stats);
+
+void irq_timings_enable(void)
+{
+ static_branch_enable(&irq_timing_enabled);
+}
+
+void irq_timings_disable(void)
+{
+ static_branch_disable(&irq_timing_enabled);
+}
+
+/**
+ * irqs_update - update the irq timing statistics with a new timestamp
+ *
+ * @irqs: an irqt_stat struct pointer
+ * @ts: the new timestamp
+ *
+ * The statistics are computed online, in other words, the code is
+ * designed to compute the statistics on a stream of values rather
+ * than doing multiple passes on the values to compute the average,
+ * then the variance. The integer division introduces a loss of
+ * precision but with an acceptable error margin regarding the results
+ * we would have with the double floating precision: we are dealing
+ * with nanosec, so big numbers, consequently the mantisse is
+ * negligeable, especially when converting the time in usec
+ * afterwards.
+ *
+ * The computation happens at idle time. When the CPU is not idle, the
+ * interrupts' timestamps are stored in the circular buffer, when the
+ * CPU goes idle and this routine is called, all the buffer's values
+ * are injected in the statistical model continuying to extend the
+ * statistics from the previous busy-idle cycle.
+ *
+ * The observations showed a device will trigger a burst of periodic
+ * interrupts followed by one or two peaks of longer time, for
+ * instance when a SD card device flushes its cache, then the periodic
+ * intervals occur again. A one second inactivity period resets the
+ * stats, that gives us the certitude the statistical values won't
+ * exceed 1x10^9, thus the computation won't overflow.
+ *
+ * Basically, the purpose of the algorithm is to watch the periodic
+ * interrupts and eliminate the peaks.
+ *
+ * An interrupt is considered periodically stable if the interval of
+ * its occurences follow the normal distribution, thus the values
+ * comply with:
+ *
+ * avg - 3 x stddev < value < avg + 3 x stddev
+ *
+ * Which can be simplified to:
+ *
+ * -3 x stddev < value - avg < 3 x stddev
+ *
+ * abs(value - avg) < 3 x stddev
+ *
+ * In order to save a costly square root computation, we use the
+ * variance. For the record, stddev = sqrt(variance). The equation
+ * above becomes:
+ *
+ * abs(value - avg) < 3 x sqrt(variance)
+ *
+ * And finally we square it:
+ *
+ * (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2
+ *
+ * (value - avg) x (value - avg) < 9 x variance
+ *
+ * Statistically speaking, any values out of this interval is
+ * considered as an anomaly and is discarded. However, a normal
+ * distribution appears when the number of samples is 30 (it is the
+ * rule of thumb in statistics, cf. "30 samples" on Internet). When
+ * there are three consecutive anomalies, the statistics are resetted.
+ *
+ */
+static void irqs_update(struct irqt_stat *irqs, u64 ts)
+{
+ u64 old_ts = irqs->last_ts;
+ u64 variance = 0;
+ u64 interval;
+ s64 diff;
+
+ /*
+ * The timestamps are absolute time values, we need to compute
+ * the timing interval between two interrupts.
+ */
+ irqs->last_ts = ts;
+
+ /*
+ * The interval type is u64 in order to deal with the same
+ * type in our computation, that prevent mindfuck issues with
+ * overflow, sign and division.
+ */
+ interval = ts - old_ts;
+
+ /*
+ * The interrupt triggered more than one second apart, that
+ * ends the sequence as predictible for our purpose. In this
+ * case, assume we have the beginning of a sequence and the
+ * timestamp is the first value. As it is impossible to
+ * predict anything at this point, return.
+ *
+ * Note the first timestamp of the sequence will always fall
+ * in this test because the old_ts is zero. That is what we
+ * want as we need another timestamp to compute an interval.
+ */
+ if (interval >= NSEC_PER_SEC) {
+ memset(irqs, 0, sizeof(*irqs));
+ irqs->last_ts = ts;
+ return;
+ }
+
+ /*
+ * Pre-compute the delta with the average as the result is
+ * used several times in this function.
+ */
+ diff = interval - irqs->avg;
+
+ /*
+ * Increment the number of samples.
+ */
+ irqs->nr_samples++;
+
+ /*
+ * Online variance divided by the number of elements if there
+ * is more than one sample. Normally the formula is division
+ * by nr_samples - 1 but we assume the number of element will be
+ * more than 32 and dividing by 32 instead of 31 is enough
+ * precise.
+ */
+ if (likely(irqs->nr_samples > 1))
+ variance = irqs->variance >> IRQ_TIMINGS_SHIFT;
+
+ /*
+ * The rule of thumb in statistics for the normal distribution
+ * is having at least 30 samples in order to have the model to
+ * apply. Values outside the interval are considered as an
+ * anomaly.
+ */
+ if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) {
+ /*
+ * After three consecutive anomalies, we reset the
+ * stats as it is no longer stable enough.
+ */
+ if (irqs->anomalies++ >= 3) {
+ memset(irqs, 0, sizeof(*irqs));
+ irqs->last_ts = ts;
+ return;
+ }
+ } else {
+ /*
+ * The anomalies must be consecutives, so at this
+ * point, we reset the anomalies counter.
+ */
+ irqs->anomalies = 0;
+ }
+
+ /*
+ * The interrupt is considered stable enough to try to predict
+ * the next event on it.
+ */
+ irqs->valid = 1;
+
+ /*
+ * Online average algorithm:
+ *
+ * new_average = average + ((value - average) / count)
+ *
+ * The variance computation depends on the new average
+ * to be computed here first.
+ *
+ */
+ irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT);
+
+ /*
+ * Online variance algorithm:
+ *
+ * new_variance = variance + (value - average) x (value - new_average)
+ *
+ * Warning: irqs->avg is updated with the line above, hence
+ * 'interval - irqs->avg' is no longer equal to 'diff'
+ */
+ irqs->variance = irqs->variance + (diff * (interval - irqs->avg));
+
+ /*
+ * Update the next event
+ */
+ irqs->next_evt = ts + irqs->avg;
+}
+
+/**
+ * irq_timings_next_event - Return when the next event is supposed to arrive
+ *
+ * During the last busy cycle, the number of interrupts is incremented
+ * and stored in the irq_timings structure. This information is
+ * necessary to:
+ *
+ * - know if the index in the table wrapped up:
+ *
+ * If more than the array size interrupts happened during the
+ * last busy/idle cycle, the index wrapped up and we have to
+ * begin with the next element in the array which is the last one
+ * in the sequence, otherwise it is a the index 0.
+ *
+ * - have an indication of the interrupts activity on this CPU
+ * (eg. irq/sec)
+ *
+ * The values are 'consumed' after inserting in the statistical model,
+ * thus the count is reinitialized.
+ *
+ * The array of values **must** be browsed in the time direction, the
+ * timestamp must increase between an element and the next one.
+ *
+ * Returns a nanosec time based estimation of the earliest interrupt,
+ * U64_MAX otherwise.
+ */
+u64 irq_timings_next_event(u64 now)
+{
+ struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
+ struct irqt_stat *irqs;
+ struct irqt_stat __percpu *s;
+ u64 ts, next_evt = U64_MAX;
+ int i, irq = 0;
+
+ /*
+ * This function must be called with the local irq disabled in
+ * order to prevent the timings circular buffer to be updated
+ * while we are reading it.
+ */
+ WARN_ON_ONCE(!irqs_disabled());
+
+ /*
+ * Number of elements in the circular buffer: If it happens it
+ * was flushed before, then the number of elements could be
+ * smaller than IRQ_TIMINGS_SIZE, so the count is used,
+ * otherwise the array size is used as we wrapped. The index
+ * begins from zero when we did not wrap. That could be done
+ * in a nicer way with the proper circular array structure
+ * type but with the cost of extra computation in the
+ * interrupt handler hot path. We choose efficiency.
+ *
+ * Inject measured irq/timestamp to the statistical model
+ * while decrementing the counter because we consume the data
+ * from our circular buffer.
+ */
+ for (i = irqts->count & IRQ_TIMINGS_MASK,
+ irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
+ irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {
+
+ irq = irq_timing_decode(irqts->values[i], &ts);
+
+ s = idr_find(&irqt_stats, irq);
+ if (s) {
+ irqs = this_cpu_ptr(s);
+ irqs_update(irqs, ts);
+ }
+ }
+
+ /*
+ * Look in the list of interrupts' statistics, the earliest
+ * next event.
+ */
+ idr_for_each_entry(&irqt_stats, s, i) {
+
+ irqs = this_cpu_ptr(s);
+
+ if (!irqs->valid)
+ continue;
+
+ if (irqs->next_evt <= now) {
+ irq = i;
+ next_evt = now;
+
+ /*
+ * This interrupt mustn't use in the future
+ * until new events occur and update the
+ * statistics.
+ */
+ irqs->valid = 0;
+ break;
+ }
+
+ if (irqs->next_evt < next_evt) {
+ irq = i;
+ next_evt = irqs->next_evt;
+ }
+ }
+
+ return next_evt;
+}
+
+void irq_timings_free(int irq)
+{
+ struct irqt_stat __percpu *s;
+
+ s = idr_find(&irqt_stats, irq);
+ if (s) {
+ free_percpu(s);
+ idr_remove(&irqt_stats, irq);
+ }
+}
+
+int irq_timings_alloc(int irq)
+{
+ struct irqt_stat __percpu *s;
+ int id;
+
+ /*
+ * Some platforms can have the same private interrupt per cpu,
+ * so this function may be be called several times with the
+ * same interrupt number. Just bail out in case the per cpu
+ * stat structure is already allocated.
+ */
+ s = idr_find(&irqt_stats, irq);
+ if (s)
+ return 0;
+
+ s = alloc_percpu(*s);
+ if (!s)
+ return -ENOMEM;
+
+ idr_preload(GFP_KERNEL);
+ id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT);
+ idr_preload_end();
+
+ if (id < 0) {
+ free_percpu(s);
+ return id;
+ }
+
+ return 0;
+}