1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
|
#include <linux/seq_file.h>
#include <linux/cpumask.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/msi.h>
#include <linux/irq.h>
#include <linux/pci.h>
#include <linux/irqdomain.h>
#include <asm/hw_irq.h>
#include <asm/irq_remapping.h>
#include <asm/processor.h>
#include <asm/x86_init.h>
#include <asm/apic.h>
#include <asm/hpet.h>
#include "irq_remapping.h"
/*
 * Nonzero while interrupt remapping is active.  This file only reads it
 * (to gate the x86 ops override, disable/reenable, fault handling and
 * panic_if_irq_remap()); presumably the selected backend sets it - confirm.
 */
int irq_remapping_enabled;
/* Set to 1 by set_irq_remapping_broken(); not read in this file. */
int irq_remap_broken;
/* Set to 1 by the "intremap=nosid" boot option. */
int disable_sourceid_checking;
/* Set to 1 by the "intremap=no_x2apic_optout" boot option. */
int no_x2apic_optout;
/*
 * Set to 1 by "nointremap" or "intremap=off", cleared by "intremap=on".
 * When nonzero, irq_remapping_prepare() returns -ENOSYS without probing
 * any backend.
 */
static int disable_irq_remap;
/* Backend operations selected by irq_remapping_prepare(). */
static struct irq_remap_ops *remap_ops;
/* Forward declarations of helpers defined further down in this file. */
static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
int index, int sub_handle);
static int set_remapped_irq_affinity(struct irq_data *data,
const struct cpumask *mask,
bool force);
/* Tell whether @cfg has its ->remapped field set to 1. */
static bool irq_remapped(struct irq_cfg *cfg)
{
	bool is_remapped = (cfg->remapped == 1);

	return is_remapped;
}
/*
 * IO-APIC "disable" hook used while interrupt remapping is in effect.
 *
 * Virtual wire A mode is used for now: virtual wire B is a little more
 * complex, since it needs both the IOAPIC RTE and the interrupt-remapping
 * table entry configured.  This gets called during crash dump, so it is
 * kept simple.
 */
static void irq_remapping_disable_io_apic(void)
{
	if (!(cpu_has_apic || apic_from_smp_config()))
		return;

	disconnect_bsp_APIC(0);
}
#ifndef CONFIG_GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
/*
 * irq_alloc_hwirqs - allocate @cnt interrupts on @node via the irqdomain core
 * @cnt:  number of interrupts requested
 * @node: NUMA node handed to the allocator
 *
 * Returns the first allocated interrupt number, or 0 on failure.  The
 * callers in this file test only for 0, so a negative error code from
 * irq_domain_alloc_irqs() must not leak out through the unsigned return
 * type, where it would look like a valid (huge) interrupt number.
 */
static unsigned int irq_alloc_hwirqs(int cnt, int node)
{
	int irq = irq_domain_alloc_irqs(NULL, -1, cnt, node, NULL);

	return irq > 0 ? irq : 0;
}

/*
 * irq_free_hwirqs - release @cnt interrupts starting at @from
 * @from: first interrupt number to free
 * @cnt:  number of interrupts to free
 */
static void irq_free_hwirqs(unsigned int from, int cnt)
{
	irq_domain_free_irqs(from, cnt);
}
#endif
/*
 * Set up @nvec remapped MSI vectors for @dev.
 *
 * One contiguous run of @nvec interrupts is allocated.  The first
 * interrupt reserves remapping entries for the count rounded up to a power
 * of two; every later interrupt is attached to that same index with its
 * own sub-handle.  All vectors share the first MSI descriptor on the
 * device's msi_list.
 *
 * Returns 0 on success, -ENOSPC if no interrupts could be allocated, or
 * the negative value reported by the failing helper.
 */
static int do_setup_msi_irqs(struct pci_dev *dev, int nvec)
{
	struct msi_desc *desc;
	unsigned int base_irq;
	int rc, i, rounded_nvec;
	int table_index = 0;

	desc = list_entry(dev->msi_list.next, struct msi_desc, list);

	base_irq = irq_alloc_hwirqs(nvec, dev_to_node(&dev->dev));
	if (!base_irq)
		return -ENOSPC;

	rounded_nvec = __roundup_pow_of_two(nvec);

	for (i = 0; i < nvec; i++) {
		if (i == 0) {
			/* The first vector allocates the remapping block. */
			rc = msi_alloc_remapped_irq(dev, base_irq,
						    rounded_nvec);
			table_index = rc;
		} else {
			rc = msi_setup_remapped_irq(dev, base_irq + i,
						    table_index, i);
		}
		if (rc < 0)
			goto fail;

		rc = setup_msi_irq(dev, desc, base_irq, i);
		if (rc < 0)
			goto fail;
	}

	return 0;

fail:
	irq_free_hwirqs(base_irq, nvec);

	/*
	 * Restore altered MSI descriptor fields and prevent just destroyed
	 * IRQs from tearing down again in default_teardown_msi_irqs()
	 */
	desc->irq = 0;

	return rc;
}
/*
 * Set up remapped MSI-X vectors for @dev.
 *
 * Walks every descriptor on the device's msi_list and allocates one
 * interrupt per descriptor.  The first interrupt reserves @nvec remapping
 * entries; each later interrupt is attached to that same index with an
 * increasing sub-handle.
 *
 * Returns 0 on success, -ENOSPC if an interrupt could not be allocated
 * (the same code do_setup_msi_irqs() uses for that condition), or the
 * negative value reported by the failing helper.  On failure only the
 * interrupt being set up is freed here; interrupts from earlier
 * iterations are presumably released by the caller's MSI teardown path -
 * confirm.
 */
static int do_setup_msix_irqs(struct pci_dev *dev, int nvec)
{
	int node, ret, sub_handle, index = 0;
	struct msi_desc *msidesc;
	unsigned int irq;

	node = dev_to_node(&dev->dev);
	sub_handle = 0;

	list_for_each_entry(msidesc, &dev->msi_list, list) {
		irq = irq_alloc_hwirqs(1, node);
		if (irq == 0)
			return -ENOSPC;

		if (sub_handle == 0)
			ret = index = msi_alloc_remapped_irq(dev, irq, nvec);
		else
			ret = msi_setup_remapped_irq(dev, irq, index,
						     sub_handle);
		if (ret < 0)
			goto error;

		ret = setup_msi_irq(dev, msidesc, irq, 0);
		if (ret < 0)
			goto error;

		/* irq is re-assigned by the next allocation, so only the
		 * sub-handle needs to advance. */
		sub_handle += 1;
	}

	return 0;

error:
	irq_free_hwirqs(irq, 1);
	return ret;
}
/*
 * MSI setup hook installed in x86_msi: plain MSI goes to
 * do_setup_msi_irqs(), every other capability type to
 * do_setup_msix_irqs().
 */
static int irq_remapping_setup_msi_irqs(struct pci_dev *dev,
					int nvec, int type)
{
	if (type != PCI_CAP_ID_MSI)
		return do_setup_msix_irqs(dev, nvec);

	return do_setup_msi_irqs(dev, nvec);
}
/*
 * IO-APIC EOI hook for remapped interrupts.
 *
 * With intr-remapping the RTE carries the pin number as its virtual
 * vector, while the actual vector is programmed in the intr-remapping
 * table entry.  The io-apic EOI therefore uses @pin; @vector is not used.
 */
static void eoi_ioapic_pin_remapped(int apic, int pin, int vector)
{
	io_apic_eoi(apic, pin);
}
/*
 * Point the x86 IO-APIC and MSI operation tables at the remapping-aware
 * implementations provided by this file.
 */
static void __init irq_remapping_modify_x86_ops(void)
{
	/* MSI / HPET hooks */
	x86_msi.compose_msi_msg		= compose_remapped_msi_msg;
	x86_msi.setup_hpet_msi		= setup_hpet_msi_remapped;
	x86_msi.setup_msi_irqs		= irq_remapping_setup_msi_irqs;

	/* IO-APIC hooks */
	x86_io_apic_ops.eoi_ioapic_pin	= eoi_ioapic_pin_remapped;
	x86_io_apic_ops.setup_entry	= setup_ioapic_remapped_entry;
	x86_io_apic_ops.set_affinity	= set_remapped_irq_affinity;
	x86_io_apic_ops.disable		= irq_remapping_disable_io_apic;
}
/*
 * Handler for the "nointremap" early parameter: requests that interrupt
 * remapping stay disabled.  @str is ignored.  Always returns 0.
 */
static __init int setup_nointremap(char *str)
{
	disable_irq_remap = 1;

	return 0;
}
early_param("nointremap", setup_nointremap);
/*
 * Handler for the "intremap=" early parameter.
 *
 * @str is a comma-separated option list.  Each option is matched by
 * prefix against "on", "off", "nosid" and "no_x2apic_optout"; anything
 * else is skipped without complaint.
 *
 * Returns -EINVAL when no value was supplied, otherwise 0.
 */
static __init int setup_irqremap(char *str)
{
	char *opt;

	if (str == NULL)
		return -EINVAL;

	for (opt = str; *opt != '\0'; ) {
		if (strncmp(opt, "on", 2) == 0) {
			disable_irq_remap = 0;
		} else if (strncmp(opt, "off", 3) == 0) {
			disable_irq_remap = 1;
		} else if (strncmp(opt, "nosid", 5) == 0) {
			disable_sourceid_checking = 1;
		} else if (strncmp(opt, "no_x2apic_optout", 16) == 0) {
			no_x2apic_optout = 1;
		}

		/* Jump to the end of this option, then past any commas. */
		opt += strcspn(opt, ",");
		for (; *opt == ','; opt++)
			;
	}

	return 0;
}
early_param("intremap", setup_irqremap);
/* Record that interrupt remapping is broken by raising irq_remap_broken. */
void set_irq_remapping_broken(void)
{
	irq_remap_broken = 1;
}
/*
 * Pick the interrupt-remapping backend.
 *
 * The Intel backend is tried first; the AMD backend is tried only when
 * CONFIG_AMD_IOMMU is enabled and the Intel prepare() did not return 0.
 * The first backend whose prepare() returns 0 becomes remap_ops.
 *
 * Returns 0 when a backend was selected, -ENOSYS when remapping was
 * disabled on the command line or no backend accepted.
 */
int __init irq_remapping_prepare(void)
{
	if (disable_irq_remap)
		return -ENOSYS;

	if (intel_irq_remap_ops.prepare() == 0) {
		remap_ops = &intel_irq_remap_ops;
		return 0;
	}

	if (IS_ENABLED(CONFIG_AMD_IOMMU) &&
	    amd_iommu_irq_ops.prepare() == 0) {
		remap_ops = &amd_iommu_irq_ops;
		return 0;
	}

	return -ENOSYS;
}
/*
 * Enable interrupt remapping through the selected backend.
 *
 * Returns -ENODEV if the backend has no enable callback, otherwise the
 * callback's return value.  If irq_remapping_enabled is set once the
 * callback has run, the x86 IO-APIC/MSI ops are switched to the remapped
 * variants.
 */
int __init irq_remapping_enable(void)
{
	int rc;

	if (remap_ops->enable == NULL)
		return -ENODEV;

	rc = remap_ops->enable();
	if (!irq_remapping_enabled)
		return rc;

	irq_remapping_modify_x86_ops();

	return rc;
}
/*
 * Invoke the backend's disable callback, provided remapping is enabled
 * and the backend supplies one.
 */
void irq_remapping_disable(void)
{
	if (!irq_remapping_enabled)
		return;

	if (remap_ops->disable)
		remap_ops->disable();
}
/*
 * Re-enable interrupt remapping through the backend.
 * @mode: passed through unchanged to the backend's reenable callback
 *
 * Returns 0 when remapping is not enabled or the backend has no reenable
 * callback, otherwise the callback's return value.
 */
int irq_remapping_reenable(int mode)
{
	if (!irq_remapping_enabled || !remap_ops->reenable)
		return 0;

	return remap_ops->reenable(mode);
}
/*
 * Turn on fault reporting in the remapping backend.
 *
 * Returns 0 when remapping is not enabled, -ENODEV when the backend has
 * no enable_faulting callback, otherwise the callback's return value.
 */
int __init irq_remap_enable_fault_handling(void)
{
	if (irq_remapping_enabled) {
		if (remap_ops->enable_faulting)
			return remap_ops->enable_faulting();

		return -ENODEV;
	}

	return 0;
}
/*
 * Forward an IO-APIC route-entry setup request to the remapping backend.
 * All arguments are handed to the backend unchanged.
 *
 * Returns -ENODEV when the backend has no setup_ioapic_entry callback,
 * otherwise the callback's return value.
 */
int setup_ioapic_remapped_entry(int irq,
				struct IO_APIC_route_entry *entry,
				unsigned int destination, int vector,
				struct io_apic_irq_attr *attr)
{
	if (remap_ops->setup_ioapic_entry)
		return remap_ops->setup_ioapic_entry(irq, entry, destination,
						     vector, attr);

	return -ENODEV;
}
/*
 * Affinity hook for remapped interrupts.
 *
 * Delegates to the backend's set_affinity callback when CONFIG_SMP is
 * enabled and the callback exists; otherwise does nothing and returns 0.
 */
static int set_remapped_irq_affinity(struct irq_data *data,
				     const struct cpumask *mask, bool force)
{
	if (config_enabled(CONFIG_SMP) && remap_ops->set_affinity)
		return remap_ops->set_affinity(data, mask, force);

	return 0;
}
/*
 * Call the backend's free_irq callback for @irq, but only when the
 * interrupt is marked as remapped and the backend provides the callback.
 */
void free_remapped_irq(int irq)
{
	struct irq_cfg *irq_config = irq_cfg(irq);

	if (!irq_remapped(irq_config))
		return;

	if (remap_ops->free_irq)
		remap_ops->free_irq(irq);
}
/*
 * Compose the MSI message for @irq.
 *
 * Interrupts that are not remapped use native_compose_msi_msg().
 * Remapped ones are handed to the backend's compose_msi_msg callback;
 * if the backend has none, @msg is left untouched.
 */
void compose_remapped_msi_msg(struct pci_dev *pdev,
			      unsigned int irq, unsigned int dest,
			      struct msi_msg *msg, u8 hpet_id)
{
	struct irq_cfg *irq_config = irq_cfg(irq);

	if (!irq_remapped(irq_config)) {
		native_compose_msi_msg(pdev, irq, dest, msg, hpet_id);
		return;
	}

	if (remap_ops->compose_msi_msg)
		remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id);
}
/*
 * Forward to the backend's msi_alloc_irq callback with @pdev, @irq and
 * @nvec unchanged.
 *
 * Returns -ENODEV when the callback is missing, otherwise its return
 * value.
 */
static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
{
	if (remap_ops->msi_alloc_irq)
		return remap_ops->msi_alloc_irq(pdev, irq, nvec);

	return -ENODEV;
}
/*
 * Forward to the backend's msi_setup_irq callback with @pdev, @irq,
 * @index and @sub_handle unchanged.
 *
 * Returns -ENODEV when the callback is missing, otherwise its return
 * value.
 */
static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
				  int index, int sub_handle)
{
	if (remap_ops->msi_setup_irq)
		return remap_ops->msi_setup_irq(pdev, irq, index, sub_handle);

	return -ENODEV;
}
/*
 * HPET MSI setup hook for remapped interrupts.
 *
 * Runs the backend's alloc_hpet_msi callback for @irq / @id and then
 * falls through to default_setup_hpet_msi().
 *
 * Returns -ENODEV when the backend lacks alloc_hpet_msi, -EINVAL when
 * that callback returns nonzero, otherwise the result of
 * default_setup_hpet_msi().
 */
int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
{
	if (!remap_ops->alloc_hpet_msi)
		return -ENODEV;

	if (remap_ops->alloc_hpet_msi(irq, id) != 0)
		return -EINVAL;

	return default_setup_hpet_msi(irq, id);
}
/*
 * panic_if_irq_remap - panic with @msg when interrupt remapping is enabled
 * @msg: text handed to panic(); printed verbatim
 *
 * Does nothing when irq_remapping_enabled is zero.
 */
void panic_if_irq_remap(const char *msg)
{
	/*
	 * Hand @msg over as an argument, never as the format string, so a
	 * stray '%' in the text cannot be parsed as a conversion specifier.
	 */
	if (irq_remapping_enabled)
		panic("%s", msg);
}
/*
 * irq_ack callback installed by irq_remap_modify_chip_defaults().
 * It only acknowledges the APIC interrupt; @data is not consulted.
 */
void ir_ack_apic_edge(struct irq_data *data)
{
	ack_APIC_irq();
}
/*
 * irq_eoi callback installed by irq_remap_modify_chip_defaults().
 * Acknowledges the APIC interrupt first, then issues the IO-APIC EOI for
 * the interrupt described by @data.
 */
static void ir_ack_apic_level(struct irq_data *data)
{
	struct irq_cfg *irq_config;

	ack_APIC_irq();

	irq_config = irqd_cfg(data);
	eoi_ioapic_irq(data->irq, irq_config);
}
/*
 * Print the chip name of @data into @p, with an "IR-" prefix when the
 * interrupt is considered remapped.
 *
 * An interrupt is assumed to be remapped if the parent of its irqdomain
 * isn't the vector domain, which is true for MSI, HPET and IOAPIC on x86
 * platforms.
 */
void irq_remapping_print_chip(struct irq_data *data, struct seq_file *p)
{
	bool remapped;

	remapped = data->domain &&
		   data->domain->parent != arch_get_ir_parent_domain();

	if (!remapped) {
		seq_printf(p, " %s", data->chip->name);
		return;
	}

	seq_printf(p, " IR-%s", data->chip->name);
}
/* Print the chip name of @data into @p, always prefixed with "IR-". */
static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
{
	const char *chip_name = data->chip->name;

	seq_printf(p, " IR-%s", chip_name);
}
/*
 * Override @chip's print, ack and eoi callbacks with the remapping
 * versions from this file, and take its set_affinity callback from the
 * current x86_io_apic_ops.
 */
void irq_remap_modify_chip_defaults(struct irq_chip *chip)
{
	chip->irq_set_affinity	= x86_io_apic_ops.set_affinity;
	chip->irq_eoi		= ir_ack_apic_level;
	chip->irq_ack		= ir_ack_apic_edge;
	chip->irq_print_chip	= ir_print_prefix;
}
/*
 * Prepare @irq / @chip for remapped operation when @cfg is marked as
 * remapped: set IRQ_MOVE_PCNTXT on the interrupt and install the
 * remapping chip callbacks.
 *
 * Returns true when that was done, false when @cfg is not remapped (in
 * which case nothing is modified).
 */
bool setup_remapped_irq(int irq, struct irq_cfg *cfg, struct irq_chip *chip)
{
	if (irq_remapped(cfg)) {
		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
		irq_remap_modify_chip_defaults(chip);
		return true;
	}

	return false;
}
/**
 * irq_remapping_get_ir_irq_domain - Look up the irqdomain of the IOMMU
 *				     device that serves request @info
 * @info: interrupt allocation information, used to identify the IOMMU device
 *
 * Used to obtain the parent irqdomain for the HPET and IOAPIC irqdomains.
 *
 * Returns pointer to IRQ domain, or NULL on failure (including when no
 * backend is selected or the backend lacks the callback).
 */
struct irq_domain *
irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info)
{
	if (remap_ops && remap_ops->get_ir_irq_domain)
		return remap_ops->get_ir_irq_domain(info);

	return NULL;
}
/**
 * irq_remapping_get_irq_domain - Look up the irqdomain serving request @info
 * @info: interrupt allocation information, used to identify the IOMMU device
 *
 * Each interrupt remapping device has one PCI MSI/MSIX irqdomain
 * associated with it; this interface retrieves the PCI MSI/MSIX irqdomain
 * that serves request @info.
 *
 * Returns pointer to IRQ domain, or NULL on failure (including when no
 * backend is selected or the backend lacks the callback).
 */
struct irq_domain *
irq_remapping_get_irq_domain(struct irq_alloc_info *info)
{
	if (remap_ops && remap_ops->get_irq_domain)
		return remap_ops->get_irq_domain(info);

	return NULL;
}
|