summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2015-04-02 13:22:35 +0200
committerIngo Molnar <mingo@kernel.org>2015-04-02 13:22:35 +0200
commit223aa646d5aa6096230a9481e4c6c9cb67cfc4a6 (patch)
tree3e9013b2d6a099fabf4c5148dd9e522e04f8f073
parentaaa9fa387541a30f6f4138bea6e082319de5e3fe (diff)
parent34f439278cef7b1177f8ce24f9fc81dfc6221d3b (diff)
downloadlinux-223aa646d5aa6096230a9481e4c6c9cb67cfc4a6.tar.gz
linux-223aa646d5aa6096230a9481e4c6c9cb67cfc4a6.tar.bz2
linux-223aa646d5aa6096230a9481e4c6c9cb67cfc4a6.zip
Merge branch 'perf/timer' into perf/core
This WIP branch is now ready to be merged. Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--MAINTAINERS13
-rw-r--r--arch/arm/plat-omap/counter_32k.c2
-rw-r--r--arch/arm64/include/asm/cmpxchg.h32
-rw-r--r--arch/arm64/include/asm/mmu_context.h9
-rw-r--r--arch/arm64/include/asm/percpu.h44
-rw-r--r--arch/arm64/kernel/vdso.c10
-rw-r--r--arch/metag/include/asm/io.h1
-rw-r--r--arch/metag/include/asm/pgtable-bits.h104
-rw-r--r--arch/metag/include/asm/pgtable.h95
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h3
-rw-r--r--arch/powerpc/include/asm/reg.h3
-rw-r--r--arch/powerpc/kernel/cputable.c20
-rw-r--r--arch/powerpc/kernel/dbell.c2
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S2
-rw-r--r--arch/powerpc/platforms/powernv/smp.c14
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c44
-rw-r--r--arch/s390/include/asm/elf.h2
-rw-r--r--arch/s390/kernel/ftrace.c61
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c7
-rw-r--r--arch/s390/kernel/swsusp_asm64.S11
-rw-r--r--arch/s390/kernel/time.c20
-rw-r--r--arch/sparc/include/asm/hypervisor.h12
-rw-r--r--arch/sparc/kernel/hvapi.c1
-rw-r--r--arch/sparc/kernel/hvcalls.S16
-rw-r--r--arch/sparc/kernel/pcr.c33
-rw-r--r--arch/sparc/kernel/perf_event.c55
-rw-r--r--arch/sparc/kernel/process_64.c4
-rw-r--r--arch/sparc/kernel/time_32.c6
-rw-r--r--arch/sparc/lib/memmove.S35
-rw-r--r--arch/tile/kernel/time.c24
-rw-r--r--arch/x86/kernel/cpu/perf_event.c14
-rw-r--r--arch/x86/kernel/vsyscall_gtod.c24
-rw-r--r--arch/x86/kvm/ioapic.c4
-rw-r--r--arch/x86/kvm/lapic.c3
-rw-r--r--arch/x86/kvm/vmx.c7
-rw-r--r--arch/x86/kvm/x86.c14
-rw-r--r--block/blk-merge.c2
-rw-r--r--block/blk-mq-tag.c6
-rw-r--r--block/blk-mq.c6
-rw-r--r--drivers/ata/libata-core.c4
-rw-r--r--drivers/base/regmap/internal.h8
-rw-r--r--drivers/base/regmap/regcache.c16
-rw-r--r--drivers/base/regmap/regmap.c32
-rw-r--r--drivers/block/nbd.c8
-rw-r--r--drivers/block/nvme-core.c1
-rw-r--r--drivers/clocksource/em_sti.c2
-rw-r--r--drivers/clocksource/sh_cmt.c2
-rw-r--r--drivers/clocksource/sh_tmu.c2
-rw-r--r--drivers/gpu/drm/drm_crtc.c13
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c38
-rw-r--r--drivers/gpu/drm/i915/intel_display.c18
-rw-r--r--drivers/md/dm.c26
-rw-r--r--drivers/mfd/kempld-core.c2
-rw-r--r--drivers/mfd/rtsx_usb.c30
-rw-r--r--drivers/net/ethernet/amd/pcnet32.c31
-rw-r--r--drivers/net/ethernet/emulex/benet/be.h2
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.c17
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.h2
-rw-r--r--drivers/net/ethernet/emulex/benet/be_main.c131
-rw-r--r--drivers/net/usb/cx82310_eth.c30
-rw-r--r--drivers/regulator/palmas-regulator.c4
-rw-r--r--drivers/rtc/rtc-mrst.c17
-rw-r--r--drivers/scsi/ipr.c3
-rw-r--r--drivers/scsi/libsas/sas_ata.c3
-rw-r--r--drivers/spi/spi-dw-mid.c6
-rw-r--r--drivers/spi/spi-qup.c9
-rw-r--r--drivers/spi/spi.c5
-rw-r--r--fs/affs/file.c19
-rw-r--r--fs/hfsplus/brec.c20
-rw-r--r--include/linux/clockchips.h21
-rw-r--r--include/linux/clocksource.h25
-rw-r--r--include/linux/libata.h1
-rw-r--r--include/linux/mfd/palmas.h3
-rw-r--r--include/linux/perf_event.h2
-rw-r--r--include/linux/regulator/driver.h2
-rw-r--r--include/linux/sched.h9
-rw-r--r--include/linux/timekeeper_internal.h16
-rw-r--r--include/linux/timekeeping.h6
-rw-r--r--include/net/netfilter/nf_log.h10
-rw-r--r--include/trace/events/regmap.h123
-rw-r--r--include/uapi/linux/perf_event.h6
-rw-r--r--kernel/events/core.c77
-rw-r--r--kernel/sched/fair.c8
-rw-r--r--kernel/time/clockevents.c88
-rw-r--r--kernel/time/clocksource.c170
-rw-r--r--kernel/time/jiffies.c5
-rw-r--r--kernel/time/sched_clock.c236
-rw-r--r--kernel/time/timekeeping.c345
-rw-r--r--kernel/time/timer_list.c32
-rw-r--r--lib/Kconfig.debug13
-rw-r--r--mm/huge_memory.c26
-rw-r--r--mm/memory.c22
-rw-r--r--mm/memory_hotplug.c13
-rw-r--r--mm/mmap.c4
-rw-r--r--mm/mprotect.c3
-rw-r--r--mm/page-writeback.c7
-rw-r--r--mm/page_isolation.c1
-rw-r--r--mm/pagewalk.c9
-rw-r--r--mm/rmap.c7
-rw-r--r--mm/slub.c6
-rw-r--r--net/compat.c7
-rw-r--r--net/ipv4/netfilter/ip_tables.c6
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv6/fib6_rules.c1
-rw-r--r--net/ipv6/netfilter/ip6_tables.c6
-rw-r--r--net/ipv6/udp_offload.c8
-rw-r--r--net/netfilter/nf_log.c24
-rw-r--r--net/netfilter/nf_tables_api.c5
-rw-r--r--net/netfilter/nf_tables_core.c8
-rw-r--r--net/netfilter/nfnetlink_cthelper.c3
-rw-r--r--net/netfilter/nft_compat.c6
-rw-r--r--net/netfilter/nft_hash.c2
-rw-r--r--net/netfilter/xt_TPROXY.c4
-rw-r--r--net/socket.c4
-rw-r--r--tools/testing/selftests/Makefile8
-rw-r--r--virt/kvm/kvm_main.c14
116 files changed, 1813 insertions, 861 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 358eb0105e00..88c09ca2584f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1186,7 +1186,7 @@ M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: arch/arm/mach-mvebu/
-F: drivers/rtc/armada38x-rtc
+F: drivers/rtc/rtc-armada38x.c
ARM/Marvell Berlin SoC support
M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
@@ -1675,8 +1675,8 @@ F: drivers/misc/eeprom/at24.c
F: include/linux/platform_data/at24.h
ATA OVER ETHERNET (AOE) DRIVER
-M: "Ed L. Cashin" <ecashin@coraid.com>
-W: http://support.coraid.com/support/linux
+M: "Ed L. Cashin" <ed.cashin@acm.org>
+W: http://www.openaoe.org/
S: Supported
F: Documentation/aoe/
F: drivers/block/aoe/
@@ -3252,6 +3252,13 @@ S: Maintained
F: Documentation/hwmon/dme1737
F: drivers/hwmon/dme1737.c
+DMI/SMBIOS SUPPORT
+M: Jean Delvare <jdelvare@suse.de>
+S: Maintained
+F: drivers/firmware/dmi-id.c
+F: drivers/firmware/dmi_scan.c
+F: include/linux/dmi.h
+
DOCKING STATION DRIVER
M: Shaohua Li <shaohua.li@intel.com>
L: linux-acpi@vger.kernel.org
diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c
index 61b4d705c267..43cf74561cfd 100644
--- a/arch/arm/plat-omap/counter_32k.c
+++ b/arch/arm/plat-omap/counter_32k.c
@@ -103,7 +103,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase)
/*
* 120000 rough estimate from the calculations in
- * __clocksource_updatefreq_scale.
+ * __clocksource_update_freq_scale.
*/
clocks_calc_mult_shift(&persistent_mult, &persistent_shift,
32768, NSEC_PER_SEC, 120000);
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index cb9593079f29..d8c25b7b18fb 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -246,14 +246,30 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
__ret; \
})
-#define this_cpu_cmpxchg_1(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
-#define this_cpu_cmpxchg_2(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
-#define this_cpu_cmpxchg_4(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
-#define this_cpu_cmpxchg_8(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
-
-#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \
- cmpxchg_double_local(raw_cpu_ptr(&(ptr1)), raw_cpu_ptr(&(ptr2)), \
- o1, o2, n1, n2)
+#define _protect_cmpxchg_local(pcp, o, n) \
+({ \
+ typeof(*raw_cpu_ptr(&(pcp))) __ret; \
+ preempt_disable(); \
+ __ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n); \
+ preempt_enable(); \
+ __ret; \
+})
+
+#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+
+#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \
+({ \
+ int __ret; \
+ preempt_disable(); \
+ __ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \
+ raw_cpu_ptr(&(ptr2)), \
+ o1, o2, n1, n2); \
+ preempt_enable(); \
+ __ret; \
+})
#define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n))
#define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n))
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index a9eee33dfa62..101a42bde728 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -151,6 +151,15 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
{
unsigned int cpu = smp_processor_id();
+ /*
+ * init_mm.pgd does not contain any user mappings and it is always
+ * active for kernel addresses in TTBR1. Just set the reserved TTBR0.
+ */
+ if (next == &init_mm) {
+ cpu_set_reserved_ttbr0();
+ return;
+ }
+
if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next)
check_and_switch_context(next, tsk);
}
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 09da25bc596f..4fde8c1df97f 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -204,25 +204,47 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
return ret;
}
+#define _percpu_read(pcp) \
+({ \
+ typeof(pcp) __retval; \
+ preempt_disable(); \
+ __retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)), \
+ sizeof(pcp)); \
+ preempt_enable(); \
+ __retval; \
+})
+
+#define _percpu_write(pcp, val) \
+do { \
+ preempt_disable(); \
+ __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), \
+ sizeof(pcp)); \
+ preempt_enable(); \
+} while(0) \
+
+#define _pcp_protect(operation, pcp, val) \
+({ \
+ typeof(pcp) __retval; \
+ preempt_disable(); \
+ __retval = (typeof(pcp))operation(raw_cpu_ptr(&(pcp)), \
+ (val), sizeof(pcp)); \
+ preempt_enable(); \
+ __retval; \
+})
+
#define _percpu_add(pcp, val) \
- __percpu_add(raw_cpu_ptr(&(pcp)), val, sizeof(pcp))
+ _pcp_protect(__percpu_add, pcp, val)
-#define _percpu_add_return(pcp, val) (typeof(pcp)) (_percpu_add(pcp, val))
+#define _percpu_add_return(pcp, val) _percpu_add(pcp, val)
#define _percpu_and(pcp, val) \
- __percpu_and(raw_cpu_ptr(&(pcp)), val, sizeof(pcp))
+ _pcp_protect(__percpu_and, pcp, val)
#define _percpu_or(pcp, val) \
- __percpu_or(raw_cpu_ptr(&(pcp)), val, sizeof(pcp))
-
-#define _percpu_read(pcp) (typeof(pcp)) \
- (__percpu_read(raw_cpu_ptr(&(pcp)), sizeof(pcp)))
-
-#define _percpu_write(pcp, val) \
- __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp))
+ _pcp_protect(__percpu_or, pcp, val)
#define _percpu_xchg(pcp, val) (typeof(pcp)) \
- (__percpu_xchg(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp)))
+ _pcp_protect(__percpu_xchg, pcp, (unsigned long)(val))
#define this_cpu_add_1(pcp, val) _percpu_add(pcp, val)
#define this_cpu_add_2(pcp, val) _percpu_add(pcp, val)
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 32aeea083d93..ec37ab3f524f 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -200,7 +200,7 @@ up_fail:
void update_vsyscall(struct timekeeper *tk)
{
struct timespec xtime_coarse;
- u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter");
+ u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter");
++vdso_data->tb_seq_count;
smp_wmb();
@@ -213,11 +213,11 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
if (!use_syscall) {
- vdso_data->cs_cycle_last = tk->tkr.cycle_last;
+ vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
vdso_data->xtime_clock_sec = tk->xtime_sec;
- vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
- vdso_data->cs_mult = tk->tkr.mult;
- vdso_data->cs_shift = tk->tkr.shift;
+ vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
+ vdso_data->cs_mult = tk->tkr_mono.mult;
+ vdso_data->cs_shift = tk->tkr_mono.shift;
}
smp_wmb();
diff --git a/arch/metag/include/asm/io.h b/arch/metag/include/asm/io.h
index 9359e5048442..d5779b0ec573 100644
--- a/arch/metag/include/asm/io.h
+++ b/arch/metag/include/asm/io.h
@@ -2,6 +2,7 @@
#define _ASM_METAG_IO_H
#include <linux/types.h>
+#include <asm/pgtable-bits.h>
#define IO_SPACE_LIMIT 0
diff --git a/arch/metag/include/asm/pgtable-bits.h b/arch/metag/include/asm/pgtable-bits.h
new file mode 100644
index 000000000000..25ba6729f496
--- /dev/null
+++ b/arch/metag/include/asm/pgtable-bits.h
@@ -0,0 +1,104 @@
+/*
+ * Meta page table definitions.
+ */
+
+#ifndef _METAG_PGTABLE_BITS_H
+#define _METAG_PGTABLE_BITS_H
+
+#include <asm/metag_mem.h>
+
+/*
+ * Definitions for MMU descriptors
+ *
+ * These are the hardware bits in the MMCU pte entries.
+ * Derived from the Meta toolkit headers.
+ */
+#define _PAGE_PRESENT MMCU_ENTRY_VAL_BIT
+#define _PAGE_WRITE MMCU_ENTRY_WR_BIT
+#define _PAGE_PRIV MMCU_ENTRY_PRIV_BIT
+/* Write combine bit - this can cause writes to occur out of order */
+#define _PAGE_WR_COMBINE MMCU_ENTRY_WRC_BIT
+/* Sys coherent bit - this bit is never used by Linux */
+#define _PAGE_SYS_COHERENT MMCU_ENTRY_SYS_BIT
+#define _PAGE_ALWAYS_ZERO_1 0x020
+#define _PAGE_CACHE_CTRL0 0x040
+#define _PAGE_CACHE_CTRL1 0x080
+#define _PAGE_ALWAYS_ZERO_2 0x100
+#define _PAGE_ALWAYS_ZERO_3 0x200
+#define _PAGE_ALWAYS_ZERO_4 0x400
+#define _PAGE_ALWAYS_ZERO_5 0x800
+
+/* These are software bits that we stuff into the gaps in the hardware
+ * pte entries that are not used. Note, these DO get stored in the actual
+ * hardware, but the hardware just does not use them.
+ */
+#define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_1
+#define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_2
+
+/* Pages owned, and protected by, the kernel. */
+#define _PAGE_KERNEL _PAGE_PRIV
+
+/* No cacheing of this page */
+#define _PAGE_CACHE_WIN0 (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S)
+/* burst cacheing - good for data streaming */
+#define _PAGE_CACHE_WIN1 (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S)
+/* One cache way per thread */
+#define _PAGE_CACHE_WIN2 (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S)
+/* Full on cacheing */
+#define _PAGE_CACHE_WIN3 (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S)
+
+#define _PAGE_CACHEABLE (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE)
+
+/* which bits are used for cache control ... */
+#define _PAGE_CACHE_MASK (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \
+ _PAGE_WR_COMBINE)
+
+/* This is a mask of the bits that pte_modify is allowed to change. */
+#define _PAGE_CHG_MASK (PAGE_MASK)
+
+#define _PAGE_SZ_SHIFT 1
+#define _PAGE_SZ_4K (0x0)
+#define _PAGE_SZ_8K (0x1 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_16K (0x2 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_32K (0x3 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_64K (0x4 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_128K (0x5 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_256K (0x6 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_512K (0x7 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_1M (0x8 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_2M (0x9 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_4M (0xa << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_MASK (0xf << _PAGE_SZ_SHIFT)
+
+#if defined(CONFIG_PAGE_SIZE_4K)
+#define _PAGE_SZ (_PAGE_SZ_4K)
+#elif defined(CONFIG_PAGE_SIZE_8K)
+#define _PAGE_SZ (_PAGE_SZ_8K)
+#elif defined(CONFIG_PAGE_SIZE_16K)
+#define _PAGE_SZ (_PAGE_SZ_16K)
+#endif
+#define _PAGE_TABLE (_PAGE_SZ | _PAGE_PRESENT)
+
+#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)
+# define _PAGE_SZHUGE (_PAGE_SZ_8K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)
+# define _PAGE_SZHUGE (_PAGE_SZ_16K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)
+# define _PAGE_SZHUGE (_PAGE_SZ_32K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
+# define _PAGE_SZHUGE (_PAGE_SZ_64K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)
+# define _PAGE_SZHUGE (_PAGE_SZ_128K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
+# define _PAGE_SZHUGE (_PAGE_SZ_256K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
+# define _PAGE_SZHUGE (_PAGE_SZ_512K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)
+# define _PAGE_SZHUGE (_PAGE_SZ_1M)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)
+# define _PAGE_SZHUGE (_PAGE_SZ_2M)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)
+# define _PAGE_SZHUGE (_PAGE_SZ_4M)
+#endif
+
+#endif /* _METAG_PGTABLE_BITS_H */
diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h
index d0604c0a8702..ffa3a3a2ecad 100644
--- a/arch/metag/include/asm/pgtable.h
+++ b/arch/metag/include/asm/pgtable.h
@@ -5,6 +5,7 @@
#ifndef _METAG_PGTABLE_H
#define _METAG_PGTABLE_H
+#include <asm/pgtable-bits.h>
#include <asm-generic/pgtable-nopmd.h>
/* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */
@@ -21,100 +22,6 @@
#endif
/*
- * Definitions for MMU descriptors
- *
- * These are the hardware bits in the MMCU pte entries.
- * Derived from the Meta toolkit headers.
- */
-#define _PAGE_PRESENT MMCU_ENTRY_VAL_BIT
-#define _PAGE_WRITE MMCU_ENTRY_WR_BIT
-#define _PAGE_PRIV MMCU_ENTRY_PRIV_BIT
-/* Write combine bit - this can cause writes to occur out of order */
-#define _PAGE_WR_COMBINE MMCU_ENTRY_WRC_BIT
-/* Sys coherent bit - this bit is never used by Linux */
-#define _PAGE_SYS_COHERENT MMCU_ENTRY_SYS_BIT
-#define _PAGE_ALWAYS_ZERO_1 0x020
-#define _PAGE_CACHE_CTRL0 0x040
-#define _PAGE_CACHE_CTRL1 0x080
-#define _PAGE_ALWAYS_ZERO_2 0x100
-#define _PAGE_ALWAYS_ZERO_3 0x200
-#define _PAGE_ALWAYS_ZERO_4 0x400
-#define _PAGE_ALWAYS_ZERO_5 0x800
-
-/* These are software bits that we stuff into the gaps in the hardware
- * pte entries that are not used. Note, these DO get stored in the actual
- * hardware, but the hardware just does not use them.
- */
-#define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_1
-#define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_2
-
-/* Pages owned, and protected by, the kernel. */
-#define _PAGE_KERNEL _PAGE_PRIV
-
-/* No cacheing of this page */
-#define _PAGE_CACHE_WIN0 (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S)
-/* burst cacheing - good for data streaming */
-#define _PAGE_CACHE_WIN1 (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S)
-/* One cache way per thread */
-#define _PAGE_CACHE_WIN2 (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S)
-/* Full on cacheing */
-#define _PAGE_CACHE_WIN3 (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S)
-
-#define _PAGE_CACHEABLE (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE)
-
-/* which bits are used for cache control ... */
-#define _PAGE_CACHE_MASK (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \
- _PAGE_WR_COMBINE)
-
-/* This is a mask of the bits that pte_modify is allowed to change. */
-#define _PAGE_CHG_MASK (PAGE_MASK)
-
-#define _PAGE_SZ_SHIFT 1
-#define _PAGE_SZ_4K (0x0)
-#define _PAGE_SZ_8K (0x1 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_16K (0x2 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_32K (0x3 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_64K (0x4 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_128K (0x5 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_256K (0x6 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_512K (0x7 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_1M (0x8 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_2M (0x9 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_4M (0xa << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_MASK (0xf << _PAGE_SZ_SHIFT)
-
-#if defined(CONFIG_PAGE_SIZE_4K)
-#define _PAGE_SZ (_PAGE_SZ_4K)
-#elif defined(CONFIG_PAGE_SIZE_8K)
-#define _PAGE_SZ (_PAGE_SZ_8K)
-#elif defined(CONFIG_PAGE_SIZE_16K)
-#define _PAGE_SZ (_PAGE_SZ_16K)
-#endif
-#define _PAGE_TABLE (_PAGE_SZ | _PAGE_PRESENT)
-
-#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)
-# define _PAGE_SZHUGE (_PAGE_SZ_8K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)
-# define _PAGE_SZHUGE (_PAGE_SZ_16K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)
-# define _PAGE_SZHUGE (_PAGE_SZ_32K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
-# define _PAGE_SZHUGE (_PAGE_SZ_64K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)
-# define _PAGE_SZHUGE (_PAGE_SZ_128K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
-# define _PAGE_SZHUGE (_PAGE_SZ_256K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
-# define _PAGE_SZHUGE (_PAGE_SZ_512K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)
-# define _PAGE_SZHUGE (_PAGE_SZ_1M)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)
-# define _PAGE_SZHUGE (_PAGE_SZ_2M)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)
-# define _PAGE_SZHUGE (_PAGE_SZ_4M)
-#endif
-
-/*
* The Linux memory management assumes a three-level page table setup. On
* Meta, we use that, but "fold" the mid level into the top-level page
* table.
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 03cd858a401c..4cbe23af400a 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -153,6 +153,7 @@
#define PPC_INST_MFSPR_PVR_MASK 0xfc1fffff
#define PPC_INST_MFTMR 0x7c0002dc
#define PPC_INST_MSGSND 0x7c00019c
+#define PPC_INST_MSGCLR 0x7c0001dc
#define PPC_INST_MSGSNDP 0x7c00011c
#define PPC_INST_MTTMR 0x7c0003dc
#define PPC_INST_NOP 0x60000000
@@ -309,6 +310,8 @@
___PPC_RB(b) | __PPC_EH(eh))
#define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \
___PPC_RB(b))
+#define PPC_MSGCLR(b) stringify_in_c(.long PPC_INST_MSGCLR | \
+ ___PPC_RB(b))
#define PPC_MSGSNDP(b) stringify_in_c(.long PPC_INST_MSGSNDP | \
___PPC_RB(b))
#define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_INST_POPCNTB | \
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 1c874fb533bb..af56b5c6c81a 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -608,13 +608,16 @@
#define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */
#define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */
#define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */
+#define SRR1_WAKEMASK_P8 0x003c0000 /* reason for wakeup on POWER8 */
#define SRR1_WAKESYSERR 0x00300000 /* System error */
#define SRR1_WAKEEE 0x00200000 /* External interrupt */
#define SRR1_WAKEMT 0x00280000 /* mtctrl */
#define SRR1_WAKEHMI 0x00280000 /* Hypervisor maintenance */
#define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */
+#define SRR1_WAKEDBELL 0x00140000 /* Privileged doorbell on P8 */
#define SRR1_WAKETHERM 0x00100000 /* Thermal management interrupt */
#define SRR1_WAKERESET 0x00100000 /* System reset */
+#define SRR1_WAKEHDBELL 0x000c0000 /* Hypervisor doorbell on P8 */
#define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask [46:47] */
#define SRR1_WS_DEEPEST 0x00030000 /* Some resources not maintained,
* may not be recoverable */
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index f337666768a7..f83046878336 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -437,6 +437,26 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
+ { /* Power8NVL */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004c0000,
+ .cpu_name = "POWER8NVL (raw)",
+ .cpu_features = CPU_FTRS_POWER8,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power8",
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .flush_tlb = __flush_tlb_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
+ .platform = "power8",
+ },
{ /* Power8 DD1: Does not support doorbell IPIs */
.pvr_mask = 0xffffff00,
.pvr_value = 0x004d0100,
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index f4217819cc31..2128f3a96c32 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -17,6 +17,7 @@
#include <asm/dbell.h>
#include <asm/irq_regs.h>
+#include <asm/kvm_ppc.h>
#ifdef CONFIG_SMP
void doorbell_setup_this_cpu(void)
@@ -41,6 +42,7 @@ void doorbell_exception(struct pt_regs *regs)
may_hard_irq_enable();
+ kvmppc_set_host_ipi(smp_processor_id(), 0);
__this_cpu_inc(irq_stat.doorbell_irqs);
smp_ipi_demux();
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index c2df8150bd7a..9519e6bdc6d7 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1408,7 +1408,7 @@ machine_check_handle_early:
bne 9f /* continue in V mode if we are. */
5:
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
* We are coming from kernel context. Check if we are coming from
* guest. if yes, then we can continue. We will fall through
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index fc34025ef822..38a45088f633 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -33,6 +33,8 @@
#include <asm/runlatch.h>
#include <asm/code-patching.h>
#include <asm/dbell.h>
+#include <asm/kvm_ppc.h>
+#include <asm/ppc-opcode.h>
#include "powernv.h"
@@ -149,7 +151,7 @@ static int pnv_smp_cpu_disable(void)
static void pnv_smp_cpu_kill_self(void)
{
unsigned int cpu;
- unsigned long srr1;
+ unsigned long srr1, wmask;
u32 idle_states;
/* Standard hot unplug procedure */
@@ -161,6 +163,10 @@ static void pnv_smp_cpu_kill_self(void)
generic_set_cpu_dead(cpu);
smp_wmb();
+ wmask = SRR1_WAKEMASK;
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ wmask = SRR1_WAKEMASK_P8;
+
idle_states = pnv_get_supported_cpuidle_states();
/* We don't want to take decrementer interrupts while we are offline,
* so clear LPCR:PECE1. We keep PECE2 enabled.
@@ -191,10 +197,14 @@ static void pnv_smp_cpu_kill_self(void)
* having finished executing in a KVM guest, then srr1
* contains 0.
*/
- if ((srr1 & SRR1_WAKEMASK) == SRR1_WAKEEE) {
+ if ((srr1 & wmask) == SRR1_WAKEEE) {
icp_native_flush_interrupt();
local_paca->irq_happened &= PACA_IRQ_HARD_DIS;
smp_mb();
+ } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
+ unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+ asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
+ kvmppc_set_host_ipi(cpu, 0);
}
if (cpu_core_split_required())
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 90cf3dcbd9f2..8f35d525cede 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -25,10 +25,10 @@
static struct kobject *mobility_kobj;
struct update_props_workarea {
- u32 phandle;
- u32 state;
- u64 reserved;
- u32 nprops;
+ __be32 phandle;
+ __be32 state;
+ __be64 reserved;
+ __be32 nprops;
} __packed;
#define NODE_ACTION_MASK 0xff000000
@@ -54,11 +54,11 @@ static int mobility_rtas_call(int token, char *buf, s32 scope)
return rc;
}
-static int delete_dt_node(u32 phandle)
+static int delete_dt_node(__be32 phandle)
{
struct device_node *dn;
- dn = of_find_node_by_phandle(phandle);
+ dn = of_find_node_by_phandle(be32_to_cpu(phandle));
if (!dn)
return -ENOENT;
@@ -127,7 +127,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop,
return 0;
}
-static int update_dt_node(u32 phandle, s32 scope)
+static int update_dt_node(__be32 phandle, s32 scope)
{
struct update_props_workarea *upwa;
struct device_node *dn;
@@ -136,6 +136,7 @@ static int update_dt_node(u32 phandle, s32 scope)
char *prop_data;
char *rtas_buf;
int update_properties_token;
+ u32 nprops;
u32 vd;
update_properties_token = rtas_token("ibm,update-properties");
@@ -146,7 +147,7 @@ static int update_dt_node(u32 phandle, s32 scope)
if (!rtas_buf)
return -ENOMEM;
- dn = of_find_node_by_phandle(phandle);
+ dn = of_find_node_by_phandle(be32_to_cpu(phandle));
if (!dn) {
kfree(rtas_buf);
return -ENOENT;
@@ -162,6 +163,7 @@ static int update_dt_node(u32 phandle, s32 scope)
break;
prop_data = rtas_buf + sizeof(*upwa);
+ nprops = be32_to_cpu(upwa->nprops);
/* On the first call to ibm,update-properties for a node the
* the first property value descriptor contains an empty
@@ -170,17 +172,17 @@ static int update_dt_node(u32 phandle, s32 scope)
*/
if (*prop_data == 0) {
prop_data++;
- vd = *(u32 *)prop_data;
+ vd = be32_to_cpu(*(__be32 *)prop_data);
prop_data += vd + sizeof(vd);
- upwa->nprops--;
+ nprops--;
}
- for (i = 0; i < upwa->nprops; i++) {
+ for (i = 0; i < nprops; i++) {
char *prop_name;
prop_name = prop_data;
prop_data += strlen(prop_name) + 1;
- vd = *(u32 *)prop_data;
+ vd = be32_to_cpu(*(__be32 *)prop_data);
prop_data += sizeof(vd);
switch (vd) {
@@ -212,13 +214,13 @@ static int update_dt_node(u32 phandle, s32 scope)
return 0;
}
-static int add_dt_node(u32 parent_phandle, u32 drc_index)
+static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
{
struct device_node *dn;
struct device_node *parent_dn;
int rc;
- parent_dn = of_find_node_by_phandle(parent_phandle);
+ parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle));
if (!parent_dn)
return -ENOENT;
@@ -237,7 +239,7 @@ static int add_dt_node(u32 parent_phandle, u32 drc_index)
int pseries_devicetree_update(s32 scope)
{
char *rtas_buf;
- u32 *data;
+ __be32 *data;
int update_nodes_token;
int rc;
@@ -254,17 +256,17 @@ int pseries_devicetree_update(s32 scope)
if (rc && rc != 1)
break;
- data = (u32 *)rtas_buf + 4;
- while (*data & NODE_ACTION_MASK) {
+ data = (__be32 *)rtas_buf + 4;
+ while (be32_to_cpu(*data) & NODE_ACTION_MASK) {
int i;
- u32 action = *data & NODE_ACTION_MASK;
- int node_count = *data & NODE_COUNT_MASK;
+ u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK;
+ u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK;
data++;
for (i = 0; i < node_count; i++) {
- u32 phandle = *data++;
- u32 drc_index;
+ __be32 phandle = *data++;
+ __be32 drc_index;
switch (action) {
case DELETE_DT_NODE:
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index c9df40b5c0ac..c9c875d9ed31 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -211,7 +211,7 @@ do { \
extern unsigned long mmap_rnd_mask;
-#define STACK_RND_MASK (mmap_rnd_mask)
+#define STACK_RND_MASK (test_thread_flag(TIF_31BIT) ? 0x7ff : mmap_rnd_mask)
#define ARCH_DLINFO \
do { \
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 82c19899574f..6c79f1b44fe7 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -57,6 +57,44 @@
unsigned long ftrace_plt;
+static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn)
+{
+#ifdef CC_USING_HOTPATCH
+ /* brcl 0,0 */
+ insn->opc = 0xc004;
+ insn->disp = 0;
+#else
+ /* stg r14,8(r15) */
+ insn->opc = 0xe3e0;
+ insn->disp = 0xf0080024;
+#endif
+}
+
+static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+ if (insn->opc == BREAKPOINT_INSTRUCTION)
+ return 1;
+#endif
+ return 0;
+}
+
+static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+ insn->opc = BREAKPOINT_INSTRUCTION;
+ insn->disp = KPROBE_ON_FTRACE_NOP;
+#endif
+}
+
+static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+ insn->opc = BREAKPOINT_INSTRUCTION;
+ insn->disp = KPROBE_ON_FTRACE_CALL;
+#endif
+}
+
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
@@ -72,16 +110,9 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
return -EFAULT;
if (addr == MCOUNT_ADDR) {
/* Initial code replacement */
-#ifdef CC_USING_HOTPATCH
- /* We expect to see brcl 0,0 */
- ftrace_generate_nop_insn(&orig);
-#else
- /* We expect to see stg r14,8(r15) */
- orig.opc = 0xe3e0;
- orig.disp = 0xf0080024;
-#endif
+ ftrace_generate_orig_insn(&orig);
ftrace_generate_nop_insn(&new);
- } else if (old.opc == BREAKPOINT_INSTRUCTION) {
+ } else if (is_kprobe_on_ftrace(&old)) {
/*
* If we find a breakpoint instruction, a kprobe has been
* placed at the beginning of the function. We write the
@@ -89,9 +120,8 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
* bytes of the original instruction so that the kprobes
* handler can execute a nop, if it reaches this breakpoint.
*/
- new.opc = orig.opc = BREAKPOINT_INSTRUCTION;
- orig.disp = KPROBE_ON_FTRACE_CALL;
- new.disp = KPROBE_ON_FTRACE_NOP;
+ ftrace_generate_kprobe_call_insn(&orig);
+ ftrace_generate_kprobe_nop_insn(&new);
} else {
/* Replace ftrace call with a nop. */
ftrace_generate_call_insn(&orig, rec->ip);
@@ -111,7 +141,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old)))
return -EFAULT;
- if (old.opc == BREAKPOINT_INSTRUCTION) {
+ if (is_kprobe_on_ftrace(&old)) {
/*
* If we find a breakpoint instruction, a kprobe has been
* placed at the beginning of the function. We write the
@@ -119,9 +149,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
* bytes of the original instruction so that the kprobes
* handler can execute a brasl if it reaches this breakpoint.
*/
- new.opc = orig.opc = BREAKPOINT_INSTRUCTION;
- orig.disp = KPROBE_ON_FTRACE_NOP;
- new.disp = KPROBE_ON_FTRACE_CALL;
+ ftrace_generate_kprobe_nop_insn(&orig);
+ ftrace_generate_kprobe_call_insn(&new);
} else {
/* Replace nop with an ftrace call. */
ftrace_generate_nop_insn(&orig);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index c3f8d157cb0d..e6a1578fc000 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1415,7 +1415,7 @@ CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
static struct attribute *cpumsf_pmu_events_attr[] = {
CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
- CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG),
+ NULL,
NULL,
};
@@ -1606,8 +1606,11 @@ static int __init init_cpum_sampling_pmu(void)
return -EINVAL;
}
- if (si.ad)
+ if (si.ad) {
sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+ cpumsf_pmu_events_attr[1] =
+ CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
+ }
sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
if (!sfdbg)
diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S
index 6b09fdffbd2f..ca6294645dd3 100644
--- a/arch/s390/kernel/swsusp_asm64.S
+++ b/arch/s390/kernel/swsusp_asm64.S
@@ -177,6 +177,17 @@ restart_entry:
lhi %r1,1
sigp %r1,%r0,SIGP_SET_ARCHITECTURE
sam64
+#ifdef CONFIG_SMP
+ larl %r1,smp_cpu_mt_shift
+ icm %r1,15,0(%r1)
+ jz smt_done
+ llgfr %r1,%r1
+smt_loop:
+ sigp %r1,%r0,SIGP_SET_MULTI_THREADING
+ brc 8,smt_done /* accepted */
+ brc 2,smt_loop /* busy, try again */
+smt_done:
+#endif
larl %r1,.Lnew_pgm_check_psw
lpswe 0(%r1)
pgm_check_entry:
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 20660dddb2d6..170ddd2018b3 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -215,20 +215,20 @@ void update_vsyscall(struct timekeeper *tk)
{
u64 nsecps;
- if (tk->tkr.clock != &clocksource_tod)
+ if (tk->tkr_mono.clock != &clocksource_tod)
return;
/* Make userspace gettimeofday spin until we're done. */
++vdso_data->tb_update_count;
smp_wmb();
- vdso_data->xtime_tod_stamp = tk->tkr.cycle_last;
+ vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
vdso_data->xtime_clock_sec = tk->xtime_sec;
- vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
+ vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
vdso_data->wtom_clock_sec =
tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
- vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec +
- + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift);
- nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift;
+ vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
+ + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
+ nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift;
while (vdso_data->wtom_clock_nsec >= nsecps) {
vdso_data->wtom_clock_nsec -= nsecps;
vdso_data->wtom_clock_sec++;
@@ -236,7 +236,7 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->xtime_coarse_sec = tk->xtime_sec;
vdso_data->xtime_coarse_nsec =
- (long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+ (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
vdso_data->wtom_coarse_sec =
vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
vdso_data->wtom_coarse_nsec =
@@ -246,8 +246,8 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->wtom_coarse_sec++;
}
- vdso_data->tk_mult = tk->tkr.mult;
- vdso_data->tk_shift = tk->tkr.shift;
+ vdso_data->tk_mult = tk->tkr_mono.mult;
+ vdso_data->tk_shift = tk->tkr_mono.shift;
smp_wmb();
++vdso_data->tb_update_count;
}
@@ -283,7 +283,7 @@ void __init time_init(void)
if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
panic("Couldn't request external interrupt 0x1406");
- if (clocksource_register(&clocksource_tod) != 0)
+ if (__clocksource_register(&clocksource_tod) != 0)
panic("Could not register TOD clock source");
/* Enable TOD clock interrupts on the boot cpu. */
diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h
index 4f6725ff4c33..f5b6537306f0 100644
--- a/arch/sparc/include/asm/hypervisor.h
+++ b/arch/sparc/include/asm/hypervisor.h
@@ -2957,6 +2957,17 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
unsigned long reg_val);
#endif
+
+#define HV_FAST_M7_GET_PERFREG 0x43
+#define HV_FAST_M7_SET_PERFREG 0x44
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_m7_get_perfreg(unsigned long reg_num,
+ unsigned long *reg_val);
+unsigned long sun4v_m7_set_perfreg(unsigned long reg_num,
+ unsigned long reg_val);
+#endif
+
/* Function numbers for HV_CORE_TRAP. */
#define HV_CORE_SET_VER 0x00
#define HV_CORE_PUTCHAR 0x01
@@ -2981,6 +2992,7 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
#define HV_GRP_SDIO 0x0108
#define HV_GRP_SDIO_ERR 0x0109
#define HV_GRP_REBOOT_DATA 0x0110
+#define HV_GRP_M7_PERF 0x0114
#define HV_GRP_NIAG_PERF 0x0200
#define HV_GRP_FIRE_PERF 0x0201
#define HV_GRP_N2_CPU 0x0202
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index 5c55145bfbf0..662500fa555f 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -48,6 +48,7 @@ static struct api_info api_table[] = {
{ .group = HV_GRP_VT_CPU, },
{ .group = HV_GRP_T5_CPU, },
{ .group = HV_GRP_DIAG, .flags = FLAG_PRE_API },
+ { .group = HV_GRP_M7_PERF, },
};
static DEFINE_SPINLOCK(hvapi_lock);
diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S
index caedf8320416..afbaba52d2f1 100644
--- a/arch/sparc/kernel/hvcalls.S
+++ b/arch/sparc/kernel/hvcalls.S
@@ -837,3 +837,19 @@ ENTRY(sun4v_t5_set_perfreg)
retl
nop
ENDPROC(sun4v_t5_set_perfreg)
+
+ENTRY(sun4v_m7_get_perfreg)
+ mov %o1, %o4
+ mov HV_FAST_M7_GET_PERFREG, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_m7_get_perfreg)
+
+ENTRY(sun4v_m7_set_perfreg)
+ mov HV_FAST_M7_SET_PERFREG, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_m7_set_perfreg)
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c
index 7e967c8018c8..eb978c77c76a 100644
--- a/arch/sparc/kernel/pcr.c
+++ b/arch/sparc/kernel/pcr.c
@@ -217,6 +217,31 @@ static const struct pcr_ops n5_pcr_ops = {
.pcr_nmi_disable = PCR_N4_PICNPT,
};
+static u64 m7_pcr_read(unsigned long reg_num)
+{
+ unsigned long val;
+
+ (void) sun4v_m7_get_perfreg(reg_num, &val);
+
+ return val;
+}
+
+static void m7_pcr_write(unsigned long reg_num, u64 val)
+{
+ (void) sun4v_m7_set_perfreg(reg_num, val);
+}
+
+static const struct pcr_ops m7_pcr_ops = {
+ .read_pcr = m7_pcr_read,
+ .write_pcr = m7_pcr_write,
+ .read_pic = n4_pic_read,
+ .write_pic = n4_pic_write,
+ .nmi_picl_value = n4_picl_value,
+ .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE |
+ PCR_N4_UTRACE | PCR_N4_TOE |
+ (26 << PCR_N4_SL_SHIFT)),
+ .pcr_nmi_disable = PCR_N4_PICNPT,
+};
static unsigned long perf_hsvc_group;
static unsigned long perf_hsvc_major;
@@ -248,6 +273,10 @@ static int __init register_perf_hsvc(void)
perf_hsvc_group = HV_GRP_T5_CPU;
break;
+ case SUN4V_CHIP_SPARC_M7:
+ perf_hsvc_group = HV_GRP_M7_PERF;
+ break;
+
default:
return -ENODEV;
}
@@ -293,6 +322,10 @@ static int __init setup_sun4v_pcr_ops(void)
pcr_ops = &n5_pcr_ops;
break;
+ case SUN4V_CHIP_SPARC_M7:
+ pcr_ops = &m7_pcr_ops;
+ break;
+
default:
ret = -ENODEV;
break;
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 46a5e4508752..86eebfa3b158 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -792,6 +792,42 @@ static const struct sparc_pmu niagara4_pmu = {
.num_pic_regs = 4,
};
+static void sparc_m7_write_pmc(int idx, u64 val)
+{
+ u64 pcr;
+
+ pcr = pcr_ops->read_pcr(idx);
+ /* ensure ov and ntc are reset */
+ pcr &= ~(PCR_N4_OV | PCR_N4_NTC);
+
+ pcr_ops->write_pic(idx, val & 0xffffffff);
+
+ pcr_ops->write_pcr(idx, pcr);
+}
+
+static const struct sparc_pmu sparc_m7_pmu = {
+ .event_map = niagara4_event_map,
+ .cache_map = &niagara4_cache_map,
+ .max_events = ARRAY_SIZE(niagara4_perfmon_event_map),
+ .read_pmc = sparc_vt_read_pmc,
+ .write_pmc = sparc_m7_write_pmc,
+ .upper_shift = 5,
+ .lower_shift = 5,
+ .event_mask = 0x7ff,
+ .user_bit = PCR_N4_UTRACE,
+ .priv_bit = PCR_N4_STRACE,
+
+ /* We explicitly don't support hypervisor tracing. */
+ .hv_bit = 0,
+
+ .irq_bit = PCR_N4_TOE,
+ .upper_nop = 0,
+ .lower_nop = 0,
+ .flags = 0,
+ .max_hw_events = 4,
+ .num_pcrs = 4,
+ .num_pic_regs = 4,
+};
static const struct sparc_pmu *sparc_pmu __read_mostly;
static u64 event_encoding(u64 event_id, int idx)
@@ -960,6 +996,8 @@ out:
cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
}
+static void sparc_pmu_start(struct perf_event *event, int flags);
+
/* On this PMU each PIC has it's own PCR control register. */
static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
{
@@ -972,20 +1010,13 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
struct perf_event *cp = cpuc->event[i];
struct hw_perf_event *hwc = &cp->hw;
int idx = hwc->idx;
- u64 enc;
if (cpuc->current_idx[i] != PIC_NO_INDEX)
continue;
- sparc_perf_event_set_period(cp, hwc, idx);
cpuc->current_idx[i] = idx;
- enc = perf_event_get_enc(cpuc->events[i]);
- cpuc->pcr[idx] &= ~mask_for_index(idx);
- if (hwc->state & PERF_HES_STOPPED)
- cpuc->pcr[idx] |= nop_for_index(idx);
- else
- cpuc->pcr[idx] |= event_encoding(enc, idx);
+ sparc_pmu_start(cp, PERF_EF_RELOAD);
}
out:
for (i = 0; i < cpuc->n_events; i++) {
@@ -1101,7 +1132,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags)
int i;
local_irq_save(flags);
- perf_pmu_disable(event->pmu);
for (i = 0; i < cpuc->n_events; i++) {
if (event == cpuc->event[i]) {
@@ -1127,7 +1157,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags)
}
}
- perf_pmu_enable(event->pmu);
local_irq_restore(flags);
}
@@ -1361,7 +1390,6 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
unsigned long flags;
local_irq_save(flags);
- perf_pmu_disable(event->pmu);
n0 = cpuc->n_events;
if (n0 >= sparc_pmu->max_hw_events)
@@ -1394,7 +1422,6 @@ nocheck:
ret = 0;
out:
- perf_pmu_enable(event->pmu);
local_irq_restore(flags);
return ret;
}
@@ -1667,6 +1694,10 @@ static bool __init supported_pmu(void)
sparc_pmu = &niagara4_pmu;
return true;
}
+ if (!strcmp(sparc_pmu_type, "sparc-m7")) {
+ sparc_pmu = &sparc_m7_pmu;
+ return true;
+ }
return false;
}
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 0be7bf978cb1..46a59643bb1c 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -287,6 +287,8 @@ void arch_trigger_all_cpu_backtrace(bool include_self)
printk(" TPC[%lx] O7[%lx] I7[%lx] RPC[%lx]\n",
gp->tpc, gp->o7, gp->i7, gp->rpc);
}
+
+ touch_nmi_watchdog();
}
memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
@@ -362,6 +364,8 @@ static void pmu_snapshot_all_cpus(void)
(cpu == this_cpu ? '*' : ' '), cpu,
pp->pcr[0], pp->pcr[1], pp->pcr[2], pp->pcr[3],
pp->pic[0], pp->pic[1], pp->pic[2], pp->pic[3]);
+
+ touch_nmi_watchdog();
}
memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 2f80d23a0a44..18147a5523d9 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -181,17 +181,13 @@ static struct clocksource timer_cs = {
.rating = 100,
.read = timer_cs_read,
.mask = CLOCKSOURCE_MASK(64),
- .shift = 2,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
static __init int setup_timer_cs(void)
{
timer_cs_enabled = 1;
- timer_cs.mult = clocksource_hz2mult(sparc_config.clock_rate,
- timer_cs.shift);
-
- return clocksource_register(&timer_cs);
+ return clocksource_register_hz(&timer_cs, sparc_config.clock_rate);
}
#ifdef CONFIG_SMP
diff --git a/arch/sparc/lib/memmove.S b/arch/sparc/lib/memmove.S
index b7f6334e159f..857ad4f8905f 100644
--- a/arch/sparc/lib/memmove.S
+++ b/arch/sparc/lib/memmove.S
@@ -8,9 +8,11 @@
.text
ENTRY(memmove) /* o0=dst o1=src o2=len */
- mov %o0, %g1
+ brz,pn %o2, 99f
+ mov %o0, %g1
+
cmp %o0, %o1
- bleu,pt %xcc, memcpy
+ bleu,pt %xcc, 2f
add %o1, %o2, %g7
cmp %g7, %o0
bleu,pt %xcc, memcpy
@@ -24,7 +26,34 @@ ENTRY(memmove) /* o0=dst o1=src o2=len */
stb %g7, [%o0]
bne,pt %icc, 1b
sub %o0, 1, %o0
-
+99:
retl
mov %g1, %o0
+
+ /* We can't just call memcpy for these memmove cases. On some
+ * chips the memcpy uses cache initializing stores and when dst
+ * and src are close enough, those can clobber the source data
+ * before we've loaded it in.
+ */
+2: or %o0, %o1, %g7
+ or %o2, %g7, %g7
+ andcc %g7, 0x7, %g0
+ bne,pn %xcc, 4f
+ nop
+
+3: ldx [%o1], %g7
+ add %o1, 8, %o1
+ subcc %o2, 8, %o2
+ add %o0, 8, %o0
+ bne,pt %icc, 3b
+ stx %g7, [%o0 - 0x8]
+ ba,a,pt %xcc, 99b
+
+4: ldub [%o1], %g7
+ add %o1, 1, %o1
+ subcc %o2, 1, %o2
+ add %o0, 1, %o0
+ bne,pt %icc, 4b
+ stb %g7, [%o0 - 0x1]
+ ba,a,pt %xcc, 99b
ENDPROC(memmove)
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
index d412b0856c0a..00178ecf9aea 100644
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -257,34 +257,34 @@ void update_vsyscall_tz(void)
void update_vsyscall(struct timekeeper *tk)
{
- if (tk->tkr.clock != &cycle_counter_cs)
+ if (tk->tkr_mono.clock != &cycle_counter_cs)
return;
write_seqcount_begin(&vdso_data->tb_seq);
- vdso_data->cycle_last = tk->tkr.cycle_last;
- vdso_data->mask = tk->tkr.mask;
- vdso_data->mult = tk->tkr.mult;
- vdso_data->shift = tk->tkr.shift;
+ vdso_data->cycle_last = tk->tkr_mono.cycle_last;
+ vdso_data->mask = tk->tkr_mono.mask;
+ vdso_data->mult = tk->tkr_mono.mult;
+ vdso_data->shift = tk->tkr_mono.shift;
vdso_data->wall_time_sec = tk->xtime_sec;
- vdso_data->wall_time_snsec = tk->tkr.xtime_nsec;
+ vdso_data->wall_time_snsec = tk->tkr_mono.xtime_nsec;
vdso_data->monotonic_time_sec = tk->xtime_sec
+ tk->wall_to_monotonic.tv_sec;
- vdso_data->monotonic_time_snsec = tk->tkr.xtime_nsec
+ vdso_data->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
+ ((u64)tk->wall_to_monotonic.tv_nsec
- << tk->tkr.shift);
+ << tk->tkr_mono.shift);
while (vdso_data->monotonic_time_snsec >=
- (((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
+ (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
vdso_data->monotonic_time_snsec -=
- ((u64)NSEC_PER_SEC) << tk->tkr.shift;
+ ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
vdso_data->monotonic_time_sec++;
}
vdso_data->wall_time_coarse_sec = tk->xtime_sec;
- vdso_data->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >>
- tk->tkr.shift);
+ vdso_data->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
+ tk->tkr_mono.shift);
vdso_data->monotonic_time_coarse_sec =
vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ac41b3ad1fc9..0420ebcac116 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1978,13 +1978,23 @@ void arch_perf_update_userpage(struct perf_event *event,
data = cyc2ns_read_begin();
+ /*
+ * Internal timekeeping for enabled/running/stopped times
+ * is always in the local_clock domain.
+ */
userpg->cap_user_time = 1;
userpg->time_mult = data->cyc2ns_mul;
userpg->time_shift = data->cyc2ns_shift;
userpg->time_offset = data->cyc2ns_offset - now;
- userpg->cap_user_time_zero = 1;
- userpg->time_zero = data->cyc2ns_offset;
+ /*
+ * cap_user_time_zero doesn't make sense when we're using a different
+ * time base for the records.
+ */
+ if (event->clock == &local_clock) {
+ userpg->cap_user_time_zero = 1;
+ userpg->time_zero = data->cyc2ns_offset;
+ }
cyc2ns_read_end(data);
}
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index c7d791f32b98..51e330416995 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -31,30 +31,30 @@ void update_vsyscall(struct timekeeper *tk)
gtod_write_begin(vdata);
/* copy vsyscall data */
- vdata->vclock_mode = tk->tkr.clock->archdata.vclock_mode;
- vdata->cycle_last = tk->tkr.cycle_last;
- vdata->mask = tk->tkr.mask;
- vdata->mult = tk->tkr.mult;
- vdata->shift = tk->tkr.shift;
+ vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
+ vdata->cycle_last = tk->tkr_mono.cycle_last;
+ vdata->mask = tk->tkr_mono.mask;
+ vdata->mult = tk->tkr_mono.mult;
+ vdata->shift = tk->tkr_mono.shift;
vdata->wall_time_sec = tk->xtime_sec;
- vdata->wall_time_snsec = tk->tkr.xtime_nsec;
+ vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec;
vdata->monotonic_time_sec = tk->xtime_sec
+ tk->wall_to_monotonic.tv_sec;
- vdata->monotonic_time_snsec = tk->tkr.xtime_nsec
+ vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
+ ((u64)tk->wall_to_monotonic.tv_nsec
- << tk->tkr.shift);
+ << tk->tkr_mono.shift);
while (vdata->monotonic_time_snsec >=
- (((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
+ (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
vdata->monotonic_time_snsec -=
- ((u64)NSEC_PER_SEC) << tk->tkr.shift;
+ ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
vdata->monotonic_time_sec++;
}
vdata->wall_time_coarse_sec = tk->xtime_sec;
- vdata->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >>
- tk->tkr.shift);
+ vdata->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
+ tk->tkr_mono.shift);
vdata->monotonic_time_coarse_sec =
vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index b1947e0f3e10..46d4449772bc 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -422,6 +422,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
struct kvm_ioapic *ioapic, int vector, int trigger_mode)
{
int i;
+ struct kvm_lapic *apic = vcpu->arch.apic;
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
@@ -443,7 +444,8 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
spin_lock(&ioapic->lock);
- if (trigger_mode != IOAPIC_LEVEL_TRIG)
+ if (trigger_mode != IOAPIC_LEVEL_TRIG ||
+ kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
continue;
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index bd4e34de24c7..4ee827d7bf36 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -833,8 +833,7 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
{
- if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
- kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
+ if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
int trigger_mode;
if (apic_test_vector(vector, apic->regs + APIC_TMR))
trigger_mode = IOAPIC_LEVEL_TRIG;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 10a481b7674d..ae4f6d35d19c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2479,8 +2479,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
if (enable_ept) {
/* nested EPT: emulate EPT also to L1 */
vmx->nested.nested_vmx_secondary_ctls_high |=
- SECONDARY_EXEC_ENABLE_EPT |
- SECONDARY_EXEC_UNRESTRICTED_GUEST;
+ SECONDARY_EXEC_ENABLE_EPT;
vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
VMX_EPT_INVEPT_BIT;
@@ -2494,6 +2493,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
} else
vmx->nested.nested_vmx_ept_caps = 0;
+ if (enable_unrestricted_guest)
+ vmx->nested.nested_vmx_secondary_ctls_high |=
+ SECONDARY_EXEC_UNRESTRICTED_GUEST;
+
/* miscellaneous data */
rdmsr(MSR_IA32_VMX_MISC,
vmx->nested.nested_vmx_misc_low,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 32bf19ef3115..0ee725f1896d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1070,19 +1070,19 @@ static void update_pvclock_gtod(struct timekeeper *tk)
struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
u64 boot_ns;
- boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot));
+ boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
write_seqcount_begin(&vdata->seq);
/* copy pvclock gtod data */
- vdata->clock.vclock_mode = tk->tkr.clock->archdata.vclock_mode;
- vdata->clock.cycle_last = tk->tkr.cycle_last;
- vdata->clock.mask = tk->tkr.mask;
- vdata->clock.mult = tk->tkr.mult;
- vdata->clock.shift = tk->tkr.shift;
+ vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
+ vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
+ vdata->clock.mask = tk->tkr_mono.mask;
+ vdata->clock.mult = tk->tkr_mono.mult;
+ vdata->clock.shift = tk->tkr_mono.shift;
vdata->boot_ns = boot_ns;
- vdata->nsec_base = tk->tkr.xtime_nsec;
+ vdata->nsec_base = tk->tkr_mono.xtime_nsec;
write_seqcount_end(&vdata->seq);
}
diff --git a/block/blk-merge.c b/block/blk-merge.c
index fc1ff3b1ea1f..fd3fee81c23c 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -592,7 +592,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS)) {
struct bio_vec *bprev;
- bprev = &rq->biotail->bi_io_vec[bio->bi_vcnt - 1];
+ bprev = &rq->biotail->bi_io_vec[rq->biotail->bi_vcnt - 1];
if (bvec_gap_to_prev(bprev, bio->bi_io_vec[0].bv_offset))
return false;
}
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index d53a764b05ea..be3290cc0644 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -278,9 +278,11 @@ static int bt_get(struct blk_mq_alloc_data *data,
/*
* We're out of tags on this hardware queue, kick any
* pending IO submits before going to sleep waiting for
- * some to complete.
+ * some to complete. Note that hctx can be NULL here for
+ * reserved tag allocation.
*/
- blk_mq_run_hw_queue(hctx, false);
+ if (hctx)
+ blk_mq_run_hw_queue(hctx, false);
/*
* Retry tag allocation after running the hardware queue,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4f4bea21052e..b7b8933ec241 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1938,7 +1938,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
*/
if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release,
PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
- goto err_map;
+ goto err_mq_usage;
setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
blk_queue_rq_timeout(q, 30000);
@@ -1981,7 +1981,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
blk_mq_init_cpu_queues(q, set->nr_hw_queues);
if (blk_mq_init_hw_queues(q, set))
- goto err_hw;
+ goto err_mq_usage;
mutex_lock(&all_q_mutex);
list_add_tail(&q->all_q_node, &all_q_list);
@@ -1993,7 +1993,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
return q;
-err_hw:
+err_mq_usage:
blk_cleanup_queue(q);
err_hctxs:
kfree(map);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 4c35f0822d06..ef150ebb4c30 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4737,7 +4737,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
return NULL;
/* libsas case */
- if (!ap->scsi_host) {
+ if (ap->flags & ATA_FLAG_SAS_HOST) {
tag = ata_sas_allocate_tag(ap);
if (tag < 0)
return NULL;
@@ -4776,7 +4776,7 @@ void ata_qc_free(struct ata_queued_cmd *qc)
tag = qc->tag;
if (likely(ata_tag_valid(tag))) {
qc->tag = ATA_TAG_POISON;
- if (!ap->scsi_host)
+ if (ap->flags & ATA_FLAG_SAS_HOST)
ata_sas_free_tag(tag, ap);
}
}
diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index beb8b27d4621..a13587b5c2be 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -243,4 +243,12 @@ extern struct regcache_ops regcache_rbtree_ops;
extern struct regcache_ops regcache_lzo_ops;
extern struct regcache_ops regcache_flat_ops;
+static inline const char *regmap_name(const struct regmap *map)
+{
+ if (map->dev)
+ return dev_name(map->dev);
+
+ return map->name;
+}
+
#endif
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index da84f544c544..87db9893b463 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -218,7 +218,7 @@ int regcache_read(struct regmap *map,
ret = map->cache_ops->read(map, reg, value);
if (ret == 0)
- trace_regmap_reg_read_cache(map->dev, reg, *value);
+ trace_regmap_reg_read_cache(map, reg, *value);
return ret;
}
@@ -311,7 +311,7 @@ int regcache_sync(struct regmap *map)
dev_dbg(map->dev, "Syncing %s cache\n",
map->cache_ops->name);
name = map->cache_ops->name;
- trace_regcache_sync(map->dev, name, "start");
+ trace_regcache_sync(map, name, "start");
if (!map->cache_dirty)
goto out;
@@ -346,7 +346,7 @@ out:
regmap_async_complete(map);
- trace_regcache_sync(map->dev, name, "stop");
+ trace_regcache_sync(map, name, "stop");
return ret;
}
@@ -381,7 +381,7 @@ int regcache_sync_region(struct regmap *map, unsigned int min,
name = map->cache_ops->name;
dev_dbg(map->dev, "Syncing %s cache from %d-%d\n", name, min, max);
- trace_regcache_sync(map->dev, name, "start region");
+ trace_regcache_sync(map, name, "start region");
if (!map->cache_dirty)
goto out;
@@ -401,7 +401,7 @@ out:
regmap_async_complete(map);
- trace_regcache_sync(map->dev, name, "stop region");
+ trace_regcache_sync(map, name, "stop region");
return ret;
}
@@ -428,7 +428,7 @@ int regcache_drop_region(struct regmap *map, unsigned int min,
map->lock(map->lock_arg);
- trace_regcache_drop_region(map->dev, min, max);
+ trace_regcache_drop_region(map, min, max);
ret = map->cache_ops->drop(map, min, max);
@@ -455,7 +455,7 @@ void regcache_cache_only(struct regmap *map, bool enable)
map->lock(map->lock_arg);
WARN_ON(map->cache_bypass && enable);
map->cache_only = enable;
- trace_regmap_cache_only(map->dev, enable);
+ trace_regmap_cache_only(map, enable);
map->unlock(map->lock_arg);
}
EXPORT_SYMBOL_GPL(regcache_cache_only);
@@ -493,7 +493,7 @@ void regcache_cache_bypass(struct regmap *map, bool enable)
map->lock(map->lock_arg);
WARN_ON(map->cache_only && enable);
map->cache_bypass = enable;
- trace_regmap_cache_bypass(map->dev, enable);
+ trace_regmap_cache_bypass(map, enable);
map->unlock(map->lock_arg);
}
EXPORT_SYMBOL_GPL(regcache_cache_bypass);
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index f99b098ddabf..dbfe6a69c3da 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -1281,7 +1281,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
if (map->async && map->bus->async_write) {
struct regmap_async *async;
- trace_regmap_async_write_start(map->dev, reg, val_len);
+ trace_regmap_async_write_start(map, reg, val_len);
spin_lock_irqsave(&map->async_lock, flags);
async = list_first_entry_or_null(&map->async_free,
@@ -1339,8 +1339,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
return ret;
}
- trace_regmap_hw_write_start(map->dev, reg,
- val_len / map->format.val_bytes);
+ trace_regmap_hw_write_start(map, reg, val_len / map->format.val_bytes);
/* If we're doing a single register write we can probably just
* send the work_buf directly, otherwise try to do a gather
@@ -1372,8 +1371,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
kfree(buf);
}
- trace_regmap_hw_write_done(map->dev, reg,
- val_len / map->format.val_bytes);
+ trace_regmap_hw_write_done(map, reg, val_len / map->format.val_bytes);
return ret;
}
@@ -1407,12 +1405,12 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg,
map->format.format_write(map, reg, val);
- trace_regmap_hw_write_start(map->dev, reg, 1);
+ trace_regmap_hw_write_start(map, reg, 1);
ret = map->bus->write(map->bus_context, map->work_buf,
map->format.buf_size);
- trace_regmap_hw_write_done(map->dev, reg, 1);
+ trace_regmap_hw_write_done(map, reg, 1);
return ret;
}
@@ -1470,7 +1468,7 @@ int _regmap_write(struct regmap *map, unsigned int reg,
dev_info(map->dev, "%x <= %x\n", reg, val);
#endif
- trace_regmap_reg_write(map->dev, reg, val);
+ trace_regmap_reg_write(map, reg, val);
return map->reg_write(context, reg, val);
}
@@ -1773,7 +1771,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map,
for (i = 0; i < num_regs; i++) {
int reg = regs[i].reg;
int val = regs[i].def;
- trace_regmap_hw_write_start(map->dev, reg, 1);
+ trace_regmap_hw_write_start(map, reg, 1);
map->format.format_reg(u8, reg, map->reg_shift);
u8 += reg_bytes + pad_bytes;
map->format.format_val(u8, val, 0);
@@ -1788,7 +1786,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map,
for (i = 0; i < num_regs; i++) {
int reg = regs[i].reg;
- trace_regmap_hw_write_done(map->dev, reg, 1);
+ trace_regmap_hw_write_done(map, reg, 1);
}
return ret;
}
@@ -2059,15 +2057,13 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
*/
u8[0] |= map->read_flag_mask;
- trace_regmap_hw_read_start(map->dev, reg,
- val_len / map->format.val_bytes);
+ trace_regmap_hw_read_start(map, reg, val_len / map->format.val_bytes);
ret = map->bus->read(map->bus_context, map->work_buf,
map->format.reg_bytes + map->format.pad_bytes,
val, val_len);
- trace_regmap_hw_read_done(map->dev, reg,
- val_len / map->format.val_bytes);
+ trace_regmap_hw_read_done(map, reg, val_len / map->format.val_bytes);
return ret;
}
@@ -2123,7 +2119,7 @@ static int _regmap_read(struct regmap *map, unsigned int reg,
dev_info(map->dev, "%x => %x\n", reg, *val);
#endif
- trace_regmap_reg_read(map->dev, reg, *val);
+ trace_regmap_reg_read(map, reg, *val);
if (!map->cache_bypass)
regcache_write(map, reg, *val);
@@ -2480,7 +2476,7 @@ void regmap_async_complete_cb(struct regmap_async *async, int ret)
struct regmap *map = async->map;
bool wake;
- trace_regmap_async_io_complete(map->dev);
+ trace_regmap_async_io_complete(map);
spin_lock(&map->async_lock);
list_move(&async->list, &map->async_free);
@@ -2525,7 +2521,7 @@ int regmap_async_complete(struct regmap *map)
if (!map->bus || !map->bus->async_write)
return 0;
- trace_regmap_async_complete_start(map->dev);
+ trace_regmap_async_complete_start(map);
wait_event(map->async_waitq, regmap_async_is_done(map));
@@ -2534,7 +2530,7 @@ int regmap_async_complete(struct regmap *map)
map->async_ret = 0;
spin_unlock_irqrestore(&map->async_lock, flags);
- trace_regmap_async_complete_done(map->dev);
+ trace_regmap_async_complete_done(map);
return ret;
}
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 4bc2a5cb9935..a98c41f72c63 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -803,10 +803,6 @@ static int __init nbd_init(void)
return -EINVAL;
}
- nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL);
- if (!nbd_dev)
- return -ENOMEM;
-
part_shift = 0;
if (max_part > 0) {
part_shift = fls(max_part);
@@ -828,6 +824,10 @@ static int __init nbd_init(void)
if (nbds_max > 1UL << (MINORBITS - part_shift))
return -EINVAL;
+ nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL);
+ if (!nbd_dev)
+ return -ENOMEM;
+
for (i = 0; i < nbds_max; i++) {
struct gendisk *disk = alloc_disk(1 << part_shift);
if (!disk)
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index ceb32dd52a6c..e23be20a3417 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -3003,6 +3003,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
}
get_device(dev->device);
+ INIT_LIST_HEAD(&dev->node);
INIT_WORK(&dev->probe_work, nvme_async_probe);
schedule_work(&dev->probe_work);
return 0;
diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c
index d0a7bd66b8b9..dc3c6ee04aaa 100644
--- a/drivers/clocksource/em_sti.c
+++ b/drivers/clocksource/em_sti.c
@@ -210,7 +210,7 @@ static int em_sti_clocksource_enable(struct clocksource *cs)
ret = em_sti_start(p, USER_CLOCKSOURCE);
if (!ret)
- __clocksource_updatefreq_hz(cs, p->rate);
+ __clocksource_update_freq_hz(cs, p->rate);
return ret;
}
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index 2bd13b53b727..b8ff3c64cc45 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -641,7 +641,7 @@ static int sh_cmt_clocksource_enable(struct clocksource *cs)
ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE);
if (!ret) {
- __clocksource_updatefreq_hz(cs, ch->rate);
+ __clocksource_update_freq_hz(cs, ch->rate);
ch->cs_enabled = true;
}
return ret;
diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c
index f150ca82bfaf..b6b8fa3cd211 100644
--- a/drivers/clocksource/sh_tmu.c
+++ b/drivers/clocksource/sh_tmu.c
@@ -272,7 +272,7 @@ static int sh_tmu_clocksource_enable(struct clocksource *cs)
ret = sh_tmu_enable(ch);
if (!ret) {
- __clocksource_updatefreq_hz(cs, ch->rate);
+ __clocksource_update_freq_hz(cs, ch->rate);
ch->cs_enabled = true;
}
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index f6d04c7b5115..679b10e34fb5 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -525,17 +525,6 @@ void drm_framebuffer_reference(struct drm_framebuffer *fb)
}
EXPORT_SYMBOL(drm_framebuffer_reference);
-static void drm_framebuffer_free_bug(struct kref *kref)
-{
- BUG();
-}
-
-static void __drm_framebuffer_unreference(struct drm_framebuffer *fb)
-{
- DRM_DEBUG("%p: FB ID: %d (%d)\n", fb, fb->base.id, atomic_read(&fb->refcount.refcount));
- kref_put(&fb->refcount, drm_framebuffer_free_bug);
-}
-
/**
* drm_framebuffer_unregister_private - unregister a private fb from the lookup idr
* @fb: fb to unregister
@@ -1320,7 +1309,7 @@ void drm_plane_force_disable(struct drm_plane *plane)
return;
}
/* disconnect the plane from the fb and crtc: */
- __drm_framebuffer_unreference(plane->old_fb);
+ drm_framebuffer_unreference(plane->old_fb);
plane->old_fb = NULL;
plane->fb = NULL;
plane->crtc = NULL;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5b205863b659..27ea6bdebce7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2737,24 +2737,11 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
WARN_ON(i915_verify_lists(ring->dev));
- /* Move any buffers on the active list that are no longer referenced
- * by the ringbuffer to the flushing/inactive lists as appropriate,
- * before we free the context associated with the requests.
+ /* Retire requests first as we use it above for the early return.
+ * If we retire requests last, we may use a later seqno and so clear
+ * the requests lists without clearing the active list, leading to
+ * confusion.
*/
- while (!list_empty(&ring->active_list)) {
- struct drm_i915_gem_object *obj;
-
- obj = list_first_entry(&ring->active_list,
- struct drm_i915_gem_object,
- ring_list);
-
- if (!i915_gem_request_completed(obj->last_read_req, true))
- break;
-
- i915_gem_object_move_to_inactive(obj);
- }
-
-
while (!list_empty(&ring->request_list)) {
struct drm_i915_gem_request *request;
struct intel_ringbuffer *ringbuf;
@@ -2789,6 +2776,23 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
i915_gem_free_request(request);
}
+ /* Move any buffers on the active list that are no longer referenced
+ * by the ringbuffer to the flushing/inactive lists as appropriate,
+ * before we free the context associated with the requests.
+ */
+ while (!list_empty(&ring->active_list)) {
+ struct drm_i915_gem_object *obj;
+
+ obj = list_first_entry(&ring->active_list,
+ struct drm_i915_gem_object,
+ ring_list);
+
+ if (!i915_gem_request_completed(obj->last_read_req, true))
+ break;
+
+ i915_gem_object_move_to_inactive(obj);
+ }
+
if (unlikely(ring->trace_irq_req &&
i915_gem_request_completed(ring->trace_irq_req, true))) {
ring->irq_put(ring);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 6d22128d97b1..f75173c20f47 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2438,8 +2438,15 @@ intel_find_plane_obj(struct intel_crtc *intel_crtc,
if (!intel_crtc->base.primary->fb)
return;
- if (intel_alloc_plane_obj(intel_crtc, plane_config))
+ if (intel_alloc_plane_obj(intel_crtc, plane_config)) {
+ struct drm_plane *primary = intel_crtc->base.primary;
+
+ primary->state->crtc = &intel_crtc->base;
+ primary->crtc = &intel_crtc->base;
+ update_state_fb(primary);
+
return;
+ }
kfree(intel_crtc->base.primary->fb);
intel_crtc->base.primary->fb = NULL;
@@ -2462,11 +2469,15 @@ intel_find_plane_obj(struct intel_crtc *intel_crtc,
continue;
if (i915_gem_obj_ggtt_offset(obj) == plane_config->base) {
+ struct drm_plane *primary = intel_crtc->base.primary;
+
if (obj->tiling_mode != I915_TILING_NONE)
dev_priv->preserve_bios_swizzle = true;
drm_framebuffer_reference(c->primary->fb);
- intel_crtc->base.primary->fb = c->primary->fb;
+ primary->fb = c->primary->fb;
+ primary->state->crtc = &intel_crtc->base;
+ primary->crtc = &intel_crtc->base;
obj->frontbuffer_bits |= INTEL_FRONTBUFFER_PRIMARY(intel_crtc->pipe);
break;
}
@@ -6663,7 +6674,6 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc,
plane_config->size);
crtc->base.primary->fb = fb;
- update_state_fb(crtc->base.primary);
}
static void chv_crtc_clock_get(struct intel_crtc *crtc,
@@ -7704,7 +7714,6 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc,
plane_config->size);
crtc->base.primary->fb = fb;
- update_state_fb(crtc->base.primary);
return;
error:
@@ -7798,7 +7807,6 @@ ironlake_get_initial_plane_config(struct intel_crtc *crtc,
plane_config->size);
crtc->base.primary->fb = fb;
- update_state_fb(crtc->base.primary);
}
static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 9b641b38b857..8001fe9e3434 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -433,7 +433,6 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode)
dm_get(md);
atomic_inc(&md->open_count);
-
out:
spin_unlock(&_minor_lock);
@@ -442,16 +441,20 @@ out:
static void dm_blk_close(struct gendisk *disk, fmode_t mode)
{
- struct mapped_device *md = disk->private_data;
+ struct mapped_device *md;
spin_lock(&_minor_lock);
+ md = disk->private_data;
+ if (WARN_ON(!md))
+ goto out;
+
if (atomic_dec_and_test(&md->open_count) &&
(test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
queue_work(deferred_remove_workqueue, &deferred_remove_work);
dm_put(md);
-
+out:
spin_unlock(&_minor_lock);
}
@@ -2241,7 +2244,6 @@ static void free_dev(struct mapped_device *md)
int minor = MINOR(disk_devt(md->disk));
unlock_fs(md);
- bdput(md->bdev);
destroy_workqueue(md->wq);
if (md->kworker_task)
@@ -2252,19 +2254,22 @@ static void free_dev(struct mapped_device *md)
mempool_destroy(md->rq_pool);
if (md->bs)
bioset_free(md->bs);
- blk_integrity_unregister(md->disk);
- del_gendisk(md->disk);
+
cleanup_srcu_struct(&md->io_barrier);
free_table_devices(&md->table_devices);
- free_minor(minor);
+ dm_stats_cleanup(&md->stats);
spin_lock(&_minor_lock);
md->disk->private_data = NULL;
spin_unlock(&_minor_lock);
-
+ if (blk_get_integrity(md->disk))
+ blk_integrity_unregister(md->disk);
+ del_gendisk(md->disk);
put_disk(md->disk);
blk_cleanup_queue(md->queue);
- dm_stats_cleanup(&md->stats);
+ bdput(md->bdev);
+ free_minor(minor);
+
module_put(THIS_MODULE);
kfree(md);
}
@@ -2642,8 +2647,9 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
might_sleep();
- spin_lock(&_minor_lock);
map = dm_get_live_table(md, &srcu_idx);
+
+ spin_lock(&_minor_lock);
idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
set_bit(DMF_FREEING, &md->flags);
spin_unlock(&_minor_lock);
diff --git a/drivers/mfd/kempld-core.c b/drivers/mfd/kempld-core.c
index f38ec424872e..5615522f8d62 100644
--- a/drivers/mfd/kempld-core.c
+++ b/drivers/mfd/kempld-core.c
@@ -739,7 +739,7 @@ static int __init kempld_init(void)
for (id = kempld_dmi_table;
id->matches[0].slot != DMI_NONE; id++)
if (strstr(id->ident, force_device_id))
- if (id->callback && id->callback(id))
+ if (id->callback && !id->callback(id))
break;
if (id->matches[0].slot == DMI_NONE)
return -ENODEV;
diff --git a/drivers/mfd/rtsx_usb.c b/drivers/mfd/rtsx_usb.c
index ede50244f265..dbd907d7170e 100644
--- a/drivers/mfd/rtsx_usb.c
+++ b/drivers/mfd/rtsx_usb.c
@@ -196,18 +196,27 @@ EXPORT_SYMBOL_GPL(rtsx_usb_ep0_write_register);
int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data)
{
u16 value;
+ u8 *buf;
+ int ret;
if (!data)
return -EINVAL;
- *data = 0;
+
+ buf = kzalloc(sizeof(u8), GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
addr |= EP0_READ_REG_CMD << EP0_OP_SHIFT;
value = swab16(addr);
- return usb_control_msg(ucr->pusb_dev,
+ ret = usb_control_msg(ucr->pusb_dev,
usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP,
USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
- value, 0, data, 1, 100);
+ value, 0, buf, 1, 100);
+ *data = *buf;
+
+ kfree(buf);
+ return ret;
}
EXPORT_SYMBOL_GPL(rtsx_usb_ep0_read_register);
@@ -288,18 +297,27 @@ static int rtsx_usb_get_status_with_bulk(struct rtsx_ucr *ucr, u16 *status)
int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status)
{
int ret;
+ u16 *buf;
if (!status)
return -EINVAL;
- if (polling_pipe == 0)
+ if (polling_pipe == 0) {
+ buf = kzalloc(sizeof(u16), GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
ret = usb_control_msg(ucr->pusb_dev,
usb_rcvctrlpipe(ucr->pusb_dev, 0),
RTSX_USB_REQ_POLL,
USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
- 0, 0, status, 2, 100);
- else
+ 0, 0, buf, 2, 100);
+ *status = *buf;
+
+ kfree(buf);
+ } else {
ret = rtsx_usb_get_status_with_bulk(ucr, status);
+ }
/* usb_control_msg may return positive when success */
if (ret < 0)
diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index 11d6e6561df1..15a8190a6f75 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -1543,7 +1543,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
{
struct pcnet32_private *lp;
int i, media;
- int fdx, mii, fset, dxsuflo;
+ int fdx, mii, fset, dxsuflo, sram;
int chip_version;
char *chipname;
struct net_device *dev;
@@ -1580,7 +1580,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
}
/* initialize variables */
- fdx = mii = fset = dxsuflo = 0;
+ fdx = mii = fset = dxsuflo = sram = 0;
chip_version = (chip_version >> 12) & 0xffff;
switch (chip_version) {
@@ -1613,6 +1613,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
chipname = "PCnet/FAST III 79C973"; /* PCI */
fdx = 1;
mii = 1;
+ sram = 1;
break;
case 0x2626:
chipname = "PCnet/Home 79C978"; /* PCI */
@@ -1636,6 +1637,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
chipname = "PCnet/FAST III 79C975"; /* PCI */
fdx = 1;
mii = 1;
+ sram = 1;
break;
case 0x2628:
chipname = "PCnet/PRO 79C976";
@@ -1664,6 +1666,31 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
dxsuflo = 1;
}
+ /*
+ * The Am79C973/Am79C975 controllers come with 12K of SRAM
+ * which we can use for the Tx/Rx buffers but most importantly,
+ * the use of SRAM allow us to use the BCR18:NOUFLO bit to avoid
+ * Tx fifo underflows.
+ */
+ if (sram) {
+ /*
+ * The SRAM is being configured in two steps. First we
+ * set the SRAM size in the BCR25:SRAM_SIZE bits. According
+ * to the datasheet, each bit corresponds to a 512-byte
+ * page so we can have at most 24 pages. The SRAM_SIZE
+ * holds the value of the upper 8 bits of the 16-bit SRAM size.
+ * The low 8-bits start at 0x00 and end at 0xff. So the
+ * address range is from 0x0000 up to 0x17ff. Therefore,
+ * the SRAM_SIZE is set to 0x17. The next step is to set
+ * the BCR26:SRAM_BND midway through so the Tx and Rx
+ * buffers can share the SRAM equally.
+ */
+ a->write_bcr(ioaddr, 25, 0x17);
+ a->write_bcr(ioaddr, 26, 0xc);
+ /* And finally enable the NOUFLO bit */
+ a->write_bcr(ioaddr, 18, a->read_bcr(ioaddr, 18) | (1 << 11));
+ }
+
dev = alloc_etherdev(sizeof(*lp));
if (!dev) {
ret = -ENOMEM;
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 27de37aa90af..27b9fe99a9bd 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -354,6 +354,7 @@ struct be_vf_cfg {
u16 vlan_tag;
u32 tx_rate;
u32 plink_tracking;
+ u32 privileges;
};
enum vf_state {
@@ -423,6 +424,7 @@ struct be_adapter {
u8 __iomem *csr; /* CSR BAR used only for BE2/3 */
u8 __iomem *db; /* Door Bell */
+ u8 __iomem *pcicfg; /* On SH,BEx only. Shadow of PCI config space */
struct mutex mbox_lock; /* For serializing mbox cmds to BE card */
struct be_dma_mem mbox_mem;
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 36916cfa70f9..7f05f309e935 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -1902,15 +1902,11 @@ int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *set_eqd,
{
int num_eqs, i = 0;
- if (lancer_chip(adapter) && num > 8) {
- while (num) {
- num_eqs = min(num, 8);
- __be_cmd_modify_eqd(adapter, &set_eqd[i], num_eqs);
- i += num_eqs;
- num -= num_eqs;
- }
- } else {
- __be_cmd_modify_eqd(adapter, set_eqd, num);
+ while (num) {
+ num_eqs = min(num, 8);
+ __be_cmd_modify_eqd(adapter, &set_eqd[i], num_eqs);
+ i += num_eqs;
+ num -= num_eqs;
}
return 0;
@@ -1918,7 +1914,7 @@ int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *set_eqd,
/* Uses sycnhronous mcc */
int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array,
- u32 num)
+ u32 num, u32 domain)
{
struct be_mcc_wrb *wrb;
struct be_cmd_req_vlan_config *req;
@@ -1936,6 +1932,7 @@ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array,
be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
OPCODE_COMMON_NTWK_VLAN_CONFIG, sizeof(*req),
wrb, NULL);
+ req->hdr.domain = domain;
req->interface_id = if_id;
req->untagged = BE_IF_FLAGS_UNTAGGED & be_if_cap_flags(adapter) ? 1 : 0;
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index db761e8e42a3..a7634a3f052a 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -2256,7 +2256,7 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter,
int be_cmd_get_fw_ver(struct be_adapter *adapter);
int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *, int num);
int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array,
- u32 num);
+ u32 num, u32 domain);
int be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 status);
int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc);
int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc);
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 0a816859aca5..e6b790f0d9dc 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -1171,7 +1171,7 @@ static int be_vid_config(struct be_adapter *adapter)
for_each_set_bit(i, adapter->vids, VLAN_N_VID)
vids[num++] = cpu_to_le16(i);
- status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num);
+ status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
if (status) {
dev_err(dev, "Setting HW VLAN filtering failed\n");
/* Set to VLAN promisc mode as setting VLAN filter failed */
@@ -1380,11 +1380,67 @@ static int be_get_vf_config(struct net_device *netdev, int vf,
return 0;
}
+static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
+{
+ struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
+ u16 vids[BE_NUM_VLANS_SUPPORTED];
+ int vf_if_id = vf_cfg->if_handle;
+ int status;
+
+ /* Enable Transparent VLAN Tagging */
+ status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0);
+ if (status)
+ return status;
+
+ /* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
+ vids[0] = 0;
+ status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
+ if (!status)
+ dev_info(&adapter->pdev->dev,
+ "Cleared guest VLANs on VF%d", vf);
+
+ /* After TVT is enabled, disallow VFs to program VLAN filters */
+ if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
+ status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
+ ~BE_PRIV_FILTMGMT, vf + 1);
+ if (!status)
+ vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
+ }
+ return 0;
+}
+
+static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
+{
+ struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
+ struct device *dev = &adapter->pdev->dev;
+ int status;
+
+ /* Reset Transparent VLAN Tagging. */
+ status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
+ vf_cfg->if_handle, 0);
+ if (status)
+ return status;
+
+ /* Allow VFs to program VLAN filtering */
+ if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
+ status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
+ BE_PRIV_FILTMGMT, vf + 1);
+ if (!status) {
+ vf_cfg->privileges |= BE_PRIV_FILTMGMT;
+ dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
+ }
+ }
+
+ dev_info(dev,
+ "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
+ return 0;
+}
+
static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
{
struct be_adapter *adapter = netdev_priv(netdev);
struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
- int status = 0;
+ int status;
if (!sriov_enabled(adapter))
return -EPERM;
@@ -1394,24 +1450,19 @@ static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
if (vlan || qos) {
vlan |= qos << VLAN_PRIO_SHIFT;
- if (vf_cfg->vlan_tag != vlan)
- status = be_cmd_set_hsw_config(adapter, vlan, vf + 1,
- vf_cfg->if_handle, 0);
+ status = be_set_vf_tvt(adapter, vf, vlan);
} else {
- /* Reset Transparent Vlan Tagging. */
- status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID,
- vf + 1, vf_cfg->if_handle, 0);
+ status = be_clear_vf_tvt(adapter, vf);
}
if (status) {
dev_err(&adapter->pdev->dev,
- "VLAN %d config on VF %d failed : %#x\n", vlan,
- vf, status);
+ "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
+ status);
return be_cmd_status(status);
}
vf_cfg->vlan_tag = vlan;
-
return 0;
}
@@ -2772,14 +2823,12 @@ void be_detect_error(struct be_adapter *adapter)
}
}
} else {
- pci_read_config_dword(adapter->pdev,
- PCICFG_UE_STATUS_LOW, &ue_lo);
- pci_read_config_dword(adapter->pdev,
- PCICFG_UE_STATUS_HIGH, &ue_hi);
- pci_read_config_dword(adapter->pdev,
- PCICFG_UE_STATUS_LOW_MASK, &ue_lo_mask);
- pci_read_config_dword(adapter->pdev,
- PCICFG_UE_STATUS_HI_MASK, &ue_hi_mask);
+ ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
+ ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
+ ue_lo_mask = ioread32(adapter->pcicfg +
+ PCICFG_UE_STATUS_LOW_MASK);
+ ue_hi_mask = ioread32(adapter->pcicfg +
+ PCICFG_UE_STATUS_HI_MASK);
ue_lo = (ue_lo & ~ue_lo_mask);
ue_hi = (ue_hi & ~ue_hi_mask);
@@ -3339,7 +3388,6 @@ static int be_if_create(struct be_adapter *adapter, u32 *if_handle,
u32 cap_flags, u32 vf)
{
u32 en_flags;
- int status;
en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST |
BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS |
@@ -3347,10 +3395,7 @@ static int be_if_create(struct be_adapter *adapter, u32 *if_handle,
en_flags &= cap_flags;
- status = be_cmd_if_create(adapter, cap_flags, en_flags,
- if_handle, vf);
-
- return status;
+ return be_cmd_if_create(adapter, cap_flags, en_flags, if_handle, vf);
}
static int be_vfs_if_create(struct be_adapter *adapter)
@@ -3368,8 +3413,13 @@ static int be_vfs_if_create(struct be_adapter *adapter)
if (!BE3_chip(adapter)) {
status = be_cmd_get_profile_config(adapter, &res,
vf + 1);
- if (!status)
+ if (!status) {
cap_flags = res.if_cap_flags;
+ /* Prevent VFs from enabling VLAN promiscuous
+ * mode
+ */
+ cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
+ }
}
status = be_if_create(adapter, &vf_cfg->if_handle,
@@ -3403,7 +3453,6 @@ static int be_vf_setup(struct be_adapter *adapter)
struct device *dev = &adapter->pdev->dev;
struct be_vf_cfg *vf_cfg;
int status, old_vfs, vf;
- u32 privileges;
old_vfs = pci_num_vf(adapter->pdev);
@@ -3433,15 +3482,18 @@ static int be_vf_setup(struct be_adapter *adapter)
for_all_vfs(adapter, vf_cfg, vf) {
/* Allow VFs to programs MAC/VLAN filters */
- status = be_cmd_get_fn_privileges(adapter, &privileges, vf + 1);
- if (!status && !(privileges & BE_PRIV_FILTMGMT)) {
+ status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
+ vf + 1);
+ if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
status = be_cmd_set_fn_privileges(adapter,
- privileges |
+ vf_cfg->privileges |
BE_PRIV_FILTMGMT,
vf + 1);
- if (!status)
+ if (!status) {
+ vf_cfg->privileges |= BE_PRIV_FILTMGMT;
dev_info(dev, "VF%d has FILTMGMT privilege\n",
vf);
+ }
}
/* Allow full available bandwidth */
@@ -4820,24 +4872,37 @@ static int be_roce_map_pci_bars(struct be_adapter *adapter)
static int be_map_pci_bars(struct be_adapter *adapter)
{
+ struct pci_dev *pdev = adapter->pdev;
u8 __iomem *addr;
if (BEx_chip(adapter) && be_physfn(adapter)) {
- adapter->csr = pci_iomap(adapter->pdev, 2, 0);
+ adapter->csr = pci_iomap(pdev, 2, 0);
if (!adapter->csr)
return -ENOMEM;
}
- addr = pci_iomap(adapter->pdev, db_bar(adapter), 0);
+ addr = pci_iomap(pdev, db_bar(adapter), 0);
if (!addr)
goto pci_map_err;
adapter->db = addr;
+ if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
+ if (be_physfn(adapter)) {
+ /* PCICFG is the 2nd BAR in BE2 */
+ addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
+ if (!addr)
+ goto pci_map_err;
+ adapter->pcicfg = addr;
+ } else {
+ adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
+ }
+ }
+
be_roce_map_pci_bars(adapter);
return 0;
pci_map_err:
- dev_err(&adapter->pdev->dev, "Error in mapping PCI BARs\n");
+ dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
be_unmap_pci_bars(adapter);
return -ENOMEM;
}
diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c
index fe48f4c51373..1762ad3910b2 100644
--- a/drivers/net/usb/cx82310_eth.c
+++ b/drivers/net/usb/cx82310_eth.c
@@ -46,8 +46,7 @@ enum cx82310_status {
};
#define CMD_PACKET_SIZE 64
-/* first command after power on can take around 8 seconds */
-#define CMD_TIMEOUT 15000
+#define CMD_TIMEOUT 100
#define CMD_REPLY_RETRY 5
#define CX82310_MTU 1514
@@ -78,8 +77,9 @@ static int cx82310_cmd(struct usbnet *dev, enum cx82310_cmd cmd, bool reply,
ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, CMD_EP), buf,
CMD_PACKET_SIZE, &actual_len, CMD_TIMEOUT);
if (ret < 0) {
- dev_err(&dev->udev->dev, "send command %#x: error %d\n",
- cmd, ret);
+ if (cmd != CMD_GET_LINK_STATUS)
+ dev_err(&dev->udev->dev, "send command %#x: error %d\n",
+ cmd, ret);
goto end;
}
@@ -90,8 +90,10 @@ static int cx82310_cmd(struct usbnet *dev, enum cx82310_cmd cmd, bool reply,
buf, CMD_PACKET_SIZE, &actual_len,
CMD_TIMEOUT);
if (ret < 0) {
- dev_err(&dev->udev->dev,
- "reply receive error %d\n", ret);
+ if (cmd != CMD_GET_LINK_STATUS)
+ dev_err(&dev->udev->dev,
+ "reply receive error %d\n",
+ ret);
goto end;
}
if (actual_len > 0)
@@ -134,6 +136,8 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf)
int ret;
char buf[15];
struct usb_device *udev = dev->udev;
+ u8 link[3];
+ int timeout = 50;
/* avoid ADSL modems - continue only if iProduct is "USB NET CARD" */
if (usb_string(udev, udev->descriptor.iProduct, buf, sizeof(buf)) > 0
@@ -160,6 +164,20 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf)
if (!dev->partial_data)
return -ENOMEM;
+ /* wait for firmware to become ready (indicated by the link being up) */
+ while (--timeout) {
+ ret = cx82310_cmd(dev, CMD_GET_LINK_STATUS, true, NULL, 0,
+ link, sizeof(link));
+ /* the command can time out during boot - it's not an error */
+ if (!ret && link[0] == 1 && link[2] == 1)
+ break;
+ msleep(500);
+ };
+ if (!timeout) {
+ dev_err(&udev->dev, "firmware not ready in time\n");
+ return -ETIMEDOUT;
+ }
+
/* enable ethernet mode (?) */
ret = cx82310_cmd(dev, CMD_ETHERNET_MODE, true, "\x01", 1, NULL, 0);
if (ret) {
diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c
index 9205f433573c..18198316b6cf 100644
--- a/drivers/regulator/palmas-regulator.c
+++ b/drivers/regulator/palmas-regulator.c
@@ -1572,6 +1572,10 @@ static int palmas_regulators_probe(struct platform_device *pdev)
if (!pmic)
return -ENOMEM;
+ if (of_device_is_compatible(node, "ti,tps659038-pmic"))
+ palmas_generic_regs_info[PALMAS_REG_REGEN2].ctrl_addr =
+ TPS659038_REGEN2_CTRL;
+
pmic->dev = &pdev->dev;
pmic->palmas = palmas;
palmas->pmic = pmic;
diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c
index e2436d140175..3a6fd3a8a2ec 100644
--- a/drivers/rtc/rtc-mrst.c
+++ b/drivers/rtc/rtc-mrst.c
@@ -413,8 +413,8 @@ static void rtc_mrst_do_remove(struct device *dev)
mrst->dev = NULL;
}
-#ifdef CONFIG_PM
-static int mrst_suspend(struct device *dev, pm_message_t mesg)
+#ifdef CONFIG_PM_SLEEP
+static int mrst_suspend(struct device *dev)
{
struct mrst_rtc *mrst = dev_get_drvdata(dev);
unsigned char tmp;
@@ -453,7 +453,7 @@ static int mrst_suspend(struct device *dev, pm_message_t mesg)
*/
static inline int mrst_poweroff(struct device *dev)
{
- return mrst_suspend(dev, PMSG_HIBERNATE);
+ return mrst_suspend(dev);
}
static int mrst_resume(struct device *dev)
@@ -490,9 +490,11 @@ static int mrst_resume(struct device *dev)
return 0;
}
+static SIMPLE_DEV_PM_OPS(mrst_pm_ops, mrst_suspend, mrst_resume);
+#define MRST_PM_OPS (&mrst_pm_ops)
+
#else
-#define mrst_suspend NULL
-#define mrst_resume NULL
+#define MRST_PM_OPS NULL
static inline int mrst_poweroff(struct device *dev)
{
@@ -529,9 +531,8 @@ static struct platform_driver vrtc_mrst_platform_driver = {
.remove = vrtc_mrst_platform_remove,
.shutdown = vrtc_mrst_platform_shutdown,
.driver = {
- .name = (char *) driver_name,
- .suspend = mrst_suspend,
- .resume = mrst_resume,
+ .name = driver_name,
+ .pm = MRST_PM_OPS,
}
};
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 9219953ee949..d9afc51af7d3 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -6815,7 +6815,8 @@ static struct ata_port_operations ipr_sata_ops = {
};
static struct ata_port_info sata_port_info = {
- .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA,
+ .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
+ ATA_FLAG_SAS_HOST,
.pio_mask = ATA_PIO4_ONLY,
.mwdma_mask = ATA_MWDMA2,
.udma_mask = ATA_UDMA6,
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 932d9cc98d2f..9c706d8c1441 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -547,7 +547,8 @@ static struct ata_port_operations sas_sata_ops = {
};
static struct ata_port_info sata_port_info = {
- .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ,
+ .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ |
+ ATA_FLAG_SAS_HOST,
.pio_mask = ATA_PIO4,
.mwdma_mask = ATA_MWDMA2,
.udma_mask = ATA_UDMA6,
diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c
index 3ce39d10fafb..4f8c798e0633 100644
--- a/drivers/spi/spi-dw-mid.c
+++ b/drivers/spi/spi-dw-mid.c
@@ -108,7 +108,8 @@ static void dw_spi_dma_tx_done(void *arg)
{
struct dw_spi *dws = arg;
- if (test_and_clear_bit(TX_BUSY, &dws->dma_chan_busy) & BIT(RX_BUSY))
+ clear_bit(TX_BUSY, &dws->dma_chan_busy);
+ if (test_bit(RX_BUSY, &dws->dma_chan_busy))
return;
dw_spi_xfer_done(dws);
}
@@ -156,7 +157,8 @@ static void dw_spi_dma_rx_done(void *arg)
{
struct dw_spi *dws = arg;
- if (test_and_clear_bit(RX_BUSY, &dws->dma_chan_busy) & BIT(TX_BUSY))
+ clear_bit(RX_BUSY, &dws->dma_chan_busy);
+ if (test_bit(TX_BUSY, &dws->dma_chan_busy))
return;
dw_spi_xfer_done(dws);
}
diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
index ff9cdbdb6672..2b2c359f5a50 100644
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -498,7 +498,7 @@ static int spi_qup_probe(struct platform_device *pdev)
struct resource *res;
struct device *dev;
void __iomem *base;
- u32 max_freq, iomode;
+ u32 max_freq, iomode, num_cs;
int ret, irq, size;
dev = &pdev->dev;
@@ -550,10 +550,11 @@ static int spi_qup_probe(struct platform_device *pdev)
}
/* use num-cs unless not present or out of range */
- if (of_property_read_u16(dev->of_node, "num-cs",
- &master->num_chipselect) ||
- (master->num_chipselect > SPI_NUM_CHIPSELECTS))
+ if (of_property_read_u32(dev->of_node, "num-cs", &num_cs) ||
+ num_cs > SPI_NUM_CHIPSELECTS)
master->num_chipselect = SPI_NUM_CHIPSELECTS;
+ else
+ master->num_chipselect = num_cs;
master->bus_num = pdev->id;
master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index c64a3e59fce3..57a195041dc7 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1105,13 +1105,14 @@ void spi_finalize_current_message(struct spi_master *master)
"failed to unprepare message: %d\n", ret);
}
}
+
+ trace_spi_message_done(mesg);
+
master->cur_msg_prepared = false;
mesg->state = NULL;
if (mesg->complete)
mesg->complete(mesg->context);
-
- trace_spi_message_done(mesg);
}
EXPORT_SYMBOL_GPL(spi_finalize_current_message);
diff --git a/fs/affs/file.c b/fs/affs/file.c
index d2468bf95669..a91795e01a7f 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -699,8 +699,10 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
boff = tmp % bsize;
if (boff) {
bh = affs_bread_ino(inode, bidx, 0);
- if (IS_ERR(bh))
- return PTR_ERR(bh);
+ if (IS_ERR(bh)) {
+ written = PTR_ERR(bh);
+ goto err_first_bh;
+ }
tmp = min(bsize - boff, to - from);
BUG_ON(boff + tmp > bsize || tmp > bsize);
memcpy(AFFS_DATA(bh) + boff, data + from, tmp);
@@ -712,14 +714,16 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
bidx++;
} else if (bidx) {
bh = affs_bread_ino(inode, bidx - 1, 0);
- if (IS_ERR(bh))
- return PTR_ERR(bh);
+ if (IS_ERR(bh)) {
+ written = PTR_ERR(bh);
+ goto err_first_bh;
+ }
}
while (from + bsize <= to) {
prev_bh = bh;
bh = affs_getemptyblk_ino(inode, bidx);
if (IS_ERR(bh))
- goto out;
+ goto err_bh;
memcpy(AFFS_DATA(bh), data + from, bsize);
if (buffer_new(bh)) {
AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
@@ -751,7 +755,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
prev_bh = bh;
bh = affs_bread_ino(inode, bidx, 1);
if (IS_ERR(bh))
- goto out;
+ goto err_bh;
tmp = min(bsize, to - from);
BUG_ON(tmp > bsize);
memcpy(AFFS_DATA(bh), data + from, tmp);
@@ -790,12 +794,13 @@ done:
if (tmp > inode->i_size)
inode->i_size = AFFS_I(inode)->mmu_private = tmp;
+err_first_bh:
unlock_page(page);
page_cache_release(page);
return written;
-out:
+err_bh:
bh = prev_bh;
if (!written)
written = PTR_ERR(bh);
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 6e560d56094b..754fdf8c6356 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -131,13 +131,16 @@ skip:
hfs_bnode_write(node, entry, data_off + key_len, entry_len);
hfs_bnode_dump(node);
- if (new_node) {
- /* update parent key if we inserted a key
- * at the start of the first node
- */
- if (!rec && new_node != node)
- hfs_brec_update_parent(fd);
+ /*
+ * update parent key if we inserted a key
+ * at the start of the node and it is not the new node
+ */
+ if (!rec && new_node != node) {
+ hfs_bnode_read_key(node, fd->search_key, data_off + size);
+ hfs_brec_update_parent(fd);
+ }
+ if (new_node) {
hfs_bnode_put(fd->bnode);
if (!new_node->parent) {
hfs_btree_inc_height(tree);
@@ -168,9 +171,6 @@ skip:
goto again;
}
- if (!rec)
- hfs_brec_update_parent(fd);
-
return 0;
}
@@ -370,6 +370,8 @@ again:
if (IS_ERR(parent))
return PTR_ERR(parent);
__hfs_brec_find(parent, fd, hfs_find_rec_by_key);
+ if (fd->record < 0)
+ return -ENOENT;
hfs_bnode_dump(parent);
rec = fd->record;
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 2e4cb67f6e56..59af26b54d15 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -39,6 +39,8 @@ enum clock_event_mode {
CLOCK_EVT_MODE_PERIODIC,
CLOCK_EVT_MODE_ONESHOT,
CLOCK_EVT_MODE_RESUME,
+
+ /* Legacy ->set_mode() callback doesn't support below modes */
};
/*
@@ -81,7 +83,11 @@ enum clock_event_mode {
* @mode: operating mode assigned by the management code
* @features: features
* @retries: number of forced programming retries
- * @set_mode: set mode function
+ * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME.
+ * @set_mode_periodic: switch mode to periodic, if !set_mode
+ * @set_mode_oneshot: switch mode to oneshot, if !set_mode
+ * @set_mode_shutdown: switch mode to shutdown, if !set_mode
+ * @set_mode_resume: resume clkevt device, if !set_mode
* @broadcast: function to broadcast events
* @min_delta_ticks: minimum delta value in ticks stored for reconfiguration
* @max_delta_ticks: maximum delta value in ticks stored for reconfiguration
@@ -108,9 +114,20 @@ struct clock_event_device {
unsigned int features;
unsigned long retries;
- void (*broadcast)(const struct cpumask *mask);
+ /*
+ * Mode transition callback(s): Only one of the two groups should be
+ * defined:
+ * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME.
+ * - set_mode_{shutdown|periodic|oneshot|resume}().
+ */
void (*set_mode)(enum clock_event_mode mode,
struct clock_event_device *);
+ int (*set_mode_periodic)(struct clock_event_device *);
+ int (*set_mode_oneshot)(struct clock_event_device *);
+ int (*set_mode_shutdown)(struct clock_event_device *);
+ int (*set_mode_resume)(struct clock_event_device *);
+
+ void (*broadcast)(const struct cpumask *mask);
void (*suspend)(struct clock_event_device *);
void (*resume)(struct clock_event_device *);
unsigned long min_delta_ticks;
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 9c78d15d33e4..135509821c39 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -56,6 +56,7 @@ struct module;
* @shift: cycle to nanosecond divisor (power of two)
* @max_idle_ns: max idle time permitted by the clocksource (nsecs)
* @maxadj: maximum adjustment value to mult (~11%)
+ * @max_cycles: maximum safe cycle value which won't overflow on multiplication
* @flags: flags describing special properties
* @archdata: arch-specific data
* @suspend: suspend function for the clocksource, if necessary
@@ -76,7 +77,7 @@ struct clocksource {
#ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
struct arch_clocksource_data archdata;
#endif
-
+ u64 max_cycles;
const char *name;
struct list_head list;
int rating;
@@ -178,7 +179,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
}
-extern int clocksource_register(struct clocksource*);
extern int clocksource_unregister(struct clocksource*);
extern void clocksource_touch_watchdog(void);
extern struct clocksource* clocksource_get_next(void);
@@ -189,7 +189,7 @@ extern struct clocksource * __init clocksource_default_clock(void);
extern void clocksource_mark_unstable(struct clocksource *cs);
extern u64
-clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask);
+clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles);
extern void
clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
@@ -200,7 +200,16 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
extern int
__clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq);
extern void
-__clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq);
+__clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq);
+
+/*
+ * Don't call this unless you are a default clocksource
+ * (AKA: jiffies) and absolutely have to.
+ */
+static inline int __clocksource_register(struct clocksource *cs)
+{
+ return __clocksource_register_scale(cs, 1, 0);
+}
static inline int clocksource_register_hz(struct clocksource *cs, u32 hz)
{
@@ -212,14 +221,14 @@ static inline int clocksource_register_khz(struct clocksource *cs, u32 khz)
return __clocksource_register_scale(cs, 1000, khz);
}
-static inline void __clocksource_updatefreq_hz(struct clocksource *cs, u32 hz)
+static inline void __clocksource_update_freq_hz(struct clocksource *cs, u32 hz)
{
- __clocksource_updatefreq_scale(cs, 1, hz);
+ __clocksource_update_freq_scale(cs, 1, hz);
}
-static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz)
+static inline void __clocksource_update_freq_khz(struct clocksource *cs, u32 khz)
{
- __clocksource_updatefreq_scale(cs, 1000, khz);
+ __clocksource_update_freq_scale(cs, 1000, khz);
}
diff --git a/include/linux/libata.h b/include/linux/libata.h
index fc03efa64ffe..6b08cc106c21 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -232,6 +232,7 @@ enum {
* led */
ATA_FLAG_NO_DIPM = (1 << 23), /* host not happy with DIPM */
ATA_FLAG_LOWTAG = (1 << 24), /* host wants lowest available tag */
+ ATA_FLAG_SAS_HOST = (1 << 25), /* SAS host */
/* bits 24:31 of ap->flags are reserved for LLD specific flags */
diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h
index fb0390a1a498..ee7b1ce7a6f8 100644
--- a/include/linux/mfd/palmas.h
+++ b/include/linux/mfd/palmas.h
@@ -2999,6 +2999,9 @@ enum usb_irq_events {
#define PALMAS_GPADC_TRIM15 0x0E
#define PALMAS_GPADC_TRIM16 0x0F
+/* TPS659038 regen2_ctrl offset iss different from palmas */
+#define TPS659038_REGEN2_CTRL 0x12
+
/* TPS65917 Interrupt registers */
/* Registers for function INTERRUPT */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b16eac5f54ce..401554074de9 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -173,6 +173,7 @@ struct perf_event;
* pmu::capabilities flags
*/
#define PERF_PMU_CAP_NO_INTERRUPT 0x01
+#define PERF_PMU_CAP_NO_NMI 0x02
/**
* struct pmu - generic performance monitoring unit
@@ -457,6 +458,7 @@ struct perf_event {
struct pid_namespace *ns;
u64 id;
+ u64 (*clock)(void);
perf_overflow_handler_t overflow_handler;
void *overflow_handler_context;
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index d4ad5b5a02bb..045f709cb89b 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -316,7 +316,7 @@ struct regulator_desc {
* @driver_data: private regulator data
* @of_node: OpenFirmware node to parse for device tree bindings (may be
* NULL).
- * @regmap: regmap to use for core regmap helpers if dev_get_regulator() is
+ * @regmap: regmap to use for core regmap helpers if dev_get_regmap() is
* insufficient.
* @ena_gpio_initialized: GPIO controlling regulator enable was properly
* initialized, meaning that >= 0 is a valid gpio
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6d77432e14ff..a419b65770d6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1625,11 +1625,11 @@ struct task_struct {
/*
* numa_faults_locality tracks if faults recorded during the last
- * scan window were remote/local. The task scan period is adapted
- * based on the locality of the faults with different weights
- * depending on whether they were shared or private faults
+ * scan window were remote/local or failed to migrate. The task scan
+ * period is adapted based on the locality of the faults with different
+ * weights depending on whether they were shared or private faults
*/
- unsigned long numa_faults_locality[2];
+ unsigned long numa_faults_locality[3];
unsigned long numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */
@@ -1719,6 +1719,7 @@ struct task_struct {
#define TNF_NO_GROUP 0x02
#define TNF_SHARED 0x04
#define TNF_FAULT_LOCAL 0x08
+#define TNF_MIGRATE_FAIL 0x10
#ifdef CONFIG_NUMA_BALANCING
extern void task_numa_fault(int last_node, int node, int pages, int flags);
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 05af9a334893..fb86963859c7 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -16,16 +16,16 @@
* @read: Read function of @clock
* @mask: Bitmask for two's complement subtraction of non 64bit clocks
* @cycle_last: @clock cycle value at last update
- * @mult: NTP adjusted multiplier for scaled math conversion
+ * @mult: (NTP adjusted) multiplier for scaled math conversion
* @shift: Shift value for scaled math conversion
* @xtime_nsec: Shifted (fractional) nano seconds offset for readout
- * @base_mono: ktime_t (nanoseconds) base time for readout
+ * @base: ktime_t (nanoseconds) base time for readout
*
* This struct has size 56 byte on 64 bit. Together with a seqcount it
* occupies a single 64byte cache line.
*
* The struct is separate from struct timekeeper as it is also used
- * for a fast NMI safe accessor to clock monotonic.
+ * for a fast NMI safe accessors.
*/
struct tk_read_base {
struct clocksource *clock;
@@ -35,12 +35,13 @@ struct tk_read_base {
u32 mult;
u32 shift;
u64 xtime_nsec;
- ktime_t base_mono;
+ ktime_t base;
};
/**
* struct timekeeper - Structure holding internal timekeeping values.
- * @tkr: The readout base structure
+ * @tkr_mono: The readout base structure for CLOCK_MONOTONIC
+ * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW
* @xtime_sec: Current CLOCK_REALTIME time in seconds
* @ktime_sec: Current CLOCK_MONOTONIC time in seconds
* @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset
@@ -48,7 +49,6 @@ struct tk_read_base {
* @offs_boot: Offset clock monotonic -> clock boottime
* @offs_tai: Offset clock monotonic -> clock tai
* @tai_offset: The current UTC to TAI offset in seconds
- * @base_raw: Monotonic raw base time in ktime_t format
* @raw_time: Monotonic raw base time in timespec64 format
* @cycle_interval: Number of clock cycles in one NTP interval
* @xtime_interval: Number of clock shifted nano seconds in one NTP
@@ -76,7 +76,8 @@ struct tk_read_base {
* used instead.
*/
struct timekeeper {
- struct tk_read_base tkr;
+ struct tk_read_base tkr_mono;
+ struct tk_read_base tkr_raw;
u64 xtime_sec;
unsigned long ktime_sec;
struct timespec64 wall_to_monotonic;
@@ -84,7 +85,6 @@ struct timekeeper {
ktime_t offs_boot;
ktime_t offs_tai;
s32 tai_offset;
- ktime_t base_raw;
struct timespec64 raw_time;
/* The following members are for timekeeping internal use */
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 3eaae4754275..5047b83483d6 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -214,12 +214,18 @@ static inline u64 ktime_get_boot_ns(void)
return ktime_to_ns(ktime_get_boottime());
}
+static inline u64 ktime_get_tai_ns(void)
+{
+ return ktime_to_ns(ktime_get_clocktai());
+}
+
static inline u64 ktime_get_raw_ns(void)
{
return ktime_to_ns(ktime_get_raw());
}
extern u64 ktime_get_mono_fast_ns(void);
+extern u64 ktime_get_raw_fast_ns(void);
/*
* Timespec interfaces utilizing the ktime based ones
diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index 534e1f2ac4fc..57639fca223a 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -79,6 +79,16 @@ void nf_log_packet(struct net *net,
const struct nf_loginfo *li,
const char *fmt, ...);
+__printf(8, 9)
+void nf_log_trace(struct net *net,
+ u_int8_t pf,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *li,
+ const char *fmt, ...);
+
struct nf_log_buf;
struct nf_log_buf *nf_log_buf_open(void);
diff --git a/include/trace/events/regmap.h b/include/trace/events/regmap.h
index 23d561512f64..22317d2b52ab 100644
--- a/include/trace/events/regmap.h
+++ b/include/trace/events/regmap.h
@@ -7,27 +7,26 @@
#include <linux/ktime.h>
#include <linux/tracepoint.h>
-struct device;
-struct regmap;
+#include "../../../drivers/base/regmap/internal.h"
/*
* Log register events
*/
DECLARE_EVENT_CLASS(regmap_reg,
- TP_PROTO(struct device *dev, unsigned int reg,
+ TP_PROTO(struct regmap *map, unsigned int reg,
unsigned int val),
- TP_ARGS(dev, reg, val),
+ TP_ARGS(map, reg, val),
TP_STRUCT__entry(
- __string( name, dev_name(dev) )
- __field( unsigned int, reg )
- __field( unsigned int, val )
+ __string( name, regmap_name(map) )
+ __field( unsigned int, reg )
+ __field( unsigned int, val )
),
TP_fast_assign(
- __assign_str(name, dev_name(dev));
+ __assign_str(name, regmap_name(map));
__entry->reg = reg;
__entry->val = val;
),
@@ -39,45 +38,45 @@ DECLARE_EVENT_CLASS(regmap_reg,
DEFINE_EVENT(regmap_reg, regmap_reg_write,
- TP_PROTO(struct device *dev, unsigned int reg,
+ TP_PROTO(struct regmap *map, unsigned int reg,
unsigned int val),
- TP_ARGS(dev, reg, val)
+ TP_ARGS(map, reg, val)
);
DEFINE_EVENT(regmap_reg, regmap_reg_read,
- TP_PROTO(struct device *dev, unsigned int reg,
+ TP_PROTO(struct regmap *map, unsigned int reg,
unsigned int val),
- TP_ARGS(dev, reg, val)
+ TP_ARGS(map, reg, val)
);
DEFINE_EVENT(regmap_reg, regmap_reg_read_cache,
- TP_PROTO(struct device *dev, unsigned int reg,
+ TP_PROTO(struct regmap *map, unsigned int reg,
unsigned int val),
- TP_ARGS(dev, reg, val)
+ TP_ARGS(map, reg, val)
);
DECLARE_EVENT_CLASS(regmap_block,
- TP_PROTO(struct device *dev, unsigned int reg, int count),
+ TP_PROTO(struct regmap *map, unsigned int reg, int count),
- TP_ARGS(dev, reg, count),
+ TP_ARGS(map, reg, count),
TP_STRUCT__entry(
- __string( name, dev_name(dev) )
- __field( unsigned int, reg )
- __field( int, count )
+ __string( name, regmap_name(map) )
+ __field( unsigned int, reg )
+ __field( int, count )
),
TP_fast_assign(
- __assign_str(name, dev_name(dev));
+ __assign_str(name, regmap_name(map));
__entry->reg = reg;
__entry->count = count;
),
@@ -89,48 +88,48 @@ DECLARE_EVENT_CLASS(regmap_block,
DEFINE_EVENT(regmap_block, regmap_hw_read_start,
- TP_PROTO(struct device *dev, unsigned int reg, int count),
+ TP_PROTO(struct regmap *map, unsigned int reg, int count),
- TP_ARGS(dev, reg, count)
+ TP_ARGS(map, reg, count)
);
DEFINE_EVENT(regmap_block, regmap_hw_read_done,
- TP_PROTO(struct device *dev, unsigned int reg, int count),
+ TP_PROTO(struct regmap *map, unsigned int reg, int count),
- TP_ARGS(dev, reg, count)
+ TP_ARGS(map, reg, count)
);
DEFINE_EVENT(regmap_block, regmap_hw_write_start,
- TP_PROTO(struct device *dev, unsigned int reg, int count),
+ TP_PROTO(struct regmap *map, unsigned int reg, int count),
- TP_ARGS(dev, reg, count)
+ TP_ARGS(map, reg, count)
);
DEFINE_EVENT(regmap_block, regmap_hw_write_done,
- TP_PROTO(struct device *dev, unsigned int reg, int count),
+ TP_PROTO(struct regmap *map, unsigned int reg, int count),
- TP_ARGS(dev, reg, count)
+ TP_ARGS(map, reg, count)
);
TRACE_EVENT(regcache_sync,
- TP_PROTO(struct device *dev, const char *type,
+ TP_PROTO(struct regmap *map, const char *type,
const char *status),
- TP_ARGS(dev, type, status),
+ TP_ARGS(map, type, status),
TP_STRUCT__entry(
- __string( name, dev_name(dev) )
- __string( status, status )
- __string( type, type )
- __field( int, type )
+ __string( name, regmap_name(map) )
+ __string( status, status )
+ __string( type, type )
+ __field( int, type )
),
TP_fast_assign(
- __assign_str(name, dev_name(dev));
+ __assign_str(name, regmap_name(map));
__assign_str(status, status);
__assign_str(type, type);
),
@@ -141,17 +140,17 @@ TRACE_EVENT(regcache_sync,
DECLARE_EVENT_CLASS(regmap_bool,
- TP_PROTO(struct device *dev, bool flag),
+ TP_PROTO(struct regmap *map, bool flag),
- TP_ARGS(dev, flag),
+ TP_ARGS(map, flag),
TP_STRUCT__entry(
- __string( name, dev_name(dev) )
- __field( int, flag )
+ __string( name, regmap_name(map) )
+ __field( int, flag )
),
TP_fast_assign(
- __assign_str(name, dev_name(dev));
+ __assign_str(name, regmap_name(map));
__entry->flag = flag;
),
@@ -161,32 +160,32 @@ DECLARE_EVENT_CLASS(regmap_bool,
DEFINE_EVENT(regmap_bool, regmap_cache_only,
- TP_PROTO(struct device *dev, bool flag),
+ TP_PROTO(struct regmap *map, bool flag),
- TP_ARGS(dev, flag)
+ TP_ARGS(map, flag)
);
DEFINE_EVENT(regmap_bool, regmap_cache_bypass,
- TP_PROTO(struct device *dev, bool flag),
+ TP_PROTO(struct regmap *map, bool flag),
- TP_ARGS(dev, flag)
+ TP_ARGS(map, flag)
);
DECLARE_EVENT_CLASS(regmap_async,
- TP_PROTO(struct device *dev),
+ TP_PROTO(struct regmap *map),
- TP_ARGS(dev),
+ TP_ARGS(map),
TP_STRUCT__entry(
- __string( name, dev_name(dev) )
+ __string( name, regmap_name(map) )
),
TP_fast_assign(
- __assign_str(name, dev_name(dev));
+ __assign_str(name, regmap_name(map));
),
TP_printk("%s", __get_str(name))
@@ -194,50 +193,50 @@ DECLARE_EVENT_CLASS(regmap_async,
DEFINE_EVENT(regmap_block, regmap_async_write_start,
- TP_PROTO(struct device *dev, unsigned int reg, int count),
+ TP_PROTO(struct regmap *map, unsigned int reg, int count),
- TP_ARGS(dev, reg, count)
+ TP_ARGS(map, reg, count)
);
DEFINE_EVENT(regmap_async, regmap_async_io_complete,
- TP_PROTO(struct device *dev),
+ TP_PROTO(struct regmap *map),
- TP_ARGS(dev)
+ TP_ARGS(map)
);
DEFINE_EVENT(regmap_async, regmap_async_complete_start,
- TP_PROTO(struct device *dev),
+ TP_PROTO(struct regmap *map),
- TP_ARGS(dev)
+ TP_ARGS(map)
);
DEFINE_EVENT(regmap_async, regmap_async_complete_done,
- TP_PROTO(struct device *dev),
+ TP_PROTO(struct regmap *map),
- TP_ARGS(dev)
+ TP_ARGS(map)
);
TRACE_EVENT(regcache_drop_region,
- TP_PROTO(struct device *dev, unsigned int from,
+ TP_PROTO(struct regmap *map, unsigned int from,
unsigned int to),
- TP_ARGS(dev, from, to),
+ TP_ARGS(map, from, to),
TP_STRUCT__entry(
- __string( name, dev_name(dev) )
- __field( unsigned int, from )
- __field( unsigned int, to )
+ __string( name, regmap_name(map) )
+ __field( unsigned int, from )
+ __field( unsigned int, to )
),
TP_fast_assign(
- __assign_str(name, dev_name(dev));
+ __assign_str(name, regmap_name(map));
__entry->from = from;
__entry->to = to;
),
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 1e3cd07cf76e..3bb40ddadbe5 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -326,7 +326,8 @@ struct perf_event_attr {
exclude_callchain_user : 1, /* exclude user callchains */
mmap2 : 1, /* include mmap with inode data */
comm_exec : 1, /* flag comm events that are due to an exec */
- __reserved_1 : 39;
+ use_clockid : 1, /* use @clockid for time fields */
+ __reserved_1 : 38;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -355,8 +356,7 @@ struct perf_event_attr {
*/
__u32 sample_stack_user;
- /* Align to u64. */
- __u32 __reserved_2;
+ __s32 clockid;
/*
* Defines set of regs to dump for each sample
* state captured on:
diff --git a/kernel/events/core.c b/kernel/events/core.c
index bb1a7c36e794..c40c2cac2d8e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -327,6 +327,11 @@ static inline u64 perf_clock(void)
return local_clock();
}
+static inline u64 perf_event_clock(struct perf_event *event)
+{
+ return event->clock();
+}
+
static inline struct perf_cpu_context *
__get_cpu_context(struct perf_event_context *ctx)
{
@@ -4762,7 +4767,7 @@ static void __perf_event_header__init_id(struct perf_event_header *header,
}
if (sample_type & PERF_SAMPLE_TIME)
- data->time = perf_clock();
+ data->time = perf_event_clock(event);
if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
data->id = primary_event_id(event);
@@ -5340,6 +5345,8 @@ static void perf_event_task_output(struct perf_event *event,
task_event->event_id.tid = perf_event_tid(event, task);
task_event->event_id.ptid = perf_event_tid(event, current);
+ task_event->event_id.time = perf_event_clock(event);
+
perf_output_put(&handle, task_event->event_id);
perf_event__output_id_sample(event, &handle, &sample);
@@ -5373,7 +5380,7 @@ static void perf_event_task(struct task_struct *task,
/* .ppid */
/* .tid */
/* .ptid */
- .time = perf_clock(),
+ /* .time */
},
};
@@ -5749,7 +5756,7 @@ static void perf_log_throttle(struct perf_event *event, int enable)
.misc = 0,
.size = sizeof(throttle_event),
},
- .time = perf_clock(),
+ .time = perf_event_clock(event),
.id = primary_event_id(event),
.stream_id = event->id,
};
@@ -6293,6 +6300,8 @@ static int perf_swevent_init(struct perf_event *event)
static struct pmu perf_swevent = {
.task_ctx_nr = perf_sw_context,
+ .capabilities = PERF_PMU_CAP_NO_NMI,
+
.event_init = perf_swevent_init,
.add = perf_swevent_add,
.del = perf_swevent_del,
@@ -6636,6 +6645,8 @@ static int cpu_clock_event_init(struct perf_event *event)
static struct pmu perf_cpu_clock = {
.task_ctx_nr = perf_sw_context,
+ .capabilities = PERF_PMU_CAP_NO_NMI,
+
.event_init = cpu_clock_event_init,
.add = cpu_clock_event_add,
.del = cpu_clock_event_del,
@@ -6715,6 +6726,8 @@ static int task_clock_event_init(struct perf_event *event)
static struct pmu perf_task_clock = {
.task_ctx_nr = perf_sw_context,
+ .capabilities = PERF_PMU_CAP_NO_NMI,
+
.event_init = task_clock_event_init,
.add = task_clock_event_add,
.del = task_clock_event_del,
@@ -7200,6 +7213,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
event->hw.target = task;
}
+ event->clock = &local_clock;
+ if (parent_event)
+ event->clock = parent_event->clock;
+
if (!overflow_handler && parent_event) {
overflow_handler = parent_event->overflow_handler;
context = parent_event->overflow_handler_context;
@@ -7422,6 +7439,12 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
if (output_event->cpu == -1 && output_event->ctx != event->ctx)
goto out;
+ /*
+ * Mixing clocks in the same buffer is trouble you don't need.
+ */
+ if (output_event->clock != event->clock)
+ goto out;
+
set:
mutex_lock(&event->mmap_mutex);
/* Can't redirect output if we've got an active mmap() */
@@ -7454,6 +7477,43 @@ static void mutex_lock_double(struct mutex *a, struct mutex *b)
mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
}
+static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
+{
+ bool nmi_safe = false;
+
+ switch (clk_id) {
+ case CLOCK_MONOTONIC:
+ event->clock = &ktime_get_mono_fast_ns;
+ nmi_safe = true;
+ break;
+
+ case CLOCK_MONOTONIC_RAW:
+ event->clock = &ktime_get_raw_fast_ns;
+ nmi_safe = true;
+ break;
+
+ case CLOCK_REALTIME:
+ event->clock = &ktime_get_real_ns;
+ break;
+
+ case CLOCK_BOOTTIME:
+ event->clock = &ktime_get_boot_ns;
+ break;
+
+ case CLOCK_TAI:
+ event->clock = &ktime_get_tai_ns;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ if (!nmi_safe && !(event->pmu->capabilities & PERF_PMU_CAP_NO_NMI))
+ return -EINVAL;
+
+ return 0;
+}
+
/**
* sys_perf_event_open - open a performance event, associate it to a task/cpu
*
@@ -7569,6 +7629,12 @@ SYSCALL_DEFINE5(perf_event_open,
*/
pmu = event->pmu;
+ if (attr.use_clockid) {
+ err = perf_event_set_clock(event, attr.clockid);
+ if (err)
+ goto err_alloc;
+ }
+
if (group_leader &&
(is_software_event(event) != is_software_event(group_leader))) {
if (is_software_event(event)) {
@@ -7618,6 +7684,11 @@ SYSCALL_DEFINE5(perf_event_open,
*/
if (group_leader->group_leader != group_leader)
goto err_context;
+
+ /* All events in a group should have the same clock */
+ if (group_leader->clock != event->clock)
+ goto err_context;
+
/*
* Do not allow to attach to a group in a different
* task or CPU context:
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7ce18f3c097a..bcfe32088b37 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1609,9 +1609,11 @@ static void update_task_scan_period(struct task_struct *p,
/*
* If there were no record hinting faults then either the task is
* completely idle or all activity is areas that are not of interest
- * to automatic numa balancing. Scan slower
+ * to automatic numa balancing. Related to that, if there were failed
+ * migration then it implies we are migrating too quickly or the local
+ * node is overloaded. In either case, scan slower
*/
- if (local + shared == 0) {
+ if (local + shared == 0 || p->numa_faults_locality[2]) {
p->numa_scan_period = min(p->numa_scan_period_max,
p->numa_scan_period << 1);
@@ -2080,6 +2082,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
if (migrated)
p->numa_pages_migrated += pages;
+ if (flags & TNF_MIGRATE_FAIL)
+ p->numa_faults_locality[2] += pages;
p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages;
p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages;
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 55449909f114..489642b08d64 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -94,6 +94,57 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
}
EXPORT_SYMBOL_GPL(clockevent_delta2ns);
+static int __clockevents_set_mode(struct clock_event_device *dev,
+ enum clock_event_mode mode)
+{
+ /* Transition with legacy set_mode() callback */
+ if (dev->set_mode) {
+ /* Legacy callback doesn't support new modes */
+ if (mode > CLOCK_EVT_MODE_RESUME)
+ return -ENOSYS;
+ dev->set_mode(mode, dev);
+ return 0;
+ }
+
+ if (dev->features & CLOCK_EVT_FEAT_DUMMY)
+ return 0;
+
+ /* Transition with new mode-specific callbacks */
+ switch (mode) {
+ case CLOCK_EVT_MODE_UNUSED:
+ /*
+ * This is an internal state, which is guaranteed to go from
+ * SHUTDOWN to UNUSED. No driver interaction required.
+ */
+ return 0;
+
+ case CLOCK_EVT_MODE_SHUTDOWN:
+ return dev->set_mode_shutdown(dev);
+
+ case CLOCK_EVT_MODE_PERIODIC:
+ /* Core internal bug */
+ if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC))
+ return -ENOSYS;
+ return dev->set_mode_periodic(dev);
+
+ case CLOCK_EVT_MODE_ONESHOT:
+ /* Core internal bug */
+ if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
+ return -ENOSYS;
+ return dev->set_mode_oneshot(dev);
+
+ case CLOCK_EVT_MODE_RESUME:
+ /* Optional callback */
+ if (dev->set_mode_resume)
+ return dev->set_mode_resume(dev);
+ else
+ return 0;
+
+ default:
+ return -ENOSYS;
+ }
+}
+
/**
* clockevents_set_mode - set the operating mode of a clock event device
* @dev: device to modify
@@ -105,7 +156,9 @@ void clockevents_set_mode(struct clock_event_device *dev,
enum clock_event_mode mode)
{
if (dev->mode != mode) {
- dev->set_mode(mode, dev);
+ if (__clockevents_set_mode(dev, mode))
+ return;
+
dev->mode = mode;
/*
@@ -373,6 +426,35 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
}
EXPORT_SYMBOL_GPL(clockevents_unbind);
+/* Sanity check of mode transition callbacks */
+static int clockevents_sanity_check(struct clock_event_device *dev)
+{
+ /* Legacy set_mode() callback */
+ if (dev->set_mode) {
+ /* We shouldn't be supporting new modes now */
+ WARN_ON(dev->set_mode_periodic || dev->set_mode_oneshot ||
+ dev->set_mode_shutdown || dev->set_mode_resume);
+ return 0;
+ }
+
+ if (dev->features & CLOCK_EVT_FEAT_DUMMY)
+ return 0;
+
+ /* New mode-specific callbacks */
+ if (!dev->set_mode_shutdown)
+ return -EINVAL;
+
+ if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
+ !dev->set_mode_periodic)
+ return -EINVAL;
+
+ if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) &&
+ !dev->set_mode_oneshot)
+ return -EINVAL;
+
+ return 0;
+}
+
/**
* clockevents_register_device - register a clock event device
* @dev: device to register
@@ -382,6 +464,8 @@ void clockevents_register_device(struct clock_event_device *dev)
unsigned long flags;
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+ BUG_ON(clockevents_sanity_check(dev));
+
if (!dev->cpumask) {
WARN_ON(num_possible_cpus() > 1);
dev->cpumask = cpumask_of(smp_processor_id());
@@ -449,7 +533,7 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq)
return clockevents_program_event(dev, dev->next_event, false);
if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
- dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev);
+ return __clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
return 0;
}
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 4892352f0e49..c3be3c71bbad 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -142,13 +142,6 @@ static void __clocksource_unstable(struct clocksource *cs)
schedule_work(&watchdog_work);
}
-static void clocksource_unstable(struct clocksource *cs, int64_t delta)
-{
- printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
- cs->name, delta);
- __clocksource_unstable(cs);
-}
-
/**
* clocksource_mark_unstable - mark clocksource unstable via watchdog
* @cs: clocksource to be marked unstable
@@ -174,7 +167,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
static void clocksource_watchdog(unsigned long data)
{
struct clocksource *cs;
- cycle_t csnow, wdnow, delta;
+ cycle_t csnow, wdnow, cslast, wdlast, delta;
int64_t wd_nsec, cs_nsec;
int next_cpu, reset_pending;
@@ -213,6 +206,8 @@ static void clocksource_watchdog(unsigned long data)
delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ wdlast = cs->wd_last; /* save these in case we print them */
+ cslast = cs->cs_last;
cs->cs_last = csnow;
cs->wd_last = wdnow;
@@ -221,7 +216,12 @@ static void clocksource_watchdog(unsigned long data)
/* Check the deviation from the watchdog clocksource. */
if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
- clocksource_unstable(cs, cs_nsec - wd_nsec);
+ pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name);
+ pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
+ watchdog->name, wdnow, wdlast, watchdog->mask);
+ pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
+ cs->name, csnow, cslast, cs->mask);
+ __clocksource_unstable(cs);
continue;
}
@@ -469,26 +469,22 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
* @shift: cycle to nanosecond divisor (power of two)
* @maxadj: maximum adjustment value to mult (~11%)
* @mask: bitmask for two's complement subtraction of non 64 bit counters
+ * @max_cyc: maximum cycle value before potential overflow (does not include
+ * any safety margin)
+ *
+ * NOTE: This function includes a safety margin of 50%, so that bad clock values
+ * can be detected.
*/
-u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
+u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
{
u64 max_nsecs, max_cycles;
/*
* Calculate the maximum number of cycles that we can pass to the
- * cyc2ns function without overflowing a 64-bit signed result. The
- * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
- * which is equivalent to the below.
- * max_cycles < (2^63)/(mult + maxadj)
- * max_cycles < 2^(log2((2^63)/(mult + maxadj)))
- * max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
- * max_cycles < 2^(63 - log2(mult + maxadj))
- * max_cycles < 1 << (63 - log2(mult + maxadj))
- * Please note that we add 1 to the result of the log2 to account for
- * any rounding errors, ensure the above inequality is satisfied and
- * no overflow will occur.
+ * cyc2ns() function without overflowing a 64-bit result.
*/
- max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));
+ max_cycles = ULLONG_MAX;
+ do_div(max_cycles, mult+maxadj);
/*
* The actual maximum number of cycles we can defer the clocksource is
@@ -499,27 +495,26 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
max_cycles = min(max_cycles, mask);
max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
+ /* return the max_cycles value as well if requested */
+ if (max_cyc)
+ *max_cyc = max_cycles;
+
+ /* Return 50% of the actual maximum, so we can detect bad values */
+ max_nsecs >>= 1;
+
return max_nsecs;
}
/**
- * clocksource_max_deferment - Returns max time the clocksource can be deferred
- * @cs: Pointer to clocksource
+ * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
+ * @cs: Pointer to clocksource to be updated
*
*/
-static u64 clocksource_max_deferment(struct clocksource *cs)
+static inline void clocksource_update_max_deferment(struct clocksource *cs)
{
- u64 max_nsecs;
-
- max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj,
- cs->mask);
- /*
- * To ensure that the clocksource does not wrap whilst we are idle,
- * limit the time the clocksource can be deferred by 12.5%. Please
- * note a margin of 12.5% is used because this can be computed with
- * a shift, versus say 10% which would require division.
- */
- return max_nsecs - (max_nsecs >> 3);
+ cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
+ cs->maxadj, cs->mask,
+ &cs->max_cycles);
}
#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
@@ -648,7 +643,7 @@ static void clocksource_enqueue(struct clocksource *cs)
}
/**
- * __clocksource_updatefreq_scale - Used update clocksource with new freq
+ * __clocksource_update_freq_scale - Used update clocksource with new freq
* @cs: clocksource to be registered
* @scale: Scale factor multiplied against freq to get clocksource hz
* @freq: clocksource frequency (cycles per second) divided by scale
@@ -656,48 +651,64 @@ static void clocksource_enqueue(struct clocksource *cs)
* This should only be called from the clocksource->enable() method.
*
* This *SHOULD NOT* be called directly! Please use the
- * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions.
+ * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
+ * functions.
*/
-void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
+void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
{
u64 sec;
+
/*
- * Calc the maximum number of seconds which we can run before
- * wrapping around. For clocksources which have a mask > 32bit
- * we need to limit the max sleep time to have a good
- * conversion precision. 10 minutes is still a reasonable
- * amount. That results in a shift value of 24 for a
- * clocksource with mask >= 40bit and f >= 4GHz. That maps to
- * ~ 0.06ppm granularity for NTP. We apply the same 12.5%
- * margin as we do in clocksource_max_deferment()
+ * Default clocksources are *special* and self-define their mult/shift.
+ * But, you're not special, so you should specify a freq value.
*/
- sec = (cs->mask - (cs->mask >> 3));
- do_div(sec, freq);
- do_div(sec, scale);
- if (!sec)
- sec = 1;
- else if (sec > 600 && cs->mask > UINT_MAX)
- sec = 600;
-
- clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
- NSEC_PER_SEC / scale, sec * scale);
-
+ if (freq) {
+ /*
+ * Calc the maximum number of seconds which we can run before
+ * wrapping around. For clocksources which have a mask > 32-bit
+ * we need to limit the max sleep time to have a good
+ * conversion precision. 10 minutes is still a reasonable
+ * amount. That results in a shift value of 24 for a
+ * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
+ * ~ 0.06ppm granularity for NTP.
+ */
+ sec = cs->mask;
+ do_div(sec, freq);
+ do_div(sec, scale);
+ if (!sec)
+ sec = 1;
+ else if (sec > 600 && cs->mask > UINT_MAX)
+ sec = 600;
+
+ clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
+ NSEC_PER_SEC / scale, sec * scale);
+ }
/*
- * for clocksources that have large mults, to avoid overflow.
- * Since mult may be adjusted by ntp, add an safety extra margin
- *
+ * Ensure clocksources that have large 'mult' values don't overflow
+ * when adjusted.
*/
cs->maxadj = clocksource_max_adjustment(cs);
- while ((cs->mult + cs->maxadj < cs->mult)
- || (cs->mult - cs->maxadj > cs->mult)) {
+ while (freq && ((cs->mult + cs->maxadj < cs->mult)
+ || (cs->mult - cs->maxadj > cs->mult))) {
cs->mult >>= 1;
cs->shift--;
cs->maxadj = clocksource_max_adjustment(cs);
}
- cs->max_idle_ns = clocksource_max_deferment(cs);
+ /*
+ * Only warn for *special* clocksources that self-define
+ * their mult/shift values and don't specify a freq.
+ */
+ WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
+ "timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
+ cs->name);
+
+ clocksource_update_max_deferment(cs);
+
+ pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
+ cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
}
-EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
+EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
/**
* __clocksource_register_scale - Used to install new clocksources
@@ -714,7 +725,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{
/* Initialize mult/shift and max_idle_ns */
- __clocksource_updatefreq_scale(cs, scale, freq);
+ __clocksource_update_freq_scale(cs, scale, freq);
/* Add clocksource to the clocksource list */
mutex_lock(&clocksource_mutex);
@@ -726,33 +737,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
}
EXPORT_SYMBOL_GPL(__clocksource_register_scale);
-
-/**
- * clocksource_register - Used to install new clocksources
- * @cs: clocksource to be registered
- *
- * Returns -EBUSY if registration fails, zero otherwise.
- */
-int clocksource_register(struct clocksource *cs)
-{
- /* calculate max adjustment for given mult/shift */
- cs->maxadj = clocksource_max_adjustment(cs);
- WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
- "Clocksource %s might overflow on 11%% adjustment\n",
- cs->name);
-
- /* calculate max idle time permitted for this clocksource */
- cs->max_idle_ns = clocksource_max_deferment(cs);
-
- mutex_lock(&clocksource_mutex);
- clocksource_enqueue(cs);
- clocksource_enqueue_watchdog(cs);
- clocksource_select();
- mutex_unlock(&clocksource_mutex);
- return 0;
-}
-EXPORT_SYMBOL(clocksource_register);
-
static void __clocksource_change_rating(struct clocksource *cs, int rating)
{
list_del(&cs->list);
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index a6a5bf53e86d..c4bb518725b5 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -71,6 +71,7 @@ static struct clocksource clocksource_jiffies = {
.mask = 0xffffffff, /*32bits*/
.mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
.shift = JIFFIES_SHIFT,
+ .max_cycles = 10,
};
__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
@@ -94,7 +95,7 @@ EXPORT_SYMBOL(jiffies);
static int __init init_jiffies_clocksource(void)
{
- return clocksource_register(&clocksource_jiffies);
+ return __clocksource_register(&clocksource_jiffies);
}
core_initcall(init_jiffies_clocksource);
@@ -130,6 +131,6 @@ int register_refined_jiffies(long cycles_per_second)
refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
- clocksource_register(&refined_jiffies);
+ __clocksource_register(&refined_jiffies);
return 0;
}
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 01d2d15aa662..a26036d37a38 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -1,5 +1,6 @@
/*
- * sched_clock.c: support for extending counters to full 64-bit ns counter
+ * sched_clock.c: Generic sched_clock() support, to extend low level
+ * hardware time counters to full 64-bit ns values.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -18,15 +19,53 @@
#include <linux/seqlock.h>
#include <linux/bitops.h>
-struct clock_data {
- ktime_t wrap_kt;
+/**
+ * struct clock_read_data - data required to read from sched_clock()
+ *
+ * @epoch_ns: sched_clock() value at last update
+ * @epoch_cyc: Clock cycle value at last update.
+ * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit
+ * clocks.
+ * @read_sched_clock: Current clock source (or dummy source when suspended).
+ * @mult: Multipler for scaled math conversion.
+ * @shift: Shift value for scaled math conversion.
+ *
+ * Care must be taken when updating this structure; it is read by
+ * some very hot code paths. It occupies <=40 bytes and, when combined
+ * with the seqcount used to synchronize access, comfortably fits into
+ * a 64 byte cache line.
+ */
+struct clock_read_data {
u64 epoch_ns;
u64 epoch_cyc;
- seqcount_t seq;
- unsigned long rate;
+ u64 sched_clock_mask;
+ u64 (*read_sched_clock)(void);
u32 mult;
u32 shift;
- bool suspended;
+};
+
+/**
+ * struct clock_data - all data needed for sched_clock() (including
+ * registration of a new clock source)
+ *
+ * @seq: Sequence counter for protecting updates. The lowest
+ * bit is the index for @read_data.
+ * @read_data: Data required to read from sched_clock.
+ * @wrap_kt: Duration for which clock can run before wrapping.
+ * @rate: Tick rate of the registered clock.
+ * @actual_read_sched_clock: Registered hardware level clock read function.
+ *
+ * The ordering of this structure has been chosen to optimize cache
+ * performance. In particular 'seq' and 'read_data[0]' (combined) should fit
+ * into a single 64-byte cache line.
+ */
+struct clock_data {
+ seqcount_t seq;
+ struct clock_read_data read_data[2];
+ ktime_t wrap_kt;
+ unsigned long rate;
+
+ u64 (*actual_read_sched_clock)(void);
};
static struct hrtimer sched_clock_timer;
@@ -34,12 +73,6 @@ static int irqtime = -1;
core_param(irqtime, irqtime, int, 0400);
-static struct clock_data cd = {
- .mult = NSEC_PER_SEC / HZ,
-};
-
-static u64 __read_mostly sched_clock_mask;
-
static u64 notrace jiffy_sched_clock_read(void)
{
/*
@@ -49,7 +82,11 @@ static u64 notrace jiffy_sched_clock_read(void)
return (u64)(jiffies - INITIAL_JIFFIES);
}
-static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
+static struct clock_data cd ____cacheline_aligned = {
+ .read_data[0] = { .mult = NSEC_PER_SEC / HZ,
+ .read_sched_clock = jiffy_sched_clock_read, },
+ .actual_read_sched_clock = jiffy_sched_clock_read,
+};
static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
{
@@ -58,111 +95,136 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
unsigned long long notrace sched_clock(void)
{
- u64 epoch_ns;
- u64 epoch_cyc;
- u64 cyc;
+ u64 cyc, res;
unsigned long seq;
-
- if (cd.suspended)
- return cd.epoch_ns;
+ struct clock_read_data *rd;
do {
- seq = raw_read_seqcount_begin(&cd.seq);
- epoch_cyc = cd.epoch_cyc;
- epoch_ns = cd.epoch_ns;
+ seq = raw_read_seqcount(&cd.seq);
+ rd = cd.read_data + (seq & 1);
+
+ cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
+ rd->sched_clock_mask;
+ res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
} while (read_seqcount_retry(&cd.seq, seq));
- cyc = read_sched_clock();
- cyc = (cyc - epoch_cyc) & sched_clock_mask;
- return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift);
+ return res;
+}
+
+/*
+ * Updating the data required to read the clock.
+ *
+ * sched_clock() will never observe mis-matched data even if called from
+ * an NMI. We do this by maintaining an odd/even copy of the data and
+ * steering sched_clock() to one or the other using a sequence counter.
+ * In order to preserve the data cache profile of sched_clock() as much
+ * as possible the system reverts back to the even copy when the update
+ * completes; the odd copy is used *only* during an update.
+ */
+static void update_clock_read_data(struct clock_read_data *rd)
+{
+ /* update the backup (odd) copy with the new data */
+ cd.read_data[1] = *rd;
+
+ /* steer readers towards the odd copy */
+ raw_write_seqcount_latch(&cd.seq);
+
+ /* now its safe for us to update the normal (even) copy */
+ cd.read_data[0] = *rd;
+
+ /* switch readers back to the even copy */
+ raw_write_seqcount_latch(&cd.seq);
}
/*
- * Atomically update the sched_clock epoch.
+ * Atomically update the sched_clock() epoch.
*/
-static void notrace update_sched_clock(void)
+static void update_sched_clock(void)
{
- unsigned long flags;
u64 cyc;
u64 ns;
+ struct clock_read_data rd;
+
+ rd = cd.read_data[0];
+
+ cyc = cd.actual_read_sched_clock();
+ ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
+
+ rd.epoch_ns = ns;
+ rd.epoch_cyc = cyc;
- cyc = read_sched_clock();
- ns = cd.epoch_ns +
- cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
- cd.mult, cd.shift);
-
- raw_local_irq_save(flags);
- raw_write_seqcount_begin(&cd.seq);
- cd.epoch_ns = ns;
- cd.epoch_cyc = cyc;
- raw_write_seqcount_end(&cd.seq);
- raw_local_irq_restore(flags);
+ update_clock_read_data(&rd);
}
static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
{
update_sched_clock();
hrtimer_forward_now(hrt, cd.wrap_kt);
+
return HRTIMER_RESTART;
}
-void __init sched_clock_register(u64 (*read)(void), int bits,
- unsigned long rate)
+void __init
+sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
{
u64 res, wrap, new_mask, new_epoch, cyc, ns;
u32 new_mult, new_shift;
- ktime_t new_wrap_kt;
unsigned long r;
char r_unit;
+ struct clock_read_data rd;
if (cd.rate > rate)
return;
WARN_ON(!irqs_disabled());
- /* calculate the mult/shift to convert counter ticks to ns. */
+ /* Calculate the mult/shift to convert counter ticks to ns. */
clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);
new_mask = CLOCKSOURCE_MASK(bits);
+ cd.rate = rate;
+
+ /* Calculate how many nanosecs until we risk wrapping */
+ wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL);
+ cd.wrap_kt = ns_to_ktime(wrap);
- /* calculate how many ns until we wrap */
- wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);
- new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
+ rd = cd.read_data[0];
- /* update epoch for new counter and update epoch_ns from old counter*/
+ /* Update epoch for new counter and update 'epoch_ns' from old counter*/
new_epoch = read();
- cyc = read_sched_clock();
- ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
- cd.mult, cd.shift);
+ cyc = cd.actual_read_sched_clock();
+ ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
+ cd.actual_read_sched_clock = read;
- raw_write_seqcount_begin(&cd.seq);
- read_sched_clock = read;
- sched_clock_mask = new_mask;
- cd.rate = rate;
- cd.wrap_kt = new_wrap_kt;
- cd.mult = new_mult;
- cd.shift = new_shift;
- cd.epoch_cyc = new_epoch;
- cd.epoch_ns = ns;
- raw_write_seqcount_end(&cd.seq);
+ rd.read_sched_clock = read;
+ rd.sched_clock_mask = new_mask;
+ rd.mult = new_mult;
+ rd.shift = new_shift;
+ rd.epoch_cyc = new_epoch;
+ rd.epoch_ns = ns;
+
+ update_clock_read_data(&rd);
r = rate;
if (r >= 4000000) {
r /= 1000000;
r_unit = 'M';
- } else if (r >= 1000) {
- r /= 1000;
- r_unit = 'k';
- } else
- r_unit = ' ';
-
- /* calculate the ns resolution of this counter */
+ } else {
+ if (r >= 1000) {
+ r /= 1000;
+ r_unit = 'k';
+ } else {
+ r_unit = ' ';
+ }
+ }
+
+ /* Calculate the ns resolution of this counter */
res = cyc_to_ns(1ULL, new_mult, new_shift);
pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",
bits, r, r_unit, res, wrap);
- /* Enable IRQ time accounting if we have a fast enough sched_clock */
+ /* Enable IRQ time accounting if we have a fast enough sched_clock() */
if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
enable_sched_clock_irqtime();
@@ -172,10 +234,10 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
void __init sched_clock_postinit(void)
{
/*
- * If no sched_clock function has been provided at that point,
+ * If no sched_clock() function has been provided at that point,
* make it the final one one.
*/
- if (read_sched_clock == jiffy_sched_clock_read)
+ if (cd.actual_read_sched_clock == jiffy_sched_clock_read)
sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
update_sched_clock();
@@ -189,29 +251,53 @@ void __init sched_clock_postinit(void)
hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
}
+/*
+ * Clock read function for use when the clock is suspended.
+ *
+ * This function makes it appear to sched_clock() as if the clock
+ * stopped counting at its last update.
+ *
+ * This function must only be called from the critical
+ * section in sched_clock(). It relies on the read_seqcount_retry()
+ * at the end of the critical section to be sure we observe the
+ * correct copy of 'epoch_cyc'.
+ */
+static u64 notrace suspended_sched_clock_read(void)
+{
+ unsigned long seq = raw_read_seqcount(&cd.seq);
+
+ return cd.read_data[seq & 1].epoch_cyc;
+}
+
static int sched_clock_suspend(void)
{
+ struct clock_read_data *rd = &cd.read_data[0];
+
update_sched_clock();
hrtimer_cancel(&sched_clock_timer);
- cd.suspended = true;
+ rd->read_sched_clock = suspended_sched_clock_read;
+
return 0;
}
static void sched_clock_resume(void)
{
- cd.epoch_cyc = read_sched_clock();
+ struct clock_read_data *rd = &cd.read_data[0];
+
+ rd->epoch_cyc = cd.actual_read_sched_clock();
hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
- cd.suspended = false;
+ rd->read_sched_clock = cd.actual_read_sched_clock;
}
static struct syscore_ops sched_clock_ops = {
- .suspend = sched_clock_suspend,
- .resume = sched_clock_resume,
+ .suspend = sched_clock_suspend,
+ .resume = sched_clock_resume,
};
static int __init sched_clock_syscore_init(void)
{
register_syscore_ops(&sched_clock_ops);
+
return 0;
}
device_initcall(sched_clock_syscore_init);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 91db94136c10..c3fcff06d30a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -59,6 +59,7 @@ struct tk_fast {
};
static struct tk_fast tk_fast_mono ____cacheline_aligned;
+static struct tk_fast tk_fast_raw ____cacheline_aligned;
/* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended;
@@ -68,8 +69,8 @@ bool __read_mostly persistent_clock_exist = false;
static inline void tk_normalize_xtime(struct timekeeper *tk)
{
- while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) {
- tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift;
+ while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
+ tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
tk->xtime_sec++;
}
}
@@ -79,20 +80,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk)
struct timespec64 ts;
ts.tv_sec = tk->xtime_sec;
- ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+ ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
return ts;
}
static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
{
tk->xtime_sec = ts->tv_sec;
- tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift;
+ tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
}
static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
{
tk->xtime_sec += ts->tv_sec;
- tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift;
+ tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
tk_normalize_xtime(tk);
}
@@ -118,6 +119,117 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
tk->offs_boot = ktime_add(tk->offs_boot, delta);
}
+#ifdef CONFIG_DEBUG_TIMEKEEPING
+#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
+/*
+ * These simple flag variables are managed
+ * without locks, which is racy, but ok since
+ * we don't really care about being super
+ * precise about how many events were seen,
+ * just that a problem was observed.
+ */
+static int timekeeping_underflow_seen;
+static int timekeeping_overflow_seen;
+
+/* last_warning is only modified under the timekeeping lock */
+static long timekeeping_last_warning;
+
+static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
+{
+
+ cycle_t max_cycles = tk->tkr_mono.clock->max_cycles;
+ const char *name = tk->tkr_mono.clock->name;
+
+ if (offset > max_cycles) {
+ printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
+ offset, name, max_cycles);
+ printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
+ } else {
+ if (offset > (max_cycles >> 1)) {
+ printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the the '%s' clock's 50%% safety margin (%lld)\n",
+ offset, name, max_cycles >> 1);
+ printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
+ }
+ }
+
+ if (timekeeping_underflow_seen) {
+ if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
+ printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
+ printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
+ printk_deferred(" Your kernel is probably still fine.\n");
+ timekeeping_last_warning = jiffies;
+ }
+ timekeeping_underflow_seen = 0;
+ }
+
+ if (timekeeping_overflow_seen) {
+ if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
+ printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
+ printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
+ printk_deferred(" Your kernel is probably still fine.\n");
+ timekeeping_last_warning = jiffies;
+ }
+ timekeeping_overflow_seen = 0;
+ }
+}
+
+static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
+{
+ cycle_t now, last, mask, max, delta;
+ unsigned int seq;
+
+ /*
+ * Since we're called holding a seqlock, the data may shift
+ * under us while we're doing the calculation. This can cause
+ * false positives, since we'd note a problem but throw the
+ * results away. So nest another seqlock here to atomically
+ * grab the points we are checking with.
+ */
+ do {
+ seq = read_seqcount_begin(&tk_core.seq);
+ now = tkr->read(tkr->clock);
+ last = tkr->cycle_last;
+ mask = tkr->mask;
+ max = tkr->clock->max_cycles;
+ } while (read_seqcount_retry(&tk_core.seq, seq));
+
+ delta = clocksource_delta(now, last, mask);
+
+ /*
+ * Try to catch underflows by checking if we are seeing small
+ * mask-relative negative values.
+ */
+ if (unlikely((~delta & mask) < (mask >> 3))) {
+ timekeeping_underflow_seen = 1;
+ delta = 0;
+ }
+
+ /* Cap delta value to the max_cycles values to avoid mult overflows */
+ if (unlikely(delta > max)) {
+ timekeeping_overflow_seen = 1;
+ delta = tkr->clock->max_cycles;
+ }
+
+ return delta;
+}
+#else
+static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
+{
+}
+static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
+{
+ cycle_t cycle_now, delta;
+
+ /* read clocksource */
+ cycle_now = tkr->read(tkr->clock);
+
+ /* calculate the delta since the last update_wall_time */
+ delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
+
+ return delta;
+}
+#endif
+
/**
* tk_setup_internals - Set up internals to use clocksource clock.
*
@@ -135,11 +247,16 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
u64 tmp, ntpinterval;
struct clocksource *old_clock;
- old_clock = tk->tkr.clock;
- tk->tkr.clock = clock;
- tk->tkr.read = clock->read;
- tk->tkr.mask = clock->mask;
- tk->tkr.cycle_last = tk->tkr.read(clock);
+ old_clock = tk->tkr_mono.clock;
+ tk->tkr_mono.clock = clock;
+ tk->tkr_mono.read = clock->read;
+ tk->tkr_mono.mask = clock->mask;
+ tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+
+ tk->tkr_raw.clock = clock;
+ tk->tkr_raw.read = clock->read;
+ tk->tkr_raw.mask = clock->mask;
+ tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
/* Do the ns -> cycle conversion first, using original mult */
tmp = NTP_INTERVAL_LENGTH;
@@ -163,11 +280,14 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
if (old_clock) {
int shift_change = clock->shift - old_clock->shift;
if (shift_change < 0)
- tk->tkr.xtime_nsec >>= -shift_change;
+ tk->tkr_mono.xtime_nsec >>= -shift_change;
else
- tk->tkr.xtime_nsec <<= shift_change;
+ tk->tkr_mono.xtime_nsec <<= shift_change;
}
- tk->tkr.shift = clock->shift;
+ tk->tkr_raw.xtime_nsec = 0;
+
+ tk->tkr_mono.shift = clock->shift;
+ tk->tkr_raw.shift = clock->shift;
tk->ntp_error = 0;
tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
@@ -178,7 +298,8 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
* active clocksource. These value will be adjusted via NTP
* to counteract clock drifting.
*/
- tk->tkr.mult = clock->mult;
+ tk->tkr_mono.mult = clock->mult;
+ tk->tkr_raw.mult = clock->mult;
tk->ntp_err_mult = 0;
}
@@ -193,14 +314,10 @@ static inline u32 arch_gettimeoffset(void) { return 0; }
static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
{
- cycle_t cycle_now, delta;
+ cycle_t delta;
s64 nsec;
- /* read clocksource: */
- cycle_now = tkr->read(tkr->clock);
-
- /* calculate the delta since the last update_wall_time: */
- delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
+ delta = timekeeping_get_delta(tkr);
nsec = delta * tkr->mult + tkr->xtime_nsec;
nsec >>= tkr->shift;
@@ -209,25 +326,6 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
return nsec + arch_gettimeoffset();
}
-static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
-{
- struct clocksource *clock = tk->tkr.clock;
- cycle_t cycle_now, delta;
- s64 nsec;
-
- /* read clocksource: */
- cycle_now = tk->tkr.read(clock);
-
- /* calculate the delta since the last update_wall_time: */
- delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
-
- /* convert delta to nanoseconds. */
- nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
-
- /* If arch requires, add in get_arch_timeoffset() */
- return nsec + arch_gettimeoffset();
-}
-
/**
* update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
* @tkr: Timekeeping readout base from which we take the update
@@ -267,18 +365,18 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
* slightly wrong timestamp (a few nanoseconds). See
* @ktime_get_mono_fast_ns.
*/
-static void update_fast_timekeeper(struct tk_read_base *tkr)
+static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf)
{
- struct tk_read_base *base = tk_fast_mono.base;
+ struct tk_read_base *base = tkf->base;
/* Force readers off to base[1] */
- raw_write_seqcount_latch(&tk_fast_mono.seq);
+ raw_write_seqcount_latch(&tkf->seq);
/* Update base[0] */
memcpy(base, tkr, sizeof(*base));
/* Force readers back to base[0] */
- raw_write_seqcount_latch(&tk_fast_mono.seq);
+ raw_write_seqcount_latch(&tkf->seq);
/* Update base[1] */
memcpy(base + 1, base, sizeof(*base));
@@ -316,22 +414,33 @@ static void update_fast_timekeeper(struct tk_read_base *tkr)
* of the following timestamps. Callers need to be aware of that and
* deal with it.
*/
-u64 notrace ktime_get_mono_fast_ns(void)
+static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
{
struct tk_read_base *tkr;
unsigned int seq;
u64 now;
do {
- seq = raw_read_seqcount(&tk_fast_mono.seq);
- tkr = tk_fast_mono.base + (seq & 0x01);
- now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr);
+ seq = raw_read_seqcount(&tkf->seq);
+ tkr = tkf->base + (seq & 0x01);
+ now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
+ } while (read_seqcount_retry(&tkf->seq, seq));
- } while (read_seqcount_retry(&tk_fast_mono.seq, seq));
return now;
}
+
+u64 ktime_get_mono_fast_ns(void)
+{
+ return __ktime_get_fast_ns(&tk_fast_mono);
+}
EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
+u64 ktime_get_raw_fast_ns(void)
+{
+ return __ktime_get_fast_ns(&tk_fast_raw);
+}
+EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
+
/* Suspend-time cycles value for halted fast timekeeper. */
static cycle_t cycles_at_suspend;
@@ -353,12 +462,17 @@ static cycle_t dummy_clock_read(struct clocksource *cs)
static void halt_fast_timekeeper(struct timekeeper *tk)
{
static struct tk_read_base tkr_dummy;
- struct tk_read_base *tkr = &tk->tkr;
+ struct tk_read_base *tkr = &tk->tkr_mono;
memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
cycles_at_suspend = tkr->read(tkr->clock);
tkr_dummy.read = dummy_clock_read;
- update_fast_timekeeper(&tkr_dummy);
+ update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
+
+ tkr = &tk->tkr_raw;
+ memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
+ tkr_dummy.read = dummy_clock_read;
+ update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
}
#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
@@ -369,8 +483,8 @@ static inline void update_vsyscall(struct timekeeper *tk)
xt = timespec64_to_timespec(tk_xtime(tk));
wm = timespec64_to_timespec(tk->wall_to_monotonic);
- update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult,
- tk->tkr.cycle_last);
+ update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult,
+ tk->tkr_mono.cycle_last);
}
static inline void old_vsyscall_fixup(struct timekeeper *tk)
@@ -387,11 +501,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)
* (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
* users are removed, this can be killed.
*/
- remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1);
- tk->tkr.xtime_nsec -= remainder;
- tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift;
+ remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1);
+ tk->tkr_mono.xtime_nsec -= remainder;
+ tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift;
tk->ntp_error += remainder << tk->ntp_error_shift;
- tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift;
+ tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift;
}
#else
#define old_vsyscall_fixup(tk)
@@ -456,17 +570,17 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
*/
seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
nsec = (u32) tk->wall_to_monotonic.tv_nsec;
- tk->tkr.base_mono = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
+ tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
/* Update the monotonic raw base */
- tk->base_raw = timespec64_to_ktime(tk->raw_time);
+ tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
/*
* The sum of the nanoseconds portions of xtime and
* wall_to_monotonic can be greater/equal one second. Take
* this into account before updating tk->ktime_sec.
*/
- nsec += (u32)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+ nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
if (nsec >= NSEC_PER_SEC)
seconds++;
tk->ktime_sec = seconds;
@@ -489,7 +603,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
memcpy(&shadow_timekeeper, &tk_core.timekeeper,
sizeof(tk_core.timekeeper));
- update_fast_timekeeper(&tk->tkr);
+ update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
+ update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
}
/**
@@ -501,22 +616,23 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
*/
static void timekeeping_forward_now(struct timekeeper *tk)
{
- struct clocksource *clock = tk->tkr.clock;
+ struct clocksource *clock = tk->tkr_mono.clock;
cycle_t cycle_now, delta;
s64 nsec;
- cycle_now = tk->tkr.read(clock);
- delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
- tk->tkr.cycle_last = cycle_now;
+ cycle_now = tk->tkr_mono.read(clock);
+ delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
+ tk->tkr_mono.cycle_last = cycle_now;
+ tk->tkr_raw.cycle_last = cycle_now;
- tk->tkr.xtime_nsec += delta * tk->tkr.mult;
+ tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult;
/* If arch requires, add in get_arch_timeoffset() */
- tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift;
+ tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;
tk_normalize_xtime(tk);
- nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
+ nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
timespec64_add_ns(&tk->raw_time, nsec);
}
@@ -537,7 +653,7 @@ int __getnstimeofday64(struct timespec64 *ts)
seq = read_seqcount_begin(&tk_core.seq);
ts->tv_sec = tk->xtime_sec;
- nsecs = timekeeping_get_ns(&tk->tkr);
+ nsecs = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -577,8 +693,8 @@ ktime_t ktime_get(void)
do {
seq = read_seqcount_begin(&tk_core.seq);
- base = tk->tkr.base_mono;
- nsecs = timekeeping_get_ns(&tk->tkr);
+ base = tk->tkr_mono.base;
+ nsecs = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -603,8 +719,8 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs)
do {
seq = read_seqcount_begin(&tk_core.seq);
- base = ktime_add(tk->tkr.base_mono, *offset);
- nsecs = timekeeping_get_ns(&tk->tkr);
+ base = ktime_add(tk->tkr_mono.base, *offset);
+ nsecs = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -645,8 +761,8 @@ ktime_t ktime_get_raw(void)
do {
seq = read_seqcount_begin(&tk_core.seq);
- base = tk->base_raw;
- nsecs = timekeeping_get_ns_raw(tk);
+ base = tk->tkr_raw.base;
+ nsecs = timekeeping_get_ns(&tk->tkr_raw);
} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -674,7 +790,7 @@ void ktime_get_ts64(struct timespec64 *ts)
do {
seq = read_seqcount_begin(&tk_core.seq);
ts->tv_sec = tk->xtime_sec;
- nsec = timekeeping_get_ns(&tk->tkr);
+ nsec = timekeeping_get_ns(&tk->tkr_mono);
tomono = tk->wall_to_monotonic;
} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -759,8 +875,8 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
ts_real->tv_sec = tk->xtime_sec;
ts_real->tv_nsec = 0;
- nsecs_raw = timekeeping_get_ns_raw(tk);
- nsecs_real = timekeeping_get_ns(&tk->tkr);
+ nsecs_raw = timekeeping_get_ns(&tk->tkr_raw);
+ nsecs_real = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -943,7 +1059,7 @@ static int change_clocksource(void *data)
*/
if (try_module_get(new->owner)) {
if (!new->enable || new->enable(new) == 0) {
- old = tk->tkr.clock;
+ old = tk->tkr_mono.clock;
tk_setup_internals(tk, new);
if (old->disable)
old->disable(old);
@@ -971,11 +1087,11 @@ int timekeeping_notify(struct clocksource *clock)
{
struct timekeeper *tk = &tk_core.timekeeper;
- if (tk->tkr.clock == clock)
+ if (tk->tkr_mono.clock == clock)
return 0;
stop_machine(change_clocksource, clock, NULL);
tick_clock_notify();
- return tk->tkr.clock == clock ? 0 : -1;
+ return tk->tkr_mono.clock == clock ? 0 : -1;
}
/**
@@ -993,7 +1109,7 @@ void getrawmonotonic64(struct timespec64 *ts)
do {
seq = read_seqcount_begin(&tk_core.seq);
- nsecs = timekeeping_get_ns_raw(tk);
+ nsecs = timekeeping_get_ns(&tk->tkr_raw);
ts64 = tk->raw_time;
} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -1016,7 +1132,7 @@ int timekeeping_valid_for_hres(void)
do {
seq = read_seqcount_begin(&tk_core.seq);
- ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
+ ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -1035,7 +1151,7 @@ u64 timekeeping_max_deferment(void)
do {
seq = read_seqcount_begin(&tk_core.seq);
- ret = tk->tkr.clock->max_idle_ns;
+ ret = tk->tkr_mono.clock->max_idle_ns;
} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -1114,7 +1230,6 @@ void __init timekeeping_init(void)
tk_set_xtime(tk, &now);
tk->raw_time.tv_sec = 0;
tk->raw_time.tv_nsec = 0;
- tk->base_raw.tv64 = 0;
if (boot.tv_sec == 0 && boot.tv_nsec == 0)
boot = tk_xtime(tk);
@@ -1200,7 +1315,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
void timekeeping_resume(void)
{
struct timekeeper *tk = &tk_core.timekeeper;
- struct clocksource *clock = tk->tkr.clock;
+ struct clocksource *clock = tk->tkr_mono.clock;
unsigned long flags;
struct timespec64 ts_new, ts_delta;
struct timespec tmp;
@@ -1228,16 +1343,16 @@ void timekeeping_resume(void)
* The less preferred source will only be tried if there is no better
* usable source. The rtc part is handled separately in rtc core code.
*/
- cycle_now = tk->tkr.read(clock);
+ cycle_now = tk->tkr_mono.read(clock);
if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
- cycle_now > tk->tkr.cycle_last) {
+ cycle_now > tk->tkr_mono.cycle_last) {
u64 num, max = ULLONG_MAX;
u32 mult = clock->mult;
u32 shift = clock->shift;
s64 nsec = 0;
- cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last,
- tk->tkr.mask);
+ cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
+ tk->tkr_mono.mask);
/*
* "cycle_delta * mutl" may cause 64 bits overflow, if the
@@ -1263,7 +1378,9 @@ void timekeeping_resume(void)
__timekeeping_inject_sleeptime(tk, &ts_delta);
/* Re-base the last cycle value */
- tk->tkr.cycle_last = cycle_now;
+ tk->tkr_mono.cycle_last = cycle_now;
+ tk->tkr_raw.cycle_last = cycle_now;
+
tk->ntp_error = 0;
timekeeping_suspended = 0;
timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
@@ -1416,15 +1533,15 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
*
* XXX - TODO: Doc ntp_error calculation.
*/
- if ((mult_adj > 0) && (tk->tkr.mult + mult_adj < mult_adj)) {
+ if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
/* NTP adjustment caused clocksource mult overflow */
WARN_ON_ONCE(1);
return;
}
- tk->tkr.mult += mult_adj;
+ tk->tkr_mono.mult += mult_adj;
tk->xtime_interval += interval;
- tk->tkr.xtime_nsec -= offset;
+ tk->tkr_mono.xtime_nsec -= offset;
tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
}
@@ -1486,13 +1603,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
tk->ntp_err_mult = 0;
}
- if (unlikely(tk->tkr.clock->maxadj &&
- (abs(tk->tkr.mult - tk->tkr.clock->mult)
- > tk->tkr.clock->maxadj))) {
+ if (unlikely(tk->tkr_mono.clock->maxadj &&
+ (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
+ > tk->tkr_mono.clock->maxadj))) {
printk_once(KERN_WARNING
"Adjusting %s more than 11%% (%ld vs %ld)\n",
- tk->tkr.clock->name, (long)tk->tkr.mult,
- (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj);
+ tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
+ (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
}
/*
@@ -1509,9 +1626,9 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
* We'll correct this error next time through this function, when
* xtime_nsec is not as small.
*/
- if (unlikely((s64)tk->tkr.xtime_nsec < 0)) {
- s64 neg = -(s64)tk->tkr.xtime_nsec;
- tk->tkr.xtime_nsec = 0;
+ if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
+ s64 neg = -(s64)tk->tkr_mono.xtime_nsec;
+ tk->tkr_mono.xtime_nsec = 0;
tk->ntp_error += neg << tk->ntp_error_shift;
}
}
@@ -1526,13 +1643,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
*/
static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
{
- u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift;
+ u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
unsigned int clock_set = 0;
- while (tk->tkr.xtime_nsec >= nsecps) {
+ while (tk->tkr_mono.xtime_nsec >= nsecps) {
int leap;
- tk->tkr.xtime_nsec -= nsecps;
+ tk->tkr_mono.xtime_nsec -= nsecps;
tk->xtime_sec++;
/* Figure out if its a leap sec and apply if needed */
@@ -1577,9 +1694,10 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
/* Accumulate one shifted interval */
offset -= interval;
- tk->tkr.cycle_last += interval;
+ tk->tkr_mono.cycle_last += interval;
+ tk->tkr_raw.cycle_last += interval;
- tk->tkr.xtime_nsec += tk->xtime_interval << shift;
+ tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
*clock_set |= accumulate_nsecs_to_secs(tk);
/* Accumulate raw time */
@@ -1622,14 +1740,17 @@ void update_wall_time(void)
#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
offset = real_tk->cycle_interval;
#else
- offset = clocksource_delta(tk->tkr.read(tk->tkr.clock),
- tk->tkr.cycle_last, tk->tkr.mask);
+ offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+ tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
#endif
/* Check if there's really nothing to do */
if (offset < real_tk->cycle_interval)
goto out;
+ /* Do some additional sanity checking */
+ timekeeping_check_update(real_tk, offset);
+
/*
* With NO_HZ we may have to accumulate many cycle_intervals
* (think "ticks") worth of time at once. To do this efficiently,
@@ -1784,8 +1905,8 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot,
do {
seq = read_seqcount_begin(&tk_core.seq);
- base = tk->tkr.base_mono;
- nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift;
+ base = tk->tkr_mono.base;
+ nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
*offs_real = tk->offs_real;
*offs_boot = tk->offs_boot;
@@ -1816,8 +1937,8 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot,
do {
seq = read_seqcount_begin(&tk_core.seq);
- base = tk->tkr.base_mono;
- nsecs = timekeeping_get_ns(&tk->tkr);
+ base = tk->tkr_mono.base;
+ nsecs = timekeeping_get_ns(&tk->tkr_mono);
*offs_real = tk->offs_real;
*offs_boot = tk->offs_boot;
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 61ed862cdd37..2cfd19485824 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -228,9 +228,35 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
print_name_offset(m, dev->set_next_event);
SEQ_printf(m, "\n");
- SEQ_printf(m, " set_mode: ");
- print_name_offset(m, dev->set_mode);
- SEQ_printf(m, "\n");
+ if (dev->set_mode) {
+ SEQ_printf(m, " set_mode: ");
+ print_name_offset(m, dev->set_mode);
+ SEQ_printf(m, "\n");
+ } else {
+ if (dev->set_mode_shutdown) {
+ SEQ_printf(m, " shutdown: ");
+ print_name_offset(m, dev->set_mode_shutdown);
+ SEQ_printf(m, "\n");
+ }
+
+ if (dev->set_mode_periodic) {
+ SEQ_printf(m, " periodic: ");
+ print_name_offset(m, dev->set_mode_periodic);
+ SEQ_printf(m, "\n");
+ }
+
+ if (dev->set_mode_oneshot) {
+ SEQ_printf(m, " oneshot: ");
+ print_name_offset(m, dev->set_mode_oneshot);
+ SEQ_printf(m, "\n");
+ }
+
+ if (dev->set_mode_resume) {
+ SEQ_printf(m, " resume: ");
+ print_name_offset(m, dev->set_mode_resume);
+ SEQ_printf(m, "\n");
+ }
+ }
SEQ_printf(m, " event_handler: ");
print_name_offset(m, dev->event_handler);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c5cefb3c009c..36b6fa88ce5b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -865,6 +865,19 @@ config SCHED_STACK_END_CHECK
data corruption or a sporadic crash at a later stage once the region
is examined. The runtime overhead introduced is minimal.
+config DEBUG_TIMEKEEPING
+ bool "Enable extra timekeeping sanity checking"
+ help
+ This option will enable additional timekeeping sanity checks
+ which may be helpful when diagnosing issues where timekeeping
+ problems are suspected.
+
+ This may include checks in the timekeeping hotpaths, so this
+ option may have a (very small) performance impact to some
+ workloads.
+
+ If unsure, say N.
+
config TIMER_STATS
bool "Collect kernel timers statistics"
depends on DEBUG_KERNEL && PROC_FS
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 626e93db28ba..6817b0350c71 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1260,6 +1260,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
int target_nid, last_cpupid = -1;
bool page_locked;
bool migrated = false;
+ bool was_writable;
int flags = 0;
/* A PROT_NONE fault should not end up here */
@@ -1291,17 +1292,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
flags |= TNF_FAULT_LOCAL;
}
- /*
- * Avoid grouping on DSO/COW pages in specific and RO pages
- * in general, RO pages shouldn't hurt as much anyway since
- * they can be in shared cache state.
- *
- * FIXME! This checks "pmd_dirty()" as an approximation of
- * "is this a read-only page", since checking "pmd_write()"
- * is even more broken. We haven't actually turned this into
- * a writable page, so pmd_write() will always be false.
- */
- if (!pmd_dirty(pmd))
+ /* See similar comment in do_numa_page for explanation */
+ if (!(vma->vm_flags & VM_WRITE))
flags |= TNF_NO_GROUP;
/*
@@ -1358,12 +1350,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (migrated) {
flags |= TNF_MIGRATED;
page_nid = target_nid;
- }
+ } else
+ flags |= TNF_MIGRATE_FAIL;
goto out;
clear_pmdnuma:
BUG_ON(!PageLocked(page));
+ was_writable = pmd_write(pmd);
pmd = pmd_modify(pmd, vma->vm_page_prot);
+ pmd = pmd_mkyoung(pmd);
+ if (was_writable)
+ pmd = pmd_mkwrite(pmd);
set_pmd_at(mm, haddr, pmdp, pmd);
update_mmu_cache_pmd(vma, addr, pmdp);
unlock_page(page);
@@ -1487,6 +1484,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
pmd_t entry;
+ bool preserve_write = prot_numa && pmd_write(*pmd);
ret = 1;
/*
@@ -1502,9 +1500,11 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
if (!prot_numa || !pmd_protnone(*pmd)) {
entry = pmdp_get_and_clear_notify(mm, addr, pmd);
entry = pmd_modify(entry, newprot);
+ if (preserve_write)
+ entry = pmd_mkwrite(entry);
ret = HPAGE_PMD_NR;
set_pmd_at(mm, addr, pmd, entry);
- BUG_ON(pmd_write(entry));
+ BUG_ON(!preserve_write && pmd_write(entry));
}
spin_unlock(ptl);
}
diff --git a/mm/memory.c b/mm/memory.c
index 411144f977b1..97839f5c8c30 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3035,6 +3035,7 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
int last_cpupid;
int target_nid;
bool migrated = false;
+ bool was_writable = pte_write(pte);
int flags = 0;
/* A PROT_NONE fault should not end up here */
@@ -3059,6 +3060,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
/* Make it present again */
pte = pte_modify(pte, vma->vm_page_prot);
pte = pte_mkyoung(pte);
+ if (was_writable)
+ pte = pte_mkwrite(pte);
set_pte_at(mm, addr, ptep, pte);
update_mmu_cache(vma, addr, ptep);
@@ -3069,16 +3072,14 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
}
/*
- * Avoid grouping on DSO/COW pages in specific and RO pages
- * in general, RO pages shouldn't hurt as much anyway since
- * they can be in shared cache state.
- *
- * FIXME! This checks "pmd_dirty()" as an approximation of
- * "is this a read-only page", since checking "pmd_write()"
- * is even more broken. We haven't actually turned this into
- * a writable page, so pmd_write() will always be false.
+ * Avoid grouping on RO pages in general. RO pages shouldn't hurt as
+ * much anyway since they can be in shared cache state. This misses
+ * the case where a mapping is writable but the process never writes
+ * to it but pte_write gets cleared during protection updates and
+ * pte_dirty has unpredictable behaviour between PTE scan updates,
+ * background writeback, dirty balancing and application behaviour.
*/
- if (!pte_dirty(pte))
+ if (!(vma->vm_flags & VM_WRITE))
flags |= TNF_NO_GROUP;
/*
@@ -3102,7 +3103,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (migrated) {
page_nid = target_nid;
flags |= TNF_MIGRATED;
- }
+ } else
+ flags |= TNF_MIGRATE_FAIL;
out:
if (page_nid != -1)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9fab10795bea..65842d688b7c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1092,6 +1092,10 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
return NULL;
arch_refresh_nodedata(nid, pgdat);
+ } else {
+ /* Reset the nr_zones and classzone_idx to 0 before reuse */
+ pgdat->nr_zones = 0;
+ pgdat->classzone_idx = 0;
}
/* we can use NODE_DATA(nid) from here */
@@ -1977,15 +1981,6 @@ void try_offline_node(int nid)
if (is_vmalloc_addr(zone->wait_table))
vfree(zone->wait_table);
}
-
- /*
- * Since there is no way to guarentee the address of pgdat/zone is not
- * on stack of any kernel threads or used by other kernel objects
- * without reference counting or other symchronizing method, do not
- * reset node_data and free pgdat here. Just reset it to 0 and reuse
- * the memory when the node is online again.
- */
- memset(pgdat, 0, sizeof(*pgdat));
}
EXPORT_SYMBOL(try_offline_node);
diff --git a/mm/mmap.c b/mm/mmap.c
index da9990acc08b..9ec50a368634 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -774,10 +774,8 @@ again: remove_next = 1 + (end > next->vm_end);
importer->anon_vma = exporter->anon_vma;
error = anon_vma_clone(importer, exporter);
- if (error) {
- importer->anon_vma = NULL;
+ if (error)
return error;
- }
}
}
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 44727811bf4c..88584838e704 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -75,6 +75,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
oldpte = *pte;
if (pte_present(oldpte)) {
pte_t ptent;
+ bool preserve_write = prot_numa && pte_write(oldpte);
/*
* Avoid trapping faults against the zero or KSM
@@ -94,6 +95,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
ptent = ptep_modify_prot_start(mm, addr, pte);
ptent = pte_modify(ptent, newprot);
+ if (preserve_write)
+ ptent = pte_mkwrite(ptent);
/* Avoid taking write faults for known dirty pages */
if (dirty_accountable && pte_dirty(ptent) &&
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 45e187b2d971..644bcb665773 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -857,8 +857,11 @@ static void bdi_update_write_bandwidth(struct backing_dev_info *bdi,
* bw * elapsed + write_bandwidth * (period - elapsed)
* write_bandwidth = ---------------------------------------------------
* period
+ *
+ * @written may have decreased due to account_page_redirty().
+ * Avoid underflowing @bw calculation.
*/
- bw = written - bdi->written_stamp;
+ bw = written - min(written, bdi->written_stamp);
bw *= HZ;
if (unlikely(elapsed > period)) {
do_div(bw, elapsed);
@@ -922,7 +925,7 @@ static void global_update_bandwidth(unsigned long thresh,
unsigned long now)
{
static DEFINE_SPINLOCK(dirty_lock);
- static unsigned long update_time;
+ static unsigned long update_time = INITIAL_JIFFIES;
/*
* check locklessly first to optimize away locking for the most time
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 72f5ac381ab3..755a42c76eb4 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -103,6 +103,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype)
if (!is_migrate_isolate_page(buddy)) {
__isolate_free_page(page, order);
+ kernel_map_pages(page, (1 << order), 1);
set_page_refcounted(page);
isolated_page = page;
}
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 75c1f2878519..29f2f8b853ae 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -265,8 +265,15 @@ int walk_page_range(unsigned long start, unsigned long end,
vma = vma->vm_next;
err = walk_page_test(start, next, walk);
- if (err > 0)
+ if (err > 0) {
+ /*
+ * positive return values are purely for
+ * controlling the pagewalk, so should never
+ * be passed to the callers.
+ */
+ err = 0;
continue;
+ }
if (err < 0)
break;
}
diff --git a/mm/rmap.c b/mm/rmap.c
index 5e3e09081164..c161a14b6a8f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -287,6 +287,13 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
return 0;
enomem_failure:
+ /*
+ * dst->anon_vma is dropped here otherwise its degree can be incorrectly
+ * decremented in unlink_anon_vmas().
+ * We can safely do this because callers of anon_vma_clone() don't care
+ * about dst->anon_vma if anon_vma_clone() failed.
+ */
+ dst->anon_vma = NULL;
unlink_anon_vmas(dst);
return -ENOMEM;
}
diff --git a/mm/slub.c b/mm/slub.c
index 6832c4eab104..82c473780c91 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2449,7 +2449,8 @@ redo:
do {
tid = this_cpu_read(s->cpu_slab->tid);
c = raw_cpu_ptr(s->cpu_slab);
- } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid));
+ } while (IS_ENABLED(CONFIG_PREEMPT) &&
+ unlikely(tid != READ_ONCE(c->tid)));
/*
* Irqless object alloc/free algorithm used here depends on sequence
@@ -2718,7 +2719,8 @@ redo:
do {
tid = this_cpu_read(s->cpu_slab->tid);
c = raw_cpu_ptr(s->cpu_slab);
- } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid));
+ } while (IS_ENABLED(CONFIG_PREEMPT) &&
+ unlikely(tid != READ_ONCE(c->tid)));
/* Same with comment on barrier() in slab_alloc_node() */
barrier();
diff --git a/net/compat.c b/net/compat.c
index 94d3d5e97883..f7bd286a8280 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -49,6 +49,13 @@ ssize_t get_compat_msghdr(struct msghdr *kmsg,
__get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
__get_user(kmsg->msg_flags, &umsg->msg_flags))
return -EFAULT;
+
+ if (!uaddr)
+ kmsg->msg_namelen = 0;
+
+ if (kmsg->msg_namelen < 0)
+ return -EINVAL;
+
if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
kmsg->msg_namelen = sizeof(struct sockaddr_storage);
kmsg->msg_control = compat_ptr(tmp3);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 99e810f84671..cf5e82f39d3b 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -272,9 +272,9 @@ static void trace_packet(const struct sk_buff *skb,
&chainname, &comment, &rulenum) != 0)
break;
- nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo,
- "TRACE: %s:%s:%s:%u ",
- tablename, chainname, comment, rulenum);
+ nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo,
+ "TRACE: %s:%s:%s:%u ",
+ tablename, chainname, comment, rulenum);
}
#endif
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a2a796c5536b..1db253e36045 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2773,15 +2773,11 @@ void tcp_send_fin(struct sock *sk)
} else {
/* Socket is locked, keep trying until memory is available. */
for (;;) {
- skb = alloc_skb_fclone(MAX_TCP_HEADER,
- sk->sk_allocation);
+ skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
if (skb)
break;
yield();
}
-
- /* Reserve space for headers and prepare control bits. */
- skb_reserve(skb, MAX_TCP_HEADER);
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
tcp_init_nondata_skb(skb, tp->write_seq,
TCPHDR_ACK | TCPHDR_FIN);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index b4d5e1d97c1b..27ca79682efb 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -104,6 +104,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
goto again;
flp6->saddr = saddr;
}
+ err = rt->dst.error;
goto out;
}
again:
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e080fbbbc0e5..bb00c6f2a885 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -298,9 +298,9 @@ static void trace_packet(const struct sk_buff *skb,
&chainname, &comment, &rulenum) != 0)
break;
- nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo,
- "TRACE: %s:%s:%s:%u ",
- tablename, chainname, comment, rulenum);
+ nf_log_trace(net, AF_INET6, hook, skb, in, out, &trace_loginfo,
+ "TRACE: %s:%s:%s:%u ",
+ tablename, chainname, comment, rulenum);
}
#endif
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index ab889bb16b3c..be2c0ba82c85 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -112,11 +112,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
fptr->nexthdr = nexthdr;
fptr->reserved = 0;
- if (skb_shinfo(skb)->ip6_frag_id)
- fptr->identification = skb_shinfo(skb)->ip6_frag_id;
- else
- ipv6_select_ident(fptr,
- (struct rt6_info *)skb_dst(skb));
+ if (!skb_shinfo(skb)->ip6_frag_id)
+ ipv6_proxy_select_ident(skb);
+ fptr->identification = skb_shinfo(skb)->ip6_frag_id;
/* Fragment the skb. ipv6 header and the remaining fields of the
* fragment header are updated in ipv6_gso_segment()
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 0d8448f19dfe..675d12c69e32 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -212,6 +212,30 @@ void nf_log_packet(struct net *net,
}
EXPORT_SYMBOL(nf_log_packet);
+void nf_log_trace(struct net *net,
+ u_int8_t pf,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo, const char *fmt, ...)
+{
+ va_list args;
+ char prefix[NF_LOG_PREFIXLEN];
+ const struct nf_logger *logger;
+
+ rcu_read_lock();
+ logger = rcu_dereference(net->nf.nf_loggers[pf]);
+ if (logger) {
+ va_start(args, fmt);
+ vsnprintf(prefix, sizeof(prefix), fmt, args);
+ va_end(args);
+ logger->logfn(net, pf, hooknum, skb, in, out, loginfo, prefix);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(nf_log_trace);
+
#define S_SIZE (1024 - (sizeof(unsigned int) + 1))
struct nf_log_buf {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 6ab777912237..ac1a9528dbf2 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1225,7 +1225,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
if (nla[NFTA_CHAIN_POLICY]) {
if ((chain != NULL &&
- !(chain->flags & NFT_BASE_CHAIN)) ||
+ !(chain->flags & NFT_BASE_CHAIN)))
+ return -EOPNOTSUPP;
+
+ if (chain == NULL &&
nla[NFTA_CHAIN_HOOK] == NULL)
return -EOPNOTSUPP;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 3b90eb2b2c55..2d298dccb6dd 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -94,10 +94,10 @@ static void nft_trace_packet(const struct nft_pktinfo *pkt,
{
struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
- nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
- pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
- chain->table->name, chain->name, comments[type],
- rulenum);
+ nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
+ pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
+ chain->table->name, chain->name, comments[type],
+ rulenum);
}
unsigned int
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index a5599fc51a6f..54330fb5efaf 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -77,6 +77,9 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple,
if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM])
return -EINVAL;
+ /* Not all fields are initialized so first zero the tuple */
+ memset(tuple, 0, sizeof(struct nf_conntrack_tuple));
+
tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM]));
tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]);
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 213584cf04b3..65f3e2b6be44 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -133,6 +133,9 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
break;
case AF_INET6:
+ if (proto)
+ entry->e6.ipv6.flags |= IP6T_F_PROTO;
+
entry->e6.ipv6.proto = proto;
entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
break;
@@ -344,6 +347,9 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
break;
case AF_INET6:
+ if (proto)
+ entry->e6.ipv6.flags |= IP6T_F_PROTO;
+
entry->e6.ipv6.proto = proto;
entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
break;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index c82df0a48fcd..37c15e674884 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -153,6 +153,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
iter->err = err;
goto out;
}
+
+ continue;
}
if (iter->count < iter->skip)
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index ef8a926752a9..50e1e5aaf4ce 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -513,8 +513,8 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par)
{
const struct ip6t_ip6 *i = par->entryinfo;
- if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP)
- && !(i->flags & IP6T_INV_PROTO))
+ if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) &&
+ !(i->invflags & IP6T_INV_PROTO))
return 0;
pr_info("Can be used only in combination with "
diff --git a/net/socket.c b/net/socket.c
index bbedbfcb42c2..245330ca0015 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1702,6 +1702,8 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
if (len > INT_MAX)
len = INT_MAX;
+ if (unlikely(!access_ok(VERIFY_READ, buff, len)))
+ return -EFAULT;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
@@ -1760,6 +1762,8 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
if (size > INT_MAX)
size = INT_MAX;
+ if (unlikely(!access_ok(VERIFY_WRITE, ubuf, size)))
+ return -EFAULT;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 4e511221a0c1..0db571340edb 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -22,6 +22,14 @@ TARGETS += vm
TARGETS_HOTPLUG = cpu-hotplug
TARGETS_HOTPLUG += memory-hotplug
+# Clear LDFLAGS and MAKEFLAGS if called from main
+# Makefile to avoid test build failures when test
+# Makefile doesn't have explicit build rules.
+ifeq (1,$(MAKELEVEL))
+undefine LDFLAGS
+override MAKEFLAGS =
+endif
+
all:
for TARGET in $(TARGETS); do \
make -C $$TARGET; \
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a2214d9609bd..cc6a25d95fbf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -471,7 +471,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
r = -ENOMEM;
- kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+ kvm->memslots = kvm_kvzalloc(sizeof(struct kvm_memslots));
if (!kvm->memslots)
goto out_err_no_srcu;
@@ -522,7 +522,7 @@ out_err_no_srcu:
out_err_no_disable:
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(kvm->buses[i]);
- kfree(kvm->memslots);
+ kvfree(kvm->memslots);
kvm_arch_free_vm(kvm);
return ERR_PTR(r);
}
@@ -578,7 +578,7 @@ static void kvm_free_physmem(struct kvm *kvm)
kvm_for_each_memslot(memslot, slots)
kvm_free_physmem_slot(kvm, memslot, NULL);
- kfree(kvm->memslots);
+ kvfree(kvm->memslots);
}
static void kvm_destroy_devices(struct kvm *kvm)
@@ -871,10 +871,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out_free;
}
- slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
- GFP_KERNEL);
+ slots = kvm_kvzalloc(sizeof(struct kvm_memslots));
if (!slots)
goto out_free;
+ memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
slot = id_to_memslot(slots, mem->slot);
@@ -917,7 +917,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
kvm_arch_commit_memory_region(kvm, mem, &old, change);
kvm_free_physmem_slot(kvm, &old, &new);
- kfree(old_memslots);
+ kvfree(old_memslots);
/*
* IOMMU mapping: New slots need to be mapped. Old slots need to be
@@ -936,7 +936,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
return 0;
out_slots:
- kfree(slots);
+ kvfree(slots);
out_free:
kvm_free_physmem_slot(kvm, &new, &old);
out: