summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/device-mapper/dm-crypt.txt21
-rw-r--r--Documentation/device-mapper/dm-flakey.txt48
-rw-r--r--Documentation/device-mapper/dm-raid.txt138
-rw-r--r--Documentation/fault-injection/fault-injection.txt3
-rw-r--r--Documentation/feature-removal-schedule.txt9
-rw-r--r--Documentation/frv/booting.txt13
-rw-r--r--Documentation/ioctl/ioctl-number.txt1
-rw-r--r--Documentation/kernel-parameters.txt6
-rw-r--r--Documentation/m68k/kernel-options.txt14
-rw-r--r--MAINTAINERS2
-rw-r--r--arch/cris/arch-v10/drivers/sync_serial.c6
-rw-r--r--arch/cris/arch-v10/kernel/irq.c3
-rw-r--r--arch/cris/include/asm/thread_info.h6
-rw-r--r--arch/tile/include/asm/Kbuild38
-rw-r--r--arch/tile/include/asm/bug.h1
-rw-r--r--arch/tile/include/asm/bugs.h1
-rw-r--r--arch/tile/include/asm/cputime.h1
-rw-r--r--arch/tile/include/asm/device.h1
-rw-r--r--arch/tile/include/asm/div64.h1
-rw-r--r--arch/tile/include/asm/emergency-restart.h1
-rw-r--r--arch/tile/include/asm/errno.h1
-rw-r--r--arch/tile/include/asm/fb.h1
-rw-r--r--arch/tile/include/asm/fcntl.h1
-rw-r--r--arch/tile/include/asm/fixmap.h6
-rw-r--r--arch/tile/include/asm/ioctl.h1
-rw-r--r--arch/tile/include/asm/ioctls.h1
-rw-r--r--arch/tile/include/asm/ipc.h1
-rw-r--r--arch/tile/include/asm/ipcbuf.h1
-rw-r--r--arch/tile/include/asm/irq_regs.h1
-rw-r--r--arch/tile/include/asm/kdebug.h1
-rw-r--r--arch/tile/include/asm/local.h1
-rw-r--r--arch/tile/include/asm/module.h1
-rw-r--r--arch/tile/include/asm/msgbuf.h1
-rw-r--r--arch/tile/include/asm/mutex.h1
-rw-r--r--arch/tile/include/asm/param.h1
-rw-r--r--arch/tile/include/asm/parport.h1
-rw-r--r--arch/tile/include/asm/poll.h1
-rw-r--r--arch/tile/include/asm/posix_types.h1
-rw-r--r--arch/tile/include/asm/resource.h1
-rw-r--r--arch/tile/include/asm/scatterlist.h1
-rw-r--r--arch/tile/include/asm/sembuf.h1
-rw-r--r--arch/tile/include/asm/serial.h1
-rw-r--r--arch/tile/include/asm/shmbuf.h1
-rw-r--r--arch/tile/include/asm/shmparam.h1
-rw-r--r--arch/tile/include/asm/socket.h1
-rw-r--r--arch/tile/include/asm/sockios.h1
-rw-r--r--arch/tile/include/asm/statfs.h1
-rw-r--r--arch/tile/include/asm/termbits.h1
-rw-r--r--arch/tile/include/asm/termios.h1
-rw-r--r--arch/tile/include/asm/types.h1
-rw-r--r--arch/tile/include/asm/ucontext.h1
-rw-r--r--arch/tile/include/asm/xor.h1
-rw-r--r--arch/tile/include/hv/drv_srom_intf.h41
-rw-r--r--arch/tile/kernel/time.c5
-rw-r--r--arch/tile/mm/init.c3
-rw-r--r--block/blk-core.c6
-rw-r--r--block/blk-timeout.c5
-rw-r--r--drivers/acpi/acpica/acglobal.h6
-rw-r--r--drivers/acpi/acpica/aclocal.h1
-rw-r--r--drivers/acpi/acpica/acpredef.h1
-rw-r--r--drivers/acpi/acpica/nspredef.c19
-rw-r--r--drivers/acpi/acpica/nsrepair2.c15
-rw-r--r--drivers/acpi/acpica/tbinstal.c27
-rw-r--r--drivers/acpi/battery.c82
-rw-r--r--drivers/acpi/dock.c4
-rw-r--r--drivers/acpi/ec_sys.c2
-rw-r--r--drivers/acpi/fan.c2
-rw-r--r--drivers/acpi/osl.c25
-rw-r--r--drivers/acpi/pci_irq.c58
-rw-r--r--drivers/acpi/pci_root.c3
-rw-r--r--drivers/acpi/processor_thermal.c2
-rw-r--r--drivers/acpi/sbs.c13
-rw-r--r--drivers/acpi/sleep.c16
-rw-r--r--drivers/acpi/sysfs.c4
-rw-r--r--drivers/acpi/thermal.c2
-rw-r--r--drivers/acpi/video.c2
-rw-r--r--drivers/ata/libata-acpi.c4
-rw-r--r--drivers/char/Kconfig11
-rw-r--r--drivers/char/Makefile2
-rw-r--r--drivers/char/ramoops.c8
-rw-r--r--drivers/char/tile-srom.c481
-rw-r--r--drivers/char/tpm/tpm_tis.c7
-rw-r--r--drivers/firmware/efivars.c6
-rw-r--r--drivers/input/serio/xilinx_ps2.c2
-rw-r--r--drivers/md/Kconfig5
-rw-r--r--drivers/md/dm-crypt.c62
-rw-r--r--drivers/md/dm-flakey.c270
-rw-r--r--drivers/md/dm-io.c29
-rw-r--r--drivers/md/dm-ioctl.c89
-rw-r--r--drivers/md/dm-kcopyd.c42
-rw-r--r--drivers/md/dm-log-userspace-base.c3
-rw-r--r--drivers/md/dm-log.c32
-rw-r--r--drivers/md/dm-mpath.c147
-rw-r--r--drivers/md/dm-raid.c621
-rw-r--r--drivers/md/dm-snap-persistent.c80
-rw-r--r--drivers/md/dm-snap.c84
-rw-r--r--drivers/md/dm-table.c155
-rw-r--r--drivers/md/dm.c75
-rw-r--r--drivers/md/dm.h2
-rw-r--r--drivers/of/address.c2
-rw-r--r--drivers/of/base.c137
-rw-r--r--drivers/of/fdt.c4
-rw-r--r--drivers/pci/hotplug/acpiphp_glue.c2
-rw-r--r--drivers/rtc/rtc-omap.c2
-rw-r--r--drivers/spi/spi-pl022.c11
-rw-r--r--drivers/target/iscsi/Kconfig1
-rw-r--r--drivers/target/iscsi/iscsi_target.c15
-rw-r--r--drivers/target/iscsi/iscsi_target_configfs.c2
-rw-r--r--drivers/target/iscsi/iscsi_target_nego.c2
-rw-r--r--drivers/target/target_core_transport.c33
-rw-r--r--drivers/target/tcm_fc/tcm_fc.h5
-rw-r--r--drivers/target/tcm_fc/tfc_cmd.c1
-rw-r--r--drivers/target/tcm_fc/tfc_io.c121
-rw-r--r--drivers/thermal/Kconfig8
-rw-r--r--drivers/thermal/thermal_sys.c142
-rw-r--r--drivers/video/backlight/Kconfig2
-rw-r--r--drivers/video/backlight/aat2870_bl.c8
-rw-r--r--drivers/watchdog/Kconfig3
-rw-r--r--drivers/watchdog/nv_tco.c8
-rw-r--r--drivers/watchdog/shwdt.c2
-rw-r--r--fs/Kconfig15
-rw-r--r--fs/btrfs/Makefile4
-rw-r--r--fs/btrfs/acl.c17
-rw-r--r--fs/btrfs/compression.c14
-rw-r--r--fs/btrfs/ctree.h30
-rw-r--r--fs/btrfs/dir-item.c30
-rw-r--r--fs/btrfs/extent-tree.c45
-rw-r--r--fs/btrfs/extent_io.c139
-rw-r--r--fs/btrfs/extent_io.h20
-rw-r--r--fs/btrfs/extent_map.c155
-rw-r--r--fs/btrfs/file-item.c7
-rw-r--r--fs/btrfs/file.c21
-rw-r--r--fs/btrfs/inode.c98
-rw-r--r--fs/btrfs/ioctl.c3
-rw-r--r--fs/btrfs/ref-cache.c68
-rw-r--r--fs/btrfs/ref-cache.h52
-rw-r--r--fs/btrfs/root-tree.c5
-rw-r--r--fs/btrfs/transaction.c65
-rw-r--r--fs/btrfs/tree-log.c12
-rw-r--r--fs/btrfs/volumes.c12
-rw-r--r--fs/dcache.c1
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/stack.c5
-rw-r--r--include/acpi/acpi_drivers.h2
-rw-r--r--include/acpi/acpixf.h3
-rw-r--r--include/acpi/processor.h2
-rw-r--r--include/linux/acpi.h1
-rw-r--r--include/linux/device-mapper.h43
-rw-r--r--include/linux/dm-ioctl.h4
-rw-r--r--include/linux/dm-kcopyd.h15
-rw-r--r--include/linux/fault-inject.h18
-rw-r--r--include/linux/gfp.h2
-rw-r--r--include/linux/idr.h4
-rw-r--r--include/linux/memcontrol.h8
-rw-r--r--include/linux/mfd/aat2870.h2
-rw-r--r--include/linux/of.h23
-rw-r--r--include/linux/of_fdt.h1
-rw-r--r--include/linux/radix-tree.h37
-rw-r--r--include/linux/shmem_fs.h17
-rw-r--r--include/linux/swapops.h23
-rw-r--r--include/linux/thermal.h22
-rw-r--r--init/main.c2
-rw-r--r--ipc/shm.c7
-rw-r--r--kernel/taskstats.c18
-rw-r--r--lib/fault-inject.c20
-rw-r--r--lib/idr.c67
-rw-r--r--lib/radix-tree.c121
-rw-r--r--mm/failslab.c14
-rw-r--r--mm/filemap.c106
-rw-r--r--mm/memcontrol.c66
-rw-r--r--mm/mincore.c11
-rw-r--r--mm/page_alloc.c13
-rw-r--r--mm/shmem.c1493
-rw-r--r--mm/swapfile.c20
-rw-r--r--mm/truncate.c8
-rw-r--r--sound/core/pcm_compat.c2
-rw-r--r--sound/core/rtctimer.c2
-rw-r--r--sound/pci/asihpi/hpidspcd.c9
-rw-r--r--sound/pci/asihpi/hpioctl.c19
-rw-r--r--sound/pci/rme9652/hdspm.c109
-rw-r--r--sound/soc/txx9/txx9aclc.c1
-rw-r--r--tools/power/x86/turbostat/turbostat.c46
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c5
183 files changed, 4154 insertions, 2475 deletions
diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt
index 6b5c42dbbe84..2c656ae43ba7 100644
--- a/Documentation/device-mapper/dm-crypt.txt
+++ b/Documentation/device-mapper/dm-crypt.txt
@@ -4,7 +4,8 @@ dm-crypt
Device-Mapper's "crypt" target provides transparent encryption of block devices
using the kernel crypto API.
-Parameters: <cipher> <key> <iv_offset> <device path> <offset>
+Parameters: <cipher> <key> <iv_offset> <device path> \
+ <offset> [<#opt_params> <opt_params>]
<cipher>
Encryption cipher and an optional IV generation mode.
@@ -37,6 +38,24 @@ Parameters: <cipher> <key> <iv_offset> <device path> <offset>
<offset>
Starting sector within the device where the encrypted data begins.
+<#opt_params>
+ Number of optional parameters. If there are no optional parameters,
+ the optional paramaters section can be skipped or #opt_params can be zero.
+ Otherwise #opt_params is the number of following arguments.
+
+ Example of optional parameters section:
+ 1 allow_discards
+
+allow_discards
+ Block discard requests (a.k.a. TRIM) are passed through the crypt device.
+ The default is to ignore discard requests.
+
+ WARNING: Assess the specific security risks carefully before enabling this
+ option. For example, allowing discards on encrypted devices may lead to
+ the leak of information about the ciphertext device (filesystem type,
+ used space etc.) if the discarded blocks can be located easily on the
+ device later.
+
Example scripts
===============
LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
diff --git a/Documentation/device-mapper/dm-flakey.txt b/Documentation/device-mapper/dm-flakey.txt
index c8efdfd19a65..6ff5c2327227 100644
--- a/Documentation/device-mapper/dm-flakey.txt
+++ b/Documentation/device-mapper/dm-flakey.txt
@@ -1,17 +1,53 @@
dm-flakey
=========
-This target is the same as the linear target except that it returns I/O
-errors periodically. It's been found useful in simulating failing
-devices for testing purposes.
+This target is the same as the linear target except that it exhibits
+unreliable behaviour periodically. It's been found useful in simulating
+failing devices for testing purposes.
Starting from the time the table is loaded, the device is available for
-<up interval> seconds, then returns errors for <down interval> seconds,
-and then this cycle repeats.
+<up interval> seconds, then exhibits unreliable behaviour for <down
+interval> seconds, and then this cycle repeats.
-Parameters: <dev path> <offset> <up interval> <down interval>
+Also, consider using this in combination with the dm-delay target too,
+which can delay reads and writes and/or send them to different
+underlying devices.
+
+Table parameters
+----------------
+ <dev path> <offset> <up interval> <down interval> \
+ [<num_features> [<feature arguments>]]
+
+Mandatory parameters:
<dev path>: Full pathname to the underlying block-device, or a
"major:minor" device-number.
<offset>: Starting sector within the device.
<up interval>: Number of seconds device is available.
<down interval>: Number of seconds device returns errors.
+
+Optional feature parameters:
+ If no feature parameters are present, during the periods of
+ unreliability, all I/O returns errors.
+
+ drop_writes:
+ All write I/O is silently ignored.
+ Read I/O is handled correctly.
+
+ corrupt_bio_byte <Nth_byte> <direction> <value> <flags>:
+ During <down interval>, replace <Nth_byte> of the data of
+ each matching bio with <value>.
+
+ <Nth_byte>: The offset of the byte to replace.
+ Counting starts at 1, to replace the first byte.
+ <direction>: Either 'r' to corrupt reads or 'w' to corrupt writes.
+ 'w' is incompatible with drop_writes.
+ <value>: The value (from 0-255) to write.
+ <flags>: Perform the replacement only if bio->bi_rw has all the
+ selected flags set.
+
+Examples:
+ corrupt_bio_byte 32 r 1 0
+ - replaces the 32nd byte of READ bios with the value 1
+
+ corrupt_bio_byte 224 w 0 32
+ - replaces the 224th byte of REQ_META (=32) bios with the value 0
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt
index 33b6b7071ac8..2a8c11331d2d 100644
--- a/Documentation/device-mapper/dm-raid.txt
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -1,70 +1,108 @@
-Device-mapper RAID (dm-raid) is a bridge from DM to MD. It
-provides a way to use device-mapper interfaces to access the MD RAID
-drivers.
+dm-raid
+-------
-As with all device-mapper targets, the nominal public interfaces are the
-constructor (CTR) tables and the status outputs (both STATUSTYPE_INFO
-and STATUSTYPE_TABLE). The CTR table looks like the following:
+The device-mapper RAID (dm-raid) target provides a bridge from DM to MD.
+It allows the MD RAID drivers to be accessed using a device-mapper
+interface.
-1: <s> <l> raid \
-2: <raid_type> <#raid_params> <raid_params> \
-3: <#raid_devs> <meta_dev1> <dev1> .. <meta_devN> <devN>
-
-Line 1 contains the standard first three arguments to any device-mapper
-target - the start, length, and target type fields. The target type in
-this case is "raid".
-
-Line 2 contains the arguments that define the particular raid
-type/personality/level, the required arguments for that raid type, and
-any optional arguments. Possible raid types include: raid4, raid5_la,
-raid5_ls, raid5_rs, raid6_zr, raid6_nr, and raid6_nc. (raid1 is
-planned for the future.) The list of required and optional parameters
-is the same for all the current raid types. The required parameters are
-positional, while the optional parameters are given as key/value pairs.
-The possible parameters are as follows:
- <chunk_size> Chunk size in sectors.
- [[no]sync] Force/Prevent RAID initialization
- [rebuild <idx>] Rebuild the drive indicated by the index
- [daemon_sleep <ms>] Time between bitmap daemon work to clear bits
- [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization
- [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization
- [max_write_behind <sectors>] See '-write-behind=' (man mdadm)
- [stripe_cache <sectors>] Stripe cache size for higher RAIDs
-
-Line 3 contains the list of devices that compose the array in
-metadata/data device pairs. If the metadata is stored separately, a '-'
-is given for the metadata device position. If a drive has failed or is
-missing at creation time, a '-' can be given for both the metadata and
-data drives for a given position.
-
-NB. Currently all metadata devices must be specified as '-'.
-
-Examples:
-# RAID4 - 4 data drives, 1 parity
+The target is named "raid" and it accepts the following parameters:
+
+ <raid_type> <#raid_params> <raid_params> \
+ <#raid_devs> <metadata_dev0> <dev0> [.. <metadata_devN> <devN>]
+
+<raid_type>:
+ raid1 RAID1 mirroring
+ raid4 RAID4 dedicated parity disk
+ raid5_la RAID5 left asymmetric
+ - rotating parity 0 with data continuation
+ raid5_ra RAID5 right asymmetric
+ - rotating parity N with data continuation
+ raid5_ls RAID5 left symmetric
+ - rotating parity 0 with data restart
+ raid5_rs RAID5 right symmetric
+ - rotating parity N with data restart
+ raid6_zr RAID6 zero restart
+ - rotating parity zero (left-to-right) with data restart
+ raid6_nr RAID6 N restart
+ - rotating parity N (right-to-left) with data restart
+ raid6_nc RAID6 N continue
+ - rotating parity N (right-to-left) with data continuation
+
+ Refererence: Chapter 4 of
+ http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf
+
+<#raid_params>: The number of parameters that follow.
+
+<raid_params> consists of
+ Mandatory parameters:
+ <chunk_size>: Chunk size in sectors. This parameter is often known as
+ "stripe size". It is the only mandatory parameter and
+ is placed first.
+
+ followed by optional parameters (in any order):
+ [sync|nosync] Force or prevent RAID initialization.
+
+ [rebuild <idx>] Rebuild drive number idx (first drive is 0).
+
+ [daemon_sleep <ms>]
+ Interval between runs of the bitmap daemon that
+ clear bits. A longer interval means less bitmap I/O but
+ resyncing after a failure is likely to take longer.
+
+ [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization
+ [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization
+ [write_mostly <idx>] Drive index is write-mostly
+ [max_write_behind <sectors>] See '-write-behind=' (man mdadm)
+ [stripe_cache <sectors>] Stripe cache size (higher RAIDs only)
+ [region_size <sectors>]
+ The region_size multiplied by the number of regions is the
+ logical size of the array. The bitmap records the device
+ synchronisation state for each region.
+
+<#raid_devs>: The number of devices composing the array.
+ Each device consists of two entries. The first is the device
+ containing the metadata (if any); the second is the one containing the
+ data.
+
+ If a drive has failed or is missing at creation time, a '-' can be
+ given for both the metadata and data drives for a given position.
+
+
+Example tables
+--------------
+# RAID4 - 4 data drives, 1 parity (no metadata devices)
# No metadata devices specified to hold superblock/bitmap info
# Chunk size of 1MiB
# (Lines separated for easy reading)
+
0 1960893648 raid \
raid4 1 2048 \
5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
-# RAID4 - 4 data drives, 1 parity (no metadata devices)
+# RAID4 - 4 data drives, 1 parity (with metadata devices)
# Chunk size of 1MiB, force RAID initialization,
# min recovery rate at 20 kiB/sec/disk
+
0 1960893648 raid \
- raid4 4 2048 min_recovery_rate 20 sync\
- 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
+ raid4 4 2048 sync min_recovery_rate 20 \
+ 5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82
-Performing a 'dmsetup table' should display the CTR table used to
-construct the mapping (with possible reordering of optional
-parameters).
+'dmsetup table' displays the table used to construct the mapping.
+The optional parameters are always printed in the order listed
+above with "sync" or "nosync" always output ahead of the other
+arguments, regardless of the order used when originally loading the table.
+Arguments that can be repeated are ordered by value.
-Performing a 'dmsetup status' will yield information on the state and
-health of the array. The output is as follows:
+'dmsetup status' yields information on the state and health of the
+array.
+The output is as follows:
1: <s> <l> raid \
2: <raid_type> <#devices> <1 health char for each dev> <resync_ratio>
-Line 1 is standard DM output. Line 2 is best shown by example:
+Line 1 is the standard output produced by device-mapper.
+Line 2 is produced by the raid target, and best explained by example:
0 1960893648 raid raid4 5 AAAAA 2/490221568
Here we can see the RAID type is raid4, there are 5 devices - all of
which are 'A'live, and the array is 2/490221568 complete with recovery.
+Faulty or missing devices are marked 'D'. Devices that are out-of-sync
+are marked 'a'.
diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 7be15e44d481..82a5d250d75e 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -143,8 +143,7 @@ o provide a way to configure fault attributes
failslab, fail_page_alloc, and fail_make_request use this way.
Helper functions:
- init_fault_attr_dentries(entries, attr, name);
- void cleanup_fault_attr_dentries(entries);
+ fault_create_debugfs_attr(name, parent, attr);
- module parameters
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index ea0bace0124a..43f48098220d 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -296,15 +296,6 @@ Who: Ravikiran Thirumalai <kiran@scalex86.org>
---------------------------
-What: CONFIG_THERMAL_HWMON
-When: January 2009
-Why: This option was introduced just to allow older lm-sensors userspace
- to keep working over the upgrade to 2.6.26. At the scheduled time of
- removal fixed lm-sensors (2.x or 3.x) should be readily available.
-Who: Rene Herman <rene.herman@gmail.com>
-
----------------------------
-
What: Code that is now under CONFIG_WIRELESS_EXT_SYSFS
(in net/core/net-sysfs.c)
When: After the only user (hal) has seen a release with the patches
diff --git a/Documentation/frv/booting.txt b/Documentation/frv/booting.txt
index ace200b7c214..37c4d84a0e57 100644
--- a/Documentation/frv/booting.txt
+++ b/Documentation/frv/booting.txt
@@ -106,13 +106,20 @@ separated by spaces:
To use the first on-chip serial port at baud rate 115200, no parity, 8
bits, and no flow control.
- (*) root=/dev/<xxxx>
+ (*) root=<xxxx>
- This specifies the device upon which the root filesystem resides. For
- example:
+ This specifies the device upon which the root filesystem resides. It
+ may be specified by major and minor number, device path, or even
+ partition uuid, if supported. For example:
/dev/nfs NFS root filesystem
/dev/mtdblock3 Fourth RedBoot partition on the System Flash
+ PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=1
+ first partition after the partition with the given UUID
+ 253:0 Device with major 253 and minor 0
+
+ Authoritative information can be found in
+ "Documentation/kernel-parameters.txt".
(*) rw
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 72ba8d51dbc1..845a191004b1 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -292,6 +292,7 @@ Code Seq#(hex) Include File Comments
<mailto:buk@buks.ipn.de>
0xA0 all linux/sdp/sdp.h Industrial Device Project
<mailto:kenji@bitgate.com>
+0xA2 00-0F arch/tile/include/asm/hardwall.h
0xA3 80-8F Port ACL in development:
<mailto:tlewis@mindspring.com>
0xA3 90-9F linux/dtlk.h
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 26a83743af19..865e39f1850c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -163,6 +163,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
See also Documentation/power/pm.txt, pci=noacpi
+ acpi_rsdp= [ACPI,EFI,KEXEC]
+ Pass the RSDP address to the kernel, mostly used
+ on machines running EFI runtime service to boot the
+ second kernel for kdump.
+
acpi_apic_instance= [ACPI, IOAPIC]
Format: <int>
2: use 2nd APIC table, if available
@@ -2240,6 +2245,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
ro [KNL] Mount root device read-only on boot
root= [KNL] Root filesystem
+ See name_to_dev_t comment in init/do_mounts.c.
rootdelay= [KNL] Delay (in seconds) to pause before attempting to
mount the root filesystem
diff --git a/Documentation/m68k/kernel-options.txt b/Documentation/m68k/kernel-options.txt
index c93bed66e25d..97d45f276fe6 100644
--- a/Documentation/m68k/kernel-options.txt
+++ b/Documentation/m68k/kernel-options.txt
@@ -129,6 +129,20 @@ decimal 11 is the major of SCSI CD-ROMs, and the minor 0 stands for
the first of these. You can find out all valid major numbers by
looking into include/linux/major.h.
+In addition to major and minor numbers, if the device containing your
+root partition uses a partition table format with unique partition
+identifiers, then you may use them. For instance,
+"root=PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF". It is also
+possible to reference another partition on the same device using a
+known partition UUID as the starting point. For example,
+if partition 5 of the device has the UUID of
+00112233-4455-6677-8899-AABBCCDDEEFF then partition 3 may be found as
+follows:
+ PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=-2
+
+Authoritative information can be found in
+"Documentation/kernel-parameters.txt".
+
2.2) ro, rw
-----------
diff --git a/MAINTAINERS b/MAINTAINERS
index c9c6324a7a9f..6c84a219c833 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4722,6 +4722,7 @@ S: Maintained
F: drivers/of
F: include/linux/of*.h
K: of_get_property
+K: of_match_table
OPENRISC ARCHITECTURE
M: Jonas Bonn <jonas@southpole.se>
@@ -6318,6 +6319,7 @@ F: include/linux/sysv_fs.h
TARGET SUBSYSTEM
M: Nicholas A. Bellinger <nab@linux-iscsi.org>
L: linux-scsi@vger.kernel.org
+L: target-devel@vger.kernel.org
L: http://groups.google.com/group/linux-iscsi-target-dev
W: http://www.linux-iscsi.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/lio-core-2.6.git master
diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c
index 850265373611..466af40c5822 100644
--- a/arch/cris/arch-v10/drivers/sync_serial.c
+++ b/arch/cris/arch-v10/drivers/sync_serial.c
@@ -158,7 +158,7 @@ static int sync_serial_open(struct inode *inode, struct file *file);
static int sync_serial_release(struct inode *inode, struct file *file);
static unsigned int sync_serial_poll(struct file *filp, poll_table *wait);
-static int sync_serial_ioctl(struct file *file,
+static long sync_serial_ioctl(struct file *file,
unsigned int cmd, unsigned long arg);
static ssize_t sync_serial_write(struct file *file, const char *buf,
size_t count, loff_t *ppos);
@@ -625,11 +625,11 @@ static int sync_serial_open(struct inode *inode, struct file *file)
*R_IRQ_MASK1_SET = 1 << port->data_avail_bit;
DEBUG(printk(KERN_DEBUG "sser%d rec started\n", dev));
}
- ret = 0;
+ err = 0;
out:
mutex_unlock(&sync_serial_mutex);
- return ret;
+ return err;
}
static int sync_serial_release(struct inode *inode, struct file *file)
diff --git a/arch/cris/arch-v10/kernel/irq.c b/arch/cris/arch-v10/kernel/irq.c
index 907cfb5a873d..ba0e5965d6e3 100644
--- a/arch/cris/arch-v10/kernel/irq.c
+++ b/arch/cris/arch-v10/kernel/irq.c
@@ -20,6 +20,9 @@
#define crisv10_mask_irq(irq_nr) (*R_VECT_MASK_CLR = 1 << (irq_nr));
#define crisv10_unmask_irq(irq_nr) (*R_VECT_MASK_SET = 1 << (irq_nr));
+extern void kgdb_init(void);
+extern void breakpoint(void);
+
/* don't use set_int_vector, it bypasses the linux interrupt handlers. it is
* global just so that the kernel gdb can use it.
*/
diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h
index 29b74a105830..332f19c54557 100644
--- a/arch/cris/include/asm/thread_info.h
+++ b/arch/cris/include/asm/thread_info.h
@@ -11,8 +11,6 @@
#ifdef __KERNEL__
-#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
-
#ifndef __ASSEMBLY__
#include <asm/types.h>
#include <asm/processor.h>
@@ -67,8 +65,10 @@ struct thread_info {
#define init_thread_info (init_thread_union.thread_info)
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
/* thread information allocation */
-#define alloc_thread_info(tsk, node) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1))
+#define alloc_thread_info_node(tsk, node) \
+ ((struct thread_info *) __get_free_pages(GFP_KERNEL, 1))
#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
#endif /* !__ASSEMBLY__ */
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 849ab2fa1f5c..aec60dc06007 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -2,3 +2,41 @@ include include/asm-generic/Kbuild.asm
header-y += ucontext.h
header-y += hardwall.h
+
+generic-y += bug.h
+generic-y += bugs.h
+generic-y += cputime.h
+generic-y += device.h
+generic-y += div64.h
+generic-y += emergency-restart.h
+generic-y += errno.h
+generic-y += fb.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipc.h
+generic-y += ipcbuf.h
+generic-y += irq_regs.h
+generic-y += kdebug.h
+generic-y += local.h
+generic-y += module.h
+generic-y += msgbuf.h
+generic-y += mutex.h
+generic-y += param.h
+generic-y += parport.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += scatterlist.h
+generic-y += sembuf.h
+generic-y += serial.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h
+generic-y += xor.h
diff --git a/arch/tile/include/asm/bug.h b/arch/tile/include/asm/bug.h
deleted file mode 100644
index b12fd89e42e9..000000000000
--- a/arch/tile/include/asm/bug.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bug.h>
diff --git a/arch/tile/include/asm/bugs.h b/arch/tile/include/asm/bugs.h
deleted file mode 100644
index 61791e1ad9f5..000000000000
--- a/arch/tile/include/asm/bugs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bugs.h>
diff --git a/arch/tile/include/asm/cputime.h b/arch/tile/include/asm/cputime.h
deleted file mode 100644
index 6d68ad7e0ea3..000000000000
--- a/arch/tile/include/asm/cputime.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/cputime.h>
diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h
deleted file mode 100644
index f0a4c256403b..000000000000
--- a/arch/tile/include/asm/device.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/device.h>
diff --git a/arch/tile/include/asm/div64.h b/arch/tile/include/asm/div64.h
deleted file mode 100644
index 6cd978cefb28..000000000000
--- a/arch/tile/include/asm/div64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/arch/tile/include/asm/emergency-restart.h b/arch/tile/include/asm/emergency-restart.h
deleted file mode 100644
index 3711bd9d50bd..000000000000
--- a/arch/tile/include/asm/emergency-restart.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/emergency-restart.h>
diff --git a/arch/tile/include/asm/errno.h b/arch/tile/include/asm/errno.h
deleted file mode 100644
index 4c82b503d92f..000000000000
--- a/arch/tile/include/asm/errno.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/errno.h>
diff --git a/arch/tile/include/asm/fb.h b/arch/tile/include/asm/fb.h
deleted file mode 100644
index 3a4988e8df45..000000000000
--- a/arch/tile/include/asm/fb.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/fb.h>
diff --git a/arch/tile/include/asm/fcntl.h b/arch/tile/include/asm/fcntl.h
deleted file mode 100644
index 46ab12db5739..000000000000
--- a/arch/tile/include/asm/fcntl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/fcntl.h>
diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h
index 51537ff9265a..c66f7933beaa 100644
--- a/arch/tile/include/asm/fixmap.h
+++ b/arch/tile/include/asm/fixmap.h
@@ -75,12 +75,6 @@ extern void __set_fixmap(enum fixed_addresses idx,
#define set_fixmap(idx, phys) \
__set_fixmap(idx, phys, PAGE_KERNEL)
-/*
- * Some hardware wants to get fixmapped without caching.
- */
-#define set_fixmap_nocache(idx, phys) \
- __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
-
#define clear_fixmap(idx) \
__set_fixmap(idx, 0, __pgprot(0))
diff --git a/arch/tile/include/asm/ioctl.h b/arch/tile/include/asm/ioctl.h
deleted file mode 100644
index b279fe06dfe5..000000000000
--- a/arch/tile/include/asm/ioctl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctl.h>
diff --git a/arch/tile/include/asm/ioctls.h b/arch/tile/include/asm/ioctls.h
deleted file mode 100644
index ec34c760665e..000000000000
--- a/arch/tile/include/asm/ioctls.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctls.h>
diff --git a/arch/tile/include/asm/ipc.h b/arch/tile/include/asm/ipc.h
deleted file mode 100644
index a46e3d9c2a3f..000000000000
--- a/arch/tile/include/asm/ipc.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipc.h>
diff --git a/arch/tile/include/asm/ipcbuf.h b/arch/tile/include/asm/ipcbuf.h
deleted file mode 100644
index 84c7e51cb6d0..000000000000
--- a/arch/tile/include/asm/ipcbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipcbuf.h>
diff --git a/arch/tile/include/asm/irq_regs.h b/arch/tile/include/asm/irq_regs.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/arch/tile/include/asm/irq_regs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/tile/include/asm/kdebug.h b/arch/tile/include/asm/kdebug.h
deleted file mode 100644
index 6ece1b037665..000000000000
--- a/arch/tile/include/asm/kdebug.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kdebug.h>
diff --git a/arch/tile/include/asm/local.h b/arch/tile/include/asm/local.h
deleted file mode 100644
index c11c530f74d0..000000000000
--- a/arch/tile/include/asm/local.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local.h>
diff --git a/arch/tile/include/asm/module.h b/arch/tile/include/asm/module.h
deleted file mode 100644
index 1e4b79fe8584..000000000000
--- a/arch/tile/include/asm/module.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/module.h>
diff --git a/arch/tile/include/asm/msgbuf.h b/arch/tile/include/asm/msgbuf.h
deleted file mode 100644
index 809134c644a6..000000000000
--- a/arch/tile/include/asm/msgbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/msgbuf.h>
diff --git a/arch/tile/include/asm/mutex.h b/arch/tile/include/asm/mutex.h
deleted file mode 100644
index ff6101aa2c71..000000000000
--- a/arch/tile/include/asm/mutex.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/mutex-dec.h>
diff --git a/arch/tile/include/asm/param.h b/arch/tile/include/asm/param.h
deleted file mode 100644
index 965d45427975..000000000000
--- a/arch/tile/include/asm/param.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/param.h>
diff --git a/arch/tile/include/asm/parport.h b/arch/tile/include/asm/parport.h
deleted file mode 100644
index cf252af64590..000000000000
--- a/arch/tile/include/asm/parport.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/parport.h>
diff --git a/arch/tile/include/asm/poll.h b/arch/tile/include/asm/poll.h
deleted file mode 100644
index c98509d3149e..000000000000
--- a/arch/tile/include/asm/poll.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/poll.h>
diff --git a/arch/tile/include/asm/posix_types.h b/arch/tile/include/asm/posix_types.h
deleted file mode 100644
index 22cae6230ceb..000000000000
--- a/arch/tile/include/asm/posix_types.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/posix_types.h>
diff --git a/arch/tile/include/asm/resource.h b/arch/tile/include/asm/resource.h
deleted file mode 100644
index 04bc4db8921b..000000000000
--- a/arch/tile/include/asm/resource.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/resource.h>
diff --git a/arch/tile/include/asm/scatterlist.h b/arch/tile/include/asm/scatterlist.h
deleted file mode 100644
index 35d786fe93ae..000000000000
--- a/arch/tile/include/asm/scatterlist.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/scatterlist.h>
diff --git a/arch/tile/include/asm/sembuf.h b/arch/tile/include/asm/sembuf.h
deleted file mode 100644
index 7673b83cfef7..000000000000
--- a/arch/tile/include/asm/sembuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/sembuf.h>
diff --git a/arch/tile/include/asm/serial.h b/arch/tile/include/asm/serial.h
deleted file mode 100644
index a0cb0caff152..000000000000
--- a/arch/tile/include/asm/serial.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/serial.h>
diff --git a/arch/tile/include/asm/shmbuf.h b/arch/tile/include/asm/shmbuf.h
deleted file mode 100644
index 83c05fc2de38..000000000000
--- a/arch/tile/include/asm/shmbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/shmbuf.h>
diff --git a/arch/tile/include/asm/shmparam.h b/arch/tile/include/asm/shmparam.h
deleted file mode 100644
index 93f30deb95d0..000000000000
--- a/arch/tile/include/asm/shmparam.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/shmparam.h>
diff --git a/arch/tile/include/asm/socket.h b/arch/tile/include/asm/socket.h
deleted file mode 100644
index 6b71384b9d8b..000000000000
--- a/arch/tile/include/asm/socket.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/socket.h>
diff --git a/arch/tile/include/asm/sockios.h b/arch/tile/include/asm/sockios.h
deleted file mode 100644
index def6d4746ee7..000000000000
--- a/arch/tile/include/asm/sockios.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/sockios.h>
diff --git a/arch/tile/include/asm/statfs.h b/arch/tile/include/asm/statfs.h
deleted file mode 100644
index 0b91fe198c20..000000000000
--- a/arch/tile/include/asm/statfs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/statfs.h>
diff --git a/arch/tile/include/asm/termbits.h b/arch/tile/include/asm/termbits.h
deleted file mode 100644
index 3935b106de79..000000000000
--- a/arch/tile/include/asm/termbits.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/termbits.h>
diff --git a/arch/tile/include/asm/termios.h b/arch/tile/include/asm/termios.h
deleted file mode 100644
index 280d78a9d966..000000000000
--- a/arch/tile/include/asm/termios.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/termios.h>
diff --git a/arch/tile/include/asm/types.h b/arch/tile/include/asm/types.h
deleted file mode 100644
index b9e79bc580dd..000000000000
--- a/arch/tile/include/asm/types.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/types.h>
diff --git a/arch/tile/include/asm/ucontext.h b/arch/tile/include/asm/ucontext.h
deleted file mode 100644
index 9bc07b9f30fb..000000000000
--- a/arch/tile/include/asm/ucontext.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ucontext.h>
diff --git a/arch/tile/include/asm/xor.h b/arch/tile/include/asm/xor.h
deleted file mode 100644
index c82eb12a5b18..000000000000
--- a/arch/tile/include/asm/xor.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/xor.h>
diff --git a/arch/tile/include/hv/drv_srom_intf.h b/arch/tile/include/hv/drv_srom_intf.h
new file mode 100644
index 000000000000..6395faa6d9e6
--- /dev/null
+++ b/arch/tile/include/hv/drv_srom_intf.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * @file drv_srom_intf.h
+ * Interface definitions for the SPI Flash ROM driver.
+ */
+
+#ifndef _SYS_HV_INCLUDE_DRV_SROM_INTF_H
+#define _SYS_HV_INCLUDE_DRV_SROM_INTF_H
+
+/** Read this offset to get the total device size. */
+#define SROM_TOTAL_SIZE_OFF 0xF0000000
+
+/** Read this offset to get the device sector size. */
+#define SROM_SECTOR_SIZE_OFF 0xF0000004
+
+/** Read this offset to get the device page size. */
+#define SROM_PAGE_SIZE_OFF 0xF0000008
+
+/** Write this offset to flush any pending writes. */
+#define SROM_FLUSH_OFF 0xF1000000
+
+/** Write this offset, plus the byte offset of the start of a sector, to
+ * erase a sector. Any write data is ignored, but there must be at least
+ * one byte of write data. Only applies when the driver is in MTD mode.
+ */
+#define SROM_ERASE_OFF 0xF2000000
+
+#endif /* _SYS_HV_INCLUDE_DRV_SROM_INTF_H */
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
index c4be58cc5d50..f6f50f2a5e37 100644
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -78,7 +78,6 @@ static struct clocksource cycle_counter_cs = {
.rating = 300,
.read = clocksource_get_cycles,
.mask = CLOCKSOURCE_MASK(64),
- .shift = 22, /* typical value, e.g. x86 tsc uses this */
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
@@ -91,8 +90,6 @@ void __init setup_clock(void)
cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED);
sched_clock_mult =
clocksource_hz2mult(cycles_per_sec, SCHED_CLOCK_SHIFT);
- cycle_counter_cs.mult =
- clocksource_hz2mult(cycles_per_sec, cycle_counter_cs.shift);
}
void __init calibrate_delay(void)
@@ -107,7 +104,7 @@ void __init calibrate_delay(void)
void __init time_init(void)
{
/* Initialize and register the clock source. */
- clocksource_register(&cycle_counter_cs);
+ clocksource_register_hz(&cycle_counter_cs, cycles_per_sec);
/* Start up the tile-timer interrupt source on the boot cpu. */
setup_tile_timer();
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 4e10c4023028..7309988c9794 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -836,8 +836,7 @@ void __init mem_init(void)
#endif
#ifdef CONFIG_FLATMEM
- if (!mem_map)
- BUG();
+ BUG_ON(!mem_map);
#endif
#ifdef CONFIG_HIGHMEM
diff --git a/block/blk-core.c b/block/blk-core.c
index b850bedad229..b627558c461f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1368,8 +1368,10 @@ static bool should_fail_request(struct hd_struct *part, unsigned int bytes)
static int __init fail_make_request_debugfs(void)
{
- return init_fault_attr_dentries(&fail_make_request,
- "fail_make_request");
+ struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
+ NULL, &fail_make_request);
+
+ return IS_ERR(dir) ? PTR_ERR(dir) : 0;
}
late_initcall(fail_make_request_debugfs);
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 4f0c06c7a338..780354888958 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -28,7 +28,10 @@ int blk_should_fake_timeout(struct request_queue *q)
static int __init fail_io_timeout_debugfs(void)
{
- return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout");
+ struct dentry *dir = fault_create_debugfs_attr("fail_io_timeout",
+ NULL, &fail_io_timeout);
+
+ return IS_ERR(dir) ? PTR_ERR(dir) : 0;
}
late_initcall(fail_io_timeout_debugfs);
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 73863d86f022..76dc02f15574 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -126,6 +126,12 @@ u8 ACPI_INIT_GLOBAL(acpi_gbl_copy_dsdt_locally, FALSE);
*/
u8 ACPI_INIT_GLOBAL(acpi_gbl_truncate_io_addresses, FALSE);
+/*
+ * Disable runtime checking and repair of values returned by control methods.
+ * Use only if the repair is causing a problem on a particular machine.
+ */
+u8 ACPI_INIT_GLOBAL(acpi_gbl_disable_auto_repair, FALSE);
+
/* acpi_gbl_FADT is a local copy of the FADT, converted to a common format. */
struct acpi_table_fadt acpi_gbl_FADT;
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index c7f743ca395b..5552125d8340 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -357,6 +357,7 @@ struct acpi_predefined_data {
char *pathname;
const union acpi_predefined_info *predefined;
union acpi_operand_object *parent_package;
+ struct acpi_namespace_node *node;
u32 flags;
u8 node_flags;
};
diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h
index 94e73c97cf85..c445cca490ea 100644
--- a/drivers/acpi/acpica/acpredef.h
+++ b/drivers/acpi/acpica/acpredef.h
@@ -468,6 +468,7 @@ static const union acpi_predefined_info predefined_names[] =
{{"_SWS", 0, ACPI_RTYPE_INTEGER}},
{{"_TC1", 0, ACPI_RTYPE_INTEGER}},
{{"_TC2", 0, ACPI_RTYPE_INTEGER}},
+ {{"_TDL", 0, ACPI_RTYPE_INTEGER}},
{{"_TIP", 1, ACPI_RTYPE_INTEGER}},
{{"_TIV", 1, ACPI_RTYPE_INTEGER}},
{{"_TMP", 0, ACPI_RTYPE_INTEGER}},
diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c
index 9fb03fa8ffde..c845c8089f39 100644
--- a/drivers/acpi/acpica/nspredef.c
+++ b/drivers/acpi/acpica/nspredef.c
@@ -193,14 +193,20 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
}
/*
- * 1) We have a return value, but if one wasn't expected, just exit, this is
- * not a problem. For example, if the "Implicit Return" feature is
- * enabled, methods will always return a value.
+ * Return value validation and possible repair.
*
- * 2) If the return value can be of any type, then we cannot perform any
- * validation, exit.
+ * 1) Don't perform return value validation/repair if this feature
+ * has been disabled via a global option.
+ *
+ * 2) We have a return value, but if one wasn't expected, just exit,
+ * this is not a problem. For example, if the "Implicit Return"
+ * feature is enabled, methods will always return a value.
+ *
+ * 3) If the return value can be of any type, then we cannot perform
+ * any validation, just exit.
*/
- if ((!predefined->info.expected_btypes) ||
+ if (acpi_gbl_disable_auto_repair ||
+ (!predefined->info.expected_btypes) ||
(predefined->info.expected_btypes == ACPI_RTYPE_ALL)) {
goto cleanup;
}
@@ -212,6 +218,7 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
goto cleanup;
}
data->predefined = predefined;
+ data->node = node;
data->node_flags = node->flags;
data->pathname = pathname;
diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c
index 973883babee1..024c4f263f87 100644
--- a/drivers/acpi/acpica/nsrepair2.c
+++ b/drivers/acpi/acpica/nsrepair2.c
@@ -503,6 +503,21 @@ acpi_ns_repair_TSS(struct acpi_predefined_data *data,
{
union acpi_operand_object *return_object = *return_object_ptr;
acpi_status status;
+ struct acpi_namespace_node *node;
+
+ /*
+ * We can only sort the _TSS return package if there is no _PSS in the
+ * same scope. This is because if _PSS is present, the ACPI specification
+ * dictates that the _TSS Power Dissipation field is to be ignored, and
+ * therefore some BIOSs leave garbage values in the _TSS Power field(s).
+ * In this case, it is best to just return the _TSS package as-is.
+ * (May, 2011)
+ */
+ status =
+ acpi_ns_get_node(data->node, "^_PSS", ACPI_NS_NO_UPSEARCH, &node);
+ if (ACPI_SUCCESS(status)) {
+ return (AE_OK);
+ }
status = acpi_ns_check_sorted_list(data, return_object, 5, 1,
ACPI_SORT_DESCENDING,
diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c
index 48db0944ce4a..62365f6075dd 100644
--- a/drivers/acpi/acpica/tbinstal.c
+++ b/drivers/acpi/acpica/tbinstal.c
@@ -126,12 +126,29 @@ acpi_tb_add_table(struct acpi_table_desc *table_desc, u32 *table_index)
}
/*
- * Originally, we checked the table signature for "SSDT" or "PSDT" here.
- * Next, we added support for OEMx tables, signature "OEM".
- * Valid tables were encountered with a null signature, so we've just
- * given up on validating the signature, since it seems to be a waste
- * of code. The original code was removed (05/2008).
+ * Validate the incoming table signature.
+ *
+ * 1) Originally, we checked the table signature for "SSDT" or "PSDT".
+ * 2) We added support for OEMx tables, signature "OEM".
+ * 3) Valid tables were encountered with a null signature, so we just
+ * gave up on validating the signature, (05/2008).
+ * 4) We encountered non-AML tables such as the MADT, which caused
+ * interpreter errors and kernel faults. So now, we once again allow
+ * only "SSDT", "OEMx", and now, also a null signature. (05/2011).
*/
+ if ((table_desc->pointer->signature[0] != 0x00) &&
+ (!ACPI_COMPARE_NAME(table_desc->pointer->signature, ACPI_SIG_SSDT))
+ && (ACPI_STRNCMP(table_desc->pointer->signature, "OEM", 3))) {
+ ACPI_ERROR((AE_INFO,
+ "Table has invalid signature [%4.4s] (0x%8.8X), must be SSDT or OEMx",
+ acpi_ut_valid_acpi_name(*(u32 *)table_desc->
+ pointer->
+ signature) ? table_desc->
+ pointer->signature : "????",
+ *(u32 *)table_desc->pointer->signature));
+
+ return_ACPI_STATUS(AE_BAD_SIGNATURE);
+ }
(void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 2c661353e8f2..87c0a8daa99a 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -55,6 +55,9 @@
#define ACPI_BATTERY_NOTIFY_INFO 0x81
#define ACPI_BATTERY_NOTIFY_THRESHOLD 0x82
+/* Battery power unit: 0 means mW, 1 means mA */
+#define ACPI_BATTERY_POWER_UNIT_MA 1
+
#define _COMPONENT ACPI_BATTERY_COMPONENT
ACPI_MODULE_NAME("battery");
@@ -91,11 +94,6 @@ MODULE_DEVICE_TABLE(acpi, battery_device_ids);
enum {
ACPI_BATTERY_ALARM_PRESENT,
ACPI_BATTERY_XINFO_PRESENT,
- /* For buggy DSDTs that report negative 16-bit values for either
- * charging or discharging current and/or report 0 as 65536
- * due to bad math.
- */
- ACPI_BATTERY_QUIRK_SIGNED16_CURRENT,
ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY,
};
@@ -301,7 +299,8 @@ static enum power_supply_property energy_battery_props[] = {
#ifdef CONFIG_ACPI_PROCFS_POWER
inline char *acpi_battery_units(struct acpi_battery *battery)
{
- return (battery->power_unit)?"mA":"mW";
+ return (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) ?
+ "mA" : "mW";
}
#endif
@@ -461,9 +460,17 @@ static int acpi_battery_get_state(struct acpi_battery *battery)
battery->update_time = jiffies;
kfree(buffer.pointer);
- if (test_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags) &&
- battery->rate_now != -1)
+ /* For buggy DSDTs that report negative 16-bit values for either
+ * charging or discharging current and/or report 0 as 65536
+ * due to bad math.
+ */
+ if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA &&
+ battery->rate_now != ACPI_BATTERY_VALUE_UNKNOWN &&
+ (s16)(battery->rate_now) < 0) {
battery->rate_now = abs((s16)battery->rate_now);
+ printk_once(KERN_WARNING FW_BUG "battery: (dis)charge rate"
+ " invalid.\n");
+ }
if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags)
&& battery->capacity_now >= 0 && battery->capacity_now <= 100)
@@ -544,7 +551,7 @@ static int sysfs_add_battery(struct acpi_battery *battery)
{
int result;
- if (battery->power_unit) {
+ if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) {
battery->bat.properties = charge_battery_props;
battery->bat.num_properties =
ARRAY_SIZE(charge_battery_props);
@@ -566,18 +573,16 @@ static int sysfs_add_battery(struct acpi_battery *battery)
static void sysfs_remove_battery(struct acpi_battery *battery)
{
- if (!battery->bat.dev)
+ mutex_lock(&battery->lock);
+ if (!battery->bat.dev) {
+ mutex_unlock(&battery->lock);
return;
+ }
+
device_remove_file(battery->bat.dev, &alarm_attr);
power_supply_unregister(&battery->bat);
battery->bat.dev = NULL;
-}
-
-static void acpi_battery_quirks(struct acpi_battery *battery)
-{
- if (dmi_name_in_vendors("Acer") && battery->power_unit) {
- set_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags);
- }
+ mutex_unlock(&battery->lock);
}
/*
@@ -592,7 +597,7 @@ static void acpi_battery_quirks(struct acpi_battery *battery)
*
* Handle this correctly so that they won't break userspace.
*/
-static void acpi_battery_quirks2(struct acpi_battery *battery)
+static void acpi_battery_quirks(struct acpi_battery *battery)
{
if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags))
return ;
@@ -623,13 +628,15 @@ static int acpi_battery_update(struct acpi_battery *battery)
result = acpi_battery_get_info(battery);
if (result)
return result;
- acpi_battery_quirks(battery);
acpi_battery_init_alarm(battery);
}
- if (!battery->bat.dev)
- sysfs_add_battery(battery);
+ if (!battery->bat.dev) {
+ result = sysfs_add_battery(battery);
+ if (result)
+ return result;
+ }
result = acpi_battery_get_state(battery);
- acpi_battery_quirks2(battery);
+ acpi_battery_quirks(battery);
return result;
}
@@ -863,7 +870,7 @@ DECLARE_FILE_FUNCTIONS(alarm);
}, \
}
-static struct battery_file {
+static const struct battery_file {
struct file_operations ops;
mode_t mode;
const char *name;
@@ -948,9 +955,12 @@ static int battery_notify(struct notifier_block *nb,
struct acpi_battery *battery = container_of(nb, struct acpi_battery,
pm_nb);
switch (mode) {
+ case PM_POST_HIBERNATION:
case PM_POST_SUSPEND:
- sysfs_remove_battery(battery);
- sysfs_add_battery(battery);
+ if (battery->bat.dev) {
+ sysfs_remove_battery(battery);
+ sysfs_add_battery(battery);
+ }
break;
}
@@ -975,25 +985,33 @@ static int acpi_battery_add(struct acpi_device *device)
if (ACPI_SUCCESS(acpi_get_handle(battery->device->handle,
"_BIX", &handle)))
set_bit(ACPI_BATTERY_XINFO_PRESENT, &battery->flags);
- acpi_battery_update(battery);
+ result = acpi_battery_update(battery);
+ if (result)
+ goto fail;
#ifdef CONFIG_ACPI_PROCFS_POWER
result = acpi_battery_add_fs(device);
#endif
- if (!result) {
- printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n",
- ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device),
- device->status.battery_present ? "present" : "absent");
- } else {
+ if (result) {
#ifdef CONFIG_ACPI_PROCFS_POWER
acpi_battery_remove_fs(device);
#endif
- kfree(battery);
+ goto fail;
}
+ printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n",
+ ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device),
+ device->status.battery_present ? "present" : "absent");
+
battery->pm_nb.notifier_call = battery_notify;
register_pm_notifier(&battery->pm_nb);
return result;
+
+fail:
+ sysfs_remove_battery(battery);
+ mutex_destroy(&battery->lock);
+ kfree(battery);
+ return result;
}
static int acpi_battery_remove(struct acpi_device *device, int type)
diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c
index 1864ad3cf895..19a61136d848 100644
--- a/drivers/acpi/dock.c
+++ b/drivers/acpi/dock.c
@@ -77,7 +77,7 @@ struct dock_dependent_device {
struct list_head list;
struct list_head hotplug_list;
acpi_handle handle;
- struct acpi_dock_ops *ops;
+ const struct acpi_dock_ops *ops;
void *context;
};
@@ -589,7 +589,7 @@ EXPORT_SYMBOL_GPL(unregister_dock_notifier);
* the dock driver after _DCK is executed.
*/
int
-register_hotplug_dock_device(acpi_handle handle, struct acpi_dock_ops *ops,
+register_hotplug_dock_device(acpi_handle handle, const struct acpi_dock_ops *ops,
void *context)
{
struct dock_dependent_device *dd;
diff --git a/drivers/acpi/ec_sys.c b/drivers/acpi/ec_sys.c
index 05b44201a614..22f918bacd35 100644
--- a/drivers/acpi/ec_sys.c
+++ b/drivers/acpi/ec_sys.c
@@ -92,7 +92,7 @@ static ssize_t acpi_ec_write_io(struct file *f, const char __user *buf,
return count;
}
-static struct file_operations acpi_ec_io_ops = {
+static const struct file_operations acpi_ec_io_ops = {
.owner = THIS_MODULE,
.open = acpi_ec_open_io,
.read = acpi_ec_read_io,
diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c
index 467479f07c1f..0f0356ca1a9e 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan.c
@@ -110,7 +110,7 @@ fan_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state)
return result;
}
-static struct thermal_cooling_device_ops fan_cooling_ops = {
+static const struct thermal_cooling_device_ops fan_cooling_ops = {
.get_max_state = fan_get_max_state,
.get_cur_state = fan_get_cur_state,
.set_cur_state = fan_set_cur_state,
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 372f9b70f7f4..fa32f584229f 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -155,7 +155,7 @@ static u32 acpi_osi_handler(acpi_string interface, u32 supported)
{
if (!strcmp("Linux", interface)) {
- printk(KERN_NOTICE FW_BUG PREFIX
+ printk_once(KERN_NOTICE FW_BUG PREFIX
"BIOS _OSI(Linux) query %s%s\n",
osi_linux.enable ? "honored" : "ignored",
osi_linux.cmdline ? " via cmdline" :
@@ -237,8 +237,23 @@ void acpi_os_vprintf(const char *fmt, va_list args)
#endif
}
+#ifdef CONFIG_KEXEC
+static unsigned long acpi_rsdp;
+static int __init setup_acpi_rsdp(char *arg)
+{
+ acpi_rsdp = simple_strtoul(arg, NULL, 16);
+ return 0;
+}
+early_param("acpi_rsdp", setup_acpi_rsdp);
+#endif
+
acpi_physical_address __init acpi_os_get_root_pointer(void)
{
+#ifdef CONFIG_KEXEC
+ if (acpi_rsdp)
+ return acpi_rsdp;
+#endif
+
if (efi_enabled) {
if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
return efi.acpi20;
@@ -1083,7 +1098,13 @@ struct osi_setup_entry {
bool enable;
};
-static struct osi_setup_entry __initdata osi_setup_entries[OSI_STRING_ENTRIES_MAX];
+static struct osi_setup_entry __initdata
+ osi_setup_entries[OSI_STRING_ENTRIES_MAX] = {
+ {"Module Device", true},
+ {"Processor Device", true},
+ {"3.0 _SCP Extensions", true},
+ {"Processor Aggregator Device", true},
+};
void __init acpi_osi_setup(char *str)
{
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index f907cfbfa13c..7f9eba9a0b02 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -303,6 +303,61 @@ void acpi_pci_irq_del_prt(struct pci_bus *bus)
/* --------------------------------------------------------------------------
PCI Interrupt Routing Support
-------------------------------------------------------------------------- */
+#ifdef CONFIG_X86_IO_APIC
+extern int noioapicquirk;
+extern int noioapicreroute;
+
+static int bridge_has_boot_interrupt_variant(struct pci_bus *bus)
+{
+ struct pci_bus *bus_it;
+
+ for (bus_it = bus ; bus_it ; bus_it = bus_it->parent) {
+ if (!bus_it->self)
+ return 0;
+ if (bus_it->self->irq_reroute_variant)
+ return bus_it->self->irq_reroute_variant;
+ }
+ return 0;
+}
+
+/*
+ * Some chipsets (e.g. Intel 6700PXH) generate a legacy INTx when the IRQ
+ * entry in the chipset's IO-APIC is masked (as, e.g. the RT kernel does
+ * during interrupt handling). When this INTx generation cannot be disabled,
+ * we reroute these interrupts to their legacy equivalent to get rid of
+ * spurious interrupts.
+ */
+static int acpi_reroute_boot_interrupt(struct pci_dev *dev,
+ struct acpi_prt_entry *entry)
+{
+ if (noioapicquirk || noioapicreroute) {
+ return 0;
+ } else {
+ switch (bridge_has_boot_interrupt_variant(dev->bus)) {
+ case 0:
+ /* no rerouting necessary */
+ return 0;
+ case INTEL_IRQ_REROUTE_VARIANT:
+ /*
+ * Remap according to INTx routing table in 6700PXH
+ * specs, intel order number 302628-002, section
+ * 2.15.2. Other chipsets (80332, ...) have the same
+ * mapping and are handled here as well.
+ */
+ dev_info(&dev->dev, "PCI IRQ %d -> rerouted to legacy "
+ "IRQ %d\n", entry->index,
+ (entry->index % 4) + 16);
+ entry->index = (entry->index % 4) + 16;
+ return 1;
+ default:
+ dev_warn(&dev->dev, "Cannot reroute IRQ %d to legacy "
+ "IRQ: unknown mapping\n", entry->index);
+ return -1;
+ }
+ }
+}
+#endif /* CONFIG_X86_IO_APIC */
+
static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin)
{
struct acpi_prt_entry *entry;
@@ -311,6 +366,9 @@ static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin)
entry = acpi_pci_irq_find_prt_entry(dev, pin);
if (entry) {
+#ifdef CONFIG_X86_IO_APIC
+ acpi_reroute_boot_interrupt(dev, entry);
+#endif /* CONFIG_X86_IO_APIC */
ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %s[%c] _PRT entry\n",
pci_name(dev), pin_name(pin)));
return entry;
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index d06078d660ad..2672c798272f 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -485,7 +485,8 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
root->secondary.end = 0xFF;
printk(KERN_WARNING FW_BUG PREFIX
"no secondary bus range in _CRS\n");
- status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN, NULL, &bus);
+ status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN,
+ NULL, &bus);
if (ACPI_SUCCESS(status))
root->secondary.start = bus;
else if (status == AE_NOT_FOUND)
diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
index 79cb65332894..870550d6a4bf 100644
--- a/drivers/acpi/processor_thermal.c
+++ b/drivers/acpi/processor_thermal.c
@@ -244,7 +244,7 @@ processor_set_cur_state(struct thermal_cooling_device *cdev,
return result;
}
-struct thermal_cooling_device_ops processor_cooling_ops = {
+const struct thermal_cooling_device_ops processor_cooling_ops = {
.get_max_state = processor_get_max_state,
.get_cur_state = processor_get_cur_state,
.set_cur_state = processor_set_cur_state,
diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c
index 50658ff887d9..6e36d0c0057c 100644
--- a/drivers/acpi/sbs.c
+++ b/drivers/acpi/sbs.c
@@ -130,6 +130,9 @@ struct acpi_sbs {
#define to_acpi_sbs(x) container_of(x, struct acpi_sbs, charger)
+static int acpi_sbs_remove(struct acpi_device *device, int type);
+static int acpi_battery_get_state(struct acpi_battery *battery);
+
static inline int battery_scale(int log)
{
int scale = 1;
@@ -195,6 +198,8 @@ static int acpi_sbs_battery_get_property(struct power_supply *psy,
if ((!battery->present) && psp != POWER_SUPPLY_PROP_PRESENT)
return -ENODEV;
+
+ acpi_battery_get_state(battery);
switch (psp) {
case POWER_SUPPLY_PROP_STATUS:
if (battery->rate_now < 0)
@@ -225,11 +230,17 @@ static int acpi_sbs_battery_get_property(struct power_supply *psy,
case POWER_SUPPLY_PROP_POWER_NOW:
val->intval = abs(battery->rate_now) *
acpi_battery_ipscale(battery) * 1000;
+ val->intval *= (acpi_battery_mode(battery)) ?
+ (battery->voltage_now *
+ acpi_battery_vscale(battery) / 1000) : 1;
break;
case POWER_SUPPLY_PROP_CURRENT_AVG:
case POWER_SUPPLY_PROP_POWER_AVG:
val->intval = abs(battery->rate_avg) *
acpi_battery_ipscale(battery) * 1000;
+ val->intval *= (acpi_battery_mode(battery)) ?
+ (battery->voltage_now *
+ acpi_battery_vscale(battery) / 1000) : 1;
break;
case POWER_SUPPLY_PROP_CAPACITY:
val->intval = battery->state_of_charge;
@@ -903,8 +914,6 @@ static void acpi_sbs_callback(void *context)
}
}
-static int acpi_sbs_remove(struct acpi_device *device, int type);
-
static int acpi_sbs_add(struct acpi_device *device)
{
struct acpi_sbs *sbs;
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 6c949602cbd1..3ed80b2ca907 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -428,6 +428,22 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "1000 Series"),
},
},
+ {
+ .callback = init_old_suspend_ordering,
+ .ident = "Asus A8N-SLI DELUXE",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI DELUXE"),
+ },
+ },
+ {
+ .callback = init_old_suspend_ordering,
+ .ident = "Asus A8N-SLI Premium",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI Premium"),
+ },
+ },
{},
};
#endif /* CONFIG_SUSPEND */
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 77255f250dbb..c538d0ef10ff 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -149,12 +149,12 @@ static int param_get_debug_level(char *buffer, const struct kernel_param *kp)
return result;
}
-static struct kernel_param_ops param_ops_debug_layer = {
+static const struct kernel_param_ops param_ops_debug_layer = {
.set = param_set_uint,
.get = param_get_debug_layer,
};
-static struct kernel_param_ops param_ops_debug_level = {
+static const struct kernel_param_ops param_ops_debug_level = {
.set = param_set_uint,
.get = param_get_debug_level,
};
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 2607e17b520f..48fbc647b178 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -812,7 +812,7 @@ acpi_thermal_unbind_cooling_device(struct thermal_zone_device *thermal,
thermal_zone_unbind_cooling_device);
}
-static struct thermal_zone_device_ops acpi_thermal_zone_ops = {
+static const struct thermal_zone_device_ops acpi_thermal_zone_ops = {
.bind = acpi_thermal_bind_cooling_device,
.unbind = acpi_thermal_unbind_cooling_device,
.get_temp = thermal_get_temp,
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index ada4b4d9bdc8..08a44b532f7c 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -307,7 +307,7 @@ video_set_cur_state(struct thermal_cooling_device *cooling_dev, unsigned long st
return acpi_video_device_lcd_set_level(video, level);
}
-static struct thermal_cooling_device_ops video_cooling_ops = {
+static const struct thermal_cooling_device_ops video_cooling_ops = {
.get_max_state = video_get_max_state,
.get_cur_state = video_get_cur_state,
.set_cur_state = video_set_cur_state,
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index e0a5b555cee1..bb7c5f1085cc 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -218,12 +218,12 @@ static void ata_acpi_dev_uevent(acpi_handle handle, u32 event, void *data)
ata_acpi_uevent(dev->link->ap, dev, event);
}
-static struct acpi_dock_ops ata_acpi_dev_dock_ops = {
+static const struct acpi_dock_ops ata_acpi_dev_dock_ops = {
.handler = ata_acpi_dev_notify_dock,
.uevent = ata_acpi_dev_uevent,
};
-static struct acpi_dock_ops ata_acpi_ap_dock_ops = {
+static const struct acpi_dock_ops ata_acpi_ap_dock_ops = {
.handler = ata_acpi_ap_notify_dock,
.uevent = ata_acpi_ap_uevent,
};
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 49502bc5360a..423fd56bf612 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -616,5 +616,16 @@ config MSM_SMD_PKT
Enables userspace clients to read and write to some packet SMD
ports via device interface for MSM chipset.
+config TILE_SROM
+ bool "Character-device access via hypervisor to the Tilera SPI ROM"
+ depends on TILE
+ default y
+ ---help---
+ This device provides character-level read-write access
+ to the SROM, typically via the "0", "1", and "2" devices
+ in /dev/srom/. The Tilera hypervisor makes the flash
+ device appear much like a simple EEPROM, and knows
+ how to partition a single ROM for multiple purposes.
+
endmenu
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 7a00672bd85d..32762ba769c2 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -63,3 +63,5 @@ obj-$(CONFIG_RAMOOPS) += ramoops.o
obj-$(CONFIG_JS_RTC) += js-rtc.o
js-rtc-y = rtc.o
+
+obj-$(CONFIG_TILE_SROM) += tile-srom.o
diff --git a/drivers/char/ramoops.c b/drivers/char/ramoops.c
index fca0c51bbc90..810aff9e750f 100644
--- a/drivers/char/ramoops.c
+++ b/drivers/char/ramoops.c
@@ -147,6 +147,14 @@ static int __init ramoops_probe(struct platform_device *pdev)
cxt->phys_addr = pdata->mem_address;
cxt->record_size = pdata->record_size;
cxt->dump_oops = pdata->dump_oops;
+ /*
+ * Update the module parameter variables as well so they are visible
+ * through /sys/module/ramoops/parameters/
+ */
+ mem_size = pdata->mem_size;
+ mem_address = pdata->mem_address;
+ record_size = pdata->record_size;
+ dump_oops = pdata->dump_oops;
if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) {
pr_err("request mem region failed\n");
diff --git a/drivers/char/tile-srom.c b/drivers/char/tile-srom.c
new file mode 100644
index 000000000000..cf3ee008dca2
--- /dev/null
+++ b/drivers/char/tile-srom.c
@@ -0,0 +1,481 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * SPI Flash ROM driver
+ *
+ * This source code is derived from code provided in "Linux Device
+ * Drivers, Third Edition", by Jonathan Corbet, Alessandro Rubini, and
+ * Greg Kroah-Hartman, published by O'Reilly Media, Inc.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/kernel.h> /* printk() */
+#include <linux/slab.h> /* kmalloc() */
+#include <linux/fs.h> /* everything... */
+#include <linux/errno.h> /* error codes */
+#include <linux/types.h> /* size_t */
+#include <linux/proc_fs.h>
+#include <linux/fcntl.h> /* O_ACCMODE */
+#include <linux/aio.h>
+#include <linux/pagemap.h>
+#include <linux/hugetlb.h>
+#include <linux/uaccess.h>
+#include <linux/platform_device.h>
+#include <hv/hypervisor.h>
+#include <linux/ioctl.h>
+#include <linux/cdev.h>
+#include <linux/delay.h>
+#include <hv/drv_srom_intf.h>
+
+/*
+ * Size of our hypervisor I/O requests. We break up large transfers
+ * so that we don't spend large uninterrupted spans of time in the
+ * hypervisor. Erasing an SROM sector takes a significant fraction of
+ * a second, so if we allowed the user to, say, do one I/O to write the
+ * entire ROM, we'd get soft lockup timeouts, or worse.
+ */
+#define SROM_CHUNK_SIZE ((size_t)4096)
+
+/*
+ * When hypervisor is busy (e.g. erasing), poll the status periodically.
+ */
+
+/*
+ * Interval to poll the state in msec
+ */
+#define SROM_WAIT_TRY_INTERVAL 20
+
+/*
+ * Maximum times to poll the state
+ */
+#define SROM_MAX_WAIT_TRY_TIMES 1000
+
+struct srom_dev {
+ int hv_devhdl; /* Handle for hypervisor device */
+ u32 total_size; /* Size of this device */
+ u32 sector_size; /* Size of a sector */
+ u32 page_size; /* Size of a page */
+ struct mutex lock; /* Allow only one accessor at a time */
+};
+
+static int srom_major; /* Dynamic major by default */
+module_param(srom_major, int, 0);
+MODULE_AUTHOR("Tilera Corporation");
+MODULE_LICENSE("GPL");
+
+static int srom_devs; /* Number of SROM partitions */
+static struct cdev srom_cdev;
+static struct class *srom_class;
+static struct srom_dev *srom_devices;
+
+/*
+ * Handle calling the hypervisor and managing EAGAIN/EBUSY.
+ */
+
+static ssize_t _srom_read(int hv_devhdl, void *buf,
+ loff_t off, size_t count)
+{
+ int retval, retries = SROM_MAX_WAIT_TRY_TIMES;
+ for (;;) {
+ retval = hv_dev_pread(hv_devhdl, 0, (HV_VirtAddr)buf,
+ count, off);
+ if (retval >= 0)
+ return retval;
+ if (retval == HV_EAGAIN)
+ continue;
+ if (retval == HV_EBUSY && --retries > 0) {
+ msleep(SROM_WAIT_TRY_INTERVAL);
+ continue;
+ }
+ pr_err("_srom_read: error %d\n", retval);
+ return -EIO;
+ }
+}
+
+static ssize_t _srom_write(int hv_devhdl, const void *buf,
+ loff_t off, size_t count)
+{
+ int retval, retries = SROM_MAX_WAIT_TRY_TIMES;
+ for (;;) {
+ retval = hv_dev_pwrite(hv_devhdl, 0, (HV_VirtAddr)buf,
+ count, off);
+ if (retval >= 0)
+ return retval;
+ if (retval == HV_EAGAIN)
+ continue;
+ if (retval == HV_EBUSY && --retries > 0) {
+ msleep(SROM_WAIT_TRY_INTERVAL);
+ continue;
+ }
+ pr_err("_srom_write: error %d\n", retval);
+ return -EIO;
+ }
+}
+
+/**
+ * srom_open() - Device open routine.
+ * @inode: Inode for this device.
+ * @filp: File for this specific open of the device.
+ *
+ * Returns zero, or an error code.
+ */
+static int srom_open(struct inode *inode, struct file *filp)
+{
+ filp->private_data = &srom_devices[iminor(inode)];
+ return 0;
+}
+
+
+/**
+ * srom_release() - Device release routine.
+ * @inode: Inode for this device.
+ * @filp: File for this specific open of the device.
+ *
+ * Returns zero, or an error code.
+ */
+static int srom_release(struct inode *inode, struct file *filp)
+{
+ struct srom_dev *srom = filp->private_data;
+ char dummy;
+
+ /* Make sure we've flushed anything written to the ROM. */
+ mutex_lock(&srom->lock);
+ if (srom->hv_devhdl >= 0)
+ _srom_write(srom->hv_devhdl, &dummy, SROM_FLUSH_OFF, 1);
+ mutex_unlock(&srom->lock);
+
+ filp->private_data = NULL;
+
+ return 0;
+}
+
+
+/**
+ * srom_read() - Read data from the device.
+ * @filp: File for this specific open of the device.
+ * @buf: User's data buffer.
+ * @count: Number of bytes requested.
+ * @f_pos: File position.
+ *
+ * Returns number of bytes read, or an error code.
+ */
+static ssize_t srom_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *f_pos)
+{
+ int retval = 0;
+ void *kernbuf;
+ struct srom_dev *srom = filp->private_data;
+
+ kernbuf = kmalloc(SROM_CHUNK_SIZE, GFP_KERNEL);
+ if (!kernbuf)
+ return -ENOMEM;
+
+ if (mutex_lock_interruptible(&srom->lock)) {
+ retval = -ERESTARTSYS;
+ kfree(kernbuf);
+ return retval;
+ }
+
+ while (count) {
+ int hv_retval;
+ int bytes_this_pass = min(count, SROM_CHUNK_SIZE);
+
+ hv_retval = _srom_read(srom->hv_devhdl, kernbuf,
+ *f_pos, bytes_this_pass);
+ if (hv_retval > 0) {
+ if (copy_to_user(buf, kernbuf, hv_retval) != 0) {
+ retval = -EFAULT;
+ break;
+ }
+ } else if (hv_retval <= 0) {
+ if (retval == 0)
+ retval = hv_retval;
+ break;
+ }
+
+ retval += hv_retval;
+ *f_pos += hv_retval;
+ buf += hv_retval;
+ count -= hv_retval;
+ }
+
+ mutex_unlock(&srom->lock);
+ kfree(kernbuf);
+
+ return retval;
+}
+
+/**
+ * srom_write() - Write data to the device.
+ * @filp: File for this specific open of the device.
+ * @buf: User's data buffer.
+ * @count: Number of bytes requested.
+ * @f_pos: File position.
+ *
+ * Returns number of bytes written, or an error code.
+ */
+static ssize_t srom_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *f_pos)
+{
+ int retval = 0;
+ void *kernbuf;
+ struct srom_dev *srom = filp->private_data;
+
+ kernbuf = kmalloc(SROM_CHUNK_SIZE, GFP_KERNEL);
+ if (!kernbuf)
+ return -ENOMEM;
+
+ if (mutex_lock_interruptible(&srom->lock)) {
+ retval = -ERESTARTSYS;
+ kfree(kernbuf);
+ return retval;
+ }
+
+ while (count) {
+ int hv_retval;
+ int bytes_this_pass = min(count, SROM_CHUNK_SIZE);
+
+ if (copy_from_user(kernbuf, buf, bytes_this_pass) != 0) {
+ retval = -EFAULT;
+ break;
+ }
+
+ hv_retval = _srom_write(srom->hv_devhdl, kernbuf,
+ *f_pos, bytes_this_pass);
+ if (hv_retval <= 0) {
+ if (retval == 0)
+ retval = hv_retval;
+ break;
+ }
+
+ retval += hv_retval;
+ *f_pos += hv_retval;
+ buf += hv_retval;
+ count -= hv_retval;
+ }
+
+ mutex_unlock(&srom->lock);
+ kfree(kernbuf);
+
+ return retval;
+}
+
+/* Provide our own implementation so we can use srom->total_size. */
+loff_t srom_llseek(struct file *filp, loff_t offset, int origin)
+{
+ struct srom_dev *srom = filp->private_data;
+
+ if (mutex_lock_interruptible(&srom->lock))
+ return -ERESTARTSYS;
+
+ switch (origin) {
+ case SEEK_END:
+ offset += srom->total_size;
+ break;
+ case SEEK_CUR:
+ offset += filp->f_pos;
+ break;
+ }
+
+ if (offset < 0 || offset > srom->total_size) {
+ offset = -EINVAL;
+ } else {
+ filp->f_pos = offset;
+ filp->f_version = 0;
+ }
+
+ mutex_unlock(&srom->lock);
+
+ return offset;
+}
+
+static ssize_t total_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct srom_dev *srom = dev_get_drvdata(dev);
+ return sprintf(buf, "%u\n", srom->total_size);
+}
+
+static ssize_t sector_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct srom_dev *srom = dev_get_drvdata(dev);
+ return sprintf(buf, "%u\n", srom->sector_size);
+}
+
+static ssize_t page_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct srom_dev *srom = dev_get_drvdata(dev);
+ return sprintf(buf, "%u\n", srom->page_size);
+}
+
+static struct device_attribute srom_dev_attrs[] = {
+ __ATTR(total_size, S_IRUGO, total_show, NULL),
+ __ATTR(sector_size, S_IRUGO, sector_show, NULL),
+ __ATTR(page_size, S_IRUGO, page_show, NULL),
+ __ATTR_NULL
+};
+
+static char *srom_devnode(struct device *dev, mode_t *mode)
+{
+ *mode = S_IRUGO | S_IWUSR;
+ return kasprintf(GFP_KERNEL, "srom/%s", dev_name(dev));
+}
+
+/*
+ * The fops
+ */
+static const struct file_operations srom_fops = {
+ .owner = THIS_MODULE,
+ .llseek = srom_llseek,
+ .read = srom_read,
+ .write = srom_write,
+ .open = srom_open,
+ .release = srom_release,
+};
+
+/**
+ * srom_setup_minor() - Initialize per-minor information.
+ * @srom: Per-device SROM state.
+ * @index: Device to set up.
+ */
+static int srom_setup_minor(struct srom_dev *srom, int index)
+{
+ struct device *dev;
+ int devhdl = srom->hv_devhdl;
+
+ mutex_init(&srom->lock);
+
+ if (_srom_read(devhdl, &srom->total_size,
+ SROM_TOTAL_SIZE_OFF, sizeof(srom->total_size)) < 0)
+ return -EIO;
+ if (_srom_read(devhdl, &srom->sector_size,
+ SROM_SECTOR_SIZE_OFF, sizeof(srom->sector_size)) < 0)
+ return -EIO;
+ if (_srom_read(devhdl, &srom->page_size,
+ SROM_PAGE_SIZE_OFF, sizeof(srom->page_size)) < 0)
+ return -EIO;
+
+ dev = device_create(srom_class, &platform_bus,
+ MKDEV(srom_major, index), srom, "%d", index);
+ return IS_ERR(dev) ? PTR_ERR(dev) : 0;
+}
+
+/** srom_init() - Initialize the driver's module. */
+static int srom_init(void)
+{
+ int result, i;
+ dev_t dev = MKDEV(srom_major, 0);
+
+ /*
+ * Start with a plausible number of partitions; the krealloc() call
+ * below will yield about log(srom_devs) additional allocations.
+ */
+ srom_devices = kzalloc(4 * sizeof(struct srom_dev), GFP_KERNEL);
+
+ /* Discover the number of srom partitions. */
+ for (i = 0; ; i++) {
+ int devhdl;
+ char buf[20];
+ struct srom_dev *new_srom_devices =
+ krealloc(srom_devices, (i+1) * sizeof(struct srom_dev),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!new_srom_devices) {
+ result = -ENOMEM;
+ goto fail_mem;
+ }
+ srom_devices = new_srom_devices;
+ sprintf(buf, "srom/0/%d", i);
+ devhdl = hv_dev_open((HV_VirtAddr)buf, 0);
+ if (devhdl < 0) {
+ if (devhdl != HV_ENODEV)
+ pr_notice("srom/%d: hv_dev_open failed: %d.\n",
+ i, devhdl);
+ break;
+ }
+ srom_devices[i].hv_devhdl = devhdl;
+ }
+ srom_devs = i;
+
+ /* Bail out early if we have no partitions at all. */
+ if (srom_devs == 0) {
+ result = -ENODEV;
+ goto fail_mem;
+ }
+
+ /* Register our major, and accept a dynamic number. */
+ if (srom_major)
+ result = register_chrdev_region(dev, srom_devs, "srom");
+ else {
+ result = alloc_chrdev_region(&dev, 0, srom_devs, "srom");
+ srom_major = MAJOR(dev);
+ }
+ if (result < 0)
+ goto fail_mem;
+
+ /* Register a character device. */
+ cdev_init(&srom_cdev, &srom_fops);
+ srom_cdev.owner = THIS_MODULE;
+ srom_cdev.ops = &srom_fops;
+ result = cdev_add(&srom_cdev, dev, srom_devs);
+ if (result < 0)
+ goto fail_chrdev;
+
+ /* Create a sysfs class. */
+ srom_class = class_create(THIS_MODULE, "srom");
+ if (IS_ERR(srom_class)) {
+ result = PTR_ERR(srom_class);
+ goto fail_cdev;
+ }
+ srom_class->dev_attrs = srom_dev_attrs;
+ srom_class->devnode = srom_devnode;
+
+ /* Do per-partition initialization */
+ for (i = 0; i < srom_devs; i++) {
+ result = srom_setup_minor(srom_devices + i, i);
+ if (result < 0)
+ goto fail_class;
+ }
+
+ return 0;
+
+fail_class:
+ for (i = 0; i < srom_devs; i++)
+ device_destroy(srom_class, MKDEV(srom_major, i));
+ class_destroy(srom_class);
+fail_cdev:
+ cdev_del(&srom_cdev);
+fail_chrdev:
+ unregister_chrdev_region(dev, srom_devs);
+fail_mem:
+ kfree(srom_devices);
+ return result;
+}
+
+/** srom_cleanup() - Clean up the driver's module. */
+static void srom_cleanup(void)
+{
+ int i;
+ for (i = 0; i < srom_devs; i++)
+ device_destroy(srom_class, MKDEV(srom_major, i));
+ class_destroy(srom_class);
+ cdev_del(&srom_cdev);
+ unregister_chrdev_region(MKDEV(srom_major, 0), srom_devs);
+ kfree(srom_devices);
+}
+
+module_init(srom_init);
+module_exit(srom_cleanup);
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 7fc2f108f490..3f4051a7c5a7 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -80,7 +80,7 @@ enum tis_defaults {
static LIST_HEAD(tis_chips);
static DEFINE_SPINLOCK(tis_lock);
-#ifdef CONFIG_PNP
+#if defined(CONFIG_PNP) && defined(CONFIG_ACPI)
static int is_itpm(struct pnp_dev *dev)
{
struct acpi_device *acpi = pnp_acpi_device(dev);
@@ -93,6 +93,11 @@ static int is_itpm(struct pnp_dev *dev)
return 0;
}
+#else
+static inline int is_itpm(struct pnp_dev *dev)
+{
+ return 0;
+}
#endif
static int check_locality(struct tpm_chip *chip, int l)
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index eacb05e6cfb3..eb80b549ed8d 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -157,7 +157,7 @@ utf16_strnlen(efi_char16_t *s, size_t maxlength)
return length;
}
-static unsigned long
+static inline unsigned long
utf16_strlen(efi_char16_t *s)
{
return utf16_strnlen(s, ~0UL);
@@ -580,8 +580,8 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
return -1;
}
-static u64 efi_pstore_write(enum pstore_type_id type, int part, size_t size,
- struct pstore_info *psi)
+static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part,
+ size_t size, struct pstore_info *psi)
{
return 0;
}
diff --git a/drivers/input/serio/xilinx_ps2.c b/drivers/input/serio/xilinx_ps2.c
index 80baa53da5b1..d64c5a43aaad 100644
--- a/drivers/input/serio/xilinx_ps2.c
+++ b/drivers/input/serio/xilinx_ps2.c
@@ -23,7 +23,7 @@
#include <linux/init.h>
#include <linux/list.h>
#include <linux/io.h>
-
+#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_platform.h>
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 8420129fc5ee..f75a66e7d312 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -241,12 +241,13 @@ config DM_MIRROR
needed for live data migration tools such as 'pvmove'.
config DM_RAID
- tristate "RAID 4/5/6 target (EXPERIMENTAL)"
+ tristate "RAID 1/4/5/6 target (EXPERIMENTAL)"
depends on BLK_DEV_DM && EXPERIMENTAL
+ select MD_RAID1
select MD_RAID456
select BLK_DEV_MD
---help---
- A dm target that supports RAID4, RAID5 and RAID6 mappings
+ A dm target that supports RAID1, RAID4, RAID5 and RAID6 mappings
A RAID-5 set of N drives with a capacity of C MB per drive provides
the capacity of C * (N - 1) MB, and protects against a failure
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index bae6c4e23d3f..49da55c1528a 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -30,7 +30,6 @@
#include <linux/device-mapper.h>
#define DM_MSG_PREFIX "crypt"
-#define MESG_STR(x) x, sizeof(x)
/*
* context holding the current state of a multi-part conversion
@@ -239,7 +238,7 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv,
struct dm_crypt_request *dmreq)
{
memset(iv, 0, cc->iv_size);
- *(u32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff);
+ *(__le32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff);
return 0;
}
@@ -248,7 +247,7 @@ static int crypt_iv_plain64_gen(struct crypt_config *cc, u8 *iv,
struct dm_crypt_request *dmreq)
{
memset(iv, 0, cc->iv_size);
- *(u64 *)iv = cpu_to_le64(dmreq->iv_sector);
+ *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector);
return 0;
}
@@ -415,7 +414,7 @@ static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv,
struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private;
memset(iv, 0, cc->iv_size);
- *(u64 *)iv = cpu_to_le64(dmreq->iv_sector);
+ *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector);
crypto_cipher_encrypt_one(essiv_tfm, iv, iv);
return 0;
@@ -1575,11 +1574,17 @@ bad_mem:
static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct crypt_config *cc;
- unsigned int key_size;
+ unsigned int key_size, opt_params;
unsigned long long tmpll;
int ret;
+ struct dm_arg_set as;
+ const char *opt_string;
+
+ static struct dm_arg _args[] = {
+ {0, 1, "Invalid number of feature args"},
+ };
- if (argc != 5) {
+ if (argc < 5) {
ti->error = "Not enough arguments";
return -EINVAL;
}
@@ -1648,6 +1653,30 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
cc->start = tmpll;
+ argv += 5;
+ argc -= 5;
+
+ /* Optional parameters */
+ if (argc) {
+ as.argc = argc;
+ as.argv = argv;
+
+ ret = dm_read_arg_group(_args, &as, &opt_params, &ti->error);
+ if (ret)
+ goto bad;
+
+ opt_string = dm_shift_arg(&as);
+
+ if (opt_params == 1 && opt_string &&
+ !strcasecmp(opt_string, "allow_discards"))
+ ti->num_discard_requests = 1;
+ else if (opt_params) {
+ ret = -EINVAL;
+ ti->error = "Invalid feature arguments";
+ goto bad;
+ }
+ }
+
ret = -ENOMEM;
cc->io_queue = alloc_workqueue("kcryptd_io",
WQ_NON_REENTRANT|
@@ -1682,9 +1711,16 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
struct dm_crypt_io *io;
struct crypt_config *cc;
- if (bio->bi_rw & REQ_FLUSH) {
+ /*
+ * If bio is REQ_FLUSH or REQ_DISCARD, just bypass crypt queues.
+ * - for REQ_FLUSH device-mapper core ensures that no IO is in-flight
+ * - for REQ_DISCARD caller must use flush if IO ordering matters
+ */
+ if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) {
cc = ti->private;
bio->bi_bdev = cc->dev->bdev;
+ if (bio_sectors(bio))
+ bio->bi_sector = cc->start + dm_target_offset(ti, bio->bi_sector);
return DM_MAPIO_REMAPPED;
}
@@ -1727,6 +1763,10 @@ static int crypt_status(struct dm_target *ti, status_type_t type,
DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset,
cc->dev->name, (unsigned long long)cc->start);
+
+ if (ti->num_discard_requests)
+ DMEMIT(" 1 allow_discards");
+
break;
}
return 0;
@@ -1770,12 +1810,12 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
if (argc < 2)
goto error;
- if (!strnicmp(argv[0], MESG_STR("key"))) {
+ if (!strcasecmp(argv[0], "key")) {
if (!test_bit(DM_CRYPT_SUSPENDED, &cc->flags)) {
DMWARN("not suspended during key manipulation.");
return -EINVAL;
}
- if (argc == 3 && !strnicmp(argv[1], MESG_STR("set"))) {
+ if (argc == 3 && !strcasecmp(argv[1], "set")) {
ret = crypt_set_key(cc, argv[2]);
if (ret)
return ret;
@@ -1783,7 +1823,7 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
ret = cc->iv_gen_ops->init(cc);
return ret;
}
- if (argc == 2 && !strnicmp(argv[1], MESG_STR("wipe"))) {
+ if (argc == 2 && !strcasecmp(argv[1], "wipe")) {
if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) {
ret = cc->iv_gen_ops->wipe(cc);
if (ret)
@@ -1823,7 +1863,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
static struct target_type crypt_target = {
.name = "crypt",
- .version = {1, 10, 0},
+ .version = {1, 11, 0},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index ea790623c30b..89f73ca22cfa 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2003 Sistina Software (UK) Limited.
- * Copyright (C) 2004, 2010 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004, 2010-2011 Red Hat, Inc. All rights reserved.
*
* This file is released under the GPL.
*/
@@ -15,6 +15,9 @@
#define DM_MSG_PREFIX "flakey"
+#define all_corrupt_bio_flags_match(bio, fc) \
+ (((bio)->bi_rw & (fc)->corrupt_bio_flags) == (fc)->corrupt_bio_flags)
+
/*
* Flakey: Used for testing only, simulates intermittent,
* catastrophic device failure.
@@ -25,60 +28,189 @@ struct flakey_c {
sector_t start;
unsigned up_interval;
unsigned down_interval;
+ unsigned long flags;
+ unsigned corrupt_bio_byte;
+ unsigned corrupt_bio_rw;
+ unsigned corrupt_bio_value;
+ unsigned corrupt_bio_flags;
+};
+
+enum feature_flag_bits {
+ DROP_WRITES
};
+static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
+ struct dm_target *ti)
+{
+ int r;
+ unsigned argc;
+ const char *arg_name;
+
+ static struct dm_arg _args[] = {
+ {0, 6, "Invalid number of feature args"},
+ {1, UINT_MAX, "Invalid corrupt bio byte"},
+ {0, 255, "Invalid corrupt value to write into bio byte (0-255)"},
+ {0, UINT_MAX, "Invalid corrupt bio flags mask"},
+ };
+
+ /* No feature arguments supplied. */
+ if (!as->argc)
+ return 0;
+
+ r = dm_read_arg_group(_args, as, &argc, &ti->error);
+ if (r)
+ return r;
+
+ while (argc) {
+ arg_name = dm_shift_arg(as);
+ argc--;
+
+ /*
+ * drop_writes
+ */
+ if (!strcasecmp(arg_name, "drop_writes")) {
+ if (test_and_set_bit(DROP_WRITES, &fc->flags)) {
+ ti->error = "Feature drop_writes duplicated";
+ return -EINVAL;
+ }
+
+ continue;
+ }
+
+ /*
+ * corrupt_bio_byte <Nth_byte> <direction> <value> <bio_flags>
+ */
+ if (!strcasecmp(arg_name, "corrupt_bio_byte")) {
+ if (!argc)
+ ti->error = "Feature corrupt_bio_byte requires parameters";
+
+ r = dm_read_arg(_args + 1, as, &fc->corrupt_bio_byte, &ti->error);
+ if (r)
+ return r;
+ argc--;
+
+ /*
+ * Direction r or w?
+ */
+ arg_name = dm_shift_arg(as);
+ if (!strcasecmp(arg_name, "w"))
+ fc->corrupt_bio_rw = WRITE;
+ else if (!strcasecmp(arg_name, "r"))
+ fc->corrupt_bio_rw = READ;
+ else {
+ ti->error = "Invalid corrupt bio direction (r or w)";
+ return -EINVAL;
+ }
+ argc--;
+
+ /*
+ * Value of byte (0-255) to write in place of correct one.
+ */
+ r = dm_read_arg(_args + 2, as, &fc->corrupt_bio_value, &ti->error);
+ if (r)
+ return r;
+ argc--;
+
+ /*
+ * Only corrupt bios with these flags set.
+ */
+ r = dm_read_arg(_args + 3, as, &fc->corrupt_bio_flags, &ti->error);
+ if (r)
+ return r;
+ argc--;
+
+ continue;
+ }
+
+ ti->error = "Unrecognised flakey feature requested";
+ return -EINVAL;
+ }
+
+ if (test_bit(DROP_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) {
+ ti->error = "drop_writes is incompatible with corrupt_bio_byte with the WRITE flag set";
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/*
- * Construct a flakey mapping: <dev_path> <offset> <up interval> <down interval>
+ * Construct a flakey mapping:
+ * <dev_path> <offset> <up interval> <down interval> [<#feature args> [<arg>]*]
+ *
+ * Feature args:
+ * [drop_writes]
+ * [corrupt_bio_byte <Nth_byte> <direction> <value> <bio_flags>]
+ *
+ * Nth_byte starts from 1 for the first byte.
+ * Direction is r for READ or w for WRITE.
+ * bio_flags is ignored if 0.
*/
static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
+ static struct dm_arg _args[] = {
+ {0, UINT_MAX, "Invalid up interval"},
+ {0, UINT_MAX, "Invalid down interval"},
+ };
+
+ int r;
struct flakey_c *fc;
- unsigned long long tmp;
+ unsigned long long tmpll;
+ struct dm_arg_set as;
+ const char *devname;
- if (argc != 4) {
- ti->error = "dm-flakey: Invalid argument count";
+ as.argc = argc;
+ as.argv = argv;
+
+ if (argc < 4) {
+ ti->error = "Invalid argument count";
return -EINVAL;
}
- fc = kmalloc(sizeof(*fc), GFP_KERNEL);
+ fc = kzalloc(sizeof(*fc), GFP_KERNEL);
if (!fc) {
- ti->error = "dm-flakey: Cannot allocate linear context";
+ ti->error = "Cannot allocate linear context";
return -ENOMEM;
}
fc->start_time = jiffies;
- if (sscanf(argv[1], "%llu", &tmp) != 1) {
- ti->error = "dm-flakey: Invalid device sector";
+ devname = dm_shift_arg(&as);
+
+ if (sscanf(dm_shift_arg(&as), "%llu", &tmpll) != 1) {
+ ti->error = "Invalid device sector";
goto bad;
}
- fc->start = tmp;
+ fc->start = tmpll;
- if (sscanf(argv[2], "%u", &fc->up_interval) != 1) {
- ti->error = "dm-flakey: Invalid up interval";
+ r = dm_read_arg(_args, &as, &fc->up_interval, &ti->error);
+ if (r)
goto bad;
- }
- if (sscanf(argv[3], "%u", &fc->down_interval) != 1) {
- ti->error = "dm-flakey: Invalid down interval";
+ r = dm_read_arg(_args, &as, &fc->down_interval, &ti->error);
+ if (r)
goto bad;
- }
if (!(fc->up_interval + fc->down_interval)) {
- ti->error = "dm-flakey: Total (up + down) interval is zero";
+ ti->error = "Total (up + down) interval is zero";
goto bad;
}
if (fc->up_interval + fc->down_interval < fc->up_interval) {
- ti->error = "dm-flakey: Interval overflow";
+ ti->error = "Interval overflow";
goto bad;
}
- if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &fc->dev)) {
- ti->error = "dm-flakey: Device lookup failed";
+ r = parse_features(&as, fc, ti);
+ if (r)
+ goto bad;
+
+ if (dm_get_device(ti, devname, dm_table_get_mode(ti->table), &fc->dev)) {
+ ti->error = "Device lookup failed";
goto bad;
}
ti->num_flush_requests = 1;
+ ti->num_discard_requests = 1;
ti->private = fc;
return 0;
@@ -99,7 +231,7 @@ static sector_t flakey_map_sector(struct dm_target *ti, sector_t bi_sector)
{
struct flakey_c *fc = ti->private;
- return fc->start + (bi_sector - ti->begin);
+ return fc->start + dm_target_offset(ti, bi_sector);
}
static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
@@ -111,6 +243,25 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
bio->bi_sector = flakey_map_sector(ti, bio->bi_sector);
}
+static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
+{
+ unsigned bio_bytes = bio_cur_bytes(bio);
+ char *data = bio_data(bio);
+
+ /*
+ * Overwrite the Nth byte of the data returned.
+ */
+ if (data && bio_bytes >= fc->corrupt_bio_byte) {
+ data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value;
+
+ DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
+ "(rw=%c bi_rw=%lu bi_sector=%llu cur_bytes=%u)\n",
+ bio, fc->corrupt_bio_value, fc->corrupt_bio_byte,
+ (bio_data_dir(bio) == WRITE) ? 'w' : 'r',
+ bio->bi_rw, (unsigned long long)bio->bi_sector, bio_bytes);
+ }
+}
+
static int flakey_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
{
@@ -119,18 +270,71 @@ static int flakey_map(struct dm_target *ti, struct bio *bio,
/* Are we alive ? */
elapsed = (jiffies - fc->start_time) / HZ;
- if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval)
+ if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) {
+ /*
+ * Flag this bio as submitted while down.
+ */
+ map_context->ll = 1;
+
+ /*
+ * Map reads as normal.
+ */
+ if (bio_data_dir(bio) == READ)
+ goto map_bio;
+
+ /*
+ * Drop writes?
+ */
+ if (test_bit(DROP_WRITES, &fc->flags)) {
+ bio_endio(bio, 0);
+ return DM_MAPIO_SUBMITTED;
+ }
+
+ /*
+ * Corrupt matching writes.
+ */
+ if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == WRITE)) {
+ if (all_corrupt_bio_flags_match(bio, fc))
+ corrupt_bio_data(bio, fc);
+ goto map_bio;
+ }
+
+ /*
+ * By default, error all I/O.
+ */
return -EIO;
+ }
+map_bio:
flakey_map_bio(ti, bio);
return DM_MAPIO_REMAPPED;
}
+static int flakey_end_io(struct dm_target *ti, struct bio *bio,
+ int error, union map_info *map_context)
+{
+ struct flakey_c *fc = ti->private;
+ unsigned bio_submitted_while_down = map_context->ll;
+
+ /*
+ * Corrupt successful READs while in down state.
+ * If flags were specified, only corrupt those that match.
+ */
+ if (!error && bio_submitted_while_down &&
+ (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) &&
+ all_corrupt_bio_flags_match(bio, fc))
+ corrupt_bio_data(bio, fc);
+
+ return error;
+}
+
static int flakey_status(struct dm_target *ti, status_type_t type,
char *result, unsigned int maxlen)
{
+ unsigned sz = 0;
struct flakey_c *fc = ti->private;
+ unsigned drop_writes;
switch (type) {
case STATUSTYPE_INFO:
@@ -138,9 +342,22 @@ static int flakey_status(struct dm_target *ti, status_type_t type,
break;
case STATUSTYPE_TABLE:
- snprintf(result, maxlen, "%s %llu %u %u", fc->dev->name,
- (unsigned long long)fc->start, fc->up_interval,
- fc->down_interval);
+ DMEMIT("%s %llu %u %u ", fc->dev->name,
+ (unsigned long long)fc->start, fc->up_interval,
+ fc->down_interval);
+
+ drop_writes = test_bit(DROP_WRITES, &fc->flags);
+ DMEMIT("%u ", drop_writes + (fc->corrupt_bio_byte > 0) * 5);
+
+ if (drop_writes)
+ DMEMIT("drop_writes ");
+
+ if (fc->corrupt_bio_byte)
+ DMEMIT("corrupt_bio_byte %u %c %u %u ",
+ fc->corrupt_bio_byte,
+ (fc->corrupt_bio_rw == WRITE) ? 'w' : 'r',
+ fc->corrupt_bio_value, fc->corrupt_bio_flags);
+
break;
}
return 0;
@@ -177,11 +394,12 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_
static struct target_type flakey_target = {
.name = "flakey",
- .version = {1, 1, 0},
+ .version = {1, 2, 0},
.module = THIS_MODULE,
.ctr = flakey_ctr,
.dtr = flakey_dtr,
.map = flakey_map,
+ .end_io = flakey_end_io,
.status = flakey_status,
.ioctl = flakey_ioctl,
.merge = flakey_merge,
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 2067288f61f9..ad2eba40e319 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -38,6 +38,8 @@ struct io {
struct dm_io_client *client;
io_notify_fn callback;
void *context;
+ void *vma_invalidate_address;
+ unsigned long vma_invalidate_size;
} __attribute__((aligned(DM_IO_MAX_REGIONS)));
static struct kmem_cache *_dm_io_cache;
@@ -116,6 +118,10 @@ static void dec_count(struct io *io, unsigned int region, int error)
set_bit(region, &io->error_bits);
if (atomic_dec_and_test(&io->count)) {
+ if (io->vma_invalidate_size)
+ invalidate_kernel_vmap_range(io->vma_invalidate_address,
+ io->vma_invalidate_size);
+
if (io->sleeper)
wake_up_process(io->sleeper);
@@ -159,6 +165,9 @@ struct dpages {
unsigned context_u;
void *context_ptr;
+
+ void *vma_invalidate_address;
+ unsigned long vma_invalidate_size;
};
/*
@@ -377,6 +386,9 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
io->sleeper = current;
io->client = client;
+ io->vma_invalidate_address = dp->vma_invalidate_address;
+ io->vma_invalidate_size = dp->vma_invalidate_size;
+
dispatch_io(rw, num_regions, where, dp, io, 1);
while (1) {
@@ -415,13 +427,21 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
io->callback = fn;
io->context = context;
+ io->vma_invalidate_address = dp->vma_invalidate_address;
+ io->vma_invalidate_size = dp->vma_invalidate_size;
+
dispatch_io(rw, num_regions, where, dp, io, 0);
return 0;
}
-static int dp_init(struct dm_io_request *io_req, struct dpages *dp)
+static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
+ unsigned long size)
{
/* Set up dpages based on memory type */
+
+ dp->vma_invalidate_address = NULL;
+ dp->vma_invalidate_size = 0;
+
switch (io_req->mem.type) {
case DM_IO_PAGE_LIST:
list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
@@ -432,6 +452,11 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp)
break;
case DM_IO_VMA:
+ flush_kernel_vmap_range(io_req->mem.ptr.vma, size);
+ if ((io_req->bi_rw & RW_MASK) == READ) {
+ dp->vma_invalidate_address = io_req->mem.ptr.vma;
+ dp->vma_invalidate_size = size;
+ }
vm_dp_init(dp, io_req->mem.ptr.vma);
break;
@@ -460,7 +485,7 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions,
int r;
struct dpages dp;
- r = dp_init(io_req, &dp);
+ r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT);
if (r)
return r;
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 4cacdad2270a..2e9a3ca37bdd 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -128,6 +128,24 @@ static struct hash_cell *__get_uuid_cell(const char *str)
return NULL;
}
+static struct hash_cell *__get_dev_cell(uint64_t dev)
+{
+ struct mapped_device *md;
+ struct hash_cell *hc;
+
+ md = dm_get_md(huge_decode_dev(dev));
+ if (!md)
+ return NULL;
+
+ hc = dm_get_mdptr(md);
+ if (!hc) {
+ dm_put(md);
+ return NULL;
+ }
+
+ return hc;
+}
+
/*-----------------------------------------------------------------
* Inserting, removing and renaming a device.
*---------------------------------------------------------------*/
@@ -718,25 +736,45 @@ static int dev_create(struct dm_ioctl *param, size_t param_size)
*/
static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param)
{
- struct mapped_device *md;
- void *mdptr = NULL;
+ struct hash_cell *hc = NULL;
- if (*param->uuid)
- return __get_uuid_cell(param->uuid);
+ if (*param->uuid) {
+ if (*param->name || param->dev)
+ return NULL;
- if (*param->name)
- return __get_name_cell(param->name);
+ hc = __get_uuid_cell(param->uuid);
+ if (!hc)
+ return NULL;
+ } else if (*param->name) {
+ if (param->dev)
+ return NULL;
- md = dm_get_md(huge_decode_dev(param->dev));
- if (!md)
- goto out;
+ hc = __get_name_cell(param->name);
+ if (!hc)
+ return NULL;
+ } else if (param->dev) {
+ hc = __get_dev_cell(param->dev);
+ if (!hc)
+ return NULL;
+ } else
+ return NULL;
- mdptr = dm_get_mdptr(md);
- if (!mdptr)
- dm_put(md);
+ /*
+ * Sneakily write in both the name and the uuid
+ * while we have the cell.
+ */
+ strlcpy(param->name, hc->name, sizeof(param->name));
+ if (hc->uuid)
+ strlcpy(param->uuid, hc->uuid, sizeof(param->uuid));
+ else
+ param->uuid[0] = '\0';
-out:
- return mdptr;
+ if (hc->new_map)
+ param->flags |= DM_INACTIVE_PRESENT_FLAG;
+ else
+ param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
+
+ return hc;
}
static struct mapped_device *find_device(struct dm_ioctl *param)
@@ -746,24 +784,8 @@ static struct mapped_device *find_device(struct dm_ioctl *param)
down_read(&_hash_lock);
hc = __find_device_hash_cell(param);
- if (hc) {
+ if (hc)
md = hc->md;
-
- /*
- * Sneakily write in both the name and the uuid
- * while we have the cell.
- */
- strlcpy(param->name, hc->name, sizeof(param->name));
- if (hc->uuid)
- strlcpy(param->uuid, hc->uuid, sizeof(param->uuid));
- else
- param->uuid[0] = '\0';
-
- if (hc->new_map)
- param->flags |= DM_INACTIVE_PRESENT_FLAG;
- else
- param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
- }
up_read(&_hash_lock);
return md;
@@ -1402,6 +1424,11 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
goto out;
}
+ if (!argc) {
+ DMWARN("Empty message received.");
+ goto out;
+ }
+
table = dm_get_live_table(md);
if (!table)
goto out_argv;
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 320401dec104..f82147029636 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -224,8 +224,6 @@ struct kcopyd_job {
unsigned int num_dests;
struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS];
- sector_t offset;
- unsigned int nr_pages;
struct page_list *pages;
/*
@@ -380,7 +378,7 @@ static int run_io_job(struct kcopyd_job *job)
.bi_rw = job->rw,
.mem.type = DM_IO_PAGE_LIST,
.mem.ptr.pl = job->pages,
- .mem.offset = job->offset,
+ .mem.offset = 0,
.notify.fn = complete_io,
.notify.context = job,
.client = job->kc->io_client,
@@ -397,10 +395,9 @@ static int run_io_job(struct kcopyd_job *job)
static int run_pages_job(struct kcopyd_job *job)
{
int r;
+ unsigned nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9);
- job->nr_pages = dm_div_up(job->dests[0].count + job->offset,
- PAGE_SIZE >> 9);
- r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages);
+ r = kcopyd_get_pages(job->kc, nr_pages, &job->pages);
if (!r) {
/* this job is ready for io */
push(&job->kc->io_jobs, job);
@@ -602,8 +599,6 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
job->num_dests = num_dests;
memcpy(&job->dests, dests, sizeof(*dests) * num_dests);
- job->offset = 0;
- job->nr_pages = 0;
job->pages = NULL;
job->fn = fn;
@@ -622,6 +617,37 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
}
EXPORT_SYMBOL(dm_kcopyd_copy);
+void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc,
+ dm_kcopyd_notify_fn fn, void *context)
+{
+ struct kcopyd_job *job;
+
+ job = mempool_alloc(kc->job_pool, GFP_NOIO);
+
+ memset(job, 0, sizeof(struct kcopyd_job));
+ job->kc = kc;
+ job->fn = fn;
+ job->context = context;
+
+ atomic_inc(&kc->nr_jobs);
+
+ return job;
+}
+EXPORT_SYMBOL(dm_kcopyd_prepare_callback);
+
+void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err)
+{
+ struct kcopyd_job *job = j;
+ struct dm_kcopyd_client *kc = job->kc;
+
+ job->read_err = read_err;
+ job->write_err = write_err;
+
+ push(&kc->complete_jobs, job);
+ wake(kc);
+}
+EXPORT_SYMBOL(dm_kcopyd_do_callback);
+
/*
* Cancels a kcopyd job, eg. someone might be deactivating a
* mirror.
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index aa2e0c374ab3..1021c8986011 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -394,8 +394,7 @@ static int flush_by_group(struct log_c *lc, struct list_head *flush_list)
group[count] = fe->region;
count++;
- list_del(&fe->list);
- list_add(&fe->list, &tmp_list);
+ list_move(&fe->list, &tmp_list);
type = fe->type;
if (count >= MAX_FLUSH_GROUP_COUNT)
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 948e3f4925bf..3b52bb72bd1f 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -197,15 +197,21 @@ EXPORT_SYMBOL(dm_dirty_log_destroy);
#define MIRROR_DISK_VERSION 2
#define LOG_OFFSET 2
-struct log_header {
- uint32_t magic;
+struct log_header_disk {
+ __le32 magic;
/*
* Simple, incrementing version. no backward
* compatibility.
*/
+ __le32 version;
+ __le64 nr_regions;
+} __packed;
+
+struct log_header_core {
+ uint32_t magic;
uint32_t version;
- sector_t nr_regions;
+ uint64_t nr_regions;
};
struct log_c {
@@ -239,10 +245,10 @@ struct log_c {
int log_dev_failed;
int log_dev_flush_failed;
struct dm_dev *log_dev;
- struct log_header header;
+ struct log_header_core header;
struct dm_io_region header_location;
- struct log_header *disk_header;
+ struct log_header_disk *disk_header;
};
/*
@@ -251,34 +257,34 @@ struct log_c {
*/
static inline int log_test_bit(uint32_t *bs, unsigned bit)
{
- return test_bit_le(bit, (unsigned long *) bs) ? 1 : 0;
+ return test_bit_le(bit, bs) ? 1 : 0;
}
static inline void log_set_bit(struct log_c *l,
uint32_t *bs, unsigned bit)
{
- __test_and_set_bit_le(bit, (unsigned long *) bs);
+ __set_bit_le(bit, bs);
l->touched_cleaned = 1;
}
static inline void log_clear_bit(struct log_c *l,
uint32_t *bs, unsigned bit)
{
- __test_and_clear_bit_le(bit, (unsigned long *) bs);
+ __clear_bit_le(bit, bs);
l->touched_dirtied = 1;
}
/*----------------------------------------------------------------
* Header IO
*--------------------------------------------------------------*/
-static void header_to_disk(struct log_header *core, struct log_header *disk)
+static void header_to_disk(struct log_header_core *core, struct log_header_disk *disk)
{
disk->magic = cpu_to_le32(core->magic);
disk->version = cpu_to_le32(core->version);
disk->nr_regions = cpu_to_le64(core->nr_regions);
}
-static void header_from_disk(struct log_header *core, struct log_header *disk)
+static void header_from_disk(struct log_header_core *core, struct log_header_disk *disk)
{
core->magic = le32_to_cpu(disk->magic);
core->version = le32_to_cpu(disk->version);
@@ -486,7 +492,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
memset(lc->sync_bits, (sync == NOSYNC) ? -1 : 0, bitset_size);
lc->sync_count = (sync == NOSYNC) ? region_count : 0;
- lc->recovering_bits = vmalloc(bitset_size);
+ lc->recovering_bits = vzalloc(bitset_size);
if (!lc->recovering_bits) {
DMWARN("couldn't allocate sync bitset");
vfree(lc->sync_bits);
@@ -498,7 +504,6 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
kfree(lc);
return -ENOMEM;
}
- memset(lc->recovering_bits, 0, bitset_size);
lc->sync_search = 0;
log->context = lc;
@@ -739,8 +744,7 @@ static int core_get_resync_work(struct dm_dirty_log *log, region_t *region)
return 0;
do {
- *region = find_next_zero_bit_le(
- (unsigned long *) lc->sync_bits,
+ *region = find_next_zero_bit_le(lc->sync_bits,
lc->region_count,
lc->sync_search);
lc->sync_search = *region + 1;
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index c3547016f0f1..5e0090ef4182 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -22,7 +22,6 @@
#include <linux/atomic.h>
#define DM_MSG_PREFIX "multipath"
-#define MESG_STR(x) x, sizeof(x)
#define DM_PG_INIT_DELAY_MSECS 2000
#define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)
@@ -505,80 +504,29 @@ static void trigger_event(struct work_struct *work)
* <#paths> <#per-path selector args>
* [<path> [<arg>]* ]+ ]+
*---------------------------------------------------------------*/
-struct param {
- unsigned min;
- unsigned max;
- char *error;
-};
-
-static int read_param(struct param *param, char *str, unsigned *v, char **error)
-{
- if (!str ||
- (sscanf(str, "%u", v) != 1) ||
- (*v < param->min) ||
- (*v > param->max)) {
- *error = param->error;
- return -EINVAL;
- }
-
- return 0;
-}
-
-struct arg_set {
- unsigned argc;
- char **argv;
-};
-
-static char *shift(struct arg_set *as)
-{
- char *r;
-
- if (as->argc) {
- as->argc--;
- r = *as->argv;
- as->argv++;
- return r;
- }
-
- return NULL;
-}
-
-static void consume(struct arg_set *as, unsigned n)
-{
- BUG_ON (as->argc < n);
- as->argc -= n;
- as->argv += n;
-}
-
-static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
+static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
struct dm_target *ti)
{
int r;
struct path_selector_type *pst;
unsigned ps_argc;
- static struct param _params[] = {
+ static struct dm_arg _args[] = {
{0, 1024, "invalid number of path selector args"},
};
- pst = dm_get_path_selector(shift(as));
+ pst = dm_get_path_selector(dm_shift_arg(as));
if (!pst) {
ti->error = "unknown path selector type";
return -EINVAL;
}
- r = read_param(_params, shift(as), &ps_argc, &ti->error);
+ r = dm_read_arg_group(_args, as, &ps_argc, &ti->error);
if (r) {
dm_put_path_selector(pst);
return -EINVAL;
}
- if (ps_argc > as->argc) {
- dm_put_path_selector(pst);
- ti->error = "not enough arguments for path selector";
- return -EINVAL;
- }
-
r = pst->create(&pg->ps, ps_argc, as->argv);
if (r) {
dm_put_path_selector(pst);
@@ -587,12 +535,12 @@ static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
}
pg->ps.type = pst;
- consume(as, ps_argc);
+ dm_consume_args(as, ps_argc);
return 0;
}
-static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
+static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps,
struct dm_target *ti)
{
int r;
@@ -609,7 +557,7 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
if (!p)
return ERR_PTR(-ENOMEM);
- r = dm_get_device(ti, shift(as), dm_table_get_mode(ti->table),
+ r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
&p->path.dev);
if (r) {
ti->error = "error getting device";
@@ -660,16 +608,16 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
return ERR_PTR(r);
}
-static struct priority_group *parse_priority_group(struct arg_set *as,
+static struct priority_group *parse_priority_group(struct dm_arg_set *as,
struct multipath *m)
{
- static struct param _params[] = {
+ static struct dm_arg _args[] = {
{1, 1024, "invalid number of paths"},
{0, 1024, "invalid number of selector args"}
};
int r;
- unsigned i, nr_selector_args, nr_params;
+ unsigned i, nr_selector_args, nr_args;
struct priority_group *pg;
struct dm_target *ti = m->ti;
@@ -693,26 +641,26 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
/*
* read the paths
*/
- r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
+ r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error);
if (r)
goto bad;
- r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
+ r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error);
if (r)
goto bad;
- nr_params = 1 + nr_selector_args;
+ nr_args = 1 + nr_selector_args;
for (i = 0; i < pg->nr_pgpaths; i++) {
struct pgpath *pgpath;
- struct arg_set path_args;
+ struct dm_arg_set path_args;
- if (as->argc < nr_params) {
+ if (as->argc < nr_args) {
ti->error = "not enough path parameters";
r = -EINVAL;
goto bad;
}
- path_args.argc = nr_params;
+ path_args.argc = nr_args;
path_args.argv = as->argv;
pgpath = parse_path(&path_args, &pg->ps, ti);
@@ -723,7 +671,7 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
pgpath->pg = pg;
list_add_tail(&pgpath->list, &pg->pgpaths);
- consume(as, nr_params);
+ dm_consume_args(as, nr_args);
}
return pg;
@@ -733,28 +681,23 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
return ERR_PTR(r);
}
-static int parse_hw_handler(struct arg_set *as, struct multipath *m)
+static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
{
unsigned hw_argc;
int ret;
struct dm_target *ti = m->ti;
- static struct param _params[] = {
+ static struct dm_arg _args[] = {
{0, 1024, "invalid number of hardware handler args"},
};
- if (read_param(_params, shift(as), &hw_argc, &ti->error))
+ if (dm_read_arg_group(_args, as, &hw_argc, &ti->error))
return -EINVAL;
if (!hw_argc)
return 0;
- if (hw_argc > as->argc) {
- ti->error = "not enough arguments for hardware handler";
- return -EINVAL;
- }
-
- m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
+ m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
request_module("scsi_dh_%s", m->hw_handler_name);
if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
ti->error = "unknown hardware handler type";
@@ -778,7 +721,7 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m)
for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1)
j = sprintf(p, "%s", as->argv[i]);
}
- consume(as, hw_argc - 1);
+ dm_consume_args(as, hw_argc - 1);
return 0;
fail:
@@ -787,20 +730,20 @@ fail:
return ret;
}
-static int parse_features(struct arg_set *as, struct multipath *m)
+static int parse_features(struct dm_arg_set *as, struct multipath *m)
{
int r;
unsigned argc;
struct dm_target *ti = m->ti;
- const char *param_name;
+ const char *arg_name;
- static struct param _params[] = {
+ static struct dm_arg _args[] = {
{0, 5, "invalid number of feature args"},
{1, 50, "pg_init_retries must be between 1 and 50"},
{0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
};
- r = read_param(_params, shift(as), &argc, &ti->error);
+ r = dm_read_arg_group(_args, as, &argc, &ti->error);
if (r)
return -EINVAL;
@@ -808,26 +751,24 @@ static int parse_features(struct arg_set *as, struct multipath *m)
return 0;
do {
- param_name = shift(as);
+ arg_name = dm_shift_arg(as);
argc--;
- if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
+ if (!strcasecmp(arg_name, "queue_if_no_path")) {
r = queue_if_no_path(m, 1, 0);
continue;
}
- if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
+ if (!strcasecmp(arg_name, "pg_init_retries") &&
(argc >= 1)) {
- r = read_param(_params + 1, shift(as),
- &m->pg_init_retries, &ti->error);
+ r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error);
argc--;
continue;
}
- if (!strnicmp(param_name, MESG_STR("pg_init_delay_msecs")) &&
+ if (!strcasecmp(arg_name, "pg_init_delay_msecs") &&
(argc >= 1)) {
- r = read_param(_params + 2, shift(as),
- &m->pg_init_delay_msecs, &ti->error);
+ r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error);
argc--;
continue;
}
@@ -842,15 +783,15 @@ static int parse_features(struct arg_set *as, struct multipath *m)
static int multipath_ctr(struct dm_target *ti, unsigned int argc,
char **argv)
{
- /* target parameters */
- static struct param _params[] = {
+ /* target arguments */
+ static struct dm_arg _args[] = {
{0, 1024, "invalid number of priority groups"},
{0, 1024, "invalid initial priority group number"},
};
int r;
struct multipath *m;
- struct arg_set as;
+ struct dm_arg_set as;
unsigned pg_count = 0;
unsigned next_pg_num;
@@ -871,11 +812,11 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
if (r)
goto bad;
- r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
+ r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error);
if (r)
goto bad;
- r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error);
+ r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error);
if (r)
goto bad;
@@ -1505,10 +1446,10 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
}
if (argc == 1) {
- if (!strnicmp(argv[0], MESG_STR("queue_if_no_path"))) {
+ if (!strcasecmp(argv[0], "queue_if_no_path")) {
r = queue_if_no_path(m, 1, 0);
goto out;
- } else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path"))) {
+ } else if (!strcasecmp(argv[0], "fail_if_no_path")) {
r = queue_if_no_path(m, 0, 0);
goto out;
}
@@ -1519,18 +1460,18 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
goto out;
}
- if (!strnicmp(argv[0], MESG_STR("disable_group"))) {
+ if (!strcasecmp(argv[0], "disable_group")) {
r = bypass_pg_num(m, argv[1], 1);
goto out;
- } else if (!strnicmp(argv[0], MESG_STR("enable_group"))) {
+ } else if (!strcasecmp(argv[0], "enable_group")) {
r = bypass_pg_num(m, argv[1], 0);
goto out;
- } else if (!strnicmp(argv[0], MESG_STR("switch_group"))) {
+ } else if (!strcasecmp(argv[0], "switch_group")) {
r = switch_pg_num(m, argv[1]);
goto out;
- } else if (!strnicmp(argv[0], MESG_STR("reinstate_path")))
+ } else if (!strcasecmp(argv[0], "reinstate_path"))
action = reinstate_path;
- else if (!strnicmp(argv[0], MESG_STR("fail_path")))
+ else if (!strcasecmp(argv[0], "fail_path"))
action = fail_path;
else {
DMWARN("Unrecognised multipath message received.");
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index e5d8904fc8f6..a002dd85db1e 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -8,19 +8,19 @@
#include <linux/slab.h>
#include "md.h"
+#include "raid1.h"
#include "raid5.h"
-#include "dm.h"
#include "bitmap.h"
+#include <linux/device-mapper.h>
+
#define DM_MSG_PREFIX "raid"
/*
- * If the MD doesn't support MD_SYNC_STATE_FORCED yet, then
- * make it so the flag doesn't set anything.
+ * The following flags are used by dm-raid.c to set up the array state.
+ * They must be cleared before md_run is called.
*/
-#ifndef MD_SYNC_STATE_FORCED
-#define MD_SYNC_STATE_FORCED 0
-#endif
+#define FirstUse 10 /* rdev flag */
struct raid_dev {
/*
@@ -43,14 +43,15 @@ struct raid_dev {
/*
* Flags for rs->print_flags field.
*/
-#define DMPF_DAEMON_SLEEP 0x1
-#define DMPF_MAX_WRITE_BEHIND 0x2
-#define DMPF_SYNC 0x4
-#define DMPF_NOSYNC 0x8
-#define DMPF_STRIPE_CACHE 0x10
-#define DMPF_MIN_RECOVERY_RATE 0x20
-#define DMPF_MAX_RECOVERY_RATE 0x40
-
+#define DMPF_SYNC 0x1
+#define DMPF_NOSYNC 0x2
+#define DMPF_REBUILD 0x4
+#define DMPF_DAEMON_SLEEP 0x8
+#define DMPF_MIN_RECOVERY_RATE 0x10
+#define DMPF_MAX_RECOVERY_RATE 0x20
+#define DMPF_MAX_WRITE_BEHIND 0x40
+#define DMPF_STRIPE_CACHE 0x80
+#define DMPF_REGION_SIZE 0X100
struct raid_set {
struct dm_target *ti;
@@ -72,6 +73,7 @@ static struct raid_type {
const unsigned level; /* RAID level. */
const unsigned algorithm; /* RAID algorithm. */
} raid_types[] = {
+ {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */},
{"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0},
{"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC},
{"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC},
@@ -105,7 +107,8 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra
}
sectors_per_dev = ti->len;
- if (sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) {
+ if ((raid_type->level > 1) &&
+ sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) {
ti->error = "Target length not divisible by number of data devices";
return ERR_PTR(-EINVAL);
}
@@ -147,9 +150,16 @@ static void context_free(struct raid_set *rs)
{
int i;
- for (i = 0; i < rs->md.raid_disks; i++)
+ for (i = 0; i < rs->md.raid_disks; i++) {
+ if (rs->dev[i].meta_dev)
+ dm_put_device(rs->ti, rs->dev[i].meta_dev);
+ if (rs->dev[i].rdev.sb_page)
+ put_page(rs->dev[i].rdev.sb_page);
+ rs->dev[i].rdev.sb_page = NULL;
+ rs->dev[i].rdev.sb_loaded = 0;
if (rs->dev[i].data_dev)
dm_put_device(rs->ti, rs->dev[i].data_dev);
+ }
kfree(rs);
}
@@ -159,7 +169,16 @@ static void context_free(struct raid_set *rs)
* <meta_dev>: meta device name or '-' if missing
* <data_dev>: data device name or '-' if missing
*
- * This code parses those words.
+ * The following are permitted:
+ * - -
+ * - <data_dev>
+ * <meta_dev> <data_dev>
+ *
+ * The following is not allowed:
+ * <meta_dev> -
+ *
+ * This code parses those words. If there is a failure,
+ * the caller must use context_free to unwind the operations.
*/
static int dev_parms(struct raid_set *rs, char **argv)
{
@@ -182,8 +201,16 @@ static int dev_parms(struct raid_set *rs, char **argv)
rs->dev[i].rdev.mddev = &rs->md;
if (strcmp(argv[0], "-")) {
- rs->ti->error = "Metadata devices not supported";
- return -EINVAL;
+ ret = dm_get_device(rs->ti, argv[0],
+ dm_table_get_mode(rs->ti->table),
+ &rs->dev[i].meta_dev);
+ rs->ti->error = "RAID metadata device lookup failure";
+ if (ret)
+ return ret;
+
+ rs->dev[i].rdev.sb_page = alloc_page(GFP_KERNEL);
+ if (!rs->dev[i].rdev.sb_page)
+ return -ENOMEM;
}
if (!strcmp(argv[1], "-")) {
@@ -193,6 +220,10 @@ static int dev_parms(struct raid_set *rs, char **argv)
return -EINVAL;
}
+ rs->ti->error = "No data device supplied with metadata device";
+ if (rs->dev[i].meta_dev)
+ return -EINVAL;
+
continue;
}
@@ -204,6 +235,10 @@ static int dev_parms(struct raid_set *rs, char **argv)
return ret;
}
+ if (rs->dev[i].meta_dev) {
+ metadata_available = 1;
+ rs->dev[i].rdev.meta_bdev = rs->dev[i].meta_dev->bdev;
+ }
rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev;
list_add(&rs->dev[i].rdev.same_set, &rs->md.disks);
if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
@@ -235,33 +270,109 @@ static int dev_parms(struct raid_set *rs, char **argv)
}
/*
+ * validate_region_size
+ * @rs
+ * @region_size: region size in sectors. If 0, pick a size (4MiB default).
+ *
+ * Set rs->md.bitmap_info.chunksize (which really refers to 'region size').
+ * Ensure that (ti->len/region_size < 2^21) - required by MD bitmap.
+ *
+ * Returns: 0 on success, -EINVAL on failure.
+ */
+static int validate_region_size(struct raid_set *rs, unsigned long region_size)
+{
+ unsigned long min_region_size = rs->ti->len / (1 << 21);
+
+ if (!region_size) {
+ /*
+ * Choose a reasonable default. All figures in sectors.
+ */
+ if (min_region_size > (1 << 13)) {
+ DMINFO("Choosing default region size of %lu sectors",
+ region_size);
+ region_size = min_region_size;
+ } else {
+ DMINFO("Choosing default region size of 4MiB");
+ region_size = 1 << 13; /* sectors */
+ }
+ } else {
+ /*
+ * Validate user-supplied value.
+ */
+ if (region_size > rs->ti->len) {
+ rs->ti->error = "Supplied region size is too large";
+ return -EINVAL;
+ }
+
+ if (region_size < min_region_size) {
+ DMERR("Supplied region_size (%lu sectors) below minimum (%lu)",
+ region_size, min_region_size);
+ rs->ti->error = "Supplied region size is too small";
+ return -EINVAL;
+ }
+
+ if (!is_power_of_2(region_size)) {
+ rs->ti->error = "Region size is not a power of 2";
+ return -EINVAL;
+ }
+
+ if (region_size < rs->md.chunk_sectors) {
+ rs->ti->error = "Region size is smaller than the chunk size";
+ return -EINVAL;
+ }
+ }
+
+ /*
+ * Convert sectors to bytes.
+ */
+ rs->md.bitmap_info.chunksize = (region_size << 9);
+
+ return 0;
+}
+
+/*
* Possible arguments are...
- * RAID456:
* <chunk_size> [optional_args]
*
- * Optional args:
- * [[no]sync] Force or prevent recovery of the entire array
+ * Argument definitions
+ * <chunk_size> The number of sectors per disk that
+ * will form the "stripe"
+ * [[no]sync] Force or prevent recovery of the
+ * entire array
* [rebuild <idx>] Rebuild the drive indicated by the index
- * [daemon_sleep <ms>] Time between bitmap daemon work to clear bits
+ * [daemon_sleep <ms>] Time between bitmap daemon work to
+ * clear bits
* [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization
* [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization
+ * [write_mostly <idx>] Indicate a write mostly drive via index
* [max_write_behind <sectors>] See '-write-behind=' (man mdadm)
* [stripe_cache <sectors>] Stripe cache size for higher RAIDs
+ * [region_size <sectors>] Defines granularity of bitmap
*/
static int parse_raid_params(struct raid_set *rs, char **argv,
unsigned num_raid_params)
{
unsigned i, rebuild_cnt = 0;
- unsigned long value;
+ unsigned long value, region_size = 0;
char *key;
/*
* First, parse the in-order required arguments
+ * "chunk_size" is the only argument of this type.
*/
- if ((strict_strtoul(argv[0], 10, &value) < 0) ||
- !is_power_of_2(value) || (value < 8)) {
+ if ((strict_strtoul(argv[0], 10, &value) < 0)) {
rs->ti->error = "Bad chunk size";
return -EINVAL;
+ } else if (rs->raid_type->level == 1) {
+ if (value)
+ DMERR("Ignoring chunk size parameter for RAID 1");
+ value = 0;
+ } else if (!is_power_of_2(value)) {
+ rs->ti->error = "Chunk size must be a power of 2";
+ return -EINVAL;
+ } else if (value < 8) {
+ rs->ti->error = "Chunk size value is too small";
+ return -EINVAL;
}
rs->md.new_chunk_sectors = rs->md.chunk_sectors = value;
@@ -269,22 +380,39 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
num_raid_params--;
/*
- * Second, parse the unordered optional arguments
+ * We set each individual device as In_sync with a completed
+ * 'recovery_offset'. If there has been a device failure or
+ * replacement then one of the following cases applies:
+ *
+ * 1) User specifies 'rebuild'.
+ * - Device is reset when param is read.
+ * 2) A new device is supplied.
+ * - No matching superblock found, resets device.
+ * 3) Device failure was transient and returns on reload.
+ * - Failure noticed, resets device for bitmap replay.
+ * 4) Device hadn't completed recovery after previous failure.
+ * - Superblock is read and overrides recovery_offset.
+ *
+ * What is found in the superblocks of the devices is always
+ * authoritative, unless 'rebuild' or '[no]sync' was specified.
*/
- for (i = 0; i < rs->md.raid_disks; i++)
+ for (i = 0; i < rs->md.raid_disks; i++) {
set_bit(In_sync, &rs->dev[i].rdev.flags);
+ rs->dev[i].rdev.recovery_offset = MaxSector;
+ }
+ /*
+ * Second, parse the unordered optional arguments
+ */
for (i = 0; i < num_raid_params; i++) {
- if (!strcmp(argv[i], "nosync")) {
+ if (!strcasecmp(argv[i], "nosync")) {
rs->md.recovery_cp = MaxSector;
rs->print_flags |= DMPF_NOSYNC;
- rs->md.flags |= MD_SYNC_STATE_FORCED;
continue;
}
- if (!strcmp(argv[i], "sync")) {
+ if (!strcasecmp(argv[i], "sync")) {
rs->md.recovery_cp = 0;
rs->print_flags |= DMPF_SYNC;
- rs->md.flags |= MD_SYNC_STATE_FORCED;
continue;
}
@@ -300,9 +428,13 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
return -EINVAL;
}
- if (!strcmp(key, "rebuild")) {
- if (++rebuild_cnt > rs->raid_type->parity_devs) {
- rs->ti->error = "Too many rebuild drives given";
+ if (!strcasecmp(key, "rebuild")) {
+ rebuild_cnt++;
+ if (((rs->raid_type->level != 1) &&
+ (rebuild_cnt > rs->raid_type->parity_devs)) ||
+ ((rs->raid_type->level == 1) &&
+ (rebuild_cnt > (rs->md.raid_disks - 1)))) {
+ rs->ti->error = "Too many rebuild devices specified for given RAID type";
return -EINVAL;
}
if (value > rs->md.raid_disks) {
@@ -311,7 +443,22 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
}
clear_bit(In_sync, &rs->dev[value].rdev.flags);
rs->dev[value].rdev.recovery_offset = 0;
- } else if (!strcmp(key, "max_write_behind")) {
+ rs->print_flags |= DMPF_REBUILD;
+ } else if (!strcasecmp(key, "write_mostly")) {
+ if (rs->raid_type->level != 1) {
+ rs->ti->error = "write_mostly option is only valid for RAID1";
+ return -EINVAL;
+ }
+ if (value > rs->md.raid_disks) {
+ rs->ti->error = "Invalid write_mostly drive index given";
+ return -EINVAL;
+ }
+ set_bit(WriteMostly, &rs->dev[value].rdev.flags);
+ } else if (!strcasecmp(key, "max_write_behind")) {
+ if (rs->raid_type->level != 1) {
+ rs->ti->error = "max_write_behind option is only valid for RAID1";
+ return -EINVAL;
+ }
rs->print_flags |= DMPF_MAX_WRITE_BEHIND;
/*
@@ -324,14 +471,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
return -EINVAL;
}
rs->md.bitmap_info.max_write_behind = value;
- } else if (!strcmp(key, "daemon_sleep")) {
+ } else if (!strcasecmp(key, "daemon_sleep")) {
rs->print_flags |= DMPF_DAEMON_SLEEP;
if (!value || (value > MAX_SCHEDULE_TIMEOUT)) {
rs->ti->error = "daemon sleep period out of range";
return -EINVAL;
}
rs->md.bitmap_info.daemon_sleep = value;
- } else if (!strcmp(key, "stripe_cache")) {
+ } else if (!strcasecmp(key, "stripe_cache")) {
rs->print_flags |= DMPF_STRIPE_CACHE;
/*
@@ -348,20 +495,23 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
rs->ti->error = "Bad stripe_cache size";
return -EINVAL;
}
- } else if (!strcmp(key, "min_recovery_rate")) {
+ } else if (!strcasecmp(key, "min_recovery_rate")) {
rs->print_flags |= DMPF_MIN_RECOVERY_RATE;
if (value > INT_MAX) {
rs->ti->error = "min_recovery_rate out of range";
return -EINVAL;
}
rs->md.sync_speed_min = (int)value;
- } else if (!strcmp(key, "max_recovery_rate")) {
+ } else if (!strcasecmp(key, "max_recovery_rate")) {
rs->print_flags |= DMPF_MAX_RECOVERY_RATE;
if (value > INT_MAX) {
rs->ti->error = "max_recovery_rate out of range";
return -EINVAL;
}
rs->md.sync_speed_max = (int)value;
+ } else if (!strcasecmp(key, "region_size")) {
+ rs->print_flags |= DMPF_REGION_SIZE;
+ region_size = value;
} else {
DMERR("Unable to parse RAID parameter: %s", key);
rs->ti->error = "Unable to parse RAID parameters";
@@ -369,6 +519,19 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
}
}
+ if (validate_region_size(rs, region_size))
+ return -EINVAL;
+
+ if (rs->md.chunk_sectors)
+ rs->ti->split_io = rs->md.chunk_sectors;
+ else
+ rs->ti->split_io = region_size;
+
+ if (rs->md.chunk_sectors)
+ rs->ti->split_io = rs->md.chunk_sectors;
+ else
+ rs->ti->split_io = region_size;
+
/* Assume there are no metadata devices until the drives are parsed */
rs->md.persistent = 0;
rs->md.external = 1;
@@ -387,17 +550,351 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
{
struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
+ if (rs->raid_type->level == 1)
+ return md_raid1_congested(&rs->md, bits);
+
return md_raid5_congested(&rs->md, bits);
}
/*
+ * This structure is never routinely used by userspace, unlike md superblocks.
+ * Devices with this superblock should only ever be accessed via device-mapper.
+ */
+#define DM_RAID_MAGIC 0x64526D44
+struct dm_raid_superblock {
+ __le32 magic; /* "DmRd" */
+ __le32 features; /* Used to indicate possible future changes */
+
+ __le32 num_devices; /* Number of devices in this array. (Max 64) */
+ __le32 array_position; /* The position of this drive in the array */
+
+ __le64 events; /* Incremented by md when superblock updated */
+ __le64 failed_devices; /* Bit field of devices to indicate failures */
+
+ /*
+ * This offset tracks the progress of the repair or replacement of
+ * an individual drive.
+ */
+ __le64 disk_recovery_offset;
+
+ /*
+ * This offset tracks the progress of the initial array
+ * synchronisation/parity calculation.
+ */
+ __le64 array_resync_offset;
+
+ /*
+ * RAID characteristics
+ */
+ __le32 level;
+ __le32 layout;
+ __le32 stripe_sectors;
+
+ __u8 pad[452]; /* Round struct to 512 bytes. */
+ /* Always set to 0 when writing. */
+} __packed;
+
+static int read_disk_sb(mdk_rdev_t *rdev, int size)
+{
+ BUG_ON(!rdev->sb_page);
+
+ if (rdev->sb_loaded)
+ return 0;
+
+ if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) {
+ DMERR("Failed to read device superblock");
+ return -EINVAL;
+ }
+
+ rdev->sb_loaded = 1;
+
+ return 0;
+}
+
+static void super_sync(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+ mdk_rdev_t *r, *t;
+ uint64_t failed_devices;
+ struct dm_raid_superblock *sb;
+
+ sb = page_address(rdev->sb_page);
+ failed_devices = le64_to_cpu(sb->failed_devices);
+
+ rdev_for_each(r, t, mddev)
+ if ((r->raid_disk >= 0) && test_bit(Faulty, &r->flags))
+ failed_devices |= (1ULL << r->raid_disk);
+
+ memset(sb, 0, sizeof(*sb));
+
+ sb->magic = cpu_to_le32(DM_RAID_MAGIC);
+ sb->features = cpu_to_le32(0); /* No features yet */
+
+ sb->num_devices = cpu_to_le32(mddev->raid_disks);
+ sb->array_position = cpu_to_le32(rdev->raid_disk);
+
+ sb->events = cpu_to_le64(mddev->events);
+ sb->failed_devices = cpu_to_le64(failed_devices);
+
+ sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset);
+ sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp);
+
+ sb->level = cpu_to_le32(mddev->level);
+ sb->layout = cpu_to_le32(mddev->layout);
+ sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors);
+}
+
+/*
+ * super_load
+ *
+ * This function creates a superblock if one is not found on the device
+ * and will decide which superblock to use if there's a choice.
+ *
+ * Return: 1 if use rdev, 0 if use refdev, -Exxx otherwise
+ */
+static int super_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev)
+{
+ int ret;
+ struct dm_raid_superblock *sb;
+ struct dm_raid_superblock *refsb;
+ uint64_t events_sb, events_refsb;
+
+ rdev->sb_start = 0;
+ rdev->sb_size = sizeof(*sb);
+
+ ret = read_disk_sb(rdev, rdev->sb_size);
+ if (ret)
+ return ret;
+
+ sb = page_address(rdev->sb_page);
+ if (sb->magic != cpu_to_le32(DM_RAID_MAGIC)) {
+ super_sync(rdev->mddev, rdev);
+
+ set_bit(FirstUse, &rdev->flags);
+
+ /* Force writing of superblocks to disk */
+ set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags);
+
+ /* Any superblock is better than none, choose that if given */
+ return refdev ? 0 : 1;
+ }
+
+ if (!refdev)
+ return 1;
+
+ events_sb = le64_to_cpu(sb->events);
+
+ refsb = page_address(refdev->sb_page);
+ events_refsb = le64_to_cpu(refsb->events);
+
+ return (events_sb > events_refsb) ? 1 : 0;
+}
+
+static int super_init_validation(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+ int role;
+ struct raid_set *rs = container_of(mddev, struct raid_set, md);
+ uint64_t events_sb;
+ uint64_t failed_devices;
+ struct dm_raid_superblock *sb;
+ uint32_t new_devs = 0;
+ uint32_t rebuilds = 0;
+ mdk_rdev_t *r, *t;
+ struct dm_raid_superblock *sb2;
+
+ sb = page_address(rdev->sb_page);
+ events_sb = le64_to_cpu(sb->events);
+ failed_devices = le64_to_cpu(sb->failed_devices);
+
+ /*
+ * Initialise to 1 if this is a new superblock.
+ */
+ mddev->events = events_sb ? : 1;
+
+ /*
+ * Reshaping is not currently allowed
+ */
+ if ((le32_to_cpu(sb->level) != mddev->level) ||
+ (le32_to_cpu(sb->layout) != mddev->layout) ||
+ (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors)) {
+ DMERR("Reshaping arrays not yet supported.");
+ return -EINVAL;
+ }
+
+ /* We can only change the number of devices in RAID1 right now */
+ if ((rs->raid_type->level != 1) &&
+ (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) {
+ DMERR("Reshaping arrays not yet supported.");
+ return -EINVAL;
+ }
+
+ if (!(rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC)))
+ mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset);
+
+ /*
+ * During load, we set FirstUse if a new superblock was written.
+ * There are two reasons we might not have a superblock:
+ * 1) The array is brand new - in which case, all of the
+ * devices must have their In_sync bit set. Also,
+ * recovery_cp must be 0, unless forced.
+ * 2) This is a new device being added to an old array
+ * and the new device needs to be rebuilt - in which
+ * case the In_sync bit will /not/ be set and
+ * recovery_cp must be MaxSector.
+ */
+ rdev_for_each(r, t, mddev) {
+ if (!test_bit(In_sync, &r->flags)) {
+ if (!test_bit(FirstUse, &r->flags))
+ DMERR("Superblock area of "
+ "rebuild device %d should have been "
+ "cleared.", r->raid_disk);
+ set_bit(FirstUse, &r->flags);
+ rebuilds++;
+ } else if (test_bit(FirstUse, &r->flags))
+ new_devs++;
+ }
+
+ if (!rebuilds) {
+ if (new_devs == mddev->raid_disks) {
+ DMINFO("Superblocks created for new array");
+ set_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
+ } else if (new_devs) {
+ DMERR("New device injected "
+ "into existing array without 'rebuild' "
+ "parameter specified");
+ return -EINVAL;
+ }
+ } else if (new_devs) {
+ DMERR("'rebuild' devices cannot be "
+ "injected into an array with other first-time devices");
+ return -EINVAL;
+ } else if (mddev->recovery_cp != MaxSector) {
+ DMERR("'rebuild' specified while array is not in-sync");
+ return -EINVAL;
+ }
+
+ /*
+ * Now we set the Faulty bit for those devices that are
+ * recorded in the superblock as failed.
+ */
+ rdev_for_each(r, t, mddev) {
+ if (!r->sb_page)
+ continue;
+ sb2 = page_address(r->sb_page);
+ sb2->failed_devices = 0;
+
+ /*
+ * Check for any device re-ordering.
+ */
+ if (!test_bit(FirstUse, &r->flags) && (r->raid_disk >= 0)) {
+ role = le32_to_cpu(sb2->array_position);
+ if (role != r->raid_disk) {
+ if (rs->raid_type->level != 1) {
+ rs->ti->error = "Cannot change device "
+ "positions in RAID array";
+ return -EINVAL;
+ }
+ DMINFO("RAID1 device #%d now at position #%d",
+ role, r->raid_disk);
+ }
+
+ /*
+ * Partial recovery is performed on
+ * returning failed devices.
+ */
+ if (failed_devices & (1 << role))
+ set_bit(Faulty, &r->flags);
+ }
+ }
+
+ return 0;
+}
+
+static int super_validate(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+ struct dm_raid_superblock *sb = page_address(rdev->sb_page);
+
+ /*
+ * If mddev->events is not set, we know we have not yet initialized
+ * the array.
+ */
+ if (!mddev->events && super_init_validation(mddev, rdev))
+ return -EINVAL;
+
+ mddev->bitmap_info.offset = 4096 >> 9; /* Enable bitmap creation */
+ rdev->mddev->bitmap_info.default_offset = 4096 >> 9;
+ if (!test_bit(FirstUse, &rdev->flags)) {
+ rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset);
+ if (rdev->recovery_offset != MaxSector)
+ clear_bit(In_sync, &rdev->flags);
+ }
+
+ /*
+ * If a device comes back, set it as not In_sync and no longer faulty.
+ */
+ if (test_bit(Faulty, &rdev->flags)) {
+ clear_bit(Faulty, &rdev->flags);
+ clear_bit(In_sync, &rdev->flags);
+ rdev->saved_raid_disk = rdev->raid_disk;
+ rdev->recovery_offset = 0;
+ }
+
+ clear_bit(FirstUse, &rdev->flags);
+
+ return 0;
+}
+
+/*
+ * Analyse superblocks and select the freshest.
+ */
+static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
+{
+ int ret;
+ mdk_rdev_t *rdev, *freshest, *tmp;
+ mddev_t *mddev = &rs->md;
+
+ freshest = NULL;
+ rdev_for_each(rdev, tmp, mddev) {
+ if (!rdev->meta_bdev)
+ continue;
+
+ ret = super_load(rdev, freshest);
+
+ switch (ret) {
+ case 1:
+ freshest = rdev;
+ break;
+ case 0:
+ break;
+ default:
+ ti->error = "Failed to load superblock";
+ return ret;
+ }
+ }
+
+ if (!freshest)
+ return 0;
+
+ /*
+ * Validation of the freshest device provides the source of
+ * validation for the remaining devices.
+ */
+ ti->error = "Unable to assemble array: Invalid superblocks";
+ if (super_validate(mddev, freshest))
+ return -EINVAL;
+
+ rdev_for_each(rdev, tmp, mddev)
+ if ((rdev != freshest) && super_validate(mddev, rdev))
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
* Construct a RAID4/5/6 mapping:
* Args:
* <raid_type> <#raid_params> <raid_params> \
* <#raid_devs> { <meta_dev1> <dev1> .. <meta_devN> <devN> }
*
- * ** metadata devices are not supported yet, use '-' instead **
- *
* <raid_params> varies by <raid_type>. See 'parse_raid_params' for
* details on possible <raid_params>.
*/
@@ -465,8 +962,12 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
if (ret)
goto bad;
+ rs->md.sync_super = super_sync;
+ ret = analyse_superblocks(ti, rs);
+ if (ret)
+ goto bad;
+
INIT_WORK(&rs->md.event_work, do_table_event);
- ti->split_io = rs->md.chunk_sectors;
ti->private = rs;
mutex_lock(&rs->md.reconfig_mutex);
@@ -482,6 +983,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
rs->callbacks.congested_fn = raid_is_congested;
dm_table_add_target_callbacks(ti->table, &rs->callbacks);
+ mddev_suspend(&rs->md);
return 0;
bad:
@@ -546,12 +1048,17 @@ static int raid_status(struct dm_target *ti, status_type_t type,
break;
case STATUSTYPE_TABLE:
/* The string you would use to construct this array */
- for (i = 0; i < rs->md.raid_disks; i++)
- if (rs->dev[i].data_dev &&
+ for (i = 0; i < rs->md.raid_disks; i++) {
+ if ((rs->print_flags & DMPF_REBUILD) &&
+ rs->dev[i].data_dev &&
!test_bit(In_sync, &rs->dev[i].rdev.flags))
- raid_param_cnt++; /* for rebuilds */
+ raid_param_cnt += 2; /* for rebuilds */
+ if (rs->dev[i].data_dev &&
+ test_bit(WriteMostly, &rs->dev[i].rdev.flags))
+ raid_param_cnt += 2;
+ }
- raid_param_cnt += (hweight64(rs->print_flags) * 2);
+ raid_param_cnt += (hweight64(rs->print_flags & ~DMPF_REBUILD) * 2);
if (rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC))
raid_param_cnt--;
@@ -565,7 +1072,8 @@ static int raid_status(struct dm_target *ti, status_type_t type,
DMEMIT(" nosync");
for (i = 0; i < rs->md.raid_disks; i++)
- if (rs->dev[i].data_dev &&
+ if ((rs->print_flags & DMPF_REBUILD) &&
+ rs->dev[i].data_dev &&
!test_bit(In_sync, &rs->dev[i].rdev.flags))
DMEMIT(" rebuild %u", i);
@@ -579,6 +1087,11 @@ static int raid_status(struct dm_target *ti, status_type_t type,
if (rs->print_flags & DMPF_MAX_RECOVERY_RATE)
DMEMIT(" max_recovery_rate %d", rs->md.sync_speed_max);
+ for (i = 0; i < rs->md.raid_disks; i++)
+ if (rs->dev[i].data_dev &&
+ test_bit(WriteMostly, &rs->dev[i].rdev.flags))
+ DMEMIT(" write_mostly %u", i);
+
if (rs->print_flags & DMPF_MAX_WRITE_BEHIND)
DMEMIT(" max_write_behind %lu",
rs->md.bitmap_info.max_write_behind);
@@ -591,9 +1104,16 @@ static int raid_status(struct dm_target *ti, status_type_t type,
conf ? conf->max_nr_stripes * 2 : 0);
}
+ if (rs->print_flags & DMPF_REGION_SIZE)
+ DMEMIT(" region_size %lu",
+ rs->md.bitmap_info.chunksize >> 9);
+
DMEMIT(" %d", rs->md.raid_disks);
for (i = 0; i < rs->md.raid_disks; i++) {
- DMEMIT(" -"); /* metadata device */
+ if (rs->dev[i].meta_dev)
+ DMEMIT(" %s", rs->dev[i].meta_dev->name);
+ else
+ DMEMIT(" -");
if (rs->dev[i].data_dev)
DMEMIT(" %s", rs->dev[i].data_dev->name);
@@ -650,12 +1170,13 @@ static void raid_resume(struct dm_target *ti)
{
struct raid_set *rs = ti->private;
+ bitmap_load(&rs->md);
mddev_resume(&rs->md);
}
static struct target_type raid_target = {
.name = "raid",
- .version = {1, 0, 0},
+ .version = {1, 1, 0},
.module = THIS_MODULE,
.ctr = raid_ctr,
.dtr = raid_dtr,
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 135c2f1fdbfc..d1f1d7017103 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -58,25 +58,30 @@
#define NUM_SNAPSHOT_HDR_CHUNKS 1
struct disk_header {
- uint32_t magic;
+ __le32 magic;
/*
* Is this snapshot valid. There is no way of recovering
* an invalid snapshot.
*/
- uint32_t valid;
+ __le32 valid;
/*
* Simple, incrementing version. no backward
* compatibility.
*/
- uint32_t version;
+ __le32 version;
/* In sectors */
- uint32_t chunk_size;
-};
+ __le32 chunk_size;
+} __packed;
struct disk_exception {
+ __le64 old_chunk;
+ __le64 new_chunk;
+} __packed;
+
+struct core_exception {
uint64_t old_chunk;
uint64_t new_chunk;
};
@@ -169,10 +174,9 @@ static int alloc_area(struct pstore *ps)
if (!ps->area)
goto err_area;
- ps->zero_area = vmalloc(len);
+ ps->zero_area = vzalloc(len);
if (!ps->zero_area)
goto err_zero_area;
- memset(ps->zero_area, 0, len);
ps->header_area = vmalloc(len);
if (!ps->header_area)
@@ -396,32 +400,32 @@ static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
}
static void read_exception(struct pstore *ps,
- uint32_t index, struct disk_exception *result)
+ uint32_t index, struct core_exception *result)
{
- struct disk_exception *e = get_exception(ps, index);
+ struct disk_exception *de = get_exception(ps, index);
/* copy it */
- result->old_chunk = le64_to_cpu(e->old_chunk);
- result->new_chunk = le64_to_cpu(e->new_chunk);
+ result->old_chunk = le64_to_cpu(de->old_chunk);
+ result->new_chunk = le64_to_cpu(de->new_chunk);
}
static void write_exception(struct pstore *ps,
- uint32_t index, struct disk_exception *de)
+ uint32_t index, struct core_exception *e)
{
- struct disk_exception *e = get_exception(ps, index);
+ struct disk_exception *de = get_exception(ps, index);
/* copy it */
- e->old_chunk = cpu_to_le64(de->old_chunk);
- e->new_chunk = cpu_to_le64(de->new_chunk);
+ de->old_chunk = cpu_to_le64(e->old_chunk);
+ de->new_chunk = cpu_to_le64(e->new_chunk);
}
static void clear_exception(struct pstore *ps, uint32_t index)
{
- struct disk_exception *e = get_exception(ps, index);
+ struct disk_exception *de = get_exception(ps, index);
/* clear it */
- e->old_chunk = 0;
- e->new_chunk = 0;
+ de->old_chunk = 0;
+ de->new_chunk = 0;
}
/*
@@ -437,13 +441,13 @@ static int insert_exceptions(struct pstore *ps,
{
int r;
unsigned int i;
- struct disk_exception de;
+ struct core_exception e;
/* presume the area is full */
*full = 1;
for (i = 0; i < ps->exceptions_per_area; i++) {
- read_exception(ps, i, &de);
+ read_exception(ps, i, &e);
/*
* If the new_chunk is pointing at the start of
@@ -451,7 +455,7 @@ static int insert_exceptions(struct pstore *ps,
* is we know that we've hit the end of the
* exceptions. Therefore the area is not full.
*/
- if (de.new_chunk == 0LL) {
+ if (e.new_chunk == 0LL) {
ps->current_committed = i;
*full = 0;
break;
@@ -460,13 +464,13 @@ static int insert_exceptions(struct pstore *ps,
/*
* Keep track of the start of the free chunks.
*/
- if (ps->next_free <= de.new_chunk)
- ps->next_free = de.new_chunk + 1;
+ if (ps->next_free <= e.new_chunk)
+ ps->next_free = e.new_chunk + 1;
/*
* Otherwise we add the exception to the snapshot.
*/
- r = callback(callback_context, de.old_chunk, de.new_chunk);
+ r = callback(callback_context, e.old_chunk, e.new_chunk);
if (r)
return r;
}
@@ -563,7 +567,7 @@ static int persistent_read_metadata(struct dm_exception_store *store,
ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
sizeof(struct disk_exception);
ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
- sizeof(*ps->callbacks));
+ sizeof(*ps->callbacks));
if (!ps->callbacks)
return -ENOMEM;
@@ -641,12 +645,12 @@ static void persistent_commit_exception(struct dm_exception_store *store,
{
unsigned int i;
struct pstore *ps = get_info(store);
- struct disk_exception de;
+ struct core_exception ce;
struct commit_callback *cb;
- de.old_chunk = e->old_chunk;
- de.new_chunk = e->new_chunk;
- write_exception(ps, ps->current_committed++, &de);
+ ce.old_chunk = e->old_chunk;
+ ce.new_chunk = e->new_chunk;
+ write_exception(ps, ps->current_committed++, &ce);
/*
* Add the callback to the back of the array. This code
@@ -670,7 +674,7 @@ static void persistent_commit_exception(struct dm_exception_store *store,
* If we completely filled the current area, then wipe the next one.
*/
if ((ps->current_committed == ps->exceptions_per_area) &&
- zero_disk_area(ps, ps->current_area + 1))
+ zero_disk_area(ps, ps->current_area + 1))
ps->valid = 0;
/*
@@ -701,7 +705,7 @@ static int persistent_prepare_merge(struct dm_exception_store *store,
chunk_t *last_new_chunk)
{
struct pstore *ps = get_info(store);
- struct disk_exception de;
+ struct core_exception ce;
int nr_consecutive;
int r;
@@ -722,9 +726,9 @@ static int persistent_prepare_merge(struct dm_exception_store *store,
ps->current_committed = ps->exceptions_per_area;
}
- read_exception(ps, ps->current_committed - 1, &de);
- *last_old_chunk = de.old_chunk;
- *last_new_chunk = de.new_chunk;
+ read_exception(ps, ps->current_committed - 1, &ce);
+ *last_old_chunk = ce.old_chunk;
+ *last_new_chunk = ce.new_chunk;
/*
* Find number of consecutive chunks within the current area,
@@ -733,9 +737,9 @@ static int persistent_prepare_merge(struct dm_exception_store *store,
for (nr_consecutive = 1; nr_consecutive < ps->current_committed;
nr_consecutive++) {
read_exception(ps, ps->current_committed - 1 - nr_consecutive,
- &de);
- if (de.old_chunk != *last_old_chunk - nr_consecutive ||
- de.new_chunk != *last_new_chunk - nr_consecutive)
+ &ce);
+ if (ce.old_chunk != *last_old_chunk - nr_consecutive ||
+ ce.new_chunk != *last_new_chunk - nr_consecutive)
break;
}
@@ -753,7 +757,7 @@ static int persistent_commit_merge(struct dm_exception_store *store,
for (i = 0; i < nr_merged; i++)
clear_exception(ps, ps->current_committed - 1 - i);
- r = area_io(ps, WRITE);
+ r = area_io(ps, WRITE_FLUSH_FUA);
if (r < 0)
return r;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 9ecff5f3023a..6f758870fc19 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -30,16 +30,6 @@ static const char dm_snapshot_merge_target_name[] = "snapshot-merge";
((ti)->type->name == dm_snapshot_merge_target_name)
/*
- * The percentage increment we will wake up users at
- */
-#define WAKE_UP_PERCENT 5
-
-/*
- * kcopyd priority of snapshot operations
- */
-#define SNAPSHOT_COPY_PRIORITY 2
-
-/*
* The size of the mempool used to track chunks in use.
*/
#define MIN_IOS 256
@@ -180,6 +170,13 @@ struct dm_snap_pending_exception {
* kcopyd.
*/
int started;
+
+ /*
+ * For writing a complete chunk, bypassing the copy.
+ */
+ struct bio *full_bio;
+ bio_end_io_t *full_bio_end_io;
+ void *full_bio_private;
};
/*
@@ -1055,8 +1052,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s) {
- ti->error = "Cannot allocate snapshot context private "
- "structure";
+ ti->error = "Cannot allocate private snapshot structure";
r = -ENOMEM;
goto bad;
}
@@ -1380,6 +1376,7 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
struct dm_snapshot *s = pe->snap;
struct bio *origin_bios = NULL;
struct bio *snapshot_bios = NULL;
+ struct bio *full_bio = NULL;
int error = 0;
if (!success) {
@@ -1415,10 +1412,15 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
*/
dm_insert_exception(&s->complete, e);
- out:
+out:
dm_remove_exception(&pe->e);
snapshot_bios = bio_list_get(&pe->snapshot_bios);
origin_bios = bio_list_get(&pe->origin_bios);
+ full_bio = pe->full_bio;
+ if (full_bio) {
+ full_bio->bi_end_io = pe->full_bio_end_io;
+ full_bio->bi_private = pe->full_bio_private;
+ }
free_pending_exception(pe);
increment_pending_exceptions_done_count();
@@ -1426,10 +1428,15 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
up_write(&s->lock);
/* Submit any pending write bios */
- if (error)
+ if (error) {
+ if (full_bio)
+ bio_io_error(full_bio);
error_bios(snapshot_bios);
- else
+ } else {
+ if (full_bio)
+ bio_endio(full_bio, 0);
flush_bios(snapshot_bios);
+ }
retry_origin_bios(s, origin_bios);
}
@@ -1480,8 +1487,33 @@ static void start_copy(struct dm_snap_pending_exception *pe)
dest.count = src.count;
/* Hand over to kcopyd */
- dm_kcopyd_copy(s->kcopyd_client,
- &src, 1, &dest, 0, copy_callback, pe);
+ dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
+}
+
+static void full_bio_end_io(struct bio *bio, int error)
+{
+ void *callback_data = bio->bi_private;
+
+ dm_kcopyd_do_callback(callback_data, 0, error ? 1 : 0);
+}
+
+static void start_full_bio(struct dm_snap_pending_exception *pe,
+ struct bio *bio)
+{
+ struct dm_snapshot *s = pe->snap;
+ void *callback_data;
+
+ pe->full_bio = bio;
+ pe->full_bio_end_io = bio->bi_end_io;
+ pe->full_bio_private = bio->bi_private;
+
+ callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
+ copy_callback, pe);
+
+ bio->bi_end_io = full_bio_end_io;
+ bio->bi_private = callback_data;
+
+ generic_make_request(bio);
}
static struct dm_snap_pending_exception *
@@ -1519,6 +1551,7 @@ __find_pending_exception(struct dm_snapshot *s,
bio_list_init(&pe->origin_bios);
bio_list_init(&pe->snapshot_bios);
pe->started = 0;
+ pe->full_bio = NULL;
if (s->store->type->prepare_exception(s->store, &pe->e)) {
free_pending_exception(pe);
@@ -1612,10 +1645,19 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
}
remap_exception(s, &pe->e, bio, chunk);
- bio_list_add(&pe->snapshot_bios, bio);
r = DM_MAPIO_SUBMITTED;
+ if (!pe->started &&
+ bio->bi_size == (s->store->chunk_size << SECTOR_SHIFT)) {
+ pe->started = 1;
+ up_write(&s->lock);
+ start_full_bio(pe, bio);
+ goto out;
+ }
+
+ bio_list_add(&pe->snapshot_bios, bio);
+
if (!pe->started) {
/* this is protected by snap->lock */
pe->started = 1;
@@ -1628,9 +1670,9 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
map_context->ptr = track_chunk(s, chunk);
}
- out_unlock:
+out_unlock:
up_write(&s->lock);
- out:
+out:
return r;
}
@@ -1974,7 +2016,7 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
pe_to_start_now = pe;
}
- next_snapshot:
+next_snapshot:
up_write(&snap->lock);
if (pe_to_start_now) {
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index bfe9c2333cea..986b8754bb08 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -54,7 +54,6 @@ struct dm_table {
sector_t *highs;
struct dm_target *targets;
- unsigned discards_supported:1;
unsigned integrity_supported:1;
/*
@@ -154,12 +153,11 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size)
return NULL;
size = nmemb * elem_size;
- addr = vmalloc(size);
- if (addr)
- memset(addr, 0, size);
+ addr = vzalloc(size);
return addr;
}
+EXPORT_SYMBOL(dm_vcalloc);
/*
* highs, and targets are managed as dynamic arrays during a
@@ -209,7 +207,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
INIT_LIST_HEAD(&t->devices);
INIT_LIST_HEAD(&t->target_callbacks);
atomic_set(&t->holders, 0);
- t->discards_supported = 1;
if (!num_targets)
num_targets = KEYS_PER_NODE;
@@ -281,6 +278,7 @@ void dm_table_get(struct dm_table *t)
{
atomic_inc(&t->holders);
}
+EXPORT_SYMBOL(dm_table_get);
void dm_table_put(struct dm_table *t)
{
@@ -290,6 +288,7 @@ void dm_table_put(struct dm_table *t)
smp_mb__before_atomic_dec();
atomic_dec(&t->holders);
}
+EXPORT_SYMBOL(dm_table_put);
/*
* Checks to see if we need to extend highs or targets.
@@ -455,13 +454,14 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
* Add a device to the list, or just increment the usage count if
* it's already present.
*/
-static int __table_get_device(struct dm_table *t, struct dm_target *ti,
- const char *path, fmode_t mode, struct dm_dev **result)
+int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
+ struct dm_dev **result)
{
int r;
dev_t uninitialized_var(dev);
struct dm_dev_internal *dd;
unsigned int major, minor;
+ struct dm_table *t = ti->table;
BUG_ON(!t);
@@ -509,6 +509,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
*result = &dd->dm_dev;
return 0;
}
+EXPORT_SYMBOL(dm_get_device);
int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
@@ -539,23 +540,15 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
* If not we'll force DM to use PAGE_SIZE or
* smaller I/O, just to be safe.
*/
-
- if (q->merge_bvec_fn && !ti->type->merge)
+ if (dm_queue_merge_is_compulsory(q) && !ti->type->merge)
blk_limits_max_hw_sectors(limits,
(unsigned int) (PAGE_SIZE >> 9));
return 0;
}
EXPORT_SYMBOL_GPL(dm_set_device_limits);
-int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
- struct dm_dev **result)
-{
- return __table_get_device(ti->table, ti, path, mode, result);
-}
-
-
/*
- * Decrement a devices use count and remove it if necessary.
+ * Decrement a device's use count and remove it if necessary.
*/
void dm_put_device(struct dm_target *ti, struct dm_dev *d)
{
@@ -568,6 +561,7 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d)
kfree(dd);
}
}
+EXPORT_SYMBOL(dm_put_device);
/*
* Checks to see if the target joins onto the end of the table.
@@ -791,8 +785,9 @@ int dm_table_add_target(struct dm_table *t, const char *type,
t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
- if (!tgt->num_discard_requests)
- t->discards_supported = 0;
+ if (!tgt->num_discard_requests && tgt->discards_supported)
+ DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.",
+ dm_device_name(t->md), type);
return 0;
@@ -802,6 +797,63 @@ int dm_table_add_target(struct dm_table *t, const char *type,
return r;
}
+/*
+ * Target argument parsing helpers.
+ */
+static int validate_next_arg(struct dm_arg *arg, struct dm_arg_set *arg_set,
+ unsigned *value, char **error, unsigned grouped)
+{
+ const char *arg_str = dm_shift_arg(arg_set);
+
+ if (!arg_str ||
+ (sscanf(arg_str, "%u", value) != 1) ||
+ (*value < arg->min) ||
+ (*value > arg->max) ||
+ (grouped && arg_set->argc < *value)) {
+ *error = arg->error;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set,
+ unsigned *value, char **error)
+{
+ return validate_next_arg(arg, arg_set, value, error, 0);
+}
+EXPORT_SYMBOL(dm_read_arg);
+
+int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set,
+ unsigned *value, char **error)
+{
+ return validate_next_arg(arg, arg_set, value, error, 1);
+}
+EXPORT_SYMBOL(dm_read_arg_group);
+
+const char *dm_shift_arg(struct dm_arg_set *as)
+{
+ char *r;
+
+ if (as->argc) {
+ as->argc--;
+ r = *as->argv;
+ as->argv++;
+ return r;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL(dm_shift_arg);
+
+void dm_consume_args(struct dm_arg_set *as, unsigned num_args)
+{
+ BUG_ON(as->argc < num_args);
+ as->argc -= num_args;
+ as->argv += num_args;
+}
+EXPORT_SYMBOL(dm_consume_args);
+
static int dm_table_set_type(struct dm_table *t)
{
unsigned i;
@@ -1077,11 +1129,13 @@ void dm_table_event(struct dm_table *t)
t->event_fn(t->event_context);
mutex_unlock(&_event_lock);
}
+EXPORT_SYMBOL(dm_table_event);
sector_t dm_table_get_size(struct dm_table *t)
{
return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
}
+EXPORT_SYMBOL(dm_table_get_size);
struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index)
{
@@ -1194,9 +1248,45 @@ static void dm_table_set_integrity(struct dm_table *t)
blk_get_integrity(template_disk));
}
+static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ unsigned flush = (*(unsigned *)data);
+ struct request_queue *q = bdev_get_queue(dev->bdev);
+
+ return q && (q->flush_flags & flush);
+}
+
+static bool dm_table_supports_flush(struct dm_table *t, unsigned flush)
+{
+ struct dm_target *ti;
+ unsigned i = 0;
+
+ /*
+ * Require at least one underlying device to support flushes.
+ * t->devices includes internal dm devices such as mirror logs
+ * so we need to use iterate_devices here, which targets
+ * supporting flushes must provide.
+ */
+ while (i < dm_table_get_num_targets(t)) {
+ ti = dm_table_get_target(t, i++);
+
+ if (!ti->num_flush_requests)
+ continue;
+
+ if (ti->type->iterate_devices &&
+ ti->type->iterate_devices(ti, device_flush_capable, &flush))
+ return 1;
+ }
+
+ return 0;
+}
+
void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
+ unsigned flush = 0;
+
/*
* Copy table's limits to the DM device's request_queue
*/
@@ -1207,6 +1297,13 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
else
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+ if (dm_table_supports_flush(t, REQ_FLUSH)) {
+ flush |= REQ_FLUSH;
+ if (dm_table_supports_flush(t, REQ_FUA))
+ flush |= REQ_FUA;
+ }
+ blk_queue_flush(q, flush);
+
dm_table_set_integrity(t);
/*
@@ -1237,6 +1334,7 @@ fmode_t dm_table_get_mode(struct dm_table *t)
{
return t->mode;
}
+EXPORT_SYMBOL(dm_table_get_mode);
static void suspend_targets(struct dm_table *t, unsigned postsuspend)
{
@@ -1345,6 +1443,7 @@ struct mapped_device *dm_table_get_md(struct dm_table *t)
{
return t->md;
}
+EXPORT_SYMBOL(dm_table_get_md);
static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
@@ -1359,19 +1458,19 @@ bool dm_table_supports_discards(struct dm_table *t)
struct dm_target *ti;
unsigned i = 0;
- if (!t->discards_supported)
- return 0;
-
/*
* Unless any target used by the table set discards_supported,
* require at least one underlying device to support discards.
* t->devices includes internal dm devices such as mirror logs
* so we need to use iterate_devices here, which targets
- * supporting discard must provide.
+ * supporting discard selectively must provide.
*/
while (i < dm_table_get_num_targets(t)) {
ti = dm_table_get_target(t, i++);
+ if (!ti->num_discard_requests)
+ continue;
+
if (ti->discards_supported)
return 1;
@@ -1382,13 +1481,3 @@ bool dm_table_supports_discards(struct dm_table *t)
return 0;
}
-
-EXPORT_SYMBOL(dm_vcalloc);
-EXPORT_SYMBOL(dm_get_device);
-EXPORT_SYMBOL(dm_put_device);
-EXPORT_SYMBOL(dm_table_event);
-EXPORT_SYMBOL(dm_table_get_size);
-EXPORT_SYMBOL(dm_table_get_mode);
-EXPORT_SYMBOL(dm_table_get_md);
-EXPORT_SYMBOL(dm_table_put);
-EXPORT_SYMBOL(dm_table_get);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 0cf68b478878..52b39f335bb3 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -37,6 +37,8 @@ static const char *_name = DM_NAME;
static unsigned int major = 0;
static unsigned int _major = 0;
+static DEFINE_IDR(_minor_idr);
+
static DEFINE_SPINLOCK(_minor_lock);
/*
* For bio-based dm.
@@ -109,6 +111,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
#define DMF_FREEING 3
#define DMF_DELETING 4
#define DMF_NOFLUSH_SUSPENDING 5
+#define DMF_MERGE_IS_OPTIONAL 6
/*
* Work processed by per-device workqueue.
@@ -313,6 +316,12 @@ static void __exit dm_exit(void)
while (i--)
_exits[i]();
+
+ /*
+ * Should be empty by this point.
+ */
+ idr_remove_all(&_minor_idr);
+ idr_destroy(&_minor_idr);
}
/*
@@ -1171,7 +1180,8 @@ static int __clone_and_map_discard(struct clone_info *ci)
/*
* Even though the device advertised discard support,
- * reconfiguration might have changed that since the
+ * that does not mean every target supports it, and
+ * reconfiguration might also have changed that since the
* check was performed.
*/
if (!ti->num_discard_requests)
@@ -1705,8 +1715,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
/*-----------------------------------------------------------------
* An IDR is used to keep track of allocated minor numbers.
*---------------------------------------------------------------*/
-static DEFINE_IDR(_minor_idr);
-
static void free_minor(int minor)
{
spin_lock(&_minor_lock);
@@ -1800,7 +1808,6 @@ static void dm_init_md_queue(struct mapped_device *md)
blk_queue_make_request(md->queue, dm_request);
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
blk_queue_merge_bvec(md->queue, dm_merge_bvec);
- blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);
}
/*
@@ -1986,6 +1993,59 @@ static void __set_size(struct mapped_device *md, sector_t size)
}
/*
+ * Return 1 if the queue has a compulsory merge_bvec_fn function.
+ *
+ * If this function returns 0, then the device is either a non-dm
+ * device without a merge_bvec_fn, or it is a dm device that is
+ * able to split any bios it receives that are too big.
+ */
+int dm_queue_merge_is_compulsory(struct request_queue *q)
+{
+ struct mapped_device *dev_md;
+
+ if (!q->merge_bvec_fn)
+ return 0;
+
+ if (q->make_request_fn == dm_request) {
+ dev_md = q->queuedata;
+ if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags))
+ return 0;
+ }
+
+ return 1;
+}
+
+static int dm_device_merge_is_compulsory(struct dm_target *ti,
+ struct dm_dev *dev, sector_t start,
+ sector_t len, void *data)
+{
+ struct block_device *bdev = dev->bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ return dm_queue_merge_is_compulsory(q);
+}
+
+/*
+ * Return 1 if it is acceptable to ignore merge_bvec_fn based
+ * on the properties of the underlying devices.
+ */
+static int dm_table_merge_is_optional(struct dm_table *table)
+{
+ unsigned i = 0;
+ struct dm_target *ti;
+
+ while (i < dm_table_get_num_targets(table)) {
+ ti = dm_table_get_target(table, i++);
+
+ if (ti->type->iterate_devices &&
+ ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL))
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
* Returns old map, which caller must destroy.
*/
static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
@@ -1995,6 +2055,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
struct request_queue *q = md->queue;
sector_t size;
unsigned long flags;
+ int merge_is_optional;
size = dm_table_get_size(t);
@@ -2020,10 +2081,16 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
__bind_mempools(md, t);
+ merge_is_optional = dm_table_merge_is_optional(t);
+
write_lock_irqsave(&md->map_lock, flags);
old_map = md->map;
md->map = t;
dm_table_set_restrictions(t, q, limits);
+ if (merge_is_optional)
+ set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
+ else
+ clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
write_unlock_irqrestore(&md->map_lock, flags);
return old_map;
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 1aaf16746da8..6745dbd278a4 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -66,6 +66,8 @@ int dm_table_alloc_md_mempools(struct dm_table *t);
void dm_table_free_md_mempools(struct dm_table *t);
struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
+int dm_queue_merge_is_compulsory(struct request_queue *q);
+
void dm_lock_md_type(struct mapped_device *md);
void dm_unlock_md_type(struct mapped_device *md);
void dm_set_md_type(struct mapped_device *md, unsigned type);
diff --git a/drivers/of/address.c b/drivers/of/address.c
index da1f4b9605df..72c33fbe451d 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -610,6 +610,6 @@ void __iomem *of_iomap(struct device_node *np, int index)
if (of_address_to_resource(np, index, &res))
return NULL;
- return ioremap(res.start, 1 + res.end - res.start);
+ return ioremap(res.start, resource_size(&res));
}
EXPORT_SYMBOL(of_iomap);
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 02ed36719def..fb28b5af733b 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -17,14 +17,39 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
+/**
+ * struct alias_prop - Alias property in 'aliases' node
+ * @link: List node to link the structure in aliases_lookup list
+ * @alias: Alias property name
+ * @np: Pointer to device_node that the alias stands for
+ * @id: Index value from end of alias name
+ * @stem: Alias string without the index
+ *
+ * The structure represents one alias property of 'aliases' node as
+ * an entry in aliases_lookup list.
+ */
+struct alias_prop {
+ struct list_head link;
+ const char *alias;
+ struct device_node *np;
+ int id;
+ char stem[0];
+};
+
+static LIST_HEAD(aliases_lookup);
+
struct device_node *allnodes;
struct device_node *of_chosen;
+struct device_node *of_aliases;
+
+static DEFINE_MUTEX(of_aliases_mutex);
/* use when traversing tree through the allnext, child, sibling,
* or parent members of struct device_node.
@@ -610,8 +635,9 @@ EXPORT_SYMBOL(of_find_node_by_phandle);
*
* The out_value is modified only if a valid u32 value can be decoded.
*/
-int of_property_read_u32_array(const struct device_node *np, char *propname,
- u32 *out_values, size_t sz)
+int of_property_read_u32_array(const struct device_node *np,
+ const char *propname, u32 *out_values,
+ size_t sz)
{
struct property *prop = of_find_property(np, propname, NULL);
const __be32 *val;
@@ -645,7 +671,7 @@ EXPORT_SYMBOL_GPL(of_property_read_u32_array);
*
* The out_string pointer is modified only if a valid string can be decoded.
*/
-int of_property_read_string(struct device_node *np, char *propname,
+int of_property_read_string(struct device_node *np, const char *propname,
const char **out_string)
{
struct property *prop = of_find_property(np, propname, NULL);
@@ -987,3 +1013,108 @@ out_unlock:
}
#endif /* defined(CONFIG_OF_DYNAMIC) */
+static void of_alias_add(struct alias_prop *ap, struct device_node *np,
+ int id, const char *stem, int stem_len)
+{
+ ap->id = id;
+ ap->np = np;
+ strncpy(ap->stem, stem, stem_len);
+ ap->stem[stem_len] = 0;
+ list_add_tail(&ap->link, &aliases_lookup);
+ pr_debug("adding DT alias:%s: stem=%s id=%i node=%s\n",
+ ap->alias, ap->stem, ap->id, np ? np->full_name : NULL);
+}
+
+/**
+ * of_alias_scan() - Scan all properties of 'aliases' node
+ *
+ * The function scans all the properties of 'aliases' node and populate
+ * the global lookup table with the properties. It returns the
+ * number of alias_prop found, or error code in error case.
+ */
+__init void of_alias_scan(void)
+{
+ struct property *pp;
+
+ if (!of_aliases)
+ return;
+
+ for_each_property(pp, of_aliases->properties) {
+ const char *start = pp->name;
+ const char *end = start + strlen(start);
+ struct device_node *np;
+ struct alias_prop *ap;
+ int id, len;
+
+ /* Skip those we do not want to proceed */
+ if (!strcmp(pp->name, "name") ||
+ !strcmp(pp->name, "phandle") ||
+ !strcmp(pp->name, "linux,phandle"))
+ continue;
+
+ np = of_find_node_by_path(pp->value);
+ if (!np)
+ continue;
+
+ /* walk alias backwards to extract the id and 'stem' string */
+ while (isdigit(*(end-1)) && end > start)
+ end--;
+ len = end - start;
+ id = strlen(end) ? simple_strtoul(end, NULL, 10) : -1;
+
+ /* Allocate an alias_prop with enough space for the stem */
+ ap = early_init_dt_alloc_memory_arch(sizeof(*ap) + len + 1, 4);
+ if (!ap)
+ continue;
+ ap->alias = start;
+ of_alias_add(ap, np, id, start, len);
+ }
+}
+
+/**
+ * of_alias_get_id() - Get alias id for the given device_node
+ * @np: Pointer to the given device_node
+ * @stem: Alias stem of the given device_node
+ *
+ * The function travels the lookup table to get alias id for the given
+ * device_node and alias stem. It returns the alias id if find it.
+ * If not, dynamically creates one in the lookup table and returns it,
+ * or returns error code if fail to create.
+ */
+int of_alias_get_id(struct device_node *np, const char *stem)
+{
+ struct alias_prop *app;
+ int id = 0;
+ bool found = false;
+
+ mutex_lock(&of_aliases_mutex);
+ list_for_each_entry(app, &aliases_lookup, link) {
+ if (strcmp(app->stem, stem) != 0)
+ continue;
+
+ if (np == app->np) {
+ found = true;
+ id = app->id;
+ break;
+ }
+
+ if (id <= app->id)
+ id = app->id + 1;
+ }
+
+ /* If an id is not found, then allocate a new one */
+ if (!found) {
+ app = kzalloc(sizeof(*app) + strlen(stem) + 1, 4);
+ if (!app) {
+ id = -ENODEV;
+ goto out;
+ }
+ of_alias_add(app, np, id, stem, strlen(stem));
+ }
+
+ out:
+ mutex_unlock(&of_aliases_mutex);
+
+ return id;
+}
+EXPORT_SYMBOL_GPL(of_alias_get_id);
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 65200af29c52..13d6d3a96b31 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -707,10 +707,12 @@ void __init unflatten_device_tree(void)
__unflatten_device_tree(initial_boot_params, &allnodes,
early_init_dt_alloc_memory_arch);
- /* Get pointer to OF "/chosen" node for use everywhere */
+ /* Get pointer to "/chosen" and "/aliasas" nodes for use everywhere */
of_chosen = of_find_node_by_path("/chosen");
if (of_chosen == NULL)
of_chosen = of_find_node_by_path("/chosen@0");
+ of_aliases = of_find_node_by_path("/aliases");
+ of_alias_scan();
}
#endif /* CONFIG_OF_EARLY_FLATTREE */
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index a70fa89f76fd..220285760b68 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -110,7 +110,7 @@ static int post_dock_fixups(struct notifier_block *nb, unsigned long val,
}
-static struct acpi_dock_ops acpiphp_dock_ops = {
+static const struct acpi_dock_ops acpiphp_dock_ops = {
.handler = handle_hotplug_event_func,
};
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index bcae8dd41496..7789002bdd5c 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -368,7 +368,7 @@ static int __init omap_rtc_probe(struct platform_device *pdev)
pr_info("%s: already running\n", pdev->name);
/* force to 24 hour mode */
- new_ctrl = reg & ~(OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP);
+ new_ctrl = reg & (OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP);
new_ctrl |= OMAP_RTC_CTRL_STOP;
/* BOARD-SPECIFIC CUSTOMIZATION CAN GO HERE:
diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c
index eba88c749fb1..730b4a37b823 100644
--- a/drivers/spi/spi-pl022.c
+++ b/drivers/spi/spi-pl022.c
@@ -2267,17 +2267,13 @@ static int __devexit
pl022_remove(struct amba_device *adev)
{
struct pl022 *pl022 = amba_get_drvdata(adev);
- int status = 0;
+
if (!pl022)
return 0;
/* Remove the queue */
- status = destroy_queue(pl022);
- if (status != 0) {
- dev_err(&adev->dev,
- "queue remove failed (%d)\n", status);
- return status;
- }
+ if (destroy_queue(pl022) != 0)
+ dev_err(&adev->dev, "queue remove failed\n");
load_ssp_default_config(pl022);
pl022_dma_remove(pl022);
free_irq(adev->irq[0], pl022);
@@ -2289,7 +2285,6 @@ pl022_remove(struct amba_device *adev)
spi_unregister_master(pl022->master);
spi_master_put(pl022->master);
amba_set_drvdata(adev, NULL);
- dev_dbg(&adev->dev, "remove succeeded\n");
return 0;
}
diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig
index 564ff4e0dbc4..8345fb457a40 100644
--- a/drivers/target/iscsi/Kconfig
+++ b/drivers/target/iscsi/Kconfig
@@ -1,5 +1,6 @@
config ISCSI_TARGET
tristate "Linux-iSCSI.org iSCSI Target Mode Stack"
+ depends on NET
select CRYPTO
select CRYPTO_CRC32C
select CRYPTO_CRC32C_INTEL if X86
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 14c81c4265bd..c24fb10de60b 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -120,7 +120,7 @@ struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf)
struct iscsi_tiqn *tiqn = NULL;
int ret;
- if (strlen(buf) > ISCSI_IQN_LEN) {
+ if (strlen(buf) >= ISCSI_IQN_LEN) {
pr_err("Target IQN exceeds %d bytes\n",
ISCSI_IQN_LEN);
return ERR_PTR(-EINVAL);
@@ -1857,7 +1857,7 @@ static int iscsit_handle_text_cmd(
char *text_ptr, *text_in;
int cmdsn_ret, niov = 0, rx_got, rx_size;
u32 checksum = 0, data_crc = 0, payload_length;
- u32 padding = 0, text_length = 0;
+ u32 padding = 0, pad_bytes = 0, text_length = 0;
struct iscsi_cmd *cmd;
struct kvec iov[3];
struct iscsi_text *hdr;
@@ -1896,7 +1896,7 @@ static int iscsit_handle_text_cmd(
padding = ((-payload_length) & 3);
if (padding != 0) {
- iov[niov].iov_base = cmd->pad_bytes;
+ iov[niov].iov_base = &pad_bytes;
iov[niov++].iov_len = padding;
rx_size += padding;
pr_debug("Receiving %u additional bytes"
@@ -1917,7 +1917,7 @@ static int iscsit_handle_text_cmd(
if (conn->conn_ops->DataDigest) {
iscsit_do_crypto_hash_buf(&conn->conn_rx_hash,
text_in, text_length,
- padding, cmd->pad_bytes,
+ padding, (u8 *)&pad_bytes,
(u8 *)&data_crc);
if (checksum != data_crc) {
@@ -3468,7 +3468,12 @@ static inline void iscsit_thread_check_cpumask(
}
#else
-#define iscsit_thread_get_cpumask(X) ({})
+
+void iscsit_thread_get_cpumask(struct iscsi_conn *conn)
+{
+ return;
+}
+
#define iscsit_thread_check_cpumask(X, Y, Z) ({})
#endif /* CONFIG_SMP */
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index 32bb92c44450..f095e65b1ccf 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -181,7 +181,7 @@ struct se_tpg_np *lio_target_call_addnptotpg(
return ERR_PTR(-EOVERFLOW);
}
memset(buf, 0, MAX_PORTAL_LEN + 1);
- snprintf(buf, MAX_PORTAL_LEN, "%s", name);
+ snprintf(buf, MAX_PORTAL_LEN + 1, "%s", name);
memset(&sockaddr, 0, sizeof(struct __kernel_sockaddr_storage));
diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index 713a4d23557a..4d087ac11067 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -978,7 +978,7 @@ struct iscsi_login *iscsi_target_init_negotiation(
pr_err("Unable to allocate memory for struct iscsi_login.\n");
iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
ISCSI_LOGIN_STATUS_NO_RESOURCES);
- goto out;
+ return NULL;
}
login->req = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index c75a01a1c475..89760329d5d0 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1747,6 +1747,8 @@ int transport_generic_handle_cdb(
}
EXPORT_SYMBOL(transport_generic_handle_cdb);
+static void transport_generic_request_failure(struct se_cmd *,
+ struct se_device *, int, int);
/*
* Used by fabric module frontends to queue tasks directly.
* Many only be used from process context only
@@ -1754,6 +1756,8 @@ EXPORT_SYMBOL(transport_generic_handle_cdb);
int transport_handle_cdb_direct(
struct se_cmd *cmd)
{
+ int ret;
+
if (!cmd->se_lun) {
dump_stack();
pr_err("cmd->se_lun is NULL\n");
@@ -1765,8 +1769,31 @@ int transport_handle_cdb_direct(
" from interrupt context\n");
return -EINVAL;
}
-
- return transport_generic_new_cmd(cmd);
+ /*
+ * Set TRANSPORT_NEW_CMD state and cmd->t_transport_active=1 following
+ * transport_generic_handle_cdb*() -> transport_add_cmd_to_queue()
+ * in existing usage to ensure that outstanding descriptors are handled
+ * correctly during shutdown via transport_generic_wait_for_tasks()
+ *
+ * Also, we don't take cmd->t_state_lock here as we only expect
+ * this to be called for initial descriptor submission.
+ */
+ cmd->t_state = TRANSPORT_NEW_CMD;
+ atomic_set(&cmd->t_transport_active, 1);
+ /*
+ * transport_generic_new_cmd() is already handling QUEUE_FULL,
+ * so follow TRANSPORT_NEW_CMD processing thread context usage
+ * and call transport_generic_request_failure() if necessary..
+ */
+ ret = transport_generic_new_cmd(cmd);
+ if (ret == -EAGAIN)
+ return 0;
+ else if (ret < 0) {
+ cmd->transport_error_status = ret;
+ transport_generic_request_failure(cmd, NULL, 0,
+ (cmd->data_direction != DMA_TO_DEVICE));
+ }
+ return 0;
}
EXPORT_SYMBOL(transport_handle_cdb_direct);
@@ -3324,7 +3351,7 @@ static int transport_generic_cmd_sequencer(
goto out_invalid_cdb_field;
}
- cmd->t_task_lba = get_unaligned_be16(&cdb[2]);
+ cmd->t_task_lba = get_unaligned_be64(&cdb[2]);
passthrough = (dev->transport->transport_type ==
TRANSPORT_PLUGIN_PHBA_PDEV);
/*
diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h
index f7fff7ed63c3..bd4fe21a23b8 100644
--- a/drivers/target/tcm_fc/tcm_fc.h
+++ b/drivers/target/tcm_fc/tcm_fc.h
@@ -187,4 +187,9 @@ void ft_dump_cmd(struct ft_cmd *, const char *caller);
ssize_t ft_format_wwn(char *, size_t, u64);
+/*
+ * Underlying HW specific helper function
+ */
+void ft_invl_hw_context(struct ft_cmd *);
+
#endif /* __TCM_FC_H__ */
diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c
index 09df38b4610c..5654dc22f7ae 100644
--- a/drivers/target/tcm_fc/tfc_cmd.c
+++ b/drivers/target/tcm_fc/tfc_cmd.c
@@ -320,6 +320,7 @@ static void ft_recv_seq(struct fc_seq *sp, struct fc_frame *fp, void *arg)
default:
pr_debug("%s: unhandled frame r_ctl %x\n",
__func__, fh->fh_r_ctl);
+ ft_invl_hw_context(cmd);
fc_frame_free(fp);
transport_generic_free_cmd(&cmd->se_cmd, 0, 0);
break;
diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c
index 8e2a46ddcccb..c37f4cd96452 100644
--- a/drivers/target/tcm_fc/tfc_io.c
+++ b/drivers/target/tcm_fc/tfc_io.c
@@ -213,62 +213,49 @@ void ft_recv_write_data(struct ft_cmd *cmd, struct fc_frame *fp)
if (!(ntoh24(fh->fh_f_ctl) & FC_FC_REL_OFF))
goto drop;
+ f_ctl = ntoh24(fh->fh_f_ctl);
+ ep = fc_seq_exch(seq);
+ lport = ep->lp;
+ if (cmd->was_ddp_setup) {
+ BUG_ON(!ep);
+ BUG_ON(!lport);
+ }
+
/*
- * Doesn't expect even single byte of payload. Payload
+ * Doesn't expect payload if DDP is setup. Payload
* is expected to be copied directly to user buffers
- * due to DDP (Large Rx offload) feature, hence
- * BUG_ON if BUF is non-NULL
+ * due to DDP (Large Rx offload),
*/
buf = fc_frame_payload_get(fp, 1);
- if (cmd->was_ddp_setup && buf) {
- pr_debug("%s: When DDP was setup, not expected to"
- "receive frame with payload, Payload shall be"
- "copied directly to buffer instead of coming "
- "via. legacy receive queues\n", __func__);
- BUG_ON(buf);
- }
+ if (buf)
+ pr_err("%s: xid 0x%x, f_ctl 0x%x, cmd->sg %p, "
+ "cmd->sg_cnt 0x%x. DDP was setup"
+ " hence not expected to receive frame with "
+ "payload, Frame will be dropped if "
+ "'Sequence Initiative' bit in f_ctl is "
+ "not set\n", __func__, ep->xid, f_ctl,
+ cmd->sg, cmd->sg_cnt);
+ /*
+ * Invalidate HW DDP context if it was setup for respective
+ * command. Invalidation of HW DDP context is requited in both
+ * situation (success and error).
+ */
+ ft_invl_hw_context(cmd);
/*
- * If ft_cmd indicated 'ddp_setup', in that case only the last frame
- * should come with 'TSI bit being set'. If 'TSI bit is not set and if
- * data frame appears here, means error condition. In both the cases
- * release the DDP context (ddp_put) and in error case, as well
- * initiate error recovery mechanism.
+ * If "Sequence Initiative (TSI)" bit set in f_ctl, means last
+ * write data frame is received successfully where payload is
+ * posted directly to user buffer and only the last frame's
+ * header is posted in receive queue.
+ *
+ * If "Sequence Initiative (TSI)" bit is not set, means error
+ * condition w.r.t. DDP, hence drop the packet and let explict
+ * ABORTS from other end of exchange timer trigger the recovery.
*/
- ep = fc_seq_exch(seq);
- if (cmd->was_ddp_setup) {
- BUG_ON(!ep);
- lport = ep->lp;
- BUG_ON(!lport);
- }
- if (cmd->was_ddp_setup && ep->xid != FC_XID_UNKNOWN) {
- f_ctl = ntoh24(fh->fh_f_ctl);
- /*
- * If TSI bit set in f_ctl, means last write data frame is
- * received successfully where payload is posted directly
- * to user buffer and only the last frame's header is posted
- * in legacy receive queue
- */
- if (f_ctl & FC_FC_SEQ_INIT) { /* TSI bit set in FC frame */
- cmd->write_data_len = lport->tt.ddp_done(lport,
- ep->xid);
- goto last_frame;
- } else {
- /*
- * Updating the write_data_len may be meaningless at
- * this point, but just in case if required in future
- * for debugging or any other purpose
- */
- pr_err("%s: Received frame with TSI bit not"
- " being SET, dropping the frame, "
- "cmd->sg <%p>, cmd->sg_cnt <0x%x>\n",
- __func__, cmd->sg, cmd->sg_cnt);
- cmd->write_data_len = lport->tt.ddp_done(lport,
- ep->xid);
- lport->tt.seq_exch_abort(cmd->seq, 0);
- goto drop;
- }
- }
+ if (f_ctl & FC_FC_SEQ_INIT)
+ goto last_frame;
+ else
+ goto drop;
rel_off = ntohl(fh->fh_parm_offset);
frame_len = fr_len(fp);
@@ -331,3 +318,39 @@ last_frame:
drop:
fc_frame_free(fp);
}
+
+/*
+ * Handle and cleanup any HW specific resources if
+ * received ABORTS, errors, timeouts.
+ */
+void ft_invl_hw_context(struct ft_cmd *cmd)
+{
+ struct fc_seq *seq = cmd->seq;
+ struct fc_exch *ep = NULL;
+ struct fc_lport *lport = NULL;
+
+ BUG_ON(!cmd);
+
+ /* Cleanup the DDP context in HW if DDP was setup */
+ if (cmd->was_ddp_setup && seq) {
+ ep = fc_seq_exch(seq);
+ if (ep) {
+ lport = ep->lp;
+ if (lport && (ep->xid <= lport->lro_xid))
+ /*
+ * "ddp_done" trigger invalidation of HW
+ * specific DDP context
+ */
+ cmd->write_data_len = lport->tt.ddp_done(lport,
+ ep->xid);
+
+ /*
+ * Resetting same variable to indicate HW's
+ * DDP context has been invalidated to avoid
+ * re_invalidation of same context (context is
+ * identified using ep->xid)
+ */
+ cmd->was_ddp_setup = 0;
+ }
+ }
+}
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index bf7c687519ef..f7f71b2d3101 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -14,11 +14,7 @@ menuconfig THERMAL
If you want this support, you should say Y or M here.
config THERMAL_HWMON
- bool "Hardware monitoring support"
+ bool
depends on THERMAL
depends on HWMON=y || HWMON=THERMAL
- help
- The generic thermal sysfs driver's hardware monitoring support
- requires a 2.10.7/3.0.2 or later lm-sensors userspace.
-
- Say Y if your user-space is new enough.
+ default y
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index 0b1c82ad6805..708f8e92771a 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -420,6 +420,29 @@ thermal_cooling_device_trip_point_show(struct device *dev,
/* hwmon sys I/F */
#include <linux/hwmon.h>
+
+/* thermal zone devices with the same type share one hwmon device */
+struct thermal_hwmon_device {
+ char type[THERMAL_NAME_LENGTH];
+ struct device *device;
+ int count;
+ struct list_head tz_list;
+ struct list_head node;
+};
+
+struct thermal_hwmon_attr {
+ struct device_attribute attr;
+ char name[16];
+};
+
+/* one temperature input for each thermal zone */
+struct thermal_hwmon_temp {
+ struct list_head hwmon_node;
+ struct thermal_zone_device *tz;
+ struct thermal_hwmon_attr temp_input; /* hwmon sys attr */
+ struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */
+};
+
static LIST_HEAD(thermal_hwmon_list);
static ssize_t
@@ -437,9 +460,10 @@ temp_input_show(struct device *dev, struct device_attribute *attr, char *buf)
int ret;
struct thermal_hwmon_attr *hwmon_attr
= container_of(attr, struct thermal_hwmon_attr, attr);
- struct thermal_zone_device *tz
- = container_of(hwmon_attr, struct thermal_zone_device,
+ struct thermal_hwmon_temp *temp
+ = container_of(hwmon_attr, struct thermal_hwmon_temp,
temp_input);
+ struct thermal_zone_device *tz = temp->tz;
ret = tz->ops->get_temp(tz, &temperature);
@@ -455,9 +479,10 @@ temp_crit_show(struct device *dev, struct device_attribute *attr,
{
struct thermal_hwmon_attr *hwmon_attr
= container_of(attr, struct thermal_hwmon_attr, attr);
- struct thermal_zone_device *tz
- = container_of(hwmon_attr, struct thermal_zone_device,
+ struct thermal_hwmon_temp *temp
+ = container_of(hwmon_attr, struct thermal_hwmon_temp,
temp_crit);
+ struct thermal_zone_device *tz = temp->tz;
long temperature;
int ret;
@@ -469,22 +494,54 @@ temp_crit_show(struct device *dev, struct device_attribute *attr,
}
-static int
-thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
+static struct thermal_hwmon_device *
+thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz)
{
struct thermal_hwmon_device *hwmon;
- int new_hwmon_device = 1;
- int result;
mutex_lock(&thermal_list_lock);
list_for_each_entry(hwmon, &thermal_hwmon_list, node)
if (!strcmp(hwmon->type, tz->type)) {
- new_hwmon_device = 0;
mutex_unlock(&thermal_list_lock);
- goto register_sys_interface;
+ return hwmon;
+ }
+ mutex_unlock(&thermal_list_lock);
+
+ return NULL;
+}
+
+/* Find the temperature input matching a given thermal zone */
+static struct thermal_hwmon_temp *
+thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon,
+ const struct thermal_zone_device *tz)
+{
+ struct thermal_hwmon_temp *temp;
+
+ mutex_lock(&thermal_list_lock);
+ list_for_each_entry(temp, &hwmon->tz_list, hwmon_node)
+ if (temp->tz == tz) {
+ mutex_unlock(&thermal_list_lock);
+ return temp;
}
mutex_unlock(&thermal_list_lock);
+ return NULL;
+}
+
+static int
+thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
+{
+ struct thermal_hwmon_device *hwmon;
+ struct thermal_hwmon_temp *temp;
+ int new_hwmon_device = 1;
+ int result;
+
+ hwmon = thermal_hwmon_lookup_by_type(tz);
+ if (hwmon) {
+ new_hwmon_device = 0;
+ goto register_sys_interface;
+ }
+
hwmon = kzalloc(sizeof(struct thermal_hwmon_device), GFP_KERNEL);
if (!hwmon)
return -ENOMEM;
@@ -502,30 +559,36 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
goto free_mem;
register_sys_interface:
- tz->hwmon = hwmon;
+ temp = kzalloc(sizeof(struct thermal_hwmon_temp), GFP_KERNEL);
+ if (!temp) {
+ result = -ENOMEM;
+ goto unregister_name;
+ }
+
+ temp->tz = tz;
hwmon->count++;
- snprintf(tz->temp_input.name, THERMAL_NAME_LENGTH,
+ snprintf(temp->temp_input.name, THERMAL_NAME_LENGTH,
"temp%d_input", hwmon->count);
- tz->temp_input.attr.attr.name = tz->temp_input.name;
- tz->temp_input.attr.attr.mode = 0444;
- tz->temp_input.attr.show = temp_input_show;
- sysfs_attr_init(&tz->temp_input.attr.attr);
- result = device_create_file(hwmon->device, &tz->temp_input.attr);
+ temp->temp_input.attr.attr.name = temp->temp_input.name;
+ temp->temp_input.attr.attr.mode = 0444;
+ temp->temp_input.attr.show = temp_input_show;
+ sysfs_attr_init(&temp->temp_input.attr.attr);
+ result = device_create_file(hwmon->device, &temp->temp_input.attr);
if (result)
- goto unregister_name;
+ goto free_temp_mem;
if (tz->ops->get_crit_temp) {
unsigned long temperature;
if (!tz->ops->get_crit_temp(tz, &temperature)) {
- snprintf(tz->temp_crit.name, THERMAL_NAME_LENGTH,
+ snprintf(temp->temp_crit.name, THERMAL_NAME_LENGTH,
"temp%d_crit", hwmon->count);
- tz->temp_crit.attr.attr.name = tz->temp_crit.name;
- tz->temp_crit.attr.attr.mode = 0444;
- tz->temp_crit.attr.show = temp_crit_show;
- sysfs_attr_init(&tz->temp_crit.attr.attr);
+ temp->temp_crit.attr.attr.name = temp->temp_crit.name;
+ temp->temp_crit.attr.attr.mode = 0444;
+ temp->temp_crit.attr.show = temp_crit_show;
+ sysfs_attr_init(&temp->temp_crit.attr.attr);
result = device_create_file(hwmon->device,
- &tz->temp_crit.attr);
+ &temp->temp_crit.attr);
if (result)
goto unregister_input;
}
@@ -534,13 +597,15 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
mutex_lock(&thermal_list_lock);
if (new_hwmon_device)
list_add_tail(&hwmon->node, &thermal_hwmon_list);
- list_add_tail(&tz->hwmon_node, &hwmon->tz_list);
+ list_add_tail(&temp->hwmon_node, &hwmon->tz_list);
mutex_unlock(&thermal_list_lock);
return 0;
unregister_input:
- device_remove_file(hwmon->device, &tz->temp_input.attr);
+ device_remove_file(hwmon->device, &temp->temp_input.attr);
+ free_temp_mem:
+ kfree(temp);
unregister_name:
if (new_hwmon_device) {
device_remove_file(hwmon->device, &dev_attr_name);
@@ -556,15 +621,30 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
static void
thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
{
- struct thermal_hwmon_device *hwmon = tz->hwmon;
+ struct thermal_hwmon_device *hwmon;
+ struct thermal_hwmon_temp *temp;
+
+ hwmon = thermal_hwmon_lookup_by_type(tz);
+ if (unlikely(!hwmon)) {
+ /* Should never happen... */
+ dev_dbg(&tz->device, "hwmon device lookup failed!\n");
+ return;
+ }
+
+ temp = thermal_hwmon_lookup_temp(hwmon, tz);
+ if (unlikely(!temp)) {
+ /* Should never happen... */
+ dev_dbg(&tz->device, "temperature input lookup failed!\n");
+ return;
+ }
- tz->hwmon = NULL;
- device_remove_file(hwmon->device, &tz->temp_input.attr);
+ device_remove_file(hwmon->device, &temp->temp_input.attr);
if (tz->ops->get_crit_temp)
- device_remove_file(hwmon->device, &tz->temp_crit.attr);
+ device_remove_file(hwmon->device, &temp->temp_crit.attr);
mutex_lock(&thermal_list_lock);
- list_del(&tz->hwmon_node);
+ list_del(&temp->hwmon_node);
+ kfree(temp);
if (!list_empty(&hwmon->tz_list)) {
mutex_unlock(&thermal_list_lock);
return;
diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index 69407e72aac1..278aeaa92505 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -336,7 +336,7 @@ config BACKLIGHT_PCF50633
enable its driver.
config BACKLIGHT_AAT2870
- bool "AnalogicTech AAT2870 Backlight"
+ tristate "AnalogicTech AAT2870 Backlight"
depends on BACKLIGHT_CLASS_DEVICE && MFD_AAT2870_CORE
help
If you have a AnalogicTech AAT2870 say Y to enable the
diff --git a/drivers/video/backlight/aat2870_bl.c b/drivers/video/backlight/aat2870_bl.c
index 4952a617563d..331f1ef1dad5 100644
--- a/drivers/video/backlight/aat2870_bl.c
+++ b/drivers/video/backlight/aat2870_bl.c
@@ -44,7 +44,7 @@ static inline int aat2870_brightness(struct aat2870_bl_driver_data *aat2870_bl,
struct backlight_device *bd = aat2870_bl->bd;
int val;
- val = brightness * aat2870_bl->max_current;
+ val = brightness * (aat2870_bl->max_current - 1);
val /= bd->props.max_brightness;
return val;
@@ -158,10 +158,10 @@ static int aat2870_bl_probe(struct platform_device *pdev)
props.type = BACKLIGHT_RAW;
bd = backlight_device_register("aat2870-backlight", &pdev->dev,
aat2870_bl, &aat2870_bl_ops, &props);
- if (!bd) {
+ if (IS_ERR(bd)) {
dev_err(&pdev->dev,
"Failed allocate memory for backlight device\n");
- ret = -ENOMEM;
+ ret = PTR_ERR(bd);
goto out_kfree;
}
@@ -175,7 +175,7 @@ static int aat2870_bl_probe(struct platform_device *pdev)
else
aat2870_bl->channels = AAT2870_BL_CH_ALL;
- if (pdata->max_brightness > 0)
+ if (pdata->max_current > 0)
aat2870_bl->max_current = pdata->max_current;
else
aat2870_bl->max_current = AAT2870_CURRENT_27_9;
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index f441726ddf2b..86b0735e6aa0 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -36,9 +36,6 @@ config WATCHDOG_CORE
and gives them the /dev/watchdog interface (and later also the
sysfs interface).
- To compile this driver as a module, choose M here: the module will
- be called watchdog.
-
config WATCHDOG_NOWAYOUT
bool "Disable watchdog shutdown on close"
help
diff --git a/drivers/watchdog/nv_tco.c b/drivers/watchdog/nv_tco.c
index afa78a54711e..809f41c30c44 100644
--- a/drivers/watchdog/nv_tco.c
+++ b/drivers/watchdog/nv_tco.c
@@ -458,7 +458,15 @@ static int __devexit nv_tco_remove(struct platform_device *dev)
static void nv_tco_shutdown(struct platform_device *dev)
{
+ u32 val;
+
tco_timer_stop();
+
+ /* Some BIOSes fail the POST (once) if the NO_REBOOT flag is not
+ * unset during shutdown. */
+ pci_read_config_dword(tco_pci, MCP51_SMBUS_SETUP_B, &val);
+ val &= ~MCP51_SMBUS_SETUP_B_TCO_REBOOT;
+ pci_write_config_dword(tco_pci, MCP51_SMBUS_SETUP_B, val);
}
static struct platform_driver nv_tco_driver = {
diff --git a/drivers/watchdog/shwdt.c b/drivers/watchdog/shwdt.c
index db84f2322d1a..a267dc078daf 100644
--- a/drivers/watchdog/shwdt.c
+++ b/drivers/watchdog/shwdt.c
@@ -64,7 +64,7 @@
* misses its deadline, the kernel timer will allow the WDT to overflow.
*/
static int clock_division_ratio = WTCSR_CKS_4096;
-#define next_ping_period(cks) msecs_to_jiffies(cks - 4)
+#define next_ping_period(cks) (jiffies + msecs_to_jiffies(cks - 4))
static const struct watchdog_info sh_wdt_info;
static struct platform_device *sh_wdt_dev;
diff --git a/fs/Kconfig b/fs/Kconfig
index 19891aab9c6e..9fe0b349f4cd 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -127,14 +127,21 @@ config TMPFS_POSIX_ACL
select TMPFS_XATTR
select GENERIC_ACL
help
- POSIX Access Control Lists (ACLs) support permissions for users and
- groups beyond the owner/group/world scheme.
+ POSIX Access Control Lists (ACLs) support additional access rights
+ for users and groups beyond the standard owner/group/world scheme,
+ and this option selects support for ACLs specifically for tmpfs
+ filesystems.
+
+ If you've selected TMPFS, it's possible that you'll also need
+ this option as there are a number of Linux distros that require
+ POSIX ACL support under /dev for certain features to work properly.
+ For example, some distros need this feature for ALSA-related /dev
+ files for sound to work properly. In short, if you're not sure,
+ say Y.
To learn more about Access Control Lists, visit the POSIX ACLs for
Linux website <http://acl.bestbits.at/>.
- If you don't know what Access Control Lists are, say N.
-
config TMPFS_XATTR
bool "Tmpfs extended attributes"
depends on TMPFS
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9b72dcf1cd25..40e6ac08c21f 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
transaction.o inode.o file.o tree-defrag.o \
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
- export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
+ export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o
+
+btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 4cc5c0164ed6..eb159aaa5a11 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -28,8 +28,6 @@
#include "btrfs_inode.h"
#include "xattr.h"
-#ifdef CONFIG_BTRFS_FS_POSIX_ACL
-
struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
{
int size;
@@ -276,18 +274,3 @@ const struct xattr_handler btrfs_xattr_acl_access_handler = {
.get = btrfs_xattr_acl_get,
.set = btrfs_xattr_acl_set,
};
-
-#else /* CONFIG_BTRFS_FS_POSIX_ACL */
-
-int btrfs_acl_chmod(struct inode *inode)
-{
- return 0;
-}
-
-int btrfs_init_acl(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *dir)
-{
- return 0;
-}
-
-#endif /* CONFIG_BTRFS_FS_POSIX_ACL */
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index bfe42b03eaf9..8ec5d86f1734 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -338,6 +338,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
u64 first_byte = disk_start;
struct block_device *bdev;
int ret;
+ int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
@@ -392,8 +393,11 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
BUG_ON(ret);
- ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
- BUG_ON(ret);
+ if (!skip_sum) {
+ ret = btrfs_csum_one_bio(root, inode, bio,
+ start, 1);
+ BUG_ON(ret);
+ }
ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
BUG_ON(ret);
@@ -418,8 +422,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
BUG_ON(ret);
- ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
- BUG_ON(ret);
+ if (!skip_sum) {
+ ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
+ BUG_ON(ret);
+ }
ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
BUG_ON(ret);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 365c4e1dde04..0469263e327e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2406,8 +2406,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
btrfs_root_item *item, struct btrfs_key *key);
int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
-int btrfs_set_root_node(struct btrfs_root_item *item,
- struct extent_buffer *node);
+void btrfs_set_root_node(struct btrfs_root_item *item,
+ struct extent_buffer *node);
void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
/* dir-item.c */
@@ -2523,6 +2523,14 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
#define PageChecked PageFsMisc
#endif
+/* This forces readahead on a given range of bytes in an inode */
+static inline void btrfs_force_ra(struct address_space *mapping,
+ struct file_ra_state *ra, struct file *file,
+ pgoff_t offset, unsigned long req_size)
+{
+ page_cache_sync_readahead(mapping, ra, file, offset, req_size);
+}
+
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
int btrfs_set_inode_index(struct inode *dir, u64 *index);
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
@@ -2551,9 +2559,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
size_t size, struct bio *bio, unsigned long bio_flags);
-unsigned long btrfs_force_ra(struct address_space *mapping,
- struct file_ra_state *ra, struct file *file,
- pgoff_t offset, pgoff_t last_index);
int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
int btrfs_readpage(struct file *file, struct page *page);
void btrfs_evict_inode(struct inode *inode);
@@ -2648,12 +2653,21 @@ do { \
/* acl.c */
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
struct posix_acl *btrfs_get_acl(struct inode *inode, int type);
-#else
-#define btrfs_get_acl NULL
-#endif
int btrfs_init_acl(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir);
int btrfs_acl_chmod(struct inode *inode);
+#else
+#define btrfs_get_acl NULL
+static inline int btrfs_init_acl(struct btrfs_trans_handle *trans,
+ struct inode *inode, struct inode *dir)
+{
+ return 0;
+}
+static inline int btrfs_acl_chmod(struct inode *inode)
+{
+ return 0;
+}
+#endif
/* relocation.c */
int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index c360a848d97f..31d84e78129b 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -198,8 +198,6 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
struct btrfs_key key;
int ins_len = mod < 0 ? -1 : 0;
int cow = mod != 0;
- struct btrfs_key found_key;
- struct extent_buffer *leaf;
key.objectid = dir;
btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
@@ -209,18 +207,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
if (ret < 0)
return ERR_PTR(ret);
- if (ret > 0) {
- if (path->slots[0] == 0)
- return NULL;
- path->slots[0]--;
- }
-
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
- if (found_key.objectid != dir ||
- btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY ||
- found_key.offset != key.offset)
+ if (ret > 0)
return NULL;
return btrfs_match_dir_item_name(root, path, name, name_len);
@@ -315,8 +302,6 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
struct btrfs_key key;
int ins_len = mod < 0 ? -1 : 0;
int cow = mod != 0;
- struct btrfs_key found_key;
- struct extent_buffer *leaf;
key.objectid = dir;
btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
@@ -324,18 +309,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
if (ret < 0)
return ERR_PTR(ret);
- if (ret > 0) {
- if (path->slots[0] == 0)
- return NULL;
- path->slots[0]--;
- }
-
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
- if (found_key.objectid != dir ||
- btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY ||
- found_key.offset != key.offset)
+ if (ret > 0)
return NULL;
return btrfs_match_dir_item_name(root, path, name, name_len);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4d08ed79405d..66bac226944e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -663,7 +663,9 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
struct btrfs_path *path;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
+
key.objectid = start;
key.offset = len;
btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
@@ -3272,6 +3274,9 @@ again:
}
ret = btrfs_alloc_chunk(trans, extent_root, flags);
+ if (ret < 0 && ret != -ENOSPC)
+ goto out;
+
spin_lock(&space_info->lock);
if (ret)
space_info->full = 1;
@@ -3281,6 +3286,7 @@ again:
space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
space_info->chunk_alloc = 0;
spin_unlock(&space_info->lock);
+out:
mutex_unlock(&extent_root->fs_info->chunk_mutex);
return ret;
}
@@ -4456,7 +4462,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
printk(KERN_ERR "umm, got %d back from search"
", was looking for %llu\n", ret,
(unsigned long long)bytenr);
- btrfs_print_leaf(extent_root, path->nodes[0]);
+ if (ret > 0)
+ btrfs_print_leaf(extent_root,
+ path->nodes[0]);
}
BUG_ON(ret);
extent_slot = path->slots[0];
@@ -5073,7 +5081,9 @@ have_block_group:
* group is does point to and try again
*/
if (!last_ptr_loop && last_ptr->block_group &&
- last_ptr->block_group != block_group) {
+ last_ptr->block_group != block_group &&
+ index <=
+ get_block_group_index(last_ptr->block_group)) {
btrfs_put_block_group(block_group);
block_group = last_ptr->block_group;
@@ -5501,7 +5511,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
@@ -6272,10 +6283,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
int level;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
wc = kzalloc(sizeof(*wc), GFP_NOFS);
- BUG_ON(!wc);
+ if (!wc) {
+ btrfs_free_path(path);
+ return -ENOMEM;
+ }
trans = btrfs_start_transaction(tree_root, 0);
BUG_ON(IS_ERR(trans));
@@ -6538,8 +6553,6 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
u64 min_allocable_bytes;
int ret = -ENOSPC;
- if (cache->ro)
- return 0;
/*
* We need some metadata space and system metadata space for
@@ -6555,6 +6568,12 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
spin_lock(&sinfo->lock);
spin_lock(&cache->lock);
+
+ if (cache->ro) {
+ ret = 0;
+ goto out;
+ }
+
num_bytes = cache->key.offset - cache->reserved - cache->pinned -
cache->bytes_super - btrfs_block_group_used(&cache->item);
@@ -6568,7 +6587,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
cache->ro = 1;
ret = 0;
}
-
+out:
spin_unlock(&cache->lock);
spin_unlock(&sinfo->lock);
return ret;
@@ -7183,11 +7202,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&cluster->refill_lock);
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
inode = lookup_free_space_inode(root, block_group, path);
if (!IS_ERR(inode)) {
- btrfs_orphan_add(trans, inode);
+ ret = btrfs_orphan_add(trans, inode);
+ BUG_ON(ret);
clear_nlink(inode);
/* One for the block groups ref */
spin_lock(&block_group->lock);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 067b1747421b..d418164a35f1 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -254,14 +254,14 @@ static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
*
* This should be called with the tree lock held.
*/
-static int merge_state(struct extent_io_tree *tree,
- struct extent_state *state)
+static void merge_state(struct extent_io_tree *tree,
+ struct extent_state *state)
{
struct extent_state *other;
struct rb_node *other_node;
if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
- return 0;
+ return;
other_node = rb_prev(&state->rb_node);
if (other_node) {
@@ -287,19 +287,13 @@ static int merge_state(struct extent_io_tree *tree,
free_extent_state(other);
}
}
-
- return 0;
}
-static int set_state_cb(struct extent_io_tree *tree,
+static void set_state_cb(struct extent_io_tree *tree,
struct extent_state *state, int *bits)
{
- if (tree->ops && tree->ops->set_bit_hook) {
- return tree->ops->set_bit_hook(tree->mapping->host,
- state, bits);
- }
-
- return 0;
+ if (tree->ops && tree->ops->set_bit_hook)
+ tree->ops->set_bit_hook(tree->mapping->host, state, bits);
}
static void clear_state_cb(struct extent_io_tree *tree,
@@ -309,6 +303,9 @@ static void clear_state_cb(struct extent_io_tree *tree,
tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
}
+static void set_state_bits(struct extent_io_tree *tree,
+ struct extent_state *state, int *bits);
+
/*
* insert an extent_state struct into the tree. 'bits' are set on the
* struct before it is inserted.
@@ -324,8 +321,6 @@ static int insert_state(struct extent_io_tree *tree,
int *bits)
{
struct rb_node *node;
- int bits_to_set = *bits & ~EXTENT_CTLBITS;
- int ret;
if (end < start) {
printk(KERN_ERR "btrfs end < start %llu %llu\n",
@@ -335,13 +330,9 @@ static int insert_state(struct extent_io_tree *tree,
}
state->start = start;
state->end = end;
- ret = set_state_cb(tree, state, bits);
- if (ret)
- return ret;
- if (bits_to_set & EXTENT_DIRTY)
- tree->dirty_bytes += end - start + 1;
- state->state |= bits_to_set;
+ set_state_bits(tree, state, bits);
+
node = tree_insert(&tree->state, end, &state->rb_node);
if (node) {
struct extent_state *found;
@@ -357,13 +348,11 @@ static int insert_state(struct extent_io_tree *tree,
return 0;
}
-static int split_cb(struct extent_io_tree *tree, struct extent_state *orig,
+static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
u64 split)
{
if (tree->ops && tree->ops->split_extent_hook)
- return tree->ops->split_extent_hook(tree->mapping->host,
- orig, split);
- return 0;
+ tree->ops->split_extent_hook(tree->mapping->host, orig, split);
}
/*
@@ -659,34 +648,25 @@ again:
if (start > end)
break;
- if (need_resched()) {
- spin_unlock(&tree->lock);
- cond_resched();
- spin_lock(&tree->lock);
- }
+ cond_resched_lock(&tree->lock);
}
out:
spin_unlock(&tree->lock);
return 0;
}
-static int set_state_bits(struct extent_io_tree *tree,
+static void set_state_bits(struct extent_io_tree *tree,
struct extent_state *state,
int *bits)
{
- int ret;
int bits_to_set = *bits & ~EXTENT_CTLBITS;
- ret = set_state_cb(tree, state, bits);
- if (ret)
- return ret;
+ set_state_cb(tree, state, bits);
if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
tree->dirty_bytes += range;
}
state->state |= bits_to_set;
-
- return 0;
}
static void cache_state(struct extent_state *state,
@@ -779,9 +759,7 @@ hit_next:
goto out;
}
- err = set_state_bits(tree, state, &bits);
- if (err)
- goto out;
+ set_state_bits(tree, state, &bits);
cache_state(state, cached_state);
merge_state(tree, state);
@@ -830,9 +808,7 @@ hit_next:
if (err)
goto out;
if (state->end <= end) {
- err = set_state_bits(tree, state, &bits);
- if (err)
- goto out;
+ set_state_bits(tree, state, &bits);
cache_state(state, cached_state);
merge_state(tree, state);
if (last_end == (u64)-1)
@@ -893,11 +869,7 @@ hit_next:
err = split_state(tree, state, prealloc, end + 1);
BUG_ON(err == -EEXIST);
- err = set_state_bits(tree, prealloc, &bits);
- if (err) {
- prealloc = NULL;
- goto out;
- }
+ set_state_bits(tree, prealloc, &bits);
cache_state(prealloc, cached_state);
merge_state(tree, prealloc);
prealloc = NULL;
@@ -1059,46 +1031,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
return 0;
}
-/*
- * find the first offset in the io tree with 'bits' set. zero is
- * returned if we find something, and *start_ret and *end_ret are
- * set to reflect the state struct that was found.
- *
- * If nothing was found, 1 is returned, < 0 on error
- */
-int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, int bits)
-{
- struct rb_node *node;
- struct extent_state *state;
- int ret = 1;
-
- spin_lock(&tree->lock);
- /*
- * this search will find all the extents that end after
- * our range starts.
- */
- node = tree_search(tree, start);
- if (!node)
- goto out;
-
- while (1) {
- state = rb_entry(node, struct extent_state, rb_node);
- if (state->end >= start && (state->state & bits)) {
- *start_ret = state->start;
- *end_ret = state->end;
- ret = 0;
- break;
- }
- node = rb_next(node);
- if (!node)
- break;
- }
-out:
- spin_unlock(&tree->lock);
- return ret;
-}
-
/* find the first state struct with 'bits' set after 'start', and
* return it. tree->lock must be held. NULL will returned if
* nothing was found after 'start'
@@ -1131,6 +1063,30 @@ out:
}
/*
+ * find the first offset in the io tree with 'bits' set. zero is
+ * returned if we find something, and *start_ret and *end_ret are
+ * set to reflect the state struct that was found.
+ *
+ * If nothing was found, 1 is returned, < 0 on error
+ */
+int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, int bits)
+{
+ struct extent_state *state;
+ int ret = 1;
+
+ spin_lock(&tree->lock);
+ state = find_first_extent_bit_state(tree, start, bits);
+ if (state) {
+ *start_ret = state->start;
+ *end_ret = state->end;
+ ret = 0;
+ }
+ spin_unlock(&tree->lock);
+ return ret;
+}
+
+/*
* find a contiguous range of bytes in the file marked as delalloc, not
* more than 'max_bytes'. start and end are used to return the range,
*
@@ -2546,7 +2502,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
struct writeback_control *wbc)
{
int ret;
- struct address_space *mapping = page->mapping;
struct extent_page_data epd = {
.bio = NULL,
.tree = tree,
@@ -2554,17 +2509,9 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.extent_locked = 0,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
- struct writeback_control wbc_writepages = {
- .sync_mode = wbc->sync_mode,
- .nr_to_write = 64,
- .range_start = page_offset(page) + PAGE_CACHE_SIZE,
- .range_end = (loff_t)-1,
- };
ret = __extent_writepage(page, wbc, &epd);
- extent_write_cache_pages(tree, mapping, &wbc_writepages,
- __extent_writepage, &epd, flush_write_bio);
flush_epd_write_bio(&epd);
return ret;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 21a7ca9e7282..7b2f0c3e7929 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -76,15 +76,15 @@ struct extent_io_ops {
struct extent_state *state);
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate);
- int (*set_bit_hook)(struct inode *inode, struct extent_state *state,
- int *bits);
- int (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
- int *bits);
- int (*merge_extent_hook)(struct inode *inode,
- struct extent_state *new,
- struct extent_state *other);
- int (*split_extent_hook)(struct inode *inode,
- struct extent_state *orig, u64 split);
+ void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
+ int *bits);
+ void (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
+ int *bits);
+ void (*merge_extent_hook)(struct inode *inode,
+ struct extent_state *new,
+ struct extent_state *other);
+ void (*split_extent_hook)(struct inode *inode,
+ struct extent_state *orig, u64 split);
int (*write_cache_pages_lock_hook)(struct page *page);
};
@@ -108,8 +108,6 @@ struct extent_state {
wait_queue_head_t wq;
atomic_t refs;
unsigned long state;
- u64 split_start;
- u64 split_end;
/* for use by the FS */
u64 private;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2d0410344ea3..7c97b3301459 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -183,22 +183,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
return 0;
}
-int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
+static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
{
- int ret = 0;
struct extent_map *merge = NULL;
struct rb_node *rb;
- struct extent_map *em;
-
- write_lock(&tree->lock);
- em = lookup_extent_mapping(tree, start, len);
-
- WARN_ON(!em || em->start != start);
-
- if (!em)
- goto out;
-
- clear_bit(EXTENT_FLAG_PINNED, &em->flags);
if (em->start != 0) {
rb = rb_prev(&em->rb_node);
@@ -225,6 +213,24 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
merge->in_tree = 0;
free_extent_map(merge);
}
+}
+
+int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
+{
+ int ret = 0;
+ struct extent_map *em;
+
+ write_lock(&tree->lock);
+ em = lookup_extent_mapping(tree, start, len);
+
+ WARN_ON(!em || em->start != start);
+
+ if (!em)
+ goto out;
+
+ clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+
+ try_merge_map(tree, em);
free_extent_map(em);
out:
@@ -247,7 +253,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
struct extent_map *em)
{
int ret = 0;
- struct extent_map *merge = NULL;
struct rb_node *rb;
struct extent_map *exist;
@@ -263,30 +268,8 @@ int add_extent_mapping(struct extent_map_tree *tree,
goto out;
}
atomic_inc(&em->refs);
- if (em->start != 0) {
- rb = rb_prev(&em->rb_node);
- if (rb)
- merge = rb_entry(rb, struct extent_map, rb_node);
- if (rb && mergable_maps(merge, em)) {
- em->start = merge->start;
- em->len += merge->len;
- em->block_len += merge->block_len;
- em->block_start = merge->block_start;
- merge->in_tree = 0;
- rb_erase(&merge->rb_node, &tree->map);
- free_extent_map(merge);
- }
- }
- rb = rb_next(&em->rb_node);
- if (rb)
- merge = rb_entry(rb, struct extent_map, rb_node);
- if (rb && mergable_maps(em, merge)) {
- em->len += merge->len;
- em->block_len += merge->len;
- rb_erase(&merge->rb_node, &tree->map);
- merge->in_tree = 0;
- free_extent_map(merge);
- }
+
+ try_merge_map(tree, em);
out:
return ret;
}
@@ -299,19 +282,8 @@ static u64 range_end(u64 start, u64 len)
return start + len;
}
-/**
- * lookup_extent_mapping - lookup extent_map
- * @tree: tree to lookup in
- * @start: byte offset to start the search
- * @len: length of the lookup range
- *
- * Find and return the first extent_map struct in @tree that intersects the
- * [start, len] range. There may be additional objects in the tree that
- * intersect, so check the object returned carefully to make sure that no
- * additional lookups are needed.
- */
-struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
- u64 start, u64 len)
+struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree,
+ u64 start, u64 len, int strict)
{
struct extent_map *em;
struct rb_node *rb_node;
@@ -320,38 +292,42 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
u64 end = range_end(start, len);
rb_node = __tree_search(&tree->map, start, &prev, &next);
- if (!rb_node && prev) {
- em = rb_entry(prev, struct extent_map, rb_node);
- if (end > em->start && start < extent_map_end(em))
- goto found;
- }
- if (!rb_node && next) {
- em = rb_entry(next, struct extent_map, rb_node);
- if (end > em->start && start < extent_map_end(em))
- goto found;
- }
if (!rb_node) {
- em = NULL;
- goto out;
- }
- if (IS_ERR(rb_node)) {
- em = ERR_CAST(rb_node);
- goto out;
+ if (prev)
+ rb_node = prev;
+ else if (next)
+ rb_node = next;
+ else
+ return NULL;
}
+
em = rb_entry(rb_node, struct extent_map, rb_node);
- if (end > em->start && start < extent_map_end(em))
- goto found;
- em = NULL;
- goto out;
+ if (strict && !(end > em->start && start < extent_map_end(em)))
+ return NULL;
-found:
atomic_inc(&em->refs);
-out:
return em;
}
/**
+ * lookup_extent_mapping - lookup extent_map
+ * @tree: tree to lookup in
+ * @start: byte offset to start the search
+ * @len: length of the lookup range
+ *
+ * Find and return the first extent_map struct in @tree that intersects the
+ * [start, len] range. There may be additional objects in the tree that
+ * intersect, so check the object returned carefully to make sure that no
+ * additional lookups are needed.
+ */
+struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
+ u64 start, u64 len)
+{
+ return __lookup_extent_mapping(tree, start, len, 1);
+}
+
+/**
* search_extent_mapping - find a nearby extent map
* @tree: tree to lookup in
* @start: byte offset to start the search
@@ -365,38 +341,7 @@ out:
struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len)
{
- struct extent_map *em;
- struct rb_node *rb_node;
- struct rb_node *prev = NULL;
- struct rb_node *next = NULL;
-
- rb_node = __tree_search(&tree->map, start, &prev, &next);
- if (!rb_node && prev) {
- em = rb_entry(prev, struct extent_map, rb_node);
- goto found;
- }
- if (!rb_node && next) {
- em = rb_entry(next, struct extent_map, rb_node);
- goto found;
- }
- if (!rb_node) {
- em = NULL;
- goto out;
- }
- if (IS_ERR(rb_node)) {
- em = ERR_CAST(rb_node);
- goto out;
- }
- em = rb_entry(rb_node, struct extent_map, rb_node);
- goto found;
-
- em = NULL;
- goto out;
-
-found:
- atomic_inc(&em->refs);
-out:
- return em;
+ return __lookup_extent_mapping(tree, start, len, 0);
}
/**
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 08bcfa92a222..b910694f61ed 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -291,7 +291,8 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
if (search_commit) {
path->skip_locking = 1;
@@ -677,7 +678,9 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
btrfs_super_csum_size(&root->fs_info->super_copy);
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
+
sector_sum = sums->sums;
again:
next_offset = (u64)-1;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a35e51c9f235..658d66959abe 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -74,7 +74,7 @@ struct inode_defrag {
* If an existing record is found the defrag item you
* pass in is freed
*/
-static int __btrfs_add_inode_defrag(struct inode *inode,
+static void __btrfs_add_inode_defrag(struct inode *inode,
struct inode_defrag *defrag)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -106,11 +106,11 @@ static int __btrfs_add_inode_defrag(struct inode *inode,
BTRFS_I(inode)->in_defrag = 1;
rb_link_node(&defrag->rb_node, parent, p);
rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
- return 0;
+ return;
exists:
kfree(defrag);
- return 0;
+ return;
}
@@ -123,7 +123,6 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct inode_defrag *defrag;
- int ret = 0;
u64 transid;
if (!btrfs_test_opt(root, AUTO_DEFRAG))
@@ -150,9 +149,9 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
spin_lock(&root->fs_info->defrag_inodes_lock);
if (!BTRFS_I(inode)->in_defrag)
- ret = __btrfs_add_inode_defrag(inode, defrag);
+ __btrfs_add_inode_defrag(inode, defrag);
spin_unlock(&root->fs_info->defrag_inodes_lock);
- return ret;
+ return 0;
}
/*
@@ -855,7 +854,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
btrfs_drop_extent_cache(inode, start, end - 1, 0);
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
again:
recow = 0;
split = start;
@@ -1059,7 +1059,7 @@ static int prepare_uptodate_page(struct page *page, u64 pos)
static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
struct page **pages, size_t num_pages,
loff_t pos, unsigned long first_index,
- unsigned long last_index, size_t write_bytes)
+ size_t write_bytes)
{
struct extent_state *cached_state = NULL;
int i;
@@ -1159,7 +1159,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct page **pages = NULL;
unsigned long first_index;
- unsigned long last_index;
size_t num_written = 0;
int nrptrs;
int ret = 0;
@@ -1172,7 +1171,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
return -ENOMEM;
first_index = pos >> PAGE_CACHE_SHIFT;
- last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT;
while (iov_iter_count(i) > 0) {
size_t offset = pos & (PAGE_CACHE_SIZE - 1);
@@ -1206,8 +1204,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
* contents of pages from loop to loop
*/
ret = prepare_pages(root, file, pages, num_pages,
- pos, first_index, last_index,
- write_bytes);
+ pos, first_index, write_bytes);
if (ret) {
btrfs_delalloc_release_space(inode,
num_pages << PAGE_CACHE_SHIFT);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ae762dab37f8..15fceefbca0a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1061,7 +1061,8 @@ static noinline int run_delalloc_nocow(struct inode *inode,
u64 ino = btrfs_ino(inode);
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
nolock = btrfs_is_free_space_inode(root, inode);
@@ -1282,17 +1283,16 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
return ret;
}
-static int btrfs_split_extent_hook(struct inode *inode,
- struct extent_state *orig, u64 split)
+static void btrfs_split_extent_hook(struct inode *inode,
+ struct extent_state *orig, u64 split)
{
/* not delalloc, ignore it */
if (!(orig->state & EXTENT_DELALLOC))
- return 0;
+ return;
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
- return 0;
}
/*
@@ -1301,18 +1301,17 @@ static int btrfs_split_extent_hook(struct inode *inode,
* extents, such as when we are doing sequential writes, so we can properly
* account for the metadata space we'll need.
*/
-static int btrfs_merge_extent_hook(struct inode *inode,
- struct extent_state *new,
- struct extent_state *other)
+static void btrfs_merge_extent_hook(struct inode *inode,
+ struct extent_state *new,
+ struct extent_state *other)
{
/* not delalloc, ignore it */
if (!(other->state & EXTENT_DELALLOC))
- return 0;
+ return;
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents--;
spin_unlock(&BTRFS_I(inode)->lock);
- return 0;
}
/*
@@ -1320,8 +1319,8 @@ static int btrfs_merge_extent_hook(struct inode *inode,
* bytes in this file, and to maintain the list of inodes that
* have pending delalloc work to be done.
*/
-static int btrfs_set_bit_hook(struct inode *inode,
- struct extent_state *state, int *bits)
+static void btrfs_set_bit_hook(struct inode *inode,
+ struct extent_state *state, int *bits)
{
/*
@@ -1351,14 +1350,13 @@ static int btrfs_set_bit_hook(struct inode *inode,
}
spin_unlock(&root->fs_info->delalloc_lock);
}
- return 0;
}
/*
* extent_io.c clear_bit_hook, see set_bit_hook for why
*/
-static int btrfs_clear_bit_hook(struct inode *inode,
- struct extent_state *state, int *bits)
+static void btrfs_clear_bit_hook(struct inode *inode,
+ struct extent_state *state, int *bits)
{
/*
* set_bit and clear bit hooks normally require _irqsave/restore
@@ -1395,7 +1393,6 @@ static int btrfs_clear_bit_hook(struct inode *inode,
}
spin_unlock(&root->fs_info->delalloc_lock);
}
- return 0;
}
/*
@@ -1645,7 +1642,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
int ret;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
path->leave_spinning = 1;
@@ -2215,7 +2213,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
if (!root->orphan_block_rsv) {
block_rsv = btrfs_alloc_block_rsv(root);
- BUG_ON(!block_rsv);
+ if (!block_rsv)
+ return -ENOMEM;
}
spin_lock(&root->orphan_lock);
@@ -2517,7 +2516,9 @@ static void btrfs_read_locked_inode(struct inode *inode)
filled = true;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ goto make_bad;
+
path->leave_spinning = 1;
memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
@@ -2998,13 +2999,16 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
dentry->d_name.name, dentry->d_name.len);
- BUG_ON(ret);
+ if (ret)
+ goto out;
if (inode->i_nlink == 0) {
ret = btrfs_orphan_add(trans, inode);
- BUG_ON(ret);
+ if (ret)
+ goto out;
}
+out:
nr = trans->blocks_used;
__unlink_end_trans(trans, root);
btrfs_btree_balance_dirty(root, nr);
@@ -3147,6 +3151,11 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ path->reada = -1;
+
if (root->ref_cows || root == root->fs_info->tree_root)
btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
@@ -3159,10 +3168,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
if (min_type == 0 && root == BTRFS_I(inode)->root)
btrfs_kill_delayed_inode_items(inode);
- path = btrfs_alloc_path();
- BUG_ON(!path);
- path->reada = -1;
-
key.objectid = ino;
key.offset = (u64)-1;
key.type = (u8)-1;
@@ -3690,7 +3695,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
int ret = 0;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
namelen, 0);
@@ -3946,6 +3952,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
struct btrfs_root *root, int *new)
{
struct inode *inode;
+ int bad_inode = 0;
inode = btrfs_iget_locked(s, location->objectid, root);
if (!inode)
@@ -3955,10 +3962,19 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
BTRFS_I(inode)->root = root;
memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
btrfs_read_locked_inode(inode);
- inode_tree_add(inode);
- unlock_new_inode(inode);
- if (new)
- *new = 1;
+ if (!is_bad_inode(inode)) {
+ inode_tree_add(inode);
+ unlock_new_inode(inode);
+ if (new)
+ *new = 1;
+ } else {
+ bad_inode = 1;
+ }
+ }
+
+ if (bad_inode) {
+ iput(inode);
+ inode = ERR_PTR(-ESTALE);
}
return inode;
@@ -4451,7 +4467,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
int owner;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return ERR_PTR(-ENOMEM);
inode = new_inode(root->fs_info->sb);
if (!inode) {
@@ -6711,19 +6728,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
return 0;
}
-/* helper function for file defrag and space balancing. This
- * forces readahead on a given range of bytes in an inode
- */
-unsigned long btrfs_force_ra(struct address_space *mapping,
- struct file_ra_state *ra, struct file *file,
- pgoff_t offset, pgoff_t last_index)
-{
- pgoff_t req_size = last_index - offset + 1;
-
- page_cache_sync_readahead(mapping, ra, file, offset, req_size);
- return offset + req_size;
-}
-
struct inode *btrfs_alloc_inode(struct super_block *sb)
{
struct btrfs_inode *ei;
@@ -7206,7 +7210,11 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
goto out_unlock;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path) {
+ err = -ENOMEM;
+ drop_inode = 1;
+ goto out_unlock;
+ }
key.objectid = btrfs_ino(inode);
key.offset = 0;
btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0b980afc5edd..7cf013349941 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1749,11 +1749,10 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
key.objectid = key.offset;
key.offset = (u64)-1;
dirid = key.objectid;
-
}
if (ptr < name)
goto out;
- memcpy(name, ptr, total_len);
+ memmove(name, ptr, total_len);
name[total_len]='\0';
ret = 0;
out:
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c
deleted file mode 100644
index 82d569cb6267..000000000000
--- a/fs/btrfs/ref-cache.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2008 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/sort.h>
-#include "ctree.h"
-#include "ref-cache.h"
-#include "transaction.h"
-
-static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
- struct rb_node *node)
-{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
- struct btrfs_leaf_ref *entry;
-
- while (*p) {
- parent = *p;
- entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node);
-
- if (bytenr < entry->bytenr)
- p = &(*p)->rb_left;
- else if (bytenr > entry->bytenr)
- p = &(*p)->rb_right;
- else
- return parent;
- }
-
- entry = rb_entry(node, struct btrfs_leaf_ref, rb_node);
- rb_link_node(node, parent, p);
- rb_insert_color(node, root);
- return NULL;
-}
-
-static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
-{
- struct rb_node *n = root->rb_node;
- struct btrfs_leaf_ref *entry;
-
- while (n) {
- entry = rb_entry(n, struct btrfs_leaf_ref, rb_node);
- WARN_ON(!entry->in_tree);
-
- if (bytenr < entry->bytenr)
- n = n->rb_left;
- else if (bytenr > entry->bytenr)
- n = n->rb_right;
- else
- return n;
- }
- return NULL;
-}
diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h
deleted file mode 100644
index 24f7001f6387..000000000000
--- a/fs/btrfs/ref-cache.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2008 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-#ifndef __REFCACHE__
-#define __REFCACHE__
-
-struct btrfs_extent_info {
- /* bytenr and num_bytes find the extent in the extent allocation tree */
- u64 bytenr;
- u64 num_bytes;
-
- /* objectid and offset find the back reference for the file */
- u64 objectid;
- u64 offset;
-};
-
-struct btrfs_leaf_ref {
- struct rb_node rb_node;
- struct btrfs_leaf_ref_tree *tree;
- int in_tree;
- atomic_t usage;
-
- u64 root_gen;
- u64 bytenr;
- u64 owner;
- u64 generation;
- int nritems;
-
- struct list_head list;
- struct btrfs_extent_info extents[];
-};
-
-static inline size_t btrfs_leaf_ref_size(int nr_extents)
-{
- return sizeof(struct btrfs_leaf_ref) +
- sizeof(struct btrfs_extent_info) * nr_extents;
-}
-#endif
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index ebe45443de06..f4099904565a 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -71,13 +71,12 @@ out:
return ret;
}
-int btrfs_set_root_node(struct btrfs_root_item *item,
- struct extent_buffer *node)
+void btrfs_set_root_node(struct btrfs_root_item *item,
+ struct extent_buffer *node)
{
btrfs_set_root_bytenr(item, node->start);
btrfs_set_root_level(item, btrfs_header_level(node));
btrfs_set_root_generation(item, btrfs_header_generation(node));
- return 0;
}
/*
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index eb55863bb4ae..7dc36fab4afc 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -216,17 +216,11 @@ static void wait_current_trans(struct btrfs_root *root)
spin_lock(&root->fs_info->trans_lock);
cur_trans = root->fs_info->running_transaction;
if (cur_trans && cur_trans->blocked) {
- DEFINE_WAIT(wait);
atomic_inc(&cur_trans->use_count);
spin_unlock(&root->fs_info->trans_lock);
- while (1) {
- prepare_to_wait(&root->fs_info->transaction_wait, &wait,
- TASK_UNINTERRUPTIBLE);
- if (!cur_trans->blocked)
- break;
- schedule();
- }
- finish_wait(&root->fs_info->transaction_wait, &wait);
+
+ wait_event(root->fs_info->transaction_wait,
+ !cur_trans->blocked);
put_transaction(cur_trans);
} else {
spin_unlock(&root->fs_info->trans_lock);
@@ -357,19 +351,10 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root
}
/* wait for a transaction commit to be fully complete */
-static noinline int wait_for_commit(struct btrfs_root *root,
+static noinline void wait_for_commit(struct btrfs_root *root,
struct btrfs_transaction *commit)
{
- DEFINE_WAIT(wait);
- while (!commit->commit_done) {
- prepare_to_wait(&commit->commit_wait, &wait,
- TASK_UNINTERRUPTIBLE);
- if (commit->commit_done)
- break;
- schedule();
- }
- finish_wait(&commit->commit_wait, &wait);
- return 0;
+ wait_event(commit->commit_wait, commit->commit_done);
}
int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
@@ -1085,22 +1070,7 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info)
static void wait_current_trans_commit_start(struct btrfs_root *root,
struct btrfs_transaction *trans)
{
- DEFINE_WAIT(wait);
-
- if (trans->in_commit)
- return;
-
- while (1) {
- prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
- TASK_UNINTERRUPTIBLE);
- if (trans->in_commit) {
- finish_wait(&root->fs_info->transaction_blocked_wait,
- &wait);
- break;
- }
- schedule();
- finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
- }
+ wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit);
}
/*
@@ -1110,24 +1080,8 @@ static void wait_current_trans_commit_start(struct btrfs_root *root,
static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
struct btrfs_transaction *trans)
{
- DEFINE_WAIT(wait);
-
- if (trans->commit_done || (trans->in_commit && !trans->blocked))
- return;
-
- while (1) {
- prepare_to_wait(&root->fs_info->transaction_wait, &wait,
- TASK_UNINTERRUPTIBLE);
- if (trans->commit_done ||
- (trans->in_commit && !trans->blocked)) {
- finish_wait(&root->fs_info->transaction_wait,
- &wait);
- break;
- }
- schedule();
- finish_wait(&root->fs_info->transaction_wait,
- &wait);
- }
+ wait_event(root->fs_info->transaction_wait,
+ trans->commit_done || (trans->in_commit && !trans->blocked));
}
/*
@@ -1234,8 +1188,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
atomic_inc(&cur_trans->use_count);
btrfs_end_transaction(trans, root);
- ret = wait_for_commit(root, cur_trans);
- BUG_ON(ret);
+ wait_for_commit(root, cur_trans);
put_transaction(cur_trans);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ac278dd83175..babee65f8eda 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1617,7 +1617,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
return 0;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
nritems = btrfs_header_nritems(eb);
for (i = 0; i < nritems; i++) {
@@ -1723,7 +1724,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
return -ENOMEM;
if (*level == 1) {
- wc->process_func(root, next, wc, ptr_gen);
+ ret = wc->process_func(root, next, wc, ptr_gen);
+ if (ret)
+ return ret;
path->slots[*level]++;
if (wc->free) {
@@ -1788,8 +1791,11 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
parent = path->nodes[*level + 1];
root_owner = btrfs_header_owner(parent);
- wc->process_func(root, path->nodes[*level], wc,
+ ret = wc->process_func(root, path->nodes[*level], wc,
btrfs_header_generation(path->nodes[*level]));
+ if (ret)
+ return ret;
+
if (wc->free) {
struct extent_buffer *next;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b89e372c7544..53875ae73ad4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1037,7 +1037,8 @@ static noinline int find_next_chunk(struct btrfs_root *root,
struct btrfs_key found_key;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
key.objectid = objectid;
key.offset = (u64)-1;
@@ -2061,8 +2062,10 @@ int btrfs_balance(struct btrfs_root *dev_root)
/* step two, relocate all the chunks */
path = btrfs_alloc_path();
- BUG_ON(!path);
-
+ if (!path) {
+ ret = -ENOMEM;
+ goto error;
+ }
key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
key.offset = (u64)-1;
key.type = BTRFS_CHUNK_ITEM_KEY;
@@ -2661,7 +2664,8 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
ret = find_next_chunk(fs_info->chunk_root,
BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset);
- BUG_ON(ret);
+ if (ret)
+ return ret;
alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
(fs_info->metadata_alloc_profile &
diff --git a/fs/dcache.c b/fs/dcache.c
index 2347cdb15abb..c83cae19161e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -795,6 +795,7 @@ relock:
/**
* prune_dcache_sb - shrink the dcache
+ * @sb: superblock
* @nr_to_scan: number of entries to try to free
*
* Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e2d88baf91d3..4687fea0c00f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -124,7 +124,7 @@ void *ext4_kvzalloc(size_t size, gfp_t flags)
{
void *ret;
- ret = kmalloc(size, flags);
+ ret = kzalloc(size, flags);
if (!ret)
ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
return ret;
diff --git a/fs/stack.c b/fs/stack.c
index 4a6f7f440658..b4f2ab48a61f 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -29,10 +29,7 @@ void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
*
* We don't actually know what locking is used at the lower level;
* but if it's a filesystem that supports quotas, it will be using
- * i_lock as in inode_add_bytes(). tmpfs uses other locking, and
- * its 32-bit is (just) able to exceed 2TB i_size with the aid of
- * holes; but its i_blocks cannot carry into the upper long without
- * almost 2TB swap - let's ignore that case.
+ * i_lock as in inode_add_bytes().
*/
if (sizeof(i_blocks) > sizeof(long))
spin_lock(&src->i_lock);
diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h
index 3090471b2a5e..e49c36d38d7e 100644
--- a/include/acpi/acpi_drivers.h
+++ b/include/acpi/acpi_drivers.h
@@ -128,7 +128,7 @@ extern int is_dock_device(acpi_handle handle);
extern int register_dock_notifier(struct notifier_block *nb);
extern void unregister_dock_notifier(struct notifier_block *nb);
extern int register_hotplug_dock_device(acpi_handle handle,
- struct acpi_dock_ops *ops,
+ const struct acpi_dock_ops *ops,
void *context);
extern void unregister_hotplug_dock_device(acpi_handle handle);
#else
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 2ed0a8486c19..f554a9313b43 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -47,7 +47,7 @@
/* Current ACPICA subsystem version in YYYYMMDD format */
-#define ACPI_CA_VERSION 0x20110413
+#define ACPI_CA_VERSION 0x20110623
#include "actypes.h"
#include "actbl.h"
@@ -69,6 +69,7 @@ extern u32 acpi_gbl_trace_flags;
extern u32 acpi_gbl_enable_aml_debug_object;
extern u8 acpi_gbl_copy_dsdt_locally;
extern u8 acpi_gbl_truncate_io_addresses;
+extern u8 acpi_gbl_disable_auto_repair;
extern u32 acpi_current_gpe_count;
extern struct acpi_table_fadt acpi_gbl_FADT;
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index ba4928cae473..67055f180330 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -337,7 +337,7 @@ extern struct cpuidle_driver acpi_idle_driver;
/* in processor_thermal.c */
int acpi_processor_get_limit_info(struct acpi_processor *pr);
-extern struct thermal_cooling_device_ops processor_cooling_ops;
+extern const struct thermal_cooling_device_ops processor_cooling_ops;
#ifdef CONFIG_CPU_FREQ
void acpi_thermal_cpufreq_init(void);
void acpi_thermal_cpufreq_exit(void);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index e19527de6a93..6001b4da39dd 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -238,7 +238,6 @@ extern int acpi_paddr_to_node(u64 start_addr, u64 size);
extern int pnpacpi_disabled;
#define PXM_INVAL (-1)
-#define NID_INVAL (-1)
int acpi_check_resource_conflict(const struct resource *res);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 4427e0454051..3fa1f3d90ce0 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -208,6 +208,49 @@ struct dm_target_callbacks {
int dm_register_target(struct target_type *t);
void dm_unregister_target(struct target_type *t);
+/*
+ * Target argument parsing.
+ */
+struct dm_arg_set {
+ unsigned argc;
+ char **argv;
+};
+
+/*
+ * The minimum and maximum value of a numeric argument, together with
+ * the error message to use if the number is found to be outside that range.
+ */
+struct dm_arg {
+ unsigned min;
+ unsigned max;
+ char *error;
+};
+
+/*
+ * Validate the next argument, either returning it as *value or, if invalid,
+ * returning -EINVAL and setting *error.
+ */
+int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set,
+ unsigned *value, char **error);
+
+/*
+ * Process the next argument as the start of a group containing between
+ * arg->min and arg->max further arguments. Either return the size as
+ * *num_args or, if invalid, return -EINVAL and set *error.
+ */
+int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set,
+ unsigned *num_args, char **error);
+
+/*
+ * Return the current argument and shift to the next.
+ */
+const char *dm_shift_arg(struct dm_arg_set *as);
+
+/*
+ * Move through num_args arguments.
+ */
+void dm_consume_args(struct dm_arg_set *as, unsigned num_args);
+
/*-----------------------------------------------------------------
* Functions for creating and manipulating mapped devices.
* Drop the reference with dm_put when you finish with the object.
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index 3708455ee6c3..0cb8eff76bd6 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -267,9 +267,9 @@ enum {
#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
#define DM_VERSION_MAJOR 4
-#define DM_VERSION_MINOR 20
+#define DM_VERSION_MINOR 21
#define DM_VERSION_PATCHLEVEL 0
-#define DM_VERSION_EXTRA "-ioctl (2011-02-02)"
+#define DM_VERSION_EXTRA "-ioctl (2011-07-06)"
/* Status bits */
#define DM_READONLY_FLAG (1 << 0) /* In/Out */
diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h
index 298d587e349b..5e54458e920f 100644
--- a/include/linux/dm-kcopyd.h
+++ b/include/linux/dm-kcopyd.h
@@ -42,5 +42,20 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
unsigned num_dests, struct dm_io_region *dests,
unsigned flags, dm_kcopyd_notify_fn fn, void *context);
+/*
+ * Prepare a callback and submit it via the kcopyd thread.
+ *
+ * dm_kcopyd_prepare_callback allocates a callback structure and returns it.
+ * It must not be called from interrupt context.
+ * The returned value should be passed into dm_kcopyd_do_callback.
+ *
+ * dm_kcopyd_do_callback submits the callback.
+ * It may be called from interrupt context.
+ * The callback is issued from the kcopyd thread.
+ */
+void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc,
+ dm_kcopyd_notify_fn fn, void *context);
+void dm_kcopyd_do_callback(void *job, int read_err, unsigned long write_err);
+
#endif /* __KERNEL__ */
#endif /* _LINUX_DM_KCOPYD_H */
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 3ff060ac7810..c6f996f2abb6 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -25,10 +25,6 @@ struct fault_attr {
unsigned long reject_end;
unsigned long count;
-
-#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
- struct dentry *dir;
-#endif
};
#define FAULT_ATTR_INITIALIZER { \
@@ -45,19 +41,15 @@ bool should_fail(struct fault_attr *attr, ssize_t size);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
-int init_fault_attr_dentries(struct fault_attr *attr, const char *name);
-void cleanup_fault_attr_dentries(struct fault_attr *attr);
+struct dentry *fault_create_debugfs_attr(const char *name,
+ struct dentry *parent, struct fault_attr *attr);
#else /* CONFIG_FAULT_INJECTION_DEBUG_FS */
-static inline int init_fault_attr_dentries(struct fault_attr *attr,
- const char *name)
-{
- return -ENODEV;
-}
-
-static inline void cleanup_fault_attr_dentries(struct fault_attr *attr)
+static inline struct dentry *fault_create_debugfs_attr(const char *name,
+ struct dentry *parent, struct fault_attr *attr)
{
+ return ERR_PTR(-ENODEV);
}
#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index cb4089254f01..3a76faf6a3ee 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -92,7 +92,7 @@ struct vm_area_struct;
*/
#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
-#define __GFP_BITS_SHIFT 23 /* Room for 23 __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 24 /* Room for N __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
/* This equals 0, but use constants in case they ever change */
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 13a801f3d028..255491cf522e 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -146,6 +146,10 @@ void ida_remove(struct ida *ida, int id);
void ida_destroy(struct ida *ida);
void ida_init(struct ida *ida);
+int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
+ gfp_t gfp_mask);
+void ida_simple_remove(struct ida *ida, unsigned int id);
+
void __init idr_init_cache(void);
#endif /* __IDR_H__ */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b96600786913..3b535db00a94 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -86,8 +86,6 @@ extern void mem_cgroup_uncharge_end(void);
extern void mem_cgroup_uncharge_page(struct page *page);
extern void mem_cgroup_uncharge_cache_page(struct page *page);
-extern int mem_cgroup_shmem_charge_fallback(struct page *page,
- struct mm_struct *mm, gfp_t gfp_mask);
extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
@@ -225,12 +223,6 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page)
{
}
-static inline int mem_cgroup_shmem_charge_fallback(struct page *page,
- struct mm_struct *mm, gfp_t gfp_mask)
-{
- return 0;
-}
-
static inline void mem_cgroup_add_lru_list(struct page *page, int lru)
{
}
diff --git a/include/linux/mfd/aat2870.h b/include/linux/mfd/aat2870.h
index 89212df05622..f7316c29bdec 100644
--- a/include/linux/mfd/aat2870.h
+++ b/include/linux/mfd/aat2870.h
@@ -89,7 +89,7 @@ enum aat2870_id {
/* Backlight current magnitude (mA) */
enum aat2870_current {
- AAT2870_CURRENT_0_45,
+ AAT2870_CURRENT_0_45 = 1,
AAT2870_CURRENT_0_90,
AAT2870_CURRENT_1_80,
AAT2870_CURRENT_2_70,
diff --git a/include/linux/of.h b/include/linux/of.h
index bd716f8908de..bc3dc6399547 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -68,6 +68,7 @@ struct device_node {
/* Pointer for first entry in chain of all nodes. */
extern struct device_node *allnodes;
extern struct device_node *of_chosen;
+extern struct device_node *of_aliases;
extern rwlock_t devtree_lock;
static inline bool of_have_populated_dt(void)
@@ -196,18 +197,22 @@ extern struct property *of_find_property(const struct device_node *np,
const char *name,
int *lenp);
extern int of_property_read_u32_array(const struct device_node *np,
- char *propname,
+ const char *propname,
u32 *out_values,
size_t sz);
-extern int of_property_read_string(struct device_node *np, char *propname,
- const char **out_string);
+extern int of_property_read_string(struct device_node *np,
+ const char *propname,
+ const char **out_string);
extern int of_device_is_compatible(const struct device_node *device,
const char *);
extern int of_device_is_available(const struct device_node *device);
extern const void *of_get_property(const struct device_node *node,
const char *name,
int *lenp);
+#define for_each_property(pp, properties) \
+ for (pp = properties; pp != NULL; pp = pp->next)
+
extern int of_n_addr_cells(struct device_node *np);
extern int of_n_size_cells(struct device_node *np);
extern const struct of_device_id *of_match_node(
@@ -220,6 +225,10 @@ extern int of_parse_phandles_with_args(struct device_node *np,
const char *list_name, const char *cells_name, int index,
struct device_node **out_node, const void **out_args);
+extern void *early_init_dt_alloc_memory_arch(u64 size, u64 align);
+extern void of_alias_scan(void);
+extern int of_alias_get_id(struct device_node *np, const char *stem);
+
extern int of_machine_is_compatible(const char *compat);
extern int prom_add_property(struct device_node* np, struct property* prop);
@@ -242,13 +251,15 @@ static inline bool of_have_populated_dt(void)
}
static inline int of_property_read_u32_array(const struct device_node *np,
- char *propname, u32 *out_values, size_t sz)
+ const char *propname,
+ u32 *out_values, size_t sz)
{
return -ENOSYS;
}
static inline int of_property_read_string(struct device_node *np,
- char *propname, const char **out_string)
+ const char *propname,
+ const char **out_string)
{
return -ENOSYS;
}
@@ -256,7 +267,7 @@ static inline int of_property_read_string(struct device_node *np,
#endif /* CONFIG_OF */
static inline int of_property_read_u32(const struct device_node *np,
- char *propname,
+ const char *propname,
u32 *out_value)
{
return of_property_read_u32_array(np, propname, out_value, 1);
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index c84d900fbbb3..b74b74ffe0e7 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -97,7 +97,6 @@ extern void early_init_dt_check_for_initrd(unsigned long node);
extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
int depth, void *data);
extern void early_init_dt_add_memory_arch(u64 base, u64 size);
-extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align);
extern u64 dt_mem_next_cell(int s, __be32 **cellp);
/*
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 23241c2fecce..9d4539c52e53 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -39,7 +39,15 @@
* when it is shrunk, before we rcu free the node. See shrink code for
* details.
*/
-#define RADIX_TREE_INDIRECT_PTR 1
+#define RADIX_TREE_INDIRECT_PTR 1
+/*
+ * A common use of the radix tree is to store pointers to struct pages;
+ * but shmem/tmpfs needs also to store swap entries in the same tree:
+ * those are marked as exceptional entries to distinguish them.
+ * EXCEPTIONAL_ENTRY tests the bit, EXCEPTIONAL_SHIFT shifts content past it.
+ */
+#define RADIX_TREE_EXCEPTIONAL_ENTRY 2
+#define RADIX_TREE_EXCEPTIONAL_SHIFT 2
#define radix_tree_indirect_to_ptr(ptr) \
radix_tree_indirect_to_ptr((void __force *)(ptr))
@@ -174,6 +182,28 @@ static inline int radix_tree_deref_retry(void *arg)
}
/**
+ * radix_tree_exceptional_entry - radix_tree_deref_slot gave exceptional entry?
+ * @arg: value returned by radix_tree_deref_slot
+ * Returns: 0 if well-aligned pointer, non-0 if exceptional entry.
+ */
+static inline int radix_tree_exceptional_entry(void *arg)
+{
+ /* Not unlikely because radix_tree_exception often tested first */
+ return (unsigned long)arg & RADIX_TREE_EXCEPTIONAL_ENTRY;
+}
+
+/**
+ * radix_tree_exception - radix_tree_deref_slot returned either exception?
+ * @arg: value returned by radix_tree_deref_slot
+ * Returns: 0 if well-aligned pointer, non-0 if either kind of exception.
+ */
+static inline int radix_tree_exception(void *arg)
+{
+ return unlikely((unsigned long)arg &
+ (RADIX_TREE_INDIRECT_PTR | RADIX_TREE_EXCEPTIONAL_ENTRY));
+}
+
+/**
* radix_tree_replace_slot - replace item in a slot
* @pslot: pointer to slot, returned by radix_tree_lookup_slot
* @item: new item to store in the slot.
@@ -194,8 +224,8 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long);
unsigned int
radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
unsigned long first_index, unsigned int max_items);
-unsigned int
-radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
+unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root,
+ void ***results, unsigned long *indices,
unsigned long first_index, unsigned int max_items);
unsigned long radix_tree_next_hole(struct radix_tree_root *root,
unsigned long index, unsigned long max_scan);
@@ -222,6 +252,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
unsigned long nr_to_tag,
unsigned int fromtag, unsigned int totag);
int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
+unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item);
static inline void radix_tree_preload_end(void)
{
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index aa08fa8fd79b..9291ac3cc627 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -8,22 +8,15 @@
/* inode in-kernel data */
-#define SHMEM_NR_DIRECT 16
-
-#define SHMEM_SYMLINK_INLINE_LEN (SHMEM_NR_DIRECT * sizeof(swp_entry_t))
-
struct shmem_inode_info {
spinlock_t lock;
unsigned long flags;
unsigned long alloced; /* data pages alloced to file */
- unsigned long swapped; /* subtotal assigned to swap */
- unsigned long next_index; /* highest alloced index + 1 */
- struct shared_policy policy; /* NUMA memory alloc policy */
- struct page *i_indirect; /* top indirect blocks page */
union {
- swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */
- char inline_symlink[SHMEM_SYMLINK_INLINE_LEN];
+ unsigned long swapped; /* subtotal assigned to swap */
+ char *symlink; /* unswappable short symlink */
};
+ struct shared_policy policy; /* NUMA memory alloc policy */
struct list_head swaplist; /* chain of maybes on swap */
struct list_head xattr_list; /* list of shmem_xattr */
struct inode vfs_inode;
@@ -49,7 +42,7 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
/*
* Functions in mm/shmem.c called directly from elsewhere:
*/
-extern int init_tmpfs(void);
+extern int shmem_init(void);
extern int shmem_fill_super(struct super_block *sb, void *data, int silent);
extern struct file *shmem_file_setup(const char *name,
loff_t size, unsigned long flags);
@@ -59,8 +52,6 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
pgoff_t index, gfp_t gfp_mask);
extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
extern int shmem_unuse(swp_entry_t entry, struct page *page);
-extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
- struct page **pagep, swp_entry_t *ent);
static inline struct page *shmem_read_mapping_page(
struct address_space *mapping, pgoff_t index)
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index cd42e30b7c6e..2189d3ffc85d 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -1,3 +1,8 @@
+#ifndef _LINUX_SWAPOPS_H
+#define _LINUX_SWAPOPS_H
+
+#include <linux/radix-tree.h>
+
/*
* swapcache pages are stored in the swapper_space radix tree. We want to
* get good packing density in that tree, so the index should be dense in
@@ -76,6 +81,22 @@ static inline pte_t swp_entry_to_pte(swp_entry_t entry)
return __swp_entry_to_pte(arch_entry);
}
+static inline swp_entry_t radix_to_swp_entry(void *arg)
+{
+ swp_entry_t entry;
+
+ entry.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
+ return entry;
+}
+
+static inline void *swp_to_radix_entry(swp_entry_t entry)
+{
+ unsigned long value;
+
+ value = entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT;
+ return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY);
+}
+
#ifdef CONFIG_MIGRATION
static inline swp_entry_t make_migration_entry(struct page *page, int write)
{
@@ -169,3 +190,5 @@ static inline int non_swap_entry(swp_entry_t entry)
return 0;
}
#endif
+
+#endif /* _LINUX_SWAPOPS_H */
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index d3ec89fb4122..47b4a27e6e97 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -85,22 +85,6 @@ struct thermal_cooling_device {
((long)t-2732+5)/10 : ((long)t-2732-5)/10)
#define CELSIUS_TO_KELVIN(t) ((t)*10+2732)
-#if defined(CONFIG_THERMAL_HWMON)
-/* thermal zone devices with the same type share one hwmon device */
-struct thermal_hwmon_device {
- char type[THERMAL_NAME_LENGTH];
- struct device *device;
- int count;
- struct list_head tz_list;
- struct list_head node;
-};
-
-struct thermal_hwmon_attr {
- struct device_attribute attr;
- char name[16];
-};
-#endif
-
struct thermal_zone_device {
int id;
char type[THERMAL_NAME_LENGTH];
@@ -120,12 +104,6 @@ struct thermal_zone_device {
struct mutex lock; /* protect cooling devices list */
struct list_head node;
struct delayed_work poll_queue;
-#if defined(CONFIG_THERMAL_HWMON)
- struct list_head hwmon_node;
- struct thermal_hwmon_device *hwmon;
- struct thermal_hwmon_attr temp_input; /* hwmon sys attr */
- struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */
-#endif
};
/* Adding event notification support elements */
#define THERMAL_GENL_FAMILY_NAME "thermal_event"
diff --git a/init/main.c b/init/main.c
index d7211faed2ad..1952d37e4ecb 100644
--- a/init/main.c
+++ b/init/main.c
@@ -715,7 +715,7 @@ static void __init do_basic_setup(void)
{
cpuset_init_smp();
usermodehelper_init();
- init_tmpfs();
+ shmem_init();
driver_init();
init_irq_proc();
do_ctors();
diff --git a/ipc/shm.c b/ipc/shm.c
index 9fb044f3b345..b5bae9d945b6 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -294,7 +294,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
void shm_destroy_orphaned(struct ipc_namespace *ns)
{
down_write(&shm_ids(ns).rw_mutex);
- if (&shm_ids(ns).in_use)
+ if (shm_ids(ns).in_use)
idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
up_write(&shm_ids(ns).rw_mutex);
}
@@ -304,9 +304,12 @@ void exit_shm(struct task_struct *task)
{
struct ipc_namespace *ns = task->nsproxy->ipc_ns;
+ if (shm_ids(ns).in_use == 0)
+ return;
+
/* Destroy all already created segments, but not mapped yet */
down_write(&shm_ids(ns).rw_mutex);
- if (&shm_ids(ns).in_use)
+ if (shm_ids(ns).in_use)
idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
up_write(&shm_ids(ns).rw_mutex);
}
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index d1db2880d1cf..e19ce1454ee1 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -291,30 +291,28 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd)
if (!cpumask_subset(mask, cpu_possible_mask))
return -EINVAL;
- s = NULL;
if (isadd == REGISTER) {
for_each_cpu(cpu, mask) {
- if (!s)
- s = kmalloc_node(sizeof(struct listener),
- GFP_KERNEL, cpu_to_node(cpu));
+ s = kmalloc_node(sizeof(struct listener),
+ GFP_KERNEL, cpu_to_node(cpu));
if (!s)
goto cleanup;
+
s->pid = pid;
- INIT_LIST_HEAD(&s->list);
s->valid = 1;
listeners = &per_cpu(listener_array, cpu);
down_write(&listeners->sem);
- list_for_each_entry_safe(s2, tmp, &listeners->list, list) {
- if (s2->pid == pid)
- goto next_cpu;
+ list_for_each_entry(s2, &listeners->list, list) {
+ if (s2->pid == pid && s2->valid)
+ goto exists;
}
list_add(&s->list, &listeners->list);
s = NULL;
-next_cpu:
+exists:
up_write(&listeners->sem);
+ kfree(s); /* nop if NULL */
}
- kfree(s);
return 0;
}
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 2577b121c7c1..f193b7796449 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -197,21 +197,15 @@ static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode,
return debugfs_create_file(name, mode, parent, value, &fops_atomic_t);
}
-void cleanup_fault_attr_dentries(struct fault_attr *attr)
-{
- debugfs_remove_recursive(attr->dir);
-}
-
-int init_fault_attr_dentries(struct fault_attr *attr, const char *name)
+struct dentry *fault_create_debugfs_attr(const char *name,
+ struct dentry *parent, struct fault_attr *attr)
{
mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
struct dentry *dir;
- dir = debugfs_create_dir(name, NULL);
+ dir = debugfs_create_dir(name, parent);
if (!dir)
- return -ENOMEM;
-
- attr->dir = dir;
+ return ERR_PTR(-ENOMEM);
if (!debugfs_create_ul("probability", mode, dir, &attr->probability))
goto fail;
@@ -243,11 +237,11 @@ int init_fault_attr_dentries(struct fault_attr *attr, const char *name)
#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */
- return 0;
+ return dir;
fail:
- debugfs_remove_recursive(attr->dir);
+ debugfs_remove_recursive(dir);
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
}
#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
diff --git a/lib/idr.c b/lib/idr.c
index e15502e8b21e..db040ce3fa73 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -34,8 +34,10 @@
#include <linux/err.h>
#include <linux/string.h>
#include <linux/idr.h>
+#include <linux/spinlock.h>
static struct kmem_cache *idr_layer_cache;
+static DEFINE_SPINLOCK(simple_ida_lock);
static struct idr_layer *get_from_free_list(struct idr *idp)
{
@@ -926,6 +928,71 @@ void ida_destroy(struct ida *ida)
EXPORT_SYMBOL(ida_destroy);
/**
+ * ida_simple_get - get a new id.
+ * @ida: the (initialized) ida.
+ * @start: the minimum id (inclusive, < 0x8000000)
+ * @end: the maximum id (exclusive, < 0x8000000 or 0)
+ * @gfp_mask: memory allocation flags
+ *
+ * Allocates an id in the range start <= id < end, or returns -ENOSPC.
+ * On memory allocation failure, returns -ENOMEM.
+ *
+ * Use ida_simple_remove() to get rid of an id.
+ */
+int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
+ gfp_t gfp_mask)
+{
+ int ret, id;
+ unsigned int max;
+
+ BUG_ON((int)start < 0);
+ BUG_ON((int)end < 0);
+
+ if (end == 0)
+ max = 0x80000000;
+ else {
+ BUG_ON(end < start);
+ max = end - 1;
+ }
+
+again:
+ if (!ida_pre_get(ida, gfp_mask))
+ return -ENOMEM;
+
+ spin_lock(&simple_ida_lock);
+ ret = ida_get_new_above(ida, start, &id);
+ if (!ret) {
+ if (id > max) {
+ ida_remove(ida, id);
+ ret = -ENOSPC;
+ } else {
+ ret = id;
+ }
+ }
+ spin_unlock(&simple_ida_lock);
+
+ if (unlikely(ret == -EAGAIN))
+ goto again;
+
+ return ret;
+}
+EXPORT_SYMBOL(ida_simple_get);
+
+/**
+ * ida_simple_remove - remove an allocated id.
+ * @ida: the (initialized) ida.
+ * @id: the id returned by ida_simple_get.
+ */
+void ida_simple_remove(struct ida *ida, unsigned int id)
+{
+ BUG_ON((int)id < 0);
+ spin_lock(&simple_ida_lock);
+ ida_remove(ida, id);
+ spin_unlock(&simple_ida_lock);
+}
+EXPORT_SYMBOL(ida_simple_remove);
+
+/**
* ida_init - initialize ida handle
* @ida: ida handle
*
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 7ea2e033d715..a2f9da59c197 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -823,8 +823,8 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
EXPORT_SYMBOL(radix_tree_prev_hole);
static unsigned int
-__lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
- unsigned int max_items, unsigned long *next_index)
+__lookup(struct radix_tree_node *slot, void ***results, unsigned long *indices,
+ unsigned long index, unsigned int max_items, unsigned long *next_index)
{
unsigned int nr_found = 0;
unsigned int shift, height;
@@ -857,12 +857,16 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
/* Bottom level: grab some items */
for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
- index++;
if (slot->slots[i]) {
- results[nr_found++] = &(slot->slots[i]);
- if (nr_found == max_items)
+ results[nr_found] = &(slot->slots[i]);
+ if (indices)
+ indices[nr_found] = index;
+ if (++nr_found == max_items) {
+ index++;
goto out;
+ }
}
+ index++;
}
out:
*next_index = index;
@@ -918,8 +922,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
if (cur_index > max_index)
break;
- slots_found = __lookup(node, (void ***)results + ret, cur_index,
- max_items - ret, &next_index);
+ slots_found = __lookup(node, (void ***)results + ret, NULL,
+ cur_index, max_items - ret, &next_index);
nr_found = 0;
for (i = 0; i < slots_found; i++) {
struct radix_tree_node *slot;
@@ -944,6 +948,7 @@ EXPORT_SYMBOL(radix_tree_gang_lookup);
* radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
* @root: radix tree root
* @results: where the results of the lookup are placed
+ * @indices: where their indices should be placed (but usually NULL)
* @first_index: start the lookup from this key
* @max_items: place up to this many items at *results
*
@@ -958,7 +963,8 @@ EXPORT_SYMBOL(radix_tree_gang_lookup);
* protection, radix_tree_deref_slot may fail requiring a retry.
*/
unsigned int
-radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
+radix_tree_gang_lookup_slot(struct radix_tree_root *root,
+ void ***results, unsigned long *indices,
unsigned long first_index, unsigned int max_items)
{
unsigned long max_index;
@@ -974,6 +980,8 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
if (first_index > 0)
return 0;
results[0] = (void **)&root->rnode;
+ if (indices)
+ indices[0] = 0;
return 1;
}
node = indirect_to_ptr(node);
@@ -987,8 +995,9 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
if (cur_index > max_index)
break;
- slots_found = __lookup(node, results + ret, cur_index,
- max_items - ret, &next_index);
+ slots_found = __lookup(node, results + ret,
+ indices ? indices + ret : NULL,
+ cur_index, max_items - ret, &next_index);
ret += slots_found;
if (next_index == 0)
break;
@@ -1194,6 +1203,98 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
+#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP)
+#include <linux/sched.h> /* for cond_resched() */
+
+/*
+ * This linear search is at present only useful to shmem_unuse_inode().
+ */
+static unsigned long __locate(struct radix_tree_node *slot, void *item,
+ unsigned long index, unsigned long *found_index)
+{
+ unsigned int shift, height;
+ unsigned long i;
+
+ height = slot->height;
+ shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+
+ for ( ; height > 1; height--) {
+ i = (index >> shift) & RADIX_TREE_MAP_MASK;
+ for (;;) {
+ if (slot->slots[i] != NULL)
+ break;
+ index &= ~((1UL << shift) - 1);
+ index += 1UL << shift;
+ if (index == 0)
+ goto out; /* 32-bit wraparound */
+ i++;
+ if (i == RADIX_TREE_MAP_SIZE)
+ goto out;
+ }
+
+ shift -= RADIX_TREE_MAP_SHIFT;
+ slot = rcu_dereference_raw(slot->slots[i]);
+ if (slot == NULL)
+ goto out;
+ }
+
+ /* Bottom level: check items */
+ for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
+ if (slot->slots[i] == item) {
+ *found_index = index + i;
+ index = 0;
+ goto out;
+ }
+ }
+ index += RADIX_TREE_MAP_SIZE;
+out:
+ return index;
+}
+
+/**
+ * radix_tree_locate_item - search through radix tree for item
+ * @root: radix tree root
+ * @item: item to be found
+ *
+ * Returns index where item was found, or -1 if not found.
+ * Caller must hold no lock (since this time-consuming function needs
+ * to be preemptible), and must check afterwards if item is still there.
+ */
+unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
+{
+ struct radix_tree_node *node;
+ unsigned long max_index;
+ unsigned long cur_index = 0;
+ unsigned long found_index = -1;
+
+ do {
+ rcu_read_lock();
+ node = rcu_dereference_raw(root->rnode);
+ if (!radix_tree_is_indirect_ptr(node)) {
+ rcu_read_unlock();
+ if (node == item)
+ found_index = 0;
+ break;
+ }
+
+ node = indirect_to_ptr(node);
+ max_index = radix_tree_maxindex(node->height);
+ if (cur_index > max_index)
+ break;
+
+ cur_index = __locate(node, item, cur_index, &found_index);
+ rcu_read_unlock();
+ cond_resched();
+ } while (cur_index != 0 && cur_index <= max_index);
+
+ return found_index;
+}
+#else
+unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
+{
+ return -1;
+}
+#endif /* CONFIG_SHMEM && CONFIG_SWAP */
/**
* radix_tree_shrink - shrink height of a radix tree to minimal
diff --git a/mm/failslab.c b/mm/failslab.c
index 1ce58c201dca..0dd7b8fec71c 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -34,23 +34,23 @@ __setup("failslab=", setup_failslab);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
static int __init failslab_debugfs_init(void)
{
+ struct dentry *dir;
mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
- int err;
- err = init_fault_attr_dentries(&failslab.attr, "failslab");
- if (err)
- return err;
+ dir = fault_create_debugfs_attr("failslab", NULL, &failslab.attr);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
- if (!debugfs_create_bool("ignore-gfp-wait", mode, failslab.attr.dir,
+ if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
&failslab.ignore_gfp_wait))
goto fail;
- if (!debugfs_create_bool("cache-filter", mode, failslab.attr.dir,
+ if (!debugfs_create_bool("cache-filter", mode, dir,
&failslab.cache_filter))
goto fail;
return 0;
fail:
- cleanup_fault_attr_dentries(&failslab.attr);
+ debugfs_remove_recursive(dir);
return -ENOMEM;
}
diff --git a/mm/filemap.c b/mm/filemap.c
index 867d40222ec7..645a080ba4df 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -33,7 +33,6 @@
#include <linux/cpuset.h>
#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
#include <linux/memcontrol.h>
-#include <linux/mm_inline.h> /* for page_is_file_cache() */
#include <linux/cleancache.h>
#include "internal.h"
@@ -462,6 +461,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
int error;
VM_BUG_ON(!PageLocked(page));
+ VM_BUG_ON(PageSwapBacked(page));
error = mem_cgroup_cache_charge(page, current->mm,
gfp_mask & GFP_RECLAIM_MASK);
@@ -479,8 +479,6 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
if (likely(!error)) {
mapping->nrpages++;
__inc_zone_page_state(page, NR_FILE_PAGES);
- if (PageSwapBacked(page))
- __inc_zone_page_state(page, NR_SHMEM);
spin_unlock_irq(&mapping->tree_lock);
} else {
page->mapping = NULL;
@@ -502,22 +500,9 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
{
int ret;
- /*
- * Splice_read and readahead add shmem/tmpfs pages into the page cache
- * before shmem_readpage has a chance to mark them as SwapBacked: they
- * need to go on the anon lru below, and mem_cgroup_cache_charge
- * (called in add_to_page_cache) needs to know where they're going too.
- */
- if (mapping_cap_swap_backed(mapping))
- SetPageSwapBacked(page);
-
ret = add_to_page_cache(page, mapping, offset, gfp_mask);
- if (ret == 0) {
- if (page_is_file_cache(page))
- lru_cache_add_file(page);
- else
- lru_cache_add_anon(page);
- }
+ if (ret == 0)
+ lru_cache_add_file(page);
return ret;
}
EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
@@ -714,9 +699,16 @@ repeat:
page = radix_tree_deref_slot(pagep);
if (unlikely(!page))
goto out;
- if (radix_tree_deref_retry(page))
- goto repeat;
-
+ if (radix_tree_exception(page)) {
+ if (radix_tree_deref_retry(page))
+ goto repeat;
+ /*
+ * Otherwise, shmem/tmpfs must be storing a swap entry
+ * here as an exceptional entry: so return it without
+ * attempting to raise page count.
+ */
+ goto out;
+ }
if (!page_cache_get_speculative(page))
goto repeat;
@@ -753,7 +745,7 @@ struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
repeat:
page = find_get_page(mapping, offset);
- if (page) {
+ if (page && !radix_tree_exception(page)) {
lock_page(page);
/* Has the page been truncated? */
if (unlikely(page->mapping != mapping)) {
@@ -840,7 +832,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
rcu_read_lock();
restart:
nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
- (void ***)pages, start, nr_pages);
+ (void ***)pages, NULL, start, nr_pages);
ret = 0;
for (i = 0; i < nr_found; i++) {
struct page *page;
@@ -849,13 +841,22 @@ repeat:
if (unlikely(!page))
continue;
- /*
- * This can only trigger when the entry at index 0 moves out
- * of or back to the root: none yet gotten, safe to restart.
- */
- if (radix_tree_deref_retry(page)) {
- WARN_ON(start | i);
- goto restart;
+ if (radix_tree_exception(page)) {
+ if (radix_tree_deref_retry(page)) {
+ /*
+ * Transient condition which can only trigger
+ * when entry at index 0 moves out of or back
+ * to root: none yet gotten, safe to restart.
+ */
+ WARN_ON(start | i);
+ goto restart;
+ }
+ /*
+ * Otherwise, shmem/tmpfs must be storing a swap entry
+ * here as an exceptional entry: so skip over it -
+ * we only reach this from invalidate_mapping_pages().
+ */
+ continue;
}
if (!page_cache_get_speculative(page))
@@ -903,7 +904,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
rcu_read_lock();
restart:
nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
- (void ***)pages, index, nr_pages);
+ (void ***)pages, NULL, index, nr_pages);
ret = 0;
for (i = 0; i < nr_found; i++) {
struct page *page;
@@ -912,12 +913,22 @@ repeat:
if (unlikely(!page))
continue;
- /*
- * This can only trigger when the entry at index 0 moves out
- * of or back to the root: none yet gotten, safe to restart.
- */
- if (radix_tree_deref_retry(page))
- goto restart;
+ if (radix_tree_exception(page)) {
+ if (radix_tree_deref_retry(page)) {
+ /*
+ * Transient condition which can only trigger
+ * when entry at index 0 moves out of or back
+ * to root: none yet gotten, safe to restart.
+ */
+ goto restart;
+ }
+ /*
+ * Otherwise, shmem/tmpfs must be storing a swap entry
+ * here as an exceptional entry: so stop looking for
+ * contiguous pages.
+ */
+ break;
+ }
if (!page_cache_get_speculative(page))
goto repeat;
@@ -977,12 +988,21 @@ repeat:
if (unlikely(!page))
continue;
- /*
- * This can only trigger when the entry at index 0 moves out
- * of or back to the root: none yet gotten, safe to restart.
- */
- if (radix_tree_deref_retry(page))
- goto restart;
+ if (radix_tree_exception(page)) {
+ if (radix_tree_deref_retry(page)) {
+ /*
+ * Transient condition which can only trigger
+ * when entry at index 0 moves out of or back
+ * to root: none yet gotten, safe to restart.
+ */
+ goto restart;
+ }
+ /*
+ * This function is never used on a shmem/tmpfs
+ * mapping, so a swap entry won't be found here.
+ */
+ BUG();
+ }
if (!page_cache_get_speculative(page))
goto repeat;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5f84d2351ddb..f4ec4e7ca4cd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -35,7 +35,6 @@
#include <linux/limits.h>
#include <linux/mutex.h>
#include <linux/rbtree.h>
-#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/swapops.h>
@@ -2873,30 +2872,6 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
return 0;
if (PageCompound(page))
return 0;
- /*
- * Corner case handling. This is called from add_to_page_cache()
- * in usual. But some FS (shmem) precharges this page before calling it
- * and call add_to_page_cache() with GFP_NOWAIT.
- *
- * For GFP_NOWAIT case, the page may be pre-charged before calling
- * add_to_page_cache(). (See shmem.c) check it here and avoid to call
- * charge twice. (It works but has to pay a bit larger cost.)
- * And when the page is SwapCache, it should take swap information
- * into account. This is under lock_page() now.
- */
- if (!(gfp_mask & __GFP_WAIT)) {
- struct page_cgroup *pc;
-
- pc = lookup_page_cgroup(page);
- if (!pc)
- return 0;
- lock_page_cgroup(pc);
- if (PageCgroupUsed(pc)) {
- unlock_page_cgroup(pc);
- return 0;
- }
- unlock_page_cgroup(pc);
- }
if (unlikely(!mm))
mm = &init_mm;
@@ -3486,31 +3461,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
cgroup_release_and_wakeup_rmdir(&mem->css);
}
-/*
- * A call to try to shrink memory usage on charge failure at shmem's swapin.
- * Calling hierarchical_reclaim is not enough because we should update
- * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM.
- * Moreover considering hierarchy, we should reclaim from the mem_over_limit,
- * not from the memcg which this page would be charged to.
- * try_charge_swapin does all of these works properly.
- */
-int mem_cgroup_shmem_charge_fallback(struct page *page,
- struct mm_struct *mm,
- gfp_t gfp_mask)
-{
- struct mem_cgroup *mem;
- int ret;
-
- if (mem_cgroup_disabled())
- return 0;
-
- ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
- if (!ret)
- mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */
-
- return ret;
-}
-
#ifdef CONFIG_DEBUG_VM
static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
{
@@ -5330,15 +5280,17 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
pgoff = pte_to_pgoff(ptent);
/* page is moved even if it's not RSS of this task(page-faulted). */
- if (!mapping_cap_swap_backed(mapping)) { /* normal file */
- page = find_get_page(mapping, pgoff);
- } else { /* shmem/tmpfs file. we should take account of swap too. */
- swp_entry_t ent;
- mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent);
+ page = find_get_page(mapping, pgoff);
+
+#ifdef CONFIG_SWAP
+ /* shmem/tmpfs may report page out on swap: account for that too. */
+ if (radix_tree_exceptional_entry(page)) {
+ swp_entry_t swap = radix_to_swp_entry(page);
if (do_swap_account)
- entry->val = ent.val;
+ *entry = swap;
+ page = find_get_page(&swapper_space, swap.val);
}
-
+#endif
return page;
}
diff --git a/mm/mincore.c b/mm/mincore.c
index a4e6b9d75c76..636a86876ff2 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -69,12 +69,15 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
* file will not get a swp_entry_t in its pte, but rather it is like
* any other file mapping (ie. marked !present and faulted in with
* tmpfs's .fault). So swapped out tmpfs mappings are tested here.
- *
- * However when tmpfs moves the page from pagecache and into swapcache,
- * it is still in core, but the find_get_page below won't find it.
- * No big deal, but make a note of it.
*/
page = find_get_page(mapping, pgoff);
+#ifdef CONFIG_SWAP
+ /* shmem/tmpfs may return swap: account for swapcache page too. */
+ if (radix_tree_exceptional_entry(page)) {
+ swp_entry_t swap = radix_to_swp_entry(page);
+ page = find_get_page(&swapper_space, swap.val);
+ }
+#endif
if (page) {
present = PageUptodate(page);
page_cache_release(page);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1dbcf8888f14..6e8ecb6e021c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1409,14 +1409,11 @@ static int __init fail_page_alloc_debugfs(void)
{
mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
struct dentry *dir;
- int err;
- err = init_fault_attr_dentries(&fail_page_alloc.attr,
- "fail_page_alloc");
- if (err)
- return err;
-
- dir = fail_page_alloc.attr.dir;
+ dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
+ &fail_page_alloc.attr);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
&fail_page_alloc.ignore_gfp_wait))
@@ -1430,7 +1427,7 @@ static int __init fail_page_alloc_debugfs(void)
return 0;
fail:
- cleanup_fault_attr_dentries(&fail_page_alloc.attr);
+ debugfs_remove_recursive(dir);
return -ENOMEM;
}
diff --git a/mm/shmem.c b/mm/shmem.c
index 5cc21f8b4cd3..32f6763f16fb 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -6,7 +6,8 @@
* 2000-2001 Christoph Rohland
* 2000-2001 SAP AG
* 2002 Red Hat Inc.
- * Copyright (C) 2002-2005 Hugh Dickins.
+ * Copyright (C) 2002-2011 Hugh Dickins.
+ * Copyright (C) 2011 Google Inc.
* Copyright (C) 2002-2005 VERITAS Software Corporation.
* Copyright (C) 2004 Andi Kleen, SuSE Labs
*
@@ -28,7 +29,6 @@
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/module.h>
-#include <linux/percpu_counter.h>
#include <linux/swap.h>
static struct vfsmount *shm_mnt;
@@ -51,6 +51,8 @@ static struct vfsmount *shm_mnt;
#include <linux/shmem_fs.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
+#include <linux/pagevec.h>
+#include <linux/percpu_counter.h>
#include <linux/splice.h>
#include <linux/security.h>
#include <linux/swapops.h>
@@ -63,43 +65,17 @@ static struct vfsmount *shm_mnt;
#include <linux/magic.h>
#include <asm/uaccess.h>
-#include <asm/div64.h>
#include <asm/pgtable.h>
-/*
- * The maximum size of a shmem/tmpfs file is limited by the maximum size of
- * its triple-indirect swap vector - see illustration at shmem_swp_entry().
- *
- * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel,
- * but one eighth of that on a 64-bit kernel. With 8kB page size, maximum
- * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel,
- * MAX_LFS_FILESIZE being then more restrictive than swap vector layout.
- *
- * We use / and * instead of shifts in the definitions below, so that the swap
- * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE.
- */
-#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
-#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
-
-#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
-#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT)
-
-#define SHMEM_MAX_BYTES min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE)
-#define SHMEM_MAX_INDEX ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT))
-
#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
-/* info->flags needs VM_flags to handle pagein/truncate races efficiently */
-#define SHMEM_PAGEIN VM_READ
-#define SHMEM_TRUNCATE VM_WRITE
-
-/* Definition to limit shmem_truncate's steps between cond_rescheds */
-#define LATENCY_LIMIT 64
-
/* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20
+/* Symlink up to this size is kmalloc'ed instead of using a swappable page */
+#define SHORT_SYMLINK_LEN 128
+
struct shmem_xattr {
struct list_head list; /* anchored by shmem_inode_info->xattr_list */
char *name; /* xattr name */
@@ -107,7 +83,7 @@ struct shmem_xattr {
char value[0];
};
-/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
+/* Flag allocation requirements to shmem_getpage */
enum sgp_type {
SGP_READ, /* don't exceed i_size, don't allocate page */
SGP_CACHE, /* don't exceed i_size, may allocate page */
@@ -137,56 +113,6 @@ static inline int shmem_getpage(struct inode *inode, pgoff_t index,
mapping_gfp_mask(inode->i_mapping), fault_type);
}
-static inline struct page *shmem_dir_alloc(gfp_t gfp_mask)
-{
- /*
- * The above definition of ENTRIES_PER_PAGE, and the use of
- * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
- * might be reconsidered if it ever diverges from PAGE_SIZE.
- *
- * Mobility flags are masked out as swap vectors cannot move
- */
- return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO,
- PAGE_CACHE_SHIFT-PAGE_SHIFT);
-}
-
-static inline void shmem_dir_free(struct page *page)
-{
- __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT);
-}
-
-static struct page **shmem_dir_map(struct page *page)
-{
- return (struct page **)kmap_atomic(page, KM_USER0);
-}
-
-static inline void shmem_dir_unmap(struct page **dir)
-{
- kunmap_atomic(dir, KM_USER0);
-}
-
-static swp_entry_t *shmem_swp_map(struct page *page)
-{
- return (swp_entry_t *)kmap_atomic(page, KM_USER1);
-}
-
-static inline void shmem_swp_balance_unmap(void)
-{
- /*
- * When passing a pointer to an i_direct entry, to code which
- * also handles indirect entries and so will shmem_swp_unmap,
- * we must arrange for the preempt count to remain in balance.
- * What kmap_atomic of a lowmem page does depends on config
- * and architecture, so pretend to kmap_atomic some lowmem page.
- */
- (void) kmap_atomic(ZERO_PAGE(0), KM_USER1);
-}
-
-static inline void shmem_swp_unmap(swp_entry_t *entry)
-{
- kunmap_atomic(entry, KM_USER1);
-}
-
static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
{
return sb->s_fs_info;
@@ -244,15 +170,6 @@ static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
static LIST_HEAD(shmem_swaplist);
static DEFINE_MUTEX(shmem_swaplist_mutex);
-static void shmem_free_blocks(struct inode *inode, long pages)
-{
- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
- if (sbinfo->max_blocks) {
- percpu_counter_add(&sbinfo->used_blocks, -pages);
- inode->i_blocks -= pages*BLOCKS_PER_PAGE;
- }
-}
-
static int shmem_reserve_inode(struct super_block *sb)
{
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
@@ -279,7 +196,7 @@ static void shmem_free_inode(struct super_block *sb)
}
/**
- * shmem_recalc_inode - recalculate the size of an inode
+ * shmem_recalc_inode - recalculate the block usage of an inode
* @inode: inode to recalc
*
* We have to calculate the free blocks since the mm can drop
@@ -297,474 +214,297 @@ static void shmem_recalc_inode(struct inode *inode)
freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
if (freed > 0) {
+ struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ if (sbinfo->max_blocks)
+ percpu_counter_add(&sbinfo->used_blocks, -freed);
info->alloced -= freed;
+ inode->i_blocks -= freed * BLOCKS_PER_PAGE;
shmem_unacct_blocks(info->flags, freed);
- shmem_free_blocks(inode, freed);
}
}
-/**
- * shmem_swp_entry - find the swap vector position in the info structure
- * @info: info structure for the inode
- * @index: index of the page to find
- * @page: optional page to add to the structure. Has to be preset to
- * all zeros
- *
- * If there is no space allocated yet it will return NULL when
- * page is NULL, else it will use the page for the needed block,
- * setting it to NULL on return to indicate that it has been used.
- *
- * The swap vector is organized the following way:
- *
- * There are SHMEM_NR_DIRECT entries directly stored in the
- * shmem_inode_info structure. So small files do not need an addional
- * allocation.
- *
- * For pages with index > SHMEM_NR_DIRECT there is the pointer
- * i_indirect which points to a page which holds in the first half
- * doubly indirect blocks, in the second half triple indirect blocks:
- *
- * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
- * following layout (for SHMEM_NR_DIRECT == 16):
- *
- * i_indirect -> dir --> 16-19
- * | +-> 20-23
- * |
- * +-->dir2 --> 24-27
- * | +-> 28-31
- * | +-> 32-35
- * | +-> 36-39
- * |
- * +-->dir3 --> 40-43
- * +-> 44-47
- * +-> 48-51
- * +-> 52-55
+/*
+ * Replace item expected in radix tree by a new item, while holding tree lock.
*/
-static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page)
-{
- unsigned long offset;
- struct page **dir;
- struct page *subdir;
-
- if (index < SHMEM_NR_DIRECT) {
- shmem_swp_balance_unmap();
- return info->i_direct+index;
- }
- if (!info->i_indirect) {
- if (page) {
- info->i_indirect = *page;
- *page = NULL;
- }
- return NULL; /* need another page */
- }
-
- index -= SHMEM_NR_DIRECT;
- offset = index % ENTRIES_PER_PAGE;
- index /= ENTRIES_PER_PAGE;
- dir = shmem_dir_map(info->i_indirect);
-
- if (index >= ENTRIES_PER_PAGE/2) {
- index -= ENTRIES_PER_PAGE/2;
- dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
- index %= ENTRIES_PER_PAGE;
- subdir = *dir;
- if (!subdir) {
- if (page) {
- *dir = *page;
- *page = NULL;
- }
- shmem_dir_unmap(dir);
- return NULL; /* need another page */
- }
- shmem_dir_unmap(dir);
- dir = shmem_dir_map(subdir);
- }
+static int shmem_radix_tree_replace(struct address_space *mapping,
+ pgoff_t index, void *expected, void *replacement)
+{
+ void **pslot;
+ void *item = NULL;
+
+ VM_BUG_ON(!expected);
+ pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
+ if (pslot)
+ item = radix_tree_deref_slot_protected(pslot,
+ &mapping->tree_lock);
+ if (item != expected)
+ return -ENOENT;
+ if (replacement)
+ radix_tree_replace_slot(pslot, replacement);
+ else
+ radix_tree_delete(&mapping->page_tree, index);
+ return 0;
+}
- dir += index;
- subdir = *dir;
- if (!subdir) {
- if (!page || !(subdir = *page)) {
- shmem_dir_unmap(dir);
- return NULL; /* need a page */
+/*
+ * Like add_to_page_cache_locked, but error if expected item has gone.
+ */
+static int shmem_add_to_page_cache(struct page *page,
+ struct address_space *mapping,
+ pgoff_t index, gfp_t gfp, void *expected)
+{
+ int error = 0;
+
+ VM_BUG_ON(!PageLocked(page));
+ VM_BUG_ON(!PageSwapBacked(page));
+
+ if (!expected)
+ error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
+ if (!error) {
+ page_cache_get(page);
+ page->mapping = mapping;
+ page->index = index;
+
+ spin_lock_irq(&mapping->tree_lock);
+ if (!expected)
+ error = radix_tree_insert(&mapping->page_tree,
+ index, page);
+ else
+ error = shmem_radix_tree_replace(mapping, index,
+ expected, page);
+ if (!error) {
+ mapping->nrpages++;
+ __inc_zone_page_state(page, NR_FILE_PAGES);
+ __inc_zone_page_state(page, NR_SHMEM);
+ spin_unlock_irq(&mapping->tree_lock);
+ } else {
+ page->mapping = NULL;
+ spin_unlock_irq(&mapping->tree_lock);
+ page_cache_release(page);
}
- *dir = subdir;
- *page = NULL;
+ if (!expected)
+ radix_tree_preload_end();
}
- shmem_dir_unmap(dir);
- return shmem_swp_map(subdir) + offset;
+ if (error)
+ mem_cgroup_uncharge_cache_page(page);
+ return error;
}
-static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value)
+/*
+ * Like delete_from_page_cache, but substitutes swap for page.
+ */
+static void shmem_delete_from_page_cache(struct page *page, void *radswap)
{
- long incdec = value? 1: -1;
+ struct address_space *mapping = page->mapping;
+ int error;
- entry->val = value;
- info->swapped += incdec;
- if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) {
- struct page *page = kmap_atomic_to_page(entry);
- set_page_private(page, page_private(page) + incdec);
- }
+ spin_lock_irq(&mapping->tree_lock);
+ error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
+ page->mapping = NULL;
+ mapping->nrpages--;
+ __dec_zone_page_state(page, NR_FILE_PAGES);
+ __dec_zone_page_state(page, NR_SHMEM);
+ spin_unlock_irq(&mapping->tree_lock);
+ page_cache_release(page);
+ BUG_ON(error);
}
-/**
- * shmem_swp_alloc - get the position of the swap entry for the page.
- * @info: info structure for the inode
- * @index: index of the page to find
- * @sgp: check and recheck i_size? skip allocation?
- * @gfp: gfp mask to use for any page allocation
- *
- * If the entry does not exist, allocate it.
+/*
+ * Like find_get_pages, but collecting swap entries as well as pages.
*/
-static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info,
- unsigned long index, enum sgp_type sgp, gfp_t gfp)
-{
- struct inode *inode = &info->vfs_inode;
- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
- struct page *page = NULL;
- swp_entry_t *entry;
-
- if (sgp != SGP_WRITE &&
- ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
- return ERR_PTR(-EINVAL);
-
- while (!(entry = shmem_swp_entry(info, index, &page))) {
- if (sgp == SGP_READ)
- return shmem_swp_map(ZERO_PAGE(0));
- /*
- * Test used_blocks against 1 less max_blocks, since we have 1 data
- * page (and perhaps indirect index pages) yet to allocate:
- * a waste to allocate index if we cannot allocate data.
- */
- if (sbinfo->max_blocks) {
- if (percpu_counter_compare(&sbinfo->used_blocks,
- sbinfo->max_blocks - 1) >= 0)
- return ERR_PTR(-ENOSPC);
- percpu_counter_inc(&sbinfo->used_blocks);
- inode->i_blocks += BLOCKS_PER_PAGE;
+static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping,
+ pgoff_t start, unsigned int nr_pages,
+ struct page **pages, pgoff_t *indices)
+{
+ unsigned int i;
+ unsigned int ret;
+ unsigned int nr_found;
+
+ rcu_read_lock();
+restart:
+ nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+ (void ***)pages, indices, start, nr_pages);
+ ret = 0;
+ for (i = 0; i < nr_found; i++) {
+ struct page *page;
+repeat:
+ page = radix_tree_deref_slot((void **)pages[i]);
+ if (unlikely(!page))
+ continue;
+ if (radix_tree_exception(page)) {
+ if (radix_tree_deref_retry(page))
+ goto restart;
+ /*
+ * Otherwise, we must be storing a swap entry
+ * here as an exceptional entry: so return it
+ * without attempting to raise page count.
+ */
+ goto export;
}
+ if (!page_cache_get_speculative(page))
+ goto repeat;
- spin_unlock(&info->lock);
- page = shmem_dir_alloc(gfp);
- spin_lock(&info->lock);
-
- if (!page) {
- shmem_free_blocks(inode, 1);
- return ERR_PTR(-ENOMEM);
- }
- if (sgp != SGP_WRITE &&
- ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
- entry = ERR_PTR(-EINVAL);
- break;
+ /* Has the page moved? */
+ if (unlikely(page != *((void **)pages[i]))) {
+ page_cache_release(page);
+ goto repeat;
}
- if (info->next_index <= index)
- info->next_index = index + 1;
- }
- if (page) {
- /* another task gave its page, or truncated the file */
- shmem_free_blocks(inode, 1);
- shmem_dir_free(page);
- }
- if (info->next_index <= index && !IS_ERR(entry))
- info->next_index = index + 1;
- return entry;
+export:
+ indices[ret] = indices[i];
+ pages[ret] = page;
+ ret++;
+ }
+ if (unlikely(!ret && nr_found))
+ goto restart;
+ rcu_read_unlock();
+ return ret;
}
-/**
- * shmem_free_swp - free some swap entries in a directory
- * @dir: pointer to the directory
- * @edir: pointer after last entry of the directory
- * @punch_lock: pointer to spinlock when needed for the holepunch case
+/*
+ * Remove swap entry from radix tree, free the swap and its page cache.
*/
-static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir,
- spinlock_t *punch_lock)
-{
- spinlock_t *punch_unlock = NULL;
- swp_entry_t *ptr;
- int freed = 0;
-
- for (ptr = dir; ptr < edir; ptr++) {
- if (ptr->val) {
- if (unlikely(punch_lock)) {
- punch_unlock = punch_lock;
- punch_lock = NULL;
- spin_lock(punch_unlock);
- if (!ptr->val)
- continue;
- }
- free_swap_and_cache(*ptr);
- *ptr = (swp_entry_t){0};
- freed++;
- }
- }
- if (punch_unlock)
- spin_unlock(punch_unlock);
- return freed;
-}
-
-static int shmem_map_and_free_swp(struct page *subdir, int offset,
- int limit, struct page ***dir, spinlock_t *punch_lock)
-{
- swp_entry_t *ptr;
- int freed = 0;
-
- ptr = shmem_swp_map(subdir);
- for (; offset < limit; offset += LATENCY_LIMIT) {
- int size = limit - offset;
- if (size > LATENCY_LIMIT)
- size = LATENCY_LIMIT;
- freed += shmem_free_swp(ptr+offset, ptr+offset+size,
- punch_lock);
- if (need_resched()) {
- shmem_swp_unmap(ptr);
- if (*dir) {
- shmem_dir_unmap(*dir);
- *dir = NULL;
- }
- cond_resched();
- ptr = shmem_swp_map(subdir);
- }
- }
- shmem_swp_unmap(ptr);
- return freed;
+static int shmem_free_swap(struct address_space *mapping,
+ pgoff_t index, void *radswap)
+{
+ int error;
+
+ spin_lock_irq(&mapping->tree_lock);
+ error = shmem_radix_tree_replace(mapping, index, radswap, NULL);
+ spin_unlock_irq(&mapping->tree_lock);
+ if (!error)
+ free_swap_and_cache(radix_to_swp_entry(radswap));
+ return error;
}
-static void shmem_free_pages(struct list_head *next)
+/*
+ * Pagevec may contain swap entries, so shuffle up pages before releasing.
+ */
+static void shmem_pagevec_release(struct pagevec *pvec)
{
- struct page *page;
- int freed = 0;
-
- do {
- page = container_of(next, struct page, lru);
- next = next->next;
- shmem_dir_free(page);
- freed++;
- if (freed >= LATENCY_LIMIT) {
- cond_resched();
- freed = 0;
- }
- } while (next);
+ int i, j;
+
+ for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
+ struct page *page = pvec->pages[i];
+ if (!radix_tree_exceptional_entry(page))
+ pvec->pages[j++] = page;
+ }
+ pvec->nr = j;
+ pagevec_release(pvec);
}
-void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
+/*
+ * Remove range of pages and swap entries from radix tree, and free them.
+ */
+void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
{
+ struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
- unsigned long idx;
- unsigned long size;
- unsigned long limit;
- unsigned long stage;
- unsigned long diroff;
- struct page **dir;
- struct page *topdir;
- struct page *middir;
- struct page *subdir;
- swp_entry_t *ptr;
- LIST_HEAD(pages_to_free);
- long nr_pages_to_free = 0;
+ pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
+ pgoff_t end = (lend >> PAGE_CACHE_SHIFT);
+ struct pagevec pvec;
+ pgoff_t indices[PAGEVEC_SIZE];
long nr_swaps_freed = 0;
- int offset;
- int freed;
- int punch_hole;
- spinlock_t *needs_lock;
- spinlock_t *punch_lock;
- unsigned long upper_limit;
+ pgoff_t index;
+ int i;
- truncate_inode_pages_range(inode->i_mapping, start, end);
+ BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
- inode->i_ctime = inode->i_mtime = CURRENT_TIME;
- idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- if (idx >= info->next_index)
- return;
+ pagevec_init(&pvec, 0);
+ index = start;
+ while (index <= end) {
+ pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+ pvec.pages, indices);
+ if (!pvec.nr)
+ break;
+ mem_cgroup_uncharge_start();
+ for (i = 0; i < pagevec_count(&pvec); i++) {
+ struct page *page = pvec.pages[i];
- spin_lock(&info->lock);
- info->flags |= SHMEM_TRUNCATE;
- if (likely(end == (loff_t) -1)) {
- limit = info->next_index;
- upper_limit = SHMEM_MAX_INDEX;
- info->next_index = idx;
- needs_lock = NULL;
- punch_hole = 0;
- } else {
- if (end + 1 >= inode->i_size) { /* we may free a little more */
- limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
- upper_limit = SHMEM_MAX_INDEX;
- } else {
- limit = (end + 1) >> PAGE_CACHE_SHIFT;
- upper_limit = limit;
- }
- needs_lock = &info->lock;
- punch_hole = 1;
- }
+ index = indices[i];
+ if (index > end)
+ break;
+
+ if (radix_tree_exceptional_entry(page)) {
+ nr_swaps_freed += !shmem_free_swap(mapping,
+ index, page);
+ continue;
+ }
- topdir = info->i_indirect;
- if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) {
- info->i_indirect = NULL;
- nr_pages_to_free++;
- list_add(&topdir->lru, &pages_to_free);
+ if (!trylock_page(page))
+ continue;
+ if (page->mapping == mapping) {
+ VM_BUG_ON(PageWriteback(page));
+ truncate_inode_page(mapping, page);
+ }
+ unlock_page(page);
+ }
+ shmem_pagevec_release(&pvec);
+ mem_cgroup_uncharge_end();
+ cond_resched();
+ index++;
}
- spin_unlock(&info->lock);
- if (info->swapped && idx < SHMEM_NR_DIRECT) {
- ptr = info->i_direct;
- size = limit;
- if (size > SHMEM_NR_DIRECT)
- size = SHMEM_NR_DIRECT;
- nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock);
+ if (partial) {
+ struct page *page = NULL;
+ shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
+ if (page) {
+ zero_user_segment(page, partial, PAGE_CACHE_SIZE);
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+ }
}
- /*
- * If there are no indirect blocks or we are punching a hole
- * below indirect blocks, nothing to be done.
- */
- if (!topdir || limit <= SHMEM_NR_DIRECT)
- goto done2;
+ index = start;
+ for ( ; ; ) {
+ cond_resched();
+ pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+ pvec.pages, indices);
+ if (!pvec.nr) {
+ if (index == start)
+ break;
+ index = start;
+ continue;
+ }
+ if (index == start && indices[0] > end) {
+ shmem_pagevec_release(&pvec);
+ break;
+ }
+ mem_cgroup_uncharge_start();
+ for (i = 0; i < pagevec_count(&pvec); i++) {
+ struct page *page = pvec.pages[i];
- /*
- * The truncation case has already dropped info->lock, and we're safe
- * because i_size and next_index have already been lowered, preventing
- * access beyond. But in the punch_hole case, we still need to take
- * the lock when updating the swap directory, because there might be
- * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
- * shmem_writepage. However, whenever we find we can remove a whole
- * directory page (not at the misaligned start or end of the range),
- * we first NULLify its pointer in the level above, and then have no
- * need to take the lock when updating its contents: needs_lock and
- * punch_lock (either pointing to info->lock or NULL) manage this.
- */
+ index = indices[i];
+ if (index > end)
+ break;
- upper_limit -= SHMEM_NR_DIRECT;
- limit -= SHMEM_NR_DIRECT;
- idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
- offset = idx % ENTRIES_PER_PAGE;
- idx -= offset;
-
- dir = shmem_dir_map(topdir);
- stage = ENTRIES_PER_PAGEPAGE/2;
- if (idx < ENTRIES_PER_PAGEPAGE/2) {
- middir = topdir;
- diroff = idx/ENTRIES_PER_PAGE;
- } else {
- dir += ENTRIES_PER_PAGE/2;
- dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE;
- while (stage <= idx)
- stage += ENTRIES_PER_PAGEPAGE;
- middir = *dir;
- if (*dir) {
- diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
- ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
- if (!diroff && !offset && upper_limit >= stage) {
- if (needs_lock) {
- spin_lock(needs_lock);
- *dir = NULL;
- spin_unlock(needs_lock);
- needs_lock = NULL;
- } else
- *dir = NULL;
- nr_pages_to_free++;
- list_add(&middir->lru, &pages_to_free);
+ if (radix_tree_exceptional_entry(page)) {
+ nr_swaps_freed += !shmem_free_swap(mapping,
+ index, page);
+ continue;
}
- shmem_dir_unmap(dir);
- dir = shmem_dir_map(middir);
- } else {
- diroff = 0;
- offset = 0;
- idx = stage;
- }
- }
- for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) {
- if (unlikely(idx == stage)) {
- shmem_dir_unmap(dir);
- dir = shmem_dir_map(topdir) +
- ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
- while (!*dir) {
- dir++;
- idx += ENTRIES_PER_PAGEPAGE;
- if (idx >= limit)
- goto done1;
- }
- stage = idx + ENTRIES_PER_PAGEPAGE;
- middir = *dir;
- if (punch_hole)
- needs_lock = &info->lock;
- if (upper_limit >= stage) {
- if (needs_lock) {
- spin_lock(needs_lock);
- *dir = NULL;
- spin_unlock(needs_lock);
- needs_lock = NULL;
- } else
- *dir = NULL;
- nr_pages_to_free++;
- list_add(&middir->lru, &pages_to_free);
+ lock_page(page);
+ if (page->mapping == mapping) {
+ VM_BUG_ON(PageWriteback(page));
+ truncate_inode_page(mapping, page);
}
- shmem_dir_unmap(dir);
- cond_resched();
- dir = shmem_dir_map(middir);
- diroff = 0;
- }
- punch_lock = needs_lock;
- subdir = dir[diroff];
- if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
- if (needs_lock) {
- spin_lock(needs_lock);
- dir[diroff] = NULL;
- spin_unlock(needs_lock);
- punch_lock = NULL;
- } else
- dir[diroff] = NULL;
- nr_pages_to_free++;
- list_add(&subdir->lru, &pages_to_free);
- }
- if (subdir && page_private(subdir) /* has swap entries */) {
- size = limit - idx;
- if (size > ENTRIES_PER_PAGE)
- size = ENTRIES_PER_PAGE;
- freed = shmem_map_and_free_swp(subdir,
- offset, size, &dir, punch_lock);
- if (!dir)
- dir = shmem_dir_map(middir);
- nr_swaps_freed += freed;
- if (offset || punch_lock) {
- spin_lock(&info->lock);
- set_page_private(subdir,
- page_private(subdir) - freed);
- spin_unlock(&info->lock);
- } else
- BUG_ON(page_private(subdir) != freed);
+ unlock_page(page);
}
- offset = 0;
- }
-done1:
- shmem_dir_unmap(dir);
-done2:
- if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
- /*
- * Call truncate_inode_pages again: racing shmem_unuse_inode
- * may have swizzled a page in from swap since
- * truncate_pagecache or generic_delete_inode did it, before we
- * lowered next_index. Also, though shmem_getpage checks
- * i_size before adding to cache, no recheck after: so fix the
- * narrow window there too.
- */
- truncate_inode_pages_range(inode->i_mapping, start, end);
+ shmem_pagevec_release(&pvec);
+ mem_cgroup_uncharge_end();
+ index++;
}
spin_lock(&info->lock);
- info->flags &= ~SHMEM_TRUNCATE;
info->swapped -= nr_swaps_freed;
- if (nr_pages_to_free)
- shmem_free_blocks(inode, nr_pages_to_free);
shmem_recalc_inode(inode);
spin_unlock(&info->lock);
- /*
- * Empty swap vector directory pages to be freed?
- */
- if (!list_empty(&pages_to_free)) {
- pages_to_free.prev->next = NULL;
- shmem_free_pages(pages_to_free.next);
- }
+ inode->i_ctime = inode->i_mtime = CURRENT_TIME;
}
EXPORT_SYMBOL_GPL(shmem_truncate_range);
@@ -780,37 +520,7 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
loff_t oldsize = inode->i_size;
loff_t newsize = attr->ia_size;
- struct page *page = NULL;
- if (newsize < oldsize) {
- /*
- * If truncating down to a partial page, then
- * if that page is already allocated, hold it
- * in memory until the truncation is over, so
- * truncate_partial_page cannot miss it were
- * it assigned to swap.
- */
- if (newsize & (PAGE_CACHE_SIZE-1)) {
- (void) shmem_getpage(inode,
- newsize >> PAGE_CACHE_SHIFT,
- &page, SGP_READ, NULL);
- if (page)
- unlock_page(page);
- }
- /*
- * Reset SHMEM_PAGEIN flag so that shmem_truncate can
- * detect if any pages might have been added to cache
- * after truncate_inode_pages. But we needn't bother
- * if it's being fully truncated to zero-length: the
- * nrpages check is efficient enough in that case.
- */
- if (newsize) {
- struct shmem_inode_info *info = SHMEM_I(inode);
- spin_lock(&info->lock);
- info->flags &= ~SHMEM_PAGEIN;
- spin_unlock(&info->lock);
- }
- }
if (newsize != oldsize) {
i_size_write(inode, newsize);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
@@ -822,8 +532,6 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
/* unmap again to remove racily COWed private pages */
unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
}
- if (page)
- page_cache_release(page);
}
setattr_copy(inode, attr);
@@ -848,7 +556,8 @@ static void shmem_evict_inode(struct inode *inode)
list_del_init(&info->swaplist);
mutex_unlock(&shmem_swaplist_mutex);
}
- }
+ } else
+ kfree(info->symlink);
list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) {
kfree(xattr->name);
@@ -859,106 +568,27 @@ static void shmem_evict_inode(struct inode *inode)
end_writeback(inode);
}
-static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
-{
- swp_entry_t *ptr;
-
- for (ptr = dir; ptr < edir; ptr++) {
- if (ptr->val == entry.val)
- return ptr - dir;
- }
- return -1;
-}
-
-static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
+/*
+ * If swap found in inode, free it and move page from swapcache to filecache.
+ */
+static int shmem_unuse_inode(struct shmem_inode_info *info,
+ swp_entry_t swap, struct page *page)
{
- struct address_space *mapping;
- unsigned long idx;
- unsigned long size;
- unsigned long limit;
- unsigned long stage;
- struct page **dir;
- struct page *subdir;
- swp_entry_t *ptr;
- int offset;
+ struct address_space *mapping = info->vfs_inode.i_mapping;
+ void *radswap;
+ pgoff_t index;
int error;
- idx = 0;
- ptr = info->i_direct;
- spin_lock(&info->lock);
- if (!info->swapped) {
- list_del_init(&info->swaplist);
- goto lost2;
- }
- limit = info->next_index;
- size = limit;
- if (size > SHMEM_NR_DIRECT)
- size = SHMEM_NR_DIRECT;
- offset = shmem_find_swp(entry, ptr, ptr+size);
- if (offset >= 0) {
- shmem_swp_balance_unmap();
- goto found;
- }
- if (!info->i_indirect)
- goto lost2;
-
- dir = shmem_dir_map(info->i_indirect);
- stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2;
-
- for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
- if (unlikely(idx == stage)) {
- shmem_dir_unmap(dir-1);
- if (cond_resched_lock(&info->lock)) {
- /* check it has not been truncated */
- if (limit > info->next_index) {
- limit = info->next_index;
- if (idx >= limit)
- goto lost2;
- }
- }
- dir = shmem_dir_map(info->i_indirect) +
- ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
- while (!*dir) {
- dir++;
- idx += ENTRIES_PER_PAGEPAGE;
- if (idx >= limit)
- goto lost1;
- }
- stage = idx + ENTRIES_PER_PAGEPAGE;
- subdir = *dir;
- shmem_dir_unmap(dir);
- dir = shmem_dir_map(subdir);
- }
- subdir = *dir;
- if (subdir && page_private(subdir)) {
- ptr = shmem_swp_map(subdir);
- size = limit - idx;
- if (size > ENTRIES_PER_PAGE)
- size = ENTRIES_PER_PAGE;
- offset = shmem_find_swp(entry, ptr, ptr+size);
- shmem_swp_unmap(ptr);
- if (offset >= 0) {
- shmem_dir_unmap(dir);
- ptr = shmem_swp_map(subdir);
- goto found;
- }
- }
- }
-lost1:
- shmem_dir_unmap(dir-1);
-lost2:
- spin_unlock(&info->lock);
- return 0;
-found:
- idx += offset;
- ptr += offset;
+ radswap = swp_to_radix_entry(swap);
+ index = radix_tree_locate_item(&mapping->page_tree, radswap);
+ if (index == -1)
+ return 0;
/*
* Move _head_ to start search for next from here.
* But be careful: shmem_evict_inode checks list_empty without taking
* mutex, and there's an instant in list_move_tail when info->swaplist
- * would appear empty, if it were the only one on shmem_swaplist. We
- * could avoid doing it if inode NULL; or use this minor optimization.
+ * would appear empty, if it were the only one on shmem_swaplist.
*/
if (shmem_swaplist.next != &info->swaplist)
list_move_tail(&shmem_swaplist, &info->swaplist);
@@ -968,29 +598,34 @@ found:
* but also to hold up shmem_evict_inode(): so inode cannot be freed
* beneath us (pagelock doesn't help until the page is in pagecache).
*/
- mapping = info->vfs_inode.i_mapping;
- error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT);
+ error = shmem_add_to_page_cache(page, mapping, index,
+ GFP_NOWAIT, radswap);
/* which does mem_cgroup_uncharge_cache_page on error */
if (error != -ENOMEM) {
+ /*
+ * Truncation and eviction use free_swap_and_cache(), which
+ * only does trylock page: if we raced, best clean up here.
+ */
delete_from_swap_cache(page);
set_page_dirty(page);
- info->flags |= SHMEM_PAGEIN;
- shmem_swp_set(info, ptr, 0);
- swap_free(entry);
+ if (!error) {
+ spin_lock(&info->lock);
+ info->swapped--;
+ spin_unlock(&info->lock);
+ swap_free(swap);
+ }
error = 1; /* not an error, but entry was found */
}
- shmem_swp_unmap(ptr);
- spin_unlock(&info->lock);
return error;
}
/*
- * shmem_unuse() search for an eventually swapped out shmem page.
+ * Search through swapped inodes to find and replace swap by page.
*/
-int shmem_unuse(swp_entry_t entry, struct page *page)
+int shmem_unuse(swp_entry_t swap, struct page *page)
{
- struct list_head *p, *next;
+ struct list_head *this, *next;
struct shmem_inode_info *info;
int found = 0;
int error;
@@ -999,32 +634,25 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
* Charge page using GFP_KERNEL while we can wait, before taking
* the shmem_swaplist_mutex which might hold up shmem_writepage().
* Charged back to the user (not to caller) when swap account is used.
- * add_to_page_cache() will be called with GFP_NOWAIT.
*/
error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
if (error)
goto out;
- /*
- * Try to preload while we can wait, to not make a habit of
- * draining atomic reserves; but don't latch on to this cpu,
- * it's okay if sometimes we get rescheduled after this.
- */
- error = radix_tree_preload(GFP_KERNEL);
- if (error)
- goto uncharge;
- radix_tree_preload_end();
+ /* No radix_tree_preload: swap entry keeps a place for page in tree */
mutex_lock(&shmem_swaplist_mutex);
- list_for_each_safe(p, next, &shmem_swaplist) {
- info = list_entry(p, struct shmem_inode_info, swaplist);
- found = shmem_unuse_inode(info, entry, page);
+ list_for_each_safe(this, next, &shmem_swaplist) {
+ info = list_entry(this, struct shmem_inode_info, swaplist);
+ if (info->swapped)
+ found = shmem_unuse_inode(info, swap, page);
+ else
+ list_del_init(&info->swaplist);
cond_resched();
if (found)
break;
}
mutex_unlock(&shmem_swaplist_mutex);
-uncharge:
if (!found)
mem_cgroup_uncharge_cache_page(page);
if (found < 0)
@@ -1041,10 +669,10 @@ out:
static int shmem_writepage(struct page *page, struct writeback_control *wbc)
{
struct shmem_inode_info *info;
- swp_entry_t *entry, swap;
struct address_space *mapping;
- unsigned long index;
struct inode *inode;
+ swp_entry_t swap;
+ pgoff_t index;
BUG_ON(!PageLocked(page));
mapping = page->mapping;
@@ -1073,50 +701,32 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
/*
* Add inode to shmem_unuse()'s list of swapped-out inodes,
- * if it's not already there. Do it now because we cannot take
- * mutex while holding spinlock, and must do so before the page
- * is moved to swap cache, when its pagelock no longer protects
+ * if it's not already there. Do it now before the page is
+ * moved to swap cache, when its pagelock no longer protects
* the inode from eviction. But don't unlock the mutex until
- * we've taken the spinlock, because shmem_unuse_inode() will
- * prune a !swapped inode from the swaplist under both locks.
+ * we've incremented swapped, because shmem_unuse_inode() will
+ * prune a !swapped inode from the swaplist under this mutex.
*/
mutex_lock(&shmem_swaplist_mutex);
if (list_empty(&info->swaplist))
list_add_tail(&info->swaplist, &shmem_swaplist);
- spin_lock(&info->lock);
- mutex_unlock(&shmem_swaplist_mutex);
-
- if (index >= info->next_index) {
- BUG_ON(!(info->flags & SHMEM_TRUNCATE));
- goto unlock;
- }
- entry = shmem_swp_entry(info, index, NULL);
- if (entry->val) {
- WARN_ON_ONCE(1); /* Still happens? Tell us about it! */
- free_swap_and_cache(*entry);
- shmem_swp_set(info, entry, 0);
- }
- shmem_recalc_inode(inode);
-
if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
- delete_from_page_cache(page);
- shmem_swp_set(info, entry, swap.val);
- shmem_swp_unmap(entry);
swap_shmem_alloc(swap);
+ shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
+
+ spin_lock(&info->lock);
+ info->swapped++;
+ shmem_recalc_inode(inode);
spin_unlock(&info->lock);
+
+ mutex_unlock(&shmem_swaplist_mutex);
BUG_ON(page_mapped(page));
swap_writepage(page, wbc);
return 0;
}
- shmem_swp_unmap(entry);
-unlock:
- spin_unlock(&info->lock);
- /*
- * add_to_swap_cache() doesn't return -EEXIST, so we can safely
- * clear SWAP_HAS_CACHE flag.
- */
+ mutex_unlock(&shmem_swaplist_mutex);
swapcache_free(swap, NULL);
redirty:
set_page_dirty(page);
@@ -1153,35 +763,33 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
}
#endif /* CONFIG_TMPFS */
-static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
- struct shmem_inode_info *info, unsigned long idx)
+static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
+ struct shmem_inode_info *info, pgoff_t index)
{
struct mempolicy mpol, *spol;
struct vm_area_struct pvma;
- struct page *page;
spol = mpol_cond_copy(&mpol,
- mpol_shared_policy_lookup(&info->policy, idx));
+ mpol_shared_policy_lookup(&info->policy, index));
/* Create a pseudo vma that just contains the policy */
pvma.vm_start = 0;
- pvma.vm_pgoff = idx;
+ pvma.vm_pgoff = index;
pvma.vm_ops = NULL;
pvma.vm_policy = spol;
- page = swapin_readahead(entry, gfp, &pvma, 0);
- return page;
+ return swapin_readahead(swap, gfp, &pvma, 0);
}
static struct page *shmem_alloc_page(gfp_t gfp,
- struct shmem_inode_info *info, unsigned long idx)
+ struct shmem_inode_info *info, pgoff_t index)
{
struct vm_area_struct pvma;
/* Create a pseudo vma that just contains the policy */
pvma.vm_start = 0;
- pvma.vm_pgoff = idx;
+ pvma.vm_pgoff = index;
pvma.vm_ops = NULL;
- pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
+ pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
/*
* alloc_page_vma() will drop the shared policy reference
@@ -1190,19 +798,19 @@ static struct page *shmem_alloc_page(gfp_t gfp,
}
#else /* !CONFIG_NUMA */
#ifdef CONFIG_TMPFS
-static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p)
+static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
{
}
#endif /* CONFIG_TMPFS */
-static inline struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
- struct shmem_inode_info *info, unsigned long idx)
+static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
+ struct shmem_inode_info *info, pgoff_t index)
{
- return swapin_readahead(entry, gfp, NULL, 0);
+ return swapin_readahead(swap, gfp, NULL, 0);
}
static inline struct page *shmem_alloc_page(gfp_t gfp,
- struct shmem_inode_info *info, unsigned long idx)
+ struct shmem_inode_info *info, pgoff_t index)
{
return alloc_page(gfp);
}
@@ -1222,243 +830,190 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
* vm. If we swap it in we mark it dirty since we also free the swap
* entry since a page cannot live in both the swap and page cache
*/
-static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx,
+static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type)
{
struct address_space *mapping = inode->i_mapping;
- struct shmem_inode_info *info = SHMEM_I(inode);
+ struct shmem_inode_info *info;
struct shmem_sb_info *sbinfo;
struct page *page;
- struct page *prealloc_page = NULL;
- swp_entry_t *entry;
swp_entry_t swap;
int error;
- int ret;
+ int once = 0;
- if (idx >= SHMEM_MAX_INDEX)
+ if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
return -EFBIG;
repeat:
- page = find_lock_page(mapping, idx);
- if (page) {
+ swap.val = 0;
+ page = find_lock_page(mapping, index);
+ if (radix_tree_exceptional_entry(page)) {
+ swap = radix_to_swp_entry(page);
+ page = NULL;
+ }
+
+ if (sgp != SGP_WRITE &&
+ ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
+ error = -EINVAL;
+ goto failed;
+ }
+
+ if (page || (sgp == SGP_READ && !swap.val)) {
/*
* Once we can get the page lock, it must be uptodate:
* if there were an error in reading back from swap,
* the page would not be inserted into the filecache.
*/
- BUG_ON(!PageUptodate(page));
- goto done;
+ BUG_ON(page && !PageUptodate(page));
+ *pagep = page;
+ return 0;
}
/*
- * Try to preload while we can wait, to not make a habit of
- * draining atomic reserves; but don't latch on to this cpu.
+ * Fast cache lookup did not find it:
+ * bring it back from swap or allocate.
*/
- error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
- if (error)
- goto out;
- radix_tree_preload_end();
-
- if (sgp != SGP_READ && !prealloc_page) {
- prealloc_page = shmem_alloc_page(gfp, info, idx);
- if (prealloc_page) {
- SetPageSwapBacked(prealloc_page);
- if (mem_cgroup_cache_charge(prealloc_page,
- current->mm, GFP_KERNEL)) {
- page_cache_release(prealloc_page);
- prealloc_page = NULL;
- }
- }
- }
-
- spin_lock(&info->lock);
- shmem_recalc_inode(inode);
- entry = shmem_swp_alloc(info, idx, sgp, gfp);
- if (IS_ERR(entry)) {
- spin_unlock(&info->lock);
- error = PTR_ERR(entry);
- goto out;
- }
- swap = *entry;
+ info = SHMEM_I(inode);
+ sbinfo = SHMEM_SB(inode->i_sb);
if (swap.val) {
/* Look it up and read it in.. */
page = lookup_swap_cache(swap);
if (!page) {
- shmem_swp_unmap(entry);
- spin_unlock(&info->lock);
/* here we actually do the io */
if (fault_type)
*fault_type |= VM_FAULT_MAJOR;
- page = shmem_swapin(swap, gfp, info, idx);
+ page = shmem_swapin(swap, gfp, info, index);
if (!page) {
- spin_lock(&info->lock);
- entry = shmem_swp_alloc(info, idx, sgp, gfp);
- if (IS_ERR(entry))
- error = PTR_ERR(entry);
- else {
- if (entry->val == swap.val)
- error = -ENOMEM;
- shmem_swp_unmap(entry);
- }
- spin_unlock(&info->lock);
- if (error)
- goto out;
- goto repeat;
+ error = -ENOMEM;
+ goto failed;
}
- wait_on_page_locked(page);
- page_cache_release(page);
- goto repeat;
}
/* We have to do this with page locked to prevent races */
- if (!trylock_page(page)) {
- shmem_swp_unmap(entry);
- spin_unlock(&info->lock);
- wait_on_page_locked(page);
- page_cache_release(page);
- goto repeat;
- }
- if (PageWriteback(page)) {
- shmem_swp_unmap(entry);
- spin_unlock(&info->lock);
- wait_on_page_writeback(page);
- unlock_page(page);
- page_cache_release(page);
- goto repeat;
- }
+ lock_page(page);
if (!PageUptodate(page)) {
- shmem_swp_unmap(entry);
- spin_unlock(&info->lock);
- unlock_page(page);
- page_cache_release(page);
error = -EIO;
- goto out;
+ goto failed;
}
-
- error = add_to_page_cache_locked(page, mapping,
- idx, GFP_NOWAIT);
- if (error) {
- shmem_swp_unmap(entry);
- spin_unlock(&info->lock);
- if (error == -ENOMEM) {
- /*
- * reclaim from proper memory cgroup and
- * call memcg's OOM if needed.
- */
- error = mem_cgroup_shmem_charge_fallback(
- page, current->mm, gfp);
- if (error) {
- unlock_page(page);
- page_cache_release(page);
- goto out;
- }
- }
- unlock_page(page);
- page_cache_release(page);
- goto repeat;
+ wait_on_page_writeback(page);
+
+ /* Someone may have already done it for us */
+ if (page->mapping) {
+ if (page->mapping == mapping &&
+ page->index == index)
+ goto done;
+ error = -EEXIST;
+ goto failed;
}
- info->flags |= SHMEM_PAGEIN;
- shmem_swp_set(info, entry, 0);
- shmem_swp_unmap(entry);
- delete_from_swap_cache(page);
+ error = mem_cgroup_cache_charge(page, current->mm,
+ gfp & GFP_RECLAIM_MASK);
+ if (!error)
+ error = shmem_add_to_page_cache(page, mapping, index,
+ gfp, swp_to_radix_entry(swap));
+ if (error)
+ goto failed;
+
+ spin_lock(&info->lock);
+ info->swapped--;
+ shmem_recalc_inode(inode);
spin_unlock(&info->lock);
+
+ delete_from_swap_cache(page);
set_page_dirty(page);
swap_free(swap);
- } else if (sgp == SGP_READ) {
- shmem_swp_unmap(entry);
- page = find_get_page(mapping, idx);
- if (page && !trylock_page(page)) {
- spin_unlock(&info->lock);
- wait_on_page_locked(page);
- page_cache_release(page);
- goto repeat;
+ } else {
+ if (shmem_acct_block(info->flags)) {
+ error = -ENOSPC;
+ goto failed;
}
- spin_unlock(&info->lock);
-
- } else if (prealloc_page) {
- shmem_swp_unmap(entry);
- sbinfo = SHMEM_SB(inode->i_sb);
if (sbinfo->max_blocks) {
if (percpu_counter_compare(&sbinfo->used_blocks,
- sbinfo->max_blocks) >= 0 ||
- shmem_acct_block(info->flags))
- goto nospace;
+ sbinfo->max_blocks) >= 0) {
+ error = -ENOSPC;
+ goto unacct;
+ }
percpu_counter_inc(&sbinfo->used_blocks);
- inode->i_blocks += BLOCKS_PER_PAGE;
- } else if (shmem_acct_block(info->flags))
- goto nospace;
-
- page = prealloc_page;
- prealloc_page = NULL;
-
- entry = shmem_swp_alloc(info, idx, sgp, gfp);
- if (IS_ERR(entry))
- error = PTR_ERR(entry);
- else {
- swap = *entry;
- shmem_swp_unmap(entry);
}
- ret = error || swap.val;
- if (ret)
- mem_cgroup_uncharge_cache_page(page);
- else
- ret = add_to_page_cache_lru(page, mapping,
- idx, GFP_NOWAIT);
- /*
- * At add_to_page_cache_lru() failure,
- * uncharge will be done automatically.
- */
- if (ret) {
- shmem_unacct_blocks(info->flags, 1);
- shmem_free_blocks(inode, 1);
- spin_unlock(&info->lock);
- page_cache_release(page);
- if (error)
- goto out;
- goto repeat;
+
+ page = shmem_alloc_page(gfp, info, index);
+ if (!page) {
+ error = -ENOMEM;
+ goto decused;
}
- info->flags |= SHMEM_PAGEIN;
+ SetPageSwapBacked(page);
+ __set_page_locked(page);
+ error = mem_cgroup_cache_charge(page, current->mm,
+ gfp & GFP_RECLAIM_MASK);
+ if (!error)
+ error = shmem_add_to_page_cache(page, mapping, index,
+ gfp, NULL);
+ if (error)
+ goto decused;
+ lru_cache_add_anon(page);
+
+ spin_lock(&info->lock);
info->alloced++;
+ inode->i_blocks += BLOCKS_PER_PAGE;
+ shmem_recalc_inode(inode);
spin_unlock(&info->lock);
+
clear_highpage(page);
flush_dcache_page(page);
SetPageUptodate(page);
if (sgp == SGP_DIRTY)
set_page_dirty(page);
-
- } else {
- spin_unlock(&info->lock);
- error = -ENOMEM;
- goto out;
}
done:
- *pagep = page;
- error = 0;
-out:
- if (prealloc_page) {
- mem_cgroup_uncharge_cache_page(prealloc_page);
- page_cache_release(prealloc_page);
+ /* Perhaps the file has been truncated since we checked */
+ if (sgp != SGP_WRITE &&
+ ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
+ error = -EINVAL;
+ goto trunc;
}
- return error;
+ *pagep = page;
+ return 0;
-nospace:
/*
- * Perhaps the page was brought in from swap between find_lock_page
- * and taking info->lock? We allow for that at add_to_page_cache_lru,
- * but must also avoid reporting a spurious ENOSPC while working on a
- * full tmpfs.
+ * Error recovery.
*/
- page = find_get_page(mapping, idx);
+trunc:
+ ClearPageDirty(page);
+ delete_from_page_cache(page);
+ spin_lock(&info->lock);
+ info->alloced--;
+ inode->i_blocks -= BLOCKS_PER_PAGE;
spin_unlock(&info->lock);
+decused:
+ if (sbinfo->max_blocks)
+ percpu_counter_add(&sbinfo->used_blocks, -1);
+unacct:
+ shmem_unacct_blocks(info->flags, 1);
+failed:
+ if (swap.val && error != -EINVAL) {
+ struct page *test = find_get_page(mapping, index);
+ if (test && !radix_tree_exceptional_entry(test))
+ page_cache_release(test);
+ /* Have another try if the entry has changed */
+ if (test != swp_to_radix_entry(swap))
+ error = -EEXIST;
+ }
if (page) {
+ unlock_page(page);
page_cache_release(page);
+ }
+ if (error == -ENOSPC && !once++) {
+ info = SHMEM_I(inode);
+ spin_lock(&info->lock);
+ shmem_recalc_inode(inode);
+ spin_unlock(&info->lock);
goto repeat;
}
- error = -ENOSPC;
- goto out;
+ if (error == -EEXIST)
+ goto repeat;
+ return error;
}
static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -1467,9 +1022,6 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
int error;
int ret = VM_FAULT_LOCKED;
- if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
- return VM_FAULT_SIGBUS;
-
error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
if (error)
return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
@@ -1482,20 +1034,20 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
}
#ifdef CONFIG_NUMA
-static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
+static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
{
- struct inode *i = vma->vm_file->f_path.dentry->d_inode;
- return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new);
+ struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+ return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
}
static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
unsigned long addr)
{
- struct inode *i = vma->vm_file->f_path.dentry->d_inode;
- unsigned long idx;
+ struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+ pgoff_t index;
- idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
- return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx);
+ index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+ return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
}
#endif
@@ -1593,7 +1145,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
#ifdef CONFIG_TMPFS
static const struct inode_operations shmem_symlink_inode_operations;
-static const struct inode_operations shmem_symlink_inline_operations;
+static const struct inode_operations shmem_short_symlink_operations;
static int
shmem_write_begin(struct file *file, struct address_space *mapping,
@@ -1626,7 +1178,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
{
struct inode *inode = filp->f_path.dentry->d_inode;
struct address_space *mapping = inode->i_mapping;
- unsigned long index, offset;
+ pgoff_t index;
+ unsigned long offset;
enum sgp_type sgp = SGP_READ;
/*
@@ -1642,7 +1195,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
for (;;) {
struct page *page = NULL;
- unsigned long end_index, nr, ret;
+ pgoff_t end_index;
+ unsigned long nr, ret;
loff_t i_size = i_size_read(inode);
end_index = i_size >> PAGE_CACHE_SHIFT;
@@ -1880,8 +1434,9 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_namelen = NAME_MAX;
if (sbinfo->max_blocks) {
buf->f_blocks = sbinfo->max_blocks;
- buf->f_bavail = buf->f_bfree =
- sbinfo->max_blocks - percpu_counter_sum(&sbinfo->used_blocks);
+ buf->f_bavail =
+ buf->f_bfree = sbinfo->max_blocks -
+ percpu_counter_sum(&sbinfo->used_blocks);
}
if (sbinfo->max_inodes) {
buf->f_files = sbinfo->max_inodes;
@@ -2055,10 +1610,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
info = SHMEM_I(inode);
inode->i_size = len-1;
- if (len <= SHMEM_SYMLINK_INLINE_LEN) {
- /* do it inline */
- memcpy(info->inline_symlink, symname, len);
- inode->i_op = &shmem_symlink_inline_operations;
+ if (len <= SHORT_SYMLINK_LEN) {
+ info->symlink = kmemdup(symname, len, GFP_KERNEL);
+ if (!info->symlink) {
+ iput(inode);
+ return -ENOMEM;
+ }
+ inode->i_op = &shmem_short_symlink_operations;
} else {
error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
if (error) {
@@ -2081,17 +1639,17 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
return 0;
}
-static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
+static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd)
{
- nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink);
+ nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink);
return NULL;
}
static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct page *page = NULL;
- int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
- nd_set_link(nd, res ? ERR_PTR(res) : kmap(page));
+ int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
+ nd_set_link(nd, error ? ERR_PTR(error) : kmap(page));
if (page)
unlock_page(page);
return page;
@@ -2202,7 +1760,6 @@ out:
return err;
}
-
static const struct xattr_handler *shmem_xattr_handlers[] = {
#ifdef CONFIG_TMPFS_POSIX_ACL
&generic_acl_access_handler,
@@ -2332,9 +1889,9 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
}
#endif /* CONFIG_TMPFS_XATTR */
-static const struct inode_operations shmem_symlink_inline_operations = {
+static const struct inode_operations shmem_short_symlink_operations = {
.readlink = generic_readlink,
- .follow_link = shmem_follow_link_inline,
+ .follow_link = shmem_follow_short_symlink,
#ifdef CONFIG_TMPFS_XATTR
.setxattr = shmem_setxattr,
.getxattr = shmem_getxattr,
@@ -2534,8 +2091,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
if (config.max_inodes < inodes)
goto out;
/*
- * Those tests also disallow limited->unlimited while any are in
- * use, so i_blocks will always be zero when max_blocks is zero;
+ * Those tests disallow limited->unlimited while any are in use;
* but we must separately disallow unlimited->limited, because
* in that case we have no record of how much is already in use.
*/
@@ -2627,7 +2183,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
goto failed;
sbinfo->free_inodes = sbinfo->max_inodes;
- sb->s_maxbytes = SHMEM_MAX_BYTES;
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
sb->s_magic = TMPFS_MAGIC;
@@ -2662,14 +2218,14 @@ static struct kmem_cache *shmem_inode_cachep;
static struct inode *shmem_alloc_inode(struct super_block *sb)
{
- struct shmem_inode_info *p;
- p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
- if (!p)
+ struct shmem_inode_info *info;
+ info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
+ if (!info)
return NULL;
- return &p->vfs_inode;
+ return &info->vfs_inode;
}
-static void shmem_i_callback(struct rcu_head *head)
+static void shmem_destroy_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
INIT_LIST_HEAD(&inode->i_dentry);
@@ -2678,29 +2234,26 @@ static void shmem_i_callback(struct rcu_head *head)
static void shmem_destroy_inode(struct inode *inode)
{
- if ((inode->i_mode & S_IFMT) == S_IFREG) {
- /* only struct inode is valid if it's an inline symlink */
+ if ((inode->i_mode & S_IFMT) == S_IFREG)
mpol_free_shared_policy(&SHMEM_I(inode)->policy);
- }
- call_rcu(&inode->i_rcu, shmem_i_callback);
+ call_rcu(&inode->i_rcu, shmem_destroy_callback);
}
-static void init_once(void *foo)
+static void shmem_init_inode(void *foo)
{
- struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
-
- inode_init_once(&p->vfs_inode);
+ struct shmem_inode_info *info = foo;
+ inode_init_once(&info->vfs_inode);
}
-static int init_inodecache(void)
+static int shmem_init_inodecache(void)
{
shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
sizeof(struct shmem_inode_info),
- 0, SLAB_PANIC, init_once);
+ 0, SLAB_PANIC, shmem_init_inode);
return 0;
}
-static void destroy_inodecache(void)
+static void shmem_destroy_inodecache(void)
{
kmem_cache_destroy(shmem_inode_cachep);
}
@@ -2797,21 +2350,20 @@ static const struct vm_operations_struct shmem_vm_ops = {
#endif
};
-
static struct dentry *shmem_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
return mount_nodev(fs_type, flags, data, shmem_fill_super);
}
-static struct file_system_type tmpfs_fs_type = {
+static struct file_system_type shmem_fs_type = {
.owner = THIS_MODULE,
.name = "tmpfs",
.mount = shmem_mount,
.kill_sb = kill_litter_super,
};
-int __init init_tmpfs(void)
+int __init shmem_init(void)
{
int error;
@@ -2819,18 +2371,18 @@ int __init init_tmpfs(void)
if (error)
goto out4;
- error = init_inodecache();
+ error = shmem_init_inodecache();
if (error)
goto out3;
- error = register_filesystem(&tmpfs_fs_type);
+ error = register_filesystem(&shmem_fs_type);
if (error) {
printk(KERN_ERR "Could not register tmpfs\n");
goto out2;
}
- shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER,
- tmpfs_fs_type.name, NULL);
+ shm_mnt = vfs_kern_mount(&shmem_fs_type, MS_NOUSER,
+ shmem_fs_type.name, NULL);
if (IS_ERR(shm_mnt)) {
error = PTR_ERR(shm_mnt);
printk(KERN_ERR "Could not kern_mount tmpfs\n");
@@ -2839,9 +2391,9 @@ int __init init_tmpfs(void)
return 0;
out1:
- unregister_filesystem(&tmpfs_fs_type);
+ unregister_filesystem(&shmem_fs_type);
out2:
- destroy_inodecache();
+ shmem_destroy_inodecache();
out3:
bdi_destroy(&shmem_backing_dev_info);
out4:
@@ -2849,45 +2401,6 @@ out4:
return error;
}
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-/**
- * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
- * @inode: the inode to be searched
- * @pgoff: the offset to be searched
- * @pagep: the pointer for the found page to be stored
- * @ent: the pointer for the found swap entry to be stored
- *
- * If a page is found, refcount of it is incremented. Callers should handle
- * these refcount.
- */
-void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
- struct page **pagep, swp_entry_t *ent)
-{
- swp_entry_t entry = { .val = 0 }, *ptr;
- struct page *page = NULL;
- struct shmem_inode_info *info = SHMEM_I(inode);
-
- if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
- goto out;
-
- spin_lock(&info->lock);
- ptr = shmem_swp_entry(info, pgoff, NULL);
-#ifdef CONFIG_SWAP
- if (ptr && ptr->val) {
- entry.val = ptr->val;
- page = find_get_page(&swapper_space, entry.val);
- } else
-#endif
- page = find_get_page(inode->i_mapping, pgoff);
- if (ptr)
- shmem_swp_unmap(ptr);
- spin_unlock(&info->lock);
-out:
- *pagep = page;
- *ent = entry;
-}
-#endif
-
#else /* !CONFIG_SHMEM */
/*
@@ -2901,23 +2414,23 @@ out:
#include <linux/ramfs.h>
-static struct file_system_type tmpfs_fs_type = {
+static struct file_system_type shmem_fs_type = {
.name = "tmpfs",
.mount = ramfs_mount,
.kill_sb = kill_litter_super,
};
-int __init init_tmpfs(void)
+int __init shmem_init(void)
{
- BUG_ON(register_filesystem(&tmpfs_fs_type) != 0);
+ BUG_ON(register_filesystem(&shmem_fs_type) != 0);
- shm_mnt = kern_mount(&tmpfs_fs_type);
+ shm_mnt = kern_mount(&shmem_fs_type);
BUG_ON(IS_ERR(shm_mnt));
return 0;
}
-int shmem_unuse(swp_entry_t entry, struct page *page)
+int shmem_unuse(swp_entry_t swap, struct page *page)
{
return 0;
}
@@ -2927,43 +2440,17 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
return 0;
}
-void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
+void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
{
- truncate_inode_pages_range(inode->i_mapping, start, end);
+ truncate_inode_pages_range(inode->i_mapping, lstart, lend);
}
EXPORT_SYMBOL_GPL(shmem_truncate_range);
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-/**
- * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
- * @inode: the inode to be searched
- * @pgoff: the offset to be searched
- * @pagep: the pointer for the found page to be stored
- * @ent: the pointer for the found swap entry to be stored
- *
- * If a page is found, refcount of it is incremented. Callers should handle
- * these refcount.
- */
-void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
- struct page **pagep, swp_entry_t *ent)
-{
- struct page *page = NULL;
-
- if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
- goto out;
- page = find_get_page(inode->i_mapping, pgoff);
-out:
- *pagep = page;
- *ent = (swp_entry_t){ .val = 0 };
-}
-#endif
-
#define shmem_vm_ops generic_file_vm_ops
#define shmem_file_operations ramfs_file_operations
#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
#define shmem_acct_size(flags, size) 0
#define shmem_unacct_size(flags, size) do {} while (0)
-#define SHMEM_MAX_BYTES MAX_LFS_FILESIZE
#endif /* CONFIG_SHMEM */
@@ -2987,7 +2474,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
if (IS_ERR(shm_mnt))
return (void *)shm_mnt;
- if (size < 0 || size > SHMEM_MAX_BYTES)
+ if (size < 0 || size > MAX_LFS_FILESIZE)
return ERR_PTR(-EINVAL);
if (shmem_acct_size(flags, size))
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1b8c33907242..17bc224bce68 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1924,20 +1924,24 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
/*
* Find out how many pages are allowed for a single swap
- * device. There are two limiting factors: 1) the number of
- * bits for the swap offset in the swp_entry_t type and
- * 2) the number of bits in the a swap pte as defined by
- * the different architectures. In order to find the
- * largest possible bit mask a swap entry with swap type 0
+ * device. There are three limiting factors: 1) the number
+ * of bits for the swap offset in the swp_entry_t type, and
+ * 2) the number of bits in the swap pte as defined by the
+ * the different architectures, and 3) the number of free bits
+ * in an exceptional radix_tree entry. In order to find the
+ * largest possible bit mask, a swap entry with swap type 0
* and swap offset ~0UL is created, encoded to a swap pte,
- * decoded to a swp_entry_t again and finally the swap
+ * decoded to a swp_entry_t again, and finally the swap
* offset is extracted. This will mask all the bits from
* the initial ~0UL mask that can't be encoded in either
* the swp_entry_t or the architecture definition of a
- * swap pte.
+ * swap pte. Then the same is done for a radix_tree entry.
*/
maxpages = swp_offset(pte_to_swp_entry(
- swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
+ swp_entry_to_pte(swp_entry(0, ~0UL))));
+ maxpages = swp_offset(radix_to_swp_entry(
+ swp_to_radix_entry(swp_entry(0, maxpages)))) + 1;
+
if (maxpages > swap_header->info.last_page) {
maxpages = swap_header->info.last_page + 1;
/* p->max is an unsigned int: don't overflow it */
diff --git a/mm/truncate.c b/mm/truncate.c
index 232eb2736a79..b40ac6d4e86e 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -336,6 +336,14 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
unsigned long count = 0;
int i;
+ /*
+ * Note: this function may get called on a shmem/tmpfs mapping:
+ * pagevec_lookup() might then return 0 prematurely (because it
+ * got a gangful of swap entries); but it's hardly worth worrying
+ * about - it can rarely have anything to free from such a mapping
+ * (most pages are dirty), and already skips over any difficulties.
+ */
+
pagevec_init(&pvec, 0);
while (index <= end && pagevec_lookup(&pvec, mapping, index,
min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c
index 5fb2e28e796f..91cdf9435fec 100644
--- a/sound/core/pcm_compat.c
+++ b/sound/core/pcm_compat.c
@@ -342,7 +342,7 @@ static int snd_pcm_ioctl_xfern_compat(struct snd_pcm_substream *substream,
kfree(bufs);
return -EFAULT;
}
- bufs[ch] = compat_ptr(ptr);
+ bufs[i] = compat_ptr(ptr);
bufptr++;
}
if (dir == SNDRV_PCM_STREAM_PLAYBACK)
diff --git a/sound/core/rtctimer.c b/sound/core/rtctimer.c
index 0851cd13e303..e85e72baff9e 100644
--- a/sound/core/rtctimer.c
+++ b/sound/core/rtctimer.c
@@ -22,7 +22,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
-#include <linux/moduleparam.h>
+#include <linux/module.h>
#include <linux/log2.h>
#include <sound/core.h>
#include <sound/timer.h>
diff --git a/sound/pci/asihpi/hpidspcd.c b/sound/pci/asihpi/hpidspcd.c
index 3a7afa31c1d8..71d32c868c92 100644
--- a/sound/pci/asihpi/hpidspcd.c
+++ b/sound/pci/asihpi/hpidspcd.c
@@ -43,6 +43,7 @@ short hpi_dsp_code_open(u32 adapter, void *os_data, struct dsp_code *dsp_code,
struct pci_dev *dev = os_data;
struct code_header header;
char fw_name[20];
+ short err_ret = HPI_ERROR_DSP_FILE_NOT_FOUND;
int err;
sprintf(fw_name, "asihpi/dsp%04x.bin", adapter);
@@ -85,8 +86,10 @@ short hpi_dsp_code_open(u32 adapter, void *os_data, struct dsp_code *dsp_code,
HPI_DEBUG_LOG(DEBUG, "dsp code %s opened\n", fw_name);
dsp_code->pvt = kmalloc(sizeof(*dsp_code->pvt), GFP_KERNEL);
- if (!dsp_code->pvt)
- return HPI_ERROR_MEMORY_ALLOC;
+ if (!dsp_code->pvt) {
+ err_ret = HPI_ERROR_MEMORY_ALLOC;
+ goto error2;
+ }
dsp_code->pvt->dev = dev;
dsp_code->pvt->firmware = firmware;
@@ -99,7 +102,7 @@ error2:
release_firmware(firmware);
error1:
dsp_code->block_length = 0;
- return HPI_ERROR_DSP_FILE_NOT_FOUND;
+ return err_ret;
}
/*-------------------------------------------------------------------*/
diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c
index 9683f84ecdc8..a32502e796de 100644
--- a/sound/pci/asihpi/hpioctl.c
+++ b/sound/pci/asihpi/hpioctl.c
@@ -177,16 +177,21 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
} else {
u16 __user *ptr = NULL;
u32 size = 0;
-
+ u32 adapter_present;
/* -1=no data 0=read from user mem, 1=write to user mem */
int wrflag = -1;
- u32 adapter = hm->h.adapter_index;
- struct hpi_adapter *pa = &adapters[adapter];
+ struct hpi_adapter *pa;
+
+ if (hm->h.adapter_index < HPI_MAX_ADAPTERS) {
+ pa = &adapters[hm->h.adapter_index];
+ adapter_present = pa->type;
+ } else {
+ adapter_present = 0;
+ }
- if ((adapter >= HPI_MAX_ADAPTERS) || (!pa->type)) {
- hpi_init_response(&hr->r0, HPI_OBJ_ADAPTER,
- HPI_ADAPTER_OPEN,
- HPI_ERROR_BAD_ADAPTER_NUMBER);
+ if (!adapter_present) {
+ hpi_init_response(&hr->r0, hm->h.object,
+ hm->h.function, HPI_ERROR_BAD_ADAPTER_NUMBER);
uncopied_bytes =
copy_to_user(puhr, hr, sizeof(hr->h));
diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c
index af130ee0c45d..6edc67ced905 100644
--- a/sound/pci/rme9652/hdspm.c
+++ b/sound/pci/rme9652/hdspm.c
@@ -521,6 +521,7 @@ MODULE_SUPPORTED_DEVICE("{{RME HDSPM-MADI}}");
#define HDSPM_DMA_AREA_KILOBYTES (HDSPM_DMA_AREA_BYTES/1024)
/* revisions >= 230 indicate AES32 card */
+#define HDSPM_MADI_ANCIENT_REV 204
#define HDSPM_MADI_OLD_REV 207
#define HDSPM_MADI_REV 210
#define HDSPM_RAYDAT_REV 211
@@ -1217,6 +1218,22 @@ static int hdspm_external_sample_rate(struct hdspm *hdspm)
rate = 0;
break;
}
+
+ /* QS and DS rates normally can not be detected
+ * automatically by the card. Only exception is MADI
+ * in 96k frame mode.
+ *
+ * So if we read SS values (32 .. 48k), check for
+ * user-provided DS/QS bits in the control register
+ * and multiply the base frequency accordingly.
+ */
+ if (rate <= 48000) {
+ if (hdspm->control_register & HDSPM_QuadSpeed)
+ rate *= 4;
+ else if (hdspm->control_register &
+ HDSPM_DoubleSpeed)
+ rate *= 2;
+ }
}
break;
}
@@ -3415,6 +3432,91 @@ static int snd_hdspm_put_qs_wire(struct snd_kcontrol *kcontrol,
return change;
}
+#define HDSPM_MADI_SPEEDMODE(xname, xindex) \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+ .name = xname, \
+ .index = xindex, \
+ .info = snd_hdspm_info_madi_speedmode, \
+ .get = snd_hdspm_get_madi_speedmode, \
+ .put = snd_hdspm_put_madi_speedmode \
+}
+
+static int hdspm_madi_speedmode(struct hdspm *hdspm)
+{
+ if (hdspm->control_register & HDSPM_QuadSpeed)
+ return 2;
+ if (hdspm->control_register & HDSPM_DoubleSpeed)
+ return 1;
+ return 0;
+}
+
+static int hdspm_set_madi_speedmode(struct hdspm *hdspm, int mode)
+{
+ hdspm->control_register &= ~(HDSPM_DoubleSpeed | HDSPM_QuadSpeed);
+ switch (mode) {
+ case 0:
+ break;
+ case 1:
+ hdspm->control_register |= HDSPM_DoubleSpeed;
+ break;
+ case 2:
+ hdspm->control_register |= HDSPM_QuadSpeed;
+ break;
+ }
+ hdspm_write(hdspm, HDSPM_controlRegister, hdspm->control_register);
+
+ return 0;
+}
+
+static int snd_hdspm_info_madi_speedmode(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_info *uinfo)
+{
+ static char *texts[] = { "Single", "Double", "Quad" };
+
+ uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+ uinfo->count = 1;
+ uinfo->value.enumerated.items = 3;
+
+ if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items)
+ uinfo->value.enumerated.item =
+ uinfo->value.enumerated.items - 1;
+ strcpy(uinfo->value.enumerated.name,
+ texts[uinfo->value.enumerated.item]);
+
+ return 0;
+}
+
+static int snd_hdspm_get_madi_speedmode(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct hdspm *hdspm = snd_kcontrol_chip(kcontrol);
+
+ spin_lock_irq(&hdspm->lock);
+ ucontrol->value.enumerated.item[0] = hdspm_madi_speedmode(hdspm);
+ spin_unlock_irq(&hdspm->lock);
+ return 0;
+}
+
+static int snd_hdspm_put_madi_speedmode(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct hdspm *hdspm = snd_kcontrol_chip(kcontrol);
+ int change;
+ int val;
+
+ if (!snd_hdspm_use_is_exclusive(hdspm))
+ return -EBUSY;
+ val = ucontrol->value.integer.value[0];
+ if (val < 0)
+ val = 0;
+ if (val > 2)
+ val = 2;
+ spin_lock_irq(&hdspm->lock);
+ change = val != hdspm_madi_speedmode(hdspm);
+ hdspm_set_madi_speedmode(hdspm, val);
+ spin_unlock_irq(&hdspm->lock);
+ return change;
+}
#define HDSPM_MIXER(xname, xindex) \
{ .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \
@@ -4289,7 +4391,8 @@ static struct snd_kcontrol_new snd_hdspm_controls_madi[] = {
HDSPM_TX_64("TX 64 channels mode", 0),
HDSPM_C_TMS("Clear Track Marker", 0),
HDSPM_SAFE_MODE("Safe Mode", 0),
- HDSPM_INPUT_SELECT("Input Select", 0)
+ HDSPM_INPUT_SELECT("Input Select", 0),
+ HDSPM_MADI_SPEEDMODE("MADI Speed Mode", 0)
};
@@ -4302,7 +4405,8 @@ static struct snd_kcontrol_new snd_hdspm_controls_madiface[] = {
HDSPM_SYNC_CHECK("MADI SyncCheck", 0),
HDSPM_TX_64("TX 64 channels mode", 0),
HDSPM_C_TMS("Clear Track Marker", 0),
- HDSPM_SAFE_MODE("Safe Mode", 0)
+ HDSPM_SAFE_MODE("Safe Mode", 0),
+ HDSPM_MADI_SPEEDMODE("MADI Speed Mode", 0)
};
static struct snd_kcontrol_new snd_hdspm_controls_aio[] = {
@@ -6381,6 +6485,7 @@ static int __devinit snd_hdspm_create(struct snd_card *card,
switch (hdspm->firmware_rev) {
case HDSPM_MADI_REV:
case HDSPM_MADI_OLD_REV:
+ case HDSPM_MADI_ANCIENT_REV:
hdspm->io_type = MADI;
hdspm->card_name = "RME MADI";
hdspm->midiPorts = 3;
diff --git a/sound/soc/txx9/txx9aclc.c b/sound/soc/txx9/txx9aclc.c
index 34aa972669ed..3de99af8cb82 100644
--- a/sound/soc/txx9/txx9aclc.c
+++ b/sound/soc/txx9/txx9aclc.c
@@ -290,6 +290,7 @@ static void txx9aclc_pcm_free_dma_buffers(struct snd_pcm *pcm)
static int txx9aclc_pcm_new(struct snd_soc_pcm_runtime *rtd)
{
+ struct snd_card *card = rtd->card->snd_card;
struct snd_soc_dai *dai = rtd->cpu_dai;
struct snd_pcm *pcm = rtd->pcm;
struct platform_device *pdev = to_platform_device(dai->platform->dev);
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 6d8ef4a3a9b5..8b2d37b59c9e 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -128,34 +128,34 @@ unsigned long long get_msr(int cpu, off_t offset)
void print_header(void)
{
if (show_pkg)
- fprintf(stderr, "pkg ");
+ fprintf(stderr, "pk");
if (show_core)
- fprintf(stderr, "core");
+ fprintf(stderr, " cr");
if (show_cpu)
fprintf(stderr, " CPU");
if (do_nhm_cstates)
- fprintf(stderr, " %%c0 ");
+ fprintf(stderr, " %%c0 ");
if (has_aperf)
- fprintf(stderr, " GHz");
+ fprintf(stderr, " GHz");
fprintf(stderr, " TSC");
if (do_nhm_cstates)
- fprintf(stderr, " %%c1 ");
+ fprintf(stderr, " %%c1");
if (do_nhm_cstates)
- fprintf(stderr, " %%c3 ");
+ fprintf(stderr, " %%c3");
if (do_nhm_cstates)
- fprintf(stderr, " %%c6 ");
+ fprintf(stderr, " %%c6");
if (do_snb_cstates)
- fprintf(stderr, " %%c7 ");
+ fprintf(stderr, " %%c7");
if (do_snb_cstates)
- fprintf(stderr, " %%pc2 ");
+ fprintf(stderr, " %%pc2");
if (do_nhm_cstates)
- fprintf(stderr, " %%pc3 ");
+ fprintf(stderr, " %%pc3");
if (do_nhm_cstates)
- fprintf(stderr, " %%pc6 ");
+ fprintf(stderr, " %%pc6");
if (do_snb_cstates)
- fprintf(stderr, " %%pc7 ");
+ fprintf(stderr, " %%pc7");
if (extra_msr_offset)
- fprintf(stderr, " MSR 0x%x ", extra_msr_offset);
+ fprintf(stderr, " MSR 0x%x ", extra_msr_offset);
putc('\n', stderr);
}
@@ -194,14 +194,14 @@ void print_cnt(struct counters *p)
/* topology columns, print blanks on 1st (average) line */
if (p == cnt_average) {
if (show_pkg)
- fprintf(stderr, " ");
+ fprintf(stderr, " ");
if (show_core)
fprintf(stderr, " ");
if (show_cpu)
fprintf(stderr, " ");
} else {
if (show_pkg)
- fprintf(stderr, "%4d", p->pkg);
+ fprintf(stderr, "%d", p->pkg);
if (show_core)
fprintf(stderr, "%4d", p->core);
if (show_cpu)
@@ -241,22 +241,22 @@ void print_cnt(struct counters *p)
if (!skip_c1)
fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc);
else
- fprintf(stderr, " ****");
+ fprintf(stderr, " ****");
}
if (do_nhm_cstates)
- fprintf(stderr, "%7.2f", 100.0 * p->c3/p->tsc);
+ fprintf(stderr, " %6.2f", 100.0 * p->c3/p->tsc);
if (do_nhm_cstates)
- fprintf(stderr, "%7.2f", 100.0 * p->c6/p->tsc);
+ fprintf(stderr, " %6.2f", 100.0 * p->c6/p->tsc);
if (do_snb_cstates)
- fprintf(stderr, "%7.2f", 100.0 * p->c7/p->tsc);
+ fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc);
if (do_snb_cstates)
- fprintf(stderr, "%7.2f", 100.0 * p->pc2/p->tsc);
+ fprintf(stderr, " %5.2f", 100.0 * p->pc2/p->tsc);
if (do_nhm_cstates)
- fprintf(stderr, "%7.2f", 100.0 * p->pc3/p->tsc);
+ fprintf(stderr, " %5.2f", 100.0 * p->pc3/p->tsc);
if (do_nhm_cstates)
- fprintf(stderr, "%7.2f", 100.0 * p->pc6/p->tsc);
+ fprintf(stderr, " %5.2f", 100.0 * p->pc6/p->tsc);
if (do_snb_cstates)
- fprintf(stderr, "%7.2f", 100.0 * p->pc7/p->tsc);
+ fprintf(stderr, " %5.2f", 100.0 * p->pc7/p->tsc);
if (extra_msr_offset)
fprintf(stderr, " 0x%016llx", p->extra_msr);
putc('\n', stderr);
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
index 2618ef2ba31f..33c5c7ee148f 100644
--- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -137,7 +137,6 @@ void cmdline(int argc, char **argv)
void validate_cpuid(void)
{
unsigned int eax, ebx, ecx, edx, max_level;
- char brand[16];
unsigned int fms, family, model, stepping;
eax = ebx = ecx = edx = 0;
@@ -160,8 +159,8 @@ void validate_cpuid(void)
model += ((fms >> 16) & 0xf) << 4;
if (verbose > 1)
- printf("CPUID %s %d levels family:model:stepping "
- "0x%x:%x:%x (%d:%d:%d)\n", brand, max_level,
+ printf("CPUID %d levels family:model:stepping "
+ "0x%x:%x:%x (%d:%d:%d)\n", max_level,
family, model, stepping, family, model, stepping);
if (!(edx & (1 << 5))) {