diff options
244 files changed, 8251 insertions, 5757 deletions
diff --git a/Documentation/ABI/obsolete/o2cb b/Documentation/ABI/removed/o2cb index 9c49d8e6c0cc..7f5daa465093 100644 --- a/Documentation/ABI/obsolete/o2cb +++ b/Documentation/ABI/removed/o2cb @@ -1,11 +1,10 @@ What: /sys/o2cb symlink -Date: Dec 2005 -KernelVersion: 2.6.16 +Date: May 2011 +KernelVersion: 2.6.40 Contact: ocfs2-devel@oss.oracle.com -Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink will - be removed when new versions of ocfs2-tools which know to look +Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink is + removed when new versions of ocfs2-tools which know to look in /sys/fs/o2cb are sufficiently prevalent. Don't code new software to look here, it should try /sys/fs/o2cb instead. - See Documentation/ABI/stable/o2cb for more information on usage. Users: ocfs2-tools. It's sufficient to mail proposed changes to ocfs2-devel@oss.oracle.com. diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-cleancache b/Documentation/ABI/testing/sysfs-kernel-mm-cleancache new file mode 100644 index 000000000000..662ae646ea12 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-mm-cleancache @@ -0,0 +1,11 @@ +What: /sys/kernel/mm/cleancache/ +Date: April 2011 +Contact: Dan Magenheimer <dan.magenheimer@oracle.com> +Description: + /sys/kernel/mm/cleancache/ contains a number of files which + record a count of various cleancache operations + (sum across all filesystems): + succ_gets + failed_gets + puts + flushes diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 95788ad2506c..ff31b1cc50aa 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -262,16 +262,6 @@ Who: Michael Buesch <mb@bu3sch.de> --------------------------- -What: /sys/o2cb symlink -When: January 2010 -Why: /sys/fs/o2cb is the proper location for this information - /sys/o2cb - exists as a symlink for backwards compatibility for old versions of - ocfs2-tools. 2 years should be sufficient time to phase in new versions - which know to look in /sys/fs/o2cb. -Who: ocfs2-devel@oss.oracle.com - ---------------------------- - What: Ability for non root users to shm_get hugetlb pages based on mlock resource limits When: 2.6.31 diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index c79ec58fd7f6..3ae9bc94352a 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -226,10 +226,6 @@ acl Enables POSIX Access Control Lists support. noacl This option disables POSIX Access Control List support. -reservation - -noreservation - bsddf (*) Make 'df' act like BSD. minixdf Make 'df' act like Minix. diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index 9ed920a8cd79..7618a287aa41 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt @@ -46,9 +46,15 @@ errors=panic Panic and halt the machine if an error occurs. intr (*) Allow signals to interrupt cluster operations. nointr Do not allow signals to interrupt cluster operations. +noatime Do not update access time. +relatime(*) Update atime if the previous atime is older than + mtime or ctime +strictatime Always update atime, but the minimum update interval + is specified by atime_quantum. atime_quantum=60(*) OCFS2 will not update atime unless this number of seconds has passed since the last update. - Set to zero to always update atime. + Set to zero to always update atime. This option need + work with strictatime. data=ordered (*) All data are forced directly out to the main file system prior to its metadata being committed to the journal. diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt index 7bff3e4f35df..3fc0c31a6f5d 100644 --- a/Documentation/filesystems/xfs.txt +++ b/Documentation/filesystems/xfs.txt @@ -39,6 +39,12 @@ When mounting an XFS filesystem, the following options are accepted. drive level write caching to be enabled, for devices that support write barriers. + discard + Issue command to let the block device reclaim space freed by the + filesystem. This is useful for SSD devices, thinly provisioned + LUNs and virtual machine images, but may have a performance + impact. This option is incompatible with the nodelaylog option. + dmapi Enable the DMAPI (Data Management API) event callouts. Use with the "mtpt" option. diff --git a/Documentation/vm/cleancache.txt b/Documentation/vm/cleancache.txt new file mode 100644 index 000000000000..36c367c73084 --- /dev/null +++ b/Documentation/vm/cleancache.txt @@ -0,0 +1,278 @@ +MOTIVATION + +Cleancache is a new optional feature provided by the VFS layer that +potentially dramatically increases page cache effectiveness for +many workloads in many environments at a negligible cost. + +Cleancache can be thought of as a page-granularity victim cache for clean +pages that the kernel's pageframe replacement algorithm (PFRA) would like +to keep around, but can't since there isn't enough memory. So when the +PFRA "evicts" a page, it first attempts to use cleancache code to +put the data contained in that page into "transcendent memory", memory +that is not directly accessible or addressable by the kernel and is +of unknown and possibly time-varying size. + +Later, when a cleancache-enabled filesystem wishes to access a page +in a file on disk, it first checks cleancache to see if it already +contains it; if it does, the page of data is copied into the kernel +and a disk access is avoided. + +Transcendent memory "drivers" for cleancache are currently implemented +in Xen (using hypervisor memory) and zcache (using in-kernel compressed +memory) and other implementations are in development. + +FAQs are included below. + +IMPLEMENTATION OVERVIEW + +A cleancache "backend" that provides transcendent memory registers itself +to the kernel's cleancache "frontend" by calling cleancache_register_ops, +passing a pointer to a cleancache_ops structure with funcs set appropriately. +Note that cleancache_register_ops returns the previous settings so that +chaining can be performed if desired. The functions provided must conform to +certain semantics as follows: + +Most important, cleancache is "ephemeral". Pages which are copied into +cleancache have an indefinite lifetime which is completely unknowable +by the kernel and so may or may not still be in cleancache at any later time. +Thus, as its name implies, cleancache is not suitable for dirty pages. +Cleancache has complete discretion over what pages to preserve and what +pages to discard and when. + +Mounting a cleancache-enabled filesystem should call "init_fs" to obtain a +pool id which, if positive, must be saved in the filesystem's superblock; +a negative return value indicates failure. A "put_page" will copy a +(presumably about-to-be-evicted) page into cleancache and associate it with +the pool id, a file key, and a page index into the file. (The combination +of a pool id, a file key, and an index is sometimes called a "handle".) +A "get_page" will copy the page, if found, from cleancache into kernel memory. +A "flush_page" will ensure the page no longer is present in cleancache; +a "flush_inode" will flush all pages associated with the specified file; +and, when a filesystem is unmounted, a "flush_fs" will flush all pages in +all files specified by the given pool id and also surrender the pool id. + +An "init_shared_fs", like init_fs, obtains a pool id but tells cleancache +to treat the pool as shared using a 128-bit UUID as a key. On systems +that may run multiple kernels (such as hard partitioned or virtualized +systems) that may share a clustered filesystem, and where cleancache +may be shared among those kernels, calls to init_shared_fs that specify the +same UUID will receive the same pool id, thus allowing the pages to +be shared. Note that any security requirements must be imposed outside +of the kernel (e.g. by "tools" that control cleancache). Or a +cleancache implementation can simply disable shared_init by always +returning a negative value. + +If a get_page is successful on a non-shared pool, the page is flushed (thus +making cleancache an "exclusive" cache). On a shared pool, the page +is NOT flushed on a successful get_page so that it remains accessible to +other sharers. The kernel is responsible for ensuring coherency between +cleancache (shared or not), the page cache, and the filesystem, using +cleancache flush operations as required. + +Note that cleancache must enforce put-put-get coherency and get-get +coherency. For the former, if two puts are made to the same handle but +with different data, say AAA by the first put and BBB by the second, a +subsequent get can never return the stale data (AAA). For get-get coherency, +if a get for a given handle fails, subsequent gets for that handle will +never succeed unless preceded by a successful put with that handle. + +Last, cleancache provides no SMP serialization guarantees; if two +different Linux threads are simultaneously putting and flushing a page +with the same handle, the results are indeterminate. Callers must +lock the page to ensure serial behavior. + +CLEANCACHE PERFORMANCE METRICS + +Cleancache monitoring is done by sysfs files in the +/sys/kernel/mm/cleancache directory. The effectiveness of cleancache +can be measured (across all filesystems) with: + +succ_gets - number of gets that were successful +failed_gets - number of gets that failed +puts - number of puts attempted (all "succeed") +flushes - number of flushes attempted + +A backend implementatation may provide additional metrics. + +FAQ + +1) Where's the value? (Andrew Morton) + +Cleancache provides a significant performance benefit to many workloads +in many environments with negligible overhead by improving the +effectiveness of the pagecache. Clean pagecache pages are +saved in transcendent memory (RAM that is otherwise not directly +addressable to the kernel); fetching those pages later avoids "refaults" +and thus disk reads. + +Cleancache (and its sister code "frontswap") provide interfaces for +this transcendent memory (aka "tmem"), which conceptually lies between +fast kernel-directly-addressable RAM and slower DMA/asynchronous devices. +Disallowing direct kernel or userland reads/writes to tmem +is ideal when data is transformed to a different form and size (such +as with compression) or secretly moved (as might be useful for write- +balancing for some RAM-like devices). Evicted page-cache pages (and +swap pages) are a great use for this kind of slower-than-RAM-but-much- +faster-than-disk transcendent memory, and the cleancache (and frontswap) +"page-object-oriented" specification provides a nice way to read and +write -- and indirectly "name" -- the pages. + +In the virtual case, the whole point of virtualization is to statistically +multiplex physical resources across the varying demands of multiple +virtual machines. This is really hard to do with RAM and efforts to +do it well with no kernel change have essentially failed (except in some +well-publicized special-case workloads). Cleancache -- and frontswap -- +with a fairly small impact on the kernel, provide a huge amount +of flexibility for more dynamic, flexible RAM multiplexing. +Specifically, the Xen Transcendent Memory backend allows otherwise +"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple +virtual machines, but the pages can be compressed and deduplicated to +optimize RAM utilization. And when guest OS's are induced to surrender +underutilized RAM (e.g. with "self-ballooning"), page cache pages +are the first to go, and cleancache allows those pages to be +saved and reclaimed if overall host system memory conditions allow. + +And the identical interface used for cleancache can be used in +physical systems as well. The zcache driver acts as a memory-hungry +device that stores pages of data in a compressed state. And +the proposed "RAMster" driver shares RAM across multiple physical +systems. + +2) Why does cleancache have its sticky fingers so deep inside the + filesystems and VFS? (Andrew Morton and Christoph Hellwig) + +The core hooks for cleancache in VFS are in most cases a single line +and the minimum set are placed precisely where needed to maintain +coherency (via cleancache_flush operations) between cleancache, +the page cache, and disk. All hooks compile into nothingness if +cleancache is config'ed off and turn into a function-pointer- +compare-to-NULL if config'ed on but no backend claims the ops +functions, or to a compare-struct-element-to-negative if a +backend claims the ops functions but a filesystem doesn't enable +cleancache. + +Some filesystems are built entirely on top of VFS and the hooks +in VFS are sufficient, so don't require an "init_fs" hook; the +initial implementation of cleancache didn't provide this hook. +But for some filesystems (such as btrfs), the VFS hooks are +incomplete and one or more hooks in fs-specific code are required. +And for some other filesystems, such as tmpfs, cleancache may +be counterproductive. So it seemed prudent to require a filesystem +to "opt in" to use cleancache, which requires adding a hook in +each filesystem. Not all filesystems are supported by cleancache +only because they haven't been tested. The existing set should +be sufficient to validate the concept, the opt-in approach means +that untested filesystems are not affected, and the hooks in the +existing filesystems should make it very easy to add more +filesystems in the future. + +The total impact of the hooks to existing fs and mm files is only +about 40 lines added (not counting comments and blank lines). + +3) Why not make cleancache asynchronous and batched so it can + more easily interface with real devices with DMA instead + of copying each individual page? (Minchan Kim) + +The one-page-at-a-time copy semantics simplifies the implementation +on both the frontend and backend and also allows the backend to +do fancy things on-the-fly like page compression and +page deduplication. And since the data is "gone" (copied into/out +of the pageframe) before the cleancache get/put call returns, +a great deal of race conditions and potential coherency issues +are avoided. While the interface seems odd for a "real device" +or for real kernel-addressable RAM, it makes perfect sense for +transcendent memory. + +4) Why is non-shared cleancache "exclusive"? And where is the + page "flushed" after a "get"? (Minchan Kim) + +The main reason is to free up space in transcendent memory and +to avoid unnecessary cleancache_flush calls. If you want inclusive, +the page can be "put" immediately following the "get". If +put-after-get for inclusive becomes common, the interface could +be easily extended to add a "get_no_flush" call. + +The flush is done by the cleancache backend implementation. + +5) What's the performance impact? + +Performance analysis has been presented at OLS'09 and LCA'10. +Briefly, performance gains can be significant on most workloads, +especially when memory pressure is high (e.g. when RAM is +overcommitted in a virtual workload); and because the hooks are +invoked primarily in place of or in addition to a disk read/write, +overhead is negligible even in worst case workloads. Basically +cleancache replaces I/O with memory-copy-CPU-overhead; on older +single-core systems with slow memory-copy speeds, cleancache +has little value, but in newer multicore machines, especially +consolidated/virtualized machines, it has great value. + +6) How do I add cleancache support for filesystem X? (Boaz Harrash) + +Filesystems that are well-behaved and conform to certain +restrictions can utilize cleancache simply by making a call to +cleancache_init_fs at mount time. Unusual, misbehaving, or +poorly layered filesystems must either add additional hooks +and/or undergo extensive additional testing... or should just +not enable the optional cleancache. + +Some points for a filesystem to consider: + +- The FS should be block-device-based (e.g. a ram-based FS such + as tmpfs should not enable cleancache) +- To ensure coherency/correctness, the FS must ensure that all + file removal or truncation operations either go through VFS or + add hooks to do the equivalent cleancache "flush" operations +- To ensure coherency/correctness, either inode numbers must + be unique across the lifetime of the on-disk file OR the + FS must provide an "encode_fh" function. +- The FS must call the VFS superblock alloc and deactivate routines + or add hooks to do the equivalent cleancache calls done there. +- To maximize performance, all pages fetched from the FS should + go through the do_mpag_readpage routine or the FS should add + hooks to do the equivalent (cf. btrfs) +- Currently, the FS blocksize must be the same as PAGESIZE. This + is not an architectural restriction, but no backends currently + support anything different. +- A clustered FS should invoke the "shared_init_fs" cleancache + hook to get best performance for some backends. + +7) Why not use the KVA of the inode as the key? (Christoph Hellwig) + +If cleancache would use the inode virtual address instead of +inode/filehandle, the pool id could be eliminated. But, this +won't work because cleancache retains pagecache data pages +persistently even when the inode has been pruned from the +inode unused list, and only flushes the data page if the file +gets removed/truncated. So if cleancache used the inode kva, +there would be potential coherency issues if/when the inode +kva is reused for a different file. Alternately, if cleancache +flushed the pages when the inode kva was freed, much of the value +of cleancache would be lost because the cache of pages in cleanache +is potentially much larger than the kernel pagecache and is most +useful if the pages survive inode cache removal. + +8) Why is a global variable required? + +The cleancache_enabled flag is checked in all of the frequently-used +cleancache hooks. The alternative is a function call to check a static +variable. Since cleancache is enabled dynamically at runtime, systems +that don't enable cleancache would suffer thousands (possibly +tens-of-thousands) of unnecessary function calls per second. So the +global variable allows cleancache to be enabled by default at compile +time, but have insignificant performance impact when cleancache remains +disabled at runtime. + +9) Does cleanache work with KVM? + +The memory model of KVM is sufficiently different that a cleancache +backend may have less value for KVM. This remains to be tested, +especially in an overcommitted system. + +10) Does cleancache work in userspace? It sounds useful for + memory hungry caches like web browsers. (Jamie Lokier) + +No plans yet, though we agree it sounds useful, at least for +apps that bypass the page cache (e.g. O_DIRECT). + +Last updated: Dan Magenheimer, April 13 2011 diff --git a/MAINTAINERS b/MAINTAINERS index b75366b8dfeb..21a871c0527a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3574,9 +3574,16 @@ M: Andrew Morton <akpm@linux-foundation.org> M: Jan Kara <jack@suse.cz> L: linux-ext4@vger.kernel.org S: Maintained -F: fs/jbd*/ -F: include/linux/ext*jbd*.h -F: include/linux/jbd*.h +F: fs/jbd/ +F: include/linux/ext3_jbd.h +F: include/linux/jbd.h + +JOURNALLING LAYER FOR BLOCK DEVICES (JBD2) +M: "Theodore Ts'o" <tytso@mit.edu> +L: linux-ext4@vger.kernel.org +S: Maintained +F: fs/jbd2/ +F: include/linux/jbd2.h JSM Neo PCI based serial card M: Breno Leitao <leitao@linux.vnet.ibm.com> diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 076db52ff672..d5f00d7eb075 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -21,58 +21,22 @@ CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y # CONFIG_BLK_DEV_BSG is not set CONFIG_ARCH_OMAP=y -CONFIG_ARCH_OMAP2=y -CONFIG_ARCH_OMAP3=y -CONFIG_ARCH_OMAP4=y CONFIG_OMAP_RESET_CLOCKS=y CONFIG_OMAP_MUX_DEBUG=y -CONFIG_OMAP_32K_TIMER=y -CONFIG_MACH_OMAP_GENERIC=y -CONFIG_ARCH_OMAP2420=y -CONFIG_ARCH_OMAP2430=y -CONFIG_ARCH_OMAP3430=y -CONFIG_MACH_OMAP_H4=y -CONFIG_MACH_OMAP_APOLLON=y -CONFIG_MACH_OMAP_2430SDP=y -CONFIG_MACH_OMAP3_BEAGLE=y -CONFIG_MACH_DEVKIT8000=y -CONFIG_MACH_OMAP_LDP=y -CONFIG_MACH_OVERO=y -CONFIG_MACH_OMAP3EVM=y -CONFIG_MACH_OMAP3517EVM=y -CONFIG_MACH_OMAP3_PANDORA=y -CONFIG_MACH_OMAP3_TOUCHBOOK=y -CONFIG_MACH_OMAP_3430SDP=y -CONFIG_MACH_NOKIA_N8X0=y -CONFIG_MACH_NOKIA_RX51=y -CONFIG_MACH_OMAP_ZOOM2=y -CONFIG_MACH_OMAP_ZOOM3=y -CONFIG_MACH_CM_T35=y -CONFIG_MACH_IGEP0020=y -CONFIG_MACH_SBC3530=y -CONFIG_MACH_OMAP_3630SDP=y -CONFIG_MACH_OMAP_4430SDP=y CONFIG_ARM_THUMBEE=y -CONFIG_ARM_L1_CACHE_SHIFT=5 CONFIG_ARM_ERRATA_411920=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y CONFIG_NR_CPUS=2 -# CONFIG_LOCAL_TIMERS is not set -CONFIG_AEABI=y CONFIG_LEDS=y CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_CMDLINE="root=/dev/mmcblk0p2 rootwait console=ttyO2,115200" CONFIG_KEXEC=y CONFIG_FPE_NWFPE=y -CONFIG_VFP=y -CONFIG_NEON=y CONFIG_BINFMT_MISC=y -CONFIG_PM=y CONFIG_PM_DEBUG=y -CONFIG_PM_RUNTIME=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -89,14 +53,6 @@ CONFIG_IP_PNP_RARP=y # CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_BT=m -CONFIG_BT_L2CAP=m -CONFIG_BT_SCO=m -CONFIG_BT_RFCOMM=y -CONFIG_BT_RFCOMM_TTY=y -CONFIG_BT_BNEP=m -CONFIG_BT_BNEP_MC_FILTER=y -CONFIG_BT_BNEP_PROTO_FILTER=y -CONFIG_BT_HIDP=m CONFIG_BT_HCIUART=m CONFIG_BT_HCIUART_H4=y CONFIG_BT_HCIUART_BCSP=y @@ -107,11 +63,9 @@ CONFIG_CFG80211=m CONFIG_MAC80211=m CONFIG_MAC80211_RC_PID=y CONFIG_MAC80211_RC_DEFAULT_PID=y -CONFIG_MAC80211_LEDS=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y -CONFIG_MTD_CONCAT=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y @@ -127,7 +81,6 @@ CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=16384 -CONFIG_EEPROM_LEGACY=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_SCSI_MULTI_LUN=y @@ -158,19 +111,15 @@ CONFIG_TOUCHSCREEN_ADS7846=y CONFIG_INPUT_MISC=y CONFIG_INPUT_TWL4030_PWRBUTTON=y CONFIG_VT_HW_CONSOLE_BINDING=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_8250_NR_UARTS=32 CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_8250_DETECT_IRQ=y CONFIG_SERIAL_8250_RSA=y -# CONFIG_LEGACY_PTYS is not set CONFIG_HW_RANDOM=y -CONFIG_I2C=y CONFIG_I2C_CHARDEV=y -CONFIG_I2C_OMAP=y CONFIG_SPI=y CONFIG_SPI_OMAP24XX=y CONFIG_DEBUG_GPIO=y @@ -181,10 +130,6 @@ CONFIG_POWER_SUPPLY=y CONFIG_WATCHDOG=y CONFIG_OMAP_WATCHDOG=y CONFIG_TWL4030_WATCHDOG=y -CONFIG_MENELAUS=y -CONFIG_TWL4030_CORE=y -CONFIG_TWL4030_POWER=y -CONFIG_REGULATOR=y CONFIG_REGULATOR_TWL4030=y CONFIG_REGULATOR_TPS65023=y CONFIG_REGULATOR_TPS6507X=y @@ -208,7 +153,6 @@ CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_PLATFORM=y CONFIG_DISPLAY_SUPPORT=y -# CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y CONFIG_FONTS=y @@ -217,25 +161,20 @@ CONFIG_FONT_8x16=y CONFIG_LOGO=y CONFIG_SOUND=m CONFIG_SND=m -CONFIG_SND_MIXER_OSS=y -CONFIG_SND_PCM_OSS=y +CONFIG_SND_MIXER_OSS=m +CONFIG_SND_PCM_OSS=m CONFIG_SND_VERBOSE_PRINTK=y CONFIG_SND_DEBUG=y -CONFIG_SND_USB_AUDIO=y -CONFIG_SND_SOC=y -CONFIG_SND_OMAP_SOC=y -CONFIG_SND_OMAP_SOC_OMAP3_PANDORA=y +CONFIG_SND_USB_AUDIO=m +CONFIG_SND_SOC=m +CONFIG_SND_OMAP_SOC=m +CONFIG_SND_OMAP_SOC_OMAP3_PANDORA=m CONFIG_USB=y CONFIG_USB_DEBUG=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y CONFIG_USB_DEVICEFS=y CONFIG_USB_SUSPEND=y -# CONFIG_USB_OTG_WHITELIST is not set CONFIG_USB_MON=y -# CONFIG_USB_MUSB_HDRC is not set -# CONFIG_USB_MUSB_OTG is not set -# CONFIG_USB_GADGET_MUSB_HDRC is not set -CONFIG_USB_MUSB_DEBUG=y CONFIG_USB_WDM=y CONFIG_USB_STORAGE=y CONFIG_USB_LIBUSUAL=y @@ -250,18 +189,12 @@ CONFIG_MMC_UNSAFE_RESUME=y CONFIG_SDIO_UART=y CONFIG_MMC_OMAP=y CONFIG_MMC_OMAP_HS=y -CONFIG_LEDS_CLASS=y -CONFIG_LEDS_GPIO=y -CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_LEDS_TRIGGER_HEARTBEAT=y -CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_TWL92330=y CONFIG_RTC_DRV_TWL4030=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_FS_XATTR is not set -CONFIG_INOTIFY=y CONFIG_QUOTA=y CONFIG_QFMT_V2=y CONFIG_MSDOS_FS=y @@ -285,12 +218,10 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y CONFIG_PROVE_LOCKING=y -# CONFIG_LOCK_STAT is not set CONFIG_DEBUG_SPINLOCK_SLEEP=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index b997a35830fc..19d5891c48e3 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -288,6 +288,7 @@ config MACH_IGEP0030 depends on ARCH_OMAP3 default y select OMAP_PACKAGE_CBB + select MACH_IGEP0020 config MACH_SBC3530 bool "OMAP3 SBC STALKER board" diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 66dfbccacd25..b14807794401 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -229,8 +229,6 @@ obj-$(CONFIG_MACH_CM_T35) += board-cm-t35.o \ obj-$(CONFIG_MACH_CM_T3517) += board-cm-t3517.o obj-$(CONFIG_MACH_IGEP0020) += board-igep0020.o \ hsmmc.o -obj-$(CONFIG_MACH_IGEP0030) += board-igep0030.o \ - hsmmc.o obj-$(CONFIG_MACH_OMAP3_TOUCHBOOK) += board-omap3touchbook.o \ hsmmc.o obj-$(CONFIG_MACH_OMAP_4430SDP) += board-4430sdp.o \ @@ -270,3 +268,5 @@ obj-$(CONFIG_ARCH_OMAP4) += hwspinlock.o disp-$(CONFIG_OMAP2_DSS) := display.o obj-y += $(disp-m) $(disp-y) + +obj-y += common-board-devices.o diff --git a/arch/arm/mach-omap2/board-2430sdp.c b/arch/arm/mach-omap2/board-2430sdp.c index 1fa6bb896f41..d54969be0a54 100644 --- a/arch/arm/mach-omap2/board-2430sdp.c +++ b/arch/arm/mach-omap2/board-2430sdp.c @@ -41,6 +41,7 @@ #include "mux.h" #include "hsmmc.h" +#include "common-board-devices.h" #define SDP2430_CS0_BASE 0x04000000 #define SECONDARY_LCD_GPIO 147 @@ -180,15 +181,6 @@ static struct twl4030_platform_data sdp2430_twldata = { .vmmc1 = &sdp2430_vmmc1, }; -static struct i2c_board_info __initdata sdp2430_i2c_boardinfo[] = { - { - I2C_BOARD_INFO("twl4030", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = INT_24XX_SYS_NIRQ, - .platform_data = &sdp2430_twldata, - }, -}; - static struct i2c_board_info __initdata sdp2430_i2c1_boardinfo[] = { { I2C_BOARD_INFO("isp1301_omap", 0x2D), @@ -201,8 +193,7 @@ static int __init omap2430_i2c_init(void) { omap_register_i2c_bus(1, 100, sdp2430_i2c1_boardinfo, ARRAY_SIZE(sdp2430_i2c1_boardinfo)); - omap_register_i2c_bus(2, 2600, sdp2430_i2c_boardinfo, - ARRAY_SIZE(sdp2430_i2c_boardinfo)); + omap2_pmic_init("twl4030", &sdp2430_twldata); return 0; } @@ -217,11 +208,6 @@ static struct omap2_hsmmc_info mmc[] __initdata = { {} /* Terminator */ }; -static struct omap_musb_board_data musb_board_data = { - .interface_type = MUSB_INTERFACE_ULPI, - .mode = MUSB_OTG, - .power = 100, -}; static struct omap_usb_config sdp2430_usb_config __initdata = { .otg = 1, #ifdef CONFIG_USB_GADGET_OMAP @@ -240,8 +226,6 @@ static struct omap_board_mux board_mux[] __initdata = { static void __init omap_2430sdp_init(void) { - int ret; - omap2430_mux_init(board_mux, OMAP_PACKAGE_ZAC); omap_board_config = sdp2430_config; @@ -255,14 +239,13 @@ static void __init omap_2430sdp_init(void) omap2_usbfs_init(&sdp2430_usb_config); omap_mux_init_signal("usb0hs_stp", OMAP_PULL_ENA | OMAP_PULL_UP); - usb_musb_init(&musb_board_data); + usb_musb_init(NULL); board_smc91x_init(); /* Turn off secondary LCD backlight */ - ret = gpio_request(SECONDARY_LCD_GPIO, "Secondary LCD backlight"); - if (ret == 0) - gpio_direction_output(SECONDARY_LCD_GPIO, 0); + gpio_request_one(SECONDARY_LCD_GPIO, GPIOF_OUT_INIT_LOW, + "Secondary LCD backlight"); } static void __init omap_2430sdp_map_io(void) diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c index 23244cd0a5b6..ae2963a98041 100644 --- a/arch/arm/mach-omap2/board-3430sdp.c +++ b/arch/arm/mach-omap2/board-3430sdp.c @@ -19,7 +19,6 @@ #include <linux/input.h> #include <linux/input/matrix_keypad.h> #include <linux/spi/spi.h> -#include <linux/spi/ads7846.h> #include <linux/i2c/twl.h> #include <linux/regulator/machine.h> #include <linux/io.h> @@ -48,6 +47,7 @@ #include "hsmmc.h" #include "pm.h" #include "control.h" +#include "common-board-devices.h" #define CONFIG_DISABLE_HFCLK 1 @@ -59,24 +59,6 @@ #define TWL4030_MSECURE_GPIO 22 -/* FIXME: These values need to be updated based on more profiling on 3430sdp*/ -static struct cpuidle_params omap3_cpuidle_params_table[] = { - /* C1 */ - {1, 2, 2, 5}, - /* C2 */ - {1, 10, 10, 30}, - /* C3 */ - {1, 50, 50, 300}, - /* C4 */ - {1, 1500, 1800, 4000}, - /* C5 */ - {1, 2500, 7500, 12000}, - /* C6 */ - {1, 3000, 8500, 15000}, - /* C7 */ - {1, 10000, 30000, 300000}, -}; - static uint32_t board_keymap[] = { KEY(0, 0, KEY_LEFT), KEY(0, 1, KEY_RIGHT), @@ -123,63 +105,14 @@ static struct twl4030_keypad_data sdp3430_kp_data = { .rep = 1, }; -static int ts_gpio; /* Needed for ads7846_get_pendown_state */ - -/** - * @brief ads7846_dev_init : Requests & sets GPIO line for pen-irq - * - * @return - void. If request gpio fails then Flag KERN_ERR. - */ -static void ads7846_dev_init(void) -{ - if (gpio_request(ts_gpio, "ADS7846 pendown") < 0) { - printk(KERN_ERR "can't get ads746 pen down GPIO\n"); - return; - } - - gpio_direction_input(ts_gpio); - gpio_set_debounce(ts_gpio, 310); -} - -static int ads7846_get_pendown_state(void) -{ - return !gpio_get_value(ts_gpio); -} - -static struct ads7846_platform_data tsc2046_config __initdata = { - .get_pendown_state = ads7846_get_pendown_state, - .keep_vref_on = 1, - .wakeup = true, -}; - - -static struct omap2_mcspi_device_config tsc2046_mcspi_config = { - .turbo_mode = 0, - .single_channel = 1, /* 0: slave, 1: master */ -}; - -static struct spi_board_info sdp3430_spi_board_info[] __initdata = { - [0] = { - /* - * TSC2046 operates at a max freqency of 2MHz, so - * operate slightly below at 1.5MHz - */ - .modalias = "ads7846", - .bus_num = 1, - .chip_select = 0, - .max_speed_hz = 1500000, - .controller_data = &tsc2046_mcspi_config, - .irq = 0, - .platform_data = &tsc2046_config, - }, -}; - - #define SDP3430_LCD_PANEL_BACKLIGHT_GPIO 8 #define SDP3430_LCD_PANEL_ENABLE_GPIO 5 -static unsigned backlight_gpio; -static unsigned enable_gpio; +static struct gpio sdp3430_dss_gpios[] __initdata = { + {SDP3430_LCD_PANEL_ENABLE_GPIO, GPIOF_OUT_INIT_LOW, "LCD reset" }, + {SDP3430_LCD_PANEL_BACKLIGHT_GPIO, GPIOF_OUT_INIT_LOW, "LCD Backlight"}, +}; + static int lcd_enabled; static int dvi_enabled; @@ -187,29 +120,11 @@ static void __init sdp3430_display_init(void) { int r; - enable_gpio = SDP3430_LCD_PANEL_ENABLE_GPIO; - backlight_gpio = SDP3430_LCD_PANEL_BACKLIGHT_GPIO; - - r = gpio_request(enable_gpio, "LCD reset"); - if (r) { - printk(KERN_ERR "failed to get LCD reset GPIO\n"); - goto err0; - } - - r = gpio_request(backlight_gpio, "LCD Backlight"); - if (r) { - printk(KERN_ERR "failed to get LCD backlight GPIO\n"); - goto err1; - } - - gpio_direction_output(enable_gpio, 0); - gpio_direction_output(backlight_gpio, 0); + r = gpio_request_array(sdp3430_dss_gpios, + ARRAY_SIZE(sdp3430_dss_gpios)); + if (r) + printk(KERN_ERR "failed to get LCD control GPIOs\n"); - return; -err1: - gpio_free(enable_gpio); -err0: - return; } static int sdp3430_panel_enable_lcd(struct omap_dss_device *dssdev) @@ -219,8 +134,8 @@ static int sdp3430_panel_enable_lcd(struct omap_dss_device *dssdev) return -EINVAL; } - gpio_direction_output(enable_gpio, 1); - gpio_direction_output(backlight_gpio, 1); + gpio_direction_output(SDP3430_LCD_PANEL_ENABLE_GPIO, 1); + gpio_direction_output(SDP3430_LCD_PANEL_BACKLIGHT_GPIO, 1); lcd_enabled = 1; @@ -231,8 +146,8 @@ static void sdp3430_panel_disable_lcd(struct omap_dss_device *dssdev) { lcd_enabled = 0; - gpio_direction_output(enable_gpio, 0); - gpio_direction_output(backlight_gpio, 0); + gpio_direction_output(SDP3430_LCD_PANEL_ENABLE_GPIO, 0); + gpio_direction_output(SDP3430_LCD_PANEL_BACKLIGHT_GPIO, 0); } static int sdp3430_panel_enable_dvi(struct omap_dss_device *dssdev) @@ -360,12 +275,10 @@ static int sdp3430_twl_gpio_setup(struct device *dev, omap2_hsmmc_init(mmc); /* gpio + 7 is "sub_lcd_en_bkl" (output/PWM1) */ - gpio_request(gpio + 7, "sub_lcd_en_bkl"); - gpio_direction_output(gpio + 7, 0); + gpio_request_one(gpio + 7, GPIOF_OUT_INIT_LOW, "sub_lcd_en_bkl"); /* gpio + 15 is "sub_lcd_nRST" (output) */ - gpio_request(gpio + 15, "sub_lcd_nRST"); - gpio_direction_output(gpio + 15, 0); + gpio_request_one(gpio + 15, GPIOF_OUT_INIT_LOW, "sub_lcd_nRST"); return 0; } @@ -580,20 +493,10 @@ static struct twl4030_platform_data sdp3430_twldata = { .vpll2 = &sdp3430_vpll2, }; -static struct i2c_board_info __initdata sdp3430_i2c_boardinfo[] = { - { - I2C_BOARD_INFO("twl4030", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = INT_34XX_SYS_NIRQ, - .platform_data = &sdp3430_twldata, - }, -}; - static int __init omap3430_i2c_init(void) { /* i2c1 for PMIC only */ - omap_register_i2c_bus(1, 2600, sdp3430_i2c_boardinfo, - ARRAY_SIZE(sdp3430_i2c_boardinfo)); + omap3_pmic_init("twl4030", &sdp3430_twldata); /* i2c2 on camera connector (for sensor control) and optional isp1301 */ omap_register_i2c_bus(2, 400, NULL, 0); /* i2c3 on display connector (for DVI, tfp410) */ @@ -872,30 +775,22 @@ static struct flash_partitions sdp_flash_partitions[] = { }, }; -static struct omap_musb_board_data musb_board_data = { - .interface_type = MUSB_INTERFACE_ULPI, - .mode = MUSB_OTG, - .power = 100, -}; - static void __init omap_3430sdp_init(void) { + int gpio_pendown; + omap3_mux_init(board_mux, OMAP_PACKAGE_CBB); omap_board_config = sdp3430_config; omap_board_config_size = ARRAY_SIZE(sdp3430_config); - omap3_pm_init_cpuidle(omap3_cpuidle_params_table); omap3430_i2c_init(); omap_display_init(&sdp3430_dss_data); if (omap_rev() > OMAP3430_REV_ES1_0) - ts_gpio = SDP3430_TS_GPIO_IRQ_SDPV2; + gpio_pendown = SDP3430_TS_GPIO_IRQ_SDPV2; else - ts_gpio = SDP3430_TS_GPIO_IRQ_SDPV1; - sdp3430_spi_board_info[0].irq = gpio_to_irq(ts_gpio); - spi_register_board_info(sdp3430_spi_board_info, - ARRAY_SIZE(sdp3430_spi_board_info)); - ads7846_dev_init(); + gpio_pendown = SDP3430_TS_GPIO_IRQ_SDPV1; + omap_ads7846_init(1, gpio_pendown, 310, NULL); board_serial_init(); - usb_musb_init(&musb_board_data); + usb_musb_init(NULL); board_smc91x_init(); board_flash_init(sdp_flash_partitions, chip_sel_3430, 0); sdp3430_display_init(); diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c index 93edd7fcf451..73fa90bb6953 100644 --- a/arch/arm/mach-omap2/board-4430sdp.c +++ b/arch/arm/mach-omap2/board-4430sdp.c @@ -42,6 +42,7 @@ #include "hsmmc.h" #include "timer-gp.h" #include "control.h" +#include "common-board-devices.h" #define ETH_KS8851_IRQ 34 #define ETH_KS8851_POWER_ON 48 @@ -251,58 +252,22 @@ static struct spi_board_info sdp4430_spi_board_info[] __initdata = { }, }; +static struct gpio sdp4430_eth_gpios[] __initdata = { + { ETH_KS8851_POWER_ON, GPIOF_OUT_INIT_HIGH, "eth_power" }, + { ETH_KS8851_QUART, GPIOF_OUT_INIT_HIGH, "quart" }, + { ETH_KS8851_IRQ, GPIOF_IN, "eth_irq" }, +}; + static int omap_ethernet_init(void) { int status; /* Request of GPIO lines */ + status = gpio_request_array(sdp4430_eth_gpios, + ARRAY_SIZE(sdp4430_eth_gpios)); + if (status) + pr_err("Cannot request ETH GPIOs\n"); - status = gpio_request(ETH_KS8851_POWER_ON, "eth_power"); - if (status) { - pr_err("Cannot request GPIO %d\n", ETH_KS8851_POWER_ON); - return status; - } - - status = gpio_request(ETH_KS8851_QUART, "quart"); - if (status) { - pr_err("Cannot request GPIO %d\n", ETH_KS8851_QUART); - goto error1; - } - - status = gpio_request(ETH_KS8851_IRQ, "eth_irq"); - if (status) { - pr_err("Cannot request GPIO %d\n", ETH_KS8851_IRQ); - goto error2; - } - - /* Configuration of requested GPIO lines */ - - status = gpio_direction_output(ETH_KS8851_POWER_ON, 1); - if (status) { - pr_err("Cannot set output GPIO %d\n", ETH_KS8851_IRQ); - goto error3; - } - - status = gpio_direction_output(ETH_KS8851_QUART, 1); - if (status) { - pr_err("Cannot set output GPIO %d\n", ETH_KS8851_QUART); - goto error3; - } - - status = gpio_direction_input(ETH_KS8851_IRQ); - if (status) { - pr_err("Cannot set input GPIO %d\n", ETH_KS8851_IRQ); - goto error3; - } - - return 0; - -error3: - gpio_free(ETH_KS8851_IRQ); -error2: - gpio_free(ETH_KS8851_QUART); -error1: - gpio_free(ETH_KS8851_POWER_ON); return status; } @@ -575,14 +540,6 @@ static struct twl4030_platform_data sdp4430_twldata = { .usb = &omap4_usbphy_data }; -static struct i2c_board_info __initdata sdp4430_i2c_boardinfo[] = { - { - I2C_BOARD_INFO("twl6030", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = OMAP44XX_IRQ_SYS_1N, - .platform_data = &sdp4430_twldata, - }, -}; static struct i2c_board_info __initdata sdp4430_i2c_3_boardinfo[] = { { I2C_BOARD_INFO("tmp105", 0x48), @@ -598,12 +555,7 @@ static struct i2c_board_info __initdata sdp4430_i2c_4_boardinfo[] = { }; static int __init omap4_i2c_init(void) { - /* - * Phoenix Audio IC needs I2C1 to - * start with 400 KHz or less - */ - omap_register_i2c_bus(1, 400, sdp4430_i2c_boardinfo, - ARRAY_SIZE(sdp4430_i2c_boardinfo)); + omap4_pmic_init("twl6030", &sdp4430_twldata); omap_register_i2c_bus(2, 400, NULL, 0); omap_register_i2c_bus(3, 400, sdp4430_i2c_3_boardinfo, ARRAY_SIZE(sdp4430_i2c_3_boardinfo)); @@ -614,21 +566,13 @@ static int __init omap4_i2c_init(void) static void __init omap_sfh7741prox_init(void) { - int error; + int error; - error = gpio_request(OMAP4_SFH7741_ENABLE_GPIO, "sfh7741"); - if (error < 0) { + error = gpio_request_one(OMAP4_SFH7741_ENABLE_GPIO, + GPIOF_OUT_INIT_LOW, "sfh7741"); + if (error < 0) pr_err("%s:failed to request GPIO %d, error %d\n", __func__, OMAP4_SFH7741_ENABLE_GPIO, error); - return; - } - - error = gpio_direction_output(OMAP4_SFH7741_ENABLE_GPIO , 0); - if (error < 0) { - pr_err("%s: GPIO configuration failed: GPIO %d,error %d\n", - __func__, OMAP4_SFH7741_ENABLE_GPIO, error); - gpio_free(OMAP4_SFH7741_ENABLE_GPIO); - } } static void sdp4430_hdmi_mux_init(void) @@ -645,27 +589,19 @@ static void sdp4430_hdmi_mux_init(void) OMAP_PIN_INPUT_PULLUP); } +static struct gpio sdp4430_hdmi_gpios[] = { + { HDMI_GPIO_HPD, GPIOF_OUT_INIT_HIGH, "hdmi_gpio_hpd" }, + { HDMI_GPIO_LS_OE, GPIOF_OUT_INIT_HIGH, "hdmi_gpio_ls_oe" }, +}; + static int sdp4430_panel_enable_hdmi(struct omap_dss_device *dssdev) { int status; - status = gpio_request_one(HDMI_GPIO_HPD, GPIOF_OUT_INIT_HIGH, - "hdmi_gpio_hpd"); - if (status) { - pr_err("Cannot request GPIO %d\n", HDMI_GPIO_HPD); - return status; - } - status = gpio_request_one(HDMI_GPIO_LS_OE, GPIOF_OUT_INIT_HIGH, - "hdmi_gpio_ls_oe"); - if (status) { - pr_err("Cannot request GPIO %d\n", HDMI_GPIO_LS_OE); - goto error1; - } - - return 0; - -error1: - gpio_free(HDMI_GPIO_HPD); + status = gpio_request_array(sdp4430_hdmi_gpios, + ARRAY_SIZE(sdp4430_hdmi_gpios)); + if (status) + pr_err("%s: Cannot request HDMI GPIOs\n", __func__); return status; } diff --git a/arch/arm/mach-omap2/board-am3517crane.c b/arch/arm/mach-omap2/board-am3517crane.c index a890d244fec6..5e438a77cd72 100644 --- a/arch/arm/mach-omap2/board-am3517crane.c +++ b/arch/arm/mach-omap2/board-am3517crane.c @@ -89,19 +89,13 @@ static void __init am3517_crane_init(void) return; } - ret = gpio_request(GPIO_USB_POWER, "usb_ehci_enable"); + ret = gpio_request_one(GPIO_USB_POWER, GPIOF_OUT_INIT_HIGH, + "usb_ehci_enable"); if (ret < 0) { pr_err("Can not request GPIO %d\n", GPIO_USB_POWER); return; } - ret = gpio_direction_output(GPIO_USB_POWER, 1); - if (ret < 0) { - gpio_free(GPIO_USB_POWER); - pr_err("Unable to initialize EHCI power\n"); - return; - } - usbhs_init(&usbhs_bdata); } diff --git a/arch/arm/mach-omap2/board-am3517evm.c b/arch/arm/mach-omap2/board-am3517evm.c index ff8c59be36e5..63af4171c043 100644 --- a/arch/arm/mach-omap2/board-am3517evm.c +++ b/arch/arm/mach-omap2/board-am3517evm.c @@ -174,19 +174,14 @@ static void __init am3517_evm_rtc_init(void) int r; omap_mux_init_gpio(GPIO_RTCS35390A_IRQ, OMAP_PIN_INPUT_PULLUP); - r = gpio_request(GPIO_RTCS35390A_IRQ, "rtcs35390a-irq"); + + r = gpio_request_one(GPIO_RTCS35390A_IRQ, GPIOF_IN, "rtcs35390a-irq"); if (r < 0) { printk(KERN_WARNING "failed to request GPIO#%d\n", GPIO_RTCS35390A_IRQ); return; } - r = gpio_direction_input(GPIO_RTCS35390A_IRQ); - if (r < 0) { - printk(KERN_WARNING "GPIO#%d cannot be configured as input\n", - GPIO_RTCS35390A_IRQ); - gpio_free(GPIO_RTCS35390A_IRQ); - return; - } + am3517evm_i2c1_boardinfo[0].irq = gpio_to_irq(GPIO_RTCS35390A_IRQ); } @@ -242,6 +237,15 @@ static int dvi_enabled; #if defined(CONFIG_PANEL_SHARP_LQ043T1DG01) || \ defined(CONFIG_PANEL_SHARP_LQ043T1DG01_MODULE) +static struct gpio am3517_evm_dss_gpios[] __initdata = { + /* GPIO 182 = LCD Backlight Power */ + { LCD_PANEL_BKLIGHT_PWR, GPIOF_OUT_INIT_HIGH, "lcd_backlight_pwr" }, + /* GPIO 181 = LCD Panel PWM */ + { LCD_PANEL_PWM, GPIOF_OUT_INIT_HIGH, "lcd bl enable" }, + /* GPIO 176 = LCD Panel Power enable pin */ + { LCD_PANEL_PWR, GPIOF_OUT_INIT_HIGH, "dvi enable" }, +}; + static void __init am3517_evm_display_init(void) { int r; @@ -249,41 +253,15 @@ static void __init am3517_evm_display_init(void) omap_mux_init_gpio(LCD_PANEL_PWR, OMAP_PIN_INPUT_PULLUP); omap_mux_init_gpio(LCD_PANEL_BKLIGHT_PWR, OMAP_PIN_INPUT_PULLDOWN); omap_mux_init_gpio(LCD_PANEL_PWM, OMAP_PIN_INPUT_PULLDOWN); - /* - * Enable GPIO 182 = LCD Backlight Power - */ - r = gpio_request(LCD_PANEL_BKLIGHT_PWR, "lcd_backlight_pwr"); + + r = gpio_request_array(am3517_evm_dss_gpios, + ARRAY_SIZE(am3517_evm_dss_gpios)); if (r) { - printk(KERN_ERR "failed to get lcd_backlight_pwr\n"); + printk(KERN_ERR "failed to get DSS panel control GPIOs\n"); return; } - gpio_direction_output(LCD_PANEL_BKLIGHT_PWR, 1); - /* - * Enable GPIO 181 = LCD Panel PWM - */ - r = gpio_request(LCD_PANEL_PWM, "lcd_pwm"); - if (r) { - printk(KERN_ERR "failed to get lcd_pwm\n"); - goto err_1; - } - gpio_direction_output(LCD_PANEL_PWM, 1); - /* - * Enable GPIO 176 = LCD Panel Power enable pin - */ - r = gpio_request(LCD_PANEL_PWR, "lcd_panel_pwr"); - if (r) { - printk(KERN_ERR "failed to get lcd_panel_pwr\n"); - goto err_2; - } - gpio_direction_output(LCD_PANEL_PWR, 1); printk(KERN_INFO "Display initialized successfully\n"); - return; - -err_2: - gpio_free(LCD_PANEL_PWM); -err_1: - gpio_free(LCD_PANEL_BKLIGHT_PWR); } #else static void __init am3517_evm_display_init(void) {} @@ -396,7 +374,7 @@ static struct omap_musb_board_data musb_board_data = { .power = 500, .set_phy_power = am35x_musb_phy_power, .clear_irq = am35x_musb_clear_irq, - .set_mode = am35x_musb_set_mode, + .set_mode = am35x_set_mode, .reset = am35x_musb_reset, }; diff --git a/arch/arm/mach-omap2/board-apollon.c b/arch/arm/mach-omap2/board-apollon.c index f4f8374a0298..f3beb8eeef77 100644 --- a/arch/arm/mach-omap2/board-apollon.c +++ b/arch/arm/mach-omap2/board-apollon.c @@ -202,6 +202,7 @@ static inline void __init apollon_init_smc91x(void) unsigned int rate; struct clk *gpmc_fck; int eth_cs; + int err; gpmc_fck = clk_get(NULL, "gpmc_fck"); /* Always on ENABLE_ON_INIT */ if (IS_ERR(gpmc_fck)) { @@ -245,15 +246,13 @@ static inline void __init apollon_init_smc91x(void) apollon_smc91x_resources[0].end = base + 0x30f; udelay(100); - omap_mux_init_gpio(74, 0); - if (gpio_request(APOLLON_ETHR_GPIO_IRQ, "SMC91x irq") < 0) { + omap_mux_init_gpio(APOLLON_ETHR_GPIO_IRQ, 0); + err = gpio_request_one(APOLLON_ETHR_GPIO_IRQ, GPIOF_IN, "SMC91x irq"); + if (err) { printk(KERN_ERR "Failed to request GPIO%d for smc91x IRQ\n", APOLLON_ETHR_GPIO_IRQ); gpmc_cs_free(APOLLON_ETH_CS); - goto out; } - gpio_direction_input(APOLLON_ETHR_GPIO_IRQ); - out: clk_disable(gpmc_fck); clk_put(gpmc_fck); @@ -280,20 +279,19 @@ static void __init omap_apollon_init_early(void) omap2_init_common_devices(NULL, NULL); } +static struct gpio apollon_gpio_leds[] __initdata = { + { LED0_GPIO13, GPIOF_OUT_INIT_LOW, "LED0" }, /* LED0 - AA10 */ + { LED1_GPIO14, GPIOF_OUT_INIT_LOW, "LED1" }, /* LED1 - AA6 */ + { LED2_GPIO15, GPIOF_OUT_INIT_LOW, "LED2" }, /* LED2 - AA4 */ +}; + static void __init apollon_led_init(void) { - /* LED0 - AA10 */ omap_mux_init_signal("vlynq_clk.gpio_13", 0); - gpio_request(LED0_GPIO13, "LED0"); - gpio_direction_output(LED0_GPIO13, 0); - /* LED1 - AA6 */ omap_mux_init_signal("vlynq_rx1.gpio_14", 0); - gpio_request(LED1_GPIO14, "LED1"); - gpio_direction_output(LED1_GPIO14, 0); - /* LED2 - AA4 */ omap_mux_init_signal("vlynq_rx0.gpio_15", 0); - gpio_request(LED2_GPIO15, "LED2"); - gpio_direction_output(LED2_GPIO15, 0); + + gpio_request_array(apollon_gpio_leds, ARRAY_SIZE(apollon_gpio_leds)); } static void __init apollon_usb_init(void) @@ -301,8 +299,7 @@ static void __init apollon_usb_init(void) /* USB device */ /* DEVICE_SUSPEND */ omap_mux_init_signal("mcbsp2_clkx.gpio_12", 0); - gpio_request(12, "USB suspend"); - gpio_direction_output(12, 0); + gpio_request_one(12, GPIOF_OUT_INIT_LOW, "USB suspend"); omap2_usbfs_init(&apollon_usb_config); } diff --git a/arch/arm/mach-omap2/board-cm-t35.c b/arch/arm/mach-omap2/board-cm-t35.c index 9340f6a06f4a..c63115bc1536 100644 --- a/arch/arm/mach-omap2/board-cm-t35.c +++ b/arch/arm/mach-omap2/board-cm-t35.c @@ -54,6 +54,7 @@ #include "mux.h" #include "sdram-micron-mt46h32m32lf-6.h" #include "hsmmc.h" +#include "common-board-devices.h" #define CM_T35_GPIO_PENDOWN 57 @@ -66,86 +67,28 @@ #if defined(CONFIG_SMSC911X) || defined(CONFIG_SMSC911X_MODULE) #include <linux/smsc911x.h> +#include <plat/gpmc-smsc911x.h> -static struct smsc911x_platform_config cm_t35_smsc911x_config = { - .irq_polarity = SMSC911X_IRQ_POLARITY_ACTIVE_LOW, - .irq_type = SMSC911X_IRQ_TYPE_OPEN_DRAIN, - .flags = SMSC911X_USE_32BIT | SMSC911X_SAVE_MAC_ADDRESS, - .phy_interface = PHY_INTERFACE_MODE_MII, -}; - -static struct resource cm_t35_smsc911x_resources[] = { - { - .flags = IORESOURCE_MEM, - }, - { - .start = OMAP_GPIO_IRQ(CM_T35_SMSC911X_GPIO), - .end = OMAP_GPIO_IRQ(CM_T35_SMSC911X_GPIO), - .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWLEVEL, - }, -}; - -static struct platform_device cm_t35_smsc911x_device = { - .name = "smsc911x", +static struct omap_smsc911x_platform_data cm_t35_smsc911x_cfg = { .id = 0, - .num_resources = ARRAY_SIZE(cm_t35_smsc911x_resources), - .resource = cm_t35_smsc911x_resources, - .dev = { - .platform_data = &cm_t35_smsc911x_config, - }, -}; - -static struct resource sb_t35_smsc911x_resources[] = { - { - .flags = IORESOURCE_MEM, - }, - { - .start = OMAP_GPIO_IRQ(SB_T35_SMSC911X_GPIO), - .end = OMAP_GPIO_IRQ(SB_T35_SMSC911X_GPIO), - .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWLEVEL, - }, + .cs = CM_T35_SMSC911X_CS, + .gpio_irq = CM_T35_SMSC911X_GPIO, + .gpio_reset = -EINVAL, + .flags = SMSC911X_USE_32BIT | SMSC911X_SAVE_MAC_ADDRESS, }; -static struct platform_device sb_t35_smsc911x_device = { - .name = "smsc911x", +static struct omap_smsc911x_platform_data sb_t35_smsc911x_cfg = { .id = 1, - .num_resources = ARRAY_SIZE(sb_t35_smsc911x_resources), - .resource = sb_t35_smsc911x_resources, - .dev = { - .platform_data = &cm_t35_smsc911x_config, - }, + .cs = SB_T35_SMSC911X_CS, + .gpio_irq = SB_T35_SMSC911X_GPIO, + .gpio_reset = -EINVAL, + .flags = SMSC911X_USE_32BIT | SMSC911X_SAVE_MAC_ADDRESS, }; -static void __init cm_t35_init_smsc911x(struct platform_device *dev, - int cs, int irq_gpio) -{ - unsigned long cs_mem_base; - - if (gpmc_cs_request(cs, SZ_16M, &cs_mem_base) < 0) { - pr_err("CM-T35: Failed request for GPMC mem for smsc911x\n"); - return; - } - - dev->resource[0].start = cs_mem_base + 0x0; - dev->resource[0].end = cs_mem_base + 0xff; - - if ((gpio_request(irq_gpio, "ETH IRQ") == 0) && - (gpio_direction_input(irq_gpio) == 0)) { - gpio_export(irq_gpio, 0); - } else { - pr_err("CM-T35: could not obtain gpio for SMSC911X IRQ\n"); - return; - } - - platform_device_register(dev); -} - static void __init cm_t35_init_ethernet(void) { - cm_t35_init_smsc911x(&cm_t35_smsc911x_device, - CM_T35_SMSC911X_CS, CM_T35_SMSC911X_GPIO); - cm_t35_init_smsc911x(&sb_t35_smsc911x_device, - SB_T35_SMSC911X_CS, SB_T35_SMSC911X_GPIO); + gpmc_smsc911x_init(&cm_t35_smsc911x_cfg); + gpmc_smsc911x_init(&sb_t35_smsc911x_cfg); } #else static inline void __init cm_t35_init_ethernet(void) { return; } @@ -235,69 +178,10 @@ static void __init cm_t35_init_nand(void) static inline void cm_t35_init_nand(void) {} #endif -#if defined(CONFIG_TOUCHSCREEN_ADS7846) || \ - defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE) -#include <linux/spi/ads7846.h> - -static struct omap2_mcspi_device_config ads7846_mcspi_config = { - .turbo_mode = 0, - .single_channel = 1, /* 0: slave, 1: master */ -}; - -static int ads7846_get_pendown_state(void) -{ - return !gpio_get_value(CM_T35_GPIO_PENDOWN); -} - -static struct ads7846_platform_data ads7846_config = { - .x_max = 0x0fff, - .y_max = 0x0fff, - .x_plate_ohms = 180, - .pressure_max = 255, - .debounce_max = 10, - .debounce_tol = 3, - .debounce_rep = 1, - .get_pendown_state = ads7846_get_pendown_state, - .keep_vref_on = 1, -}; - -static struct spi_board_info cm_t35_spi_board_info[] __initdata = { - { - .modalias = "ads7846", - .bus_num = 1, - .chip_select = 0, - .max_speed_hz = 1500000, - .controller_data = &ads7846_mcspi_config, - .irq = OMAP_GPIO_IRQ(CM_T35_GPIO_PENDOWN), - .platform_data = &ads7846_config, - }, -}; - -static void __init cm_t35_init_ads7846(void) -{ - if ((gpio_request(CM_T35_GPIO_PENDOWN, "ADS7846_PENDOWN") == 0) && - (gpio_direction_input(CM_T35_GPIO_PENDOWN) == 0)) { - gpio_export(CM_T35_GPIO_PENDOWN, 0); - } else { - pr_err("CM-T35: could not obtain gpio for ADS7846_PENDOWN\n"); - return; - } - - spi_register_board_info(cm_t35_spi_board_info, - ARRAY_SIZE(cm_t35_spi_board_info)); -} -#else -static inline void cm_t35_init_ads7846(void) {} -#endif - #define CM_T35_LCD_EN_GPIO 157 #define CM_T35_LCD_BL_GPIO 58 #define CM_T35_DVI_EN_GPIO 54 -static int lcd_bl_gpio; -static int lcd_en_gpio; -static int dvi_en_gpio; - static int lcd_enabled; static int dvi_enabled; @@ -308,8 +192,8 @@ static int cm_t35_panel_enable_lcd(struct omap_dss_device *dssdev) return -EINVAL; } - gpio_set_value(lcd_en_gpio, 1); - gpio_set_value(lcd_bl_gpio, 1); + gpio_set_value(CM_T35_LCD_EN_GPIO, 1); + gpio_set_value(CM_T35_LCD_BL_GPIO, 1); lcd_enabled = 1; @@ -320,8 +204,8 @@ static void cm_t35_panel_disable_lcd(struct omap_dss_device *dssdev) { lcd_enabled = 0; - gpio_set_value(lcd_bl_gpio, 0); - gpio_set_value(lcd_en_gpio, 0); + gpio_set_value(CM_T35_LCD_BL_GPIO, 0); + gpio_set_value(CM_T35_LCD_EN_GPIO, 0); } static int cm_t35_panel_enable_dvi(struct omap_dss_device *dssdev) @@ -331,7 +215,7 @@ static int cm_t35_panel_enable_dvi(struct omap_dss_device *dssdev) return -EINVAL; } - gpio_set_value(dvi_en_gpio, 0); + gpio_set_value(CM_T35_DVI_EN_GPIO, 0); dvi_enabled = 1; return 0; @@ -339,7 +223,7 @@ static int cm_t35_panel_enable_dvi(struct omap_dss_device *dssdev) static void cm_t35_panel_disable_dvi(struct omap_dss_device *dssdev) { - gpio_set_value(dvi_en_gpio, 1); + gpio_set_value(CM_T35_DVI_EN_GPIO, 1); dvi_enabled = 0; } @@ -421,62 +305,38 @@ static struct spi_board_info cm_t35_lcd_spi_board_info[] __initdata = { }, }; +static struct gpio cm_t35_dss_gpios[] __initdata = { + { CM_T35_LCD_EN_GPIO, GPIOF_OUT_INIT_LOW, "lcd enable" }, + { CM_T35_LCD_BL_GPIO, GPIOF_OUT_INIT_LOW, "lcd bl enable" }, + { CM_T35_DVI_EN_GPIO, GPIOF_OUT_INIT_HIGH, "dvi enable" }, +}; + static void __init cm_t35_init_display(void) { int err; - lcd_en_gpio = CM_T35_LCD_EN_GPIO; - lcd_bl_gpio = CM_T35_LCD_BL_GPIO; - dvi_en_gpio = CM_T35_DVI_EN_GPIO; - spi_register_board_info(cm_t35_lcd_spi_board_info, ARRAY_SIZE(cm_t35_lcd_spi_board_info)); - err = gpio_request(lcd_en_gpio, "LCD RST"); - if (err) { - pr_err("CM-T35: failed to get LCD reset GPIO\n"); - goto out; - } - - err = gpio_request(lcd_bl_gpio, "LCD BL"); + err = gpio_request_array(cm_t35_dss_gpios, + ARRAY_SIZE(cm_t35_dss_gpios)); if (err) { - pr_err("CM-T35: failed to get LCD backlight control GPIO\n"); - goto err_lcd_bl; - } - - err = gpio_request(dvi_en_gpio, "DVI EN"); - if (err) { - pr_err("CM-T35: failed to get DVI reset GPIO\n"); - goto err_dvi_en; + pr_err("CM-T35: failed to request DSS control GPIOs\n"); + return; } - gpio_export(lcd_en_gpio, 0); - gpio_export(lcd_bl_gpio, 0); - gpio_export(dvi_en_gpio, 0); - gpio_direction_output(lcd_en_gpio, 0); - gpio_direction_output(lcd_bl_gpio, 0); - gpio_direction_output(dvi_en_gpio, 1); + gpio_export(CM_T35_LCD_EN_GPIO, 0); + gpio_export(CM_T35_LCD_BL_GPIO, 0); + gpio_export(CM_T35_DVI_EN_GPIO, 0); msleep(50); - gpio_set_value(lcd_en_gpio, 1); + gpio_set_value(CM_T35_LCD_EN_GPIO, 1); err = omap_display_init(&cm_t35_dss_data); if (err) { pr_err("CM-T35: failed to register DSS device\n"); - goto err_dev_reg; + gpio_free_array(cm_t35_dss_gpios, ARRAY_SIZE(cm_t35_dss_gpios)); } - - return; - -err_dev_reg: - gpio_free(dvi_en_gpio); -err_dvi_en: - gpio_free(lcd_bl_gpio); -err_lcd_bl: - gpio_free(lcd_en_gpio); -out: - - return; } static struct regulator_consumer_supply cm_t35_vmmc1_supply = { @@ -609,10 +469,8 @@ static int cm_t35_twl_gpio_setup(struct device *dev, unsigned gpio, { int wlan_rst = gpio + 2; - if ((gpio_request(wlan_rst, "WLAN RST") == 0) && - (gpio_direction_output(wlan_rst, 1) == 0)) { + if (gpio_request_one(wlan_rst, GPIOF_OUT_INIT_HIGH, "WLAN RST") == 0) { gpio_export(wlan_rst, 0); - udelay(10); gpio_set_value(wlan_rst, 0); udelay(10); @@ -653,19 +511,9 @@ static struct twl4030_platform_data cm_t35_twldata = { .vpll2 = &cm_t35_vpll2, }; -static struct i2c_board_info __initdata cm_t35_i2c_boardinfo[] = { - { - I2C_BOARD_INFO("tps65930", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = INT_34XX_SYS_NIRQ, - .platform_data = &cm_t35_twldata, - }, -}; - static void __init cm_t35_init_i2c(void) { - omap_register_i2c_bus(1, 2600, cm_t35_i2c_boardinfo, - ARRAY_SIZE(cm_t35_i2c_boardinfo)); + omap3_pmic_init("tps65930", &cm_t35_twldata); } static void __init cm_t35_init_early(void) @@ -775,12 +623,6 @@ static struct omap_board_mux board_mux[] __initdata = { }; #endif -static struct omap_musb_board_data musb_board_data = { - .interface_type = MUSB_INTERFACE_ULPI, - .mode = MUSB_OTG, - .power = 100, -}; - static struct omap_board_config_kernel cm_t35_config[] __initdata = { }; @@ -792,12 +634,12 @@ static void __init cm_t35_init(void) omap_serial_init(); cm_t35_init_i2c(); cm_t35_init_nand(); - cm_t35_init_ads7846(); + omap_ads7846_init(1, CM_T35_GPIO_PENDOWN, 0, NULL); cm_t35_init_ethernet(); cm_t35_init_led(); cm_t35_init_display(); - usb_musb_init(&musb_board_data); + usb_musb_init(NULL); usbhs_init(&usbhs_bdata); } diff --git a/arch/arm/mach-omap2/board-cm-t3517.c b/arch/arm/mach-omap2/board-cm-t3517.c index a27e3eee8292..08f08e812492 100644 --- a/arch/arm/mach-omap2/board-cm-t3517.c +++ b/arch/arm/mach-omap2/board-cm-t3517.c @@ -148,14 +148,13 @@ static void __init cm_t3517_init_rtc(void) { int err; - err = gpio_request(RTC_CS_EN_GPIO, "rtc cs en"); + err = gpio_request_one(RTC_CS_EN_GPIO, GPIOF_OUT_INIT_HIGH, + "rtc cs en"); if (err) { pr_err("CM-T3517: rtc cs en gpio request failed: %d\n", err); return; } - gpio_direction_output(RTC_CS_EN_GPIO, 1); - platform_device_register(&cm_t3517_rtc_device); } #else @@ -182,11 +181,11 @@ static int cm_t3517_init_usbh(void) { int err; - err = gpio_request(USB_HUB_RESET_GPIO, "usb hub rst"); + err = gpio_request_one(USB_HUB_RESET_GPIO, GPIOF_OUT_INIT_LOW, + "usb hub rst"); if (err) { pr_err("CM-T3517: usb hub rst gpio request failed: %d\n", err); } else { - gpio_direction_output(USB_HUB_RESET_GPIO, 0); udelay(10); gpio_set_value(USB_HUB_RESET_GPIO, 1); msleep(1); diff --git a/arch/arm/mach-omap2/board-devkit8000.c b/arch/arm/mach-omap2/board-devkit8000.c index 1d1b56a29fb1..cf520d7dd614 100644 --- a/arch/arm/mach-omap2/board-devkit8000.c +++ b/arch/arm/mach-omap2/board-devkit8000.c @@ -51,7 +51,6 @@ #include <plat/mcspi.h> #include <linux/input/matrix_keypad.h> #include <linux/spi/spi.h> -#include <linux/spi/ads7846.h> #include <linux/dm9000.h> #include <linux/interrupt.h> @@ -60,6 +59,7 @@ #include "mux.h" #include "hsmmc.h" #include "timer-gp.h" +#include "common-board-devices.h" #define NAND_BLOCK_SIZE SZ_128K @@ -97,13 +97,6 @@ static struct mtd_partition devkit8000_nand_partitions[] = { }, }; -static struct omap_nand_platform_data devkit8000_nand_data = { - .options = NAND_BUSWIDTH_16, - .parts = devkit8000_nand_partitions, - .nr_parts = ARRAY_SIZE(devkit8000_nand_partitions), - .dma_channel = -1, /* disable DMA in OMAP NAND driver */ -}; - static struct omap2_hsmmc_info mmc[] = { { .mmc = 1, @@ -249,7 +242,7 @@ static int devkit8000_twl_gpio_setup(struct device *dev, /* TWL4030_GPIO_MAX + 0 is "LCD_PWREN" (out, active high) */ devkit8000_lcd_device.reset_gpio = gpio + TWL4030_GPIO_MAX + 0; ret = gpio_request_one(devkit8000_lcd_device.reset_gpio, - GPIOF_DIR_OUT | GPIOF_INIT_LOW, "LCD_PWREN"); + GPIOF_OUT_INIT_LOW, "LCD_PWREN"); if (ret < 0) { devkit8000_lcd_device.reset_gpio = -EINVAL; printk(KERN_ERR "Failed to request GPIO for LCD_PWRN\n"); @@ -258,7 +251,7 @@ static int devkit8000_twl_gpio_setup(struct device *dev, /* gpio + 7 is "DVI_PD" (out, active low) */ devkit8000_dvi_device.reset_gpio = gpio + 7; ret = gpio_request_one(devkit8000_dvi_device.reset_gpio, - GPIOF_DIR_OUT | GPIOF_INIT_LOW, "DVI PowerDown"); + GPIOF_OUT_INIT_LOW, "DVI PowerDown"); if (ret < 0) { devkit8000_dvi_device.reset_gpio = -EINVAL; printk(KERN_ERR "Failed to request GPIO for DVI PowerDown\n"); @@ -366,19 +359,9 @@ static struct twl4030_platform_data devkit8000_twldata = { .keypad = &devkit8000_kp_data, }; -static struct i2c_board_info __initdata devkit8000_i2c_boardinfo[] = { - { - I2C_BOARD_INFO("tps65930", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = INT_34XX_SYS_NIRQ, - .platform_data = &devkit8000_twldata, - }, -}; - static int __init devkit8000_i2c_init(void) { - omap_register_i2c_bus(1, 2600, devkit8000_i2c_boardinfo, - ARRAY_SIZE(devkit8000_i2c_boardinfo)); + omap3_pmic_init("tps65930", &devkit8000_twldata); /* Bus 3 is attached to the DVI port where devices like the pico DLP * projector don't work reliably with 400kHz */ omap_register_i2c_bus(3, 400, NULL, 0); @@ -463,56 +446,6 @@ static void __init devkit8000_init_irq(void) #endif } -static void __init devkit8000_ads7846_init(void) -{ - int gpio = OMAP3_DEVKIT_TS_GPIO; - int ret; - - ret = gpio_request(gpio, "ads7846_pen_down"); - if (ret < 0) { - printk(KERN_ERR "Failed to request GPIO %d for " - "ads7846 pen down IRQ\n", gpio); - return; - } - - gpio_direction_input(gpio); -} - -static int ads7846_get_pendown_state(void) -{ - return !gpio_get_value(OMAP3_DEVKIT_TS_GPIO); -} - -static struct ads7846_platform_data ads7846_config = { - .x_max = 0x0fff, - .y_max = 0x0fff, - .x_plate_ohms = 180, - .pressure_max = 255, - .debounce_max = 10, - .debounce_tol = 5, - .debounce_rep = 1, - .get_pendown_state = ads7846_get_pendown_state, - .keep_vref_on = 1, - .settle_delay_usecs = 150, -}; - -static struct omap2_mcspi_device_config ads7846_mcspi_config = { - .turbo_mode = 0, - .single_channel = 1, /* 0: slave, 1: master */ -}; - -static struct spi_board_info devkit8000_spi_board_info[] __initdata = { - { - .modalias = "ads7846", - .bus_num = 2, - .chip_select = 0, - .max_speed_hz = 1500000, - .controller_data = &ads7846_mcspi_config, - .irq = OMAP_GPIO_IRQ(OMAP3_DEVKIT_TS_GPIO), - .platform_data = &ads7846_config, - } -}; - #define OMAP_DM9000_BASE 0x2c000000 static struct resource omap_dm9000_resources[] = { @@ -550,14 +483,14 @@ static void __init omap_dm9000_init(void) { unsigned char *eth_addr = omap_dm9000_platdata.dev_addr; struct omap_die_id odi; + int ret; - if (gpio_request(OMAP_DM9000_GPIO_IRQ, "dm9000 irq") < 0) { + ret = gpio_request_one(OMAP_DM9000_GPIO_IRQ, GPIOF_IN, "dm9000 irq"); + if (ret < 0) { printk(KERN_ERR "Failed to request GPIO%d for dm9000 IRQ\n", OMAP_DM9000_GPIO_IRQ); return; - } - - gpio_direction_input(OMAP_DM9000_GPIO_IRQ); + } /* init the mac address using DIE id */ omap_get_die_id(&odi); @@ -576,45 +509,6 @@ static struct platform_device *devkit8000_devices[] __initdata = { &omap_dm9000_dev, }; -static void __init devkit8000_flash_init(void) -{ - u8 cs = 0; - u8 nandcs = GPMC_CS_NUM + 1; - - /* find out the chip-select on which NAND exists */ - while (cs < GPMC_CS_NUM) { - u32 ret = 0; - ret = gpmc_cs_read_reg(cs, GPMC_CS_CONFIG1); - - if ((ret & 0xC00) == 0x800) { - printk(KERN_INFO "Found NAND on CS%d\n", cs); - if (nandcs > GPMC_CS_NUM) - nandcs = cs; - } - cs++; - } - - if (nandcs > GPMC_CS_NUM) { - printk(KERN_INFO "NAND: Unable to find configuration " - "in GPMC\n "); - return; - } - - if (nandcs < GPMC_CS_NUM) { - devkit8000_nand_data.cs = nandcs; - - printk(KERN_INFO "Registering NAND on CS%d\n", nandcs); - if (gpmc_nand_init(&devkit8000_nand_data) < 0) - printk(KERN_ERR "Unable to register NAND device\n"); - } -} - -static struct omap_musb_board_data musb_board_data = { - .interface_type = MUSB_INTERFACE_ULPI, - .mode = MUSB_OTG, - .power = 100, -}; - static const struct usbhs_omap_board_data usbhs_bdata __initconst = { .port_mode[0] = OMAP_EHCI_PORT_MODE_PHY, @@ -795,14 +689,13 @@ static void __init devkit8000_init(void) ARRAY_SIZE(devkit8000_devices)); omap_display_init(&devkit8000_dss_data); - spi_register_board_info(devkit8000_spi_board_info, - ARRAY_SIZE(devkit8000_spi_board_info)); - devkit8000_ads7846_init(); + omap_ads7846_init(2, OMAP3_DEVKIT_TS_GPIO, 0, NULL); - usb_musb_init(&musb_board_data); + usb_musb_init(NULL); usbhs_init(&usbhs_bdata); - devkit8000_flash_init(); + omap_nand_flash_init(NAND_BUSWIDTH_16, devkit8000_nand_partitions, + ARRAY_SIZE(devkit8000_nand_partitions)); /* Ensure SDRC pins are mux'd for self-refresh */ omap_mux_init_signal("sdrc_cke0", OMAP_PIN_OUTPUT); diff --git a/arch/arm/mach-omap2/board-igep0020.c b/arch/arm/mach-omap2/board-igep0020.c index 3da64d361651..0c1bfca3f731 100644 --- a/arch/arm/mach-omap2/board-igep0020.c +++ b/arch/arm/mach-omap2/board-igep0020.c @@ -38,6 +38,7 @@ #include "mux.h" #include "hsmmc.h" #include "sdram-numonyx-m65kxxxxam.h" +#include "common-board-devices.h" #define IGEP2_SMSC911X_CS 5 #define IGEP2_SMSC911X_GPIO 176 @@ -54,6 +55,11 @@ #define IGEP2_RC_GPIO_WIFI_NRESET 139 #define IGEP2_RC_GPIO_BT_NRESET 137 +#define IGEP3_GPIO_LED0_GREEN 54 +#define IGEP3_GPIO_LED0_RED 53 +#define IGEP3_GPIO_LED1_RED 16 +#define IGEP3_GPIO_USBH_NRESET 183 + /* * IGEP2 Hardware Revision Table * @@ -68,6 +74,7 @@ #define IGEP2_BOARD_HWREV_B 0 #define IGEP2_BOARD_HWREV_C 1 +#define IGEP3_BOARD_HWREV 2 static u8 hwrev; @@ -75,24 +82,29 @@ static void __init igep2_get_revision(void) { u8 ret; + if (machine_is_igep0030()) { + hwrev = IGEP3_BOARD_HWREV; + return; + } + omap_mux_init_gpio(IGEP2_GPIO_LED1_RED, OMAP_PIN_INPUT); - if ((gpio_request(IGEP2_GPIO_LED1_RED, "GPIO_HW0_REV") == 0) && - (gpio_direction_input(IGEP2_GPIO_LED1_RED) == 0)) { - ret = gpio_get_value(IGEP2_GPIO_LED1_RED); - if (ret == 0) { - pr_info("IGEP2: Hardware Revision C (B-NON compatible)\n"); - hwrev = IGEP2_BOARD_HWREV_C; - } else if (ret == 1) { - pr_info("IGEP2: Hardware Revision B/C (B compatible)\n"); - hwrev = IGEP2_BOARD_HWREV_B; - } else { - pr_err("IGEP2: Unknown Hardware Revision\n"); - hwrev = -1; - } - } else { + if (gpio_request_one(IGEP2_GPIO_LED1_RED, GPIOF_IN, "GPIO_HW0_REV")) { pr_warning("IGEP2: Could not obtain gpio GPIO_HW0_REV\n"); pr_err("IGEP2: Unknown Hardware Revision\n"); + return; + } + + ret = gpio_get_value(IGEP2_GPIO_LED1_RED); + if (ret == 0) { + pr_info("IGEP2: Hardware Revision C (B-NON compatible)\n"); + hwrev = IGEP2_BOARD_HWREV_C; + } else if (ret == 1) { + pr_info("IGEP2: Hardware Revision B/C (B compatible)\n"); + hwrev = IGEP2_BOARD_HWREV_B; + } else { + pr_err("IGEP2: Unknown Hardware Revision\n"); + hwrev = -1; } gpio_free(IGEP2_GPIO_LED1_RED); @@ -111,7 +123,7 @@ static void __init igep2_get_revision(void) * So MTD regards it as 4KiB page size and 256KiB block size 64*(2*2048) */ -static struct mtd_partition igep2_onenand_partitions[] = { +static struct mtd_partition igep_onenand_partitions[] = { { .name = "X-Loader", .offset = 0, @@ -139,21 +151,21 @@ static struct mtd_partition igep2_onenand_partitions[] = { }, }; -static struct omap_onenand_platform_data igep2_onenand_data = { - .parts = igep2_onenand_partitions, - .nr_parts = ARRAY_SIZE(igep2_onenand_partitions), +static struct omap_onenand_platform_data igep_onenand_data = { + .parts = igep_onenand_partitions, + .nr_parts = ARRAY_SIZE(igep_onenand_partitions), .dma_channel = -1, /* disable DMA in OMAP OneNAND driver */ }; -static struct platform_device igep2_onenand_device = { +static struct platform_device igep_onenand_device = { .name = "omap2-onenand", .id = -1, .dev = { - .platform_data = &igep2_onenand_data, + .platform_data = &igep_onenand_data, }, }; -static void __init igep2_flash_init(void) +static void __init igep_flash_init(void) { u8 cs = 0; u8 onenandcs = GPMC_CS_NUM + 1; @@ -165,7 +177,7 @@ static void __init igep2_flash_init(void) /* Check if NAND/oneNAND is configured */ if ((ret & 0xC00) == 0x800) /* NAND found */ - pr_err("IGEP2: Unsupported NAND found\n"); + pr_err("IGEP: Unsupported NAND found\n"); else { ret = gpmc_cs_read_reg(cs, GPMC_CS_CONFIG7); if ((ret & 0x3F) == (ONENAND_MAP >> 24)) @@ -175,85 +187,46 @@ static void __init igep2_flash_init(void) } if (onenandcs > GPMC_CS_NUM) { - pr_err("IGEP2: Unable to find configuration in GPMC\n"); + pr_err("IGEP: Unable to find configuration in GPMC\n"); return; } - igep2_onenand_data.cs = onenandcs; + igep_onenand_data.cs = onenandcs; - if (platform_device_register(&igep2_onenand_device) < 0) - pr_err("IGEP2: Unable to register OneNAND device\n"); + if (platform_device_register(&igep_onenand_device) < 0) + pr_err("IGEP: Unable to register OneNAND device\n"); } #else -static void __init igep2_flash_init(void) {} +static void __init igep_flash_init(void) {} #endif #if defined(CONFIG_SMSC911X) || defined(CONFIG_SMSC911X_MODULE) #include <linux/smsc911x.h> +#include <plat/gpmc-smsc911x.h> -static struct smsc911x_platform_config igep2_smsc911x_config = { - .irq_polarity = SMSC911X_IRQ_POLARITY_ACTIVE_LOW, - .irq_type = SMSC911X_IRQ_TYPE_OPEN_DRAIN, - .flags = SMSC911X_USE_32BIT | SMSC911X_SAVE_MAC_ADDRESS , - .phy_interface = PHY_INTERFACE_MODE_MII, -}; - -static struct resource igep2_smsc911x_resources[] = { - { - .flags = IORESOURCE_MEM, - }, - { - .start = OMAP_GPIO_IRQ(IGEP2_SMSC911X_GPIO), - .end = OMAP_GPIO_IRQ(IGEP2_SMSC911X_GPIO), - .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWLEVEL, - }, -}; - -static struct platform_device igep2_smsc911x_device = { - .name = "smsc911x", - .id = 0, - .num_resources = ARRAY_SIZE(igep2_smsc911x_resources), - .resource = igep2_smsc911x_resources, - .dev = { - .platform_data = &igep2_smsc911x_config, - }, +static struct omap_smsc911x_platform_data smsc911x_cfg = { + .cs = IGEP2_SMSC911X_CS, + .gpio_irq = IGEP2_SMSC911X_GPIO, + .gpio_reset = -EINVAL, + .flags = SMSC911X_USE_32BIT | SMSC911X_SAVE_MAC_ADDRESS, }; static inline void __init igep2_init_smsc911x(void) { - unsigned long cs_mem_base; - - if (gpmc_cs_request(IGEP2_SMSC911X_CS, SZ_16M, &cs_mem_base) < 0) { - pr_err("IGEP v2: Failed request for GPMC mem for smsc911x\n"); - gpmc_cs_free(IGEP2_SMSC911X_CS); - return; - } - - igep2_smsc911x_resources[0].start = cs_mem_base + 0x0; - igep2_smsc911x_resources[0].end = cs_mem_base + 0xff; - - if ((gpio_request(IGEP2_SMSC911X_GPIO, "SMSC911X IRQ") == 0) && - (gpio_direction_input(IGEP2_SMSC911X_GPIO) == 0)) { - gpio_export(IGEP2_SMSC911X_GPIO, 0); - } else { - pr_err("IGEP v2: Could not obtain gpio for for SMSC911X IRQ\n"); - return; - } - - platform_device_register(&igep2_smsc911x_device); + gpmc_smsc911x_init(&smsc911x_cfg); } #else static inline void __init igep2_init_smsc911x(void) { } #endif -static struct regulator_consumer_supply igep2_vmmc1_supply = +static struct regulator_consumer_supply igep_vmmc1_supply = REGULATOR_SUPPLY("vmmc", "omap_hsmmc.0"); /* VMMC1 for OMAP VDD_MMC1 (i/o) and MMC1 card */ -static struct regulator_init_data igep2_vmmc1 = { +static struct regulator_init_data igep_vmmc1 = { .constraints = { .min_uV = 1850000, .max_uV = 3150000, @@ -264,13 +237,13 @@ static struct regulator_init_data igep2_vmmc1 = { | REGULATOR_CHANGE_STATUS, }, .num_consumer_supplies = 1, - .consumer_supplies = &igep2_vmmc1_supply, + .consumer_supplies = &igep_vmmc1_supply, }; -static struct regulator_consumer_supply igep2_vio_supply = +static struct regulator_consumer_supply igep_vio_supply = REGULATOR_SUPPLY("vmmc_aux", "omap_hsmmc.1"); -static struct regulator_init_data igep2_vio = { +static struct regulator_init_data igep_vio = { .constraints = { .min_uV = 1800000, .max_uV = 1800000, @@ -282,34 +255,34 @@ static struct regulator_init_data igep2_vio = { | REGULATOR_CHANGE_STATUS, }, .num_consumer_supplies = 1, - .consumer_supplies = &igep2_vio_supply, + .consumer_supplies = &igep_vio_supply, }; -static struct regulator_consumer_supply igep2_vmmc2_supply = +static struct regulator_consumer_supply igep_vmmc2_supply = REGULATOR_SUPPLY("vmmc", "omap_hsmmc.1"); -static struct regulator_init_data igep2_vmmc2 = { +static struct regulator_init_data igep_vmmc2 = { .constraints = { .valid_modes_mask = REGULATOR_MODE_NORMAL, .always_on = 1, }, .num_consumer_supplies = 1, - .consumer_supplies = &igep2_vmmc2_supply, + .consumer_supplies = &igep_vmmc2_supply, }; -static struct fixed_voltage_config igep2_vwlan = { +static struct fixed_voltage_config igep_vwlan = { .supply_name = "vwlan", .microvolts = 3300000, .gpio = -EINVAL, .enabled_at_boot = 1, - .init_data = &igep2_vmmc2, + .init_data = &igep_vmmc2, }; -static struct platform_device igep2_vwlan_device = { +static struct platform_device igep_vwlan_device = { .name = "reg-fixed-voltage", .id = 0, .dev = { - .platform_data = &igep2_vwlan, + .platform_data = &igep_vwlan, }, }; @@ -334,20 +307,17 @@ static struct omap2_hsmmc_info mmc[] = { #if defined(CONFIG_LEDS_GPIO) || defined(CONFIG_LEDS_GPIO_MODULE) #include <linux/leds.h> -static struct gpio_led igep2_gpio_leds[] = { +static struct gpio_led igep_gpio_leds[] = { [0] = { .name = "gpio-led:red:d0", - .gpio = IGEP2_GPIO_LED0_RED, .default_trigger = "default-off" }, [1] = { .name = "gpio-led:green:d0", - .gpio = IGEP2_GPIO_LED0_GREEN, .default_trigger = "default-off", }, [2] = { .name = "gpio-led:red:d1", - .gpio = IGEP2_GPIO_LED1_RED, .default_trigger = "default-off", }, [3] = { @@ -358,94 +328,119 @@ static struct gpio_led igep2_gpio_leds[] = { }, }; -static struct gpio_led_platform_data igep2_led_pdata = { - .leds = igep2_gpio_leds, - .num_leds = ARRAY_SIZE(igep2_gpio_leds), +static struct gpio_led_platform_data igep_led_pdata = { + .leds = igep_gpio_leds, + .num_leds = ARRAY_SIZE(igep_gpio_leds), }; -static struct platform_device igep2_led_device = { +static struct platform_device igep_led_device = { .name = "leds-gpio", .id = -1, .dev = { - .platform_data = &igep2_led_pdata, + .platform_data = &igep_led_pdata, }, }; -static void __init igep2_leds_init(void) +static void __init igep_leds_init(void) { - platform_device_register(&igep2_led_device); + if (machine_is_igep0020()) { + igep_gpio_leds[0].gpio = IGEP2_GPIO_LED0_RED; + igep_gpio_leds[1].gpio = IGEP2_GPIO_LED0_GREEN; + igep_gpio_leds[2].gpio = IGEP2_GPIO_LED1_RED; + } else { + igep_gpio_leds[0].gpio = IGEP3_GPIO_LED0_RED; + igep_gpio_leds[1].gpio = IGEP3_GPIO_LED0_GREEN; + igep_gpio_leds[2].gpio = IGEP3_GPIO_LED1_RED; + } + + platform_device_register(&igep_led_device); } #else -static inline void igep2_leds_init(void) +static struct gpio igep_gpio_leds[] __initdata = { + { -EINVAL, GPIOF_OUT_INIT_LOW, "gpio-led:red:d0" }, + { -EINVAL, GPIOF_OUT_INIT_LOW, "gpio-led:green:d0" }, + { -EINVAL, GPIOF_OUT_INIT_LOW, "gpio-led:red:d1" }, +}; + +static inline void igep_leds_init(void) { - if ((gpio_request(IGEP2_GPIO_LED0_RED, "gpio-led:red:d0") == 0) && - (gpio_direction_output(IGEP2_GPIO_LED0_RED, 0) == 0)) - gpio_export(IGEP2_GPIO_LED0_RED, 0); - else - pr_warning("IGEP v2: Could not obtain gpio GPIO_LED0_RED\n"); + int i; - if ((gpio_request(IGEP2_GPIO_LED0_GREEN, "gpio-led:green:d0") == 0) && - (gpio_direction_output(IGEP2_GPIO_LED0_GREEN, 0) == 0)) - gpio_export(IGEP2_GPIO_LED0_GREEN, 0); - else - pr_warning("IGEP v2: Could not obtain gpio GPIO_LED0_GREEN\n"); + if (machine_is_igep0020()) { + igep_gpio_leds[0].gpio = IGEP2_GPIO_LED0_RED; + igep_gpio_leds[1].gpio = IGEP2_GPIO_LED0_GREEN; + igep_gpio_leds[2].gpio = IGEP2_GPIO_LED1_RED; + } else { + igep_gpio_leds[0].gpio = IGEP3_GPIO_LED0_RED; + igep_gpio_leds[1].gpio = IGEP3_GPIO_LED0_GREEN; + igep_gpio_leds[2].gpio = IGEP3_GPIO_LED1_RED; + } - if ((gpio_request(IGEP2_GPIO_LED1_RED, "gpio-led:red:d1") == 0) && - (gpio_direction_output(IGEP2_GPIO_LED1_RED, 0) == 0)) - gpio_export(IGEP2_GPIO_LED1_RED, 0); - else - pr_warning("IGEP v2: Could not obtain gpio GPIO_LED1_RED\n"); + if (gpio_request_array(igep_gpio_leds, ARRAY_SIZE(igep_gpio_leds))) { + pr_warning("IGEP v2: Could not obtain leds gpios\n"); + return; + } + for (i = 0; i < ARRAY_SIZE(igep_gpio_leds); i++) + gpio_export(igep_gpio_leds[i].gpio, 0); } #endif -static int igep2_twl_gpio_setup(struct device *dev, +static struct gpio igep2_twl_gpios[] = { + { -EINVAL, GPIOF_IN, "GPIO_EHCI_NOC" }, + { -EINVAL, GPIOF_OUT_INIT_LOW, "GPIO_USBH_CPEN" }, +}; + +static int igep_twl_gpio_setup(struct device *dev, unsigned gpio, unsigned ngpio) { + int ret; + /* gpio + 0 is "mmc0_cd" (input/IRQ) */ mmc[0].gpio_cd = gpio + 0; omap2_hsmmc_init(mmc); - /* - * REVISIT: need ehci-omap hooks for external VBUS - * power switch and overcurrent detect - */ - if ((gpio_request(gpio + 1, "GPIO_EHCI_NOC") < 0) || - (gpio_direction_input(gpio + 1) < 0)) - pr_err("IGEP2: Could not obtain gpio for EHCI NOC"); - - /* - * TWL4030_GPIO_MAX + 0 == ledA, GPIO_USBH_CPEN - * (out, active low) - */ - if ((gpio_request(gpio + TWL4030_GPIO_MAX, "GPIO_USBH_CPEN") < 0) || - (gpio_direction_output(gpio + TWL4030_GPIO_MAX, 0) < 0)) - pr_err("IGEP2: Could not obtain gpio for USBH_CPEN"); - /* TWL4030_GPIO_MAX + 1 == ledB (out, active low LED) */ #if !defined(CONFIG_LEDS_GPIO) && !defined(CONFIG_LEDS_GPIO_MODULE) - if ((gpio_request(gpio+TWL4030_GPIO_MAX+1, "gpio-led:green:d1") == 0) - && (gpio_direction_output(gpio + TWL4030_GPIO_MAX + 1, 1) == 0)) + ret = gpio_request_one(gpio + TWL4030_GPIO_MAX + 1, GPIOF_OUT_INIT_HIGH, + "gpio-led:green:d1"); + if (ret == 0) gpio_export(gpio + TWL4030_GPIO_MAX + 1, 0); else - pr_warning("IGEP v2: Could not obtain gpio GPIO_LED1_GREEN\n"); + pr_warning("IGEP: Could not obtain gpio GPIO_LED1_GREEN\n"); #else - igep2_gpio_leds[3].gpio = gpio + TWL4030_GPIO_MAX + 1; + igep_gpio_leds[3].gpio = gpio + TWL4030_GPIO_MAX + 1; #endif + if (machine_is_igep0030()) + return 0; + + /* + * REVISIT: need ehci-omap hooks for external VBUS + * power switch and overcurrent detect + */ + igep2_twl_gpios[0].gpio = gpio + 1; + + /* TWL4030_GPIO_MAX + 0 == ledA, GPIO_USBH_CPEN (out, active low) */ + igep2_twl_gpios[1].gpio = gpio + TWL4030_GPIO_MAX; + + ret = gpio_request_array(igep2_twl_gpios, ARRAY_SIZE(igep2_twl_gpios)); + if (ret < 0) + pr_err("IGEP2: Could not obtain gpio for USBH_CPEN"); + return 0; }; -static struct twl4030_gpio_platform_data igep2_twl4030_gpio_pdata = { +static struct twl4030_gpio_platform_data igep_twl4030_gpio_pdata = { .gpio_base = OMAP_MAX_GPIO_LINES, .irq_base = TWL4030_GPIO_IRQ_BASE, .irq_end = TWL4030_GPIO_IRQ_END, .use_leds = true, - .setup = igep2_twl_gpio_setup, + .setup = igep_twl_gpio_setup, }; -static struct twl4030_usb_data igep2_usb_data = { +static struct twl4030_usb_data igep_usb_data = { .usb_mode = T2_USB_MODE_ULPI, }; @@ -507,16 +502,17 @@ static struct regulator_init_data igep2_vpll2 = { static void __init igep2_display_init(void) { - if (gpio_request(IGEP2_GPIO_DVI_PUP, "GPIO_DVI_PUP") && - gpio_direction_output(IGEP2_GPIO_DVI_PUP, 1)) + int err = gpio_request_one(IGEP2_GPIO_DVI_PUP, GPIOF_OUT_INIT_HIGH, + "GPIO_DVI_PUP"); + if (err) pr_err("IGEP v2: Could not obtain gpio GPIO_DVI_PUP\n"); } -static struct platform_device *igep2_devices[] __initdata = { - &igep2_vwlan_device, +static struct platform_device *igep_devices[] __initdata = { + &igep_vwlan_device, }; -static void __init igep2_init_early(void) +static void __init igep_init_early(void) { omap2_init_common_infrastructure(); omap2_init_common_devices(m65kxxxxam_sdrc_params, @@ -561,27 +557,15 @@ static struct twl4030_keypad_data igep2_keypad_pdata = { .rep = 1, }; -static struct twl4030_platform_data igep2_twldata = { +static struct twl4030_platform_data igep_twldata = { .irq_base = TWL4030_IRQ_BASE, .irq_end = TWL4030_IRQ_END, /* platform_data for children goes here */ - .usb = &igep2_usb_data, - .codec = &igep2_codec_data, - .gpio = &igep2_twl4030_gpio_pdata, - .keypad = &igep2_keypad_pdata, - .vmmc1 = &igep2_vmmc1, - .vpll2 = &igep2_vpll2, - .vio = &igep2_vio, -}; - -static struct i2c_board_info __initdata igep2_i2c1_boardinfo[] = { - { - I2C_BOARD_INFO("twl4030", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = INT_34XX_SYS_NIRQ, - .platform_data = &igep2_twldata, - }, + .usb = &igep_usb_data, + .gpio = &igep_twl4030_gpio_pdata, + .vmmc1 = &igep_vmmc1, + .vio = &igep_vio, }; static struct i2c_board_info __initdata igep2_i2c3_boardinfo[] = { @@ -590,32 +574,29 @@ static struct i2c_board_info __initdata igep2_i2c3_boardinfo[] = { }, }; -static void __init igep2_i2c_init(void) +static void __init igep_i2c_init(void) { int ret; - ret = omap_register_i2c_bus(1, 2600, igep2_i2c1_boardinfo, - ARRAY_SIZE(igep2_i2c1_boardinfo)); - if (ret) - pr_warning("IGEP2: Could not register I2C1 bus (%d)\n", ret); + if (machine_is_igep0020()) { + /* + * Bus 3 is attached to the DVI port where devices like the + * pico DLP projector don't work reliably with 400kHz + */ + ret = omap_register_i2c_bus(3, 100, igep2_i2c3_boardinfo, + ARRAY_SIZE(igep2_i2c3_boardinfo)); + if (ret) + pr_warning("IGEP2: Could not register I2C3 bus (%d)\n", ret); + + igep_twldata.codec = &igep2_codec_data; + igep_twldata.keypad = &igep2_keypad_pdata; + igep_twldata.vpll2 = &igep2_vpll2; + } - /* - * Bus 3 is attached to the DVI port where devices like the pico DLP - * projector don't work reliably with 400kHz - */ - ret = omap_register_i2c_bus(3, 100, igep2_i2c3_boardinfo, - ARRAY_SIZE(igep2_i2c3_boardinfo)); - if (ret) - pr_warning("IGEP2: Could not register I2C3 bus (%d)\n", ret); + omap3_pmic_init("twl4030", &igep_twldata); } -static struct omap_musb_board_data musb_board_data = { - .interface_type = MUSB_INTERFACE_ULPI, - .mode = MUSB_OTG, - .power = 100, -}; - -static const struct usbhs_omap_board_data usbhs_bdata __initconst = { +static const struct usbhs_omap_board_data igep2_usbhs_bdata __initconst = { .port_mode[0] = OMAP_EHCI_PORT_MODE_PHY, .port_mode[1] = OMAP_USBHS_PORT_MODE_UNUSED, .port_mode[2] = OMAP_USBHS_PORT_MODE_UNUSED, @@ -626,6 +607,17 @@ static const struct usbhs_omap_board_data usbhs_bdata __initconst = { .reset_gpio_port[2] = -EINVAL, }; +static const struct usbhs_omap_board_data igep3_usbhs_bdata __initconst = { + .port_mode[0] = OMAP_USBHS_PORT_MODE_UNUSED, + .port_mode[1] = OMAP_EHCI_PORT_MODE_PHY, + .port_mode[2] = OMAP_USBHS_PORT_MODE_UNUSED, + + .phy_reset = true, + .reset_gpio_port[0] = -EINVAL, + .reset_gpio_port[1] = IGEP3_GPIO_USBH_NRESET, + .reset_gpio_port[2] = -EINVAL, +}; + #ifdef CONFIG_OMAP_MUX static struct omap_board_mux board_mux[] __initdata = { { .reg_offset = OMAP_MUX_TERMINATOR }, @@ -633,82 +625,95 @@ static struct omap_board_mux board_mux[] __initdata = { #endif #if defined(CONFIG_LIBERTAS_SDIO) || defined(CONFIG_LIBERTAS_SDIO_MODULE) +static struct gpio igep_wlan_bt_gpios[] __initdata = { + { -EINVAL, GPIOF_OUT_INIT_HIGH, "GPIO_WIFI_NPD" }, + { -EINVAL, GPIOF_OUT_INIT_HIGH, "GPIO_WIFI_NRESET" }, + { -EINVAL, GPIOF_OUT_INIT_HIGH, "GPIO_BT_NRESET" }, +}; -static void __init igep2_wlan_bt_init(void) +static void __init igep_wlan_bt_init(void) { - unsigned npd, wreset, btreset; + int err; /* GPIO's for WLAN-BT combo depends on hardware revision */ if (hwrev == IGEP2_BOARD_HWREV_B) { - npd = IGEP2_RB_GPIO_WIFI_NPD; - wreset = IGEP2_RB_GPIO_WIFI_NRESET; - btreset = IGEP2_RB_GPIO_BT_NRESET; - } else if (hwrev == IGEP2_BOARD_HWREV_C) { - npd = IGEP2_RC_GPIO_WIFI_NPD; - wreset = IGEP2_RC_GPIO_WIFI_NRESET; - btreset = IGEP2_RC_GPIO_BT_NRESET; + igep_wlan_bt_gpios[0].gpio = IGEP2_RB_GPIO_WIFI_NPD; + igep_wlan_bt_gpios[1].gpio = IGEP2_RB_GPIO_WIFI_NRESET; + igep_wlan_bt_gpios[2].gpio = IGEP2_RB_GPIO_BT_NRESET; + } else if (hwrev == IGEP2_BOARD_HWREV_C || machine_is_igep0030()) { + igep_wlan_bt_gpios[0].gpio = IGEP2_RC_GPIO_WIFI_NPD; + igep_wlan_bt_gpios[1].gpio = IGEP2_RC_GPIO_WIFI_NRESET; + igep_wlan_bt_gpios[2].gpio = IGEP2_RC_GPIO_BT_NRESET; } else return; - /* Set GPIO's for WLAN-BT combo module */ - if ((gpio_request(npd, "GPIO_WIFI_NPD") == 0) && - (gpio_direction_output(npd, 1) == 0)) { - gpio_export(npd, 0); - } else - pr_warning("IGEP2: Could not obtain gpio GPIO_WIFI_NPD\n"); - - if ((gpio_request(wreset, "GPIO_WIFI_NRESET") == 0) && - (gpio_direction_output(wreset, 1) == 0)) { - gpio_export(wreset, 0); - gpio_set_value(wreset, 0); - udelay(10); - gpio_set_value(wreset, 1); - } else - pr_warning("IGEP2: Could not obtain gpio GPIO_WIFI_NRESET\n"); + err = gpio_request_array(igep_wlan_bt_gpios, + ARRAY_SIZE(igep_wlan_bt_gpios)); + if (err) { + pr_warning("IGEP2: Could not obtain WIFI/BT gpios\n"); + return; + } + + gpio_export(igep_wlan_bt_gpios[0].gpio, 0); + gpio_export(igep_wlan_bt_gpios[1].gpio, 0); + gpio_export(igep_wlan_bt_gpios[2].gpio, 0); + + gpio_set_value(igep_wlan_bt_gpios[1].gpio, 0); + udelay(10); + gpio_set_value(igep_wlan_bt_gpios[1].gpio, 1); - if ((gpio_request(btreset, "GPIO_BT_NRESET") == 0) && - (gpio_direction_output(btreset, 1) == 0)) { - gpio_export(btreset, 0); - } else - pr_warning("IGEP2: Could not obtain gpio GPIO_BT_NRESET\n"); } #else -static inline void __init igep2_wlan_bt_init(void) { } +static inline void __init igep_wlan_bt_init(void) { } #endif -static void __init igep2_init(void) +static void __init igep_init(void) { omap3_mux_init(board_mux, OMAP_PACKAGE_CBB); /* Get IGEP2 hardware revision */ igep2_get_revision(); /* Register I2C busses and drivers */ - igep2_i2c_init(); - platform_add_devices(igep2_devices, ARRAY_SIZE(igep2_devices)); - omap_display_init(&igep2_dss_data); + igep_i2c_init(); + platform_add_devices(igep_devices, ARRAY_SIZE(igep_devices)); omap_serial_init(); - usb_musb_init(&musb_board_data); - usbhs_init(&usbhs_bdata); + usb_musb_init(NULL); - igep2_flash_init(); - igep2_leds_init(); - igep2_display_init(); - igep2_init_smsc911x(); + igep_flash_init(); + igep_leds_init(); /* * WLAN-BT combo module from MuRata which has a Marvell WLAN * (88W8686) + CSR Bluetooth chipset. Uses SDIO interface. */ - igep2_wlan_bt_init(); + igep_wlan_bt_init(); + if (machine_is_igep0020()) { + omap_display_init(&igep2_dss_data); + igep2_display_init(); + igep2_init_smsc911x(); + usbhs_init(&igep2_usbhs_bdata); + } else { + usbhs_init(&igep3_usbhs_bdata); + } } MACHINE_START(IGEP0020, "IGEP v2 board") .boot_params = 0x80000100, .reserve = omap_reserve, .map_io = omap3_map_io, - .init_early = igep2_init_early, + .init_early = igep_init_early, + .init_irq = omap_init_irq, + .init_machine = igep_init, + .timer = &omap_timer, +MACHINE_END + +MACHINE_START(IGEP0030, "IGEP OMAP3 module") + .boot_params = 0x80000100, + .reserve = omap_reserve, + .map_io = omap3_map_io, + .init_early = igep_init_early, .init_irq = omap_init_irq, - .init_machine = igep2_init, + .init_machine = igep_init, .timer = &omap_timer, MACHINE_END diff --git a/arch/arm/mach-omap2/board-igep0030.c b/arch/arm/mach-omap2/board-igep0030.c deleted file mode 100644 index 2cf86c3cb1a3..000000000000 --- a/arch/arm/mach-omap2/board-igep0030.c +++ /dev/null @@ -1,458 +0,0 @@ -/* - * Copyright (C) 2010 - ISEE 2007 SL - * - * Modified from mach-omap2/board-generic.c - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/platform_device.h> -#include <linux/delay.h> -#include <linux/err.h> -#include <linux/clk.h> -#include <linux/io.h> -#include <linux/gpio.h> -#include <linux/interrupt.h> - -#include <linux/regulator/machine.h> -#include <linux/regulator/fixed.h> -#include <linux/i2c/twl.h> -#include <linux/mmc/host.h> - -#include <asm/mach-types.h> -#include <asm/mach/arch.h> - -#include <plat/board.h> -#include <plat/common.h> -#include <plat/gpmc.h> -#include <plat/usb.h> -#include <plat/onenand.h> - -#include "mux.h" -#include "hsmmc.h" -#include "sdram-numonyx-m65kxxxxam.h" - -#define IGEP3_GPIO_LED0_GREEN 54 -#define IGEP3_GPIO_LED0_RED 53 -#define IGEP3_GPIO_LED1_RED 16 - -#define IGEP3_GPIO_WIFI_NPD 138 -#define IGEP3_GPIO_WIFI_NRESET 139 -#define IGEP3_GPIO_BT_NRESET 137 - -#define IGEP3_GPIO_USBH_NRESET 183 - - -#if defined(CONFIG_MTD_ONENAND_OMAP2) || \ - defined(CONFIG_MTD_ONENAND_OMAP2_MODULE) - -#define ONENAND_MAP 0x20000000 - -/* - * x2 Flash built-in COMBO POP MEMORY - * Since the device is equipped with two DataRAMs, and two-plane NAND - * Flash memory array, these two component enables simultaneous program - * of 4KiB. Plane1 has only even blocks such as block0, block2, block4 - * while Plane2 has only odd blocks such as block1, block3, block5. - * So MTD regards it as 4KiB page size and 256KiB block size 64*(2*2048) - */ - -static struct mtd_partition igep3_onenand_partitions[] = { - { - .name = "X-Loader", - .offset = 0, - .size = 2 * (64*(2*2048)) - }, - { - .name = "U-Boot", - .offset = MTDPART_OFS_APPEND, - .size = 6 * (64*(2*2048)), - }, - { - .name = "Environment", - .offset = MTDPART_OFS_APPEND, - .size = 2 * (64*(2*2048)), - }, - { - .name = "Kernel", - .offset = MTDPART_OFS_APPEND, - .size = 12 * (64*(2*2048)), - }, - { - .name = "File System", - .offset = MTDPART_OFS_APPEND, - .size = MTDPART_SIZ_FULL, - }, -}; - -static struct omap_onenand_platform_data igep3_onenand_pdata = { - .parts = igep3_onenand_partitions, - .nr_parts = ARRAY_SIZE(igep3_onenand_partitions), - .onenand_setup = NULL, - .dma_channel = -1, /* disable DMA in OMAP OneNAND driver */ -}; - -static struct platform_device igep3_onenand_device = { - .name = "omap2-onenand", - .id = -1, - .dev = { - .platform_data = &igep3_onenand_pdata, - }, -}; - -static void __init igep3_flash_init(void) -{ - u8 cs = 0; - u8 onenandcs = GPMC_CS_NUM + 1; - - for (cs = 0; cs < GPMC_CS_NUM; cs++) { - u32 ret; - ret = gpmc_cs_read_reg(cs, GPMC_CS_CONFIG1); - - /* Check if NAND/oneNAND is configured */ - if ((ret & 0xC00) == 0x800) - /* NAND found */ - pr_err("IGEP3: Unsupported NAND found\n"); - else { - ret = gpmc_cs_read_reg(cs, GPMC_CS_CONFIG7); - - if ((ret & 0x3F) == (ONENAND_MAP >> 24)) - /* OneNAND found */ - onenandcs = cs; - } - } - - if (onenandcs > GPMC_CS_NUM) { - pr_err("IGEP3: Unable to find configuration in GPMC\n"); - return; - } - - igep3_onenand_pdata.cs = onenandcs; - - if (platform_device_register(&igep3_onenand_device) < 0) - pr_err("IGEP3: Unable to register OneNAND device\n"); -} - -#else -static void __init igep3_flash_init(void) {} -#endif - -static struct regulator_consumer_supply igep3_vmmc1_supply = - REGULATOR_SUPPLY("vmmc", "omap_hsmmc.0"); - -/* VMMC1 for OMAP VDD_MMC1 (i/o) and MMC1 card */ -static struct regulator_init_data igep3_vmmc1 = { - .constraints = { - .min_uV = 1850000, - .max_uV = 3150000, - .valid_modes_mask = REGULATOR_MODE_NORMAL - | REGULATOR_MODE_STANDBY, - .valid_ops_mask = REGULATOR_CHANGE_VOLTAGE - | REGULATOR_CHANGE_MODE - | REGULATOR_CHANGE_STATUS, - }, - .num_consumer_supplies = 1, - .consumer_supplies = &igep3_vmmc1_supply, -}; - -static struct regulator_consumer_supply igep3_vio_supply = - REGULATOR_SUPPLY("vmmc_aux", "omap_hsmmc.1"); - -static struct regulator_init_data igep3_vio = { - .constraints = { - .min_uV = 1800000, - .max_uV = 1800000, - .apply_uV = 1, - .valid_modes_mask = REGULATOR_MODE_NORMAL - | REGULATOR_MODE_STANDBY, - .valid_ops_mask = REGULATOR_CHANGE_VOLTAGE - | REGULATOR_CHANGE_MODE - | REGULATOR_CHANGE_STATUS, - }, - .num_consumer_supplies = 1, - .consumer_supplies = &igep3_vio_supply, -}; - -static struct regulator_consumer_supply igep3_vmmc2_supply = - REGULATOR_SUPPLY("vmmc", "omap_hsmmc.1"); - -static struct regulator_init_data igep3_vmmc2 = { - .constraints = { - .valid_modes_mask = REGULATOR_MODE_NORMAL, - .always_on = 1, - }, - .num_consumer_supplies = 1, - .consumer_supplies = &igep3_vmmc2_supply, -}; - -static struct fixed_voltage_config igep3_vwlan = { - .supply_name = "vwlan", - .microvolts = 3300000, - .gpio = -EINVAL, - .enabled_at_boot = 1, - .init_data = &igep3_vmmc2, -}; - -static struct platform_device igep3_vwlan_device = { - .name = "reg-fixed-voltage", - .id = 0, - .dev = { - .platform_data = &igep3_vwlan, - }, -}; - -static struct omap2_hsmmc_info mmc[] = { - [0] = { - .mmc = 1, - .caps = MMC_CAP_4_BIT_DATA, - .gpio_cd = -EINVAL, - .gpio_wp = -EINVAL, - }, -#if defined(CONFIG_LIBERTAS_SDIO) || defined(CONFIG_LIBERTAS_SDIO_MODULE) - [1] = { - .mmc = 2, - .caps = MMC_CAP_4_BIT_DATA, - .gpio_cd = -EINVAL, - .gpio_wp = -EINVAL, - }, -#endif - {} /* Terminator */ -}; - -#if defined(CONFIG_LEDS_GPIO) || defined(CONFIG_LEDS_GPIO_MODULE) -#include <linux/leds.h> - -static struct gpio_led igep3_gpio_leds[] = { - [0] = { - .name = "gpio-led:red:d0", - .gpio = IGEP3_GPIO_LED0_RED, - .default_trigger = "default-off" - }, - [1] = { - .name = "gpio-led:green:d0", - .gpio = IGEP3_GPIO_LED0_GREEN, - .default_trigger = "default-off", - }, - [2] = { - .name = "gpio-led:red:d1", - .gpio = IGEP3_GPIO_LED1_RED, - .default_trigger = "default-off", - }, - [3] = { - .name = "gpio-led:green:d1", - .default_trigger = "heartbeat", - .gpio = -EINVAL, /* gets replaced */ - }, -}; - -static struct gpio_led_platform_data igep3_led_pdata = { - .leds = igep3_gpio_leds, - .num_leds = ARRAY_SIZE(igep3_gpio_leds), -}; - -static struct platform_device igep3_led_device = { - .name = "leds-gpio", - .id = -1, - .dev = { - .platform_data = &igep3_led_pdata, - }, -}; - -static void __init igep3_leds_init(void) -{ - platform_device_register(&igep3_led_device); -} - -#else -static inline void igep3_leds_init(void) -{ - if ((gpio_request(IGEP3_GPIO_LED0_RED, "gpio-led:red:d0") == 0) && - (gpio_direction_output(IGEP3_GPIO_LED0_RED, 1) == 0)) { - gpio_export(IGEP3_GPIO_LED0_RED, 0); - gpio_set_value(IGEP3_GPIO_LED0_RED, 1); - } else - pr_warning("IGEP3: Could not obtain gpio GPIO_LED0_RED\n"); - - if ((gpio_request(IGEP3_GPIO_LED0_GREEN, "gpio-led:green:d0") == 0) && - (gpio_direction_output(IGEP3_GPIO_LED0_GREEN, 1) == 0)) { - gpio_export(IGEP3_GPIO_LED0_GREEN, 0); - gpio_set_value(IGEP3_GPIO_LED0_GREEN, 1); - } else - pr_warning("IGEP3: Could not obtain gpio GPIO_LED0_GREEN\n"); - - if ((gpio_request(IGEP3_GPIO_LED1_RED, "gpio-led:red:d1") == 0) && - (gpio_direction_output(IGEP3_GPIO_LED1_RED, 1) == 0)) { - gpio_export(IGEP3_GPIO_LED1_RED, 0); - gpio_set_value(IGEP3_GPIO_LED1_RED, 1); - } else - pr_warning("IGEP3: Could not obtain gpio GPIO_LED1_RED\n"); -} -#endif - -static int igep3_twl4030_gpio_setup(struct device *dev, - unsigned gpio, unsigned ngpio) -{ - /* gpio + 0 is "mmc0_cd" (input/IRQ) */ - mmc[0].gpio_cd = gpio + 0; - omap2_hsmmc_init(mmc); - - /* TWL4030_GPIO_MAX + 1 == ledB (out, active low LED) */ -#if !defined(CONFIG_LEDS_GPIO) && !defined(CONFIG_LEDS_GPIO_MODULE) - if ((gpio_request(gpio+TWL4030_GPIO_MAX+1, "gpio-led:green:d1") == 0) - && (gpio_direction_output(gpio + TWL4030_GPIO_MAX + 1, 1) == 0)) { - gpio_export(gpio + TWL4030_GPIO_MAX + 1, 0); - gpio_set_value(gpio + TWL4030_GPIO_MAX + 1, 0); - } else - pr_warning("IGEP3: Could not obtain gpio GPIO_LED1_GREEN\n"); -#else - igep3_gpio_leds[3].gpio = gpio + TWL4030_GPIO_MAX + 1; -#endif - - return 0; -}; - -static struct twl4030_gpio_platform_data igep3_twl4030_gpio_pdata = { - .gpio_base = OMAP_MAX_GPIO_LINES, - .irq_base = TWL4030_GPIO_IRQ_BASE, - .irq_end = TWL4030_GPIO_IRQ_END, - .use_leds = true, - .setup = igep3_twl4030_gpio_setup, -}; - -static struct twl4030_usb_data igep3_twl4030_usb_data = { - .usb_mode = T2_USB_MODE_ULPI, -}; - -static struct platform_device *igep3_devices[] __initdata = { - &igep3_vwlan_device, -}; - -static void __init igep3_init_early(void) -{ - omap2_init_common_infrastructure(); - omap2_init_common_devices(m65kxxxxam_sdrc_params, - m65kxxxxam_sdrc_params); -} - -static struct twl4030_platform_data igep3_twl4030_pdata = { - .irq_base = TWL4030_IRQ_BASE, - .irq_end = TWL4030_IRQ_END, - - /* platform_data for children goes here */ - .usb = &igep3_twl4030_usb_data, - .gpio = &igep3_twl4030_gpio_pdata, - .vmmc1 = &igep3_vmmc1, - .vio = &igep3_vio, -}; - -static struct i2c_board_info __initdata igep3_i2c_boardinfo[] = { - { - I2C_BOARD_INFO("twl4030", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = INT_34XX_SYS_NIRQ, - .platform_data = &igep3_twl4030_pdata, - }, -}; - -static int __init igep3_i2c_init(void) -{ - omap_register_i2c_bus(1, 2600, igep3_i2c_boardinfo, - ARRAY_SIZE(igep3_i2c_boardinfo)); - - return 0; -} - -static struct omap_musb_board_data musb_board_data = { - .interface_type = MUSB_INTERFACE_ULPI, - .mode = MUSB_OTG, - .power = 100, -}; - -#if defined(CONFIG_LIBERTAS_SDIO) || defined(CONFIG_LIBERTAS_SDIO_MODULE) - -static void __init igep3_wifi_bt_init(void) -{ - /* Configure MUX values for W-LAN + Bluetooth GPIO's */ - omap_mux_init_gpio(IGEP3_GPIO_WIFI_NPD, OMAP_PIN_OUTPUT); - omap_mux_init_gpio(IGEP3_GPIO_WIFI_NRESET, OMAP_PIN_OUTPUT); - omap_mux_init_gpio(IGEP3_GPIO_BT_NRESET, OMAP_PIN_OUTPUT); - - /* Set GPIO's for W-LAN + Bluetooth combo module */ - if ((gpio_request(IGEP3_GPIO_WIFI_NPD, "GPIO_WIFI_NPD") == 0) && - (gpio_direction_output(IGEP3_GPIO_WIFI_NPD, 1) == 0)) { - gpio_export(IGEP3_GPIO_WIFI_NPD, 0); - } else - pr_warning("IGEP3: Could not obtain gpio GPIO_WIFI_NPD\n"); - - if ((gpio_request(IGEP3_GPIO_WIFI_NRESET, "GPIO_WIFI_NRESET") == 0) && - (gpio_direction_output(IGEP3_GPIO_WIFI_NRESET, 1) == 0)) { - gpio_export(IGEP3_GPIO_WIFI_NRESET, 0); - gpio_set_value(IGEP3_GPIO_WIFI_NRESET, 0); - udelay(10); - gpio_set_value(IGEP3_GPIO_WIFI_NRESET, 1); - } else - pr_warning("IGEP3: Could not obtain gpio GPIO_WIFI_NRESET\n"); - - if ((gpio_request(IGEP3_GPIO_BT_NRESET, "GPIO_BT_NRESET") == 0) && - (gpio_direction_output(IGEP3_GPIO_BT_NRESET, 1) == 0)) { - gpio_export(IGEP3_GPIO_BT_NRESET, 0); - } else - pr_warning("IGEP3: Could not obtain gpio GPIO_BT_NRESET\n"); -} -#else -void __init igep3_wifi_bt_init(void) {} -#endif - -static const struct usbhs_omap_board_data usbhs_bdata __initconst = { - .port_mode[0] = OMAP_USBHS_PORT_MODE_UNUSED, - .port_mode[1] = OMAP_EHCI_PORT_MODE_PHY, - .port_mode[2] = OMAP_USBHS_PORT_MODE_UNUSED, - - .phy_reset = true, - .reset_gpio_port[0] = -EINVAL, - .reset_gpio_port[1] = IGEP3_GPIO_USBH_NRESET, - .reset_gpio_port[2] = -EINVAL, -}; - -#ifdef CONFIG_OMAP_MUX -static struct omap_board_mux board_mux[] __initdata = { - OMAP3_MUX(I2C2_SDA, OMAP_MUX_MODE4 | OMAP_PIN_OUTPUT), - { .reg_offset = OMAP_MUX_TERMINATOR }, -}; -#endif - -static void __init igep3_init(void) -{ - omap3_mux_init(board_mux, OMAP_PACKAGE_CBB); - - /* Register I2C busses and drivers */ - igep3_i2c_init(); - platform_add_devices(igep3_devices, ARRAY_SIZE(igep3_devices)); - omap_serial_init(); - usb_musb_init(&musb_board_data); - usbhs_init(&usbhs_bdata); - - igep3_flash_init(); - igep3_leds_init(); - - /* - * WLAN-BT combo module from MuRata which has a Marvell WLAN - * (88W8686) + CSR Bluetooth chipset. Uses SDIO interface. - */ - igep3_wifi_bt_init(); - -} - -MACHINE_START(IGEP0030, "IGEP OMAP3 module") - .boot_params = 0x80000100, - .reserve = omap_reserve, - .map_io = omap3_map_io, - .init_early = igep3_init_early, - .init_irq = omap_init_irq, - .init_machine = igep3_init, - .timer = &omap_timer, -MACHINE_END diff --git a/arch/arm/mach-omap2/board-ldp.c b/arch/arm/mach-omap2/board-ldp.c index e2ba77957a8c..f7d6038075f0 100644 --- a/arch/arm/mach-omap2/board-ldp.c +++ b/arch/arm/mach-omap2/board-ldp.c @@ -22,7 +22,6 @@ #include <linux/err.h> #include <linux/clk.h> #include <linux/spi/spi.h> -#include <linux/spi/ads7846.h> #include <linux/regulator/machine.h> #include <linux/i2c/twl.h> #include <linux/io.h> @@ -43,47 +42,19 @@ #include <asm/delay.h> #include <plat/usb.h> +#include <plat/gpmc-smsc911x.h> #include "board-flash.h" #include "mux.h" #include "hsmmc.h" #include "control.h" +#include "common-board-devices.h" #define LDP_SMSC911X_CS 1 #define LDP_SMSC911X_GPIO 152 #define DEBUG_BASE 0x08000000 #define LDP_ETHR_START DEBUG_BASE -static struct resource ldp_smsc911x_resources[] = { - [0] = { - .start = LDP_ETHR_START, - .end = LDP_ETHR_START + SZ_4K, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = 0, - .end = 0, - .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWLEVEL, - }, -}; - -static struct smsc911x_platform_config ldp_smsc911x_config = { - .irq_polarity = SMSC911X_IRQ_POLARITY_ACTIVE_LOW, - .irq_type = SMSC911X_IRQ_TYPE_OPEN_DRAIN, - .flags = SMSC911X_USE_32BIT, - .phy_interface = PHY_INTERFACE_MODE_MII, -}; - -static struct platform_device ldp_smsc911x_device = { - .name = "smsc911x", - .id = -1, - .num_resources = ARRAY_SIZE(ldp_smsc911x_resources), - .resource = ldp_smsc911x_resources, - .dev = { - .platform_data = &ldp_smsc911x_config, - }, -}; - static uint32_t board_keymap[] = { KEY(0, 0, KEY_1), KEY(1, 0, KEY_2), @@ -197,82 +168,16 @@ static struct platform_device ldp_gpio_keys_device = { }, }; -static int ts_gpio; - -/** - * @brief ads7846_dev_init : Requests & sets GPIO line for pen-irq - * - * @return - void. If request gpio fails then Flag KERN_ERR. - */ -static void ads7846_dev_init(void) -{ - if (gpio_request(ts_gpio, "ads7846 irq") < 0) { - printk(KERN_ERR "can't get ads746 pen down GPIO\n"); - return; - } - - gpio_direction_input(ts_gpio); - gpio_set_debounce(ts_gpio, 310); -} - -static int ads7846_get_pendown_state(void) -{ - return !gpio_get_value(ts_gpio); -} - -static struct ads7846_platform_data tsc2046_config __initdata = { - .get_pendown_state = ads7846_get_pendown_state, - .keep_vref_on = 1, -}; - -static struct omap2_mcspi_device_config tsc2046_mcspi_config = { - .turbo_mode = 0, - .single_channel = 1, /* 0: slave, 1: master */ -}; - -static struct spi_board_info ldp_spi_board_info[] __initdata = { - [0] = { - /* - * TSC2046 operates at a max freqency of 2MHz, so - * operate slightly below at 1.5MHz - */ - .modalias = "ads7846", - .bus_num = 1, - .chip_select = 0, - .max_speed_hz = 1500000, - .controller_data = &tsc2046_mcspi_config, - .irq = 0, - .platform_data = &tsc2046_config, - }, +static struct omap_smsc911x_platform_data smsc911x_cfg = { + .cs = LDP_SMSC911X_CS, + .gpio_irq = LDP_SMSC911X_GPIO, + .gpio_reset = -EINVAL, + .flags = SMSC911X_USE_32BIT, }; static inline void __init ldp_init_smsc911x(void) { - int eth_cs; - unsigned long cs_mem_base; - int eth_gpio = 0; - - eth_cs = LDP_SMSC911X_CS; - - if (gpmc_cs_request(eth_cs, SZ_16M, &cs_mem_base) < 0) { - printk(KERN_ERR "Failed to request GPMC mem for smsc911x\n"); - return; - } - - ldp_smsc911x_resources[0].start = cs_mem_base + 0x0; - ldp_smsc911x_resources[0].end = cs_mem_base + 0xff; - udelay(100); - - eth_gpio = LDP_SMSC911X_GPIO; - - ldp_smsc911x_resources[1].start = OMAP_GPIO_IRQ(eth_gpio); - - if (gpio_request(eth_gpio, "smsc911x irq") < 0) { - printk(KERN_ERR "Failed to request GPIO%d for smsc911x IRQ\n", - eth_gpio); - return; - } - gpio_direction_input(eth_gpio); + gpmc_smsc911x_init(&smsc911x_cfg); } static struct platform_device ldp_lcd_device = { @@ -360,19 +265,9 @@ static struct twl4030_platform_data ldp_twldata = { .keypad = &ldp_kp_twl4030_data, }; -static struct i2c_board_info __initdata ldp_i2c_boardinfo[] = { - { - I2C_BOARD_INFO("twl4030", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = INT_34XX_SYS_NIRQ, - .platform_data = &ldp_twldata, - }, -}; - static int __init omap_i2c_init(void) { - omap_register_i2c_bus(1, 2600, ldp_i2c_boardinfo, - ARRAY_SIZE(ldp_i2c_boardinfo)); + omap3_pmic_init("twl4030", &ldp_twldata); omap_register_i2c_bus(2, 400, NULL, 0); omap_register_i2c_bus(3, 400, NULL, 0); return 0; @@ -389,7 +284,6 @@ static struct omap2_hsmmc_info mmc[] __initdata = { }; static struct platform_device *ldp_devices[] __initdata = { - &ldp_smsc911x_device, &ldp_lcd_device, &ldp_gpio_keys_device, }; @@ -400,12 +294,6 @@ static struct omap_board_mux board_mux[] __initdata = { }; #endif -static struct omap_musb_board_data musb_board_data = { - .interface_type = MUSB_INTERFACE_ULPI, - .mode = MUSB_OTG, - .power = 100, -}; - static struct mtd_partition ldp_nand_partitions[] = { /* All the partition sizes are listed in terms of NAND block size */ { @@ -446,13 +334,9 @@ static void __init omap_ldp_init(void) ldp_init_smsc911x(); omap_i2c_init(); platform_add_devices(ldp_devices, ARRAY_SIZE(ldp_devices)); - ts_gpio = 54; - ldp_spi_board_info[0].irq = gpio_to_irq(ts_gpio); - spi_register_board_info(ldp_spi_board_info, - ARRAY_SIZE(ldp_spi_board_info)); - ads7846_dev_init(); + omap_ads7846_init(1, 54, 310, NULL); omap_serial_init(); - usb_musb_init(&musb_board_data); + usb_musb_init(NULL); board_nand_init(ldp_nand_partitions, ARRAY_SIZE(ldp_nand_partitions), ZOOM_NAND_CS, 0); diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c index e710cd9e079b..8d74318ed495 100644 --- a/arch/arm/mach-omap2/board-n8x0.c +++ b/arch/arm/mach-omap2/board-n8x0.c @@ -106,14 +106,13 @@ static void __init n8x0_usb_init(void) static char announce[] __initdata = KERN_INFO "TUSB 6010\n"; /* PM companion chip power control pin */ - ret = gpio_request(TUSB6010_GPIO_ENABLE, "TUSB6010 enable"); + ret = gpio_request_one(TUSB6010_GPIO_ENABLE, GPIOF_OUT_INIT_LOW, + "TUSB6010 enable"); if (ret != 0) { printk(KERN_ERR "Could not get TUSB power GPIO%i\n", TUSB6010_GPIO_ENABLE); return; } - gpio_direction_output(TUSB6010_GPIO_ENABLE, 0); - tusb_set_power(0); ret = tusb6010_setup_interface(&tusb_data, TUSB6010_REFCLK_19, 2, @@ -494,8 +493,12 @@ static struct omap_mmc_platform_data mmc1_data = { static struct omap_mmc_platform_data *mmc_data[OMAP24XX_NR_MMC]; -static void __init n8x0_mmc_init(void) +static struct gpio n810_emmc_gpios[] __initdata = { + { N810_EMMC_VSD_GPIO, GPIOF_OUT_INIT_LOW, "MMC slot 2 Vddf" }, + { N810_EMMC_VIO_GPIO, GPIOF_OUT_INIT_LOW, "MMC slot 2 Vdd" }, +}; +static void __init n8x0_mmc_init(void) { int err; @@ -512,27 +515,18 @@ static void __init n8x0_mmc_init(void) mmc1_data.slots[1].ban_openended = 1; } - err = gpio_request(N8X0_SLOT_SWITCH_GPIO, "MMC slot switch"); + err = gpio_request_one(N8X0_SLOT_SWITCH_GPIO, GPIOF_OUT_INIT_LOW, + "MMC slot switch"); if (err) return; - gpio_direction_output(N8X0_SLOT_SWITCH_GPIO, 0); - if (machine_is_nokia_n810()) { - err = gpio_request(N810_EMMC_VSD_GPIO, "MMC slot 2 Vddf"); - if (err) { - gpio_free(N8X0_SLOT_SWITCH_GPIO); - return; - } - gpio_direction_output(N810_EMMC_VSD_GPIO, 0); - - err = gpio_request(N810_EMMC_VIO_GPIO, "MMC slot 2 Vdd"); + err = gpio_request_array(n810_emmc_gpios, + ARRAY_SIZE(n810_emmc_gpios)); if (err) { gpio_free(N8X0_SLOT_SWITCH_GPIO); - gpio_free(N810_EMMC_VSD_GPIO); return; } - gpio_direction_output(N810_EMMC_VIO_GPIO, 0); } mmc_data[0] = &mmc1_data; diff --git a/arch/arm/mach-omap2/board-omap3beagle.c b/arch/arm/mach-omap2/board-omap3beagle.c index 97750d483a70..be71426359f2 100644 --- a/arch/arm/mach-omap2/board-omap3beagle.c +++ b/arch/arm/mach-omap2/board-omap3beagle.c @@ -52,6 +52,7 @@ #include "hsmmc.h" #include "timer-gp.h" #include "pm.h" +#include "common-board-devices.h" #define NAND_BLOCK_SIZE SZ_128K @@ -79,6 +80,12 @@ static u8 omap3_beagle_get_rev(void) return omap3_beagle_version; } +static struct gpio omap3_beagle_rev_gpios[] __initdata = { + { 171, GPIOF_IN, "rev_id_0" }, + { 172, GPIOF_IN, "rev_id_1" }, + { 173, GPIOF_IN, "rev_id_2" }, +}; + static void __init omap3_beagle_init_rev(void) { int ret; @@ -88,21 +95,13 @@ static void __init omap3_beagle_init_rev(void) omap_mux_init_gpio(172, OMAP_PIN_INPUT_PULLUP); omap_mux_init_gpio(173, OMAP_PIN_INPUT_PULLUP); - ret = gpio_request(171, "rev_id_0"); - if (ret < 0) - goto fail0; - - ret = gpio_request(172, "rev_id_1"); - if (ret < 0) - goto fail1; - - ret = gpio_request(173, "rev_id_2"); - if (ret < 0) - goto fail2; - - gpio_direction_input(171); - gpio_direction_input(172); - gpio_direction_input(173); + ret = gpio_request_array(omap3_beagle_rev_gpios, + ARRAY_SIZE(omap3_beagle_rev_gpios)); + if (ret < 0) { + printk(KERN_ERR "Unable to get revision detection GPIO pins\n"); + omap3_beagle_version = OMAP3BEAGLE_BOARD_UNKN; + return; + } beagle_rev = gpio_get_value(171) | (gpio_get_value(172) << 1) | (gpio_get_value(173) << 2); @@ -128,18 +127,6 @@ static void __init omap3_beagle_init_rev(void) printk(KERN_INFO "OMAP3 Beagle Rev: unknown %hd\n", beagle_rev); omap3_beagle_version = OMAP3BEAGLE_BOARD_UNKN; } - - return; - -fail2: - gpio_free(172); -fail1: - gpio_free(171); -fail0: - printk(KERN_ERR "Unable to get revision detection GPIO pins\n"); - omap3_beagle_version = OMAP3BEAGLE_BOARD_UNKN; - - return; } static struct mtd_partition omap3beagle_nand_partitions[] = { @@ -173,15 +160,6 @@ static struct mtd_partition omap3beagle_nand_partitions[] = { }, }; -static struct omap_nand_platform_data omap3beagle_nand_data = { - .options = NAND_BUSWIDTH_16, - .parts = omap3beagle_nand_partitions, - .nr_parts = ARRAY_SIZE(omap3beagle_nand_partitions), - .dma_channel = -1, /* disable DMA in OMAP NAND driver */ - .nand_setup = NULL, - .dev_ready = NULL, -}; - /* DSS */ static int beagle_enable_dvi(struct omap_dss_device *dssdev) @@ -243,13 +221,10 @@ static void __init beagle_display_init(void) { int r; - r = gpio_request(beagle_dvi_device.reset_gpio, "DVI reset"); - if (r < 0) { + r = gpio_request_one(beagle_dvi_device.reset_gpio, GPIOF_OUT_INIT_LOW, + "DVI reset"); + if (r < 0) printk(KERN_ERR "Unable to get DVI reset GPIO\n"); - return; - } - - gpio_direction_output(beagle_dvi_device.reset_gpio, 0); } #include "sdram-micron-mt46h32m32lf-6.h" @@ -276,7 +251,7 @@ static struct gpio_led gpio_leds[]; static int beagle_twl_gpio_setup(struct device *dev, unsigned gpio, unsigned ngpio) { - int r; + int r, usb_pwr_level; if (omap3_beagle_get_rev() == OMAP3BEAGLE_BOARD_XM) { mmc[0].gpio_wp = -EINVAL; @@ -295,66 +270,46 @@ static int beagle_twl_gpio_setup(struct device *dev, beagle_vmmc1_supply.dev = mmc[0].dev; beagle_vsim_supply.dev = mmc[0].dev; - /* REVISIT: need ehci-omap hooks for external VBUS - * power switch and overcurrent detect - */ - if (omap3_beagle_get_rev() != OMAP3BEAGLE_BOARD_XM) { - r = gpio_request(gpio + 1, "EHCI_nOC"); - if (!r) { - r = gpio_direction_input(gpio + 1); - if (r) - gpio_free(gpio + 1); - } - if (r) - pr_err("%s: unable to configure EHCI_nOC\n", __func__); - } - /* * TWL4030_GPIO_MAX + 0 == ledA, EHCI nEN_USB_PWR (out, XM active * high / others active low) - */ - gpio_request(gpio + TWL4030_GPIO_MAX, "nEN_USB_PWR"); - if (omap3_beagle_get_rev() == OMAP3BEAGLE_BOARD_XM) - gpio_direction_output(gpio + TWL4030_GPIO_MAX, 1); - else - gpio_direction_output(gpio + TWL4030_GPIO_MAX, 0); - - /* DVI reset GPIO is different between beagle revisions */ - if (omap3_beagle_get_rev() == OMAP3BEAGLE_BOARD_XM) - beagle_dvi_device.reset_gpio = 129; - else - beagle_dvi_device.reset_gpio = 170; - - /* TWL4030_GPIO_MAX + 1 == ledB, PMU_STAT (out, active low LED) */ - gpio_leds[2].gpio = gpio + TWL4030_GPIO_MAX + 1; - - /* - * gpio + 1 on Xm controls the TFP410's enable line (active low) - * gpio + 2 control varies depending on the board rev as follows: - * P7/P8 revisions(prototype): Camera EN - * A2+ revisions (production): LDO (supplies DVI, serial, led blocks) + * DVI reset GPIO is different between beagle revisions */ if (omap3_beagle_get_rev() == OMAP3BEAGLE_BOARD_XM) { - r = gpio_request(gpio + 1, "nDVI_PWR_EN"); - if (!r) { - r = gpio_direction_output(gpio + 1, 0); - if (r) - gpio_free(gpio + 1); - } + usb_pwr_level = GPIOF_OUT_INIT_HIGH; + beagle_dvi_device.reset_gpio = 129; + /* + * gpio + 1 on Xm controls the TFP410's enable line (active low) + * gpio + 2 control varies depending on the board rev as below: + * P7/P8 revisions(prototype): Camera EN + * A2+ revisions (production): LDO (DVI, serial, led blocks) + */ + r = gpio_request_one(gpio + 1, GPIOF_OUT_INIT_LOW, + "nDVI_PWR_EN"); if (r) pr_err("%s: unable to configure nDVI_PWR_EN\n", __func__); - r = gpio_request(gpio + 2, "DVI_LDO_EN"); - if (!r) { - r = gpio_direction_output(gpio + 2, 1); - if (r) - gpio_free(gpio + 2); - } + r = gpio_request_one(gpio + 2, GPIOF_OUT_INIT_HIGH, + "DVI_LDO_EN"); if (r) pr_err("%s: unable to configure DVI_LDO_EN\n", __func__); + } else { + usb_pwr_level = GPIOF_OUT_INIT_LOW; + beagle_dvi_device.reset_gpio = 170; + /* + * REVISIT: need ehci-omap hooks for external VBUS + * power switch and overcurrent detect + */ + if (gpio_request_one(gpio + 1, GPIOF_IN, "EHCI_nOC")) + pr_err("%s: unable to configure EHCI_nOC\n", __func__); } + gpio_request_one(gpio + TWL4030_GPIO_MAX, usb_pwr_level, "nEN_USB_PWR"); + + /* TWL4030_GPIO_MAX + 1 == ledB, PMU_STAT (out, active low LED) */ + gpio_leds[2].gpio = gpio + TWL4030_GPIO_MAX + 1; + return 0; } @@ -453,15 +408,6 @@ static struct twl4030_platform_data beagle_twldata = { .vpll2 = &beagle_vpll2, }; -static struct i2c_board_info __initdata beagle_i2c_boardinfo[] = { - { - I2C_BOARD_INFO("twl4030", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = INT_34XX_SYS_NIRQ, - .platform_data = &beagle_twldata, - }, -}; - static struct i2c_board_info __initdata beagle_i2c_eeprom[] = { { I2C_BOARD_INFO("eeprom", 0x50), @@ -470,8 +416,7 @@ static struct i2c_board_info __initdata beagle_i2c_eeprom[] = { static int __init omap3_beagle_i2c_init(void) { - omap_register_i2c_bus(1, 2600, beagle_i2c_boardinfo, - ARRAY_SIZE(beagle_i2c_boardinfo)); + omap3_pmic_init("twl4030", &beagle_twldata); /* Bus 3 is attached to the DVI port where devices like the pico DLP * projector don't work reliably with 400kHz */ omap_register_i2c_bus(3, 100, beagle_i2c_eeprom, ARRAY_SIZE(beagle_i2c_eeprom)); @@ -551,39 +496,6 @@ static struct platform_device *omap3_beagle_devices[] __initdata = { &keys_gpio, }; -static void __init omap3beagle_flash_init(void) -{ - u8 cs = 0; - u8 nandcs = GPMC_CS_NUM + 1; - - /* find out the chip-select on which NAND exists */ - while (cs < GPMC_CS_NUM) { - u32 ret = 0; - ret = gpmc_cs_read_reg(cs, GPMC_CS_CONFIG1); - - if ((ret & 0xC00) == 0x800) { - printk(KERN_INFO "Found NAND on CS%d\n", cs); - if (nandcs > GPMC_CS_NUM) - nandcs = cs; - } - cs++; - } - - if (nandcs > GPMC_CS_NUM) { - printk(KERN_INFO "NAND: Unable to find configuration " - "in GPMC\n "); - return; - } - - if (nandcs < GPMC_CS_NUM) { - omap3beagle_nand_data.cs = nandcs; - - printk(KERN_INFO "Registering NAND on CS%d\n", nandcs); - if (gpmc_nand_init(&omap3beagle_nand_data) < 0) - printk(KERN_ERR "Unable to register NAND device\n"); - } -} - static const struct usbhs_omap_board_data usbhs_bdata __initconst = { .port_mode[0] = OMAP_EHCI_PORT_MODE_PHY, @@ -602,12 +514,6 @@ static struct omap_board_mux board_mux[] __initdata = { }; #endif -static struct omap_musb_board_data musb_board_data = { - .interface_type = MUSB_INTERFACE_ULPI, - .mode = MUSB_OTG, - .power = 100, -}; - static void __init beagle_opp_init(void) { int r = 0; @@ -665,13 +571,13 @@ static void __init omap3_beagle_init(void) omap_serial_init(); omap_mux_init_gpio(170, OMAP_PIN_INPUT); - gpio_request(170, "DVI_nPD"); /* REVISIT leave DVI powered down until it's needed ... */ - gpio_direction_output(170, true); + gpio_request_one(170, GPIOF_OUT_INIT_HIGH, "DVI_nPD"); - usb_musb_init(&musb_board_data); + usb_musb_init(NULL); usbhs_init(&usbhs_bdata); - omap3beagle_flash_init(); + omap_nand_flash_init(NAND_BUSWIDTH_16, omap3beagle_nand_partitions, + ARRAY_SIZE(omap3beagle_nand_partitions)); /* Ensure SDRC pins are mux'd for self-refresh */ omap_mux_init_signal("sdrc_cke0", OMAP_PIN_OUTPUT); diff --git a/arch/arm/mach-omap2/board-omap3evm.c b/arch/arm/mach-omap2/board-omap3evm.c index 7f94cccdb076..b4d43464a303 100644 --- a/arch/arm/mach-omap2/board-omap3evm.c +++ b/arch/arm/mach-omap2/board-omap3evm.c @@ -50,6 +50,7 @@ #include "mux.h" #include "sdram-micron-mt46h32m32lf-6.h" #include "hsmmc.h" +#include "common-board-devices.h" #define OMAP3_EVM_TS_GPIO 175 #define OMAP3_EVM_EHCI_VBUS 22 @@ -101,49 +102,20 @@ static void __init omap3_evm_get_revision(void) } #if defined(CONFIG_SMSC911X) || defined(CONFIG_SMSC911X_MODULE) -static struct resource omap3evm_smsc911x_resources[] = { - [0] = { - .start = OMAP3EVM_ETHR_START, - .end = (OMAP3EVM_ETHR_START + OMAP3EVM_ETHR_SIZE - 1), - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = OMAP_GPIO_IRQ(OMAP3EVM_ETHR_GPIO_IRQ), - .end = OMAP_GPIO_IRQ(OMAP3EVM_ETHR_GPIO_IRQ), - .flags = (IORESOURCE_IRQ | IRQF_TRIGGER_LOW), - }, -}; +#include <plat/gpmc-smsc911x.h> -static struct smsc911x_platform_config smsc911x_config = { - .phy_interface = PHY_INTERFACE_MODE_MII, - .irq_polarity = SMSC911X_IRQ_POLARITY_ACTIVE_LOW, - .irq_type = SMSC911X_IRQ_TYPE_OPEN_DRAIN, - .flags = (SMSC911X_USE_32BIT | SMSC911X_SAVE_MAC_ADDRESS), -}; - -static struct platform_device omap3evm_smsc911x_device = { - .name = "smsc911x", - .id = -1, - .num_resources = ARRAY_SIZE(omap3evm_smsc911x_resources), - .resource = &omap3evm_smsc911x_resources[0], - .dev = { - .platform_data = &smsc911x_config, - }, +static struct omap_smsc911x_platform_data smsc911x_cfg = { + .cs = OMAP3EVM_SMSC911X_CS, + .gpio_irq = OMAP3EVM_ETHR_GPIO_IRQ, + .gpio_reset = -EINVAL, + .flags = SMSC911X_USE_32BIT | SMSC911X_SAVE_MAC_ADDRESS, }; static inline void __init omap3evm_init_smsc911x(void) { - int eth_cs, eth_rst; struct clk *l3ck; unsigned int rate; - if (get_omap3_evm_rev() == OMAP3EVM_BOARD_GEN_1) - eth_rst = OMAP3EVM_GEN1_ETHR_GPIO_RST; - else - eth_rst = OMAP3EVM_GEN2_ETHR_GPIO_RST; - - eth_cs = OMAP3EVM_SMSC911X_CS; - l3ck = clk_get(NULL, "l3_ck"); if (IS_ERR(l3ck)) rate = 100000000; @@ -152,33 +124,13 @@ static inline void __init omap3evm_init_smsc911x(void) /* Configure ethernet controller reset gpio */ if (cpu_is_omap3430()) { - if (gpio_request(eth_rst, "SMSC911x gpio") < 0) { - pr_err(KERN_ERR "Failed to request %d for smsc911x\n", - eth_rst); - return; - } - - if (gpio_direction_output(eth_rst, 1) < 0) { - pr_err(KERN_ERR "Failed to set direction of %d for" \ - " smsc911x\n", eth_rst); - return; - } - /* reset pulse to ethernet controller*/ - usleep_range(150, 220); - gpio_set_value(eth_rst, 0); - usleep_range(150, 220); - gpio_set_value(eth_rst, 1); - usleep_range(1, 2); - } - - if (gpio_request(OMAP3EVM_ETHR_GPIO_IRQ, "SMSC911x irq") < 0) { - printk(KERN_ERR "Failed to request GPIO%d for smsc911x IRQ\n", - OMAP3EVM_ETHR_GPIO_IRQ); - return; + if (get_omap3_evm_rev() == OMAP3EVM_BOARD_GEN_1) + smsc911x_cfg.gpio_reset = OMAP3EVM_GEN1_ETHR_GPIO_RST; + else + smsc911x_cfg.gpio_reset = OMAP3EVM_GEN2_ETHR_GPIO_RST; } - gpio_direction_input(OMAP3EVM_ETHR_GPIO_IRQ); - platform_device_register(&omap3evm_smsc911x_device); + gpmc_smsc911x_init(&smsc911x_cfg); } #else @@ -197,6 +149,15 @@ static inline void __init omap3evm_init_smsc911x(void) { return; } #define OMAP3EVM_LCD_PANEL_BKLIGHT_GPIO 210 #define OMAP3EVM_DVI_PANEL_EN_GPIO 199 +static struct gpio omap3_evm_dss_gpios[] __initdata = { + { OMAP3EVM_LCD_PANEL_RESB, GPIOF_OUT_INIT_HIGH, "lcd_panel_resb" }, + { OMAP3EVM_LCD_PANEL_INI, GPIOF_OUT_INIT_HIGH, "lcd_panel_ini" }, + { OMAP3EVM_LCD_PANEL_QVGA, GPIOF_OUT_INIT_LOW, "lcd_panel_qvga" }, + { OMAP3EVM_LCD_PANEL_LR, GPIOF_OUT_INIT_HIGH, "lcd_panel_lr" }, + { OMAP3EVM_LCD_PANEL_UD, GPIOF_OUT_INIT_HIGH, "lcd_panel_ud" }, + { OMAP3EVM_LCD_PANEL_ENVDD, GPIOF_OUT_INIT_LOW, "lcd_panel_envdd" }, +}; + static int lcd_enabled; static int dvi_enabled; @@ -204,61 +165,10 @@ static void __init omap3_evm_display_init(void) { int r; - r = gpio_request(OMAP3EVM_LCD_PANEL_RESB, "lcd_panel_resb"); - if (r) { - printk(KERN_ERR "failed to get lcd_panel_resb\n"); - return; - } - gpio_direction_output(OMAP3EVM_LCD_PANEL_RESB, 1); - - r = gpio_request(OMAP3EVM_LCD_PANEL_INI, "lcd_panel_ini"); - if (r) { - printk(KERN_ERR "failed to get lcd_panel_ini\n"); - goto err_1; - } - gpio_direction_output(OMAP3EVM_LCD_PANEL_INI, 1); - - r = gpio_request(OMAP3EVM_LCD_PANEL_QVGA, "lcd_panel_qvga"); - if (r) { - printk(KERN_ERR "failed to get lcd_panel_qvga\n"); - goto err_2; - } - gpio_direction_output(OMAP3EVM_LCD_PANEL_QVGA, 0); - - r = gpio_request(OMAP3EVM_LCD_PANEL_LR, "lcd_panel_lr"); - if (r) { - printk(KERN_ERR "failed to get lcd_panel_lr\n"); - goto err_3; - } - gpio_direction_output(OMAP3EVM_LCD_PANEL_LR, 1); - - r = gpio_request(OMAP3EVM_LCD_PANEL_UD, "lcd_panel_ud"); - if (r) { - printk(KERN_ERR "failed to get lcd_panel_ud\n"); - goto err_4; - } - gpio_direction_output(OMAP3EVM_LCD_PANEL_UD, 1); - - r = gpio_request(OMAP3EVM_LCD_PANEL_ENVDD, "lcd_panel_envdd"); - if (r) { - printk(KERN_ERR "failed to get lcd_panel_envdd\n"); - goto err_5; - } - gpio_direction_output(OMAP3EVM_LCD_PANEL_ENVDD, 0); - - return; - -err_5: - gpio_free(OMAP3EVM_LCD_PANEL_UD); -err_4: - gpio_free(OMAP3EVM_LCD_PANEL_LR); -err_3: - gpio_free(OMAP3EVM_LCD_PANEL_QVGA); -err_2: - gpio_free(OMAP3EVM_LCD_PANEL_INI); -err_1: - gpio_free(OMAP3EVM_LCD_PANEL_RESB); - + r = gpio_request_array(omap3_evm_dss_gpios, + ARRAY_SIZE(omap3_evm_dss_gpios)); + if (r) + printk(KERN_ERR "failed to get lcd_panel_* gpios\n"); } static int omap3_evm_enable_lcd(struct omap_dss_device *dssdev) @@ -448,7 +358,7 @@ static struct platform_device leds_gpio = { static int omap3evm_twl_gpio_setup(struct device *dev, unsigned gpio, unsigned ngpio) { - int r; + int r, lcd_bl_en; /* gpio + 0 is "mmc0_cd" (input/IRQ) */ omap_mux_init_gpio(63, OMAP_PIN_INPUT); @@ -465,16 +375,14 @@ static int omap3evm_twl_gpio_setup(struct device *dev, */ /* TWL4030_GPIO_MAX + 0 == ledA, LCD Backlight control */ - r = gpio_request(gpio + TWL4030_GPIO_MAX, "EN_LCD_BKL"); - if (!r) - r = gpio_direction_output(gpio + TWL4030_GPIO_MAX, - (get_omap3_evm_rev() >= OMAP3EVM_BOARD_GEN_2) ? 1 : 0); + lcd_bl_en = get_omap3_evm_rev() >= OMAP3EVM_BOARD_GEN_2 ? + GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW; + r = gpio_request_one(gpio + TWL4030_GPIO_MAX, lcd_bl_en, "EN_LCD_BKL"); if (r) printk(KERN_ERR "failed to get/set lcd_bkl gpio\n"); /* gpio + 7 == DVI Enable */ - gpio_request(gpio + 7, "EN_DVI"); - gpio_direction_output(gpio + 7, 0); + gpio_request_one(gpio + 7, GPIOF_OUT_INIT_LOW, "EN_DVI"); /* TWL4030_GPIO_MAX + 1 == ledB (out, active low LED) */ gpio_leds[2].gpio = gpio + TWL4030_GPIO_MAX + 1; @@ -652,78 +560,18 @@ static struct twl4030_platform_data omap3evm_twldata = { .vdac = &omap3_evm_vdac, .vpll2 = &omap3_evm_vpll2, .vio = &omap3evm_vio, -}; - -static struct i2c_board_info __initdata omap3evm_i2c_boardinfo[] = { - { - I2C_BOARD_INFO("twl4030", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = INT_34XX_SYS_NIRQ, - .platform_data = &omap3evm_twldata, - }, + .vmmc1 = &omap3evm_vmmc1, + .vsim = &omap3evm_vsim, }; static int __init omap3_evm_i2c_init(void) { - /* - * REVISIT: These entries can be set in omap3evm_twl_data - * after a merge with MFD tree - */ - omap3evm_twldata.vmmc1 = &omap3evm_vmmc1; - omap3evm_twldata.vsim = &omap3evm_vsim; - - omap_register_i2c_bus(1, 2600, omap3evm_i2c_boardinfo, - ARRAY_SIZE(omap3evm_i2c_boardinfo)); + omap3_pmic_init("twl4030", &omap3evm_twldata); omap_register_i2c_bus(2, 400, NULL, 0); omap_register_i2c_bus(3, 400, NULL, 0); return 0; } -static void ads7846_dev_init(void) -{ - if (gpio_request(OMAP3_EVM_TS_GPIO, "ADS7846 pendown") < 0) - printk(KERN_ERR "can't get ads7846 pen down GPIO\n"); - - gpio_direction_input(OMAP3_EVM_TS_GPIO); - gpio_set_debounce(OMAP3_EVM_TS_GPIO, 310); -} - -static int ads7846_get_pendown_state(void) -{ - return !gpio_get_value(OMAP3_EVM_TS_GPIO); -} - -static struct ads7846_platform_data ads7846_config = { - .x_max = 0x0fff, - .y_max = 0x0fff, - .x_plate_ohms = 180, - .pressure_max = 255, - .debounce_max = 10, - .debounce_tol = 3, - .debounce_rep = 1, - .get_pendown_state = ads7846_get_pendown_state, - .keep_vref_on = 1, - .settle_delay_usecs = 150, - .wakeup = true, -}; - -static struct omap2_mcspi_device_config ads7846_mcspi_config = { - .turbo_mode = 0, - .single_channel = 1, /* 0: slave, 1: master */ -}; - -static struct spi_board_info omap3evm_spi_board_info[] = { - [0] = { - .modalias = "ads7846", - .bus_num = 1, - .chip_select = 0, - .max_speed_hz = 1500000, - .controller_data = &ads7846_mcspi_config, - .irq = OMAP_GPIO_IRQ(OMAP3_EVM_TS_GPIO), - .platform_data = &ads7846_config, - }, -}; - static struct omap_board_config_kernel omap3_evm_config[] __initdata = { }; @@ -825,6 +673,11 @@ static struct omap_musb_board_data musb_board_data = { .power = 100, }; +static struct gpio omap3_evm_ehci_gpios[] __initdata = { + { OMAP3_EVM_EHCI_VBUS, GPIOF_OUT_INIT_HIGH, "enable EHCI VBUS" }, + { OMAP3_EVM_EHCI_SELECT, GPIOF_OUT_INIT_LOW, "select EHCI port" }, +}; + static void __init omap3_evm_init(void) { omap3_evm_get_revision(); @@ -841,9 +694,6 @@ static void __init omap3_evm_init(void) omap_display_init(&omap3_evm_dss_data); - spi_register_board_info(omap3evm_spi_board_info, - ARRAY_SIZE(omap3evm_spi_board_info)); - omap_serial_init(); /* OMAP3EVM uses ISP1504 phy and so register nop transceiver */ @@ -851,16 +701,12 @@ static void __init omap3_evm_init(void) if (get_omap3_evm_rev() >= OMAP3EVM_BOARD_GEN_2) { /* enable EHCI VBUS using GPIO22 */ - omap_mux_init_gpio(22, OMAP_PIN_INPUT_PULLUP); - gpio_request(OMAP3_EVM_EHCI_VBUS, "enable EHCI VBUS"); - gpio_direction_output(OMAP3_EVM_EHCI_VBUS, 0); - gpio_set_value(OMAP3_EVM_EHCI_VBUS, 1); - + omap_mux_init_gpio(OMAP3_EVM_EHCI_VBUS, OMAP_PIN_INPUT_PULLUP); /* Select EHCI port on main board */ - omap_mux_init_gpio(61, OMAP_PIN_INPUT_PULLUP); - gpio_request(OMAP3_EVM_EHCI_SELECT, "select EHCI port"); - gpio_direction_output(OMAP3_EVM_EHCI_SELECT, 0); - gpio_set_value(OMAP3_EVM_EHCI_SELECT, 0); + omap_mux_init_gpio(OMAP3_EVM_EHCI_SELECT, + OMAP_PIN_INPUT_PULLUP); + gpio_request_array(omap3_evm_ehci_gpios, + ARRAY_SIZE(omap3_evm_ehci_gpios)); /* setup EHCI phy reset config */ omap_mux_init_gpio(21, OMAP_PIN_INPUT_PULLUP); @@ -876,7 +722,7 @@ static void __init omap3_evm_init(void) } usb_musb_init(&musb_board_data); usbhs_init(&usbhs_bdata); - ads7846_dev_init(); + omap_ads7846_init(1, OMAP3_EVM_TS_GPIO, 310, NULL); omap3evm_init_smsc911x(); omap3_evm_display_init(); diff --git a/arch/arm/mach-omap2/board-omap3logic.c b/arch/arm/mach-omap2/board-omap3logic.c index b726943d7c93..60d9be49dbab 100644 --- a/arch/arm/mach-omap2/board-omap3logic.c +++ b/arch/arm/mach-omap2/board-omap3logic.c @@ -37,6 +37,7 @@ #include "hsmmc.h" #include "timer-gp.h" #include "control.h" +#include "common-board-devices.h" #include <plat/mux.h> #include <plat/board.h> @@ -93,19 +94,9 @@ static struct twl4030_platform_data omap3logic_twldata = { .vmmc1 = &omap3logic_vmmc1, }; -static struct i2c_board_info __initdata omap3logic_i2c_boardinfo[] = { - { - I2C_BOARD_INFO("twl4030", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = INT_34XX_SYS_NIRQ, - .platform_data = &omap3logic_twldata, - }, -}; - static int __init omap3logic_i2c_init(void) { - omap_register_i2c_bus(1, 2600, omap3logic_i2c_boardinfo, - ARRAY_SIZE(omap3logic_i2c_boardinfo)); + omap3_pmic_init("twl4030", &omap3logic_twldata); return 0; } @@ -147,7 +138,6 @@ static struct omap_smsc911x_platform_data __initdata board_smsc911x_data = { .cs = OMAP3LOGIC_SMSC911X_CS, .gpio_irq = -EINVAL, .gpio_reset = -EINVAL, - .flags = IORESOURCE_IRQ_LOWLEVEL, }; /* TODO/FIXME (comment by Peter Barada, LogicPD): diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c index 1db15492d82b..1d10736c6d3c 100644 --- a/arch/arm/mach-omap2/board-omap3pandora.c +++ b/arch/arm/mach-omap2/board-omap3pandora.c @@ -22,7 +22,6 @@ #include <linux/platform_device.h> #include <linux/spi/spi.h> -#include <linux/spi/ads7846.h> #include <linux/regulator/machine.h> #include <linux/i2c/twl.h> #include <linux/wl12xx.h> @@ -52,6 +51,7 @@ #include "mux.h" #include "sdram-micron-mt46h32m32lf-6.h" #include "hsmmc.h" +#include "common-board-devices.h" #define PANDORA_WIFI_IRQ_GPIO 21 #define PANDORA_WIFI_NRESET_GPIO 23 @@ -305,24 +305,13 @@ static int omap3pandora_twl_gpio_setup(struct device *dev, /* gpio + 13 drives 32kHz buffer for wifi module */ gpio_32khz = gpio + 13; - ret = gpio_request(gpio_32khz, "wifi 32kHz"); + ret = gpio_request_one(gpio_32khz, GPIOF_OUT_INIT_HIGH, "wifi 32kHz"); if (ret < 0) { pr_err("Cannot get GPIO line %d, ret=%d\n", gpio_32khz, ret); - goto fail; - } - - ret = gpio_direction_output(gpio_32khz, 1); - if (ret < 0) { - pr_err("Cannot set GPIO line %d, ret=%d\n", gpio_32khz, ret); - goto fail_direction; + return -ENODEV; } return 0; - -fail_direction: - gpio_free(gpio_32khz); -fail: - return -ENODEV; } static struct twl4030_gpio_platform_data omap3pandora_gpio_data = { @@ -544,15 +533,6 @@ static struct twl4030_platform_data omap3pandora_twldata = { .bci = &pandora_bci_data, }; -static struct i2c_board_info __initdata omap3pandora_i2c_boardinfo[] = { - { - I2C_BOARD_INFO("tps65950", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = INT_34XX_SYS_NIRQ, - .platform_data = &omap3pandora_twldata, - }, -}; - static struct i2c_board_info __initdata omap3pandora_i2c3_boardinfo[] = { { I2C_BOARD_INFO("bq27500", 0x55), @@ -562,61 +542,15 @@ static struct i2c_board_info __initdata omap3pandora_i2c3_boardinfo[] = { static int __init omap3pandora_i2c_init(void) { - omap_register_i2c_bus(1, 2600, omap3pandora_i2c_boardinfo, - ARRAY_SIZE(omap3pandora_i2c_boardinfo)); + omap3_pmic_init("tps65950", &omap3pandora_twldata); /* i2c2 pins are not connected */ omap_register_i2c_bus(3, 100, omap3pandora_i2c3_boardinfo, ARRAY_SIZE(omap3pandora_i2c3_boardinfo)); return 0; } -static void __init omap3pandora_ads7846_init(void) -{ - int gpio = OMAP3_PANDORA_TS_GPIO; - int ret; - - ret = gpio_request(gpio, "ads7846_pen_down"); - if (ret < 0) { - printk(KERN_ERR "Failed to request GPIO %d for " - "ads7846 pen down IRQ\n", gpio); - return; - } - - gpio_direction_input(gpio); -} - -static int ads7846_get_pendown_state(void) -{ - return !gpio_get_value(OMAP3_PANDORA_TS_GPIO); -} - -static struct ads7846_platform_data ads7846_config = { - .x_max = 0x0fff, - .y_max = 0x0fff, - .x_plate_ohms = 180, - .pressure_max = 255, - .debounce_max = 10, - .debounce_tol = 3, - .debounce_rep = 1, - .get_pendown_state = ads7846_get_pendown_state, - .keep_vref_on = 1, -}; - -static struct omap2_mcspi_device_config ads7846_mcspi_config = { - .turbo_mode = 0, - .single_channel = 1, /* 0: slave, 1: master */ -}; - static struct spi_board_info omap3pandora_spi_board_info[] __initdata = { { - .modalias = "ads7846", - .bus_num = 1, - .chip_select = 0, - .max_speed_hz = 1500000, - .controller_data = &ads7846_mcspi_config, - .irq = OMAP_GPIO_IRQ(OMAP3_PANDORA_TS_GPIO), - .platform_data = &ads7846_config, - }, { .modalias = "tpo_td043mtea1_panel_spi", .bus_num = 1, .chip_select = 1, @@ -639,14 +573,10 @@ static void __init pandora_wl1251_init(void) memset(&pandora_wl1251_pdata, 0, sizeof(pandora_wl1251_pdata)); - ret = gpio_request(PANDORA_WIFI_IRQ_GPIO, "wl1251 irq"); + ret = gpio_request_one(PANDORA_WIFI_IRQ_GPIO, GPIOF_IN, "wl1251 irq"); if (ret < 0) goto fail; - ret = gpio_direction_input(PANDORA_WIFI_IRQ_GPIO); - if (ret < 0) - goto fail_irq; - pandora_wl1251_pdata.irq = gpio_to_irq(PANDORA_WIFI_IRQ_GPIO); if (pandora_wl1251_pdata.irq < 0) goto fail_irq; @@ -688,12 +618,6 @@ static struct omap_board_mux board_mux[] __initdata = { }; #endif -static struct omap_musb_board_data musb_board_data = { - .interface_type = MUSB_INTERFACE_ULPI, - .mode = MUSB_OTG, - .power = 100, -}; - static void __init omap3pandora_init(void) { omap3_mux_init(board_mux, OMAP_PACKAGE_CBB); @@ -705,9 +629,9 @@ static void __init omap3pandora_init(void) omap_serial_init(); spi_register_board_info(omap3pandora_spi_board_info, ARRAY_SIZE(omap3pandora_spi_board_info)); - omap3pandora_ads7846_init(); + omap_ads7846_init(1, OMAP3_PANDORA_TS_GPIO, 0, NULL); usbhs_init(&usbhs_bdata); - usb_musb_init(&musb_board_data); + usb_musb_init(NULL); gpmc_nand_init(&pandora_nand_data); /* Ensure SDRC pins are mux'd for self-refresh */ diff --git a/arch/arm/mach-omap2/board-omap3stalker.c b/arch/arm/mach-omap2/board-omap3stalker.c index a72c90a08c8a..0c108a212ea2 100644 --- a/arch/arm/mach-omap2/board-omap3stalker.c +++ b/arch/arm/mach-omap2/board-omap3stalker.c @@ -45,7 +45,6 @@ #include <plat/mcspi.h> #include <linux/input/matrix_keypad.h> #include <linux/spi/spi.h> -#include <linux/spi/ads7846.h> #include <linux/interrupt.h> #include <linux/smsc911x.h> #include <linux/i2c/at24.h> @@ -54,52 +53,28 @@ #include "mux.h" #include "hsmmc.h" #include "timer-gp.h" +#include "common-board-devices.h" #if defined(CONFIG_SMSC911X) || defined(CONFIG_SMSC911X_MODULE) +#include <plat/gpmc-smsc911x.h> + #define OMAP3STALKER_ETHR_START 0x2c000000 #define OMAP3STALKER_ETHR_SIZE 1024 #define OMAP3STALKER_ETHR_GPIO_IRQ 19 #define OMAP3STALKER_SMC911X_CS 5 -static struct resource omap3stalker_smsc911x_resources[] = { - [0] = { - .start = OMAP3STALKER_ETHR_START, - .end = - (OMAP3STALKER_ETHR_START + OMAP3STALKER_ETHR_SIZE - 1), - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = OMAP_GPIO_IRQ(OMAP3STALKER_ETHR_GPIO_IRQ), - .end = OMAP_GPIO_IRQ(OMAP3STALKER_ETHR_GPIO_IRQ), - .flags = (IORESOURCE_IRQ | IRQF_TRIGGER_LOW), - }, -}; - -static struct smsc911x_platform_config smsc911x_config = { - .phy_interface = PHY_INTERFACE_MODE_MII, - .irq_polarity = SMSC911X_IRQ_POLARITY_ACTIVE_LOW, - .irq_type = SMSC911X_IRQ_TYPE_OPEN_DRAIN, +static struct omap_smsc911x_platform_data smsc911x_cfg = { + .cs = OMAP3STALKER_SMC911X_CS, + .gpio_irq = OMAP3STALKER_ETHR_GPIO_IRQ, + .gpio_reset = -EINVAL, .flags = (SMSC911X_USE_32BIT | SMSC911X_SAVE_MAC_ADDRESS), }; -static struct platform_device omap3stalker_smsc911x_device = { - .name = "smsc911x", - .id = -1, - .num_resources = ARRAY_SIZE(omap3stalker_smsc911x_resources), - .resource = &omap3stalker_smsc911x_resources[0], - .dev = { - .platform_data = &smsc911x_config, - }, -}; - static inline void __init omap3stalker_init_eth(void) { - int eth_cs; struct clk *l3ck; unsigned int rate; - eth_cs = OMAP3STALKER_SMC911X_CS; - l3ck = clk_get(NULL, "l3_ck"); if (IS_ERR(l3ck)) rate = 100000000; @@ -107,16 +82,7 @@ static inline void __init omap3stalker_init_eth(void) rate = clk_get_rate(l3ck); omap_mux_init_gpio(19, OMAP_PIN_INPUT_PULLUP); - if (gpio_request(OMAP3STALKER_ETHR_GPIO_IRQ, "SMC911x irq") < 0) { - printk(KERN_ERR - "Failed to request GPIO%d for smc911x IRQ\n", - OMAP3STALKER_ETHR_GPIO_IRQ); - return; - } - - gpio_direction_input(OMAP3STALKER_ETHR_GPIO_IRQ); - - platform_device_register(&omap3stalker_smsc911x_device); + gpmc_smsc911x_init(&smsc911x_cfg); } #else @@ -365,12 +331,11 @@ omap3stalker_twl_gpio_setup(struct device *dev, */ /* TWL4030_GPIO_MAX + 0 == ledA, LCD Backlight control */ - gpio_request(gpio + TWL4030_GPIO_MAX, "EN_LCD_BKL"); - gpio_direction_output(gpio + TWL4030_GPIO_MAX, 0); + gpio_request_one(gpio + TWL4030_GPIO_MAX, GPIOF_OUT_INIT_LOW, + "EN_LCD_BKL"); /* gpio + 7 == DVI Enable */ - gpio_request(gpio + 7, "EN_DVI"); - gpio_direction_output(gpio + 7, 0); + gpio_request_one(gpio + 7, GPIOF_OUT_INIT_LOW, "EN_DVI"); /* TWL4030_GPIO_MAX + 1 == ledB (out, mmc0) */ gpio_leds[2].gpio = gpio + TWL4030_GPIO_MAX + 1; @@ -489,15 +454,8 @@ static struct twl4030_platform_data omap3stalker_twldata = { .codec = &omap3stalker_codec_data, .vdac = &omap3_stalker_vdac, .vpll2 = &omap3_stalker_vpll2, -}; - -static struct i2c_board_info __initdata omap3stalker_i2c_boardinfo[] = { - { - I2C_BOARD_INFO("twl4030", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = INT_34XX_SYS_NIRQ, - .platform_data = &omap3stalker_twldata, - }, + .vmmc1 = &omap3stalker_vmmc1, + .vsim = &omap3stalker_vsim, }; static struct at24_platform_data fram_info = { @@ -516,15 +474,7 @@ static struct i2c_board_info __initdata omap3stalker_i2c_boardinfo3[] = { static int __init omap3_stalker_i2c_init(void) { - /* - * REVISIT: These entries can be set in omap3evm_twl_data - * after a merge with MFD tree - */ - omap3stalker_twldata.vmmc1 = &omap3stalker_vmmc1; - omap3stalker_twldata.vsim = &omap3stalker_vsim; - - omap_register_i2c_bus(1, 2600, omap3stalker_i2c_boardinfo, - ARRAY_SIZE(omap3stalker_i2c_boardinfo)); + omap3_pmic_init("twl4030", &omap3stalker_twldata); omap_register_i2c_bus(2, 400, NULL, 0); omap_register_i2c_bus(3, 400, omap3stalker_i2c_boardinfo3, ARRAY_SIZE(omap3stalker_i2c_boardinfo3)); @@ -532,49 +482,6 @@ static int __init omap3_stalker_i2c_init(void) } #define OMAP3_STALKER_TS_GPIO 175 -static void ads7846_dev_init(void) -{ - if (gpio_request(OMAP3_STALKER_TS_GPIO, "ADS7846 pendown") < 0) - printk(KERN_ERR "can't get ads7846 pen down GPIO\n"); - - gpio_direction_input(OMAP3_STALKER_TS_GPIO); - gpio_set_debounce(OMAP3_STALKER_TS_GPIO, 310); -} - -static int ads7846_get_pendown_state(void) -{ - return !gpio_get_value(OMAP3_STALKER_TS_GPIO); -} - -static struct ads7846_platform_data ads7846_config = { - .x_max = 0x0fff, - .y_max = 0x0fff, - .x_plate_ohms = 180, - .pressure_max = 255, - .debounce_max = 10, - .debounce_tol = 3, - .debounce_rep = 1, - .get_pendown_state = ads7846_get_pendown_state, - .keep_vref_on = 1, - .settle_delay_usecs = 150, -}; - -static struct omap2_mcspi_device_config ads7846_mcspi_config = { - .turbo_mode = 0, - .single_channel = 1, /* 0: slave, 1: master */ -}; - -static struct spi_board_info omap3stalker_spi_board_info[] = { - [0] = { - .modalias = "ads7846", - .bus_num = 1, - .chip_select = 0, - .max_speed_hz = 1500000, - .controller_data = &ads7846_mcspi_config, - .irq = OMAP_GPIO_IRQ(OMAP3_STALKER_TS_GPIO), - .platform_data = &ads7846_config, - }, -}; static struct omap_board_config_kernel omap3_stalker_config[] __initdata = { }; @@ -618,12 +525,6 @@ static struct omap_board_mux board_mux[] __initdata = { }; #endif -static struct omap_musb_board_data musb_board_data = { - .interface_type = MUSB_INTERFACE_ULPI, - .mode = MUSB_OTG, - .power = 100, -}; - static void __init omap3_stalker_init(void) { omap3_mux_init(board_mux, OMAP_PACKAGE_CUS); @@ -636,13 +537,11 @@ static void __init omap3_stalker_init(void) ARRAY_SIZE(omap3_stalker_devices)); omap_display_init(&omap3_stalker_dss_data); - spi_register_board_info(omap3stalker_spi_board_info, - ARRAY_SIZE(omap3stalker_spi_board_info)); omap_serial_init(); - usb_musb_init(&musb_board_data); + usb_musb_init(NULL); usbhs_init(&usbhs_bdata); - ads7846_dev_init(); + omap_ads7846_init(1, OMAP3_STALKER_TS_GPIO, 310, NULL); omap_mux_init_gpio(21, OMAP_PIN_OUTPUT); omap_mux_init_gpio(18, OMAP_PIN_INPUT_PULLUP); diff --git a/arch/arm/mach-omap2/board-omap3touchbook.c b/arch/arm/mach-omap2/board-omap3touchbook.c index 127cb1752bdd..82872d7d313b 100644 --- a/arch/arm/mach-omap2/board-omap3touchbook.c +++ b/arch/arm/mach-omap2/board-omap3touchbook.c @@ -52,6 +52,7 @@ #include "mux.h" #include "hsmmc.h" #include "timer-gp.h" +#include "common-board-devices.h" #include <asm/setup.h> @@ -95,15 +96,6 @@ static struct mtd_partition omap3touchbook_nand_partitions[] = { }, }; -static struct omap_nand_platform_data omap3touchbook_nand_data = { - .options = NAND_BUSWIDTH_16, - .parts = omap3touchbook_nand_partitions, - .nr_parts = ARRAY_SIZE(omap3touchbook_nand_partitions), - .dma_channel = -1, /* disable DMA in OMAP NAND driver */ - .nand_setup = NULL, - .dev_ready = NULL, -}; - #include "sdram-micron-mt46h32m32lf-6.h" static struct omap2_hsmmc_info mmc[] = { @@ -154,13 +146,11 @@ static int touchbook_twl_gpio_setup(struct device *dev, /* REVISIT: need ehci-omap hooks for external VBUS * power switch and overcurrent detect */ - - gpio_request(gpio + 1, "EHCI_nOC"); - gpio_direction_input(gpio + 1); + gpio_request_one(gpio + 1, GPIOF_IN, "EHCI_nOC"); /* TWL4030_GPIO_MAX + 0 == ledA, EHCI nEN_USB_PWR (out, active low) */ - gpio_request(gpio + TWL4030_GPIO_MAX, "nEN_USB_PWR"); - gpio_direction_output(gpio + TWL4030_GPIO_MAX, 0); + gpio_request_one(gpio + TWL4030_GPIO_MAX, GPIOF_OUT_INIT_LOW, + "nEN_USB_PWR"); /* TWL4030_GPIO_MAX + 1 == ledB, PMU_STAT (out, active low LED) */ gpio_leds[2].gpio = gpio + TWL4030_GPIO_MAX + 1; @@ -273,15 +263,6 @@ static struct twl4030_platform_data touchbook_twldata = { .vpll2 = &touchbook_vpll2, }; -static struct i2c_board_info __initdata touchbook_i2c_boardinfo[] = { - { - I2C_BOARD_INFO("twl4030", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = INT_34XX_SYS_NIRQ, - .platform_data = &touchbook_twldata, - }, -}; - static struct i2c_board_info __initdata touchBook_i2c_boardinfo[] = { { I2C_BOARD_INFO("bq27200", 0x55), @@ -291,8 +272,7 @@ static struct i2c_board_info __initdata touchBook_i2c_boardinfo[] = { static int __init omap3_touchbook_i2c_init(void) { /* Standard TouchBook bus */ - omap_register_i2c_bus(1, 2600, touchbook_i2c_boardinfo, - ARRAY_SIZE(touchbook_i2c_boardinfo)); + omap3_pmic_init("twl4030", &touchbook_twldata); /* Additional TouchBook bus */ omap_register_i2c_bus(3, 100, touchBook_i2c_boardinfo, @@ -301,19 +281,7 @@ static int __init omap3_touchbook_i2c_init(void) return 0; } -static void __init omap3_ads7846_init(void) -{ - if (gpio_request(OMAP3_TS_GPIO, "ads7846_pen_down")) { - printk(KERN_ERR "Failed to request GPIO %d for " - "ads7846 pen down IRQ\n", OMAP3_TS_GPIO); - return; - } - - gpio_direction_input(OMAP3_TS_GPIO); - gpio_set_debounce(OMAP3_TS_GPIO, 310); -} - -static struct ads7846_platform_data ads7846_config = { +static struct ads7846_platform_data ads7846_pdata = { .x_min = 100, .y_min = 265, .x_max = 3950, @@ -327,23 +295,6 @@ static struct ads7846_platform_data ads7846_config = { .keep_vref_on = 1, }; -static struct omap2_mcspi_device_config ads7846_mcspi_config = { - .turbo_mode = 0, - .single_channel = 1, /* 0: slave, 1: master */ -}; - -static struct spi_board_info omap3_ads7846_spi_board_info[] __initdata = { - { - .modalias = "ads7846", - .bus_num = 4, - .chip_select = 0, - .max_speed_hz = 1500000, - .controller_data = &ads7846_mcspi_config, - .irq = OMAP_GPIO_IRQ(OMAP3_TS_GPIO), - .platform_data = &ads7846_config, - } -}; - static struct gpio_led gpio_leds[] = { { .name = "touchbook::usr0", @@ -434,39 +385,6 @@ static struct platform_device *omap3_touchbook_devices[] __initdata = { &keys_gpio, }; -static void __init omap3touchbook_flash_init(void) -{ - u8 cs = 0; - u8 nandcs = GPMC_CS_NUM + 1; - - /* find out the chip-select on which NAND exists */ - while (cs < GPMC_CS_NUM) { - u32 ret = 0; - ret = gpmc_cs_read_reg(cs, GPMC_CS_CONFIG1); - - if ((ret & 0xC00) == 0x800) { - printk(KERN_INFO "Found NAND on CS%d\n", cs); - if (nandcs > GPMC_CS_NUM) - nandcs = cs; - } - cs++; - } - - if (nandcs > GPMC_CS_NUM) { - printk(KERN_INFO "NAND: Unable to find configuration " - "in GPMC\n "); - return; - } - - if (nandcs < GPMC_CS_NUM) { - omap3touchbook_nand_data.cs = nandcs; - - printk(KERN_INFO "Registering NAND on CS%d\n", nandcs); - if (gpmc_nand_init(&omap3touchbook_nand_data) < 0) - printk(KERN_ERR "Unable to register NAND device\n"); - } -} - static const struct usbhs_omap_board_data usbhs_bdata __initconst = { .port_mode[0] = OMAP_EHCI_PORT_MODE_PHY, @@ -481,15 +399,10 @@ static const struct usbhs_omap_board_data usbhs_bdata __initconst = { static void omap3_touchbook_poweroff(void) { - int r; + int pwr_off = TB_KILL_POWER_GPIO; - r = gpio_request(TB_KILL_POWER_GPIO, "DVI reset"); - if (r < 0) { + if (gpio_request_one(pwr_off, GPIOF_OUT_INIT_LOW, "DVI reset") < 0) printk(KERN_ERR "Unable to get kill power GPIO\n"); - return; - } - - gpio_direction_output(TB_KILL_POWER_GPIO, 0); } static int __init early_touchbook_revision(char *p) @@ -501,12 +414,6 @@ static int __init early_touchbook_revision(char *p) } early_param("tbr", early_touchbook_revision); -static struct omap_musb_board_data musb_board_data = { - .interface_type = MUSB_INTERFACE_ULPI, - .mode = MUSB_OTG, - .power = 100, -}; - static void __init omap3_touchbook_init(void) { omap3_mux_init(board_mux, OMAP_PACKAGE_CBB); @@ -521,17 +428,15 @@ static void __init omap3_touchbook_init(void) omap_serial_init(); omap_mux_init_gpio(170, OMAP_PIN_INPUT); - gpio_request(176, "DVI_nPD"); /* REVISIT leave DVI powered down until it's needed ... */ - gpio_direction_output(176, true); + gpio_request_one(176, GPIOF_OUT_INIT_HIGH, "DVI_nPD"); /* Touchscreen and accelerometer */ - spi_register_board_info(omap3_ads7846_spi_board_info, - ARRAY_SIZE(omap3_ads7846_spi_board_info)); - omap3_ads7846_init(); - usb_musb_init(&musb_board_data); + omap_ads7846_init(4, OMAP3_TS_GPIO, 310, &ads7846_pdata); + usb_musb_init(NULL); usbhs_init(&usbhs_bdata); - omap3touchbook_flash_init(); + omap_nand_flash_init(NAND_BUSWIDTH_16, omap3touchbook_nand_partitions, + ARRAY_SIZE(omap3touchbook_nand_partitions)); /* Ensure SDRC pins are mux'd for self-refresh */ omap_mux_init_signal("sdrc_cke0", OMAP_PIN_OUTPUT); diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c index e4973ac77cbc..90485fced973 100644 --- a/arch/arm/mach-omap2/board-omap4panda.c +++ b/arch/arm/mach-omap2/board-omap4panda.c @@ -46,6 +46,7 @@ #include "hsmmc.h" #include "control.h" #include "mux.h" +#include "common-board-devices.h" #define GPIO_HUB_POWER 1 #define GPIO_HUB_NRESET 62 @@ -111,6 +112,11 @@ static const struct usbhs_omap_board_data usbhs_bdata __initconst = { .reset_gpio_port[2] = -EINVAL }; +static struct gpio panda_ehci_gpios[] __initdata = { + { GPIO_HUB_POWER, GPIOF_OUT_INIT_LOW, "hub_power" }, + { GPIO_HUB_NRESET, GPIOF_OUT_INIT_LOW, "hub_nreset" }, +}; + static void __init omap4_ehci_init(void) { int ret; @@ -120,44 +126,27 @@ static void __init omap4_ehci_init(void) phy_ref_clk = clk_get(NULL, "auxclk3_ck"); if (IS_ERR(phy_ref_clk)) { pr_err("Cannot request auxclk3\n"); - goto error1; + return; } clk_set_rate(phy_ref_clk, 19200000); clk_enable(phy_ref_clk); - /* disable the power to the usb hub prior to init */ - ret = gpio_request(GPIO_HUB_POWER, "hub_power"); + /* disable the power to the usb hub prior to init and reset phy+hub */ + ret = gpio_request_array(panda_ehci_gpios, + ARRAY_SIZE(panda_ehci_gpios)); if (ret) { - pr_err("Cannot request GPIO %d\n", GPIO_HUB_POWER); - goto error1; + pr_err("Unable to initialize EHCI power/reset\n"); + return; } - gpio_export(GPIO_HUB_POWER, 0); - gpio_direction_output(GPIO_HUB_POWER, 0); - gpio_set_value(GPIO_HUB_POWER, 0); - /* reset phy+hub */ - ret = gpio_request(GPIO_HUB_NRESET, "hub_nreset"); - if (ret) { - pr_err("Cannot request GPIO %d\n", GPIO_HUB_NRESET); - goto error2; - } + gpio_export(GPIO_HUB_POWER, 0); gpio_export(GPIO_HUB_NRESET, 0); - gpio_direction_output(GPIO_HUB_NRESET, 0); - gpio_set_value(GPIO_HUB_NRESET, 0); gpio_set_value(GPIO_HUB_NRESET, 1); usbhs_init(&usbhs_bdata); /* enable power to hub */ gpio_set_value(GPIO_HUB_POWER, 1); - return; - -error2: - gpio_free(GPIO_HUB_POWER); -error1: - pr_err("Unable to initialize EHCI power/reset\n"); - return; - } static struct omap_musb_board_data musb_board_data = { @@ -408,15 +397,6 @@ static struct twl4030_platform_data omap4_panda_twldata = { .usb = &omap4_usbphy_data, }; -static struct i2c_board_info __initdata omap4_panda_i2c_boardinfo[] = { - { - I2C_BOARD_INFO("twl6030", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = OMAP44XX_IRQ_SYS_1N, - .platform_data = &omap4_panda_twldata, - }, -}; - /* * Display monitor features are burnt in their EEPROM as EDID data. The EEPROM * is connected as I2C slave device, and can be accessed at address 0x50 @@ -429,12 +409,7 @@ static struct i2c_board_info __initdata panda_i2c_eeprom[] = { static int __init omap4_panda_i2c_init(void) { - /* - * Phoenix Audio IC needs I2C1 to - * start with 400 KHz or less - */ - omap_register_i2c_bus(1, 400, omap4_panda_i2c_boardinfo, - ARRAY_SIZE(omap4_panda_i2c_boardinfo)); + omap4_pmic_init("twl6030", &omap4_panda_twldata); omap_register_i2c_bus(2, 400, NULL, 0); /* * Bus 3 is attached to the DVI port where devices like the pico DLP @@ -651,27 +626,19 @@ static void omap4_panda_hdmi_mux_init(void) OMAP_PIN_INPUT_PULLUP); } +static struct gpio panda_hdmi_gpios[] = { + { HDMI_GPIO_HPD, GPIOF_OUT_INIT_HIGH, "hdmi_gpio_hpd" }, + { HDMI_GPIO_LS_OE, GPIOF_OUT_INIT_HIGH, "hdmi_gpio_ls_oe" }, +}; + static int omap4_panda_panel_enable_hdmi(struct omap_dss_device *dssdev) { int status; - status = gpio_request_one(HDMI_GPIO_HPD, GPIOF_OUT_INIT_HIGH, - "hdmi_gpio_hpd"); - if (status) { - pr_err("Cannot request GPIO %d\n", HDMI_GPIO_HPD); - return status; - } - status = gpio_request_one(HDMI_GPIO_LS_OE, GPIOF_OUT_INIT_HIGH, - "hdmi_gpio_ls_oe"); - if (status) { - pr_err("Cannot request GPIO %d\n", HDMI_GPIO_LS_OE); - goto error1; - } - - return 0; - -error1: - gpio_free(HDMI_GPIO_HPD); + status = gpio_request_array(panda_hdmi_gpios, + ARRAY_SIZE(panda_hdmi_gpios)); + if (status) + pr_err("Cannot request HDMI GPIOs\n"); return status; } diff --git a/arch/arm/mach-omap2/board-overo.c b/arch/arm/mach-omap2/board-overo.c index 9d192ff3b9ac..1555918e3ffa 100644 --- a/arch/arm/mach-omap2/board-overo.c +++ b/arch/arm/mach-omap2/board-overo.c @@ -56,6 +56,7 @@ #include "mux.h" #include "sdram-micron-mt46h32m32lf-6.h" #include "hsmmc.h" +#include "common-board-devices.h" #define OVERO_GPIO_BT_XGATE 15 #define OVERO_GPIO_W2W_NRESET 16 @@ -74,30 +75,6 @@ #if defined(CONFIG_TOUCHSCREEN_ADS7846) || \ defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE) -#include <linux/spi/ads7846.h> - -static struct omap2_mcspi_device_config ads7846_mcspi_config = { - .turbo_mode = 0, - .single_channel = 1, /* 0: slave, 1: master */ -}; - -static int ads7846_get_pendown_state(void) -{ - return !gpio_get_value(OVERO_GPIO_PENDOWN); -} - -static struct ads7846_platform_data ads7846_config = { - .x_max = 0x0fff, - .y_max = 0x0fff, - .x_plate_ohms = 180, - .pressure_max = 255, - .debounce_max = 10, - .debounce_tol = 3, - .debounce_rep = 1, - .get_pendown_state = ads7846_get_pendown_state, - .keep_vref_on = 1, -}; - /* fixed regulator for ads7846 */ static struct regulator_consumer_supply ads7846_supply = REGULATOR_SUPPLY("vcc", "spi1.0"); @@ -128,14 +105,7 @@ static struct platform_device vads7846_device = { static void __init overo_ads7846_init(void) { - if ((gpio_request(OVERO_GPIO_PENDOWN, "ADS7846_PENDOWN") == 0) && - (gpio_direction_input(OVERO_GPIO_PENDOWN) == 0)) { - gpio_export(OVERO_GPIO_PENDOWN, 0); - } else { - printk(KERN_ERR "could not obtain gpio for ADS7846_PENDOWN\n"); - return; - } - + omap_ads7846_init(1, OVERO_GPIO_PENDOWN, 0, NULL); platform_device_register(&vads7846_device); } @@ -146,106 +116,28 @@ static inline void __init overo_ads7846_init(void) { return; } #if defined(CONFIG_SMSC911X) || defined(CONFIG_SMSC911X_MODULE) #include <linux/smsc911x.h> +#include <plat/gpmc-smsc911x.h> -static struct resource overo_smsc911x_resources[] = { - { - .name = "smsc911x-memory", - .flags = IORESOURCE_MEM, - }, - { - .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWLEVEL, - }, -}; - -static struct resource overo_smsc911x2_resources[] = { - { - .name = "smsc911x2-memory", - .flags = IORESOURCE_MEM, - }, - { - .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWLEVEL, - }, -}; - -static struct smsc911x_platform_config overo_smsc911x_config = { - .irq_polarity = SMSC911X_IRQ_POLARITY_ACTIVE_LOW, - .irq_type = SMSC911X_IRQ_TYPE_OPEN_DRAIN, - .flags = SMSC911X_USE_32BIT , - .phy_interface = PHY_INTERFACE_MODE_MII, -}; - -static struct platform_device overo_smsc911x_device = { - .name = "smsc911x", +static struct omap_smsc911x_platform_data smsc911x_cfg = { .id = 0, - .num_resources = ARRAY_SIZE(overo_smsc911x_resources), - .resource = overo_smsc911x_resources, - .dev = { - .platform_data = &overo_smsc911x_config, - }, + .cs = OVERO_SMSC911X_CS, + .gpio_irq = OVERO_SMSC911X_GPIO, + .gpio_reset = -EINVAL, + .flags = SMSC911X_USE_32BIT, }; -static struct platform_device overo_smsc911x2_device = { - .name = "smsc911x", +static struct omap_smsc911x_platform_data smsc911x2_cfg = { .id = 1, - .num_resources = ARRAY_SIZE(overo_smsc911x2_resources), - .resource = overo_smsc911x2_resources, - .dev = { - .platform_data = &overo_smsc911x_config, - }, + .cs = OVERO_SMSC911X2_CS, + .gpio_irq = OVERO_SMSC911X2_GPIO, + .gpio_reset = -EINVAL, + .flags = SMSC911X_USE_32BIT, }; -static struct platform_device *smsc911x_devices[] = { - &overo_smsc911x_device, - &overo_smsc911x2_device, -}; - -static inline void __init overo_init_smsc911x(void) +static void __init overo_init_smsc911x(void) { - unsigned long cs_mem_base, cs_mem_base2; - - /* set up first smsc911x chip */ - - if (gpmc_cs_request(OVERO_SMSC911X_CS, SZ_16M, &cs_mem_base) < 0) { - printk(KERN_ERR "Failed request for GPMC mem for smsc911x\n"); - return; - } - - overo_smsc911x_resources[0].start = cs_mem_base + 0x0; - overo_smsc911x_resources[0].end = cs_mem_base + 0xff; - - if ((gpio_request(OVERO_SMSC911X_GPIO, "SMSC911X IRQ") == 0) && - (gpio_direction_input(OVERO_SMSC911X_GPIO) == 0)) { - gpio_export(OVERO_SMSC911X_GPIO, 0); - } else { - printk(KERN_ERR "could not obtain gpio for SMSC911X IRQ\n"); - return; - } - - overo_smsc911x_resources[1].start = OMAP_GPIO_IRQ(OVERO_SMSC911X_GPIO); - overo_smsc911x_resources[1].end = 0; - - /* set up second smsc911x chip */ - - if (gpmc_cs_request(OVERO_SMSC911X2_CS, SZ_16M, &cs_mem_base2) < 0) { - printk(KERN_ERR "Failed request for GPMC mem for smsc911x2\n"); - return; - } - - overo_smsc911x2_resources[0].start = cs_mem_base2 + 0x0; - overo_smsc911x2_resources[0].end = cs_mem_base2 + 0xff; - - if ((gpio_request(OVERO_SMSC911X2_GPIO, "SMSC911X2 IRQ") == 0) && - (gpio_direction_input(OVERO_SMSC911X2_GPIO) == 0)) { - gpio_export(OVERO_SMSC911X2_GPIO, 0); - } else { - printk(KERN_ERR "could not obtain gpio for SMSC911X2 IRQ\n"); - return; - } - - overo_smsc911x2_resources[1].start = OMAP_GPIO_IRQ(OVERO_SMSC911X2_GPIO); - overo_smsc911x2_resources[1].end = 0; - - platform_add_devices(smsc911x_devices, ARRAY_SIZE(smsc911x_devices)); + gpmc_smsc911x_init(&smsc911x_cfg); + gpmc_smsc911x_init(&smsc911x2_cfg); } #else @@ -259,21 +151,20 @@ static int dvi_enabled; #define OVERO_GPIO_LCD_EN 144 #define OVERO_GPIO_LCD_BL 145 +static struct gpio overo_dss_gpios[] __initdata = { + { OVERO_GPIO_LCD_EN, GPIOF_OUT_INIT_HIGH, "OVERO_GPIO_LCD_EN" }, + { OVERO_GPIO_LCD_BL, GPIOF_OUT_INIT_HIGH, "OVERO_GPIO_LCD_BL" }, +}; + static void __init overo_display_init(void) { - if ((gpio_request(OVERO_GPIO_LCD_EN, "OVERO_GPIO_LCD_EN") == 0) && - (gpio_direction_output(OVERO_GPIO_LCD_EN, 1) == 0)) - gpio_export(OVERO_GPIO_LCD_EN, 0); - else - printk(KERN_ERR "could not obtain gpio for " - "OVERO_GPIO_LCD_EN\n"); + if (gpio_request_array(overo_dss_gpios, ARRAY_SIZE(overo_dss_gpios))) { + printk(KERN_ERR "could not obtain DSS control GPIOs\n"); + return; + } - if ((gpio_request(OVERO_GPIO_LCD_BL, "OVERO_GPIO_LCD_BL") == 0) && - (gpio_direction_output(OVERO_GPIO_LCD_BL, 1) == 0)) - gpio_export(OVERO_GPIO_LCD_BL, 0); - else - printk(KERN_ERR "could not obtain gpio for " - "OVERO_GPIO_LCD_BL\n"); + gpio_export(OVERO_GPIO_LCD_EN, 0); + gpio_export(OVERO_GPIO_LCD_BL, 0); } static int overo_panel_enable_dvi(struct omap_dss_device *dssdev) @@ -412,45 +303,6 @@ static struct mtd_partition overo_nand_partitions[] = { }, }; -static struct omap_nand_platform_data overo_nand_data = { - .parts = overo_nand_partitions, - .nr_parts = ARRAY_SIZE(overo_nand_partitions), - .dma_channel = -1, /* disable DMA in OMAP NAND driver */ -}; - -static void __init overo_flash_init(void) -{ - u8 cs = 0; - u8 nandcs = GPMC_CS_NUM + 1; - - /* find out the chip-select on which NAND exists */ - while (cs < GPMC_CS_NUM) { - u32 ret = 0; - ret = gpmc_cs_read_reg(cs, GPMC_CS_CONFIG1); - - if ((ret & 0xC00) == 0x800) { - printk(KERN_INFO "Found NAND on CS%d\n", cs); - if (nandcs > GPMC_CS_NUM) - nandcs = cs; - } - cs++; - } - - if (nandcs > GPMC_CS_NUM) { - printk(KERN_INFO "NAND: Unable to find configuration " - "in GPMC\n "); - return; - } - - if (nandcs < GPMC_CS_NUM) { - overo_nand_data.cs = nandcs; - - printk(KERN_INFO "Registering NAND on CS%d\n", nandcs); - if (gpmc_nand_init(&overo_nand_data) < 0) - printk(KERN_ERR "Unable to register NAND device\n"); - } -} - static struct omap2_hsmmc_info mmc[] = { { .mmc = 1, @@ -648,37 +500,15 @@ static struct twl4030_platform_data overo_twldata = { .vpll2 = &overo_vpll2, }; -static struct i2c_board_info __initdata overo_i2c_boardinfo[] = { - { - I2C_BOARD_INFO("tps65950", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = INT_34XX_SYS_NIRQ, - .platform_data = &overo_twldata, - }, -}; - static int __init overo_i2c_init(void) { - omap_register_i2c_bus(1, 2600, overo_i2c_boardinfo, - ARRAY_SIZE(overo_i2c_boardinfo)); + omap3_pmic_init("tps65950", &overo_twldata); /* i2c2 pins are used for gpio */ omap_register_i2c_bus(3, 400, NULL, 0); return 0; } static struct spi_board_info overo_spi_board_info[] __initdata = { -#if defined(CONFIG_TOUCHSCREEN_ADS7846) || \ - defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE) - { - .modalias = "ads7846", - .bus_num = 1, - .chip_select = 0, - .max_speed_hz = 1500000, - .controller_data = &ads7846_mcspi_config, - .irq = OMAP_GPIO_IRQ(OVERO_GPIO_PENDOWN), - .platform_data = &ads7846_config, - }, -#endif #if defined(CONFIG_PANEL_LGPHILIPS_LB035Q02) || \ defined(CONFIG_PANEL_LGPHILIPS_LB035Q02_MODULE) { @@ -722,20 +552,22 @@ static struct omap_board_mux board_mux[] __initdata = { }; #endif -static struct omap_musb_board_data musb_board_data = { - .interface_type = MUSB_INTERFACE_ULPI, - .mode = MUSB_OTG, - .power = 100, +static struct gpio overo_bt_gpios[] __initdata = { + { OVERO_GPIO_BT_XGATE, GPIOF_OUT_INIT_LOW, "lcd enable" }, + { OVERO_GPIO_BT_NRESET, GPIOF_OUT_INIT_HIGH, "lcd bl enable" }, }; static void __init overo_init(void) { + int ret; + omap3_mux_init(board_mux, OMAP_PACKAGE_CBB); overo_i2c_init(); omap_display_init(&overo_dss_data); omap_serial_init(); - overo_flash_init(); - usb_musb_init(&musb_board_data); + omap_nand_flash_init(0, overo_nand_partitions, + ARRAY_SIZE(overo_nand_partitions)); + usb_musb_init(NULL); usbhs_init(&usbhs_bdata); overo_spi_init(); overo_ads7846_init(); @@ -748,9 +580,9 @@ static void __init overo_init(void) omap_mux_init_signal("sdrc_cke0", OMAP_PIN_OUTPUT); omap_mux_init_signal("sdrc_cke1", OMAP_PIN_OUTPUT); - if ((gpio_request(OVERO_GPIO_W2W_NRESET, - "OVERO_GPIO_W2W_NRESET") == 0) && - (gpio_direction_output(OVERO_GPIO_W2W_NRESET, 1) == 0)) { + ret = gpio_request_one(OVERO_GPIO_W2W_NRESET, GPIOF_OUT_INIT_HIGH, + "OVERO_GPIO_W2W_NRESET"); + if (ret == 0) { gpio_export(OVERO_GPIO_W2W_NRESET, 0); gpio_set_value(OVERO_GPIO_W2W_NRESET, 0); udelay(10); @@ -760,25 +592,20 @@ static void __init overo_init(void) "OVERO_GPIO_W2W_NRESET\n"); } - if ((gpio_request(OVERO_GPIO_BT_XGATE, "OVERO_GPIO_BT_XGATE") == 0) && - (gpio_direction_output(OVERO_GPIO_BT_XGATE, 0) == 0)) + ret = gpio_request_array(overo_bt_gpios, ARRAY_SIZE(overo_bt_gpios)); + if (ret) { + pr_err("%s: could not obtain BT gpios\n", __func__); + } else { gpio_export(OVERO_GPIO_BT_XGATE, 0); - else - printk(KERN_ERR "could not obtain gpio for OVERO_GPIO_BT_XGATE\n"); - - if ((gpio_request(OVERO_GPIO_BT_NRESET, "OVERO_GPIO_BT_NRESET") == 0) && - (gpio_direction_output(OVERO_GPIO_BT_NRESET, 1) == 0)) { gpio_export(OVERO_GPIO_BT_NRESET, 0); gpio_set_value(OVERO_GPIO_BT_NRESET, 0); mdelay(6); gpio_set_value(OVERO_GPIO_BT_NRESET, 1); - } else { - printk(KERN_ERR "could not obtain gpio for " - "OVERO_GPIO_BT_NRESET\n"); } - if ((gpio_request(OVERO_GPIO_USBH_CPEN, "OVERO_GPIO_USBH_CPEN") == 0) && - (gpio_direction_output(OVERO_GPIO_USBH_CPEN, 1) == 0)) + ret = gpio_request_one(OVERO_GPIO_USBH_CPEN, GPIOF_OUT_INIT_HIGH, + "OVERO_GPIO_USBH_CPEN"); + if (ret == 0) gpio_export(OVERO_GPIO_USBH_CPEN, 0); else printk(KERN_ERR "could not obtain gpio for " diff --git a/arch/arm/mach-omap2/board-rm680.c b/arch/arm/mach-omap2/board-rm680.c index 2af8b05e786d..42d10b12da3c 100644 --- a/arch/arm/mach-omap2/board-rm680.c +++ b/arch/arm/mach-omap2/board-rm680.c @@ -31,6 +31,7 @@ #include "mux.h" #include "hsmmc.h" #include "sdram-nokia.h" +#include "common-board-devices.h" static struct regulator_consumer_supply rm680_vemmc_consumers[] = { REGULATOR_SUPPLY("vmmc", "omap_hsmmc.1"), @@ -90,19 +91,9 @@ static struct twl4030_platform_data rm680_twl_data = { /* add rest of the children here */ }; -static struct i2c_board_info __initdata rm680_twl_i2c_board_info[] = { - { - I2C_BOARD_INFO("twl5031", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = INT_34XX_SYS_NIRQ, - .platform_data = &rm680_twl_data, - }, -}; - static void __init rm680_i2c_init(void) { - omap_register_i2c_bus(1, 2900, rm680_twl_i2c_board_info, - ARRAY_SIZE(rm680_twl_i2c_board_info)); + omap_pmic_init(1, 2900, "twl5031", INT_34XX_SYS_NIRQ, &rm680_twl_data); omap_register_i2c_bus(2, 400, NULL, 0); omap_register_i2c_bus(3, 400, NULL, 0); } @@ -153,17 +144,11 @@ static struct omap_board_mux board_mux[] __initdata = { }; #endif -static struct omap_musb_board_data rm680_musb_data = { - .interface_type = MUSB_INTERFACE_ULPI, - .mode = MUSB_PERIPHERAL, - .power = 100, -}; - static void __init rm680_init(void) { omap3_mux_init(board_mux, OMAP_PACKAGE_CBB); omap_serial_init(); - usb_musb_init(&rm680_musb_data); + usb_musb_init(NULL); rm680_peripherals_init(); } diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c index 01ee0a15ef88..2b00f72e8e36 100644 --- a/arch/arm/mach-omap2/board-rx51-peripherals.c +++ b/arch/arm/mach-omap2/board-rx51-peripherals.c @@ -43,6 +43,7 @@ #include "mux.h" #include "hsmmc.h" +#include "common-board-devices.h" #define SYSTEM_REV_B_USES_VAUX3 0x1699 #define SYSTEM_REV_S_USES_VAUX3 0x8 @@ -557,10 +558,8 @@ static __init void rx51_init_si4713(void) static int rx51_twlgpio_setup(struct device *dev, unsigned gpio, unsigned n) { /* FIXME this gpio setup is just a placeholder for now */ - gpio_request(gpio + 6, "backlight_pwm"); - gpio_direction_output(gpio + 6, 0); - gpio_request(gpio + 7, "speaker_en"); - gpio_direction_output(gpio + 7, 1); + gpio_request_one(gpio + 6, GPIOF_OUT_INIT_LOW, "backlight_pwm"); + gpio_request_one(gpio + 7, GPIOF_OUT_INIT_HIGH, "speaker_en"); return 0; } @@ -777,15 +776,6 @@ static struct tpa6130a2_platform_data rx51_tpa6130a2_data __initdata_or_module = .power_gpio = 98, }; -static struct i2c_board_info __initdata rx51_peripherals_i2c_board_info_1[] = { - { - I2C_BOARD_INFO("twl5030", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = INT_34XX_SYS_NIRQ, - .platform_data = &rx51_twldata, - }, -}; - /* Audio setup data */ static struct aic3x_setup_data rx51_aic34_setup = { .gpio_func[0] = AIC3X_GPIO1_FUNC_DISABLED, @@ -833,8 +823,7 @@ static int __init rx51_i2c_init(void) rx51_twldata.vaux3 = &rx51_vaux3_cam; } rx51_twldata.vmmc2 = &rx51_vmmc2; - omap_register_i2c_bus(1, 2200, rx51_peripherals_i2c_board_info_1, - ARRAY_SIZE(rx51_peripherals_i2c_board_info_1)); + omap_pmic_init(1, 2200, "twl5030", INT_34XX_SYS_NIRQ, &rx51_twldata); omap_register_i2c_bus(2, 100, rx51_peripherals_i2c_board_info_2, ARRAY_SIZE(rx51_peripherals_i2c_board_info_2)); omap_register_i2c_bus(3, 400, NULL, 0); @@ -921,26 +910,20 @@ static void rx51_wl1251_set_power(bool enable) gpio_set_value(RX51_WL1251_POWER_GPIO, enable); } +static struct gpio rx51_wl1251_gpios[] __initdata = { + { RX51_WL1251_POWER_GPIO, GPIOF_OUT_INIT_LOW, "wl1251 power" }, + { RX51_WL1251_IRQ_GPIO, GPIOF_IN, "wl1251 irq" }, +}; + static void __init rx51_init_wl1251(void) { int irq, ret; - ret = gpio_request(RX51_WL1251_POWER_GPIO, "wl1251 power"); + ret = gpio_request_array(rx51_wl1251_gpios, + ARRAY_SIZE(rx51_wl1251_gpios)); if (ret < 0) goto error; - ret = gpio_direction_output(RX51_WL1251_POWER_GPIO, 0); - if (ret < 0) - goto err_power; - - ret = gpio_request(RX51_WL1251_IRQ_GPIO, "wl1251 irq"); - if (ret < 0) - goto err_power; - - ret = gpio_direction_input(RX51_WL1251_IRQ_GPIO); - if (ret < 0) - goto err_irq; - irq = gpio_to_irq(RX51_WL1251_IRQ_GPIO); if (irq < 0) goto err_irq; @@ -952,10 +935,7 @@ static void __init rx51_init_wl1251(void) err_irq: gpio_free(RX51_WL1251_IRQ_GPIO); - -err_power: gpio_free(RX51_WL1251_POWER_GPIO); - error: printk(KERN_ERR "wl1251 board initialisation failed\n"); wl1251_pdata.set_power = NULL; diff --git a/arch/arm/mach-omap2/board-rx51-video.c b/arch/arm/mach-omap2/board-rx51-video.c index 2df10b6a5940..2c1289bd5e6a 100644 --- a/arch/arm/mach-omap2/board-rx51-video.c +++ b/arch/arm/mach-omap2/board-rx51-video.c @@ -76,13 +76,12 @@ static int __init rx51_video_init(void) return 0; } - if (gpio_request(RX51_LCD_RESET_GPIO, "LCD ACX565AKM reset")) { + if (gpio_request_one(RX51_LCD_RESET_GPIO, GPIOF_OUT_INIT_HIGH, + "LCD ACX565AKM reset")) { pr_err("%s failed to get LCD Reset GPIO\n", __func__); return 0; } - gpio_direction_output(RX51_LCD_RESET_GPIO, 1); - omap_display_init(&rx51_dss_board_info); return 0; } diff --git a/arch/arm/mach-omap2/board-rx51.c b/arch/arm/mach-omap2/board-rx51.c index f8ba20a14e62..fec4cac8fa0a 100644 --- a/arch/arm/mach-omap2/board-rx51.c +++ b/arch/arm/mach-omap2/board-rx51.c @@ -58,21 +58,25 @@ static struct platform_device leds_gpio = { }, }; +/* + * cpuidle C-states definition override from the default values. + * The 'exit_latency' field is the sum of sleep and wake-up latencies. + */ static struct cpuidle_params rx51_cpuidle_params[] = { /* C1 */ - {1, 110, 162, 5}, + {110 + 162, 5 , 1}, /* C2 */ - {1, 106, 180, 309}, + {106 + 180, 309, 1}, /* C3 */ - {0, 107, 410, 46057}, + {107 + 410, 46057, 0}, /* C4 */ - {0, 121, 3374, 46057}, + {121 + 3374, 46057, 0}, /* C5 */ - {1, 855, 1146, 46057}, + {855 + 1146, 46057, 1}, /* C6 */ - {0, 7580, 4134, 484329}, + {7580 + 4134, 484329, 0}, /* C7 */ - {1, 7505, 15274, 484329}, + {7505 + 15274, 484329, 1}, }; static struct omap_lcd_config rx51_lcd_config = { diff --git a/arch/arm/mach-omap2/board-zoom-debugboard.c b/arch/arm/mach-omap2/board-zoom-debugboard.c index 007ebdc6c993..6402e781c458 100644 --- a/arch/arm/mach-omap2/board-zoom-debugboard.c +++ b/arch/arm/mach-omap2/board-zoom-debugboard.c @@ -15,6 +15,7 @@ #include <linux/interrupt.h> #include <plat/gpmc.h> +#include <plat/gpmc-smsc911x.h> #include <mach/board-zoom.h> @@ -26,60 +27,16 @@ #define DEBUG_BASE 0x08000000 #define ZOOM_ETHR_START DEBUG_BASE -static struct resource zoom_smsc911x_resources[] = { - [0] = { - .start = ZOOM_ETHR_START, - .end = ZOOM_ETHR_START + SZ_4K, - .flags = IORESOURCE_MEM, - }, - [1] = { - .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWLEVEL, - }, -}; - -static struct smsc911x_platform_config zoom_smsc911x_config = { - .irq_polarity = SMSC911X_IRQ_POLARITY_ACTIVE_LOW, - .irq_type = SMSC911X_IRQ_TYPE_OPEN_DRAIN, +static struct omap_smsc911x_platform_data zoom_smsc911x_cfg = { + .cs = ZOOM_SMSC911X_CS, + .gpio_irq = ZOOM_SMSC911X_GPIO, + .gpio_reset = -EINVAL, .flags = SMSC911X_USE_32BIT, - .phy_interface = PHY_INTERFACE_MODE_MII, -}; - -static struct platform_device zoom_smsc911x_device = { - .name = "smsc911x", - .id = -1, - .num_resources = ARRAY_SIZE(zoom_smsc911x_resources), - .resource = zoom_smsc911x_resources, - .dev = { - .platform_data = &zoom_smsc911x_config, - }, }; static inline void __init zoom_init_smsc911x(void) { - int eth_cs; - unsigned long cs_mem_base; - int eth_gpio = 0; - - eth_cs = ZOOM_SMSC911X_CS; - - if (gpmc_cs_request(eth_cs, SZ_16M, &cs_mem_base) < 0) { - printk(KERN_ERR "Failed to request GPMC mem for smsc911x\n"); - return; - } - - zoom_smsc911x_resources[0].start = cs_mem_base + 0x0; - zoom_smsc911x_resources[0].end = cs_mem_base + 0xff; - - eth_gpio = ZOOM_SMSC911X_GPIO; - - zoom_smsc911x_resources[1].start = OMAP_GPIO_IRQ(eth_gpio); - - if (gpio_request(eth_gpio, "smsc911x irq") < 0) { - printk(KERN_ERR "Failed to request GPIO%d for smsc911x IRQ\n", - eth_gpio); - return; - } - gpio_direction_input(eth_gpio); + gpmc_smsc911x_init(&zoom_smsc911x_cfg); } static struct plat_serial8250_port serial_platform_data[] = { @@ -120,12 +77,9 @@ static inline void __init zoom_init_quaduart(void) quart_gpio = ZOOM_QUADUART_GPIO; - if (gpio_request(quart_gpio, "TL16CP754C GPIO") < 0) { + if (gpio_request_one(quart_gpio, GPIOF_IN, "TL16CP754C GPIO") < 0) printk(KERN_ERR "Failed to request GPIO%d for TL16CP754C\n", quart_gpio); - return; - } - gpio_direction_input(quart_gpio); } static inline int omap_zoom_debugboard_detect(void) @@ -135,12 +89,12 @@ static inline int omap_zoom_debugboard_detect(void) debug_board_detect = ZOOM_SMSC911X_GPIO; - if (gpio_request(debug_board_detect, "Zoom debug board detect") < 0) { + if (gpio_request_one(debug_board_detect, GPIOF_IN, + "Zoom debug board detect") < 0) { printk(KERN_ERR "Failed to request GPIO%d for Zoom debug" "board detect\n", debug_board_detect); return 0; } - gpio_direction_input(debug_board_detect); if (!gpio_get_value(debug_board_detect)) { ret = 0; @@ -150,7 +104,6 @@ static inline int omap_zoom_debugboard_detect(void) } static struct platform_device *zoom_devices[] __initdata = { - &zoom_smsc911x_device, &zoom_debugboard_serial_device, }; diff --git a/arch/arm/mach-omap2/board-zoom-display.c b/arch/arm/mach-omap2/board-zoom-display.c index 60e8645db59d..c7c6beb1ec24 100644 --- a/arch/arm/mach-omap2/board-zoom-display.c +++ b/arch/arm/mach-omap2/board-zoom-display.c @@ -21,34 +21,19 @@ #define LCD_PANEL_RESET_GPIO_PILOT 55 #define LCD_PANEL_QVGA_GPIO 56 +static struct gpio zoom_lcd_gpios[] __initdata = { + { -EINVAL, GPIOF_OUT_INIT_HIGH, "lcd reset" }, + { LCD_PANEL_QVGA_GPIO, GPIOF_OUT_INIT_HIGH, "lcd qvga" }, +}; + static void zoom_lcd_panel_init(void) { - int ret; - unsigned char lcd_panel_reset_gpio; - - lcd_panel_reset_gpio = (omap_rev() > OMAP3430_REV_ES3_0) ? + zoom_lcd_gpios[0].gpio = (omap_rev() > OMAP3430_REV_ES3_0) ? LCD_PANEL_RESET_GPIO_PROD : LCD_PANEL_RESET_GPIO_PILOT; - ret = gpio_request(lcd_panel_reset_gpio, "lcd reset"); - if (ret) { - pr_err("Failed to get LCD reset GPIO (gpio%d).\n", - lcd_panel_reset_gpio); - return; - } - gpio_direction_output(lcd_panel_reset_gpio, 1); - - ret = gpio_request(LCD_PANEL_QVGA_GPIO, "lcd qvga"); - if (ret) { - pr_err("Failed to get LCD_PANEL_QVGA_GPIO (gpio%d).\n", - LCD_PANEL_QVGA_GPIO); - goto err0; - } - gpio_direction_output(LCD_PANEL_QVGA_GPIO, 1); - - return; -err0: - gpio_free(lcd_panel_reset_gpio); + if (gpio_request_array(zoom_lcd_gpios, ARRAY_SIZE(zoom_lcd_gpios))) + pr_err("%s: Failed to get LCD GPIOs.\n", __func__); } static int zoom_panel_enable_lcd(struct omap_dss_device *dssdev) diff --git a/arch/arm/mach-omap2/board-zoom-peripherals.c b/arch/arm/mach-omap2/board-zoom-peripherals.c index 8dee7549fbdf..118c6f53c5eb 100644 --- a/arch/arm/mach-omap2/board-zoom-peripherals.c +++ b/arch/arm/mach-omap2/board-zoom-peripherals.c @@ -31,6 +31,7 @@ #include "mux.h" #include "hsmmc.h" +#include "common-board-devices.h" #define OMAP_ZOOM_WLAN_PMENA_GPIO (101) #define OMAP_ZOOM_WLAN_IRQ_GPIO (162) @@ -276,13 +277,11 @@ static int zoom_twl_gpio_setup(struct device *dev, zoom_vsim_supply.dev = mmc[0].dev; zoom_vmmc2_supply.dev = mmc[1].dev; - ret = gpio_request(LCD_PANEL_ENABLE_GPIO, "lcd enable"); - if (ret) { + ret = gpio_request_one(LCD_PANEL_ENABLE_GPIO, GPIOF_OUT_INIT_LOW, + "lcd enable"); + if (ret) pr_err("Failed to get LCD_PANEL_ENABLE_GPIO (gpio%d).\n", LCD_PANEL_ENABLE_GPIO); - return ret; - } - gpio_direction_output(LCD_PANEL_ENABLE_GPIO, 0); return ret; } @@ -349,15 +348,6 @@ static struct twl4030_platform_data zoom_twldata = { .vdac = &zoom_vdac, }; -static struct i2c_board_info __initdata zoom_i2c_boardinfo[] = { - { - I2C_BOARD_INFO("twl5030", 0x48), - .flags = I2C_CLIENT_WAKE, - .irq = INT_34XX_SYS_NIRQ, - .platform_data = &zoom_twldata, - }, -}; - static int __init omap_i2c_init(void) { if (machine_is_omap_zoom2()) { @@ -365,19 +355,12 @@ static int __init omap_i2c_init(void) zoom_audio_data.hs_extmute = 1; zoom_audio_data.set_hs_extmute = zoom2_set_hs_extmute; } - omap_register_i2c_bus(1, 2400, zoom_i2c_boardinfo, - ARRAY_SIZE(zoom_i2c_boardinfo)); + omap_pmic_init(1, 2400, "twl5030", INT_34XX_SYS_NIRQ, &zoom_twldata); omap_register_i2c_bus(2, 400, NULL, 0); omap_register_i2c_bus(3, 400, NULL, 0); return 0; } -static struct omap_musb_board_data musb_board_data = { - .interface_type = MUSB_INTERFACE_ULPI, - .mode = MUSB_OTG, - .power = 100, -}; - static void enable_board_wakeup_source(void) { /* T2 interrupt line (keypad) */ @@ -392,7 +375,7 @@ void __init zoom_peripherals_init(void) omap_i2c_init(); platform_device_register(&omap_vwlan_device); - usb_musb_init(&musb_board_data); + usb_musb_init(NULL); enable_board_wakeup_source(); omap_serial_init(); } diff --git a/arch/arm/mach-omap2/common-board-devices.c b/arch/arm/mach-omap2/common-board-devices.c new file mode 100644 index 000000000000..e94903b2c65b --- /dev/null +++ b/arch/arm/mach-omap2/common-board-devices.c @@ -0,0 +1,163 @@ +/* + * common-board-devices.c + * + * Copyright (C) 2011 CompuLab, Ltd. + * Author: Mike Rapoport <mike@compulab.co.il> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include <linux/i2c.h> +#include <linux/i2c/twl.h> + +#include <linux/gpio.h> +#include <linux/spi/spi.h> +#include <linux/spi/ads7846.h> + +#include <plat/i2c.h> +#include <plat/mcspi.h> +#include <plat/nand.h> + +#include "common-board-devices.h" + +static struct i2c_board_info __initdata pmic_i2c_board_info = { + .addr = 0x48, + .flags = I2C_CLIENT_WAKE, +}; + +void __init omap_pmic_init(int bus, u32 clkrate, + const char *pmic_type, int pmic_irq, + struct twl4030_platform_data *pmic_data) +{ + strncpy(pmic_i2c_board_info.type, pmic_type, + sizeof(pmic_i2c_board_info.type)); + pmic_i2c_board_info.irq = pmic_irq; + pmic_i2c_board_info.platform_data = pmic_data; + + omap_register_i2c_bus(bus, clkrate, &pmic_i2c_board_info, 1); +} + +#if defined(CONFIG_TOUCHSCREEN_ADS7846) || \ + defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE) +static struct omap2_mcspi_device_config ads7846_mcspi_config = { + .turbo_mode = 0, + .single_channel = 1, /* 0: slave, 1: master */ +}; + +static struct ads7846_platform_data ads7846_config = { + .x_max = 0x0fff, + .y_max = 0x0fff, + .x_plate_ohms = 180, + .pressure_max = 255, + .debounce_max = 10, + .debounce_tol = 3, + .debounce_rep = 1, + .gpio_pendown = -EINVAL, + .keep_vref_on = 1, +}; + +static struct spi_board_info ads7846_spi_board_info __initdata = { + .modalias = "ads7846", + .bus_num = -EINVAL, + .chip_select = 0, + .max_speed_hz = 1500000, + .controller_data = &ads7846_mcspi_config, + .irq = -EINVAL, + .platform_data = &ads7846_config, +}; + +void __init omap_ads7846_init(int bus_num, int gpio_pendown, int gpio_debounce, + struct ads7846_platform_data *board_pdata) +{ + struct spi_board_info *spi_bi = &ads7846_spi_board_info; + int err; + + err = gpio_request(gpio_pendown, "TS PenDown"); + if (err) { + pr_err("Could not obtain gpio for TS PenDown: %d\n", err); + return; + } + + gpio_direction_input(gpio_pendown); + gpio_export(gpio_pendown, 0); + + if (gpio_debounce) + gpio_set_debounce(gpio_pendown, gpio_debounce); + + ads7846_config.gpio_pendown = gpio_pendown; + + spi_bi->bus_num = bus_num; + spi_bi->irq = OMAP_GPIO_IRQ(gpio_pendown); + + if (board_pdata) + spi_bi->platform_data = board_pdata; + + spi_register_board_info(&ads7846_spi_board_info, 1); +} +#else +void __init omap_ads7846_init(int bus_num, int gpio_pendown, int gpio_debounce, + struct ads7846_platform_data *board_pdata) +{ +} +#endif + +#if defined(CONFIG_MTD_NAND_OMAP2) || defined(CONFIG_MTD_NAND_OMAP2_MODULE) +static struct omap_nand_platform_data nand_data = { + .dma_channel = -1, /* disable DMA in OMAP NAND driver */ +}; + +void __init omap_nand_flash_init(int options, struct mtd_partition *parts, + int nr_parts) +{ + u8 cs = 0; + u8 nandcs = GPMC_CS_NUM + 1; + + /* find out the chip-select on which NAND exists */ + while (cs < GPMC_CS_NUM) { + u32 ret = 0; + ret = gpmc_cs_read_reg(cs, GPMC_CS_CONFIG1); + + if ((ret & 0xC00) == 0x800) { + printk(KERN_INFO "Found NAND on CS%d\n", cs); + if (nandcs > GPMC_CS_NUM) + nandcs = cs; + } + cs++; + } + + if (nandcs > GPMC_CS_NUM) { + printk(KERN_INFO "NAND: Unable to find configuration " + "in GPMC\n "); + return; + } + + if (nandcs < GPMC_CS_NUM) { + nand_data.cs = nandcs; + nand_data.parts = parts; + nand_data.nr_parts = nr_parts; + nand_data.options = options; + + printk(KERN_INFO "Registering NAND on CS%d\n", nandcs); + if (gpmc_nand_init(&nand_data) < 0) + printk(KERN_ERR "Unable to register NAND device\n"); + } +} +#else +void __init omap_nand_flash_init(int options, struct mtd_partition *parts, + int nr_parts) +{ +} +#endif diff --git a/arch/arm/mach-omap2/common-board-devices.h b/arch/arm/mach-omap2/common-board-devices.h new file mode 100644 index 000000000000..eb80b3b0ef47 --- /dev/null +++ b/arch/arm/mach-omap2/common-board-devices.h @@ -0,0 +1,35 @@ +#ifndef __OMAP_COMMON_BOARD_DEVICES__ +#define __OMAP_COMMON_BOARD_DEVICES__ + +struct twl4030_platform_data; +struct mtd_partition; + +void omap_pmic_init(int bus, u32 clkrate, const char *pmic_type, int pmic_irq, + struct twl4030_platform_data *pmic_data); + +static inline void omap2_pmic_init(const char *pmic_type, + struct twl4030_platform_data *pmic_data) +{ + omap_pmic_init(2, 2600, pmic_type, INT_24XX_SYS_NIRQ, pmic_data); +} + +static inline void omap3_pmic_init(const char *pmic_type, + struct twl4030_platform_data *pmic_data) +{ + omap_pmic_init(1, 2600, pmic_type, INT_34XX_SYS_NIRQ, pmic_data); +} + +static inline void omap4_pmic_init(const char *pmic_type, + struct twl4030_platform_data *pmic_data) +{ + /* Phoenix Audio IC needs I2C1 to start with 400 KHz or less */ + omap_pmic_init(1, 400, pmic_type, OMAP44XX_IRQ_SYS_1N, pmic_data); +} + +struct ads7846_platform_data; + +void omap_ads7846_init(int bus_num, int gpio_pendown, int gpio_debounce, + struct ads7846_platform_data *board_pdata); +void omap_nand_flash_init(int opts, struct mtd_partition *parts, int n_parts); + +#endif /* __OMAP_COMMON_BOARD_DEVICES__ */ diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c index 1c240eff3918..4bf6e6e8b100 100644 --- a/arch/arm/mach-omap2/cpuidle34xx.c +++ b/arch/arm/mach-omap2/cpuidle34xx.c @@ -36,36 +36,6 @@ #ifdef CONFIG_CPU_IDLE -#define OMAP3_MAX_STATES 7 -#define OMAP3_STATE_C1 0 /* C1 - MPU WFI + Core active */ -#define OMAP3_STATE_C2 1 /* C2 - MPU WFI + Core inactive */ -#define OMAP3_STATE_C3 2 /* C3 - MPU CSWR + Core inactive */ -#define OMAP3_STATE_C4 3 /* C4 - MPU OFF + Core iactive */ -#define OMAP3_STATE_C5 4 /* C5 - MPU RET + Core RET */ -#define OMAP3_STATE_C6 5 /* C6 - MPU OFF + Core RET */ -#define OMAP3_STATE_C7 6 /* C7 - MPU OFF + Core OFF */ - -#define OMAP3_STATE_MAX OMAP3_STATE_C7 - -#define CPUIDLE_FLAG_CHECK_BM 0x10000 /* use omap3_enter_idle_bm() */ - -struct omap3_processor_cx { - u8 valid; - u8 type; - u32 sleep_latency; - u32 wakeup_latency; - u32 mpu_state; - u32 core_state; - u32 threshold; - u32 flags; - const char *desc; -}; - -struct omap3_processor_cx omap3_power_states[OMAP3_MAX_STATES]; -struct omap3_processor_cx current_cx_state; -struct powerdomain *mpu_pd, *core_pd, *per_pd; -struct powerdomain *cam_pd; - /* * The latencies/thresholds for various C states have * to be configured from the respective board files. @@ -75,27 +45,31 @@ struct powerdomain *cam_pd; */ static struct cpuidle_params cpuidle_params_table[] = { /* C1 */ - {1, 2, 2, 5}, + {2 + 2, 5, 1}, /* C2 */ - {1, 10, 10, 30}, + {10 + 10, 30, 1}, /* C3 */ - {1, 50, 50, 300}, + {50 + 50, 300, 1}, /* C4 */ - {1, 1500, 1800, 4000}, + {1500 + 1800, 4000, 1}, /* C5 */ - {1, 2500, 7500, 12000}, + {2500 + 7500, 12000, 1}, /* C6 */ - {1, 3000, 8500, 15000}, + {3000 + 8500, 15000, 1}, /* C7 */ - {1, 10000, 30000, 300000}, + {10000 + 30000, 300000, 1}, }; +#define OMAP3_NUM_STATES ARRAY_SIZE(cpuidle_params_table) -static int omap3_idle_bm_check(void) -{ - if (!omap3_can_sleep()) - return 1; - return 0; -} +/* Mach specific information to be recorded in the C-state driver_data */ +struct omap3_idle_statedata { + u32 mpu_state; + u32 core_state; + u8 valid; +}; +struct omap3_idle_statedata omap3_idle_data[OMAP3_NUM_STATES]; + +struct powerdomain *mpu_pd, *core_pd, *per_pd, *cam_pd; static int _cpuidle_allow_idle(struct powerdomain *pwrdm, struct clockdomain *clkdm) @@ -122,12 +96,10 @@ static int _cpuidle_deny_idle(struct powerdomain *pwrdm, static int omap3_enter_idle(struct cpuidle_device *dev, struct cpuidle_state *state) { - struct omap3_processor_cx *cx = cpuidle_get_statedata(state); + struct omap3_idle_statedata *cx = cpuidle_get_statedata(state); struct timespec ts_preidle, ts_postidle, ts_idle; u32 mpu_state = cx->mpu_state, core_state = cx->core_state; - current_cx_state = *cx; - /* Used to keep track of the total time in idle */ getnstimeofday(&ts_preidle); @@ -140,7 +112,8 @@ static int omap3_enter_idle(struct cpuidle_device *dev, if (omap_irq_pending() || need_resched()) goto return_sleep_time; - if (cx->type == OMAP3_STATE_C1) { + /* Deny idle for C1 */ + if (state == &dev->states[0]) { pwrdm_for_each_clkdm(mpu_pd, _cpuidle_deny_idle); pwrdm_for_each_clkdm(core_pd, _cpuidle_deny_idle); } @@ -148,7 +121,8 @@ static int omap3_enter_idle(struct cpuidle_device *dev, /* Execute ARM wfi */ omap_sram_idle(); - if (cx->type == OMAP3_STATE_C1) { + /* Re-allow idle for C1 */ + if (state == &dev->states[0]) { pwrdm_for_each_clkdm(mpu_pd, _cpuidle_allow_idle); pwrdm_for_each_clkdm(core_pd, _cpuidle_allow_idle); } @@ -164,41 +138,53 @@ return_sleep_time: } /** - * next_valid_state - Find next valid c-state + * next_valid_state - Find next valid C-state * @dev: cpuidle device - * @state: Currently selected c-state + * @state: Currently selected C-state * * If the current state is valid, it is returned back to the caller. * Else, this function searches for a lower c-state which is still - * valid (as defined in omap3_power_states[]). + * valid. + * + * A state is valid if the 'valid' field is enabled and + * if it satisfies the enable_off_mode condition. */ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev, - struct cpuidle_state *curr) + struct cpuidle_state *curr) { struct cpuidle_state *next = NULL; - struct omap3_processor_cx *cx; + struct omap3_idle_statedata *cx = cpuidle_get_statedata(curr); + u32 mpu_deepest_state = PWRDM_POWER_RET; + u32 core_deepest_state = PWRDM_POWER_RET; - cx = (struct omap3_processor_cx *)cpuidle_get_statedata(curr); + if (enable_off_mode) { + mpu_deepest_state = PWRDM_POWER_OFF; + /* + * Erratum i583: valable for ES rev < Es1.2 on 3630. + * CORE OFF mode is not supported in a stable form, restrict + * instead the CORE state to RET. + */ + if (!IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583)) + core_deepest_state = PWRDM_POWER_OFF; + } /* Check if current state is valid */ - if (cx->valid) { + if ((cx->valid) && + (cx->mpu_state >= mpu_deepest_state) && + (cx->core_state >= core_deepest_state)) { return curr; } else { - u8 idx = OMAP3_STATE_MAX; + int idx = OMAP3_NUM_STATES - 1; - /* - * Reach the current state starting at highest C-state - */ - for (; idx >= OMAP3_STATE_C1; idx--) { + /* Reach the current state starting at highest C-state */ + for (; idx >= 0; idx--) { if (&dev->states[idx] == curr) { next = &dev->states[idx]; break; } } - /* - * Should never hit this condition. - */ + /* Should never hit this condition */ WARN_ON(next == NULL); /* @@ -206,17 +192,17 @@ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev, * Start search from the next (lower) state. */ idx--; - for (; idx >= OMAP3_STATE_C1; idx--) { - struct omap3_processor_cx *cx; - + for (; idx >= 0; idx--) { cx = cpuidle_get_statedata(&dev->states[idx]); - if (cx->valid) { + if ((cx->valid) && + (cx->mpu_state >= mpu_deepest_state) && + (cx->core_state >= core_deepest_state)) { next = &dev->states[idx]; break; } } /* - * C1 and C2 are always valid. + * C1 is always valid. * So, no need to check for 'next==NULL' outside this loop. */ } @@ -229,36 +215,22 @@ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev, * @dev: cpuidle device * @state: The target state to be programmed * - * Used for C states with CPUIDLE_FLAG_CHECK_BM flag set. This - * function checks for any pending activity and then programs the - * device to the specified or a safer state. + * This function checks for any pending activity and then programs + * the device to the specified or a safer state. */ static int omap3_enter_idle_bm(struct cpuidle_device *dev, struct cpuidle_state *state) { - struct cpuidle_state *new_state = next_valid_state(dev, state); - u32 core_next_state, per_next_state = 0, per_saved_state = 0; - u32 cam_state; - struct omap3_processor_cx *cx; + struct cpuidle_state *new_state; + u32 core_next_state, per_next_state = 0, per_saved_state = 0, cam_state; + struct omap3_idle_statedata *cx; int ret; - if ((state->flags & CPUIDLE_FLAG_CHECK_BM) && omap3_idle_bm_check()) { - BUG_ON(!dev->safe_state); + if (!omap3_can_sleep()) { new_state = dev->safe_state; goto select_state; } - cx = cpuidle_get_statedata(state); - core_next_state = cx->core_state; - - /* - * FIXME: we currently manage device-specific idle states - * for PER and CORE in combination with CPU-specific - * idle states. This is wrong, and device-specific - * idle management needs to be separated out into - * its own code. - */ - /* * Prevent idle completely if CAM is active. * CAM does not have wakeup capability in OMAP3. @@ -270,9 +242,19 @@ static int omap3_enter_idle_bm(struct cpuidle_device *dev, } /* + * FIXME: we currently manage device-specific idle states + * for PER and CORE in combination with CPU-specific + * idle states. This is wrong, and device-specific + * idle management needs to be separated out into + * its own code. + */ + + /* * Prevent PER off if CORE is not in retention or off as this * would disable PER wakeups completely. */ + cx = cpuidle_get_statedata(state); + core_next_state = cx->core_state; per_next_state = per_saved_state = pwrdm_read_next_pwrst(per_pd); if ((per_next_state == PWRDM_POWER_OFF) && (core_next_state > PWRDM_POWER_RET)) @@ -282,6 +264,8 @@ static int omap3_enter_idle_bm(struct cpuidle_device *dev, if (per_next_state != per_saved_state) pwrdm_set_next_pwrst(per_pd, per_next_state); + new_state = next_valid_state(dev, state); + select_state: dev->last_state = new_state; ret = omap3_enter_idle(dev, new_state); @@ -295,31 +279,6 @@ select_state: DEFINE_PER_CPU(struct cpuidle_device, omap3_idle_dev); -/** - * omap3_cpuidle_update_states() - Update the cpuidle states - * @mpu_deepest_state: Enable states up to and including this for mpu domain - * @core_deepest_state: Enable states up to and including this for core domain - * - * This goes through the list of states available and enables and disables the - * validity of C states based on deepest state that can be achieved for the - * variable domain - */ -void omap3_cpuidle_update_states(u32 mpu_deepest_state, u32 core_deepest_state) -{ - int i; - - for (i = OMAP3_STATE_C1; i < OMAP3_MAX_STATES; i++) { - struct omap3_processor_cx *cx = &omap3_power_states[i]; - - if ((cx->mpu_state >= mpu_deepest_state) && - (cx->core_state >= core_deepest_state)) { - cx->valid = 1; - } else { - cx->valid = 0; - } - } -} - void omap3_pm_init_cpuidle(struct cpuidle_params *cpuidle_board_params) { int i; @@ -327,212 +286,109 @@ void omap3_pm_init_cpuidle(struct cpuidle_params *cpuidle_board_params) if (!cpuidle_board_params) return; - for (i = OMAP3_STATE_C1; i < OMAP3_MAX_STATES; i++) { - cpuidle_params_table[i].valid = - cpuidle_board_params[i].valid; - cpuidle_params_table[i].sleep_latency = - cpuidle_board_params[i].sleep_latency; - cpuidle_params_table[i].wake_latency = - cpuidle_board_params[i].wake_latency; - cpuidle_params_table[i].threshold = - cpuidle_board_params[i].threshold; + for (i = 0; i < OMAP3_NUM_STATES; i++) { + cpuidle_params_table[i].valid = cpuidle_board_params[i].valid; + cpuidle_params_table[i].exit_latency = + cpuidle_board_params[i].exit_latency; + cpuidle_params_table[i].target_residency = + cpuidle_board_params[i].target_residency; } return; } -/* omap3_init_power_states - Initialises the OMAP3 specific C states. - * - * Below is the desciption of each C state. - * C1 . MPU WFI + Core active - * C2 . MPU WFI + Core inactive - * C3 . MPU CSWR + Core inactive - * C4 . MPU OFF + Core inactive - * C5 . MPU CSWR + Core CSWR - * C6 . MPU OFF + Core CSWR - * C7 . MPU OFF + Core OFF - */ -void omap_init_power_states(void) -{ - /* C1 . MPU WFI + Core active */ - omap3_power_states[OMAP3_STATE_C1].valid = - cpuidle_params_table[OMAP3_STATE_C1].valid; - omap3_power_states[OMAP3_STATE_C1].type = OMAP3_STATE_C1; - omap3_power_states[OMAP3_STATE_C1].sleep_latency = - cpuidle_params_table[OMAP3_STATE_C1].sleep_latency; - omap3_power_states[OMAP3_STATE_C1].wakeup_latency = - cpuidle_params_table[OMAP3_STATE_C1].wake_latency; - omap3_power_states[OMAP3_STATE_C1].threshold = - cpuidle_params_table[OMAP3_STATE_C1].threshold; - omap3_power_states[OMAP3_STATE_C1].mpu_state = PWRDM_POWER_ON; - omap3_power_states[OMAP3_STATE_C1].core_state = PWRDM_POWER_ON; - omap3_power_states[OMAP3_STATE_C1].flags = CPUIDLE_FLAG_TIME_VALID; - omap3_power_states[OMAP3_STATE_C1].desc = "MPU ON + CORE ON"; - - /* C2 . MPU WFI + Core inactive */ - omap3_power_states[OMAP3_STATE_C2].valid = - cpuidle_params_table[OMAP3_STATE_C2].valid; - omap3_power_states[OMAP3_STATE_C2].type = OMAP3_STATE_C2; - omap3_power_states[OMAP3_STATE_C2].sleep_latency = - cpuidle_params_table[OMAP3_STATE_C2].sleep_latency; - omap3_power_states[OMAP3_STATE_C2].wakeup_latency = - cpuidle_params_table[OMAP3_STATE_C2].wake_latency; - omap3_power_states[OMAP3_STATE_C2].threshold = - cpuidle_params_table[OMAP3_STATE_C2].threshold; - omap3_power_states[OMAP3_STATE_C2].mpu_state = PWRDM_POWER_ON; - omap3_power_states[OMAP3_STATE_C2].core_state = PWRDM_POWER_ON; - omap3_power_states[OMAP3_STATE_C2].flags = CPUIDLE_FLAG_TIME_VALID | - CPUIDLE_FLAG_CHECK_BM; - omap3_power_states[OMAP3_STATE_C2].desc = "MPU ON + CORE ON"; - - /* C3 . MPU CSWR + Core inactive */ - omap3_power_states[OMAP3_STATE_C3].valid = - cpuidle_params_table[OMAP3_STATE_C3].valid; - omap3_power_states[OMAP3_STATE_C3].type = OMAP3_STATE_C3; - omap3_power_states[OMAP3_STATE_C3].sleep_latency = - cpuidle_params_table[OMAP3_STATE_C3].sleep_latency; - omap3_power_states[OMAP3_STATE_C3].wakeup_latency = - cpuidle_params_table[OMAP3_STATE_C3].wake_latency; - omap3_power_states[OMAP3_STATE_C3].threshold = - cpuidle_params_table[OMAP3_STATE_C3].threshold; - omap3_power_states[OMAP3_STATE_C3].mpu_state = PWRDM_POWER_RET; - omap3_power_states[OMAP3_STATE_C3].core_state = PWRDM_POWER_ON; - omap3_power_states[OMAP3_STATE_C3].flags = CPUIDLE_FLAG_TIME_VALID | - CPUIDLE_FLAG_CHECK_BM; - omap3_power_states[OMAP3_STATE_C3].desc = "MPU RET + CORE ON"; - - /* C4 . MPU OFF + Core inactive */ - omap3_power_states[OMAP3_STATE_C4].valid = - cpuidle_params_table[OMAP3_STATE_C4].valid; - omap3_power_states[OMAP3_STATE_C4].type = OMAP3_STATE_C4; - omap3_power_states[OMAP3_STATE_C4].sleep_latency = - cpuidle_params_table[OMAP3_STATE_C4].sleep_latency; - omap3_power_states[OMAP3_STATE_C4].wakeup_latency = - cpuidle_params_table[OMAP3_STATE_C4].wake_latency; - omap3_power_states[OMAP3_STATE_C4].threshold = - cpuidle_params_table[OMAP3_STATE_C4].threshold; - omap3_power_states[OMAP3_STATE_C4].mpu_state = PWRDM_POWER_OFF; - omap3_power_states[OMAP3_STATE_C4].core_state = PWRDM_POWER_ON; - omap3_power_states[OMAP3_STATE_C4].flags = CPUIDLE_FLAG_TIME_VALID | - CPUIDLE_FLAG_CHECK_BM; - omap3_power_states[OMAP3_STATE_C4].desc = "MPU OFF + CORE ON"; - - /* C5 . MPU CSWR + Core CSWR*/ - omap3_power_states[OMAP3_STATE_C5].valid = - cpuidle_params_table[OMAP3_STATE_C5].valid; - omap3_power_states[OMAP3_STATE_C5].type = OMAP3_STATE_C5; - omap3_power_states[OMAP3_STATE_C5].sleep_latency = - cpuidle_params_table[OMAP3_STATE_C5].sleep_latency; - omap3_power_states[OMAP3_STATE_C5].wakeup_latency = - cpuidle_params_table[OMAP3_STATE_C5].wake_latency; - omap3_power_states[OMAP3_STATE_C5].threshold = - cpuidle_params_table[OMAP3_STATE_C5].threshold; - omap3_power_states[OMAP3_STATE_C5].mpu_state = PWRDM_POWER_RET; - omap3_power_states[OMAP3_STATE_C5].core_state = PWRDM_POWER_RET; - omap3_power_states[OMAP3_STATE_C5].flags = CPUIDLE_FLAG_TIME_VALID | - CPUIDLE_FLAG_CHECK_BM; - omap3_power_states[OMAP3_STATE_C5].desc = "MPU RET + CORE RET"; - - /* C6 . MPU OFF + Core CSWR */ - omap3_power_states[OMAP3_STATE_C6].valid = - cpuidle_params_table[OMAP3_STATE_C6].valid; - omap3_power_states[OMAP3_STATE_C6].type = OMAP3_STATE_C6; - omap3_power_states[OMAP3_STATE_C6].sleep_latency = - cpuidle_params_table[OMAP3_STATE_C6].sleep_latency; - omap3_power_states[OMAP3_STATE_C6].wakeup_latency = - cpuidle_params_table[OMAP3_STATE_C6].wake_latency; - omap3_power_states[OMAP3_STATE_C6].threshold = - cpuidle_params_table[OMAP3_STATE_C6].threshold; - omap3_power_states[OMAP3_STATE_C6].mpu_state = PWRDM_POWER_OFF; - omap3_power_states[OMAP3_STATE_C6].core_state = PWRDM_POWER_RET; - omap3_power_states[OMAP3_STATE_C6].flags = CPUIDLE_FLAG_TIME_VALID | - CPUIDLE_FLAG_CHECK_BM; - omap3_power_states[OMAP3_STATE_C6].desc = "MPU OFF + CORE RET"; - - /* C7 . MPU OFF + Core OFF */ - omap3_power_states[OMAP3_STATE_C7].valid = - cpuidle_params_table[OMAP3_STATE_C7].valid; - omap3_power_states[OMAP3_STATE_C7].type = OMAP3_STATE_C7; - omap3_power_states[OMAP3_STATE_C7].sleep_latency = - cpuidle_params_table[OMAP3_STATE_C7].sleep_latency; - omap3_power_states[OMAP3_STATE_C7].wakeup_latency = - cpuidle_params_table[OMAP3_STATE_C7].wake_latency; - omap3_power_states[OMAP3_STATE_C7].threshold = - cpuidle_params_table[OMAP3_STATE_C7].threshold; - omap3_power_states[OMAP3_STATE_C7].mpu_state = PWRDM_POWER_OFF; - omap3_power_states[OMAP3_STATE_C7].core_state = PWRDM_POWER_OFF; - omap3_power_states[OMAP3_STATE_C7].flags = CPUIDLE_FLAG_TIME_VALID | - CPUIDLE_FLAG_CHECK_BM; - omap3_power_states[OMAP3_STATE_C7].desc = "MPU OFF + CORE OFF"; - - /* - * Erratum i583: implementation for ES rev < Es1.2 on 3630. We cannot - * enable OFF mode in a stable form for previous revisions. - * we disable C7 state as a result. - */ - if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583)) { - omap3_power_states[OMAP3_STATE_C7].valid = 0; - cpuidle_params_table[OMAP3_STATE_C7].valid = 0; - pr_warn("%s: core off state C7 disabled due to i583\n", - __func__); - } -} - struct cpuidle_driver omap3_idle_driver = { .name = "omap3_idle", .owner = THIS_MODULE, }; +/* Helper to fill the C-state common data and register the driver_data */ +static inline struct omap3_idle_statedata *_fill_cstate( + struct cpuidle_device *dev, + int idx, const char *descr) +{ + struct omap3_idle_statedata *cx = &omap3_idle_data[idx]; + struct cpuidle_state *state = &dev->states[idx]; + + state->exit_latency = cpuidle_params_table[idx].exit_latency; + state->target_residency = cpuidle_params_table[idx].target_residency; + state->flags = CPUIDLE_FLAG_TIME_VALID; + state->enter = omap3_enter_idle_bm; + cx->valid = cpuidle_params_table[idx].valid; + sprintf(state->name, "C%d", idx + 1); + strncpy(state->desc, descr, CPUIDLE_DESC_LEN); + cpuidle_set_statedata(state, cx); + + return cx; +} + /** * omap3_idle_init - Init routine for OMAP3 idle * - * Registers the OMAP3 specific cpuidle driver with the cpuidle + * Registers the OMAP3 specific cpuidle driver to the cpuidle * framework with the valid set of states. */ int __init omap3_idle_init(void) { - int i, count = 0; - struct omap3_processor_cx *cx; - struct cpuidle_state *state; struct cpuidle_device *dev; + struct omap3_idle_statedata *cx; mpu_pd = pwrdm_lookup("mpu_pwrdm"); core_pd = pwrdm_lookup("core_pwrdm"); per_pd = pwrdm_lookup("per_pwrdm"); cam_pd = pwrdm_lookup("cam_pwrdm"); - omap_init_power_states(); cpuidle_register_driver(&omap3_idle_driver); - dev = &per_cpu(omap3_idle_dev, smp_processor_id()); - for (i = OMAP3_STATE_C1; i < OMAP3_MAX_STATES; i++) { - cx = &omap3_power_states[i]; - state = &dev->states[count]; - - if (!cx->valid) - continue; - cpuidle_set_statedata(state, cx); - state->exit_latency = cx->sleep_latency + cx->wakeup_latency; - state->target_residency = cx->threshold; - state->flags = cx->flags; - state->enter = (state->flags & CPUIDLE_FLAG_CHECK_BM) ? - omap3_enter_idle_bm : omap3_enter_idle; - if (cx->type == OMAP3_STATE_C1) - dev->safe_state = state; - sprintf(state->name, "C%d", count+1); - strncpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); - count++; - } + /* C1 . MPU WFI + Core active */ + cx = _fill_cstate(dev, 0, "MPU ON + CORE ON"); + (&dev->states[0])->enter = omap3_enter_idle; + dev->safe_state = &dev->states[0]; + cx->valid = 1; /* C1 is always valid */ + cx->mpu_state = PWRDM_POWER_ON; + cx->core_state = PWRDM_POWER_ON; - if (!count) - return -EINVAL; - dev->state_count = count; + /* C2 . MPU WFI + Core inactive */ + cx = _fill_cstate(dev, 1, "MPU ON + CORE ON"); + cx->mpu_state = PWRDM_POWER_ON; + cx->core_state = PWRDM_POWER_ON; + + /* C3 . MPU CSWR + Core inactive */ + cx = _fill_cstate(dev, 2, "MPU RET + CORE ON"); + cx->mpu_state = PWRDM_POWER_RET; + cx->core_state = PWRDM_POWER_ON; - if (enable_off_mode) - omap3_cpuidle_update_states(PWRDM_POWER_OFF, PWRDM_POWER_OFF); - else - omap3_cpuidle_update_states(PWRDM_POWER_RET, PWRDM_POWER_RET); + /* C4 . MPU OFF + Core inactive */ + cx = _fill_cstate(dev, 3, "MPU OFF + CORE ON"); + cx->mpu_state = PWRDM_POWER_OFF; + cx->core_state = PWRDM_POWER_ON; + + /* C5 . MPU RET + Core RET */ + cx = _fill_cstate(dev, 4, "MPU RET + CORE RET"); + cx->mpu_state = PWRDM_POWER_RET; + cx->core_state = PWRDM_POWER_RET; + + /* C6 . MPU OFF + Core RET */ + cx = _fill_cstate(dev, 5, "MPU OFF + CORE RET"); + cx->mpu_state = PWRDM_POWER_OFF; + cx->core_state = PWRDM_POWER_RET; + + /* C7 . MPU OFF + Core OFF */ + cx = _fill_cstate(dev, 6, "MPU OFF + CORE OFF"); + /* + * Erratum i583: implementation for ES rev < Es1.2 on 3630. We cannot + * enable OFF mode in a stable form for previous revisions. + * We disable C7 state as a result. + */ + if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583)) { + cx->valid = 0; + pr_warn("%s: core off state C7 disabled due to i583\n", + __func__); + } + cx->mpu_state = PWRDM_POWER_OFF; + cx->core_state = PWRDM_POWER_OFF; + dev->state_count = OMAP3_NUM_STATES; if (cpuidle_register_device(dev)) { printk(KERN_ERR "%s: CPUidle register device failed\n", __func__); diff --git a/arch/arm/mach-omap2/gpmc-smc91x.c b/arch/arm/mach-omap2/gpmc-smc91x.c index 877c6f5807b7..ba10c24f3d8d 100644 --- a/arch/arm/mach-omap2/gpmc-smc91x.c +++ b/arch/arm/mach-omap2/gpmc-smc91x.c @@ -147,25 +147,24 @@ void __init gpmc_smc91x_init(struct omap_smc91x_platform_data *board_data) goto free1; } - if (gpio_request(gpmc_cfg->gpio_irq, "SMC91X irq") < 0) + if (gpio_request_one(gpmc_cfg->gpio_irq, GPIOF_IN, "SMC91X irq") < 0) goto free1; - gpio_direction_input(gpmc_cfg->gpio_irq); gpmc_smc91x_resources[1].start = gpio_to_irq(gpmc_cfg->gpio_irq); if (gpmc_cfg->gpio_pwrdwn) { - ret = gpio_request(gpmc_cfg->gpio_pwrdwn, "SMC91X powerdown"); + ret = gpio_request_one(gpmc_cfg->gpio_pwrdwn, + GPIOF_OUT_INIT_LOW, "SMC91X powerdown"); if (ret) goto free2; - gpio_direction_output(gpmc_cfg->gpio_pwrdwn, 0); } if (gpmc_cfg->gpio_reset) { - ret = gpio_request(gpmc_cfg->gpio_reset, "SMC91X reset"); + ret = gpio_request_one(gpmc_cfg->gpio_reset, + GPIOF_OUT_INIT_LOW, "SMC91X reset"); if (ret) goto free3; - gpio_direction_output(gpmc_cfg->gpio_reset, 0); gpio_set_value(gpmc_cfg->gpio_reset, 1); msleep(100); gpio_set_value(gpmc_cfg->gpio_reset, 0); diff --git a/arch/arm/mach-omap2/gpmc-smsc911x.c b/arch/arm/mach-omap2/gpmc-smsc911x.c index 703f150dd01d..997033129d26 100644 --- a/arch/arm/mach-omap2/gpmc-smsc911x.c +++ b/arch/arm/mach-omap2/gpmc-smsc911x.c @@ -10,6 +10,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define pr_fmt(fmt) "%s: " fmt, __func__ #include <linux/kernel.h> #include <linux/platform_device.h> @@ -30,7 +31,7 @@ static struct resource gpmc_smsc911x_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWLEVEL, }, }; @@ -41,16 +42,6 @@ static struct smsc911x_platform_config gpmc_smsc911x_config = { .flags = SMSC911X_USE_16BIT, }; -static struct platform_device gpmc_smsc911x_device = { - .name = "smsc911x", - .id = -1, - .num_resources = ARRAY_SIZE(gpmc_smsc911x_resources), - .resource = gpmc_smsc911x_resources, - .dev = { - .platform_data = &gpmc_smsc911x_config, - }, -}; - /* * Initialize smsc911x device connected to the GPMC. Note that we * assume that pin multiplexing is done in the board-*.c file, @@ -58,46 +49,49 @@ static struct platform_device gpmc_smsc911x_device = { */ void __init gpmc_smsc911x_init(struct omap_smsc911x_platform_data *board_data) { + struct platform_device *pdev; unsigned long cs_mem_base; int ret; gpmc_cfg = board_data; if (gpmc_cs_request(gpmc_cfg->cs, SZ_16M, &cs_mem_base) < 0) { - printk(KERN_ERR "Failed to request GPMC mem for smsc911x\n"); + pr_err("Failed to request GPMC mem region\n"); return; } gpmc_smsc911x_resources[0].start = cs_mem_base + 0x0; gpmc_smsc911x_resources[0].end = cs_mem_base + 0xff; - if (gpio_request(gpmc_cfg->gpio_irq, "smsc911x irq") < 0) { - printk(KERN_ERR "Failed to request GPIO%d for smsc911x IRQ\n", - gpmc_cfg->gpio_irq); + if (gpio_request_one(gpmc_cfg->gpio_irq, GPIOF_IN, "smsc911x irq")) { + pr_err("Failed to request IRQ GPIO%d\n", gpmc_cfg->gpio_irq); goto free1; } - gpio_direction_input(gpmc_cfg->gpio_irq); gpmc_smsc911x_resources[1].start = gpio_to_irq(gpmc_cfg->gpio_irq); - gpmc_smsc911x_resources[1].flags |= - (gpmc_cfg->flags & IRQF_TRIGGER_MASK); if (gpio_is_valid(gpmc_cfg->gpio_reset)) { - ret = gpio_request(gpmc_cfg->gpio_reset, "smsc911x reset"); + ret = gpio_request_one(gpmc_cfg->gpio_reset, + GPIOF_OUT_INIT_HIGH, "smsc911x reset"); if (ret) { - printk(KERN_ERR "Failed to request GPIO%d for smsc911x reset\n", - gpmc_cfg->gpio_reset); + pr_err("Failed to request reset GPIO%d\n", + gpmc_cfg->gpio_reset); goto free2; } - gpio_direction_output(gpmc_cfg->gpio_reset, 1); gpio_set_value(gpmc_cfg->gpio_reset, 0); msleep(100); gpio_set_value(gpmc_cfg->gpio_reset, 1); } - if (platform_device_register(&gpmc_smsc911x_device) < 0) { - printk(KERN_ERR "Unable to register smsc911x device\n"); + if (gpmc_cfg->flags) + gpmc_smsc911x_config.flags = gpmc_cfg->flags; + + pdev = platform_device_register_resndata(NULL, "smsc911x", gpmc_cfg->id, + gpmc_smsc911x_resources, ARRAY_SIZE(gpmc_smsc911x_resources), + &gpmc_smsc911x_config, sizeof(gpmc_smsc911x_config)); + if (!pdev) { + pr_err("Unable to register platform device\n"); gpio_free(gpmc_cfg->gpio_reset); goto free2; } @@ -109,5 +103,5 @@ free2: free1: gpmc_cs_free(gpmc_cfg->cs); - printk(KERN_ERR "Could not initialize smsc911x\n"); + pr_err("Could not initialize smsc911x device\n"); } diff --git a/arch/arm/mach-omap2/omap_l3_noc.c b/arch/arm/mach-omap2/omap_l3_noc.c index 82632c24076f..7b9f1909ddb2 100644 --- a/arch/arm/mach-omap2/omap_l3_noc.c +++ b/arch/arm/mach-omap2/omap_l3_noc.c @@ -63,10 +63,7 @@ static irqreturn_t l3_interrupt_handler(int irq, void *_l3) char *source_name; /* Get the Type of interrupt */ - if (irq == l3->app_irq) - inttype = L3_APPLICATION_ERROR; - else - inttype = L3_DEBUG_ERROR; + inttype = irq == l3->app_irq ? L3_APPLICATION_ERROR : L3_DEBUG_ERROR; for (i = 0; i < L3_MODULES; i++) { /* @@ -84,10 +81,10 @@ static irqreturn_t l3_interrupt_handler(int irq, void *_l3) err_src = j; /* Read the stderrlog_main_source from clk domain */ - std_err_main_addr = base + (*(l3_targ[i] + err_src)); - std_err_main = readl(std_err_main_addr); + std_err_main_addr = base + *(l3_targ[i] + err_src); + std_err_main = readl(std_err_main_addr); - switch ((std_err_main & CUSTOM_ERROR)) { + switch (std_err_main & CUSTOM_ERROR) { case STANDARD_ERROR: source_name = l3_targ_stderrlog_main_name[i][err_src]; @@ -132,49 +129,49 @@ static int __init omap4_l3_probe(struct platform_device *pdev) l3 = kzalloc(sizeof(*l3), GFP_KERNEL); if (!l3) - ret = -ENOMEM; + return -ENOMEM; platform_set_drvdata(pdev, l3); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { dev_err(&pdev->dev, "couldn't find resource 0\n"); ret = -ENODEV; - goto err1; + goto err0; } l3->l3_base[0] = ioremap(res->start, resource_size(res)); - if (!(l3->l3_base[0])) { + if (!l3->l3_base[0]) { dev_err(&pdev->dev, "ioremap failed\n"); ret = -ENOMEM; - goto err2; + goto err0; } res = platform_get_resource(pdev, IORESOURCE_MEM, 1); if (!res) { dev_err(&pdev->dev, "couldn't find resource 1\n"); ret = -ENODEV; - goto err3; + goto err1; } l3->l3_base[1] = ioremap(res->start, resource_size(res)); - if (!(l3->l3_base[1])) { + if (!l3->l3_base[1]) { dev_err(&pdev->dev, "ioremap failed\n"); ret = -ENOMEM; - goto err4; + goto err1; } res = platform_get_resource(pdev, IORESOURCE_MEM, 2); if (!res) { dev_err(&pdev->dev, "couldn't find resource 2\n"); ret = -ENODEV; - goto err5; + goto err2; } l3->l3_base[2] = ioremap(res->start, resource_size(res)); - if (!(l3->l3_base[2])) { + if (!l3->l3_base[2]) { dev_err(&pdev->dev, "ioremap failed\n"); ret = -ENOMEM; - goto err6; + goto err2; } /* @@ -187,7 +184,7 @@ static int __init omap4_l3_probe(struct platform_device *pdev) if (ret) { pr_crit("L3: request_irq failed to register for 0x%x\n", OMAP44XX_IRQ_L3_DBG); - goto err7; + goto err3; } l3->debug_irq = irq; @@ -198,24 +195,22 @@ static int __init omap4_l3_probe(struct platform_device *pdev) if (ret) { pr_crit("L3: request_irq failed to register for 0x%x\n", OMAP44XX_IRQ_L3_APP); - goto err8; + goto err4; } l3->app_irq = irq; - goto err0; -err8: -err7: - iounmap(l3->l3_base[2]); -err6: -err5: - iounmap(l3->l3_base[1]); + return 0; + err4: + free_irq(l3->debug_irq, l3); err3: - iounmap(l3->l3_base[0]); + iounmap(l3->l3_base[2]); err2: + iounmap(l3->l3_base[1]); err1: - kfree(l3); + iounmap(l3->l3_base[0]); err0: + kfree(l3); return ret; } diff --git a/arch/arm/mach-omap2/omap_l3_smx.c b/arch/arm/mach-omap2/omap_l3_smx.c index 4321e7938929..873c0e33b512 100644 --- a/arch/arm/mach-omap2/omap_l3_smx.c +++ b/arch/arm/mach-omap2/omap_l3_smx.c @@ -155,7 +155,7 @@ static irqreturn_t omap3_l3_block_irq(struct omap3_l3 *l3, u8 multi = error & L3_ERROR_LOG_MULTI; u32 address = omap3_l3_decode_addr(error_addr); - WARN(true, "%s Error seen by %s %s at address %x\n", + WARN(true, "%s seen by %s %s at address %x\n", omap3_l3_code_string(code), omap3_l3_initiator_string(initid), multi ? "Multiple Errors" : "", @@ -167,21 +167,15 @@ static irqreturn_t omap3_l3_block_irq(struct omap3_l3 *l3, static irqreturn_t omap3_l3_app_irq(int irq, void *_l3) { struct omap3_l3 *l3 = _l3; - u64 status, clear; u64 error; u64 error_addr; u64 err_source = 0; void __iomem *base; int int_type; - irqreturn_t ret = IRQ_NONE; - if (irq == l3->app_irq) - int_type = L3_APPLICATION_ERROR; - else - int_type = L3_DEBUG_ERROR; - + int_type = irq == l3->app_irq ? L3_APPLICATION_ERROR : L3_DEBUG_ERROR; if (!int_type) { status = omap3_l3_readll(l3->rt, L3_SI_FLAG_STATUS_0); /* @@ -202,7 +196,6 @@ static irqreturn_t omap3_l3_app_irq(int irq, void *_l3) base = l3->rt + *(omap3_l3_bases[int_type] + err_source); error = omap3_l3_readll(base, L3_ERROR_LOG); - if (error) { error_addr = omap3_l3_readll(base, L3_ERROR_LOG_ADDR); @@ -210,9 +203,8 @@ static irqreturn_t omap3_l3_app_irq(int irq, void *_l3) } /* Clear the status register */ - clear = ((L3_AGENT_STATUS_CLEAR_IA << int_type) | - (L3_AGENT_STATUS_CLEAR_TA)); - + clear = (L3_AGENT_STATUS_CLEAR_IA << int_type) | + L3_AGENT_STATUS_CLEAR_TA; omap3_l3_writell(base, L3_AGENT_STATUS, clear); /* clear the error log register */ @@ -228,10 +220,8 @@ static int __init omap3_l3_probe(struct platform_device *pdev) int ret; l3 = kzalloc(sizeof(*l3), GFP_KERNEL); - if (!l3) { - ret = -ENOMEM; - goto err0; - } + if (!l3) + return -ENOMEM; platform_set_drvdata(pdev, l3); @@ -239,13 +229,13 @@ static int __init omap3_l3_probe(struct platform_device *pdev) if (!res) { dev_err(&pdev->dev, "couldn't find resource\n"); ret = -ENODEV; - goto err1; + goto err0; } l3->rt = ioremap(res->start, resource_size(res)); - if (!(l3->rt)) { + if (!l3->rt) { dev_err(&pdev->dev, "ioremap failed\n"); ret = -ENOMEM; - goto err2; + goto err0; } l3->debug_irq = platform_get_irq(pdev, 0); @@ -254,28 +244,26 @@ static int __init omap3_l3_probe(struct platform_device *pdev) "l3-debug-irq", l3); if (ret) { dev_err(&pdev->dev, "couldn't request debug irq\n"); - goto err3; + goto err1; } l3->app_irq = platform_get_irq(pdev, 1); ret = request_irq(l3->app_irq, omap3_l3_app_irq, IRQF_DISABLED | IRQF_TRIGGER_RISING, "l3-app-irq", l3); - if (ret) { dev_err(&pdev->dev, "couldn't request app irq\n"); - goto err4; + goto err2; } - goto err0; + return 0; -err4: -err3: - iounmap(l3->rt); err2: + free_irq(l3->debug_irq, l3); err1: - kfree(l3); + iounmap(l3->rt); err0: + kfree(l3); return ret; } diff --git a/arch/arm/mach-omap2/omap_phy_internal.c b/arch/arm/mach-omap2/omap_phy_internal.c index 05f6abc96b0d..f47813edd951 100644 --- a/arch/arm/mach-omap2/omap_phy_internal.c +++ b/arch/arm/mach-omap2/omap_phy_internal.c @@ -50,13 +50,16 @@ int omap4430_phy_init(struct device *dev) { ctrl_base = ioremap(OMAP443X_SCM_BASE, SZ_1K); if (!ctrl_base) { - dev_err(dev, "control module ioremap failed\n"); + pr_err("control module ioremap failed\n"); return -ENOMEM; } /* Power down the phy */ __raw_writel(PHY_PD, ctrl_base + CONTROL_DEV_CONF); - phyclk = clk_get(dev, "ocp2scp_usb_phy_ick"); + if (!dev) + return 0; + + phyclk = clk_get(dev, "ocp2scp_usb_phy_ick"); if (IS_ERR(phyclk)) { dev_err(dev, "cannot clk_get ocp2scp_usb_phy_ick\n"); iounmap(ctrl_base); @@ -228,7 +231,7 @@ void am35x_musb_clear_irq(void) regval = omap_ctrl_readl(AM35XX_CONTROL_LVL_INTR_CLEAR); } -void am35x_musb_set_mode(u8 musb_mode) +void am35x_set_mode(u8 musb_mode) { u32 devconf2 = omap_ctrl_readl(AM35XX_CONTROL_DEVCONF2); diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h index 797bfd12b643..45bcfce77352 100644 --- a/arch/arm/mach-omap2/pm.h +++ b/arch/arm/mach-omap2/pm.h @@ -36,11 +36,16 @@ static inline int omap4_opp_init(void) } #endif +/* + * cpuidle mach specific parameters + * + * The board code can override the default C-states definition using + * omap3_pm_init_cpuidle + */ struct cpuidle_params { - u8 valid; - u32 sleep_latency; - u32 wake_latency; - u32 threshold; + u32 exit_latency; /* exit_latency = sleep + wake-up latencies */ + u32 target_residency; + u8 valid; /* validates the C-state */ }; #if defined(CONFIG_PM) && defined(CONFIG_CPU_IDLE) @@ -73,10 +78,6 @@ extern u32 sleep_while_idle; #define sleep_while_idle 0 #endif -#if defined(CONFIG_CPU_IDLE) -extern void omap3_cpuidle_update_states(u32, u32); -#endif - #if defined(CONFIG_PM_DEBUG) && defined(CONFIG_DEBUG_FS) extern void pm_dbg_update_time(struct powerdomain *pwrdm, int prev); extern int pm_dbg_regset_save(int reg_set); diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 0c5e3a46a3ad..c155c9d1c82c 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -779,18 +779,6 @@ void omap3_pm_off_mode_enable(int enable) else state = PWRDM_POWER_RET; -#ifdef CONFIG_CPU_IDLE - /* - * Erratum i583: implementation for ES rev < Es1.2 on 3630. We cannot - * enable OFF mode in a stable form for previous revisions, restrict - * instead to RET - */ - if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583)) - omap3_cpuidle_update_states(state, PWRDM_POWER_RET); - else - omap3_cpuidle_update_states(state, state); -#endif - list_for_each_entry(pwrst, &pwrst_list, node) { if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583) && pwrst->pwrdm == core_pwrdm && @@ -895,8 +883,6 @@ static int __init omap3_pm_init(void) pm_errata_configure(); - printk(KERN_ERR "Power Management for TI OMAP3.\n"); - /* XXX prcm_setup_regs needs to be before enabling hw * supervised mode for powerdomains */ prcm_setup_regs(); diff --git a/arch/arm/mach-omap2/pm44xx.c b/arch/arm/mach-omap2/pm44xx.c index 76cfff2db514..59a870be8390 100644 --- a/arch/arm/mach-omap2/pm44xx.c +++ b/arch/arm/mach-omap2/pm44xx.c @@ -105,13 +105,11 @@ static int __init omap4_pm_init(void) pr_err("Power Management for TI OMAP4.\n"); -#ifdef CONFIG_PM ret = pwrdm_for_each(pwrdms_setup, NULL); if (ret) { pr_err("Failed to setup powerdomains\n"); goto err2; } -#endif #ifdef CONFIG_SUSPEND suspend_set_ops(&omap_pm_ops); diff --git a/arch/arm/mach-omap2/smartreflex.c b/arch/arm/mach-omap2/smartreflex.c index 13e24f913dd4..fb7dc52394a8 100644 --- a/arch/arm/mach-omap2/smartreflex.c +++ b/arch/arm/mach-omap2/smartreflex.c @@ -847,6 +847,14 @@ static int __init omap_sr_probe(struct platform_device *pdev) goto err_free_devinfo; } + mem = request_mem_region(mem->start, resource_size(mem), + dev_name(&pdev->dev)); + if (!mem) { + dev_err(&pdev->dev, "%s: no mem region\n", __func__); + ret = -EBUSY; + goto err_free_devinfo; + } + irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); pm_runtime_enable(&pdev->dev); @@ -883,7 +891,7 @@ static int __init omap_sr_probe(struct platform_device *pdev) ret = sr_late_init(sr_info); if (ret) { pr_warning("%s: Error in SR late init\n", __func__); - goto err_release_region; + return ret; } } @@ -896,7 +904,7 @@ static int __init omap_sr_probe(struct platform_device *pdev) vdd_dbg_dir = omap_voltage_get_dbgdir(sr_info->voltdm); if (!vdd_dbg_dir) { ret = -EINVAL; - goto err_release_region; + goto err_iounmap; } sr_info->dbg_dir = debugfs_create_dir("smartreflex", vdd_dbg_dir); @@ -904,7 +912,7 @@ static int __init omap_sr_probe(struct platform_device *pdev) dev_err(&pdev->dev, "%s: Unable to create debugfs directory\n", __func__); ret = PTR_ERR(sr_info->dbg_dir); - goto err_release_region; + goto err_iounmap; } (void) debugfs_create_file("autocomp", S_IRUGO | S_IWUSR, @@ -921,7 +929,7 @@ static int __init omap_sr_probe(struct platform_device *pdev) dev_err(&pdev->dev, "%s: Unable to create debugfs directory" "for n-values\n", __func__); ret = PTR_ERR(nvalue_dir); - goto err_release_region; + goto err_debugfs; } omap_voltage_get_volttable(sr_info->voltdm, &volt_data); @@ -931,7 +939,7 @@ static int __init omap_sr_probe(struct platform_device *pdev) "entries for n-values\n", __func__, sr_info->voltdm->name); ret = -ENODATA; - goto err_release_region; + goto err_debugfs; } for (i = 0; i < sr_info->nvalue_count; i++) { @@ -945,6 +953,11 @@ static int __init omap_sr_probe(struct platform_device *pdev) return ret; +err_debugfs: + debugfs_remove_recursive(sr_info->dbg_dir); +err_iounmap: + list_del(&sr_info->node); + iounmap(sr_info->base); err_release_region: release_mem_region(mem->start, resource_size(mem)); err_free_devinfo: diff --git a/arch/arm/mach-omap2/usb-musb.c b/arch/arm/mach-omap2/usb-musb.c index 35559f77e2de..c7ed540d868d 100644 --- a/arch/arm/mach-omap2/usb-musb.c +++ b/arch/arm/mach-omap2/usb-musb.c @@ -108,7 +108,13 @@ static void usb_musb_mux_init(struct omap_musb_board_data *board_data) } } -void __init usb_musb_init(struct omap_musb_board_data *board_data) +static struct omap_musb_board_data musb_default_board_data = { + .interface_type = MUSB_INTERFACE_ULPI, + .mode = MUSB_OTG, + .power = 100, +}; + +void __init usb_musb_init(struct omap_musb_board_data *musb_board_data) { struct omap_hwmod *oh; struct omap_device *od; @@ -116,11 +122,12 @@ void __init usb_musb_init(struct omap_musb_board_data *board_data) struct device *dev; int bus_id = -1; const char *oh_name, *name; + struct omap_musb_board_data *board_data; - if (cpu_is_omap3517() || cpu_is_omap3505()) { - } else if (cpu_is_omap44xx()) { - usb_musb_mux_init(board_data); - } + if (musb_board_data) + board_data = musb_board_data; + else + board_data = &musb_default_board_data; /* * REVISIT: This line can be removed once all the platforms using @@ -164,10 +171,15 @@ void __init usb_musb_init(struct omap_musb_board_data *board_data) dev->dma_mask = &musb_dmamask; dev->coherent_dma_mask = musb_dmamask; put_device(dev); + + if (cpu_is_omap44xx()) + omap4430_phy_init(dev); } #else void __init usb_musb_init(struct omap_musb_board_data *board_data) { + if (cpu_is_omap44xx()) + omap4430_phy_init(NULL); } #endif /* CONFIG_USB_MUSB_SOC */ diff --git a/arch/arm/mach-omap2/usb-tusb6010.c b/arch/arm/mach-omap2/usb-tusb6010.c index 8a3c05f3c1d6..8dd26b765b7d 100644 --- a/arch/arm/mach-omap2/usb-tusb6010.c +++ b/arch/arm/mach-omap2/usb-tusb6010.c @@ -293,12 +293,11 @@ tusb6010_setup_interface(struct musb_hdrc_platform_data *data, ); /* IRQ */ - status = gpio_request(irq, "TUSB6010 irq"); + status = gpio_request_one(irq, GPIOF_IN, "TUSB6010 irq"); if (status < 0) { printk(error, 3, status); return status; } - gpio_direction_input(irq); tusb_resources[2].start = irq + IH_GPIO_BASE; /* set up memory timings ... can speed them up later */ diff --git a/arch/arm/mach-omap2/voltage.c b/arch/arm/mach-omap2/voltage.c index 0c1552d9d995..9ef3789ded4b 100644 --- a/arch/arm/mach-omap2/voltage.c +++ b/arch/arm/mach-omap2/voltage.c @@ -148,7 +148,6 @@ static int vp_volt_debug_get(void *data, u64 *val) } vsel = vdd->read_reg(prm_mod_offs, vdd->vp_data->voltage); - pr_notice("curr_vsel = %x\n", vsel); if (!vdd->pmic_info->vsel_to_uv) { pr_warning("PMIC function to convert vsel to voltage" diff --git a/arch/arm/plat-omap/include/plat/gpmc-smsc911x.h b/arch/arm/plat-omap/include/plat/gpmc-smsc911x.h index 872de0bf1e6b..ea6c9c88c725 100644 --- a/arch/arm/plat-omap/include/plat/gpmc-smsc911x.h +++ b/arch/arm/plat-omap/include/plat/gpmc-smsc911x.h @@ -14,14 +14,14 @@ #ifndef __ASM_ARCH_OMAP_GPMC_SMSC911X_H__ struct omap_smsc911x_platform_data { + int id; int cs; int gpio_irq; int gpio_reset; u32 flags; }; -#if defined(CONFIG_SMSC911X) || \ - defined(CONFIG_SMSC911X_MODULE) +#if defined(CONFIG_SMSC911X) || defined(CONFIG_SMSC911X_MODULE) extern void gpmc_smsc911x_init(struct omap_smsc911x_platform_data *d); diff --git a/arch/arm/plat-omap/include/plat/uncompress.h b/arch/arm/plat-omap/include/plat/uncompress.h index 565d2664f5a7..ac4b60d9aa29 100644 --- a/arch/arm/plat-omap/include/plat/uncompress.h +++ b/arch/arm/plat-omap/include/plat/uncompress.h @@ -129,7 +129,6 @@ static inline void __arch_decomp_setup(unsigned long arch_id) DEBUG_LL_OMAP1(3, sx1); /* omap2 based boards using UART1 */ - DEBUG_LL_OMAP2(1, omap2evm); DEBUG_LL_OMAP2(1, omap_2430sdp); DEBUG_LL_OMAP2(1, omap_apollon); DEBUG_LL_OMAP2(1, omap_h4); diff --git a/arch/arm/plat-omap/include/plat/usb.h b/arch/arm/plat-omap/include/plat/usb.h index 02b96c8f6a17..17d3c939775c 100644 --- a/arch/arm/plat-omap/include/plat/usb.h +++ b/arch/arm/plat-omap/include/plat/usb.h @@ -113,7 +113,7 @@ extern int omap4430_phy_suspend(struct device *dev, int suspend); extern void am35x_musb_reset(void); extern void am35x_musb_phy_power(u8 on); extern void am35x_musb_clear_irq(void); -extern void am35x_musb_set_mode(u8 musb_mode); +extern void am35x_set_mode(u8 musb_mode); /* * FIXME correct answer depends on hmc_mode, diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 423145a6f7ba..2f6a22e8e935 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -141,6 +141,7 @@ config PPC select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select HAVE_RCU_TABLE_FREE if SMP + select HAVE_SYSCALL_TRACEPOINTS config EARLY_PRINTK bool diff --git a/arch/powerpc/boot/dts/canyonlands.dts b/arch/powerpc/boot/dts/canyonlands.dts index 2779f08313a5..22dd6ae84da0 100644 --- a/arch/powerpc/boot/dts/canyonlands.dts +++ b/arch/powerpc/boot/dts/canyonlands.dts @@ -530,5 +530,23 @@ 0x0 0x0 0x0 0x3 &UIC3 0x12 0x4 /* swizzled int C */ 0x0 0x0 0x0 0x4 &UIC3 0x13 0x4 /* swizzled int D */>; }; + + MSI: ppc4xx-msi@C10000000 { + compatible = "amcc,ppc4xx-msi", "ppc4xx-msi"; + reg = < 0xC 0x10000000 0x100>; + sdr-base = <0x36C>; + msi-data = <0x00000000>; + msi-mask = <0x44440000>; + interrupt-count = <3>; + interrupts = <0 1 2 3>; + interrupt-parent = <&UIC3>; + #interrupt-cells = <1>; + #address-cells = <0>; + #size-cells = <0>; + interrupt-map = <0 &UIC3 0x18 1 + 1 &UIC3 0x19 1 + 2 &UIC3 0x1A 1 + 3 &UIC3 0x1B 1>; + }; }; }; diff --git a/arch/powerpc/boot/dts/katmai.dts b/arch/powerpc/boot/dts/katmai.dts index 7c3be5e45748..f913dbe25d35 100644 --- a/arch/powerpc/boot/dts/katmai.dts +++ b/arch/powerpc/boot/dts/katmai.dts @@ -442,6 +442,24 @@ 0x0 0x0 0x0 0x4 &UIC3 0xb 0x4 /* swizzled int D */>; }; + MSI: ppc4xx-msi@400300000 { + compatible = "amcc,ppc4xx-msi", "ppc4xx-msi"; + reg = < 0x4 0x00300000 0x100>; + sdr-base = <0x3B0>; + msi-data = <0x00000000>; + msi-mask = <0x44440000>; + interrupt-count = <3>; + interrupts =<0 1 2 3>; + interrupt-parent = <&UIC0>; + #interrupt-cells = <1>; + #address-cells = <0>; + #size-cells = <0>; + interrupt-map = <0 &UIC0 0xC 1 + 1 &UIC0 0x0D 1 + 2 &UIC0 0x0E 1 + 3 &UIC0 0x0F 1>; + }; + I2O: i2o@400100000 { compatible = "ibm,i2o-440spe"; reg = <0x00000004 0x00100000 0x100>; diff --git a/arch/powerpc/boot/dts/kilauea.dts b/arch/powerpc/boot/dts/kilauea.dts index 89edb16649c3..1613d6e4049e 100644 --- a/arch/powerpc/boot/dts/kilauea.dts +++ b/arch/powerpc/boot/dts/kilauea.dts @@ -403,5 +403,33 @@ 0x0 0x0 0x0 0x3 &UIC2 0xd 0x4 /* swizzled int C */ 0x0 0x0 0x0 0x4 &UIC2 0xe 0x4 /* swizzled int D */>; }; + + MSI: ppc4xx-msi@C10000000 { + compatible = "amcc,ppc4xx-msi", "ppc4xx-msi"; + reg = < 0x0 0xEF620000 0x100>; + sdr-base = <0x4B0>; + msi-data = <0x00000000>; + msi-mask = <0x44440000>; + interrupt-count = <12>; + interrupts = <0 1 2 3 4 5 6 7 8 9 0xA 0xB 0xC 0xD>; + interrupt-parent = <&UIC2>; + #interrupt-cells = <1>; + #address-cells = <0>; + #size-cells = <0>; + interrupt-map = <0 &UIC2 0x10 1 + 1 &UIC2 0x11 1 + 2 &UIC2 0x12 1 + 2 &UIC2 0x13 1 + 2 &UIC2 0x14 1 + 2 &UIC2 0x15 1 + 2 &UIC2 0x16 1 + 2 &UIC2 0x17 1 + 2 &UIC2 0x18 1 + 2 &UIC2 0x19 1 + 2 &UIC2 0x1A 1 + 2 &UIC2 0x1B 1 + 2 &UIC2 0x1C 1 + 3 &UIC2 0x1D 1>; + }; }; }; diff --git a/arch/powerpc/boot/dts/redwood.dts b/arch/powerpc/boot/dts/redwood.dts index 81636c01d906..d86a3a498118 100644 --- a/arch/powerpc/boot/dts/redwood.dts +++ b/arch/powerpc/boot/dts/redwood.dts @@ -358,8 +358,28 @@ 0x0 0x0 0x0 0x4 &UIC3 0xb 0x4 /* swizzled int D */>; }; + MSI: ppc4xx-msi@400300000 { + compatible = "amcc,ppc4xx-msi", "ppc4xx-msi"; + reg = < 0x4 0x00300000 0x100 + 0x4 0x00300000 0x100>; + sdr-base = <0x3B0>; + msi-data = <0x00000000>; + msi-mask = <0x44440000>; + interrupt-count = <3>; + interrupts =<0 1 2 3>; + interrupt-parent = <&UIC0>; + #interrupt-cells = <1>; + #address-cells = <0>; + #size-cells = <0>; + interrupt-map = <0 &UIC0 0xC 1 + 1 &UIC0 0x0D 1 + 2 &UIC0 0x0E 1 + 3 &UIC0 0x0F 1>; + }; + }; + chosen { linux,stdout-path = "/plb/opb/serial@ef600200"; }; diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index dde1296b8b41..169d039ed402 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -60,4 +60,18 @@ struct dyn_arch_ftrace { #endif +#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__) +#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME +static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) +{ + /* + * Compare the symbol name with the system call name. Skip the .sys or .SyS + * prefix from the symbol name and the sys prefix from the system call name and + * just match the rest. This is only needed on ppc64 since symbol names on + * 32bit do not start with a period so the generic function will work. + */ + return !strcmp(sym + 4, name + 3); +} +#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 && !__ASSEMBLY__ */ + #endif /* _ASM_POWERPC_FTRACE */ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 852b8c1c09db..fd8201dddd4b 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -236,7 +236,7 @@ #define H_HOME_NODE_ASSOCIATIVITY 0x2EC #define H_BEST_ENERGY 0x2F4 #define H_GET_MPP_X 0x314 -#define MAX_HCALL_OPCODE H_BEST_ENERGY +#define MAX_HCALL_OPCODE H_GET_MPP_X #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 880b8c1e6e53..11eb404b5606 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -191,8 +191,6 @@ extern unsigned long __secondary_hold_spinloop; extern unsigned long __secondary_hold_acknowledge; extern char __secondary_hold; -extern irqreturn_t debug_ipi_action(int irq, void *data); - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 23913e902fc3..b54b2add07be 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -15,6 +15,11 @@ #include <linux/sched.h> +/* ftrace syscalls requires exporting the sys_call_table */ +#ifdef CONFIG_FTRACE_SYSCALLS +extern const unsigned long *sys_call_table; +#endif /* CONFIG_FTRACE_SYSCALLS */ + static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 37c353e8af7c..836f231ec1f0 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -110,7 +110,8 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NOERROR 12 /* Force successful syscall return */ #define TIF_NOTIFY_RESUME 13 /* callback before returning to user */ #define TIF_FREEZE 14 /* Freezing for suspend */ -#define TIF_RUNLATCH 15 /* Is the runlatch enabled? */ +#define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */ +#define TIF_RUNLATCH 16 /* Is the runlatch enabled? */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -127,8 +128,10 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NOERROR (1<<TIF_NOERROR) #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) #define _TIF_FREEZE (1<<TIF_FREEZE) +#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) #define _TIF_RUNLATCH (1<<TIF_RUNLATCH) -#define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP) +#define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ + _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ _TIF_NOTIFY_RESUME) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 9aab36312572..e8b981897d44 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -109,6 +109,7 @@ obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o +obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index ce1f3e44c24f..bf99cfa6bbfe 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -22,6 +22,7 @@ #include <asm/cacheflush.h> #include <asm/code-patching.h> #include <asm/ftrace.h> +#include <asm/syscall.h> #ifdef CONFIG_DYNAMIC_FTRACE @@ -600,3 +601,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) +unsigned long __init arch_syscall_addr(int nr) +{ + return sys_call_table[nr*2]; +} +#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index a24d37d4cf51..5b428e308666 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -295,17 +295,20 @@ static inline void handle_one_irq(unsigned int irq) unsigned long saved_sp_limit; struct irq_desc *desc; + desc = irq_to_desc(irq); + if (!desc) + return; + /* Switch to the irq stack to handle this */ curtp = current_thread_info(); irqtp = hardirq_ctx[smp_processor_id()]; if (curtp == irqtp) { /* We're already on the irq stack, just handle it */ - generic_handle_irq(irq); + desc->handle_irq(irq, desc); return; } - desc = irq_to_desc(irq); saved_sp_limit = current->thread.ksp_limit; irqtp->task = curtp->task; @@ -557,15 +560,8 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, if (revmap_type == IRQ_HOST_MAP_LEGACY) { if (irq_map[0].host != NULL) { raw_spin_unlock_irqrestore(&irq_big_lock, flags); - /* If we are early boot, we can't free the structure, - * too bad... - * this will be fixed once slab is made available early - * instead of the current cruft - */ - if (mem_init_done) { - of_node_put(host->of_node); - kfree(host); - } + of_node_put(host->of_node); + kfree(host); return NULL; } irq_map[0].host = host; @@ -727,9 +723,7 @@ unsigned int irq_create_mapping(struct irq_host *host, } pr_debug("irq: -> using host @%p\n", host); - /* Check if mapping already exist, if it does, call - * host->ops->map() to update the flags - */ + /* Check if mapping already exists */ virq = irq_find_mapping(host, hwirq); if (virq != NO_IRQ) { pr_debug("irq: -> existing mapping on virq %d\n", virq); @@ -899,10 +893,13 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host, return irq_find_mapping(host, hwirq); /* - * No rcu_read_lock(ing) needed, the ptr returned can't go under us - * as it's referencing an entry in the static irq_map table. + * The ptr returned references the static global irq_map. + * but freeing an irq can delete nodes along the path to + * do the lookup via call_rcu. */ + rcu_read_lock(); ptr = radix_tree_lookup(&host->revmap_data.tree, hwirq); + rcu_read_unlock(); /* * If found in radix tree, then fine. @@ -1010,14 +1007,23 @@ void irq_free_virt(unsigned int virq, unsigned int count) WARN_ON (virq < NUM_ISA_INTERRUPTS); WARN_ON (count == 0 || (virq + count) > irq_virq_count); + if (virq < NUM_ISA_INTERRUPTS) { + if (virq + count < NUM_ISA_INTERRUPTS) + return; + count =- NUM_ISA_INTERRUPTS - virq; + virq = NUM_ISA_INTERRUPTS; + } + + if (count > irq_virq_count || virq > irq_virq_count - count) { + if (virq > irq_virq_count) + return; + count = irq_virq_count - virq; + } + raw_spin_lock_irqsave(&irq_big_lock, flags); for (i = virq; i < (virq + count); i++) { struct irq_host *host; - if (i < NUM_ISA_INTERRUPTS || - (virq + count) > irq_virq_count) - continue; - host = irq_map[i].host; irq_map[i].hwirq = host->inval_irq; smp_wmb(); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index a6ae1cfad86c..cb22024f2b42 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -29,6 +29,7 @@ #include <linux/signal.h> #include <linux/seccomp.h> #include <linux/audit.h> +#include <trace/syscall.h> #ifdef CONFIG_PPC32 #include <linux/module.h> #endif @@ -40,6 +41,9 @@ #include <asm/pgtable.h> #include <asm/system.h> +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + /* * The parameter save area on the stack is used to store arguments being passed * to callee function and is located at fixed offset from stack pointer. @@ -1710,6 +1714,9 @@ long do_syscall_trace_enter(struct pt_regs *regs) */ ret = -1L; + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->gpr[0]); + if (unlikely(current->audit_context)) { #ifdef CONFIG_PPC64 if (!is_32bit_task()) @@ -1738,6 +1745,9 @@ void do_syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit((regs->ccr&0x10000000)?AUDITSC_FAILURE:AUDITSC_SUCCESS, regs->result); + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_exit(regs, regs->result); + step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, step); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 4a6f2ec7e761..8ebc6700b98d 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -129,7 +129,7 @@ static irqreturn_t call_function_single_action(int irq, void *data) return IRQ_HANDLED; } -irqreturn_t debug_ipi_action(int irq, void *data) +static irqreturn_t debug_ipi_action(int irq, void *data) { if (crash_ipi_function_ptr) { crash_ipi_function_ptr(get_irq_regs()); diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c index 8ee51a252cf1..e6bec74be131 100644 --- a/arch/powerpc/oprofile/op_model_power4.c +++ b/arch/powerpc/oprofile/op_model_power4.c @@ -261,6 +261,28 @@ static int get_kernel(unsigned long pc, unsigned long mmcra) return is_kernel; } +static bool pmc_overflow(unsigned long val) +{ + if ((int)val < 0) + return true; + + /* + * Events on POWER7 can roll back if a speculative event doesn't + * eventually complete. Unfortunately in some rare cases they will + * raise a performance monitor exception. We need to catch this to + * ensure we reset the PMC. In all cases the PMC will be 256 or less + * cycles from overflow. + * + * We only do this if the first pass fails to find any overflowing + * PMCs because a user might set a period of less than 256 and we + * don't want to mistakenly reset them. + */ + if (__is_processor(PV_POWER7) && ((0x80000000 - val) <= 256)) + return true; + + return false; +} + static void power4_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) { @@ -281,7 +303,7 @@ static void power4_handle_interrupt(struct pt_regs *regs, for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) { val = classic_ctr_read(i); - if (val < 0) { + if (pmc_overflow(val)) { if (oprofile_running && ctr[i].enabled) { oprofile_add_ext_sample(pc, regs, i, is_kernel); classic_ctr_write(i, reset_value[i]); diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig index b72176434ebe..d733d7ca939c 100644 --- a/arch/powerpc/platforms/40x/Kconfig +++ b/arch/powerpc/platforms/40x/Kconfig @@ -57,6 +57,8 @@ config KILAUEA select 405EX select PPC40x_SIMPLE select PPC4xx_PCI_EXPRESS + select PCI_MSI + select PPC4xx_MSI help This option enables support for the AMCC PPC405EX evaluation board. diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index f485fc5f6d5e..e958b6f48ec2 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -74,6 +74,8 @@ config KATMAI select 440SPe select PCI select PPC4xx_PCI_EXPRESS + select PCI_MSI + select PCC4xx_MSI help This option enables support for the AMCC PPC440SPe evaluation board. @@ -118,6 +120,8 @@ config CANYONLANDS select 460EX select PCI select PPC4xx_PCI_EXPRESS + select PCI_MSI + select PPC4xx_MSI select IBM_NEW_EMAC_RGMII select IBM_NEW_EMAC_ZMII help @@ -144,6 +148,8 @@ config REDWOOD select 460SX select PCI select PPC4xx_PCI_EXPRESS + select PCI_MSI + select PPC4xx_MSI help This option enables support for the AMCC PPC460SX Redwood board. diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 449c08c15862..3e4eba603e6b 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -176,14 +176,14 @@ EXPORT_SYMBOL_GPL(iic_get_target_id); #ifdef CONFIG_SMP /* Use the highest interrupt priorities for IPI */ -static inline int iic_ipi_to_irq(int ipi) +static inline int iic_msg_to_irq(int msg) { - return IIC_IRQ_TYPE_IPI + 0xf - ipi; + return IIC_IRQ_TYPE_IPI + 0xf - msg; } -void iic_cause_IPI(int cpu, int mesg) +void iic_message_pass(int cpu, int msg) { - out_be64(&per_cpu(cpu_iic, cpu).regs->generate, (0xf - mesg) << 4); + out_be64(&per_cpu(cpu_iic, cpu).regs->generate, (0xf - msg) << 4); } struct irq_host *iic_get_irq_host(int node) @@ -192,50 +192,31 @@ struct irq_host *iic_get_irq_host(int node) } EXPORT_SYMBOL_GPL(iic_get_irq_host); -static irqreturn_t iic_ipi_action(int irq, void *dev_id) -{ - int ipi = (int)(long)dev_id; - - switch(ipi) { - case PPC_MSG_CALL_FUNCTION: - generic_smp_call_function_interrupt(); - break; - case PPC_MSG_RESCHEDULE: - scheduler_ipi(); - break; - case PPC_MSG_CALL_FUNC_SINGLE: - generic_smp_call_function_single_interrupt(); - break; - case PPC_MSG_DEBUGGER_BREAK: - debug_ipi_action(0, NULL); - break; - } - return IRQ_HANDLED; -} -static void iic_request_ipi(int ipi, const char *name) +static void iic_request_ipi(int msg) { int virq; - virq = irq_create_mapping(iic_host, iic_ipi_to_irq(ipi)); + virq = irq_create_mapping(iic_host, iic_msg_to_irq(msg)); if (virq == NO_IRQ) { printk(KERN_ERR - "iic: failed to map IPI %s\n", name); + "iic: failed to map IPI %s\n", smp_ipi_name[msg]); return; } - if (request_irq(virq, iic_ipi_action, IRQF_DISABLED, name, - (void *)(long)ipi)) - printk(KERN_ERR - "iic: failed to request IPI %s\n", name); + + /* + * If smp_request_message_ipi encounters an error it will notify + * the error. If a message is not needed it will return non-zero. + */ + if (smp_request_message_ipi(virq, msg)) + irq_dispose_mapping(virq); } void iic_request_IPIs(void) { - iic_request_ipi(PPC_MSG_CALL_FUNCTION, "IPI-call"); - iic_request_ipi(PPC_MSG_RESCHEDULE, "IPI-resched"); - iic_request_ipi(PPC_MSG_CALL_FUNC_SINGLE, "IPI-call-single"); -#ifdef CONFIG_DEBUGGER - iic_request_ipi(PPC_MSG_DEBUGGER_BREAK, "IPI-debug"); -#endif /* CONFIG_DEBUGGER */ + iic_request_ipi(PPC_MSG_CALL_FUNCTION); + iic_request_ipi(PPC_MSG_RESCHEDULE); + iic_request_ipi(PPC_MSG_CALL_FUNC_SINGLE); + iic_request_ipi(PPC_MSG_DEBUGGER_BREAK); } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h index 942dc39d6045..4f60ae6ca358 100644 --- a/arch/powerpc/platforms/cell/interrupt.h +++ b/arch/powerpc/platforms/cell/interrupt.h @@ -75,7 +75,7 @@ enum { }; extern void iic_init_IRQ(void); -extern void iic_cause_IPI(int cpu, int mesg); +extern void iic_message_pass(int cpu, int msg); extern void iic_request_IPIs(void); extern void iic_setup_cpu(void); diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index d176e6148e3f..dbb641ea90dd 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -152,7 +152,7 @@ static int smp_cell_cpu_bootable(unsigned int nr) return 1; } static struct smp_ops_t bpa_iic_smp_ops = { - .message_pass = iic_cause_IPI, + .message_pass = iic_message_pass, .probe = smp_iic_probe, .kick_cpu = smp_cell_kick_cpu, .setup_cpu = smp_cell_setup_cpu, diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig index d775fd148d13..7b4df37ac381 100644 --- a/arch/powerpc/sysdev/Kconfig +++ b/arch/powerpc/sysdev/Kconfig @@ -7,11 +7,18 @@ config PPC4xx_PCI_EXPRESS depends on PCI && 4xx default n +config PPC4xx_MSI + bool + depends on PCI_MSI + depends on PCI && 4xx + default n + config PPC_MSI_BITMAP bool depends on PCI_MSI default y if MPIC default y if FSL_PCI + default y if PPC4xx_MSI source "arch/powerpc/sysdev/xics/Kconfig" diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index 6076e0074a87..0efa990e3344 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_OF_RTC) += of_rtc.o ifeq ($(CONFIG_PCI),y) obj-$(CONFIG_4xx) += ppc4xx_pci.o endif +obj-$(CONFIG_PPC4xx_MSI) += ppc4xx_msi.o obj-$(CONFIG_PPC4xx_CPM) += ppc4xx_cpm.o obj-$(CONFIG_PPC4xx_GPIO) += ppc4xx_gpio.o diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c new file mode 100644 index 000000000000..367af0241851 --- /dev/null +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -0,0 +1,276 @@ +/* + * Adding PCI-E MSI support for PPC4XX SoCs. + * + * Copyright (c) 2010, Applied Micro Circuits Corporation + * Authors: Tirumala R Marri <tmarri@apm.com> + * Feng Kan <fkan@apm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ + +#include <linux/irq.h> +#include <linux/bootmem.h> +#include <linux/pci.h> +#include <linux/msi.h> +#include <linux/of_platform.h> +#include <linux/interrupt.h> +#include <asm/prom.h> +#include <asm/hw_irq.h> +#include <asm/ppc-pci.h> +#include <boot/dcr.h> +#include <asm/dcr-regs.h> +#include <asm/msi_bitmap.h> + +#define PEIH_TERMADH 0x00 +#define PEIH_TERMADL 0x08 +#define PEIH_MSIED 0x10 +#define PEIH_MSIMK 0x18 +#define PEIH_MSIASS 0x20 +#define PEIH_FLUSH0 0x30 +#define PEIH_FLUSH1 0x38 +#define PEIH_CNTRST 0x48 +#define NR_MSI_IRQS 4 + +struct ppc4xx_msi { + u32 msi_addr_lo; + u32 msi_addr_hi; + void __iomem *msi_regs; + int msi_virqs[NR_MSI_IRQS]; + struct msi_bitmap bitmap; + struct device_node *msi_dev; +}; + +static struct ppc4xx_msi ppc4xx_msi; + +static int ppc4xx_msi_init_allocator(struct platform_device *dev, + struct ppc4xx_msi *msi_data) +{ + int err; + + err = msi_bitmap_alloc(&msi_data->bitmap, NR_MSI_IRQS, + dev->dev.of_node); + if (err) + return err; + + err = msi_bitmap_reserve_dt_hwirqs(&msi_data->bitmap); + if (err < 0) { + msi_bitmap_free(&msi_data->bitmap); + return err; + } + + return 0; +} + +static int ppc4xx_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int int_no = -ENOMEM; + unsigned int virq; + struct msi_msg msg; + struct msi_desc *entry; + struct ppc4xx_msi *msi_data = &ppc4xx_msi; + + list_for_each_entry(entry, &dev->msi_list, list) { + int_no = msi_bitmap_alloc_hwirqs(&msi_data->bitmap, 1); + if (int_no >= 0) + break; + if (int_no < 0) { + pr_debug("%s: fail allocating msi interrupt\n", + __func__); + } + virq = irq_of_parse_and_map(msi_data->msi_dev, int_no); + if (virq == NO_IRQ) { + dev_err(&dev->dev, "%s: fail mapping irq\n", __func__); + msi_bitmap_free_hwirqs(&msi_data->bitmap, int_no, 1); + return -ENOSPC; + } + dev_dbg(&dev->dev, "%s: virq = %d\n", __func__, virq); + + /* Setup msi address space */ + msg.address_hi = msi_data->msi_addr_hi; + msg.address_lo = msi_data->msi_addr_lo; + + irq_set_msi_desc(virq, entry); + msg.data = int_no; + write_msi_msg(virq, &msg); + } + return 0; +} + +void ppc4xx_teardown_msi_irqs(struct pci_dev *dev) +{ + struct msi_desc *entry; + struct ppc4xx_msi *msi_data = &ppc4xx_msi; + + dev_dbg(&dev->dev, "PCIE-MSI: tearing down msi irqs\n"); + + list_for_each_entry(entry, &dev->msi_list, list) { + if (entry->irq == NO_IRQ) + continue; + irq_set_msi_desc(entry->irq, NULL); + msi_bitmap_free_hwirqs(&msi_data->bitmap, + virq_to_hw(entry->irq), 1); + irq_dispose_mapping(entry->irq); + } +} + +static int ppc4xx_msi_check_device(struct pci_dev *pdev, int nvec, int type) +{ + dev_dbg(&pdev->dev, "PCIE-MSI:%s called. vec %x type %d\n", + __func__, nvec, type); + if (type == PCI_CAP_ID_MSIX) + pr_debug("ppc4xx msi: MSI-X untested, trying anyway.\n"); + + return 0; +} + +static int ppc4xx_setup_pcieh_hw(struct platform_device *dev, + struct resource res, struct ppc4xx_msi *msi) +{ + const u32 *msi_data; + const u32 *msi_mask; + const u32 *sdr_addr; + dma_addr_t msi_phys; + void *msi_virt; + + sdr_addr = of_get_property(dev->dev.of_node, "sdr-base", NULL); + if (!sdr_addr) + return -1; + + SDR0_WRITE(sdr_addr, (u64)res.start >> 32); /*HIGH addr */ + SDR0_WRITE(sdr_addr + 1, res.start & 0xFFFFFFFF); /* Low addr */ + + + msi->msi_dev = of_find_node_by_name(NULL, "ppc4xx-msi"); + if (msi->msi_dev) + return -ENODEV; + + msi->msi_regs = of_iomap(msi->msi_dev, 0); + if (!msi->msi_regs) { + dev_err(&dev->dev, "of_iomap problem failed\n"); + return -ENOMEM; + } + dev_dbg(&dev->dev, "PCIE-MSI: msi register mapped 0x%x 0x%x\n", + (u32) (msi->msi_regs + PEIH_TERMADH), (u32) (msi->msi_regs)); + + msi_virt = dma_alloc_coherent(&dev->dev, 64, &msi_phys, GFP_KERNEL); + msi->msi_addr_hi = 0x0; + msi->msi_addr_lo = (u32) msi_phys; + dev_dbg(&dev->dev, "PCIE-MSI: msi address 0x%x\n", msi->msi_addr_lo); + + /* Progam the Interrupt handler Termination addr registers */ + out_be32(msi->msi_regs + PEIH_TERMADH, msi->msi_addr_hi); + out_be32(msi->msi_regs + PEIH_TERMADL, msi->msi_addr_lo); + + msi_data = of_get_property(dev->dev.of_node, "msi-data", NULL); + if (!msi_data) + return -1; + msi_mask = of_get_property(dev->dev.of_node, "msi-mask", NULL); + if (!msi_mask) + return -1; + /* Program MSI Expected data and Mask bits */ + out_be32(msi->msi_regs + PEIH_MSIED, *msi_data); + out_be32(msi->msi_regs + PEIH_MSIMK, *msi_mask); + + return 0; +} + +static int ppc4xx_of_msi_remove(struct platform_device *dev) +{ + struct ppc4xx_msi *msi = dev->dev.platform_data; + int i; + int virq; + + for (i = 0; i < NR_MSI_IRQS; i++) { + virq = msi->msi_virqs[i]; + if (virq != NO_IRQ) + irq_dispose_mapping(virq); + } + + if (msi->bitmap.bitmap) + msi_bitmap_free(&msi->bitmap); + iounmap(msi->msi_regs); + of_node_put(msi->msi_dev); + kfree(msi); + + return 0; +} + +static int __devinit ppc4xx_msi_probe(struct platform_device *dev) +{ + struct ppc4xx_msi *msi; + struct resource res; + int err = 0; + + msi = &ppc4xx_msi;/*keep the msi data for further use*/ + + dev_dbg(&dev->dev, "PCIE-MSI: Setting up MSI support...\n"); + + msi = kzalloc(sizeof(struct ppc4xx_msi), GFP_KERNEL); + if (!msi) { + dev_err(&dev->dev, "No memory for MSI structure\n"); + return -ENOMEM; + } + dev->dev.platform_data = msi; + + /* Get MSI ranges */ + err = of_address_to_resource(dev->dev.of_node, 0, &res); + if (err) { + dev_err(&dev->dev, "%s resource error!\n", + dev->dev.of_node->full_name); + goto error_out; + } + + if (ppc4xx_setup_pcieh_hw(dev, res, msi)) + goto error_out; + + err = ppc4xx_msi_init_allocator(dev, msi); + if (err) { + dev_err(&dev->dev, "Error allocating MSI bitmap\n"); + goto error_out; + } + + ppc_md.setup_msi_irqs = ppc4xx_setup_msi_irqs; + ppc_md.teardown_msi_irqs = ppc4xx_teardown_msi_irqs; + ppc_md.msi_check_device = ppc4xx_msi_check_device; + return err; + +error_out: + ppc4xx_of_msi_remove(dev); + return err; +} +static const struct of_device_id ppc4xx_msi_ids[] = { + { + .compatible = "amcc,ppc4xx-msi", + }, + {} +}; +static struct platform_driver ppc4xx_msi_driver = { + .probe = ppc4xx_msi_probe, + .remove = ppc4xx_of_msi_remove, + .driver = { + .name = "ppc4xx-msi", + .owner = THIS_MODULE, + .of_match_table = ppc4xx_msi_ids, + }, + +}; + +static __init int ppc4xx_msi_init(void) +{ + return platform_driver_register(&ppc4xx_msi_driver); +} + +subsys_initcall(ppc4xx_msi_init); diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 8508bfe52296..d240ea950519 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -447,6 +447,13 @@ HYPERVISOR_hvm_op(int op, void *arg) return _hypercall2(unsigned long, hvm_op, op, arg); } +static inline int +HYPERVISOR_tmem_op( + struct tmem_op *op) +{ + return _hypercall1(int, tmem_op, op); +} + static inline void MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) { diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 6e35eccc9caa..0f9a84c1046a 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -2,6 +2,7 @@ menuconfig INFINIBAND tristate "InfiniBand support" depends on PCI || BROKEN depends on HAS_IOMEM + depends on NET ---help--- Core support for InfiniBand (IB). Make sure to also select any protocols you wish to use as well as drivers for your diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index cb1ab3ea4998..c8bbaef1becb 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ $(user_access-y) ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ - device.o fmr_pool.o cache.o + device.o fmr_pool.o cache.o netlink.o ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_mad-y := mad.o smi.o agent.o mad_rmpp.o diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index f804e28e1ebb..f62f52fb9ece 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3639,8 +3639,16 @@ static struct kobj_type cm_port_obj_type = { .release = cm_release_port_obj }; +static char *cm_devnode(struct device *dev, mode_t *mode) +{ + *mode = 0666; + return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); +} + struct class cm_class = { + .owner = THIS_MODULE, .name = "infiniband_cm", + .devnode = cm_devnode, }; EXPORT_SYMBOL(cm_class); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 99dde874fbbd..b6a33b3c516d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -47,6 +47,7 @@ #include <rdma/rdma_cm.h> #include <rdma/rdma_cm_ib.h> +#include <rdma/rdma_netlink.h> #include <rdma/ib_cache.h> #include <rdma/ib_cm.h> #include <rdma/ib_sa.h> @@ -89,20 +90,6 @@ struct cma_device { struct list_head id_list; }; -enum cma_state { - CMA_IDLE, - CMA_ADDR_QUERY, - CMA_ADDR_RESOLVED, - CMA_ROUTE_QUERY, - CMA_ROUTE_RESOLVED, - CMA_CONNECT, - CMA_DISCONNECT, - CMA_ADDR_BOUND, - CMA_LISTEN, - CMA_DEVICE_REMOVAL, - CMA_DESTROYING -}; - struct rdma_bind_list { struct idr *ps; struct hlist_head owners; @@ -126,7 +113,7 @@ struct rdma_id_private { struct list_head mc_list; int internal_id; - enum cma_state state; + enum rdma_cm_state state; spinlock_t lock; struct mutex qp_mutex; @@ -146,6 +133,7 @@ struct rdma_id_private { u32 seq_num; u32 qkey; u32 qp_num; + pid_t owner; u8 srq; u8 tos; u8 reuseaddr; @@ -165,8 +153,8 @@ struct cma_multicast { struct cma_work { struct work_struct work; struct rdma_id_private *id; - enum cma_state old_state; - enum cma_state new_state; + enum rdma_cm_state old_state; + enum rdma_cm_state new_state; struct rdma_cm_event event; }; @@ -217,7 +205,7 @@ struct sdp_hah { #define CMA_VERSION 0x00 #define SDP_MAJ_VERSION 0x2 -static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp) +static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) { unsigned long flags; int ret; @@ -229,7 +217,7 @@ static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp) } static int cma_comp_exch(struct rdma_id_private *id_priv, - enum cma_state comp, enum cma_state exch) + enum rdma_cm_state comp, enum rdma_cm_state exch) { unsigned long flags; int ret; @@ -241,11 +229,11 @@ static int cma_comp_exch(struct rdma_id_private *id_priv, return ret; } -static enum cma_state cma_exch(struct rdma_id_private *id_priv, - enum cma_state exch) +static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv, + enum rdma_cm_state exch) { unsigned long flags; - enum cma_state old; + enum rdma_cm_state old; spin_lock_irqsave(&id_priv->lock, flags); old = id_priv->state; @@ -279,11 +267,6 @@ static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver) hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF); } -static inline int cma_is_ud_ps(enum rdma_port_space ps) -{ - return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB); -} - static void cma_attach_to_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { @@ -413,7 +396,7 @@ static void cma_deref_id(struct rdma_id_private *id_priv) } static int cma_disable_callback(struct rdma_id_private *id_priv, - enum cma_state state) + enum rdma_cm_state state) { mutex_lock(&id_priv->handler_mutex); if (id_priv->state != state) { @@ -429,7 +412,8 @@ static int cma_has_cm_dev(struct rdma_id_private *id_priv) } struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, - void *context, enum rdma_port_space ps) + void *context, enum rdma_port_space ps, + enum ib_qp_type qp_type) { struct rdma_id_private *id_priv; @@ -437,10 +421,12 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, if (!id_priv) return ERR_PTR(-ENOMEM); - id_priv->state = CMA_IDLE; + id_priv->owner = task_pid_nr(current); + id_priv->state = RDMA_CM_IDLE; id_priv->id.context = context; id_priv->id.event_handler = event_handler; id_priv->id.ps = ps; + id_priv->id.qp_type = qp_type; spin_lock_init(&id_priv->lock); mutex_init(&id_priv->qp_mutex); init_completion(&id_priv->comp); @@ -508,7 +494,7 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, if (IS_ERR(qp)) return PTR_ERR(qp); - if (cma_is_ud_ps(id_priv->id.ps)) + if (id->qp_type == IB_QPT_UD) ret = cma_init_ud_qp(id_priv, qp); else ret = cma_init_conn_qp(id_priv, qp); @@ -636,7 +622,7 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, qp_attr->port_num = id_priv->id.port_num; *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; - if (cma_is_ud_ps(id_priv->id.ps)) { + if (id_priv->id.qp_type == IB_QPT_UD) { ret = cma_set_qkey(id_priv); if (ret) return ret; @@ -659,7 +645,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, id_priv = container_of(id, struct rdma_id_private, id); switch (rdma_node_get_transport(id_priv->id.device->node_type)) { case RDMA_TRANSPORT_IB: - if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps)) + if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD)) ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); else ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, @@ -858,16 +844,16 @@ static void cma_cancel_listens(struct rdma_id_private *id_priv) } static void cma_cancel_operation(struct rdma_id_private *id_priv, - enum cma_state state) + enum rdma_cm_state state) { switch (state) { - case CMA_ADDR_QUERY: + case RDMA_CM_ADDR_QUERY: rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); break; - case CMA_ROUTE_QUERY: + case RDMA_CM_ROUTE_QUERY: cma_cancel_route(id_priv); break; - case CMA_LISTEN: + case RDMA_CM_LISTEN: if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr) && !id_priv->cma_dev) cma_cancel_listens(id_priv); @@ -918,10 +904,10 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) void rdma_destroy_id(struct rdma_cm_id *id) { struct rdma_id_private *id_priv; - enum cma_state state; + enum rdma_cm_state state; id_priv = container_of(id, struct rdma_id_private, id); - state = cma_exch(id_priv, CMA_DESTROYING); + state = cma_exch(id_priv, RDMA_CM_DESTROYING); cma_cancel_operation(id_priv, state); /* @@ -1015,9 +1001,9 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) int ret = 0; if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && - cma_disable_callback(id_priv, CMA_CONNECT)) || + cma_disable_callback(id_priv, RDMA_CM_CONNECT)) || (ib_event->event == IB_CM_TIMEWAIT_EXIT && - cma_disable_callback(id_priv, CMA_DISCONNECT))) + cma_disable_callback(id_priv, RDMA_CM_DISCONNECT))) return 0; memset(&event, 0, sizeof event); @@ -1048,7 +1034,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) event.status = -ETIMEDOUT; /* fall through */ case IB_CM_DREQ_RECEIVED: case IB_CM_DREP_RECEIVED: - if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT)) + if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT, + RDMA_CM_DISCONNECT)) goto out; event.event = RDMA_CM_EVENT_DISCONNECTED; break; @@ -1075,7 +1062,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; - cma_exch(id_priv, CMA_DESTROYING); + cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; @@ -1101,7 +1088,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, goto err; id = rdma_create_id(listen_id->event_handler, listen_id->context, - listen_id->ps); + listen_id->ps, ib_event->param.req_rcvd.qp_type); if (IS_ERR(id)) goto err; @@ -1132,7 +1119,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); id_priv = container_of(id, struct rdma_id_private, id); - id_priv->state = CMA_CONNECT; + id_priv->state = RDMA_CM_CONNECT; return id_priv; destroy_id: @@ -1152,7 +1139,7 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, int ret; id = rdma_create_id(listen_id->event_handler, listen_id->context, - listen_id->ps); + listen_id->ps, IB_QPT_UD); if (IS_ERR(id)) return NULL; @@ -1172,7 +1159,7 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, } id_priv = container_of(id, struct rdma_id_private, id); - id_priv->state = CMA_CONNECT; + id_priv->state = RDMA_CM_CONNECT; return id_priv; err: rdma_destroy_id(id); @@ -1201,13 +1188,13 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) int offset, ret; listen_id = cm_id->context; - if (cma_disable_callback(listen_id, CMA_LISTEN)) + if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) return -ECONNABORTED; memset(&event, 0, sizeof event); offset = cma_user_data_offset(listen_id->id.ps); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; - if (cma_is_ud_ps(listen_id->id.ps)) { + if (listen_id->id.qp_type == IB_QPT_UD) { conn_id = cma_new_udp_id(&listen_id->id, ib_event); event.param.ud.private_data = ib_event->private_data + offset; event.param.ud.private_data_len = @@ -1243,8 +1230,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) * while we're accessing the cm_id. */ mutex_lock(&lock); - if (cma_comp(conn_id, CMA_CONNECT) && - !cma_is_ud_ps(conn_id->id.ps)) + if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); mutex_unlock(&lock); mutex_unlock(&conn_id->handler_mutex); @@ -1257,7 +1243,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) conn_id->cm_id.ib = NULL; release_conn_id: - cma_exch(conn_id, CMA_DESTROYING); + cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(&conn_id->id); @@ -1328,7 +1314,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) struct sockaddr_in *sin; int ret = 0; - if (cma_disable_callback(id_priv, CMA_CONNECT)) + if (cma_disable_callback(id_priv, RDMA_CM_CONNECT)) return 0; memset(&event, 0, sizeof event); @@ -1371,7 +1357,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.iw = NULL; - cma_exch(id_priv, CMA_DESTROYING); + cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; @@ -1393,20 +1379,20 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, struct ib_device_attr attr; listen_id = cm_id->context; - if (cma_disable_callback(listen_id, CMA_LISTEN)) + if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) return -ECONNABORTED; /* Create a new RDMA id for the new IW CM ID */ new_cm_id = rdma_create_id(listen_id->id.event_handler, listen_id->id.context, - RDMA_PS_TCP); + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(new_cm_id)) { ret = -ENOMEM; goto out; } conn_id = container_of(new_cm_id, struct rdma_id_private, id); mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); - conn_id->state = CMA_CONNECT; + conn_id->state = RDMA_CM_CONNECT; dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr); if (!dev) { @@ -1461,7 +1447,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, if (ret) { /* User wants to destroy the CM ID */ conn_id->cm_id.iw = NULL; - cma_exch(conn_id, CMA_DESTROYING); + cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); cma_deref_id(conn_id); rdma_destroy_id(&conn_id->id); @@ -1548,13 +1534,14 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, struct rdma_cm_id *id; int ret; - id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps); + id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps, + id_priv->id.qp_type); if (IS_ERR(id)) return; dev_id_priv = container_of(id, struct rdma_id_private, id); - dev_id_priv->state = CMA_ADDR_BOUND; + dev_id_priv->state = RDMA_CM_ADDR_BOUND; memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr, ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr)); @@ -1601,8 +1588,8 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, route->num_paths = 1; *route->path_rec = *path_rec; } else { - work->old_state = CMA_ROUTE_QUERY; - work->new_state = CMA_ADDR_RESOLVED; + work->old_state = RDMA_CM_ROUTE_QUERY; + work->new_state = RDMA_CM_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; work->event.status = status; } @@ -1660,7 +1647,7 @@ static void cma_work_handler(struct work_struct *_work) goto out; if (id_priv->id.event_handler(&id_priv->id, &work->event)) { - cma_exch(id_priv, CMA_DESTROYING); + cma_exch(id_priv, RDMA_CM_DESTROYING); destroy = 1; } out: @@ -1678,12 +1665,12 @@ static void cma_ndev_work_handler(struct work_struct *_work) int destroy = 0; mutex_lock(&id_priv->handler_mutex); - if (id_priv->state == CMA_DESTROYING || - id_priv->state == CMA_DEVICE_REMOVAL) + if (id_priv->state == RDMA_CM_DESTROYING || + id_priv->state == RDMA_CM_DEVICE_REMOVAL) goto out; if (id_priv->id.event_handler(&id_priv->id, &work->event)) { - cma_exch(id_priv, CMA_DESTROYING); + cma_exch(id_priv, RDMA_CM_DESTROYING); destroy = 1; } @@ -1707,8 +1694,8 @@ static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); - work->old_state = CMA_ROUTE_QUERY; - work->new_state = CMA_ROUTE_RESOLVED; + work->old_state = RDMA_CM_ROUTE_QUERY; + work->new_state = RDMA_CM_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); @@ -1737,7 +1724,8 @@ int rdma_set_ib_paths(struct rdma_cm_id *id, int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED)) + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, + RDMA_CM_ROUTE_RESOLVED)) return -EINVAL; id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths, @@ -1750,7 +1738,7 @@ int rdma_set_ib_paths(struct rdma_cm_id *id, id->route.num_paths = num_paths; return 0; err: - cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED); + cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); return ret; } EXPORT_SYMBOL(rdma_set_ib_paths); @@ -1765,8 +1753,8 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); - work->old_state = CMA_ROUTE_QUERY; - work->new_state = CMA_ROUTE_RESOLVED; + work->old_state = RDMA_CM_ROUTE_QUERY; + work->new_state = RDMA_CM_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; queue_work(cma_wq, &work->work); return 0; @@ -1830,8 +1818,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) goto err2; } - work->old_state = CMA_ROUTE_QUERY; - work->new_state = CMA_ROUTE_RESOLVED; + work->old_state = RDMA_CM_ROUTE_QUERY; + work->new_state = RDMA_CM_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; work->event.status = 0; @@ -1853,7 +1841,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY)) + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) return -EINVAL; atomic_inc(&id_priv->refcount); @@ -1882,7 +1870,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) return 0; err: - cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED); + cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); cma_deref_id(id_priv); return ret; } @@ -1941,14 +1929,16 @@ static void addr_handler(int status, struct sockaddr *src_addr, memset(&event, 0, sizeof event); mutex_lock(&id_priv->handler_mutex); - if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, + RDMA_CM_ADDR_RESOLVED)) goto out; if (!status && !id_priv->cma_dev) status = cma_acquire_dev(id_priv); if (status) { - if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND)) + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, + RDMA_CM_ADDR_BOUND)) goto out; event.event = RDMA_CM_EVENT_ADDR_ERROR; event.status = status; @@ -1959,7 +1949,7 @@ static void addr_handler(int status, struct sockaddr *src_addr, } if (id_priv->id.event_handler(&id_priv->id, &event)) { - cma_exch(id_priv, CMA_DESTROYING); + cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); rdma_destroy_id(&id_priv->id); @@ -2004,8 +1994,8 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); - work->old_state = CMA_ADDR_QUERY; - work->new_state = CMA_ADDR_RESOLVED; + work->old_state = RDMA_CM_ADDR_QUERY; + work->new_state = RDMA_CM_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; queue_work(cma_wq, &work->work); return 0; @@ -2034,13 +2024,13 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (id_priv->state == CMA_IDLE) { + if (id_priv->state == RDMA_CM_IDLE) { ret = cma_bind_addr(id, src_addr, dst_addr); if (ret) return ret; } - if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY)) + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) return -EINVAL; atomic_inc(&id_priv->refcount); @@ -2056,7 +2046,7 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, return 0; err: - cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND); + cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); cma_deref_id(id_priv); return ret; } @@ -2070,7 +2060,7 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irqsave(&id_priv->lock, flags); - if (id_priv->state == CMA_IDLE) { + if (id_priv->state == RDMA_CM_IDLE) { id_priv->reuseaddr = reuse; ret = 0; } else { @@ -2177,7 +2167,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list, if (id_priv == cur_id) continue; - if ((cur_id->state == CMA_LISTEN) || + if ((cur_id->state == RDMA_CM_LISTEN) || !reuseaddr || !cur_id->reuseaddr) { cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr; if (cma_any_addr(cur_addr)) @@ -2280,14 +2270,14 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (id_priv->state == CMA_IDLE) { + if (id_priv->state == RDMA_CM_IDLE) { ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET; ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr); if (ret) return ret; } - if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN)) + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) return -EINVAL; if (id_priv->reuseaddr) { @@ -2319,7 +2309,7 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) return 0; err: id_priv->backlog = 0; - cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND); + cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); return ret; } EXPORT_SYMBOL(rdma_listen); @@ -2333,7 +2323,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) return -EAFNOSUPPORT; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND)) + if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) return -EINVAL; ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); @@ -2360,7 +2350,7 @@ err2: if (id_priv->cma_dev) cma_release_dev(id_priv); err1: - cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE); + cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); return ret; } EXPORT_SYMBOL(rdma_bind_addr); @@ -2433,7 +2423,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; int ret = 0; - if (cma_disable_callback(id_priv, CMA_CONNECT)) + if (cma_disable_callback(id_priv, RDMA_CM_CONNECT)) return 0; memset(&event, 0, sizeof event); @@ -2479,7 +2469,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; - cma_exch(id_priv, CMA_DESTROYING); + cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; @@ -2645,7 +2635,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT)) + if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) return -EINVAL; if (!id->qp) { @@ -2655,7 +2645,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - if (cma_is_ud_ps(id->ps)) + if (id->qp_type == IB_QPT_UD) ret = cma_resolve_ib_udp(id_priv, conn_param); else ret = cma_connect_ib(id_priv, conn_param); @@ -2672,7 +2662,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) return 0; err: - cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED); + cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); return ret; } EXPORT_SYMBOL(rdma_connect); @@ -2758,7 +2748,10 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp(id_priv, CMA_CONNECT)) + + id_priv->owner = task_pid_nr(current); + + if (!cma_comp(id_priv, RDMA_CM_CONNECT)) return -EINVAL; if (!id->qp && conn_param) { @@ -2768,7 +2761,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - if (cma_is_ud_ps(id->ps)) + if (id->qp_type == IB_QPT_UD) ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, conn_param->private_data, conn_param->private_data_len); @@ -2829,7 +2822,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - if (cma_is_ud_ps(id->ps)) + if (id->qp_type == IB_QPT_UD) ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, private_data, private_data_len); else @@ -2887,8 +2880,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) int ret; id_priv = mc->id_priv; - if (cma_disable_callback(id_priv, CMA_ADDR_BOUND) && - cma_disable_callback(id_priv, CMA_ADDR_RESOLVED)) + if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) && + cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED)) return 0; mutex_lock(&id_priv->qp_mutex); @@ -2912,7 +2905,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { - cma_exch(id_priv, CMA_DESTROYING); + cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return 0; @@ -3095,8 +3088,8 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp(id_priv, CMA_ADDR_BOUND) && - !cma_comp(id_priv, CMA_ADDR_RESOLVED)) + if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) && + !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED)) return -EINVAL; mc = kmalloc(sizeof *mc, GFP_KERNEL); @@ -3261,19 +3254,19 @@ static void cma_add_one(struct ib_device *device) static int cma_remove_id_dev(struct rdma_id_private *id_priv) { struct rdma_cm_event event; - enum cma_state state; + enum rdma_cm_state state; int ret = 0; /* Record that we want to remove the device */ - state = cma_exch(id_priv, CMA_DEVICE_REMOVAL); - if (state == CMA_DESTROYING) + state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL); + if (state == RDMA_CM_DESTROYING) return 0; cma_cancel_operation(id_priv, state); mutex_lock(&id_priv->handler_mutex); /* Check for destruction from another callback. */ - if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL)) + if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL)) goto out; memset(&event, 0, sizeof event); @@ -3328,6 +3321,100 @@ static void cma_remove_one(struct ib_device *device) kfree(cma_dev); } +static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nlmsghdr *nlh; + struct rdma_cm_id_stats *id_stats; + struct rdma_id_private *id_priv; + struct rdma_cm_id *id = NULL; + struct cma_device *cma_dev; + int i_dev = 0, i_id = 0; + + /* + * We export all of the IDs as a sequence of messages. Each + * ID gets its own netlink message. + */ + mutex_lock(&lock); + + list_for_each_entry(cma_dev, &dev_list, list) { + if (i_dev < cb->args[0]) { + i_dev++; + continue; + } + + i_id = 0; + list_for_each_entry(id_priv, &cma_dev->id_list, list) { + if (i_id < cb->args[1]) { + i_id++; + continue; + } + + id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq, + sizeof *id_stats, RDMA_NL_RDMA_CM, + RDMA_NL_RDMA_CM_ID_STATS); + if (!id_stats) + goto out; + + memset(id_stats, 0, sizeof *id_stats); + id = &id_priv->id; + id_stats->node_type = id->route.addr.dev_addr.dev_type; + id_stats->port_num = id->port_num; + id_stats->bound_dev_if = + id->route.addr.dev_addr.bound_dev_if; + + if (id->route.addr.src_addr.ss_family == AF_INET) { + if (ibnl_put_attr(skb, nlh, + sizeof(struct sockaddr_in), + &id->route.addr.src_addr, + RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) { + goto out; + } + if (ibnl_put_attr(skb, nlh, + sizeof(struct sockaddr_in), + &id->route.addr.dst_addr, + RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) { + goto out; + } + } else if (id->route.addr.src_addr.ss_family == AF_INET6) { + if (ibnl_put_attr(skb, nlh, + sizeof(struct sockaddr_in6), + &id->route.addr.src_addr, + RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) { + goto out; + } + if (ibnl_put_attr(skb, nlh, + sizeof(struct sockaddr_in6), + &id->route.addr.dst_addr, + RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) { + goto out; + } + } + + id_stats->pid = id_priv->owner; + id_stats->port_space = id->ps; + id_stats->cm_state = id_priv->state; + id_stats->qp_num = id_priv->qp_num; + id_stats->qp_type = id->qp_type; + + i_id++; + } + + cb->args[1] = 0; + i_dev++; + } + +out: + mutex_unlock(&lock); + cb->args[0] = i_dev; + cb->args[1] = i_id; + + return skb->len; +} + +static const struct ibnl_client_cbs cma_cb_table[] = { + [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats }, +}; + static int __init cma_init(void) { int ret; @@ -3343,6 +3430,10 @@ static int __init cma_init(void) ret = ib_register_client(&cma_client); if (ret) goto err; + + if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table)) + printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n"); + return 0; err: @@ -3355,6 +3446,7 @@ err: static void __exit cma_cleanup(void) { + ibnl_remove_client(RDMA_NL_RDMA_CM); ib_unregister_client(&cma_client); unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index f793bf2f5da7..4007f721d25d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -38,6 +38,7 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/mutex.h> +#include <rdma/rdma_netlink.h> #include "core_priv.h" @@ -725,22 +726,40 @@ static int __init ib_core_init(void) return -ENOMEM; ret = ib_sysfs_setup(); - if (ret) + if (ret) { printk(KERN_WARNING "Couldn't create InfiniBand device class\n"); + goto err; + } + + ret = ibnl_init(); + if (ret) { + printk(KERN_WARNING "Couldn't init IB netlink interface\n"); + goto err_sysfs; + } ret = ib_cache_setup(); if (ret) { printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n"); - ib_sysfs_cleanup(); - destroy_workqueue(ib_wq); + goto err_nl; } + return 0; + +err_nl: + ibnl_cleanup(); + +err_sysfs: + ib_sysfs_cleanup(); + +err: + destroy_workqueue(ib_wq); return ret; } static void __exit ib_core_cleanup(void) { ib_cache_cleanup(); + ibnl_cleanup(); ib_sysfs_cleanup(); /* Make sure that any pending umem accounting work is done. */ destroy_workqueue(ib_wq); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 822cfdcd9f78..b4d8672a3e4e 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -276,6 +276,13 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, goto error1; } + /* Verify the QP requested is supported. For example, Ethernet devices + * will not have QP0 */ + if (!port_priv->qp_info[qpn].qp) { + ret = ERR_PTR(-EPROTONOSUPPORT); + goto error1; + } + /* Allocate structures */ mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL); if (!mad_agent_priv) { diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c new file mode 100644 index 000000000000..4a5abaf0a25c --- /dev/null +++ b/drivers/infiniband/core/netlink.c @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2010 Voltaire Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ + +#include <net/netlink.h> +#include <net/net_namespace.h> +#include <net/sock.h> +#include <rdma/rdma_netlink.h> + +struct ibnl_client { + struct list_head list; + int index; + int nops; + const struct ibnl_client_cbs *cb_table; +}; + +static DEFINE_MUTEX(ibnl_mutex); +static struct sock *nls; +static LIST_HEAD(client_list); + +int ibnl_add_client(int index, int nops, + const struct ibnl_client_cbs cb_table[]) +{ + struct ibnl_client *cur; + struct ibnl_client *nl_client; + + nl_client = kmalloc(sizeof *nl_client, GFP_KERNEL); + if (!nl_client) + return -ENOMEM; + + nl_client->index = index; + nl_client->nops = nops; + nl_client->cb_table = cb_table; + + mutex_lock(&ibnl_mutex); + + list_for_each_entry(cur, &client_list, list) { + if (cur->index == index) { + pr_warn("Client for %d already exists\n", index); + mutex_unlock(&ibnl_mutex); + kfree(nl_client); + return -EINVAL; + } + } + + list_add_tail(&nl_client->list, &client_list); + + mutex_unlock(&ibnl_mutex); + + return 0; +} +EXPORT_SYMBOL(ibnl_add_client); + +int ibnl_remove_client(int index) +{ + struct ibnl_client *cur, *next; + + mutex_lock(&ibnl_mutex); + list_for_each_entry_safe(cur, next, &client_list, list) { + if (cur->index == index) { + list_del(&(cur->list)); + mutex_unlock(&ibnl_mutex); + kfree(cur); + return 0; + } + } + pr_warn("Can't remove callback for client idx %d. Not found\n", index); + mutex_unlock(&ibnl_mutex); + + return -EINVAL; +} +EXPORT_SYMBOL(ibnl_remove_client); + +void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq, + int len, int client, int op) +{ + unsigned char *prev_tail; + + prev_tail = skb_tail_pointer(skb); + *nlh = NLMSG_NEW(skb, 0, seq, RDMA_NL_GET_TYPE(client, op), + len, NLM_F_MULTI); + (*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail; + return NLMSG_DATA(*nlh); + +nlmsg_failure: + nlmsg_trim(skb, prev_tail); + return NULL; +} +EXPORT_SYMBOL(ibnl_put_msg); + +int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, + int len, void *data, int type) +{ + unsigned char *prev_tail; + + prev_tail = skb_tail_pointer(skb); + NLA_PUT(skb, type, len, data); + nlh->nlmsg_len += skb_tail_pointer(skb) - prev_tail; + return 0; + +nla_put_failure: + nlmsg_trim(skb, prev_tail - nlh->nlmsg_len); + return -EMSGSIZE; +} +EXPORT_SYMBOL(ibnl_put_attr); + +static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct ibnl_client *client; + int type = nlh->nlmsg_type; + int index = RDMA_NL_GET_CLIENT(type); + int op = RDMA_NL_GET_OP(type); + + list_for_each_entry(client, &client_list, list) { + if (client->index == index) { + if (op < 0 || op >= client->nops || + !client->cb_table[RDMA_NL_GET_OP(op)].dump) + return -EINVAL; + return netlink_dump_start(nls, skb, nlh, + client->cb_table[op].dump, + NULL); + } + } + + pr_info("Index %d wasn't found in client list\n", index); + return -EINVAL; +} + +static void ibnl_rcv(struct sk_buff *skb) +{ + mutex_lock(&ibnl_mutex); + netlink_rcv_skb(skb, &ibnl_rcv_msg); + mutex_unlock(&ibnl_mutex); +} + +int __init ibnl_init(void) +{ + nls = netlink_kernel_create(&init_net, NETLINK_RDMA, 0, ibnl_rcv, + NULL, THIS_MODULE); + if (!nls) { + pr_warn("Failed to create netlink socket\n"); + return -ENOMEM; + } + + return 0; +} + +void ibnl_cleanup(void) +{ + struct ibnl_client *cur, *next; + + mutex_lock(&ibnl_mutex); + list_for_each_entry_safe(cur, next, &client_list, list) { + list_del(&(cur->list)); + kfree(cur); + } + mutex_unlock(&ibnl_mutex); + + netlink_kernel_release(nls); +} diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index b3fa798525b2..71be5eebd683 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -367,13 +367,28 @@ done: return ret; } -static ssize_t ucma_create_id(struct ucma_file *file, - const char __user *inbuf, - int in_len, int out_len) +static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type) +{ + switch (cmd->ps) { + case RDMA_PS_TCP: + *qp_type = IB_QPT_RC; + return 0; + case RDMA_PS_UDP: + case RDMA_PS_IPOIB: + *qp_type = IB_QPT_UD; + return 0; + default: + return -EINVAL; + } +} + +static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) { struct rdma_ucm_create_id cmd; struct rdma_ucm_create_id_resp resp; struct ucma_context *ctx; + enum ib_qp_type qp_type; int ret; if (out_len < sizeof(resp)) @@ -382,6 +397,10 @@ static ssize_t ucma_create_id(struct ucma_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; + ret = ucma_get_qp_type(&cmd, &qp_type); + if (ret) + return ret; + mutex_lock(&file->mut); ctx = ucma_alloc_ctx(file); mutex_unlock(&file->mut); @@ -389,7 +408,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, return -ENOMEM; ctx->uid = cmd.uid; - ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps); + ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type); if (IS_ERR(ctx->cm_id)) { ret = PTR_ERR(ctx->cm_id); goto err1; @@ -1338,9 +1357,11 @@ static const struct file_operations ucma_fops = { }; static struct miscdevice ucma_misc = { - .minor = MISC_DYNAMIC_MINOR, - .name = "rdma_cm", - .fops = &ucma_fops, + .minor = MISC_DYNAMIC_MINOR, + .name = "rdma_cm", + .nodename = "infiniband/rdma_cm", + .mode = 0666, + .fops = &ucma_fops, }; static ssize_t show_abi_version(struct device *dev, diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index cd1996d0ad08..8d261b6ea5fe 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1176,6 +1176,11 @@ static void ib_umad_remove_one(struct ib_device *device) kref_put(&umad_dev->ref, ib_umad_release_dev); } +static char *umad_devnode(struct device *dev, mode_t *mode) +{ + return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); +} + static int __init ib_umad_init(void) { int ret; @@ -1194,6 +1199,8 @@ static int __init ib_umad_init(void) goto out_chrdev; } + umad_class->devnode = umad_devnode; + ret = class_create_file(umad_class, &class_attr_abi_version.attr); if (ret) { printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n"); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index ec83e9fe387b..e49a85f8a44d 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -824,6 +824,12 @@ static void ib_uverbs_remove_one(struct ib_device *device) kfree(uverbs_dev); } +static char *uverbs_devnode(struct device *dev, mode_t *mode) +{ + *mode = 0666; + return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); +} + static int __init ib_uverbs_init(void) { int ret; @@ -842,6 +848,8 @@ static int __init ib_uverbs_init(void) goto out_chrdev; } + uverbs_class->devnode = uverbs_devnode; + ret = class_create_file(uverbs_class, &class_attr_abi_version.attr); if (ret) { printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 239184138994..0a5008fbebac 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -914,7 +914,7 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) goto err; if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) { - iwch_post_zb_read(ep->com.qp); + iwch_post_zb_read(ep); } goto out; @@ -1078,6 +1078,8 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct iwch_ep *ep = ctx; struct cpl_wr_ack *hdr = cplhdr(skb); unsigned int credits = ntohs(hdr->credits); + unsigned long flags; + int post_zb = 0; PDBG("%s ep %p credits %u\n", __func__, ep, credits); @@ -1087,28 +1089,34 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) return CPL_RET_BUF_DONE; } + spin_lock_irqsave(&ep->com.lock, flags); BUG_ON(credits != 1); dst_confirm(ep->dst); if (!ep->mpa_skb) { PDBG("%s rdma_init wr_ack ep %p state %u\n", - __func__, ep, state_read(&ep->com)); + __func__, ep, ep->com.state); if (ep->mpa_attr.initiator) { PDBG("%s initiator ep %p state %u\n", - __func__, ep, state_read(&ep->com)); - if (peer2peer) - iwch_post_zb_read(ep->com.qp); + __func__, ep, ep->com.state); + if (peer2peer && ep->com.state == FPDU_MODE) + post_zb = 1; } else { PDBG("%s responder ep %p state %u\n", - __func__, ep, state_read(&ep->com)); - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); + __func__, ep, ep->com.state); + if (ep->com.state == MPA_REQ_RCVD) { + ep->com.rpl_done = 1; + wake_up(&ep->com.waitq); + } } } else { PDBG("%s lsm ack ep %p state %u freeing skb\n", - __func__, ep, state_read(&ep->com)); + __func__, ep, ep->com.state); kfree_skb(ep->mpa_skb); ep->mpa_skb = NULL; } + spin_unlock_irqrestore(&ep->com.lock, flags); + if (post_zb) + iwch_post_zb_read(ep); return CPL_RET_BUF_DONE; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index c5406da3f4cd..9a342c9b220d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -332,7 +332,7 @@ int iwch_bind_mw(struct ib_qp *qp, struct ib_mw_bind *mw_bind); int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); -int iwch_post_zb_read(struct iwch_qp *qhp); +int iwch_post_zb_read(struct iwch_ep *ep); int iwch_register_device(struct iwch_dev *dev); void iwch_unregister_device(struct iwch_dev *dev); void stop_read_rep_timer(struct iwch_qp *qhp); diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 1b4cd09f74dc..ecd313f359a4 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -738,7 +738,7 @@ static inline void build_term_codes(struct respQ_msg_t *rsp_msg, } } -int iwch_post_zb_read(struct iwch_qp *qhp) +int iwch_post_zb_read(struct iwch_ep *ep) { union t3_wr *wqe; struct sk_buff *skb; @@ -761,10 +761,10 @@ int iwch_post_zb_read(struct iwch_qp *qhp) wqe->read.local_len = cpu_to_be32(0); wqe->read.local_to = cpu_to_be64(1); wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ)); - wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)| + wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(ep->hwtid)| V_FW_RIWR_LEN(flit_cnt)); skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); + return iwch_cxgb3_ofld_send(ep->com.qp->rhp->rdev.t3cdev_p, skb); } /* diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 35d2a5dd9bb4..4f045375c8e2 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -35,7 +35,7 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/idr.h> -#include <linux/workqueue.h> +#include <linux/completion.h> #include <linux/netdevice.h> #include <linux/sched.h> #include <linux/pci.h> @@ -131,28 +131,21 @@ static inline int c4iw_num_stags(struct c4iw_rdev *rdev) #define C4IW_WR_TO (10*HZ) -enum { - REPLY_READY = 0, -}; - struct c4iw_wr_wait { - wait_queue_head_t wait; - unsigned long status; + struct completion completion; int ret; }; static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp) { wr_waitp->ret = 0; - wr_waitp->status = 0; - init_waitqueue_head(&wr_waitp->wait); + init_completion(&wr_waitp->completion); } static inline void c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret) { wr_waitp->ret = ret; - set_bit(REPLY_READY, &wr_waitp->status); - wake_up(&wr_waitp->wait); + complete(&wr_waitp->completion); } static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, @@ -164,8 +157,7 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, int ret; do { - ret = wait_event_timeout(wr_waitp->wait, - test_and_clear_bit(REPLY_READY, &wr_waitp->status), to); + ret = wait_for_completion_timeout(&wr_waitp->completion, to); if (!ret) { printk(KERN_ERR MOD "%s - Device %s not responding - " "tid %u qpid %u\n", func, diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 13de1192927c..2d668c69f6d9 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -1138,7 +1138,9 @@ static ssize_t nes_store_wqm_quanta(struct device_driver *ddp, u32 i = 0; struct nes_device *nesdev; - strict_strtoul(buf, 0, &wqm_quanta_value); + if (kstrtoul(buf, 0, &wqm_quanta_value) < 0) + return -EINVAL; + list_for_each_entry(nesdev, &nes_dev_list, list) { if (i == ee_flsh_adapter) { nesdev->nesadapter->wqm_quanta = wqm_quanta_value; diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig index 7c03a70c55a2..8349f9c5064c 100644 --- a/drivers/infiniband/hw/qib/Kconfig +++ b/drivers/infiniband/hw/qib/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_QIB tristate "QLogic PCIe HCA support" - depends on 64BIT && NET + depends on 64BIT ---help--- This is a low-level driver for QLogic PCIe QLE InfiniBand host channel adapters. This driver does not support the QLogic diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 9876865732f7..ede1475bee09 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -548,7 +548,7 @@ int iser_connect(struct iser_conn *ib_conn, iser_conn_get(ib_conn); /* ref ib conn's cma id */ ib_conn->cma_id = rdma_create_id(iser_cma_handler, (void *)ib_conn, - RDMA_PS_TCP); + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ib_conn->cma_id)) { err = PTR_ERR(ib_conn->cma_id); iser_err("rdma_create_id failed: %d\n", err); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 376d640487d2..ee165fdcb596 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1147,7 +1147,7 @@ static void srp_process_aer_req(struct srp_target_port *target, static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) { struct ib_device *dev = target->srp_host->srp_dev->dev; - struct srp_iu *iu = (struct srp_iu *) wc->wr_id; + struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id; int res; u8 opcode; @@ -1231,7 +1231,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr) break; } - iu = (struct srp_iu *) wc.wr_id; + iu = (struct srp_iu *) (uintptr_t) wc.wr_id; list_add(&iu->list, &target->free_tx); } } diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index fc14b8dea0d7..fbd96b29530d 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -271,8 +271,8 @@ config SPI_ORION This enables using the SPI master controller on the Orion chips. config SPI_PL022 - tristate "ARM AMBA PL022 SSP controller (EXPERIMENTAL)" - depends on ARM_AMBA && EXPERIMENTAL + tristate "ARM AMBA PL022 SSP controller" + depends on ARM_AMBA default y if MACH_U300 default y if ARCH_REALVIEW default y if INTEGRATOR_IMPD1 diff --git a/drivers/spi/amba-pl022.c b/drivers/spi/amba-pl022.c index 08de58e7f59f..6a9e58dd36c7 100644 --- a/drivers/spi/amba-pl022.c +++ b/drivers/spi/amba-pl022.c @@ -24,11 +24,6 @@ * GNU General Public License for more details. */ -/* - * TODO: - * - add timeout on polled transfers - */ - #include <linux/init.h> #include <linux/module.h> #include <linux/device.h> @@ -287,6 +282,8 @@ #define CLEAR_ALL_INTERRUPTS 0x3 +#define SPI_POLLING_TIMEOUT 1000 + /* * The type of reading going on on this chip @@ -1063,7 +1060,7 @@ static int __init pl022_dma_probe(struct pl022 *pl022) pl022->master_info->dma_filter, pl022->master_info->dma_rx_param); if (!pl022->dma_rx_channel) { - dev_err(&pl022->adev->dev, "no RX DMA channel!\n"); + dev_dbg(&pl022->adev->dev, "no RX DMA channel!\n"); goto err_no_rxchan; } @@ -1071,13 +1068,13 @@ static int __init pl022_dma_probe(struct pl022 *pl022) pl022->master_info->dma_filter, pl022->master_info->dma_tx_param); if (!pl022->dma_tx_channel) { - dev_err(&pl022->adev->dev, "no TX DMA channel!\n"); + dev_dbg(&pl022->adev->dev, "no TX DMA channel!\n"); goto err_no_txchan; } pl022->dummypage = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!pl022->dummypage) { - dev_err(&pl022->adev->dev, "no DMA dummypage!\n"); + dev_dbg(&pl022->adev->dev, "no DMA dummypage!\n"); goto err_no_dummypage; } @@ -1093,6 +1090,8 @@ err_no_txchan: dma_release_channel(pl022->dma_rx_channel); pl022->dma_rx_channel = NULL; err_no_rxchan: + dev_err(&pl022->adev->dev, + "Failed to work in dma mode, work without dma!\n"); return -ENODEV; } @@ -1378,6 +1377,7 @@ static void do_polling_transfer(struct pl022 *pl022) struct spi_transfer *transfer = NULL; struct spi_transfer *previous = NULL; struct chip_data *chip; + unsigned long time, timeout; chip = pl022->cur_chip; message = pl022->cur_msg; @@ -1415,9 +1415,19 @@ static void do_polling_transfer(struct pl022 *pl022) SSP_CR1(pl022->virtbase)); dev_dbg(&pl022->adev->dev, "polling transfer ongoing ...\n"); - /* FIXME: insert a timeout so we don't hang here indefinitely */ - while (pl022->tx < pl022->tx_end || pl022->rx < pl022->rx_end) + + timeout = jiffies + msecs_to_jiffies(SPI_POLLING_TIMEOUT); + while (pl022->tx < pl022->tx_end || pl022->rx < pl022->rx_end) { + time = jiffies; readwriter(pl022); + if (time_after(time, timeout)) { + dev_warn(&pl022->adev->dev, + "%s: timeout!\n", __func__); + message->state = STATE_ERROR; + goto out; + } + cpu_relax(); + } /* Update total byte transferred */ message->actual_length += pl022->cur_transfer->len; @@ -1426,7 +1436,7 @@ static void do_polling_transfer(struct pl022 *pl022) /* Move to next transfer */ message->state = next_transfer(pl022); } - +out: /* Handle end of message */ if (message->state == STATE_DONE) message->status = 0; @@ -2107,7 +2117,7 @@ pl022_probe(struct amba_device *adev, const struct amba_id *id) if (platform_info->enable_dma) { status = pl022_dma_probe(pl022); if (status != 0) - goto err_no_dma; + platform_info->enable_dma = 0; } /* Initialize and start queue */ @@ -2143,7 +2153,6 @@ pl022_probe(struct amba_device *adev, const struct amba_id *id) err_init_queue: destroy_queue(pl022); pl022_dma_remove(pl022); - err_no_dma: free_irq(adev->irq[0], pl022); err_no_irq: clk_put(pl022->clk); diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c index 871e337c917f..919fa9d9e16b 100644 --- a/drivers/spi/dw_spi.c +++ b/drivers/spi/dw_spi.c @@ -58,8 +58,6 @@ struct chip_data { u8 bits_per_word; u16 clk_div; /* baud rate divider */ u32 speed_hz; /* baud rate */ - int (*write)(struct dw_spi *dws); - int (*read)(struct dw_spi *dws); void (*cs_control)(u32 command); }; @@ -162,107 +160,70 @@ static inline void mrst_spi_debugfs_remove(struct dw_spi *dws) } #endif /* CONFIG_DEBUG_FS */ -static void wait_till_not_busy(struct dw_spi *dws) +/* Return the max entries we can fill into tx fifo */ +static inline u32 tx_max(struct dw_spi *dws) { - unsigned long end = jiffies + 1 + usecs_to_jiffies(5000); + u32 tx_left, tx_room, rxtx_gap; - while (time_before(jiffies, end)) { - if (!(dw_readw(dws, sr) & SR_BUSY)) - return; - cpu_relax(); - } - dev_err(&dws->master->dev, - "DW SPI: Status keeps busy for 5000us after a read/write!\n"); -} - -static void flush(struct dw_spi *dws) -{ - while (dw_readw(dws, sr) & SR_RF_NOT_EMPT) { - dw_readw(dws, dr); - cpu_relax(); - } - - wait_till_not_busy(dws); -} - -static int null_writer(struct dw_spi *dws) -{ - u8 n_bytes = dws->n_bytes; + tx_left = (dws->tx_end - dws->tx) / dws->n_bytes; + tx_room = dws->fifo_len - dw_readw(dws, txflr); - if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL) - || (dws->tx == dws->tx_end)) - return 0; - dw_writew(dws, dr, 0); - dws->tx += n_bytes; + /* + * Another concern is about the tx/rx mismatch, we + * though to use (dws->fifo_len - rxflr - txflr) as + * one maximum value for tx, but it doesn't cover the + * data which is out of tx/rx fifo and inside the + * shift registers. So a control from sw point of + * view is taken. + */ + rxtx_gap = ((dws->rx_end - dws->rx) - (dws->tx_end - dws->tx)) + / dws->n_bytes; - wait_till_not_busy(dws); - return 1; + return min3(tx_left, tx_room, (u32) (dws->fifo_len - rxtx_gap)); } -static int null_reader(struct dw_spi *dws) +/* Return the max entries we should read out of rx fifo */ +static inline u32 rx_max(struct dw_spi *dws) { - u8 n_bytes = dws->n_bytes; + u32 rx_left = (dws->rx_end - dws->rx) / dws->n_bytes; - while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT) - && (dws->rx < dws->rx_end)) { - dw_readw(dws, dr); - dws->rx += n_bytes; - } - wait_till_not_busy(dws); - return dws->rx == dws->rx_end; + return min(rx_left, (u32)dw_readw(dws, rxflr)); } -static int u8_writer(struct dw_spi *dws) +static void dw_writer(struct dw_spi *dws) { - if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL) - || (dws->tx == dws->tx_end)) - return 0; + u32 max = tx_max(dws); + u16 txw = 0; - dw_writew(dws, dr, *(u8 *)(dws->tx)); - ++dws->tx; - - wait_till_not_busy(dws); - return 1; -} - -static int u8_reader(struct dw_spi *dws) -{ - while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT) - && (dws->rx < dws->rx_end)) { - *(u8 *)(dws->rx) = dw_readw(dws, dr); - ++dws->rx; + while (max--) { + /* Set the tx word if the transfer's original "tx" is not null */ + if (dws->tx_end - dws->len) { + if (dws->n_bytes == 1) + txw = *(u8 *)(dws->tx); + else + txw = *(u16 *)(dws->tx); + } + dw_writew(dws, dr, txw); + dws->tx += dws->n_bytes; } - - wait_till_not_busy(dws); - return dws->rx == dws->rx_end; } -static int u16_writer(struct dw_spi *dws) +static void dw_reader(struct dw_spi *dws) { - if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL) - || (dws->tx == dws->tx_end)) - return 0; + u32 max = rx_max(dws); + u16 rxw; - dw_writew(dws, dr, *(u16 *)(dws->tx)); - dws->tx += 2; - - wait_till_not_busy(dws); - return 1; -} - -static int u16_reader(struct dw_spi *dws) -{ - u16 temp; - - while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT) - && (dws->rx < dws->rx_end)) { - temp = dw_readw(dws, dr); - *(u16 *)(dws->rx) = temp; - dws->rx += 2; + while (max--) { + rxw = dw_readw(dws, dr); + /* Care rx only if the transfer's original "rx" is not null */ + if (dws->rx_end - dws->len) { + if (dws->n_bytes == 1) + *(u8 *)(dws->rx) = rxw; + else + *(u16 *)(dws->rx) = rxw; + } + dws->rx += dws->n_bytes; } - - wait_till_not_busy(dws); - return dws->rx == dws->rx_end; } static void *next_transfer(struct dw_spi *dws) @@ -334,8 +295,7 @@ static void giveback(struct dw_spi *dws) static void int_error_stop(struct dw_spi *dws, const char *msg) { - /* Stop and reset hw */ - flush(dws); + /* Stop the hw */ spi_enable_chip(dws, 0); dev_err(&dws->master->dev, "%s\n", msg); @@ -362,35 +322,28 @@ EXPORT_SYMBOL_GPL(dw_spi_xfer_done); static irqreturn_t interrupt_transfer(struct dw_spi *dws) { - u16 irq_status, irq_mask = 0x3f; - u32 int_level = dws->fifo_len / 2; - u32 left; + u16 irq_status = dw_readw(dws, isr); - irq_status = dw_readw(dws, isr) & irq_mask; /* Error handling */ if (irq_status & (SPI_INT_TXOI | SPI_INT_RXOI | SPI_INT_RXUI)) { dw_readw(dws, txoicr); dw_readw(dws, rxoicr); dw_readw(dws, rxuicr); - int_error_stop(dws, "interrupt_transfer: fifo overrun"); + int_error_stop(dws, "interrupt_transfer: fifo overrun/underrun"); return IRQ_HANDLED; } + dw_reader(dws); + if (dws->rx_end == dws->rx) { + spi_mask_intr(dws, SPI_INT_TXEI); + dw_spi_xfer_done(dws); + return IRQ_HANDLED; + } if (irq_status & SPI_INT_TXEI) { spi_mask_intr(dws, SPI_INT_TXEI); - - left = (dws->tx_end - dws->tx) / dws->n_bytes; - left = (left > int_level) ? int_level : left; - - while (left--) - dws->write(dws); - dws->read(dws); - - /* Re-enable the IRQ if there is still data left to tx */ - if (dws->tx_end > dws->tx) - spi_umask_intr(dws, SPI_INT_TXEI); - else - dw_spi_xfer_done(dws); + dw_writer(dws); + /* Enable TX irq always, it will be disabled when RX finished */ + spi_umask_intr(dws, SPI_INT_TXEI); } return IRQ_HANDLED; @@ -399,15 +352,13 @@ static irqreturn_t interrupt_transfer(struct dw_spi *dws) static irqreturn_t dw_spi_irq(int irq, void *dev_id) { struct dw_spi *dws = dev_id; - u16 irq_status, irq_mask = 0x3f; + u16 irq_status = dw_readw(dws, isr) & 0x3f; - irq_status = dw_readw(dws, isr) & irq_mask; if (!irq_status) return IRQ_NONE; if (!dws->cur_msg) { spi_mask_intr(dws, SPI_INT_TXEI); - /* Never fail */ return IRQ_HANDLED; } @@ -417,13 +368,11 @@ static irqreturn_t dw_spi_irq(int irq, void *dev_id) /* Must be called inside pump_transfers() */ static void poll_transfer(struct dw_spi *dws) { - while (dws->write(dws)) - dws->read(dws); - /* - * There is a possibility that the last word of a transaction - * will be lost if data is not ready. Re-read to solve this issue. - */ - dws->read(dws); + do { + dw_writer(dws); + dw_reader(dws); + cpu_relax(); + } while (dws->rx_end > dws->rx); dw_spi_xfer_done(dws); } @@ -483,8 +432,6 @@ static void pump_transfers(unsigned long data) dws->tx_end = dws->tx + transfer->len; dws->rx = transfer->rx_buf; dws->rx_end = dws->rx + transfer->len; - dws->write = dws->tx ? chip->write : null_writer; - dws->read = dws->rx ? chip->read : null_reader; dws->cs_change = transfer->cs_change; dws->len = dws->cur_transfer->len; if (chip != dws->prev_chip) @@ -518,20 +465,8 @@ static void pump_transfers(unsigned long data) switch (bits) { case 8: - dws->n_bytes = 1; - dws->dma_width = 1; - dws->read = (dws->read != null_reader) ? - u8_reader : null_reader; - dws->write = (dws->write != null_writer) ? - u8_writer : null_writer; - break; case 16: - dws->n_bytes = 2; - dws->dma_width = 2; - dws->read = (dws->read != null_reader) ? - u16_reader : null_reader; - dws->write = (dws->write != null_writer) ? - u16_writer : null_writer; + dws->n_bytes = dws->dma_width = bits >> 3; break; default: printk(KERN_ERR "MRST SPI0: unsupported bits:" @@ -575,7 +510,7 @@ static void pump_transfers(unsigned long data) txint_level = dws->fifo_len / 2; txint_level = (templen > txint_level) ? txint_level : templen; - imask |= SPI_INT_TXEI; + imask |= SPI_INT_TXEI | SPI_INT_TXOI | SPI_INT_RXUI | SPI_INT_RXOI; dws->transfer_handler = interrupt_transfer; } @@ -733,13 +668,9 @@ static int dw_spi_setup(struct spi_device *spi) if (spi->bits_per_word <= 8) { chip->n_bytes = 1; chip->dma_width = 1; - chip->read = u8_reader; - chip->write = u8_writer; } else if (spi->bits_per_word <= 16) { chip->n_bytes = 2; chip->dma_width = 2; - chip->read = u16_reader; - chip->write = u16_writer; } else { /* Never take >16b case for MRST SPIC */ dev_err(&spi->dev, "invalid wordsize\n"); @@ -851,7 +782,6 @@ static void spi_hw_init(struct dw_spi *dws) spi_enable_chip(dws, 0); spi_mask_intr(dws, 0xff); spi_enable_chip(dws, 1); - flush(dws); /* * Try to detect the FIFO depth if not set by interface driver, diff --git a/drivers/spi/dw_spi.h b/drivers/spi/dw_spi.h index b23e452adaf7..7a5e78d2a5cb 100644 --- a/drivers/spi/dw_spi.h +++ b/drivers/spi/dw_spi.h @@ -137,8 +137,6 @@ struct dw_spi { u8 max_bits_per_word; /* maxim is 16b */ u32 dma_width; int cs_change; - int (*write)(struct dw_spi *dws); - int (*read)(struct dw_spi *dws); irqreturn_t (*transfer_handler)(struct dw_spi *dws); void (*cs_control)(u32 command); diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 82b9a428c323..2e13a14bba3f 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1047,8 +1047,8 @@ static u8 *buf; * spi_{async,sync}() calls with dma-safe buffers. */ int spi_write_then_read(struct spi_device *spi, - const u8 *txbuf, unsigned n_tx, - u8 *rxbuf, unsigned n_rx) + const void *txbuf, unsigned n_tx, + void *rxbuf, unsigned n_rx) { static DEFINE_MUTEX(lock); diff --git a/drivers/spi/spi_nuc900.c b/drivers/spi/spi_nuc900.c index d5be18b3078c..3cd15f690f16 100644 --- a/drivers/spi/spi_nuc900.c +++ b/drivers/spi/spi_nuc900.c @@ -463,7 +463,7 @@ static int __devexit nuc900_spi_remove(struct platform_device *dev) platform_set_drvdata(dev, NULL); - spi_unregister_master(hw->master); + spi_bitbang_stop(&hw->bitbang); clk_disable(hw->clk); clk_put(hw->clk); diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c index 151a95e40653..1a5fcabfd565 100644 --- a/drivers/spi/spi_s3c24xx.c +++ b/drivers/spi/spi_s3c24xx.c @@ -668,7 +668,7 @@ static int __exit s3c24xx_spi_remove(struct platform_device *dev) platform_set_drvdata(dev, NULL); - spi_unregister_master(hw->master); + spi_bitbang_stop(&hw->bitbang); clk_disable(hw->clk); clk_put(hw->clk); diff --git a/drivers/spi/spi_sh.c b/drivers/spi/spi_sh.c index 869a07d375d6..9eedd71ad898 100644 --- a/drivers/spi/spi_sh.c +++ b/drivers/spi/spi_sh.c @@ -427,10 +427,10 @@ static int __devexit spi_sh_remove(struct platform_device *pdev) { struct spi_sh_data *ss = dev_get_drvdata(&pdev->dev); + spi_unregister_master(ss->master); destroy_workqueue(ss->workqueue); free_irq(ss->irq, ss); iounmap(ss->addr); - spi_master_put(ss->master); return 0; } diff --git a/drivers/spi/spi_tegra.c b/drivers/spi/spi_tegra.c index 891e5909038c..6c3aa6ecaade 100644 --- a/drivers/spi/spi_tegra.c +++ b/drivers/spi/spi_tegra.c @@ -578,6 +578,7 @@ static int __devexit spi_tegra_remove(struct platform_device *pdev) master = dev_get_drvdata(&pdev->dev); tspi = spi_master_get_devdata(master); + spi_unregister_master(master); tegra_dma_free_channel(tspi->rx_dma); dma_free_coherent(&pdev->dev, sizeof(u32) * BB_LEN, @@ -586,7 +587,6 @@ static int __devexit spi_tegra_remove(struct platform_device *pdev) clk_put(tspi->clk); iounmap(tspi->base); - spi_master_put(master); r = platform_get_resource(pdev, IORESOURCE_MEM, 0); release_mem_region(r->start, (r->end - r->start) + 1); diff --git a/drivers/video/omap/Makefile b/drivers/video/omap/Makefile index 49226a1b909e..25db55696e14 100644 --- a/drivers/video/omap/Makefile +++ b/drivers/video/omap/Makefile @@ -30,7 +30,6 @@ objs-y$(CONFIG_MACH_OMAP_APOLLON) += lcd_apollon.o objs-y$(CONFIG_MACH_OMAP_2430SDP) += lcd_2430sdp.o objs-y$(CONFIG_MACH_OMAP_3430SDP) += lcd_2430sdp.o objs-y$(CONFIG_MACH_OMAP_LDP) += lcd_ldp.o -objs-y$(CONFIG_MACH_OMAP2EVM) += lcd_omap2evm.o objs-y$(CONFIG_MACH_OMAP3EVM) += lcd_omap3evm.o objs-y$(CONFIG_MACH_OMAP3_BEAGLE) += lcd_omap3beagle.o objs-y$(CONFIG_FB_OMAP_LCD_MIPID) += lcd_mipid.o diff --git a/drivers/video/omap/lcd_omap2evm.c b/drivers/video/omap/lcd_omap2evm.c deleted file mode 100644 index 7e7a65c08452..000000000000 --- a/drivers/video/omap/lcd_omap2evm.c +++ /dev/null @@ -1,192 +0,0 @@ -/* - * LCD panel support for the MISTRAL OMAP2EVM board - * - * Author: Arun C <arunedarath@mistralsolutions.com> - * - * Derived from drivers/video/omap/lcd_omap3evm.c - * Derived from drivers/video/omap/lcd-apollon.c - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ - -#include <linux/module.h> -#include <linux/platform_device.h> -#include <linux/gpio.h> -#include <linux/i2c/twl.h> - -#include <plat/mux.h> -#include <asm/mach-types.h> - -#include "omapfb.h" - -#define LCD_PANEL_ENABLE_GPIO 154 -#define LCD_PANEL_LR 128 -#define LCD_PANEL_UD 129 -#define LCD_PANEL_INI 152 -#define LCD_PANEL_QVGA 148 -#define LCD_PANEL_RESB 153 - -#define TWL_LED_LEDEN 0x00 -#define TWL_PWMA_PWMAON 0x00 -#define TWL_PWMA_PWMAOFF 0x01 - -static unsigned int bklight_level; - -static int omap2evm_panel_init(struct lcd_panel *panel, - struct omapfb_device *fbdev) -{ - gpio_request(LCD_PANEL_ENABLE_GPIO, "LCD enable"); - gpio_request(LCD_PANEL_LR, "LCD lr"); - gpio_request(LCD_PANEL_UD, "LCD ud"); - gpio_request(LCD_PANEL_INI, "LCD ini"); - gpio_request(LCD_PANEL_QVGA, "LCD qvga"); - gpio_request(LCD_PANEL_RESB, "LCD resb"); - - gpio_direction_output(LCD_PANEL_ENABLE_GPIO, 1); - gpio_direction_output(LCD_PANEL_RESB, 1); - gpio_direction_output(LCD_PANEL_INI, 1); - gpio_direction_output(LCD_PANEL_QVGA, 0); - gpio_direction_output(LCD_PANEL_LR, 1); - gpio_direction_output(LCD_PANEL_UD, 1); - - twl_i2c_write_u8(TWL4030_MODULE_LED, 0x11, TWL_LED_LEDEN); - twl_i2c_write_u8(TWL4030_MODULE_PWMA, 0x01, TWL_PWMA_PWMAON); - twl_i2c_write_u8(TWL4030_MODULE_PWMA, 0x02, TWL_PWMA_PWMAOFF); - bklight_level = 100; - - return 0; -} - -static void omap2evm_panel_cleanup(struct lcd_panel *panel) -{ - gpio_free(LCD_PANEL_RESB); - gpio_free(LCD_PANEL_QVGA); - gpio_free(LCD_PANEL_INI); - gpio_free(LCD_PANEL_UD); - gpio_free(LCD_PANEL_LR); - gpio_free(LCD_PANEL_ENABLE_GPIO); -} - -static int omap2evm_panel_enable(struct lcd_panel *panel) -{ - gpio_set_value(LCD_PANEL_ENABLE_GPIO, 0); - return 0; -} - -static void omap2evm_panel_disable(struct lcd_panel *panel) -{ - gpio_set_value(LCD_PANEL_ENABLE_GPIO, 1); -} - -static unsigned long omap2evm_panel_get_caps(struct lcd_panel *panel) -{ - return 0; -} - -static int omap2evm_bklight_setlevel(struct lcd_panel *panel, - unsigned int level) -{ - u8 c; - if ((level >= 0) && (level <= 100)) { - c = (125 * (100 - level)) / 100 + 2; - twl_i2c_write_u8(TWL4030_MODULE_PWMA, c, TWL_PWMA_PWMAOFF); - bklight_level = level; - } - return 0; -} - -static unsigned int omap2evm_bklight_getlevel(struct lcd_panel *panel) -{ - return bklight_level; -} - -static unsigned int omap2evm_bklight_getmaxlevel(struct lcd_panel *panel) -{ - return 100; -} - -struct lcd_panel omap2evm_panel = { - .name = "omap2evm", - .config = OMAP_LCDC_PANEL_TFT | OMAP_LCDC_INV_VSYNC | - OMAP_LCDC_INV_HSYNC, - - .bpp = 16, - .data_lines = 18, - .x_res = 480, - .y_res = 640, - .hsw = 3, - .hfp = 0, - .hbp = 28, - .vsw = 2, - .vfp = 1, - .vbp = 0, - - .pixel_clock = 20000, - - .init = omap2evm_panel_init, - .cleanup = omap2evm_panel_cleanup, - .enable = omap2evm_panel_enable, - .disable = omap2evm_panel_disable, - .get_caps = omap2evm_panel_get_caps, - .set_bklight_level = omap2evm_bklight_setlevel, - .get_bklight_level = omap2evm_bklight_getlevel, - .get_bklight_max = omap2evm_bklight_getmaxlevel, -}; - -static int omap2evm_panel_probe(struct platform_device *pdev) -{ - omapfb_register_panel(&omap2evm_panel); - return 0; -} - -static int omap2evm_panel_remove(struct platform_device *pdev) -{ - return 0; -} - -static int omap2evm_panel_suspend(struct platform_device *pdev, - pm_message_t mesg) -{ - return 0; -} - -static int omap2evm_panel_resume(struct platform_device *pdev) -{ - return 0; -} - -struct platform_driver omap2evm_panel_driver = { - .probe = omap2evm_panel_probe, - .remove = omap2evm_panel_remove, - .suspend = omap2evm_panel_suspend, - .resume = omap2evm_panel_resume, - .driver = { - .name = "omap2evm_lcd", - .owner = THIS_MODULE, - }, -}; - -static int __init omap2evm_panel_drv_init(void) -{ - return platform_driver_register(&omap2evm_panel_driver); -} - -static void __exit omap2evm_panel_drv_exit(void) -{ - platform_driver_unregister(&omap2evm_panel_driver); -} - -module_init(omap2evm_panel_drv_init); -module_exit(omap2evm_panel_drv_exit); diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 4781f806701d..bbc18258ecc5 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,5 +1,6 @@ obj-y += grant-table.o features.o events.o manage.o balloon.o obj-y += xenbus/ +obj-y += tmem.o nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_features.o := $(nostackp) diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c new file mode 100644 index 000000000000..816a44959ef0 --- /dev/null +++ b/drivers/xen/tmem.c @@ -0,0 +1,264 @@ +/* + * Xen implementation for transcendent memory (tmem) + * + * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. + * Author: Dan Magenheimer + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/pagemap.h> +#include <linux/cleancache.h> + +#include <xen/xen.h> +#include <xen/interface/xen.h> +#include <asm/xen/hypercall.h> +#include <asm/xen/page.h> +#include <asm/xen/hypervisor.h> + +#define TMEM_CONTROL 0 +#define TMEM_NEW_POOL 1 +#define TMEM_DESTROY_POOL 2 +#define TMEM_NEW_PAGE 3 +#define TMEM_PUT_PAGE 4 +#define TMEM_GET_PAGE 5 +#define TMEM_FLUSH_PAGE 6 +#define TMEM_FLUSH_OBJECT 7 +#define TMEM_READ 8 +#define TMEM_WRITE 9 +#define TMEM_XCHG 10 + +/* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */ +#define TMEM_POOL_PERSIST 1 +#define TMEM_POOL_SHARED 2 +#define TMEM_POOL_PAGESIZE_SHIFT 4 +#define TMEM_VERSION_SHIFT 24 + + +struct tmem_pool_uuid { + u64 uuid_lo; + u64 uuid_hi; +}; + +struct tmem_oid { + u64 oid[3]; +}; + +#define TMEM_POOL_PRIVATE_UUID { 0, 0 } + +/* flags for tmem_ops.new_pool */ +#define TMEM_POOL_PERSIST 1 +#define TMEM_POOL_SHARED 2 + +/* xen tmem foundation ops/hypercalls */ + +static inline int xen_tmem_op(u32 tmem_cmd, u32 tmem_pool, struct tmem_oid oid, + u32 index, unsigned long gmfn, u32 tmem_offset, u32 pfn_offset, u32 len) +{ + struct tmem_op op; + int rc = 0; + + op.cmd = tmem_cmd; + op.pool_id = tmem_pool; + op.u.gen.oid[0] = oid.oid[0]; + op.u.gen.oid[1] = oid.oid[1]; + op.u.gen.oid[2] = oid.oid[2]; + op.u.gen.index = index; + op.u.gen.tmem_offset = tmem_offset; + op.u.gen.pfn_offset = pfn_offset; + op.u.gen.len = len; + set_xen_guest_handle(op.u.gen.gmfn, (void *)gmfn); + rc = HYPERVISOR_tmem_op(&op); + return rc; +} + +static int xen_tmem_new_pool(struct tmem_pool_uuid uuid, + u32 flags, unsigned long pagesize) +{ + struct tmem_op op; + int rc = 0, pageshift; + + for (pageshift = 0; pagesize != 1; pageshift++) + pagesize >>= 1; + flags |= (pageshift - 12) << TMEM_POOL_PAGESIZE_SHIFT; + flags |= TMEM_SPEC_VERSION << TMEM_VERSION_SHIFT; + op.cmd = TMEM_NEW_POOL; + op.u.new.uuid[0] = uuid.uuid_lo; + op.u.new.uuid[1] = uuid.uuid_hi; + op.u.new.flags = flags; + rc = HYPERVISOR_tmem_op(&op); + return rc; +} + +/* xen generic tmem ops */ + +static int xen_tmem_put_page(u32 pool_id, struct tmem_oid oid, + u32 index, unsigned long pfn) +{ + unsigned long gmfn = xen_pv_domain() ? pfn_to_mfn(pfn) : pfn; + + return xen_tmem_op(TMEM_PUT_PAGE, pool_id, oid, index, + gmfn, 0, 0, 0); +} + +static int xen_tmem_get_page(u32 pool_id, struct tmem_oid oid, + u32 index, unsigned long pfn) +{ + unsigned long gmfn = xen_pv_domain() ? pfn_to_mfn(pfn) : pfn; + + return xen_tmem_op(TMEM_GET_PAGE, pool_id, oid, index, + gmfn, 0, 0, 0); +} + +static int xen_tmem_flush_page(u32 pool_id, struct tmem_oid oid, u32 index) +{ + return xen_tmem_op(TMEM_FLUSH_PAGE, pool_id, oid, index, + 0, 0, 0, 0); +} + +static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid) +{ + return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); +} + +static int xen_tmem_destroy_pool(u32 pool_id) +{ + struct tmem_oid oid = { { 0 } }; + + return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); +} + +int tmem_enabled; + +static int __init enable_tmem(char *s) +{ + tmem_enabled = 1; + return 1; +} + +__setup("tmem", enable_tmem); + +/* cleancache ops */ + +static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key, + pgoff_t index, struct page *page) +{ + u32 ind = (u32) index; + struct tmem_oid oid = *(struct tmem_oid *)&key; + unsigned long pfn = page_to_pfn(page); + + if (pool < 0) + return; + if (ind != index) + return; + mb(); /* ensure page is quiescent; tmem may address it with an alias */ + (void)xen_tmem_put_page((u32)pool, oid, ind, pfn); +} + +static int tmem_cleancache_get_page(int pool, struct cleancache_filekey key, + pgoff_t index, struct page *page) +{ + u32 ind = (u32) index; + struct tmem_oid oid = *(struct tmem_oid *)&key; + unsigned long pfn = page_to_pfn(page); + int ret; + + /* translate return values to linux semantics */ + if (pool < 0) + return -1; + if (ind != index) + return -1; + ret = xen_tmem_get_page((u32)pool, oid, ind, pfn); + if (ret == 1) + return 0; + else + return -1; +} + +static void tmem_cleancache_flush_page(int pool, struct cleancache_filekey key, + pgoff_t index) +{ + u32 ind = (u32) index; + struct tmem_oid oid = *(struct tmem_oid *)&key; + + if (pool < 0) + return; + if (ind != index) + return; + (void)xen_tmem_flush_page((u32)pool, oid, ind); +} + +static void tmem_cleancache_flush_inode(int pool, struct cleancache_filekey key) +{ + struct tmem_oid oid = *(struct tmem_oid *)&key; + + if (pool < 0) + return; + (void)xen_tmem_flush_object((u32)pool, oid); +} + +static void tmem_cleancache_flush_fs(int pool) +{ + if (pool < 0) + return; + (void)xen_tmem_destroy_pool((u32)pool); +} + +static int tmem_cleancache_init_fs(size_t pagesize) +{ + struct tmem_pool_uuid uuid_private = TMEM_POOL_PRIVATE_UUID; + + return xen_tmem_new_pool(uuid_private, 0, pagesize); +} + +static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize) +{ + struct tmem_pool_uuid shared_uuid; + + shared_uuid.uuid_lo = *(u64 *)uuid; + shared_uuid.uuid_hi = *(u64 *)(&uuid[8]); + return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize); +} + +static int use_cleancache = 1; + +static int __init no_cleancache(char *s) +{ + use_cleancache = 0; + return 1; +} + +__setup("nocleancache", no_cleancache); + +static struct cleancache_ops tmem_cleancache_ops = { + .put_page = tmem_cleancache_put_page, + .get_page = tmem_cleancache_get_page, + .flush_page = tmem_cleancache_flush_page, + .flush_inode = tmem_cleancache_flush_inode, + .flush_fs = tmem_cleancache_flush_fs, + .init_shared_fs = tmem_cleancache_init_shared_fs, + .init_fs = tmem_cleancache_init_fs +}; + +static int __init xen_tmem_init(void) +{ + struct cleancache_ops old_ops; + + if (!xen_domain()) + return 0; +#ifdef CONFIG_CLEANCACHE + BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); + if (tmem_enabled && use_cleancache) { + char *s = ""; + old_ops = cleancache_register_ops(&tmem_cleancache_ops); + if (old_ops.init_fs != NULL) + s = " (WARNING: cleancache_ops overridden)"; + printk(KERN_INFO "cleancache enabled, RAM provided by " + "Xen Transcendent Memory%s\n", s); + } +#endif + return 0; +} + +module_init(xen_tmem_init) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 7f6c67703195..8d7f3e69ae29 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -814,6 +814,7 @@ int v9fs_vfs_unlink(struct inode *i, struct dentry *d) int v9fs_vfs_rmdir(struct inode *i, struct dentry *d) { + dentry_unhash(d); return v9fs_remove(i, d, 1); } @@ -839,6 +840,9 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct p9_fid *newdirfid; struct p9_wstat wstat; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + P9_DPRINTK(P9_DEBUG_VFS, "\n"); retval = 0; old_inode = old_dentry->d_inode; diff --git a/fs/Kconfig b/fs/Kconfig index f6edba2e069f..19891aab9c6e 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -47,7 +47,7 @@ config FS_POSIX_ACL def_bool n config EXPORTFS - bool + tristate config FILE_LOCKING bool "Enable POSIX file locking API" if EXPERT diff --git a/fs/affs/namei.c b/fs/affs/namei.c index e3e9efc1fdd8..03330e2e390c 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -320,6 +320,8 @@ affs_rmdir(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_ino, (int)dentry->d_name.len, dentry->d_name.name); + dentry_unhash(dentry); + return affs_remove_header(dentry); } @@ -417,6 +419,9 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry, struct buffer_head *bh = NULL; int retval; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + pr_debug("AFFS: rename(old=%u,\"%*s\" to new=%u,\"%*s\")\n", (u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name, (u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 20c106f24927..2c4e05160042 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -845,6 +845,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) _enter("{%x:%u},{%s}", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); + dentry_unhash(dentry); + ret = -ENAMETOOLONG; if (dentry->d_name.len >= AFSNAMEMAX) goto error; @@ -1146,6 +1148,9 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, struct key *key; int ret; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + vnode = AFS_FS_I(old_dentry->d_inode); orig_dvnode = AFS_FS_I(old_dir); new_dvnode = AFS_FS_I(new_dir); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index f55ae23b137e..87d95a8cddbc 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -583,6 +583,8 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) return -EACCES; + dentry_unhash(dentry); + if (atomic_dec_and_test(&ino->count)) { p_ino = autofs4_dentry_ino(dentry->d_parent); if (p_ino && dentry->d_parent != dentry) diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index b14cebfd9047..c7d1d06b0483 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -224,6 +224,9 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct bfs_sb_info *info; int error = -ENOENT; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + old_bh = new_bh = NULL; old_inode = old_dentry->d_inode; if (S_ISDIR(old_inode->i_mode)) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 96fcfa522dab..4f9893243dae 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -11,6 +11,7 @@ #include <linux/writeback.h> #include <linux/pagevec.h> #include <linux/prefetch.h> +#include <linux/cleancache.h> #include "extent_io.h" #include "extent_map.h" #include "compat.h" @@ -2016,6 +2017,13 @@ static int __extent_read_full_page(struct extent_io_tree *tree, set_page_extent_mapped(page); + if (!PageUptodate(page)) { + if (cleancache_get_page(page) == 0) { + BUG_ON(blocksize != PAGE_SIZE); + goto out; + } + } + end = page_end; while (1) { lock_extent(tree, start, end, GFP_NOFS); @@ -2149,6 +2157,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, cur = cur + iosize; page_offset += iosize; } +out: if (!nr) { if (!PageError(page)) SetPageUptodate(page); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0ac712efcdf2..be4ffa12f3ef 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -39,6 +39,7 @@ #include <linux/miscdevice.h> #include <linux/magic.h> #include <linux/slab.h> +#include <linux/cleancache.h> #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -624,6 +625,7 @@ static int btrfs_fill_super(struct super_block *sb, sb->s_root = root_dentry; save_mount_options(sb, data); + cleancache_init_fs(sb); return 0; fail_close: diff --git a/fs/buffer.c b/fs/buffer.c index a08bb8e61c6f..698c6b2cc462 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -41,6 +41,7 @@ #include <linux/bitops.h> #include <linux/mpage.h> #include <linux/bit_spinlock.h> +#include <linux/cleancache.h> static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); @@ -269,6 +270,10 @@ void invalidate_bdev(struct block_device *bdev) invalidate_bh_lrus(); lru_add_drain_all(); /* make sure all lru add caches are flushed */ invalidate_mapping_pages(mapping, 0, -1); + /* 99% of the time, we don't need to flush the cleancache on the bdev. + * But, for the strange corners, lets be cautious + */ + cleancache_flush_inode(mapping); } EXPORT_SYMBOL(invalidate_bdev); @@ -2331,24 +2336,26 @@ EXPORT_SYMBOL(block_commit_write); * page lock we can determine safely if the page is beyond EOF. If it is not * beyond EOF, then the page is guaranteed safe against truncation until we * unlock the page. + * + * Direct callers of this function should call vfs_check_frozen() so that page + * fault does not busyloop until the fs is thawed. */ -int -block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, - get_block_t get_block) +int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, + get_block_t get_block) { struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; unsigned long end; loff_t size; - int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ + int ret; lock_page(page); size = i_size_read(inode); if ((page->mapping != inode->i_mapping) || (page_offset(page) > size)) { - /* page got truncated out from underneath us */ - unlock_page(page); - goto out; + /* We overload EFAULT to mean page got truncated */ + ret = -EFAULT; + goto out_unlock; } /* page is wholly or partially inside EOF */ @@ -2361,18 +2368,41 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, if (!ret) ret = block_commit_write(page, 0, end); - if (unlikely(ret)) { - unlock_page(page); - if (ret == -ENOMEM) - ret = VM_FAULT_OOM; - else /* -ENOSPC, -EIO, etc */ - ret = VM_FAULT_SIGBUS; - } else - ret = VM_FAULT_LOCKED; - -out: + if (unlikely(ret < 0)) + goto out_unlock; + /* + * Freezing in progress? We check after the page is marked dirty and + * with page lock held so if the test here fails, we are sure freezing + * code will wait during syncing until the page fault is done - at that + * point page will be dirty and unlocked so freezing code will write it + * and writeprotect it again. + */ + set_page_dirty(page); + if (inode->i_sb->s_frozen != SB_UNFROZEN) { + ret = -EAGAIN; + goto out_unlock; + } + return 0; +out_unlock: + unlock_page(page); return ret; } +EXPORT_SYMBOL(__block_page_mkwrite); + +int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, + get_block_t get_block) +{ + int ret; + struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb; + + /* + * This check is racy but catches the common case. The check in + * __block_page_mkwrite() is reliable. + */ + vfs_check_frozen(sb, SB_FREEZE_WRITE); + ret = __block_page_mkwrite(vma, vmf, get_block); + return block_page_mkwrite_return(ret); +} EXPORT_SYMBOL(block_page_mkwrite); /* diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 2b8dae4d121e..a46126fd5735 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -336,6 +336,8 @@ static int coda_rmdir(struct inode *dir, struct dentry *de) int len = de->d_name.len; int error; + dentry_unhash(de); + error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len); if (!error) { /* VFS may delete the child */ @@ -359,6 +361,9 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, int new_length = new_dentry->d_name.len; int error; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + error = venus_rename(old_dir->i_sb, coda_i2f(old_dir), coda_i2f(new_dir), old_length, new_length, (const char *) old_name, (const char *)new_name); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 9a37a9b6de3a..9d17d350abc5 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1359,6 +1359,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) struct module *subsys_owner = NULL, *dead_item_owner = NULL; int ret; + dentry_unhash(dentry); + if (dentry->d_parent == configfs_sb->s_root) return -EPERM; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 4d4cc6a90cd5..227b409b8406 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -521,6 +521,8 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) struct dentry *lower_dir_dentry; int rc; + dentry_unhash(dentry); + lower_dentry = ecryptfs_dentry_to_lower(dentry); dget(dentry); lower_dir_dentry = lock_parent(lower_dentry); @@ -571,6 +573,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *lower_new_dir_dentry; struct dentry *trap = NULL; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); dget(lower_old_dentry); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 3c6a9e0eadc1..aad153ef6b78 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -36,6 +36,7 @@ #include <linux/quotaops.h> #include <linux/seq_file.h> #include <linux/log2.h> +#include <linux/cleancache.h> #include <asm/uaccess.h> @@ -1367,6 +1368,7 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es, } else { ext3_msg(sb, KERN_INFO, "using internal journal"); } + cleancache_init_fs(sb); return res; } diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index c947e36eda6c..04109460ba9e 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -6,7 +6,8 @@ obj-$(CONFIG_EXT4_FS) += ext4.o ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ - ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o + ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ + mmp.o ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1c67139ad4b4..264f6949511e 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -362,130 +362,6 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) } /** - * ext4_add_groupblocks() -- Add given blocks to an existing group - * @handle: handle to this transaction - * @sb: super block - * @block: start physcial block to add to the block group - * @count: number of blocks to free - * - * This marks the blocks as free in the bitmap. We ask the - * mballoc to reload the buddy after this by setting group - * EXT4_GROUP_INFO_NEED_INIT_BIT flag - */ -void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, - ext4_fsblk_t block, unsigned long count) -{ - struct buffer_head *bitmap_bh = NULL; - struct buffer_head *gd_bh; - ext4_group_t block_group; - ext4_grpblk_t bit; - unsigned int i; - struct ext4_group_desc *desc; - struct ext4_sb_info *sbi = EXT4_SB(sb); - int err = 0, ret, blk_free_count; - ext4_grpblk_t blocks_freed; - struct ext4_group_info *grp; - - ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); - - ext4_get_group_no_and_offset(sb, block, &block_group, &bit); - grp = ext4_get_group_info(sb, block_group); - /* - * Check to see if we are freeing blocks across a group - * boundary. - */ - if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { - goto error_return; - } - bitmap_bh = ext4_read_block_bitmap(sb, block_group); - if (!bitmap_bh) - goto error_return; - desc = ext4_get_group_desc(sb, block_group, &gd_bh); - if (!desc) - goto error_return; - - if (in_range(ext4_block_bitmap(sb, desc), block, count) || - in_range(ext4_inode_bitmap(sb, desc), block, count) || - in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || - in_range(block + count - 1, ext4_inode_table(sb, desc), - sbi->s_itb_per_group)) { - ext4_error(sb, "Adding blocks in system zones - " - "Block = %llu, count = %lu", - block, count); - goto error_return; - } - - /* - * We are about to add blocks to the bitmap, - * so we need undo access. - */ - BUFFER_TRACE(bitmap_bh, "getting undo access"); - err = ext4_journal_get_undo_access(handle, bitmap_bh); - if (err) - goto error_return; - - /* - * We are about to modify some metadata. Call the journal APIs - * to unshare ->b_data if a currently-committing transaction is - * using it - */ - BUFFER_TRACE(gd_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, gd_bh); - if (err) - goto error_return; - /* - * make sure we don't allow a parallel init on other groups in the - * same buddy cache - */ - down_write(&grp->alloc_sem); - for (i = 0, blocks_freed = 0; i < count; i++) { - BUFFER_TRACE(bitmap_bh, "clear bit"); - if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), - bit + i, bitmap_bh->b_data)) { - ext4_error(sb, "bit already cleared for block %llu", - (ext4_fsblk_t)(block + i)); - BUFFER_TRACE(bitmap_bh, "bit already cleared"); - } else { - blocks_freed++; - } - } - ext4_lock_group(sb, block_group); - blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); - ext4_free_blks_set(sb, desc, blk_free_count); - desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); - ext4_unlock_group(sb, block_group); - percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); - - if (sbi->s_log_groups_per_flex) { - ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - atomic_add(blocks_freed, - &sbi->s_flex_groups[flex_group].free_blocks); - } - /* - * request to reload the buddy with the - * new bitmap information - */ - set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); - grp->bb_free += blocks_freed; - up_write(&grp->alloc_sem); - - /* We dirtied the bitmap block */ - BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); - err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); - - /* And the group descriptor block */ - BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); - ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); - if (!err) - err = ret; - -error_return: - brelse(bitmap_bh); - ext4_std_error(sb, err); - return; -} - -/** * ext4_has_free_blocks() * @sbi: in-core super block structure. * @nblocks: number of needed blocks @@ -493,7 +369,8 @@ error_return: * Check if filesystem has nblocks free & available for allocation. * On success return 1, return 0 on failure. */ -static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) +static int ext4_has_free_blocks(struct ext4_sb_info *sbi, + s64 nblocks, unsigned int flags) { s64 free_blocks, dirty_blocks, root_blocks; struct percpu_counter *fbc = &sbi->s_freeblocks_counter; @@ -507,11 +384,6 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) EXT4_FREEBLOCKS_WATERMARK) { free_blocks = percpu_counter_sum_positive(fbc); dirty_blocks = percpu_counter_sum_positive(dbc); - if (dirty_blocks < 0) { - printk(KERN_CRIT "Dirty block accounting " - "went wrong %lld\n", - (long long)dirty_blocks); - } } /* Check whether we have space after * accounting for current dirty blocks & root reserved blocks. @@ -522,7 +394,9 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) /* Hm, nope. Are (enough) root reserved blocks available? */ if (sbi->s_resuid == current_fsuid() || ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) || - capable(CAP_SYS_RESOURCE)) { + capable(CAP_SYS_RESOURCE) || + (flags & EXT4_MB_USE_ROOT_BLOCKS)) { + if (free_blocks >= (nblocks + dirty_blocks)) return 1; } @@ -531,9 +405,9 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) } int ext4_claim_free_blocks(struct ext4_sb_info *sbi, - s64 nblocks) + s64 nblocks, unsigned int flags) { - if (ext4_has_free_blocks(sbi, nblocks)) { + if (ext4_has_free_blocks(sbi, nblocks, flags)) { percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks); return 0; } else @@ -554,7 +428,7 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi, */ int ext4_should_retry_alloc(struct super_block *sb, int *retries) { - if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || + if (!ext4_has_free_blocks(EXT4_SB(sb), 1, 0) || (*retries)++ > 3 || !EXT4_SB(sb)->s_journal) return 0; @@ -577,7 +451,8 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) * error stores in errp pointer */ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, unsigned long *count, int *errp) + ext4_fsblk_t goal, unsigned int flags, + unsigned long *count, int *errp) { struct ext4_allocation_request ar; ext4_fsblk_t ret; @@ -587,6 +462,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, ar.inode = inode; ar.goal = goal; ar.len = count ? *count : 1; + ar.flags = flags; ret = ext4_mb_new_blocks(handle, &ar, errp); if (count) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4daaf2b753f4..a74b89c09f90 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -108,7 +108,8 @@ typedef unsigned int ext4_group_t; #define EXT4_MB_DELALLOC_RESERVED 0x0400 /* We are doing stream allocation */ #define EXT4_MB_STREAM_ALLOC 0x0800 - +/* Use reserved root blocks if needed */ +#define EXT4_MB_USE_ROOT_BLOCKS 0x1000 struct ext4_allocation_request { /* target inode for block we're allocating */ @@ -209,6 +210,8 @@ struct ext4_io_submit { */ #define EXT4_BAD_INO 1 /* Bad blocks inode */ #define EXT4_ROOT_INO 2 /* Root inode */ +#define EXT4_USR_QUOTA_INO 3 /* User quota inode */ +#define EXT4_GRP_QUOTA_INO 4 /* Group quota inode */ #define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */ #define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */ #define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */ @@ -512,6 +515,10 @@ struct ext4_new_group_data { /* Convert extent to initialized after IO complete */ #define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) + /* Punch out blocks of an extent */ +#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT 0x0020 + /* Don't normalize allocation size (used for fallocate) */ +#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 /* * Flags used by ext4_free_blocks @@ -1028,7 +1035,7 @@ struct ext4_super_block { __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ __le32 s_flags; /* Miscellaneous flags */ __le16 s_raid_stride; /* RAID stride */ - __le16 s_mmp_interval; /* # seconds to wait in MMP checking */ + __le16 s_mmp_update_interval; /* # seconds to wait in MMP checking */ __le64 s_mmp_block; /* Block for multi-mount protection */ __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ __u8 s_log_groups_per_flex; /* FLEX_BG group size */ @@ -1144,6 +1151,9 @@ struct ext4_sb_info { unsigned long s_ext_blocks; unsigned long s_ext_extents; #endif + /* ext4 extent cache stats */ + unsigned long extent_cache_hits; + unsigned long extent_cache_misses; /* for buddy allocator */ struct ext4_group_info ***s_group_info; @@ -1201,6 +1211,9 @@ struct ext4_sb_info { struct ext4_li_request *s_li_request; /* Wait multiplier for lazy initialization thread */ unsigned int s_li_wait_mult; + + /* Kernel thread for multiple mount protection */ + struct task_struct *s_mmp_tsk; }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) @@ -1338,6 +1351,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 +#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 @@ -1351,13 +1365,29 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ +#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR +#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ + EXT4_FEATURE_INCOMPAT_META_BG) +#define EXT2_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_BTREE_DIR) + +#define EXT3_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR +#define EXT3_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ + EXT4_FEATURE_INCOMPAT_RECOVER| \ + EXT4_FEATURE_INCOMPAT_META_BG) +#define EXT3_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_BTREE_DIR) + #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ EXT4_FEATURE_INCOMPAT_RECOVER| \ EXT4_FEATURE_INCOMPAT_META_BG| \ EXT4_FEATURE_INCOMPAT_EXTENTS| \ EXT4_FEATURE_INCOMPAT_64BIT| \ - EXT4_FEATURE_INCOMPAT_FLEX_BG) + EXT4_FEATURE_INCOMPAT_FLEX_BG| \ + EXT4_FEATURE_INCOMPAT_MMP) #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ @@ -1590,12 +1620,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, */ struct ext4_lazy_init { unsigned long li_state; - - wait_queue_head_t li_wait_daemon; - wait_queue_head_t li_wait_task; - struct timer_list li_timer; - struct task_struct *li_task; - struct list_head li_request_list; struct mutex li_list_mtx; }; @@ -1615,6 +1639,67 @@ struct ext4_features { }; /* + * This structure will be used for multiple mount protection. It will be + * written into the block number saved in the s_mmp_block field in the + * superblock. Programs that check MMP should assume that if + * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe + * to use the filesystem, regardless of how old the timestamp is. + */ +#define EXT4_MMP_MAGIC 0x004D4D50U /* ASCII for MMP */ +#define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */ +#define EXT4_MMP_SEQ_FSCK 0xE24D4D50U /* mmp_seq value when being fscked */ +#define EXT4_MMP_SEQ_MAX 0xE24D4D4FU /* maximum valid mmp_seq value */ + +struct mmp_struct { + __le32 mmp_magic; /* Magic number for MMP */ + __le32 mmp_seq; /* Sequence no. updated periodically */ + + /* + * mmp_time, mmp_nodename & mmp_bdevname are only used for information + * purposes and do not affect the correctness of the algorithm + */ + __le64 mmp_time; /* Time last updated */ + char mmp_nodename[64]; /* Node which last updated MMP block */ + char mmp_bdevname[32]; /* Bdev which last updated MMP block */ + + /* + * mmp_check_interval is used to verify if the MMP block has been + * updated on the block device. The value is updated based on the + * maximum time to write the MMP block during an update cycle. + */ + __le16 mmp_check_interval; + + __le16 mmp_pad1; + __le32 mmp_pad2[227]; +}; + +/* arguments passed to the mmp thread */ +struct mmpd_data { + struct buffer_head *bh; /* bh from initial read_mmp_block() */ + struct super_block *sb; /* super block of the fs */ +}; + +/* + * Check interval multiplier + * The MMP block is written every update interval and initially checked every + * update interval x the multiplier (the value is then adapted based on the + * write latency). The reason is that writes can be delayed under load and we + * don't want readers to incorrectly assume that the filesystem is no longer + * in use. + */ +#define EXT4_MMP_CHECK_MULT 2UL + +/* + * Minimum interval for MMP checking in seconds. + */ +#define EXT4_MMP_MIN_CHECK_INTERVAL 5UL + +/* + * Maximum interval for MMP checking in seconds. + */ +#define EXT4_MMP_MAX_CHECK_INTERVAL 300UL + +/* * Function prototypes */ @@ -1638,10 +1723,12 @@ extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group); extern unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group); extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, unsigned long *count, int *errp); -extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); -extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, - ext4_fsblk_t block, unsigned long count); + ext4_fsblk_t goal, + unsigned int flags, + unsigned long *count, + int *errp); +extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, + s64 nblocks, unsigned int flags); extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); extern void ext4_check_blocks_bitmap(struct super_block *); extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, @@ -1706,6 +1793,8 @@ extern void ext4_free_blocks(handle_t *handle, struct inode *inode, unsigned long count, int flags); extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); +extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, + ext4_fsblk_t block, unsigned long count); extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); /* inode.c */ @@ -1729,6 +1818,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int); extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); extern int ext4_can_truncate(struct inode *inode); extern void ext4_truncate(struct inode *); +extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length); extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); extern void ext4_set_inode_flags(struct inode *); extern void ext4_get_inode_flags(struct ext4_inode_info *); @@ -1738,6 +1828,8 @@ extern int ext4_writepage_trans_blocks(struct inode *); extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); extern int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from); +extern int ext4_block_zero_page_range(handle_t *handle, + struct address_space *mapping, loff_t from, loff_t length); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern void ext4_da_update_reserve_space(struct inode *inode, @@ -1788,6 +1880,10 @@ extern void __ext4_warning(struct super_block *, const char *, unsigned int, __LINE__, ## message) extern void ext4_msg(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); +extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp, + const char *, unsigned int, const char *); +#define dump_mmp_msg(sb, mmp, msg) __dump_mmp_msg(sb, mmp, __func__, \ + __LINE__, msg) extern void __ext4_grp_locked_error(const char *, unsigned int, \ struct super_block *, ext4_group_t, \ unsigned long, ext4_fsblk_t, \ @@ -2064,6 +2160,8 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); extern void ext4_ext_truncate(struct inode *); +extern int ext4_ext_punch_hole(struct file *file, loff_t offset, + loff_t length); extern void ext4_ext_init(struct super_block *); extern void ext4_ext_release(struct super_block *); extern long ext4_fallocate(struct file *file, int mode, loff_t offset, @@ -2092,6 +2190,9 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io, int len, struct writeback_control *wbc); +/* mmp.c */ +extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); + /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ enum ext4_state_bits { BH_Uninit /* blocks are allocated but uninitialized on disk */ diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 6e272ef6ba96..f5240aa15601 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -6,20 +6,6 @@ #include <trace/events/ext4.h> -int __ext4_journal_get_undo_access(const char *where, unsigned int line, - handle_t *handle, struct buffer_head *bh) -{ - int err = 0; - - if (ext4_handle_valid(handle)) { - err = jbd2_journal_get_undo_access(handle, bh); - if (err) - ext4_journal_abort_handle(where, line, __func__, bh, - handle, err); - } - return err; -} - int __ext4_journal_get_write_access(const char *where, unsigned int line, handle_t *handle, struct buffer_head *bh) { diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index d0f53538a57f..bb85757689b6 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -126,9 +126,6 @@ void ext4_journal_abort_handle(const char *caller, unsigned int line, const char *err_fn, struct buffer_head *bh, handle_t *handle, int err); -int __ext4_journal_get_undo_access(const char *where, unsigned int line, - handle_t *handle, struct buffer_head *bh); - int __ext4_journal_get_write_access(const char *where, unsigned int line, handle_t *handle, struct buffer_head *bh); @@ -146,8 +143,6 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, int __ext4_handle_dirty_super(const char *where, unsigned int line, handle_t *handle, struct super_block *sb); -#define ext4_journal_get_undo_access(handle, bh) \ - __ext4_journal_get_undo_access(__func__, __LINE__, (handle), (bh)) #define ext4_journal_get_write_access(handle, bh) \ __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh)) #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4890d6f3ad15..5199bac7fc62 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -46,6 +46,13 @@ #include <trace/events/ext4.h> +static int ext4_split_extent(handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path, + struct ext4_map_blocks *map, + int split_flag, + int flags); + static int ext4_ext_truncate_extend_restart(handle_t *handle, struct inode *inode, int needed) @@ -192,12 +199,13 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, static ext4_fsblk_t ext4_ext_new_meta_block(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, - struct ext4_extent *ex, int *err) + struct ext4_extent *ex, int *err, unsigned int flags) { ext4_fsblk_t goal, newblock; goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); - newblock = ext4_new_meta_blocks(handle, inode, goal, NULL, err); + newblock = ext4_new_meta_blocks(handle, inode, goal, flags, + NULL, err); return newblock; } @@ -474,9 +482,43 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) } ext_debug("\n"); } + +static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, + ext4_fsblk_t newblock, int level) +{ + int depth = ext_depth(inode); + struct ext4_extent *ex; + + if (depth != level) { + struct ext4_extent_idx *idx; + idx = path[level].p_idx; + while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) { + ext_debug("%d: move %d:%llu in new index %llu\n", level, + le32_to_cpu(idx->ei_block), + ext4_idx_pblock(idx), + newblock); + idx++; + } + + return; + } + + ex = path[depth].p_ext; + while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) { + ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", + le32_to_cpu(ex->ee_block), + ext4_ext_pblock(ex), + ext4_ext_is_uninitialized(ex), + ext4_ext_get_actual_len(ex), + newblock); + ex++; + } +} + #else #define ext4_ext_show_path(inode, path) #define ext4_ext_show_leaf(inode, path) +#define ext4_ext_show_move(inode, path, newblock, level) #endif void ext4_ext_drop_refs(struct ext4_ext_path *path) @@ -792,14 +834,14 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, * - initializes subtree */ static int ext4_ext_split(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, - struct ext4_extent *newext, int at) + unsigned int flags, + struct ext4_ext_path *path, + struct ext4_extent *newext, int at) { struct buffer_head *bh = NULL; int depth = ext_depth(inode); struct ext4_extent_header *neh; struct ext4_extent_idx *fidx; - struct ext4_extent *ex; int i = at, k, m, a; ext4_fsblk_t newblock, oldblock; __le32 border; @@ -847,7 +889,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); for (a = 0; a < depth - at; a++) { newblock = ext4_ext_new_meta_block(handle, inode, path, - newext, &err); + newext, &err, flags); if (newblock == 0) goto cleanup; ablocks[a] = newblock; @@ -876,7 +918,6 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); neh->eh_magic = EXT4_EXT_MAGIC; neh->eh_depth = 0; - ex = EXT_FIRST_EXTENT(neh); /* move remainder of path[depth] to the new leaf */ if (unlikely(path[depth].p_hdr->eh_entries != @@ -888,25 +929,12 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, goto cleanup; } /* start copy from next extent */ - /* TODO: we could do it by single memmove */ - m = 0; - path[depth].p_ext++; - while (path[depth].p_ext <= - EXT_MAX_EXTENT(path[depth].p_hdr)) { - ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", - le32_to_cpu(path[depth].p_ext->ee_block), - ext4_ext_pblock(path[depth].p_ext), - ext4_ext_is_uninitialized(path[depth].p_ext), - ext4_ext_get_actual_len(path[depth].p_ext), - newblock); - /*memmove(ex++, path[depth].p_ext++, - sizeof(struct ext4_extent)); - neh->eh_entries++;*/ - path[depth].p_ext++; - m++; - } + m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++; + ext4_ext_show_move(inode, path, newblock, depth); if (m) { - memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); + struct ext4_extent *ex; + ex = EXT_FIRST_EXTENT(neh); + memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m); le16_add_cpu(&neh->eh_entries, m); } @@ -968,12 +996,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, ext_debug("int.index at %d (block %llu): %u -> %llu\n", i, newblock, le32_to_cpu(border), oldblock); - /* copy indexes */ - m = 0; - path[i].p_idx++; - ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, - EXT_MAX_INDEX(path[i].p_hdr)); + /* move remainder of path[i] to the new index block */ if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) != EXT_LAST_INDEX(path[i].p_hdr))) { EXT4_ERROR_INODE(inode, @@ -982,20 +1006,13 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, err = -EIO; goto cleanup; } - while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { - ext_debug("%d: move %d:%llu in new index %llu\n", i, - le32_to_cpu(path[i].p_idx->ei_block), - ext4_idx_pblock(path[i].p_idx), - newblock); - /*memmove(++fidx, path[i].p_idx++, - sizeof(struct ext4_extent_idx)); - neh->eh_entries++; - BUG_ON(neh->eh_entries > neh->eh_max);*/ - path[i].p_idx++; - m++; - } + /* start copy indexes */ + m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++; + ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, + EXT_MAX_INDEX(path[i].p_hdr)); + ext4_ext_show_move(inode, path, newblock, i); if (m) { - memmove(++fidx, path[i].p_idx - m, + memmove(++fidx, path[i].p_idx, sizeof(struct ext4_extent_idx) * m); le16_add_cpu(&neh->eh_entries, m); } @@ -1056,8 +1073,9 @@ cleanup: * just created block */ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, - struct ext4_extent *newext) + unsigned int flags, + struct ext4_ext_path *path, + struct ext4_extent *newext) { struct ext4_ext_path *curp = path; struct ext4_extent_header *neh; @@ -1065,7 +1083,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, ext4_fsblk_t newblock; int err = 0; - newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err); + newblock = ext4_ext_new_meta_block(handle, inode, path, + newext, &err, flags); if (newblock == 0) return err; @@ -1140,8 +1159,9 @@ out: * if no free index is found, then it requests in-depth growing. */ static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, - struct ext4_extent *newext) + unsigned int flags, + struct ext4_ext_path *path, + struct ext4_extent *newext) { struct ext4_ext_path *curp; int depth, i, err = 0; @@ -1161,7 +1181,7 @@ repeat: if (EXT_HAS_FREE_INDEX(curp)) { /* if we found index with free entry, then use that * entry: create all needed subtree and add new leaf */ - err = ext4_ext_split(handle, inode, path, newext, i); + err = ext4_ext_split(handle, inode, flags, path, newext, i); if (err) goto out; @@ -1174,7 +1194,8 @@ repeat: err = PTR_ERR(path); } else { /* tree is full, time to grow in depth */ - err = ext4_ext_grow_indepth(handle, inode, path, newext); + err = ext4_ext_grow_indepth(handle, inode, flags, + path, newext); if (err) goto out; @@ -1563,7 +1584,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns * 1 if they got merged. */ -static int ext4_ext_try_to_merge(struct inode *inode, +static int ext4_ext_try_to_merge_right(struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *ex) { @@ -1603,6 +1624,31 @@ static int ext4_ext_try_to_merge(struct inode *inode, } /* + * This function tries to merge the @ex extent to neighbours in the tree. + * return 1 if merge left else 0. + */ +static int ext4_ext_try_to_merge(struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *ex) { + struct ext4_extent_header *eh; + unsigned int depth; + int merge_done = 0; + int ret = 0; + + depth = ext_depth(inode); + BUG_ON(path[depth].p_hdr == NULL); + eh = path[depth].p_hdr; + + if (ex > EXT_FIRST_EXTENT(eh)) + merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); + + if (!merge_done) + ret = ext4_ext_try_to_merge_right(inode, path, ex); + + return ret; +} + +/* * check if a portion of the "newext" extent overlaps with an * existing extent. * @@ -1668,6 +1714,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, int depth, len, err; ext4_lblk_t next; unsigned uninitialized = 0; + int flags = 0; if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); @@ -1742,7 +1789,9 @@ repeat: * There is no free space in the found leaf. * We're gonna add a new leaf in the tree. */ - err = ext4_ext_create_new_leaf(handle, inode, path, newext); + if (flag & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) + flags = EXT4_MB_USE_ROOT_BLOCKS; + err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext); if (err) goto cleanup; depth = ext_depth(inode); @@ -2003,13 +2052,25 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, } /* + * ext4_ext_in_cache() + * Checks to see if the given block is in the cache. + * If it is, the cached extent is stored in the given + * cache extent pointer. If the cached extent is a hole, + * this routine should be used instead of + * ext4_ext_in_cache if the calling function needs to + * know the size of the hole. + * + * @inode: The files inode + * @block: The block to look for in the cache + * @ex: Pointer where the cached extent will be stored + * if it contains block + * * Return 0 if cache is invalid; 1 if the cache is valid */ -static int -ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, - struct ext4_extent *ex) -{ +static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block, + struct ext4_ext_cache *ex){ struct ext4_ext_cache *cex; + struct ext4_sb_info *sbi; int ret = 0; /* @@ -2017,26 +2078,60 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, */ spin_lock(&EXT4_I(inode)->i_block_reservation_lock); cex = &EXT4_I(inode)->i_cached_extent; + sbi = EXT4_SB(inode->i_sb); /* has cache valid data? */ if (cex->ec_len == 0) goto errout; if (in_range(block, cex->ec_block, cex->ec_len)) { - ex->ee_block = cpu_to_le32(cex->ec_block); - ext4_ext_store_pblock(ex, cex->ec_start); - ex->ee_len = cpu_to_le16(cex->ec_len); + memcpy(ex, cex, sizeof(struct ext4_ext_cache)); ext_debug("%u cached by %u:%u:%llu\n", block, cex->ec_block, cex->ec_len, cex->ec_start); ret = 1; } errout: + if (!ret) + sbi->extent_cache_misses++; + else + sbi->extent_cache_hits++; spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); return ret; } /* + * ext4_ext_in_cache() + * Checks to see if the given block is in the cache. + * If it is, the cached extent is stored in the given + * extent pointer. + * + * @inode: The files inode + * @block: The block to look for in the cache + * @ex: Pointer where the cached extent will be stored + * if it contains block + * + * Return 0 if cache is invalid; 1 if the cache is valid + */ +static int +ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, + struct ext4_extent *ex) +{ + struct ext4_ext_cache cex; + int ret = 0; + + if (ext4_ext_check_cache(inode, block, &cex)) { + ex->ee_block = cpu_to_le32(cex.ec_block); + ext4_ext_store_pblock(ex, cex.ec_start); + ex->ee_len = cpu_to_le16(cex.ec_len); + ret = 1; + } + + return ret; +} + + +/* * ext4_ext_rm_idx: * removes index from the index block. * It's used in truncate case only, thus all requests are for @@ -2163,8 +2258,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, ext4_free_blocks(handle, inode, NULL, start, num, flags); } else if (from == le32_to_cpu(ex->ee_block) && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { - printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", - from, to, le32_to_cpu(ex->ee_block), ee_len); + /* head removal */ + ext4_lblk_t num; + ext4_fsblk_t start; + + num = to - from; + start = ext4_ext_pblock(ex); + + ext_debug("free first %u blocks starting %llu\n", num, start); + ext4_free_blocks(handle, inode, 0, start, num, flags); + } else { printk(KERN_INFO "strange request: removal(2) " "%u-%u from %u:%u\n", @@ -2173,9 +2276,22 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, return 0; } + +/* + * ext4_ext_rm_leaf() Removes the extents associated with the + * blocks appearing between "start" and "end", and splits the extents + * if "start" and "end" appear in the same extent + * + * @handle: The journal handle + * @inode: The files inode + * @path: The path to the leaf + * @start: The first block to remove + * @end: The last block to remove + */ static int ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, ext4_lblk_t start) + struct ext4_ext_path *path, ext4_lblk_t start, + ext4_lblk_t end) { int err = 0, correct_index = 0; int depth = ext_depth(inode), credits; @@ -2186,6 +2302,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, unsigned short ex_ee_len; unsigned uninitialized = 0; struct ext4_extent *ex; + struct ext4_map_blocks map; /* the header must be checked already in ext4_ext_remove_space() */ ext_debug("truncate since %u in leaf\n", start); @@ -2215,31 +2332,95 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, path[depth].p_ext = ex; a = ex_ee_block > start ? ex_ee_block : start; - b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ? - ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK; + b = ex_ee_block+ex_ee_len - 1 < end ? + ex_ee_block+ex_ee_len - 1 : end; ext_debug(" border %u:%u\n", a, b); - if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) { - block = 0; - num = 0; - BUG(); + /* If this extent is beyond the end of the hole, skip it */ + if (end <= ex_ee_block) { + ex--; + ex_ee_block = le32_to_cpu(ex->ee_block); + ex_ee_len = ext4_ext_get_actual_len(ex); + continue; + } else if (a != ex_ee_block && + b != ex_ee_block + ex_ee_len - 1) { + /* + * If this is a truncate, then this condition should + * never happen because at least one of the end points + * needs to be on the edge of the extent. + */ + if (end == EXT_MAX_BLOCK) { + ext_debug(" bad truncate %u:%u\n", + start, end); + block = 0; + num = 0; + err = -EIO; + goto out; + } + /* + * else this is a hole punch, so the extent needs to + * be split since neither edge of the hole is on the + * extent edge + */ + else{ + map.m_pblk = ext4_ext_pblock(ex); + map.m_lblk = ex_ee_block; + map.m_len = b - ex_ee_block; + + err = ext4_split_extent(handle, + inode, path, &map, 0, + EXT4_GET_BLOCKS_PUNCH_OUT_EXT | + EXT4_GET_BLOCKS_PRE_IO); + + if (err < 0) + goto out; + + ex_ee_len = ext4_ext_get_actual_len(ex); + + b = ex_ee_block+ex_ee_len - 1 < end ? + ex_ee_block+ex_ee_len - 1 : end; + + /* Then remove tail of this extent */ + block = ex_ee_block; + num = a - block; + } } else if (a != ex_ee_block) { /* remove tail of the extent */ block = ex_ee_block; num = a - block; } else if (b != ex_ee_block + ex_ee_len - 1) { /* remove head of the extent */ - block = a; - num = b - a; - /* there is no "make a hole" API yet */ - BUG(); + block = b; + num = ex_ee_block + ex_ee_len - b; + + /* + * If this is a truncate, this condition + * should never happen + */ + if (end == EXT_MAX_BLOCK) { + ext_debug(" bad truncate %u:%u\n", + start, end); + err = -EIO; + goto out; + } } else { /* remove whole extent: excellent! */ block = ex_ee_block; num = 0; - BUG_ON(a != ex_ee_block); - BUG_ON(b != ex_ee_block + ex_ee_len - 1); + if (a != ex_ee_block) { + ext_debug(" bad truncate %u:%u\n", + start, end); + err = -EIO; + goto out; + } + + if (b != ex_ee_block + ex_ee_len - 1) { + ext_debug(" bad truncate %u:%u\n", + start, end); + err = -EIO; + goto out; + } } /* @@ -2270,7 +2451,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, if (num == 0) { /* this extent is removed; mark slot entirely unused */ ext4_ext_store_pblock(ex, 0); - le16_add_cpu(&eh->eh_entries, -1); + } else if (block != ex_ee_block) { + /* + * If this was a head removal, then we need to update + * the physical block since it is now at a different + * location + */ + ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a)); } ex->ee_block = cpu_to_le32(block); @@ -2286,6 +2473,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, if (err) goto out; + /* + * If the extent was completely released, + * we need to remove it from the leaf + */ + if (num == 0) { + if (end != EXT_MAX_BLOCK) { + /* + * For hole punching, we need to scoot all the + * extents up when an extent is removed so that + * we dont have blank extents in the middle + */ + memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) * + sizeof(struct ext4_extent)); + + /* Now get rid of the one at the end */ + memset(EXT_LAST_EXTENT(eh), 0, + sizeof(struct ext4_extent)); + } + le16_add_cpu(&eh->eh_entries, -1); + } + ext_debug("new extent: %u:%u:%llu\n", block, num, ext4_ext_pblock(ex)); ex--; @@ -2326,7 +2534,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path) return 1; } -static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) +static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, + ext4_lblk_t end) { struct super_block *sb = inode->i_sb; int depth = ext_depth(inode); @@ -2365,7 +2574,8 @@ again: while (i >= 0 && err == 0) { if (i == depth) { /* this is leaf block */ - err = ext4_ext_rm_leaf(handle, inode, path, start); + err = ext4_ext_rm_leaf(handle, inode, path, + start, end); /* root level has p_bh == NULL, brelse() eats this */ brelse(path[i].p_bh); path[i].p_bh = NULL; @@ -2529,6 +2739,195 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) return ret; } +/* + * used by extent splitting. + */ +#define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ + due to ENOSPC */ +#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ +#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ + +/* + * ext4_split_extent_at() splits an extent at given block. + * + * @handle: the journal handle + * @inode: the file inode + * @path: the path to the extent + * @split: the logical block where the extent is splitted. + * @split_flags: indicates if the extent could be zeroout if split fails, and + * the states(init or uninit) of new extents. + * @flags: flags used to insert new extent to extent tree. + * + * + * Splits extent [a, b] into two extents [a, @split) and [@split, b], states + * of which are deterimined by split_flag. + * + * There are two cases: + * a> the extent are splitted into two extent. + * b> split is not needed, and just mark the extent. + * + * return 0 on success. + */ +static int ext4_split_extent_at(handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t split, + int split_flag, + int flags) +{ + ext4_fsblk_t newblock; + ext4_lblk_t ee_block; + struct ext4_extent *ex, newex, orig_ex; + struct ext4_extent *ex2 = NULL; + unsigned int ee_len, depth; + int err = 0; + + ext_debug("ext4_split_extents_at: inode %lu, logical" + "block %llu\n", inode->i_ino, (unsigned long long)split); + + ext4_ext_show_leaf(inode, path); + + depth = ext_depth(inode); + ex = path[depth].p_ext; + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); + newblock = split - ee_block + ext4_ext_pblock(ex); + + BUG_ON(split < ee_block || split >= (ee_block + ee_len)); + + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto out; + + if (split == ee_block) { + /* + * case b: block @split is the block that the extent begins with + * then we just change the state of the extent, and splitting + * is not needed. + */ + if (split_flag & EXT4_EXT_MARK_UNINIT2) + ext4_ext_mark_uninitialized(ex); + else + ext4_ext_mark_initialized(ex); + + if (!(flags & EXT4_GET_BLOCKS_PRE_IO)) + ext4_ext_try_to_merge(inode, path, ex); + + err = ext4_ext_dirty(handle, inode, path + depth); + goto out; + } + + /* case a */ + memcpy(&orig_ex, ex, sizeof(orig_ex)); + ex->ee_len = cpu_to_le16(split - ee_block); + if (split_flag & EXT4_EXT_MARK_UNINIT1) + ext4_ext_mark_uninitialized(ex); + + /* + * path may lead to new leaf, not to original leaf any more + * after ext4_ext_insert_extent() returns, + */ + err = ext4_ext_dirty(handle, inode, path + depth); + if (err) + goto fix_extent_len; + + ex2 = &newex; + ex2->ee_block = cpu_to_le32(split); + ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block)); + ext4_ext_store_pblock(ex2, newblock); + if (split_flag & EXT4_EXT_MARK_UNINIT2) + ext4_ext_mark_uninitialized(ex2); + + err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); + if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { + err = ext4_ext_zeroout(inode, &orig_ex); + if (err) + goto fix_extent_len; + /* update the extent length and mark as initialized */ + ex->ee_len = cpu_to_le32(ee_len); + ext4_ext_try_to_merge(inode, path, ex); + err = ext4_ext_dirty(handle, inode, path + depth); + goto out; + } else if (err) + goto fix_extent_len; + +out: + ext4_ext_show_leaf(inode, path); + return err; + +fix_extent_len: + ex->ee_len = orig_ex.ee_len; + ext4_ext_dirty(handle, inode, path + depth); + return err; +} + +/* + * ext4_split_extents() splits an extent and mark extent which is covered + * by @map as split_flags indicates + * + * It may result in splitting the extent into multiple extents (upto three) + * There are three possibilities: + * a> There is no split required + * b> Splits in two extents: Split is happening at either end of the extent + * c> Splits in three extents: Somone is splitting in middle of the extent + * + */ +static int ext4_split_extent(handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path, + struct ext4_map_blocks *map, + int split_flag, + int flags) +{ + ext4_lblk_t ee_block; + struct ext4_extent *ex; + unsigned int ee_len, depth; + int err = 0; + int uninitialized; + int split_flag1, flags1; + + depth = ext_depth(inode); + ex = path[depth].p_ext; + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); + uninitialized = ext4_ext_is_uninitialized(ex); + + if (map->m_lblk + map->m_len < ee_block + ee_len) { + split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? + EXT4_EXT_MAY_ZEROOUT : 0; + flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; + if (uninitialized) + split_flag1 |= EXT4_EXT_MARK_UNINIT1 | + EXT4_EXT_MARK_UNINIT2; + err = ext4_split_extent_at(handle, inode, path, + map->m_lblk + map->m_len, split_flag1, flags1); + if (err) + goto out; + } + + ext4_ext_drop_refs(path); + path = ext4_ext_find_extent(inode, map->m_lblk, path); + if (IS_ERR(path)) + return PTR_ERR(path); + + if (map->m_lblk >= ee_block) { + split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? + EXT4_EXT_MAY_ZEROOUT : 0; + if (uninitialized) + split_flag1 |= EXT4_EXT_MARK_UNINIT1; + if (split_flag & EXT4_EXT_MARK_UNINIT2) + split_flag1 |= EXT4_EXT_MARK_UNINIT2; + err = ext4_split_extent_at(handle, inode, path, + map->m_lblk, split_flag1, flags); + if (err) + goto out; + } + + ext4_ext_show_leaf(inode, path); +out: + return err ? err : map->m_len; +} + #define EXT4_EXT_ZERO_LEN 7 /* * This function is called by ext4_ext_map_blocks() if someone tries to write @@ -2545,17 +2944,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, struct ext4_map_blocks *map, struct ext4_ext_path *path) { - struct ext4_extent *ex, newex, orig_ex; - struct ext4_extent *ex1 = NULL; - struct ext4_extent *ex2 = NULL; - struct ext4_extent *ex3 = NULL; - struct ext4_extent_header *eh; + struct ext4_map_blocks split_map; + struct ext4_extent zero_ex; + struct ext4_extent *ex; ext4_lblk_t ee_block, eof_block; unsigned int allocated, ee_len, depth; - ext4_fsblk_t newblock; int err = 0; - int ret = 0; - int may_zeroout; + int split_flag = 0; ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" "block %llu, max_blocks %u\n", inode->i_ino, @@ -2567,280 +2962,86 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, eof_block = map->m_lblk + map->m_len; depth = ext_depth(inode); - eh = path[depth].p_hdr; ex = path[depth].p_ext; ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); allocated = ee_len - (map->m_lblk - ee_block); - newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); - - ex2 = ex; - orig_ex.ee_block = ex->ee_block; - orig_ex.ee_len = cpu_to_le16(ee_len); - ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); + WARN_ON(map->m_lblk < ee_block); /* * It is safe to convert extent to initialized via explicit * zeroout only if extent is fully insde i_size or new_size. */ - may_zeroout = ee_block + ee_len <= eof_block; + split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; - err = ext4_ext_get_access(handle, inode, path + depth); - if (err) - goto out; /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ - if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) { - err = ext4_ext_zeroout(inode, &orig_ex); + if (ee_len <= 2*EXT4_EXT_ZERO_LEN && + (EXT4_EXT_MAY_ZEROOUT & split_flag)) { + err = ext4_ext_zeroout(inode, ex); if (err) - goto fix_extent_len; - /* update the extent length and mark as initialized */ - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - /* zeroed the full extent */ - return allocated; - } - - /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ - if (map->m_lblk > ee_block) { - ex1 = ex; - ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); - ext4_ext_mark_uninitialized(ex1); - ex2 = &newex; - } - /* - * for sanity, update the length of the ex2 extent before - * we insert ex3, if ex1 is NULL. This is to avoid temporary - * overlap of blocks. - */ - if (!ex1 && allocated > map->m_len) - ex2->ee_len = cpu_to_le16(map->m_len); - /* ex3: to ee_block + ee_len : uninitialised */ - if (allocated > map->m_len) { - unsigned int newdepth; - /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ - if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) { - /* - * map->m_lblk == ee_block is handled by the zerouout - * at the beginning. - * Mark first half uninitialized. - * Mark second half initialized and zero out the - * initialized extent - */ - ex->ee_block = orig_ex.ee_block; - ex->ee_len = cpu_to_le16(ee_len - allocated); - ext4_ext_mark_uninitialized(ex); - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - - ex3 = &newex; - ex3->ee_block = cpu_to_le32(map->m_lblk); - ext4_ext_store_pblock(ex3, newblock); - ex3->ee_len = cpu_to_le16(allocated); - err = ext4_ext_insert_extent(handle, inode, path, - ex3, 0); - if (err == -ENOSPC) { - err = ext4_ext_zeroout(inode, &orig_ex); - if (err) - goto fix_extent_len; - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, - ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - /* blocks available from map->m_lblk */ - return allocated; - - } else if (err) - goto fix_extent_len; - - /* - * We need to zero out the second half because - * an fallocate request can update file size and - * converting the second half to initialized extent - * implies that we can leak some junk data to user - * space. - */ - err = ext4_ext_zeroout(inode, ex3); - if (err) { - /* - * We should actually mark the - * second half as uninit and return error - * Insert would have changed the extent - */ - depth = ext_depth(inode); - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, map->m_lblk, - path); - if (IS_ERR(path)) { - err = PTR_ERR(path); - return err; - } - /* get the second half extent details */ - ex = path[depth].p_ext; - err = ext4_ext_get_access(handle, inode, - path + depth); - if (err) - return err; - ext4_ext_mark_uninitialized(ex); - ext4_ext_dirty(handle, inode, path + depth); - return err; - } - - /* zeroed the second half */ - return allocated; - } - ex3 = &newex; - ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); - ext4_ext_store_pblock(ex3, newblock + map->m_len); - ex3->ee_len = cpu_to_le16(allocated - map->m_len); - ext4_ext_mark_uninitialized(ex3); - err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); - if (err == -ENOSPC && may_zeroout) { - err = ext4_ext_zeroout(inode, &orig_ex); - if (err) - goto fix_extent_len; - /* update the extent length and mark as initialized */ - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - /* zeroed the full extent */ - /* blocks available from map->m_lblk */ - return allocated; - - } else if (err) - goto fix_extent_len; - /* - * The depth, and hence eh & ex might change - * as part of the insert above. - */ - newdepth = ext_depth(inode); - /* - * update the extent length after successful insert of the - * split extent - */ - ee_len -= ext4_ext_get_actual_len(ex3); - orig_ex.ee_len = cpu_to_le16(ee_len); - may_zeroout = ee_block + ee_len <= eof_block; - - depth = newdepth; - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, map->m_lblk, path); - if (IS_ERR(path)) { - err = PTR_ERR(path); goto out; - } - eh = path[depth].p_hdr; - ex = path[depth].p_ext; - if (ex2 != &newex) - ex2 = ex; err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; - - allocated = map->m_len; - - /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying - * to insert a extent in the middle zerout directly - * otherwise give the extent a chance to merge to left - */ - if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && - map->m_lblk != ee_block && may_zeroout) { - err = ext4_ext_zeroout(inode, &orig_ex); - if (err) - goto fix_extent_len; - /* update the extent length and mark as initialized */ - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - /* zero out the first half */ - /* blocks available from map->m_lblk */ - return allocated; - } - } - /* - * If there was a change of depth as part of the - * insertion of ex3 above, we need to update the length - * of the ex1 extent again here - */ - if (ex1 && ex1 != ex) { - ex1 = ex; - ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); - ext4_ext_mark_uninitialized(ex1); - ex2 = &newex; - } - /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */ - ex2->ee_block = cpu_to_le32(map->m_lblk); - ext4_ext_store_pblock(ex2, newblock); - ex2->ee_len = cpu_to_le16(allocated); - if (ex2 != ex) - goto insert; - /* - * New (initialized) extent starts from the first block - * in the current extent. i.e., ex2 == ex - * We have to see if it can be merged with the extent - * on the left. - */ - if (ex2 > EXT_FIRST_EXTENT(eh)) { - /* - * To merge left, pass "ex2 - 1" to try_to_merge(), - * since it merges towards right _only_. - */ - ret = ext4_ext_try_to_merge(inode, path, ex2 - 1); - if (ret) { - err = ext4_ext_correct_indexes(handle, inode, path); - if (err) - goto out; - depth = ext_depth(inode); - ex2--; - } + ext4_ext_mark_initialized(ex); + ext4_ext_try_to_merge(inode, path, ex); + err = ext4_ext_dirty(handle, inode, path + depth); + goto out; } + /* - * Try to Merge towards right. This might be required - * only when the whole extent is being written to. - * i.e. ex2 == ex and ex3 == NULL. + * four cases: + * 1. split the extent into three extents. + * 2. split the extent into two extents, zeroout the first half. + * 3. split the extent into two extents, zeroout the second half. + * 4. split the extent into two extents with out zeroout. */ - if (!ex3) { - ret = ext4_ext_try_to_merge(inode, path, ex2); - if (ret) { - err = ext4_ext_correct_indexes(handle, inode, path); + split_map.m_lblk = map->m_lblk; + split_map.m_len = map->m_len; + + if (allocated > map->m_len) { + if (allocated <= EXT4_EXT_ZERO_LEN && + (EXT4_EXT_MAY_ZEROOUT & split_flag)) { + /* case 3 */ + zero_ex.ee_block = + cpu_to_le32(map->m_lblk); + zero_ex.ee_len = cpu_to_le16(allocated); + ext4_ext_store_pblock(&zero_ex, + ext4_ext_pblock(ex) + map->m_lblk - ee_block); + err = ext4_ext_zeroout(inode, &zero_ex); if (err) goto out; + split_map.m_lblk = map->m_lblk; + split_map.m_len = allocated; + } else if ((map->m_lblk - ee_block + map->m_len < + EXT4_EXT_ZERO_LEN) && + (EXT4_EXT_MAY_ZEROOUT & split_flag)) { + /* case 2 */ + if (map->m_lblk != ee_block) { + zero_ex.ee_block = ex->ee_block; + zero_ex.ee_len = cpu_to_le16(map->m_lblk - + ee_block); + ext4_ext_store_pblock(&zero_ex, + ext4_ext_pblock(ex)); + err = ext4_ext_zeroout(inode, &zero_ex); + if (err) + goto out; + } + + split_map.m_lblk = ee_block; + split_map.m_len = map->m_lblk - ee_block + map->m_len; + allocated = map->m_len; } } - /* Mark modified extent as dirty */ - err = ext4_ext_dirty(handle, inode, path + depth); - goto out; -insert: - err = ext4_ext_insert_extent(handle, inode, path, &newex, 0); - if (err == -ENOSPC && may_zeroout) { - err = ext4_ext_zeroout(inode, &orig_ex); - if (err) - goto fix_extent_len; - /* update the extent length and mark as initialized */ - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - /* zero out the first half */ - return allocated; - } else if (err) - goto fix_extent_len; + + allocated = ext4_split_extent(handle, inode, path, + &split_map, split_flag, 0); + if (allocated < 0) + err = allocated; + out: - ext4_ext_show_leaf(inode, path); return err ? err : allocated; - -fix_extent_len: - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_mark_uninitialized(ex); - ext4_ext_dirty(handle, inode, path + depth); - return err; } /* @@ -2871,15 +3072,11 @@ static int ext4_split_unwritten_extents(handle_t *handle, struct ext4_ext_path *path, int flags) { - struct ext4_extent *ex, newex, orig_ex; - struct ext4_extent *ex1 = NULL; - struct ext4_extent *ex2 = NULL; - struct ext4_extent *ex3 = NULL; - ext4_lblk_t ee_block, eof_block; - unsigned int allocated, ee_len, depth; - ext4_fsblk_t newblock; - int err = 0; - int may_zeroout; + ext4_lblk_t eof_block; + ext4_lblk_t ee_block; + struct ext4_extent *ex; + unsigned int ee_len; + int split_flag = 0, depth; ext_debug("ext4_split_unwritten_extents: inode %lu, logical" "block %llu, max_blocks %u\n", inode->i_ino, @@ -2889,156 +3086,22 @@ static int ext4_split_unwritten_extents(handle_t *handle, inode->i_sb->s_blocksize_bits; if (eof_block < map->m_lblk + map->m_len) eof_block = map->m_lblk + map->m_len; - - depth = ext_depth(inode); - ex = path[depth].p_ext; - ee_block = le32_to_cpu(ex->ee_block); - ee_len = ext4_ext_get_actual_len(ex); - allocated = ee_len - (map->m_lblk - ee_block); - newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); - - ex2 = ex; - orig_ex.ee_block = ex->ee_block; - orig_ex.ee_len = cpu_to_le16(ee_len); - ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); - /* * It is safe to convert extent to initialized via explicit * zeroout only if extent is fully insde i_size or new_size. */ - may_zeroout = ee_block + ee_len <= eof_block; - - /* - * If the uninitialized extent begins at the same logical - * block where the write begins, and the write completely - * covers the extent, then we don't need to split it. - */ - if ((map->m_lblk == ee_block) && (allocated <= map->m_len)) - return allocated; - - err = ext4_ext_get_access(handle, inode, path + depth); - if (err) - goto out; - /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ - if (map->m_lblk > ee_block) { - ex1 = ex; - ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); - ext4_ext_mark_uninitialized(ex1); - ex2 = &newex; - } - /* - * for sanity, update the length of the ex2 extent before - * we insert ex3, if ex1 is NULL. This is to avoid temporary - * overlap of blocks. - */ - if (!ex1 && allocated > map->m_len) - ex2->ee_len = cpu_to_le16(map->m_len); - /* ex3: to ee_block + ee_len : uninitialised */ - if (allocated > map->m_len) { - unsigned int newdepth; - ex3 = &newex; - ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); - ext4_ext_store_pblock(ex3, newblock + map->m_len); - ex3->ee_len = cpu_to_le16(allocated - map->m_len); - ext4_ext_mark_uninitialized(ex3); - err = ext4_ext_insert_extent(handle, inode, path, ex3, flags); - if (err == -ENOSPC && may_zeroout) { - err = ext4_ext_zeroout(inode, &orig_ex); - if (err) - goto fix_extent_len; - /* update the extent length and mark as initialized */ - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - /* zeroed the full extent */ - /* blocks available from map->m_lblk */ - return allocated; - - } else if (err) - goto fix_extent_len; - /* - * The depth, and hence eh & ex might change - * as part of the insert above. - */ - newdepth = ext_depth(inode); - /* - * update the extent length after successful insert of the - * split extent - */ - ee_len -= ext4_ext_get_actual_len(ex3); - orig_ex.ee_len = cpu_to_le16(ee_len); - may_zeroout = ee_block + ee_len <= eof_block; - - depth = newdepth; - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, map->m_lblk, path); - if (IS_ERR(path)) { - err = PTR_ERR(path); - goto out; - } - ex = path[depth].p_ext; - if (ex2 != &newex) - ex2 = ex; + depth = ext_depth(inode); + ex = path[depth].p_ext; + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); - err = ext4_ext_get_access(handle, inode, path + depth); - if (err) - goto out; + split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; + split_flag |= EXT4_EXT_MARK_UNINIT2; - allocated = map->m_len; - } - /* - * If there was a change of depth as part of the - * insertion of ex3 above, we need to update the length - * of the ex1 extent again here - */ - if (ex1 && ex1 != ex) { - ex1 = ex; - ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); - ext4_ext_mark_uninitialized(ex1); - ex2 = &newex; - } - /* - * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written - * using direct I/O, uninitialised still. - */ - ex2->ee_block = cpu_to_le32(map->m_lblk); - ext4_ext_store_pblock(ex2, newblock); - ex2->ee_len = cpu_to_le16(allocated); - ext4_ext_mark_uninitialized(ex2); - if (ex2 != ex) - goto insert; - /* Mark modified extent as dirty */ - err = ext4_ext_dirty(handle, inode, path + depth); - ext_debug("out here\n"); - goto out; -insert: - err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); - if (err == -ENOSPC && may_zeroout) { - err = ext4_ext_zeroout(inode, &orig_ex); - if (err) - goto fix_extent_len; - /* update the extent length and mark as initialized */ - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - /* zero out the first half */ - return allocated; - } else if (err) - goto fix_extent_len; -out: - ext4_ext_show_leaf(inode, path); - return err ? err : allocated; - -fix_extent_len: - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_mark_uninitialized(ex); - ext4_ext_dirty(handle, inode, path + depth); - return err; + flags |= EXT4_GET_BLOCKS_PRE_IO; + return ext4_split_extent(handle, inode, path, map, split_flag, flags); } + static int ext4_convert_unwritten_extents_endio(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) @@ -3047,46 +3110,27 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, struct ext4_extent_header *eh; int depth; int err = 0; - int ret = 0; depth = ext_depth(inode); eh = path[depth].p_hdr; ex = path[depth].p_ext; + ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" + "block %llu, max_blocks %u\n", inode->i_ino, + (unsigned long long)le32_to_cpu(ex->ee_block), + ext4_ext_get_actual_len(ex)); + err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; /* first mark the extent as initialized */ ext4_ext_mark_initialized(ex); - /* - * We have to see if it can be merged with the extent - * on the left. - */ - if (ex > EXT_FIRST_EXTENT(eh)) { - /* - * To merge left, pass "ex - 1" to try_to_merge(), - * since it merges towards right _only_. - */ - ret = ext4_ext_try_to_merge(inode, path, ex - 1); - if (ret) { - err = ext4_ext_correct_indexes(handle, inode, path); - if (err) - goto out; - depth = ext_depth(inode); - ex--; - } - } - /* - * Try to Merge towards right. + /* note: ext4_ext_correct_indexes() isn't needed here because + * borders are not changed */ - ret = ext4_ext_try_to_merge(inode, path, ex); - if (ret) { - err = ext4_ext_correct_indexes(handle, inode, path); - if (err) - goto out; - depth = ext_depth(inode); - } + ext4_ext_try_to_merge(inode, path, ex); + /* Mark modified extent as dirty */ err = ext4_ext_dirty(handle, inode, path + depth); out: @@ -3302,15 +3346,19 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t newblock = 0; int err = 0, depth, ret; unsigned int allocated = 0; + unsigned int punched_out = 0; + unsigned int result = 0; struct ext4_allocation_request ar; ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; + struct ext4_map_blocks punch_map; ext_debug("blocks %u/%u requested for inode %lu\n", map->m_lblk, map->m_len, inode->i_ino); trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); /* check in cache */ - if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) { + if (ext4_ext_in_cache(inode, map->m_lblk, &newex) && + ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0)) { if (!newex.ee_start_lo && !newex.ee_start_hi) { if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { /* @@ -3375,16 +3423,84 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, ee_block, ee_len, newblock); - /* Do not put uninitialized extent in the cache */ - if (!ext4_ext_is_uninitialized(ex)) { - ext4_ext_put_in_cache(inode, ee_block, - ee_len, ee_start); - goto out; + if ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0) { + /* + * Do not put uninitialized extent + * in the cache + */ + if (!ext4_ext_is_uninitialized(ex)) { + ext4_ext_put_in_cache(inode, ee_block, + ee_len, ee_start); + goto out; + } + ret = ext4_ext_handle_uninitialized_extents( + handle, inode, map, path, flags, + allocated, newblock); + return ret; } - ret = ext4_ext_handle_uninitialized_extents(handle, - inode, map, path, flags, allocated, - newblock); - return ret; + + /* + * Punch out the map length, but only to the + * end of the extent + */ + punched_out = allocated < map->m_len ? + allocated : map->m_len; + + /* + * Sense extents need to be converted to + * uninitialized, they must fit in an + * uninitialized extent + */ + if (punched_out > EXT_UNINIT_MAX_LEN) + punched_out = EXT_UNINIT_MAX_LEN; + + punch_map.m_lblk = map->m_lblk; + punch_map.m_pblk = newblock; + punch_map.m_len = punched_out; + punch_map.m_flags = 0; + + /* Check to see if the extent needs to be split */ + if (punch_map.m_len != ee_len || + punch_map.m_lblk != ee_block) { + + ret = ext4_split_extent(handle, inode, + path, &punch_map, 0, + EXT4_GET_BLOCKS_PUNCH_OUT_EXT | + EXT4_GET_BLOCKS_PRE_IO); + + if (ret < 0) { + err = ret; + goto out2; + } + /* + * find extent for the block at + * the start of the hole + */ + ext4_ext_drop_refs(path); + kfree(path); + + path = ext4_ext_find_extent(inode, + map->m_lblk, NULL); + if (IS_ERR(path)) { + err = PTR_ERR(path); + path = NULL; + goto out2; + } + + depth = ext_depth(inode); + ex = path[depth].p_ext; + ee_len = ext4_ext_get_actual_len(ex); + ee_block = le32_to_cpu(ex->ee_block); + ee_start = ext4_ext_pblock(ex); + + } + + ext4_ext_mark_uninitialized(ex); + + err = ext4_ext_remove_space(inode, map->m_lblk, + map->m_lblk + punched_out); + + goto out2; } } @@ -3446,6 +3562,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, else /* disable in-core preallocation for non-regular files */ ar.flags = 0; + if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE) + ar.flags |= EXT4_MB_HINT_NOPREALLOC; newblock = ext4_mb_new_blocks(handle, &ar, &err); if (!newblock) goto out2; @@ -3529,7 +3647,11 @@ out2: } trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, newblock, map->m_len, err ? err : allocated); - return err ? err : allocated; + + result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ? + punched_out : allocated; + + return err ? err : result; } void ext4_ext_truncate(struct inode *inode) @@ -3577,7 +3699,7 @@ void ext4_ext_truncate(struct inode *inode) last_block = (inode->i_size + sb->s_blocksize - 1) >> EXT4_BLOCK_SIZE_BITS(sb); - err = ext4_ext_remove_space(inode, last_block); + err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCK); /* In a multi-transaction truncate, we only make the final * transaction synchronous. @@ -3585,8 +3707,9 @@ void ext4_ext_truncate(struct inode *inode) if (IS_SYNC(inode)) ext4_handle_sync(handle); -out_stop: up_write(&EXT4_I(inode)->i_data_sem); + +out_stop: /* * If this was a simple ftruncate() and the file will remain alive, * then we need to clear up the orphan record which we created above. @@ -3651,10 +3774,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) struct ext4_map_blocks map; unsigned int credits, blkbits = inode->i_blkbits; - /* We only support the FALLOC_FL_KEEP_SIZE mode */ - if (mode & ~FALLOC_FL_KEEP_SIZE) - return -EOPNOTSUPP; - /* * currently supporting (pre)allocate mode for extent-based * files _only_ @@ -3662,6 +3781,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return -EOPNOTSUPP; + /* Return error if mode is not supported */ + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + return -EOPNOTSUPP; + + if (mode & FALLOC_FL_PUNCH_HOLE) + return ext4_punch_hole(file, offset, len); + trace_ext4_fallocate_enter(inode, offset, len, mode); map.m_lblk = offset >> blkbits; /* @@ -3691,7 +3817,8 @@ retry: break; } ret = ext4_map_blocks(handle, inode, &map, - EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); + EXT4_GET_BLOCKS_CREATE_UNINIT_EXT | + EXT4_GET_BLOCKS_NO_NORMALIZE); if (ret <= 0) { #ifdef EXT4FS_DEBUG WARN_ON(ret <= 0); @@ -3822,6 +3949,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, pgoff_t last_offset; pgoff_t offset; pgoff_t index; + pgoff_t start_index = 0; struct page **pages = NULL; struct buffer_head *bh = NULL; struct buffer_head *head = NULL; @@ -3848,39 +3976,57 @@ out: kfree(pages); return EXT_CONTINUE; } + index = 0; +next_page: /* Try to find the 1st mapped buffer. */ - end = ((__u64)pages[0]->index << PAGE_SHIFT) >> + end = ((__u64)pages[index]->index << PAGE_SHIFT) >> blksize_bits; - if (!page_has_buffers(pages[0])) + if (!page_has_buffers(pages[index])) goto out; - head = page_buffers(pages[0]); + head = page_buffers(pages[index]); if (!head) goto out; + index++; bh = head; do { - if (buffer_mapped(bh)) { + if (end >= newex->ec_block + + newex->ec_len) + /* The buffer is out of + * the request range. + */ + goto out; + + if (buffer_mapped(bh) && + end >= newex->ec_block) { + start_index = index - 1; /* get the 1st mapped buffer. */ - if (end > newex->ec_block + - newex->ec_len) - /* The buffer is out of - * the request range. - */ - goto out; goto found_mapped_buffer; } + bh = bh->b_this_page; end++; } while (bh != head); - /* No mapped buffer found. */ - goto out; + /* No mapped buffer in the range found in this page, + * We need to look up next page. + */ + if (index >= ret) { + /* There is no page left, but we need to limit + * newex->ec_len. + */ + newex->ec_len = end - newex->ec_block; + goto out; + } + goto next_page; } else { /*Find contiguous delayed buffers. */ if (ret > 0 && pages[0]->index == last_offset) head = page_buffers(pages[0]); bh = head; + index = 1; + start_index = 0; } found_mapped_buffer: @@ -3903,7 +4049,7 @@ found_mapped_buffer: end++; } while (bh != head); - for (index = 1; index < ret; index++) { + for (; index < ret; index++) { if (!page_has_buffers(pages[index])) { bh = NULL; break; @@ -3913,8 +4059,10 @@ found_mapped_buffer: bh = NULL; break; } + if (pages[index]->index != - pages[0]->index + index) { + pages[start_index]->index + index + - start_index) { /* Blocks are not contiguous. */ bh = NULL; break; @@ -4006,6 +4154,177 @@ static int ext4_xattr_fiemap(struct inode *inode, return (error < 0 ? error : 0); } +/* + * ext4_ext_punch_hole + * + * Punches a hole of "length" bytes in a file starting + * at byte "offset" + * + * @inode: The inode of the file to punch a hole in + * @offset: The starting byte offset of the hole + * @length: The length of the hole + * + * Returns the number of blocks removed or negative on err + */ +int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct super_block *sb = inode->i_sb; + struct ext4_ext_cache cache_ex; + ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks; + struct address_space *mapping = inode->i_mapping; + struct ext4_map_blocks map; + handle_t *handle; + loff_t first_block_offset, last_block_offset, block_len; + loff_t first_page, last_page, first_page_offset, last_page_offset; + int ret, credits, blocks_released, err = 0; + + first_block = (offset + sb->s_blocksize - 1) >> + EXT4_BLOCK_SIZE_BITS(sb); + last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); + + first_block_offset = first_block << EXT4_BLOCK_SIZE_BITS(sb); + last_block_offset = last_block << EXT4_BLOCK_SIZE_BITS(sb); + + first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + last_page = (offset + length) >> PAGE_CACHE_SHIFT; + + first_page_offset = first_page << PAGE_CACHE_SHIFT; + last_page_offset = last_page << PAGE_CACHE_SHIFT; + + /* + * Write out all dirty pages to avoid race conditions + * Then release them. + */ + if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { + err = filemap_write_and_wait_range(mapping, + first_page_offset == 0 ? 0 : first_page_offset-1, + last_page_offset); + + if (err) + return err; + } + + /* Now release the pages */ + if (last_page_offset > first_page_offset) { + truncate_inode_pages_range(mapping, first_page_offset, + last_page_offset-1); + } + + /* finish any pending end_io work */ + ext4_flush_completed_IO(inode); + + credits = ext4_writepage_trans_blocks(inode); + handle = ext4_journal_start(inode, credits); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + err = ext4_orphan_add(handle, inode); + if (err) + goto out; + + /* + * Now we need to zero out the un block aligned data. + * If the file is smaller than a block, just + * zero out the middle + */ + if (first_block > last_block) + ext4_block_zero_page_range(handle, mapping, offset, length); + else { + /* zero out the head of the hole before the first block */ + block_len = first_block_offset - offset; + if (block_len > 0) + ext4_block_zero_page_range(handle, mapping, + offset, block_len); + + /* zero out the tail of the hole after the last block */ + block_len = offset + length - last_block_offset; + if (block_len > 0) { + ext4_block_zero_page_range(handle, mapping, + last_block_offset, block_len); + } + } + + /* If there are no blocks to remove, return now */ + if (first_block >= last_block) + goto out; + + down_write(&EXT4_I(inode)->i_data_sem); + ext4_ext_invalidate_cache(inode); + ext4_discard_preallocations(inode); + + /* + * Loop over all the blocks and identify blocks + * that need to be punched out + */ + iblock = first_block; + blocks_released = 0; + while (iblock < last_block) { + max_blocks = last_block - iblock; + num_blocks = 1; + memset(&map, 0, sizeof(map)); + map.m_lblk = iblock; + map.m_len = max_blocks; + ret = ext4_ext_map_blocks(handle, inode, &map, + EXT4_GET_BLOCKS_PUNCH_OUT_EXT); + + if (ret > 0) { + blocks_released += ret; + num_blocks = ret; + } else if (ret == 0) { + /* + * If map blocks could not find the block, + * then it is in a hole. If the hole was + * not already cached, then map blocks should + * put it in the cache. So we can get the hole + * out of the cache + */ + memset(&cache_ex, 0, sizeof(cache_ex)); + if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) && + !cache_ex.ec_start) { + + /* The hole is cached */ + num_blocks = cache_ex.ec_block + + cache_ex.ec_len - iblock; + + } else { + /* The block could not be identified */ + err = -EIO; + break; + } + } else { + /* Map blocks error */ + err = ret; + break; + } + + if (num_blocks == 0) { + /* This condition should never happen */ + ext_debug("Block lookup failed"); + err = -EIO; + break; + } + + iblock += num_blocks; + } + + if (blocks_released > 0) { + ext4_ext_invalidate_cache(inode); + ext4_discard_preallocations(inode); + } + + if (IS_SYNC(inode)) + ext4_handle_sync(handle); + + up_write(&EXT4_I(inode)->i_data_sem); + +out: + ext4_orphan_del(handle, inode); + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); + ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); + return err; +} int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len) { @@ -4042,4 +4361,3 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, return error; } - diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 7b80d543b89e..2c0972322009 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -272,7 +272,6 @@ const struct file_operations ext4_file_operations = { }; const struct inode_operations ext4_file_inode_operations = { - .truncate = ext4_truncate, .setattr = ext4_setattr, .getattr = ext4_getattr, #ifdef CONFIG_EXT4_FS_XATTR diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index e9473cbe80df..ce66d2fe826c 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -36,7 +36,7 @@ static void dump_completed_IO(struct inode * inode) { -#ifdef EXT4_DEBUG +#ifdef EXT4FS_DEBUG struct list_head *cur, *before, *after; ext4_io_end_t *io, *io0, *io1; unsigned long flags; @@ -172,6 +172,7 @@ int ext4_sync_file(struct file *file, int datasync) journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; int ret; tid_t commit_tid; + bool needs_barrier = false; J_ASSERT(ext4_journal_current_handle() == NULL); @@ -211,22 +212,12 @@ int ext4_sync_file(struct file *file, int datasync) } commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; - if (jbd2_log_start_commit(journal, commit_tid)) { - /* - * When the journal is on a different device than the - * fs data disk, we need to issue the barrier in - * writeback mode. (In ordered mode, the jbd2 layer - * will take care of issuing the barrier. In - * data=journal, all of the data blocks are written to - * the journal device.) - */ - if (ext4_should_writeback_data(inode) && - (journal->j_fs_dev != journal->j_dev) && - (journal->j_flags & JBD2_BARRIER)) - blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, - NULL); - ret = jbd2_log_wait_commit(journal, commit_tid); - } else if (journal->j_flags & JBD2_BARRIER) + if (journal->j_flags & JBD2_BARRIER && + !jbd2_trans_will_send_data_barrier(journal, commit_tid)) + needs_barrier = true; + jbd2_log_start_commit(journal, commit_tid); + ret = jbd2_log_wait_commit(journal, commit_tid); + if (needs_barrier) blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); out: trace_ext4_sync_file_exit(inode, ret); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f2fa5e8a582c..50d0e9c64584 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -639,8 +639,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, while (target > 0) { count = target; /* allocating blocks for indirect blocks and direct blocks */ - current_block = ext4_new_meta_blocks(handle, inode, - goal, &count, err); + current_block = ext4_new_meta_blocks(handle, inode, goal, + 0, &count, err); if (*err) goto failed_out; @@ -1930,7 +1930,7 @@ repeat: * We do still charge estimated metadata to the sb though; * we cannot afford to run out of free blocks. */ - if (ext4_claim_free_blocks(sbi, md_needed + 1)) { + if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) { dquot_release_reservation_block(inode, 1); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { yield(); @@ -2796,9 +2796,7 @@ static int write_cache_pages_da(struct address_space *mapping, continue; } - if (PageWriteback(page)) - wait_on_page_writeback(page); - + wait_on_page_writeback(page); BUG_ON(PageWriteback(page)); if (mpd->next_page != page->index) @@ -3513,7 +3511,7 @@ retry: loff_t end = offset + iov_length(iov, nr_segs); if (end > isize) - vmtruncate(inode, isize); + ext4_truncate_failed_write(inode); } } if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) @@ -3916,9 +3914,30 @@ void ext4_set_aops(struct inode *inode) int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from) { + unsigned offset = from & (PAGE_CACHE_SIZE-1); + unsigned length; + unsigned blocksize; + struct inode *inode = mapping->host; + + blocksize = inode->i_sb->s_blocksize; + length = blocksize - (offset & (blocksize - 1)); + + return ext4_block_zero_page_range(handle, mapping, from, length); +} + +/* + * ext4_block_zero_page_range() zeros out a mapping of length 'length' + * starting from file offset 'from'. The range to be zero'd must + * be contained with in one block. If the specified range exceeds + * the end of the block it will be shortened to end of the block + * that cooresponds to 'from' + */ +int ext4_block_zero_page_range(handle_t *handle, + struct address_space *mapping, loff_t from, loff_t length) +{ ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); - unsigned blocksize, length, pos; + unsigned blocksize, max, pos; ext4_lblk_t iblock; struct inode *inode = mapping->host; struct buffer_head *bh; @@ -3931,7 +3950,15 @@ int ext4_block_truncate_page(handle_t *handle, return -EINVAL; blocksize = inode->i_sb->s_blocksize; - length = blocksize - (offset & (blocksize - 1)); + max = blocksize - (offset & (blocksize - 1)); + + /* + * correct length if it does not fall between + * 'from' and the end of the block + */ + if (length > max || length < 0) + length = max; + iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); if (!page_has_buffers(page)) @@ -4380,8 +4407,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, int ext4_can_truncate(struct inode *inode) { - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return 0; if (S_ISREG(inode->i_mode)) return 1; if (S_ISDIR(inode->i_mode)) @@ -4392,6 +4417,31 @@ int ext4_can_truncate(struct inode *inode) } /* + * ext4_punch_hole: punches a hole in a file by releaseing the blocks + * associated with the given offset and length + * + * @inode: File inode + * @offset: The offset where the hole will begin + * @len: The length of the hole + * + * Returns: 0 on sucess or negative on failure + */ + +int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) +{ + struct inode *inode = file->f_path.dentry->d_inode; + if (!S_ISREG(inode->i_mode)) + return -ENOTSUPP; + + if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { + /* TODO: Add support for non extent hole punching */ + return -ENOTSUPP; + } + + return ext4_ext_punch_hole(file, offset, length); +} + +/* * ext4_truncate() * * We block out ext4_get_block() block instantiations across the entire @@ -4617,7 +4667,7 @@ static int __ext4_get_inode_loc(struct inode *inode, /* * Figure out the offset within the block group inode table */ - inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb)); + inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; inode_offset = ((inode->i_ino - 1) % EXT4_INODES_PER_GROUP(sb)); block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); @@ -5311,8 +5361,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE && - (attr->ia_size < inode->i_size || - (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) { + (attr->ia_size < inode->i_size)) { handle_t *handle; handle = ext4_journal_start(inode, 3); @@ -5346,14 +5395,15 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) goto err_out; } } - /* ext4_truncate will clear the flag */ - if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) - ext4_truncate(inode); } - if ((attr->ia_valid & ATTR_SIZE) && - attr->ia_size != i_size_read(inode)) - rc = vmtruncate(inode, attr->ia_size); + if (attr->ia_valid & ATTR_SIZE) { + if (attr->ia_size != i_size_read(inode)) { + truncate_setsize(inode, attr->ia_size); + ext4_truncate(inode); + } else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) + ext4_truncate(inode); + } if (!rc) { setattr_copy(inode, attr); @@ -5811,15 +5861,19 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) goto out_unlock; } ret = 0; - if (PageMappedToDisk(page)) - goto out_unlock; + + lock_page(page); + wait_on_page_writeback(page); + if (PageMappedToDisk(page)) { + up_read(&inode->i_alloc_sem); + return VM_FAULT_LOCKED; + } if (page->index == size >> PAGE_CACHE_SHIFT) len = size & ~PAGE_CACHE_MASK; else len = PAGE_CACHE_SIZE; - lock_page(page); /* * return if we have all the buffers mapped. This avoid * the need to call write_begin/write_end which does a @@ -5829,8 +5883,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (page_has_buffers(page)) { if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, ext4_bh_unmapped)) { - unlock_page(page); - goto out_unlock; + up_read(&inode->i_alloc_sem); + return VM_FAULT_LOCKED; } } unlock_page(page); @@ -5850,6 +5904,16 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret < 0) goto out_unlock; ret = 0; + + /* + * write_begin/end might have created a dirty page and someone + * could wander in and start the IO. Make sure that hasn't + * happened. + */ + lock_page(page); + wait_on_page_writeback(page); + up_read(&inode->i_alloc_sem); + return VM_FAULT_LOCKED; out_unlock: if (ret) ret = VM_FAULT_SIGBUS; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d8a16eecf1d5..859f2ae8864e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -787,6 +787,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) struct inode *inode; char *data; char *bitmap; + struct ext4_group_info *grinfo; mb_debug(1, "init page %lu\n", page->index); @@ -819,6 +820,18 @@ static int ext4_mb_init_cache(struct page *page, char *incore) if (first_group + i >= ngroups) break; + grinfo = ext4_get_group_info(sb, first_group + i); + /* + * If page is uptodate then we came here after online resize + * which added some new uninitialized group info structs, so + * we must skip all initialized uptodate buddies on the page, + * which may be currently in use by an allocating task. + */ + if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) { + bh[i] = NULL; + continue; + } + err = -EIO; desc = ext4_get_group_desc(sb, first_group + i, NULL); if (desc == NULL) @@ -871,26 +884,28 @@ static int ext4_mb_init_cache(struct page *page, char *incore) } /* wait for I/O completion */ - for (i = 0; i < groups_per_page && bh[i]; i++) - wait_on_buffer(bh[i]); + for (i = 0; i < groups_per_page; i++) + if (bh[i]) + wait_on_buffer(bh[i]); err = -EIO; - for (i = 0; i < groups_per_page && bh[i]; i++) - if (!buffer_uptodate(bh[i])) + for (i = 0; i < groups_per_page; i++) + if (bh[i] && !buffer_uptodate(bh[i])) goto out; err = 0; first_block = page->index * blocks_per_page; - /* init the page */ - memset(page_address(page), 0xff, PAGE_CACHE_SIZE); for (i = 0; i < blocks_per_page; i++) { int group; - struct ext4_group_info *grinfo; group = (first_block + i) >> 1; if (group >= ngroups) break; + if (!bh[group - first_group]) + /* skip initialized uptodate buddy */ + continue; + /* * data carry information regarding this * particular group in the format specified @@ -919,6 +934,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore) * incore got set to the group block bitmap below */ ext4_lock_group(sb, group); + /* init the buddy */ + memset(data, 0xff, blocksize); ext4_mb_generate_buddy(sb, data, incore, group); ext4_unlock_group(sb, group); incore = NULL; @@ -948,7 +965,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) out: if (bh) { - for (i = 0; i < groups_per_page && bh[i]; i++) + for (i = 0; i < groups_per_page; i++) brelse(bh[i]); if (bh != &bhs) kfree(bh); @@ -957,22 +974,21 @@ out: } /* - * lock the group_info alloc_sem of all the groups - * belonging to the same buddy cache page. This - * make sure other parallel operation on the buddy - * cache doesn't happen whild holding the buddy cache - * lock + * Lock the buddy and bitmap pages. This make sure other parallel init_group + * on the same buddy page doesn't happen whild holding the buddy page lock. + * Return locked buddy and bitmap pages on e4b struct. If buddy and bitmap + * are on the same page e4b->bd_buddy_page is NULL and return value is 0. */ -static int ext4_mb_get_buddy_cache_lock(struct super_block *sb, - ext4_group_t group) +static int ext4_mb_get_buddy_page_lock(struct super_block *sb, + ext4_group_t group, struct ext4_buddy *e4b) { - int i; - int block, pnum; + struct inode *inode = EXT4_SB(sb)->s_buddy_cache; + int block, pnum, poff; int blocks_per_page; - int groups_per_page; - ext4_group_t ngroups = ext4_get_groups_count(sb); - ext4_group_t first_group; - struct ext4_group_info *grp; + struct page *page; + + e4b->bd_buddy_page = NULL; + e4b->bd_bitmap_page = NULL; blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; /* @@ -982,57 +998,40 @@ static int ext4_mb_get_buddy_cache_lock(struct super_block *sb, */ block = group * 2; pnum = block / blocks_per_page; - first_group = pnum * blocks_per_page / 2; - - groups_per_page = blocks_per_page >> 1; - if (groups_per_page == 0) - groups_per_page = 1; - /* read all groups the page covers into the cache */ - for (i = 0; i < groups_per_page; i++) { + poff = block % blocks_per_page; + page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); + if (!page) + return -EIO; + BUG_ON(page->mapping != inode->i_mapping); + e4b->bd_bitmap_page = page; + e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); - if ((first_group + i) >= ngroups) - break; - grp = ext4_get_group_info(sb, first_group + i); - /* take all groups write allocation - * semaphore. This make sure there is - * no block allocation going on in any - * of that groups - */ - down_write_nested(&grp->alloc_sem, i); + if (blocks_per_page >= 2) { + /* buddy and bitmap are on the same page */ + return 0; } - return i; + + block++; + pnum = block / blocks_per_page; + poff = block % blocks_per_page; + page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); + if (!page) + return -EIO; + BUG_ON(page->mapping != inode->i_mapping); + e4b->bd_buddy_page = page; + return 0; } -static void ext4_mb_put_buddy_cache_lock(struct super_block *sb, - ext4_group_t group, int locked_group) +static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b) { - int i; - int block, pnum; - int blocks_per_page; - ext4_group_t first_group; - struct ext4_group_info *grp; - - blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; - /* - * the buddy cache inode stores the block bitmap - * and buddy information in consecutive blocks. - * So for each group we need two blocks. - */ - block = group * 2; - pnum = block / blocks_per_page; - first_group = pnum * blocks_per_page / 2; - /* release locks on all the groups */ - for (i = 0; i < locked_group; i++) { - - grp = ext4_get_group_info(sb, first_group + i); - /* take all groups write allocation - * semaphore. This make sure there is - * no block allocation going on in any - * of that groups - */ - up_write(&grp->alloc_sem); + if (e4b->bd_bitmap_page) { + unlock_page(e4b->bd_bitmap_page); + page_cache_release(e4b->bd_bitmap_page); + } + if (e4b->bd_buddy_page) { + unlock_page(e4b->bd_buddy_page); + page_cache_release(e4b->bd_buddy_page); } - } /* @@ -1044,93 +1043,60 @@ static noinline_for_stack int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) { - int ret = 0; - void *bitmap; - int blocks_per_page; - int block, pnum, poff; - int num_grp_locked = 0; struct ext4_group_info *this_grp; - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct inode *inode = sbi->s_buddy_cache; - struct page *page = NULL, *bitmap_page = NULL; + struct ext4_buddy e4b; + struct page *page; + int ret = 0; mb_debug(1, "init group %u\n", group); - blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; this_grp = ext4_get_group_info(sb, group); /* * This ensures that we don't reinit the buddy cache * page which map to the group from which we are already * allocating. If we are looking at the buddy cache we would * have taken a reference using ext4_mb_load_buddy and that - * would have taken the alloc_sem lock. + * would have pinned buddy page to page cache. */ - num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); - if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { + ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b); + if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) { /* * somebody initialized the group * return without doing anything */ - ret = 0; goto err; } - /* - * the buddy cache inode stores the block bitmap - * and buddy information in consecutive blocks. - * So for each group we need two blocks. - */ - block = group * 2; - pnum = block / blocks_per_page; - poff = block % blocks_per_page; - page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); - if (page) { - BUG_ON(page->mapping != inode->i_mapping); - ret = ext4_mb_init_cache(page, NULL); - if (ret) { - unlock_page(page); - goto err; - } - unlock_page(page); - } - if (page == NULL || !PageUptodate(page)) { + + page = e4b.bd_bitmap_page; + ret = ext4_mb_init_cache(page, NULL); + if (ret) + goto err; + if (!PageUptodate(page)) { ret = -EIO; goto err; } mark_page_accessed(page); - bitmap_page = page; - bitmap = page_address(page) + (poff * sb->s_blocksize); - /* init buddy cache */ - block++; - pnum = block / blocks_per_page; - poff = block % blocks_per_page; - page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); - if (page == bitmap_page) { + if (e4b.bd_buddy_page == NULL) { /* * If both the bitmap and buddy are in * the same page we don't need to force * init the buddy */ - unlock_page(page); - } else if (page) { - BUG_ON(page->mapping != inode->i_mapping); - ret = ext4_mb_init_cache(page, bitmap); - if (ret) { - unlock_page(page); - goto err; - } - unlock_page(page); + ret = 0; + goto err; } - if (page == NULL || !PageUptodate(page)) { + /* init buddy cache */ + page = e4b.bd_buddy_page; + ret = ext4_mb_init_cache(page, e4b.bd_bitmap); + if (ret) + goto err; + if (!PageUptodate(page)) { ret = -EIO; goto err; } mark_page_accessed(page); err: - ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); - if (bitmap_page) - page_cache_release(bitmap_page); - if (page) - page_cache_release(page); + ext4_mb_put_buddy_page_lock(&e4b); return ret; } @@ -1164,24 +1130,8 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, e4b->bd_group = group; e4b->bd_buddy_page = NULL; e4b->bd_bitmap_page = NULL; - e4b->alloc_semp = &grp->alloc_sem; - - /* Take the read lock on the group alloc - * sem. This would make sure a parallel - * ext4_mb_init_group happening on other - * groups mapped by the page is blocked - * till we are done with allocation - */ -repeat_load_buddy: - down_read(e4b->alloc_semp); if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { - /* we need to check for group need init flag - * with alloc_semp held so that we can be sure - * that new blocks didn't get added to the group - * when we are loading the buddy cache - */ - up_read(e4b->alloc_semp); /* * we need full data about the group * to make a good selection @@ -1189,7 +1139,6 @@ repeat_load_buddy: ret = ext4_mb_init_group(sb, group); if (ret) return ret; - goto repeat_load_buddy; } /* @@ -1273,15 +1222,14 @@ repeat_load_buddy: return 0; err: + if (page) + page_cache_release(page); if (e4b->bd_bitmap_page) page_cache_release(e4b->bd_bitmap_page); if (e4b->bd_buddy_page) page_cache_release(e4b->bd_buddy_page); e4b->bd_buddy = NULL; e4b->bd_bitmap = NULL; - - /* Done with the buddy cache */ - up_read(e4b->alloc_semp); return ret; } @@ -1291,9 +1239,6 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) page_cache_release(e4b->bd_bitmap_page); if (e4b->bd_buddy_page) page_cache_release(e4b->bd_buddy_page); - /* Done with the buddy cache */ - if (e4b->alloc_semp) - up_read(e4b->alloc_semp); } @@ -1606,9 +1551,6 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, get_page(ac->ac_bitmap_page); ac->ac_buddy_page = e4b->bd_buddy_page; get_page(ac->ac_buddy_page); - /* on allocation we use ac to track the held semaphore */ - ac->alloc_semp = e4b->alloc_semp; - e4b->alloc_semp = NULL; /* store last allocated for subsequent stream allocation */ if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { spin_lock(&sbi->s_md_lock); @@ -2659,7 +2601,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) struct super_block *sb = journal->j_private; struct ext4_buddy e4b; struct ext4_group_info *db; - int err, ret, count = 0, count2 = 0; + int err, count = 0, count2 = 0; struct ext4_free_data *entry; struct list_head *l, *ltmp; @@ -2669,15 +2611,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) mb_debug(1, "gonna free %u blocks in group %u (0x%p):", entry->count, entry->group, entry); - if (test_opt(sb, DISCARD)) { - ret = ext4_issue_discard(sb, entry->group, - entry->start_blk, entry->count); - if (unlikely(ret == -EOPNOTSUPP)) { - ext4_warning(sb, "discard not supported, " - "disabling"); - clear_opt(sb, DISCARD); - } - } + if (test_opt(sb, DISCARD)) + ext4_issue_discard(sb, entry->group, + entry->start_blk, entry->count); err = ext4_mb_load_buddy(sb, entry->group, &e4b); /* we expect to find existing buddy because it's pinned */ @@ -4226,15 +4162,12 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) spin_unlock(&pa->pa_lock); } } - if (ac->alloc_semp) - up_read(ac->alloc_semp); if (pa) { /* * We want to add the pa to the right bucket. * Remove it from the list and while adding * make sure the list to which we are adding - * doesn't grow big. We need to release - * alloc_semp before calling ext4_mb_add_n_trim() + * doesn't grow big. */ if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) { spin_lock(pa->pa_obj_lock); @@ -4303,7 +4236,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, * there is enough free blocks to do block allocation * and verify allocation doesn't exceed the quota limits. */ - while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) { + while (ar->len && + ext4_claim_free_blocks(sbi, ar->len, ar->flags)) { + /* let others to free the space */ yield(); ar->len = ar->len >> 1; @@ -4313,9 +4248,15 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, return 0; } reserv_blks = ar->len; - while (ar->len && dquot_alloc_block(ar->inode, ar->len)) { - ar->flags |= EXT4_MB_HINT_NOPREALLOC; - ar->len--; + if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) { + dquot_alloc_block_nofail(ar->inode, ar->len); + } else { + while (ar->len && + dquot_alloc_block(ar->inode, ar->len)) { + + ar->flags |= EXT4_MB_HINT_NOPREALLOC; + ar->len--; + } } inquota = ar->len; if (ar->len == 0) { @@ -4704,6 +4645,127 @@ error_return: } /** + * ext4_add_groupblocks() -- Add given blocks to an existing group + * @handle: handle to this transaction + * @sb: super block + * @block: start physcial block to add to the block group + * @count: number of blocks to free + * + * This marks the blocks as free in the bitmap and buddy. + */ +void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, + ext4_fsblk_t block, unsigned long count) +{ + struct buffer_head *bitmap_bh = NULL; + struct buffer_head *gd_bh; + ext4_group_t block_group; + ext4_grpblk_t bit; + unsigned int i; + struct ext4_group_desc *desc; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_buddy e4b; + int err = 0, ret, blk_free_count; + ext4_grpblk_t blocks_freed; + struct ext4_group_info *grp; + + ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); + + ext4_get_group_no_and_offset(sb, block, &block_group, &bit); + grp = ext4_get_group_info(sb, block_group); + /* + * Check to see if we are freeing blocks across a group + * boundary. + */ + if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) + goto error_return; + + bitmap_bh = ext4_read_block_bitmap(sb, block_group); + if (!bitmap_bh) + goto error_return; + desc = ext4_get_group_desc(sb, block_group, &gd_bh); + if (!desc) + goto error_return; + + if (in_range(ext4_block_bitmap(sb, desc), block, count) || + in_range(ext4_inode_bitmap(sb, desc), block, count) || + in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || + in_range(block + count - 1, ext4_inode_table(sb, desc), + sbi->s_itb_per_group)) { + ext4_error(sb, "Adding blocks in system zones - " + "Block = %llu, count = %lu", + block, count); + goto error_return; + } + + BUFFER_TRACE(bitmap_bh, "getting write access"); + err = ext4_journal_get_write_access(handle, bitmap_bh); + if (err) + goto error_return; + + /* + * We are about to modify some metadata. Call the journal APIs + * to unshare ->b_data if a currently-committing transaction is + * using it + */ + BUFFER_TRACE(gd_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, gd_bh); + if (err) + goto error_return; + + for (i = 0, blocks_freed = 0; i < count; i++) { + BUFFER_TRACE(bitmap_bh, "clear bit"); + if (!mb_test_bit(bit + i, bitmap_bh->b_data)) { + ext4_error(sb, "bit already cleared for block %llu", + (ext4_fsblk_t)(block + i)); + BUFFER_TRACE(bitmap_bh, "bit already cleared"); + } else { + blocks_freed++; + } + } + + err = ext4_mb_load_buddy(sb, block_group, &e4b); + if (err) + goto error_return; + + /* + * need to update group_info->bb_free and bitmap + * with group lock held. generate_buddy look at + * them with group lock_held + */ + ext4_lock_group(sb, block_group); + mb_clear_bits(bitmap_bh->b_data, bit, count); + mb_free_blocks(NULL, &e4b, bit, count); + blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); + ext4_free_blks_set(sb, desc, blk_free_count); + desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); + ext4_unlock_group(sb, block_group); + percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); + + if (sbi->s_log_groups_per_flex) { + ext4_group_t flex_group = ext4_flex_group(sbi, block_group); + atomic_add(blocks_freed, + &sbi->s_flex_groups[flex_group].free_blocks); + } + + ext4_mb_unload_buddy(&e4b); + + /* We dirtied the bitmap block */ + BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); + err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); + + /* And the group descriptor block */ + BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); + ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); + if (!err) + err = ret; + +error_return: + brelse(bitmap_bh); + ext4_std_error(sb, err); + return; +} + +/** * ext4_trim_extent -- function to TRIM one single free extent in the group * @sb: super block for the file system * @start: starting block of the free extent in the alloc. group @@ -4715,11 +4777,10 @@ error_return: * one will allocate those blocks, mark it as used in buddy bitmap. This must * be called with under the group lock. */ -static int ext4_trim_extent(struct super_block *sb, int start, int count, - ext4_group_t group, struct ext4_buddy *e4b) +static void ext4_trim_extent(struct super_block *sb, int start, int count, + ext4_group_t group, struct ext4_buddy *e4b) { struct ext4_free_extent ex; - int ret = 0; assert_spin_locked(ext4_group_lock_ptr(sb, group)); @@ -4733,12 +4794,9 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count, */ mb_mark_used(e4b, &ex); ext4_unlock_group(sb, group); - - ret = ext4_issue_discard(sb, group, start, count); - + ext4_issue_discard(sb, group, start, count); ext4_lock_group(sb, group); mb_free_blocks(NULL, e4b, start, ex.fe_len); - return ret; } /** @@ -4760,21 +4818,26 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count, * the group buddy bitmap. This is done until whole group is scanned. */ static ext4_grpblk_t -ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, - ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) +ext4_trim_all_free(struct super_block *sb, ext4_group_t group, + ext4_grpblk_t start, ext4_grpblk_t max, + ext4_grpblk_t minblocks) { void *bitmap; ext4_grpblk_t next, count = 0; - ext4_group_t group; - int ret = 0; + struct ext4_buddy e4b; + int ret; - BUG_ON(e4b == NULL); + ret = ext4_mb_load_buddy(sb, group, &e4b); + if (ret) { + ext4_error(sb, "Error in loading buddy " + "information for %u", group); + return ret; + } + bitmap = e4b.bd_bitmap; - bitmap = e4b->bd_bitmap; - group = e4b->bd_group; - start = (e4b->bd_info->bb_first_free > start) ? - e4b->bd_info->bb_first_free : start; ext4_lock_group(sb, group); + start = (e4b.bd_info->bb_first_free > start) ? + e4b.bd_info->bb_first_free : start; while (start < max) { start = mb_find_next_zero_bit(bitmap, max, start); @@ -4783,10 +4846,8 @@ ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, next = mb_find_next_bit(bitmap, max, start); if ((next - start) >= minblocks) { - ret = ext4_trim_extent(sb, start, - next - start, group, e4b); - if (ret < 0) - break; + ext4_trim_extent(sb, start, + next - start, group, &e4b); count += next - start; } start = next + 1; @@ -4802,17 +4863,15 @@ ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, ext4_lock_group(sb, group); } - if ((e4b->bd_info->bb_free - count) < minblocks) + if ((e4b.bd_info->bb_free - count) < minblocks) break; } ext4_unlock_group(sb, group); + ext4_mb_unload_buddy(&e4b); ext4_debug("trimmed %d blocks in the group %d\n", count, group); - if (ret < 0) - count = ret; - return count; } @@ -4830,11 +4889,11 @@ ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, */ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) { - struct ext4_buddy e4b; + struct ext4_group_info *grp; ext4_group_t first_group, last_group; ext4_group_t group, ngroups = ext4_get_groups_count(sb); ext4_grpblk_t cnt = 0, first_block, last_block; - uint64_t start, len, minlen, trimmed; + uint64_t start, len, minlen, trimmed = 0; ext4_fsblk_t first_data_blk = le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); int ret = 0; @@ -4842,7 +4901,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) start = range->start >> sb->s_blocksize_bits; len = range->len >> sb->s_blocksize_bits; minlen = range->minlen >> sb->s_blocksize_bits; - trimmed = 0; if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) return -EINVAL; @@ -4863,11 +4921,12 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) return -EINVAL; for (group = first_group; group <= last_group; group++) { - ret = ext4_mb_load_buddy(sb, group, &e4b); - if (ret) { - ext4_error(sb, "Error in loading buddy " - "information for %u", group); - break; + grp = ext4_get_group_info(sb, group); + /* We only do this if the grp has never been initialized */ + if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { + ret = ext4_mb_init_group(sb, group); + if (ret) + break; } /* @@ -4880,16 +4939,14 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) last_block = first_block + len; len -= last_block - first_block; - if (e4b.bd_info->bb_free >= minlen) { - cnt = ext4_trim_all_free(sb, &e4b, first_block, + if (grp->bb_free >= minlen) { + cnt = ext4_trim_all_free(sb, group, first_block, last_block, minlen); if (cnt < 0) { ret = cnt; - ext4_mb_unload_buddy(&e4b); break; } } - ext4_mb_unload_buddy(&e4b); trimmed += cnt; first_block = 0; } diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 22bd4d7f289b..20b5e7bfebd1 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -193,11 +193,6 @@ struct ext4_allocation_context { __u8 ac_op; /* operation, for history only */ struct page *ac_bitmap_page; struct page *ac_buddy_page; - /* - * pointer to the held semaphore upon successful - * block allocation - */ - struct rw_semaphore *alloc_semp; struct ext4_prealloc_space *ac_pa; struct ext4_locality_group *ac_lg; }; @@ -215,7 +210,6 @@ struct ext4_buddy { struct super_block *bd_sb; __u16 bd_blkbits; ext4_group_t bd_group; - struct rw_semaphore *alloc_semp; }; #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 92816b4e0f16..b57b98fb44d1 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -376,7 +376,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, * We have the extent map build with the tmp inode. * Now copy the i_data across */ - ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS); + ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); /* diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c new file mode 100644 index 000000000000..9bdef3f537c5 --- /dev/null +++ b/fs/ext4/mmp.c @@ -0,0 +1,351 @@ +#include <linux/fs.h> +#include <linux/random.h> +#include <linux/buffer_head.h> +#include <linux/utsname.h> +#include <linux/kthread.h> + +#include "ext4.h" + +/* + * Write the MMP block using WRITE_SYNC to try to get the block on-disk + * faster. + */ +static int write_mmp_block(struct buffer_head *bh) +{ + mark_buffer_dirty(bh); + lock_buffer(bh); + bh->b_end_io = end_buffer_write_sync; + get_bh(bh); + submit_bh(WRITE_SYNC, bh); + wait_on_buffer(bh); + if (unlikely(!buffer_uptodate(bh))) + return 1; + + return 0; +} + +/* + * Read the MMP block. It _must_ be read from disk and hence we clear the + * uptodate flag on the buffer. + */ +static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, + ext4_fsblk_t mmp_block) +{ + struct mmp_struct *mmp; + + if (*bh) + clear_buffer_uptodate(*bh); + + /* This would be sb_bread(sb, mmp_block), except we need to be sure + * that the MD RAID device cache has been bypassed, and that the read + * is not blocked in the elevator. */ + if (!*bh) + *bh = sb_getblk(sb, mmp_block); + if (*bh) { + get_bh(*bh); + lock_buffer(*bh); + (*bh)->b_end_io = end_buffer_read_sync; + submit_bh(READ_SYNC, *bh); + wait_on_buffer(*bh); + if (!buffer_uptodate(*bh)) { + brelse(*bh); + *bh = NULL; + } + } + if (!*bh) { + ext4_warning(sb, "Error while reading MMP block %llu", + mmp_block); + return -EIO; + } + + mmp = (struct mmp_struct *)((*bh)->b_data); + if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) + return -EINVAL; + + return 0; +} + +/* + * Dump as much information as possible to help the admin. + */ +void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, + const char *function, unsigned int line, const char *msg) +{ + __ext4_warning(sb, function, line, msg); + __ext4_warning(sb, function, line, + "MMP failure info: last update time: %llu, last update " + "node: %s, last update device: %s\n", + (long long unsigned int) le64_to_cpu(mmp->mmp_time), + mmp->mmp_nodename, mmp->mmp_bdevname); +} + +/* + * kmmpd will update the MMP sequence every s_mmp_update_interval seconds + */ +static int kmmpd(void *data) +{ + struct super_block *sb = ((struct mmpd_data *) data)->sb; + struct buffer_head *bh = ((struct mmpd_data *) data)->bh; + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + struct mmp_struct *mmp; + ext4_fsblk_t mmp_block; + u32 seq = 0; + unsigned long failed_writes = 0; + int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval); + unsigned mmp_check_interval; + unsigned long last_update_time; + unsigned long diff; + int retval; + + mmp_block = le64_to_cpu(es->s_mmp_block); + mmp = (struct mmp_struct *)(bh->b_data); + mmp->mmp_time = cpu_to_le64(get_seconds()); + /* + * Start with the higher mmp_check_interval and reduce it if + * the MMP block is being updated on time. + */ + mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval, + EXT4_MMP_MIN_CHECK_INTERVAL); + mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); + bdevname(bh->b_bdev, mmp->mmp_bdevname); + + memcpy(mmp->mmp_nodename, init_utsname()->sysname, + sizeof(mmp->mmp_nodename)); + + while (!kthread_should_stop()) { + if (++seq > EXT4_MMP_SEQ_MAX) + seq = 1; + + mmp->mmp_seq = cpu_to_le32(seq); + mmp->mmp_time = cpu_to_le64(get_seconds()); + last_update_time = jiffies; + + retval = write_mmp_block(bh); + /* + * Don't spew too many error messages. Print one every + * (s_mmp_update_interval * 60) seconds. + */ + if (retval && (failed_writes % 60) == 0) { + ext4_error(sb, "Error writing to MMP block"); + failed_writes++; + } + + if (!(le32_to_cpu(es->s_feature_incompat) & + EXT4_FEATURE_INCOMPAT_MMP)) { + ext4_warning(sb, "kmmpd being stopped since MMP feature" + " has been disabled."); + EXT4_SB(sb)->s_mmp_tsk = NULL; + goto failed; + } + + if (sb->s_flags & MS_RDONLY) { + ext4_warning(sb, "kmmpd being stopped since filesystem " + "has been remounted as readonly."); + EXT4_SB(sb)->s_mmp_tsk = NULL; + goto failed; + } + + diff = jiffies - last_update_time; + if (diff < mmp_update_interval * HZ) + schedule_timeout_interruptible(mmp_update_interval * + HZ - diff); + + /* + * We need to make sure that more than mmp_check_interval + * seconds have not passed since writing. If that has happened + * we need to check if the MMP block is as we left it. + */ + diff = jiffies - last_update_time; + if (diff > mmp_check_interval * HZ) { + struct buffer_head *bh_check = NULL; + struct mmp_struct *mmp_check; + + retval = read_mmp_block(sb, &bh_check, mmp_block); + if (retval) { + ext4_error(sb, "error reading MMP data: %d", + retval); + + EXT4_SB(sb)->s_mmp_tsk = NULL; + goto failed; + } + + mmp_check = (struct mmp_struct *)(bh_check->b_data); + if (mmp->mmp_seq != mmp_check->mmp_seq || + memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename, + sizeof(mmp->mmp_nodename))) { + dump_mmp_msg(sb, mmp_check, + "Error while updating MMP info. " + "The filesystem seems to have been" + " multiply mounted."); + ext4_error(sb, "abort"); + goto failed; + } + put_bh(bh_check); + } + + /* + * Adjust the mmp_check_interval depending on how much time + * it took for the MMP block to be written. + */ + mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ, + EXT4_MMP_MAX_CHECK_INTERVAL), + EXT4_MMP_MIN_CHECK_INTERVAL); + mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); + } + + /* + * Unmount seems to be clean. + */ + mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN); + mmp->mmp_time = cpu_to_le64(get_seconds()); + + retval = write_mmp_block(bh); + +failed: + kfree(data); + brelse(bh); + return retval; +} + +/* + * Get a random new sequence number but make sure it is not greater than + * EXT4_MMP_SEQ_MAX. + */ +static unsigned int mmp_new_seq(void) +{ + u32 new_seq; + + do { + get_random_bytes(&new_seq, sizeof(u32)); + } while (new_seq > EXT4_MMP_SEQ_MAX); + + return new_seq; +} + +/* + * Protect the filesystem from being mounted more than once. + */ +int ext4_multi_mount_protect(struct super_block *sb, + ext4_fsblk_t mmp_block) +{ + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + struct buffer_head *bh = NULL; + struct mmp_struct *mmp = NULL; + struct mmpd_data *mmpd_data; + u32 seq; + unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval); + unsigned int wait_time = 0; + int retval; + + if (mmp_block < le32_to_cpu(es->s_first_data_block) || + mmp_block >= ext4_blocks_count(es)) { + ext4_warning(sb, "Invalid MMP block in superblock"); + goto failed; + } + + retval = read_mmp_block(sb, &bh, mmp_block); + if (retval) + goto failed; + + mmp = (struct mmp_struct *)(bh->b_data); + + if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL) + mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL; + + /* + * If check_interval in MMP block is larger, use that instead of + * update_interval from the superblock. + */ + if (mmp->mmp_check_interval > mmp_check_interval) + mmp_check_interval = mmp->mmp_check_interval; + + seq = le32_to_cpu(mmp->mmp_seq); + if (seq == EXT4_MMP_SEQ_CLEAN) + goto skip; + + if (seq == EXT4_MMP_SEQ_FSCK) { + dump_mmp_msg(sb, mmp, "fsck is running on the filesystem"); + goto failed; + } + + wait_time = min(mmp_check_interval * 2 + 1, + mmp_check_interval + 60); + + /* Print MMP interval if more than 20 secs. */ + if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4) + ext4_warning(sb, "MMP interval %u higher than expected, please" + " wait.\n", wait_time * 2); + + if (schedule_timeout_interruptible(HZ * wait_time) != 0) { + ext4_warning(sb, "MMP startup interrupted, failing mount\n"); + goto failed; + } + + retval = read_mmp_block(sb, &bh, mmp_block); + if (retval) + goto failed; + mmp = (struct mmp_struct *)(bh->b_data); + if (seq != le32_to_cpu(mmp->mmp_seq)) { + dump_mmp_msg(sb, mmp, + "Device is already active on another node."); + goto failed; + } + +skip: + /* + * write a new random sequence number. + */ + mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq()); + + retval = write_mmp_block(bh); + if (retval) + goto failed; + + /* + * wait for MMP interval and check mmp_seq. + */ + if (schedule_timeout_interruptible(HZ * wait_time) != 0) { + ext4_warning(sb, "MMP startup interrupted, failing mount\n"); + goto failed; + } + + retval = read_mmp_block(sb, &bh, mmp_block); + if (retval) + goto failed; + mmp = (struct mmp_struct *)(bh->b_data); + if (seq != le32_to_cpu(mmp->mmp_seq)) { + dump_mmp_msg(sb, mmp, + "Device is already active on another node."); + goto failed; + } + + mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL); + if (!mmpd_data) { + ext4_warning(sb, "not enough memory for mmpd_data"); + goto failed; + } + mmpd_data->sb = sb; + mmpd_data->bh = bh; + + /* + * Start a kernel thread to update the MMP block periodically. + */ + EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s", + bdevname(bh->b_bdev, + mmp->mmp_bdevname)); + if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) { + EXT4_SB(sb)->s_mmp_tsk = NULL; + kfree(mmpd_data); + ext4_warning(sb, "Unable to create kmmpd thread for %s.", + sb->s_id); + goto failed; + } + + return 0; + +failed: + brelse(bh); + return 1; +} + + diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index b9f3e7862f13..2b8304bf3c50 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -876,8 +876,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, * It needs to call wait_on_page_writeback() to wait for the * writeback of the page. */ - if (PageWriteback(page)) - wait_on_page_writeback(page); + wait_on_page_writeback(page); /* Release old bh and drop refs */ try_to_release_page(page, 0); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 67fd0b025858..b754b7721f51 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1413,10 +1413,22 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, frame->at = entries; frame->bh = bh; bh = bh2; + + ext4_handle_dirty_metadata(handle, dir, frame->bh); + ext4_handle_dirty_metadata(handle, dir, bh); + de = do_split(handle,dir, &bh, frame, &hinfo, &retval); - dx_release (frames); - if (!(de)) + if (!de) { + /* + * Even if the block split failed, we have to properly write + * out all the changes we did so far. Otherwise we can end up + * with corrupted filesystem. + */ + ext4_mark_inode_dirty(handle, dir); + dx_release(frames); return retval; + } + dx_release(frames); retval = add_dirent_to_buf(handle, dentry, inode, de, bh); brelse(bh); @@ -2240,6 +2252,7 @@ static int ext4_symlink(struct inode *dir, handle_t *handle; struct inode *inode; int l, err, retries = 0; + int credits; l = strlen(symname)+1; if (l > dir->i_sb->s_blocksize) @@ -2247,10 +2260,26 @@ static int ext4_symlink(struct inode *dir, dquot_initialize(dir); + if (l > EXT4_N_BLOCKS * 4) { + /* + * For non-fast symlinks, we just allocate inode and put it on + * orphan list in the first transaction => we need bitmap, + * group descriptor, sb, inode block, quota blocks. + */ + credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); + } else { + /* + * Fast symlink. We have to add entry to directory + * (EXT4_DATA_TRANS_BLOCKS + EXT4_INDEX_EXTRA_TRANS_BLOCKS), + * allocate new inode (bitmap, group descriptor, inode block, + * quota blocks, sb is already counted in previous macros). + */ + credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); + } retry: - handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + - EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); + handle = ext4_journal_start(dir, credits); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2263,21 +2292,44 @@ retry: if (IS_ERR(inode)) goto out_stop; - if (l > sizeof(EXT4_I(inode)->i_data)) { + if (l > EXT4_N_BLOCKS * 4) { inode->i_op = &ext4_symlink_inode_operations; ext4_set_aops(inode); /* - * page_symlink() calls into ext4_prepare/commit_write. - * We have a transaction open. All is sweetness. It also sets - * i_size in generic_commit_write(). + * We cannot call page_symlink() with transaction started + * because it calls into ext4_write_begin() which can wait + * for transaction commit if we are running out of space + * and thus we deadlock. So we have to stop transaction now + * and restart it when symlink contents is written. + * + * To keep fs consistent in case of crash, we have to put inode + * to orphan list in the mean time. */ + drop_nlink(inode); + err = ext4_orphan_add(handle, inode); + ext4_journal_stop(handle); + if (err) + goto err_drop_inode; err = __page_symlink(inode, symname, l, 1); + if (err) + goto err_drop_inode; + /* + * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS + * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified + */ + handle = ext4_journal_start(dir, + EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto err_drop_inode; + } + inc_nlink(inode); + err = ext4_orphan_del(handle, inode); if (err) { + ext4_journal_stop(handle); clear_nlink(inode); - unlock_new_inode(inode); - ext4_mark_inode_dirty(handle, inode); - iput(inode); - goto out_stop; + goto err_drop_inode; } } else { /* clear the extent format for fast symlink */ @@ -2293,6 +2345,10 @@ out_stop: if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; +err_drop_inode: + unlock_new_inode(inode); + iput(inode); + return err; } static int ext4_link(struct dentry *old_dentry, diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index b6dbd056fcb1..7bb8f76d470a 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -203,46 +203,29 @@ static void ext4_end_bio(struct bio *bio, int error) for (i = 0; i < io_end->num_io_pages; i++) { struct page *page = io_end->pages[i]->p_page; struct buffer_head *bh, *head; - int partial_write = 0; + loff_t offset; + loff_t io_end_offset; - head = page_buffers(page); - if (error) + if (error) { SetPageError(page); - BUG_ON(!head); - if (head->b_size != PAGE_CACHE_SIZE) { - loff_t offset; - loff_t io_end_offset = io_end->offset + io_end->size; + set_bit(AS_EIO, &page->mapping->flags); + head = page_buffers(page); + BUG_ON(!head); + + io_end_offset = io_end->offset + io_end->size; offset = (sector_t) page->index << PAGE_CACHE_SHIFT; bh = head; do { if ((offset >= io_end->offset) && - (offset+bh->b_size <= io_end_offset)) { - if (error) - buffer_io_error(bh); - - } - if (buffer_delay(bh)) - partial_write = 1; - else if (!buffer_mapped(bh)) - clear_buffer_dirty(bh); - else if (buffer_dirty(bh)) - partial_write = 1; + (offset+bh->b_size <= io_end_offset)) + buffer_io_error(bh); + offset += bh->b_size; bh = bh->b_this_page; } while (bh != head); } - /* - * If this is a partial write which happened to make - * all buffers uptodate then we can optimize away a - * bogus readpage() for the next read(). Here we - * 'discover' whether the page went uptodate as a - * result of this (potentially partial) write. - */ - if (!partial_write) - SetPageUptodate(page); - put_io_page(io_end->pages[i]); } io_end->num_io_pages = 0; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8553dfb310af..cc5c157aa11d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -38,6 +38,7 @@ #include <linux/ctype.h> #include <linux/log2.h> #include <linux/crc16.h> +#include <linux/cleancache.h> #include <asm/uaccess.h> #include <linux/kthread.h> @@ -75,11 +76,27 @@ static void ext4_write_super(struct super_block *sb); static int ext4_freeze(struct super_block *sb); static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data); +static inline int ext2_feature_set_ok(struct super_block *sb); +static inline int ext3_feature_set_ok(struct super_block *sb); static int ext4_feature_set_ok(struct super_block *sb, int readonly); static void ext4_destroy_lazyinit_thread(void); static void ext4_unregister_li_request(struct super_block *sb); static void ext4_clear_request_list(void); +#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) +static struct file_system_type ext2_fs_type = { + .owner = THIS_MODULE, + .name = "ext2", + .mount = ext4_mount, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; +#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type) +#else +#define IS_EXT2_SB(sb) (0) +#endif + + #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static struct file_system_type ext3_fs_type = { .owner = THIS_MODULE, @@ -806,6 +823,8 @@ static void ext4_put_super(struct super_block *sb) invalidate_bdev(sbi->journal_bdev); ext4_blkdev_remove(sbi); } + if (sbi->s_mmp_tsk) + kthread_stop(sbi->s_mmp_tsk); sb->s_fs_info = NULL; /* * Now that we are completely done shutting down the @@ -1096,7 +1115,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) if (!test_opt(sb, INIT_INODE_TABLE)) seq_puts(seq, ",noinit_inode_table"); - else if (sbi->s_li_wait_mult) + else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT) seq_printf(seq, ",init_inode_table=%u", (unsigned) sbi->s_li_wait_mult); @@ -1187,9 +1206,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); static const struct dquot_operations ext4_quota_operations = { -#ifdef CONFIG_QUOTA .get_reserved_space = ext4_get_reserved_space, -#endif .write_dquot = ext4_write_dquot, .acquire_dquot = ext4_acquire_dquot, .release_dquot = ext4_release_dquot, @@ -1900,7 +1917,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, ext4_msg(sb, KERN_WARNING, "warning: mounting fs with errors, " "running e2fsck is recommended"); - else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && + else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 && le16_to_cpu(es->s_mnt_count) >= (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) ext4_msg(sb, KERN_WARNING, @@ -1932,6 +1949,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, EXT4_INODES_PER_GROUP(sb), sbi->s_mount_opt, sbi->s_mount_opt2); + cleancache_init_fs(sb); return res; } @@ -2425,6 +2443,18 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, EXT4_SB(sb)->s_sectors_written_start) >> 1))); } +static ssize_t extent_cache_hits_show(struct ext4_attr *a, + struct ext4_sb_info *sbi, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_hits); +} + +static ssize_t extent_cache_misses_show(struct ext4_attr *a, + struct ext4_sb_info *sbi, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_misses); +} + static ssize_t inode_readahead_blks_store(struct ext4_attr *a, struct ext4_sb_info *sbi, const char *buf, size_t count) @@ -2482,6 +2512,8 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) EXT4_RO_ATTR(delayed_allocation_blocks); EXT4_RO_ATTR(session_write_kbytes); EXT4_RO_ATTR(lifetime_write_kbytes); +EXT4_RO_ATTR(extent_cache_hits); +EXT4_RO_ATTR(extent_cache_misses); EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, inode_readahead_blks_store, s_inode_readahead_blks); EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); @@ -2497,6 +2529,8 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(delayed_allocation_blocks), ATTR_LIST(session_write_kbytes), ATTR_LIST(lifetime_write_kbytes), + ATTR_LIST(extent_cache_hits), + ATTR_LIST(extent_cache_misses), ATTR_LIST(inode_readahead_blks), ATTR_LIST(inode_goal), ATTR_LIST(mb_stats), @@ -2659,12 +2693,6 @@ static void print_daily_error_info(unsigned long arg) mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ } -static void ext4_lazyinode_timeout(unsigned long data) -{ - struct task_struct *p = (struct task_struct *)data; - wake_up_process(p); -} - /* Find next suitable group and run ext4_init_inode_table */ static int ext4_run_li_request(struct ext4_li_request *elr) { @@ -2696,11 +2724,8 @@ static int ext4_run_li_request(struct ext4_li_request *elr) ret = ext4_init_inode_table(sb, group, elr->lr_timeout ? 0 : 1); if (elr->lr_timeout == 0) { - timeout = jiffies - timeout; - if (elr->lr_sbi->s_li_wait_mult) - timeout *= elr->lr_sbi->s_li_wait_mult; - else - timeout *= 20; + timeout = (jiffies - timeout) * + elr->lr_sbi->s_li_wait_mult; elr->lr_timeout = timeout; } elr->lr_next_sched = jiffies + elr->lr_timeout; @@ -2712,7 +2737,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr) /* * Remove lr_request from the list_request and free the - * request tructure. Should be called with li_list_mtx held + * request structure. Should be called with li_list_mtx held */ static void ext4_remove_li_request(struct ext4_li_request *elr) { @@ -2730,14 +2755,16 @@ static void ext4_remove_li_request(struct ext4_li_request *elr) static void ext4_unregister_li_request(struct super_block *sb) { - struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request; - - if (!ext4_li_info) + mutex_lock(&ext4_li_mtx); + if (!ext4_li_info) { + mutex_unlock(&ext4_li_mtx); return; + } mutex_lock(&ext4_li_info->li_list_mtx); - ext4_remove_li_request(elr); + ext4_remove_li_request(EXT4_SB(sb)->s_li_request); mutex_unlock(&ext4_li_info->li_list_mtx); + mutex_unlock(&ext4_li_mtx); } static struct task_struct *ext4_lazyinit_task; @@ -2756,17 +2783,10 @@ static int ext4_lazyinit_thread(void *arg) struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg; struct list_head *pos, *n; struct ext4_li_request *elr; - unsigned long next_wakeup; - DEFINE_WAIT(wait); + unsigned long next_wakeup, cur; BUG_ON(NULL == eli); - eli->li_timer.data = (unsigned long)current; - eli->li_timer.function = ext4_lazyinode_timeout; - - eli->li_task = current; - wake_up(&eli->li_wait_task); - cont_thread: while (true) { next_wakeup = MAX_JIFFY_OFFSET; @@ -2797,19 +2817,15 @@ cont_thread: if (freezing(current)) refrigerator(); - if ((time_after_eq(jiffies, next_wakeup)) || + cur = jiffies; + if ((time_after_eq(cur, next_wakeup)) || (MAX_JIFFY_OFFSET == next_wakeup)) { cond_resched(); continue; } - eli->li_timer.expires = next_wakeup; - add_timer(&eli->li_timer); - prepare_to_wait(&eli->li_wait_daemon, &wait, - TASK_INTERRUPTIBLE); - if (time_before(jiffies, next_wakeup)) - schedule(); - finish_wait(&eli->li_wait_daemon, &wait); + schedule_timeout_interruptible(next_wakeup - cur); + if (kthread_should_stop()) { ext4_clear_request_list(); goto exit_thread; @@ -2833,12 +2849,7 @@ exit_thread: goto cont_thread; } mutex_unlock(&eli->li_list_mtx); - del_timer_sync(&ext4_li_info->li_timer); - eli->li_task = NULL; - wake_up(&eli->li_wait_task); - kfree(ext4_li_info); - ext4_lazyinit_task = NULL; ext4_li_info = NULL; mutex_unlock(&ext4_li_mtx); @@ -2866,7 +2877,6 @@ static int ext4_run_lazyinit_thread(void) if (IS_ERR(ext4_lazyinit_task)) { int err = PTR_ERR(ext4_lazyinit_task); ext4_clear_request_list(); - del_timer_sync(&ext4_li_info->li_timer); kfree(ext4_li_info); ext4_li_info = NULL; printk(KERN_CRIT "EXT4: error %d creating inode table " @@ -2875,8 +2885,6 @@ static int ext4_run_lazyinit_thread(void) return err; } ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING; - - wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL); return 0; } @@ -2911,13 +2919,9 @@ static int ext4_li_info_new(void) if (!eli) return -ENOMEM; - eli->li_task = NULL; INIT_LIST_HEAD(&eli->li_request_list); mutex_init(&eli->li_list_mtx); - init_waitqueue_head(&eli->li_wait_daemon); - init_waitqueue_head(&eli->li_wait_task); - init_timer(&eli->li_timer); eli->li_state |= EXT4_LAZYINIT_QUIT; ext4_li_info = eli; @@ -2960,20 +2964,19 @@ static int ext4_register_li_request(struct super_block *sb, ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; int ret = 0; - if (sbi->s_li_request != NULL) + if (sbi->s_li_request != NULL) { + /* + * Reset timeout so it can be computed again, because + * s_li_wait_mult might have changed. + */ + sbi->s_li_request->lr_timeout = 0; return 0; + } if (first_not_zeroed == ngroups || (sb->s_flags & MS_RDONLY) || - !test_opt(sb, INIT_INODE_TABLE)) { - sbi->s_li_request = NULL; + !test_opt(sb, INIT_INODE_TABLE)) return 0; - } - - if (first_not_zeroed == ngroups) { - sbi->s_li_request = NULL; - return 0; - } elr = ext4_li_request_new(sb, first_not_zeroed); if (!elr) @@ -3166,6 +3169,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) set_opt(sb, DELALLOC); + /* + * set default s_li_wait_mult for lazyinit, for the case there is + * no mount option specified. + */ + sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; + if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, &journal_devnum, &journal_ioprio, NULL, 0)) { ext4_msg(sb, KERN_WARNING, @@ -3187,6 +3196,28 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "feature flags set on rev 0 fs, " "running e2fsck is recommended"); + if (IS_EXT2_SB(sb)) { + if (ext2_feature_set_ok(sb)) + ext4_msg(sb, KERN_INFO, "mounting ext2 file system " + "using the ext4 subsystem"); + else { + ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due " + "to feature incompatibilities"); + goto failed_mount; + } + } + + if (IS_EXT3_SB(sb)) { + if (ext3_feature_set_ok(sb)) + ext4_msg(sb, KERN_INFO, "mounting ext3 file system " + "using the ext4 subsystem"); + else { + ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due " + "to feature incompatibilities"); + goto failed_mount; + } + } + /* * Check feature flags regardless of the revision level, since we * previously didn't change the revision level when setting the flags, @@ -3459,6 +3490,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)); + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && + !(sb->s_flags & MS_RDONLY)) + if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) + goto failed_mount3; + /* * The first inode we look at is the journal inode. Don't try * root first: it may be modified in the journal! @@ -3474,7 +3510,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount_wq; } else { clear_opt(sb, DATA_FLAGS); - set_opt(sb, WRITEBACK_DATA); sbi->s_journal = NULL; needs_recovery = 0; goto no_journal; @@ -3707,6 +3742,8 @@ failed_mount3: percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyblocks_counter); + if (sbi->s_mmp_tsk) + kthread_stop(sbi->s_mmp_tsk); failed_mount2: for (i = 0; i < db_count; i++) brelse(sbi->s_group_desc[i]); @@ -4242,7 +4279,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) int enable_quota = 0; ext4_group_t g; unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; - int err; + int err = 0; #ifdef CONFIG_QUOTA int i; #endif @@ -4368,6 +4405,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; if (!ext4_setup_super(sb, es, 0)) sb->s_flags &= ~MS_RDONLY; + if (EXT4_HAS_INCOMPAT_FEATURE(sb, + EXT4_FEATURE_INCOMPAT_MMP)) + if (ext4_multi_mount_protect(sb, + le64_to_cpu(es->s_mmp_block))) { + err = -EROFS; + goto restore_opts; + } enable_quota = 1; } } @@ -4432,6 +4476,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; u64 fsid; + s64 bfree; if (test_opt(sb, MINIX_DF)) { sbi->s_overhead_last = 0; @@ -4475,8 +4520,10 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_type = EXT4_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; - buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - + bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); + /* prevent underflow in case that few free space is available */ + buf->f_bfree = max_t(s64, bfree, 0); buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); if (buf->f_bfree < ext4_r_blocks_count(es)) buf->f_bavail = 0; @@ -4652,6 +4699,9 @@ static int ext4_quota_off(struct super_block *sb, int type) if (test_opt(sb, DELALLOC)) sync_filesystem(sb); + if (!inode) + goto out; + /* Update modification times of quota files when userspace can * start looking at them */ handle = ext4_journal_start(inode, 1); @@ -4772,14 +4822,6 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, } #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) -static struct file_system_type ext2_fs_type = { - .owner = THIS_MODULE, - .name = "ext2", - .mount = ext4_mount, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, -}; - static inline void register_as_ext2(void) { int err = register_filesystem(&ext2_fs_type); @@ -4792,10 +4834,22 @@ static inline void unregister_as_ext2(void) { unregister_filesystem(&ext2_fs_type); } + +static inline int ext2_feature_set_ok(struct super_block *sb) +{ + if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP)) + return 0; + if (sb->s_flags & MS_RDONLY) + return 1; + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP)) + return 0; + return 1; +} MODULE_ALIAS("ext2"); #else static inline void register_as_ext2(void) { } static inline void unregister_as_ext2(void) { } +static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; } #endif #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) @@ -4811,10 +4865,24 @@ static inline void unregister_as_ext3(void) { unregister_filesystem(&ext3_fs_type); } + +static inline int ext3_feature_set_ok(struct super_block *sb) +{ + if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP)) + return 0; + if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) + return 0; + if (sb->s_flags & MS_RDONLY) + return 1; + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) + return 0; + return 1; +} MODULE_ALIAS("ext3"); #else static inline void register_as_ext3(void) { } static inline void unregister_as_ext3(void) { } +static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; } #endif static struct file_system_type ext4_fs_type = { @@ -4898,8 +4966,8 @@ static int __init ext4_init_fs(void) err = init_inodecache(); if (err) goto out1; - register_as_ext2(); register_as_ext3(); + register_as_ext2(); err = register_filesystem(&ext4_fs_type); if (err) goto out; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index b545ca1c459c..c757adc97250 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -820,8 +820,8 @@ inserted: if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; - block = ext4_new_meta_blocks(handle, inode, - goal, NULL, &error); + block = ext4_new_meta_blocks(handle, inode, goal, 0, + NULL, &error); if (error) goto cleanup; diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 3b222dafd15b..be15437c272e 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -326,6 +326,8 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) struct fat_slot_info sinfo; int err; + dentry_unhash(dentry); + lock_super(sb); /* * Check whether the directory is not in use, then check @@ -457,6 +459,9 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, old_inode = old_dentry->d_inode; new_inode = new_dentry->d_inode; + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + err = fat_scan(old_dir, old_name, &old_sinfo); if (err) { err = -EIO; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 20b4ea53fdc4..c61a6789f36c 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -824,6 +824,8 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) struct fat_slot_info sinfo; int err; + dentry_unhash(dentry); + lock_super(sb); err = fat_dir_empty(inode); @@ -931,6 +933,9 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, int err, is_dir, update_dotdot, corrupt = 0; struct super_block *sb = old_dir->i_sb; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; old_inode = old_dentry->d_inode; new_inode = new_dentry->d_inode; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b32eb29a4e6f..0d0e3faddcfa 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -667,6 +667,8 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) if (IS_ERR(req)) return PTR_ERR(req); + dentry_unhash(entry); + req->in.h.opcode = FUSE_RMDIR; req->in.h.nodeid = get_node_id(dir); req->in.numargs = 1; @@ -691,6 +693,10 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, struct fuse_rename_in inarg; struct fuse_conn *fc = get_fuse_conn(olddir); struct fuse_req *req = fuse_get_req(fc); + + if (newent->d_inode && S_ISDIR(newent->d_inode->i_mode)) + dentry_unhash(newent); + if (IS_ERR(req)) return PTR_ERR(req); diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index b4d70b13be92..1cb70cdba2c1 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -253,6 +253,9 @@ static int hfs_remove(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; int res; + if (S_ISDIR(inode->i_mode)) + dentry_unhash(dentry); + if (S_ISDIR(inode->i_mode) && inode->i_size != 2) return -ENOTEMPTY; res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name); @@ -283,6 +286,9 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* Unlink destination if it already exists */ if (new_dentry->d_inode) { + if (S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + res = hfs_remove(new_dir, new_dentry); if (res) return res; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 4df5059c25da..b28835091dd0 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -370,6 +370,8 @@ static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; int res; + dentry_unhash(dentry); + if (inode->i_size != 2) return -ENOTEMPTY; @@ -467,10 +469,12 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, /* Unlink destination if it already exists */ if (new_dentry->d_inode) { - if (S_ISDIR(new_dentry->d_inode->i_mode)) + if (S_ISDIR(new_dentry->d_inode->i_mode)) { + dentry_unhash(new_dentry); res = hfsplus_rmdir(new_dir, new_dentry); - else + } else { res = hfsplus_unlink(new_dir, new_dentry); + } if (res) return res; } diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 2638c834ed28..e6816b9e6903 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -683,6 +683,8 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry) char *file; int err; + dentry_unhash(dentry); + if ((file = dentry_name(dentry)) == NULL) return -ENOMEM; err = do_rmdir(file); @@ -736,6 +738,9 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, char *from_name, *to_name; int err; + if (to->d_inode && S_ISDIR(to->d_inode->i_mode)) + dentry_unhash(to); + if ((from_name = dentry_name(from)) == NULL) return -ENOMEM; if ((to_name = dentry_name(to)) == NULL) { diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 1f05839c27a7..ff0ce21c0867 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -395,7 +395,6 @@ again: dentry_unhash(dentry); if (!d_unhashed(dentry)) { - dput(dentry); hpfs_unlock(dir->i_sb); return -ENOSPC; } @@ -403,7 +402,6 @@ again: !S_ISREG(inode->i_mode) || get_write_access(inode)) { d_rehash(dentry); - dput(dentry); } else { struct iattr newattrs; /*printk("HPFS: truncating file before delete.\n");*/ @@ -411,7 +409,6 @@ again: newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; err = notify_change(dentry, &newattrs); put_write_access(inode); - dput(dentry); if (!err) goto again; } @@ -442,6 +439,8 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry) int err; int r; + dentry_unhash(dentry); + hpfs_adjust_length(name, &len); hpfs_lock(dir->i_sb); err = -ENOENT; @@ -535,6 +534,10 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct buffer_head *bh; struct fnode *fnode; int err; + + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + if ((err = hpfs_chk_name(new_name, &new_len))) return err; err = 0; hpfs_adjust_length(old_name, &old_len); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index e7a035781b7d..7aafeb8fa300 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -921,7 +921,8 @@ static int can_do_hugetlb_shm(void) return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); } -struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, +struct file *hugetlb_file_setup(const char *name, size_t size, + vm_flags_t acctflag, struct user_struct **user, int creat_flags) { int error = -ENOMEM; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 29148a81c783..7f21cf3aaf92 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -219,7 +219,6 @@ static int journal_submit_data_buffers(journal_t *journal, ret = err; spin_lock(&journal->j_list_lock); J_ASSERT(jinode->i_transaction == commit_transaction); - commit_transaction->t_flushed_data_blocks = 1; clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); smp_mb__after_clear_bit(); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); @@ -672,12 +671,16 @@ start_journal_io: err = 0; } + write_lock(&journal->j_state_lock); + J_ASSERT(commit_transaction->t_state == T_COMMIT); + commit_transaction->t_state = T_COMMIT_DFLUSH; + write_unlock(&journal->j_state_lock); /* * If the journal is not located on the file system device, * then we must flush the file system device before we issue * the commit record */ - if (commit_transaction->t_flushed_data_blocks && + if (commit_transaction->t_need_data_flush && (journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); @@ -754,8 +757,13 @@ wait_for_iobuf: required. */ JBUFFER_TRACE(jh, "file as BJ_Forget"); jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); - /* Wake up any transactions which were waiting for this - IO to complete */ + /* + * Wake up any transactions which were waiting for this IO to + * complete. The barrier must be here so that changes by + * jbd2_journal_file_buffer() take effect before wake_up_bit() + * does the waitqueue check. + */ + smp_mb(); wake_up_bit(&bh->b_state, BH_Unshadow); JBUFFER_TRACE(jh, "brelse shadowed buffer"); __brelse(bh); @@ -794,6 +802,10 @@ wait_for_iobuf: jbd2_journal_abort(journal, err); jbd_debug(3, "JBD: commit phase 5\n"); + write_lock(&journal->j_state_lock); + J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH); + commit_transaction->t_state = T_COMMIT_JFLUSH; + write_unlock(&journal->j_state_lock); if (!JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { @@ -949,7 +961,7 @@ restart_loop: jbd_debug(3, "JBD: commit phase 7\n"); - J_ASSERT(commit_transaction->t_state == T_COMMIT); + J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH); commit_transaction->t_start = jiffies; stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging, diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e0ec3db1c395..9a7826990304 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -479,9 +479,12 @@ int __jbd2_log_space_left(journal_t *journal) int __jbd2_log_start_commit(journal_t *journal, tid_t target) { /* - * Are we already doing a recent enough commit? + * The only transaction we can possibly wait upon is the + * currently running transaction (if it exists). Otherwise, + * the target tid must be an old one. */ - if (!tid_geq(journal->j_commit_request, target)) { + if (journal->j_running_transaction && + journal->j_running_transaction->t_tid == target) { /* * We want a new commit: OK, mark the request and wakeup the * commit thread. We do _not_ do the commit ourselves. @@ -493,7 +496,15 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target) journal->j_commit_sequence); wake_up(&journal->j_wait_commit); return 1; - } + } else if (!tid_geq(journal->j_commit_request, target)) + /* This should never happen, but if it does, preserve + the evidence before kjournald goes into a loop and + increments j_commit_sequence beyond all recognition. */ + WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n", + journal->j_commit_request, + journal->j_commit_sequence, + target, journal->j_running_transaction ? + journal->j_running_transaction->t_tid : 0); return 0; } @@ -577,6 +588,47 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) } /* + * Return 1 if a given transaction has not yet sent barrier request + * connected with a transaction commit. If 0 is returned, transaction + * may or may not have sent the barrier. Used to avoid sending barrier + * twice in common cases. + */ +int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid) +{ + int ret = 0; + transaction_t *commit_trans; + + if (!(journal->j_flags & JBD2_BARRIER)) + return 0; + read_lock(&journal->j_state_lock); + /* Transaction already committed? */ + if (tid_geq(journal->j_commit_sequence, tid)) + goto out; + commit_trans = journal->j_committing_transaction; + if (!commit_trans || commit_trans->t_tid != tid) { + ret = 1; + goto out; + } + /* + * Transaction is being committed and we already proceeded to + * submitting a flush to fs partition? + */ + if (journal->j_fs_dev != journal->j_dev) { + if (!commit_trans->t_need_data_flush || + commit_trans->t_state >= T_COMMIT_DFLUSH) + goto out; + } else { + if (commit_trans->t_state >= T_COMMIT_JFLUSH) + goto out; + } + ret = 1; +out: + read_unlock(&journal->j_state_lock); + return ret; +} +EXPORT_SYMBOL(jbd2_trans_will_send_data_barrier); + +/* * Wait for a specified commit to complete. * The caller may not hold the journal lock. */ diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 05fa77a23711..3eec82d32fd4 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -82,7 +82,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) */ /* - * Update transiaction's maximum wait time, if debugging is enabled. + * Update transaction's maximum wait time, if debugging is enabled. * * In order for t_max_wait to be reliable, it must be protected by a * lock. But doing so will mean that start_this_handle() can not be @@ -91,11 +91,10 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) * means that maximum wait time reported by the jbd2_run_stats * tracepoint will always be zero. */ -static inline void update_t_max_wait(transaction_t *transaction) +static inline void update_t_max_wait(transaction_t *transaction, + unsigned long ts) { #ifdef CONFIG_JBD2_DEBUG - unsigned long ts = jiffies; - if (jbd2_journal_enable_debug && time_after(transaction->t_start, ts)) { ts = jbd2_time_diff(ts, transaction->t_start); @@ -121,6 +120,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle, tid_t tid; int needed, need_to_start; int nblocks = handle->h_buffer_credits; + unsigned long ts = jiffies; if (nblocks > journal->j_max_transaction_buffers) { printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", @@ -271,7 +271,7 @@ repeat: /* OK, account for the buffers that this operation expects to * use and add the handle to the running transaction. */ - update_t_max_wait(transaction); + update_t_max_wait(transaction, ts); handle->h_transaction = transaction; atomic_inc(&transaction->t_updates); atomic_inc(&transaction->t_handle_count); @@ -316,7 +316,8 @@ static handle_t *new_handle(int nblocks) * This function is visible to journal users (like ext3fs), so is not * called with the journal already locked. * - * Return a pointer to a newly allocated handle, or NULL on failure + * Return a pointer to a newly allocated handle, or an ERR_PTR() value + * on failure. */ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask) { @@ -921,8 +922,8 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) */ JBUFFER_TRACE(jh, "cancelling revoke"); jbd2_journal_cancel_revoke(handle, jh); - jbd2_journal_put_journal_head(jh); out: + jbd2_journal_put_journal_head(jh); return err; } @@ -2147,6 +2148,13 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) jinode->i_next_transaction == transaction) goto done; + /* + * We only ever set this variable to 1 so the test is safe. Since + * t_need_data_flush is likely to be set, we do the test to save some + * cacheline bouncing + */ + if (!transaction->t_need_data_flush) + transaction->t_need_data_flush = 1; /* On some different transaction's list - should be * the committing one */ if (jinode->i_transaction) { diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 82faddd1f321..05f73328b28b 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -609,6 +609,8 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) int ret; uint32_t now = get_seconds(); + dentry_unhash(dentry); + for (fd = f->dents ; fd; fd = fd->next) { if (fd->ino) return -ENOTEMPTY; @@ -784,6 +786,9 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, uint8_t type; uint32_t now; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + /* The VFS will check for us and prevent trying to rename a * file over a directory and vice versa, but if it's a directory, * the VFS can't check whether the victim is empty. The filesystem diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index eaaf2b511e89..865df16a6cf3 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -360,6 +360,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name); + dentry_unhash(dentry); + /* Init inode for quota operations. */ dquot_initialize(dip); dquot_initialize(ip); @@ -1095,6 +1097,9 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, jfs_info("jfs_rename: %s %s", old_dentry->d_name.name, new_dentry->d_name.name); + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + dquot_initialize(old_dir); dquot_initialize(new_dir); diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 9ed89d1663f8..f34c9cde9e94 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -273,6 +273,8 @@ static int logfs_rmdir(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; + dentry_unhash(dentry); + if (!logfs_empty_dir(inode)) return -ENOTEMPTY; @@ -622,6 +624,9 @@ static int logfs_rename_cross(struct inode *old_dir, struct dentry *old_dentry, loff_t pos; int err; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + /* 1. locate source dd */ err = logfs_get_dd(old_dir, old_dentry, &dd, &pos); if (err) diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 6e6777f1b4b2..f60aed8db9c4 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -168,6 +168,8 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry) struct inode * inode = dentry->d_inode; int err = -ENOTEMPTY; + dentry_unhash(dentry); + if (minix_empty_dir(inode)) { err = minix_unlink(dir, dentry); if (!err) { @@ -190,6 +192,9 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry, struct minix_dir_entry * old_de; int err = -ENOENT; + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + old_de = minix_find_entry(old_dentry, &old_page); if (!old_de) goto out; diff --git a/fs/mpage.c b/fs/mpage.c index 0afc809e46e0..fdfae9fa98cd 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -27,6 +27,7 @@ #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/pagevec.h> +#include <linux/cleancache.h> /* * I/O completion handler for multipage BIOs. @@ -271,6 +272,12 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, SetPageMappedToDisk(page); } + if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) && + cleancache_get_page(page) == 0) { + SetPageUptodate(page); + goto confused; + } + /* * This page will go to BIO. Do we need to send this BIO off first? */ diff --git a/fs/namei.c b/fs/namei.c index 6ff858c049c0..2358b326b221 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -391,79 +391,28 @@ void path_put(struct path *path) } EXPORT_SYMBOL(path_put); -/** - * nameidata_drop_rcu - drop this nameidata out of rcu-walk - * @nd: nameidata pathwalk data to drop - * Returns: 0 on success, -ECHILD on failure - * +/* * Path walking has 2 modes, rcu-walk and ref-walk (see - * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt - * to drop out of rcu-walk mode and take normal reference counts on dentries - * and vfsmounts to transition to rcu-walk mode. __drop_rcu* functions take - * refcounts at the last known good point before rcu-walk got stuck, so - * ref-walk may continue from there. If this is not successful (eg. a seqcount - * has changed), then failure is returned and path walk restarts from the - * beginning in ref-walk mode. - * - * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into - * ref-walk. Must be called from rcu-walk context. + * Documentation/filesystems/path-lookup.txt). In situations when we can't + * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab + * normal reference counts on dentries and vfsmounts to transition to rcu-walk + * mode. Refcounts are grabbed at the last known good point before rcu-walk + * got stuck, so ref-walk may continue from there. If this is not successful + * (eg. a seqcount has changed), then failure is returned and it's up to caller + * to restart the path walk from the beginning in ref-walk mode. */ -static int nameidata_drop_rcu(struct nameidata *nd) -{ - struct fs_struct *fs = current->fs; - struct dentry *dentry = nd->path.dentry; - int want_root = 0; - - BUG_ON(!(nd->flags & LOOKUP_RCU)); - if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { - want_root = 1; - spin_lock(&fs->lock); - if (nd->root.mnt != fs->root.mnt || - nd->root.dentry != fs->root.dentry) - goto err_root; - } - spin_lock(&dentry->d_lock); - if (!__d_rcu_to_refcount(dentry, nd->seq)) - goto err; - BUG_ON(nd->inode != dentry->d_inode); - spin_unlock(&dentry->d_lock); - if (want_root) { - path_get(&nd->root); - spin_unlock(&fs->lock); - } - mntget(nd->path.mnt); - - rcu_read_unlock(); - br_read_unlock(vfsmount_lock); - nd->flags &= ~LOOKUP_RCU; - return 0; -err: - spin_unlock(&dentry->d_lock); -err_root: - if (want_root) - spin_unlock(&fs->lock); - return -ECHILD; -} - -/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ -static inline int nameidata_drop_rcu_maybe(struct nameidata *nd) -{ - if (nd->flags & LOOKUP_RCU) - return nameidata_drop_rcu(nd); - return 0; -} /** - * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk - * @nd: nameidata pathwalk data to drop - * @dentry: dentry to drop + * unlazy_walk - try to switch to ref-walk mode. + * @nd: nameidata pathwalk data + * @dentry: child of nd->path.dentry or NULL * Returns: 0 on success, -ECHILD on failure * - * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root, - * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on - * @nd. Must be called from rcu-walk context. + * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry + * for ref-walk mode. @dentry must be a path found by a do_lookup call on + * @nd or NULL. Must be called from rcu-walk context. */ -static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry) +static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) { struct fs_struct *fs = current->fs; struct dentry *parent = nd->path.dentry; @@ -478,18 +427,25 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry goto err_root; } spin_lock(&parent->d_lock); - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - if (!__d_rcu_to_refcount(dentry, nd->seq)) - goto err; - /* - * If the sequence check on the child dentry passed, then the child has - * not been removed from its parent. This means the parent dentry must - * be valid and able to take a reference at this point. - */ - BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent); - BUG_ON(!parent->d_count); - parent->d_count++; - spin_unlock(&dentry->d_lock); + if (!dentry) { + if (!__d_rcu_to_refcount(parent, nd->seq)) + goto err_parent; + BUG_ON(nd->inode != parent->d_inode); + } else { + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + if (!__d_rcu_to_refcount(dentry, nd->seq)) + goto err_child; + /* + * If the sequence check on the child dentry passed, then + * the child has not been removed from its parent. This + * means the parent dentry must be valid and able to take + * a reference at this point. + */ + BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent); + BUG_ON(!parent->d_count); + parent->d_count++; + spin_unlock(&dentry->d_lock); + } spin_unlock(&parent->d_lock); if (want_root) { path_get(&nd->root); @@ -501,8 +457,10 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry br_read_unlock(vfsmount_lock); nd->flags &= ~LOOKUP_RCU; return 0; -err: + +err_child: spin_unlock(&dentry->d_lock); +err_parent: spin_unlock(&parent->d_lock); err_root: if (want_root) @@ -510,59 +468,6 @@ err_root: return -ECHILD; } -/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ -static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) -{ - if (nd->flags & LOOKUP_RCU) { - if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) { - nd->flags &= ~LOOKUP_RCU; - if (!(nd->flags & LOOKUP_ROOT)) - nd->root.mnt = NULL; - rcu_read_unlock(); - br_read_unlock(vfsmount_lock); - return -ECHILD; - } - } - return 0; -} - -/** - * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk - * @nd: nameidata pathwalk data to drop - * Returns: 0 on success, -ECHILD on failure - * - * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk. - * nd->path should be the final element of the lookup, so nd->root is discarded. - * Must be called from rcu-walk context. - */ -static int nameidata_drop_rcu_last(struct nameidata *nd) -{ - struct dentry *dentry = nd->path.dentry; - - BUG_ON(!(nd->flags & LOOKUP_RCU)); - nd->flags &= ~LOOKUP_RCU; - if (!(nd->flags & LOOKUP_ROOT)) - nd->root.mnt = NULL; - spin_lock(&dentry->d_lock); - if (!__d_rcu_to_refcount(dentry, nd->seq)) - goto err_unlock; - BUG_ON(nd->inode != dentry->d_inode); - spin_unlock(&dentry->d_lock); - - mntget(nd->path.mnt); - - rcu_read_unlock(); - br_read_unlock(vfsmount_lock); - - return 0; - -err_unlock: - spin_unlock(&dentry->d_lock); - rcu_read_unlock(); - br_read_unlock(vfsmount_lock); - return -ECHILD; -} - /** * release_open_intent - free up open intent resources * @nd: pointer to nameidata @@ -606,26 +511,39 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd) return dentry; } -/* - * handle_reval_path - force revalidation of a dentry - * - * In some situations the path walking code will trust dentries without - * revalidating them. This causes problems for filesystems that depend on - * d_revalidate to handle file opens (e.g. NFSv4). When FS_REVAL_DOT is set - * (which indicates that it's possible for the dentry to go stale), force - * a d_revalidate call before proceeding. +/** + * complete_walk - successful completion of path walk + * @nd: pointer nameidata * - * Returns 0 if the revalidation was successful. If the revalidation fails, - * either return the error returned by d_revalidate or -ESTALE if the - * revalidation it just returned 0. If d_revalidate returns 0, we attempt to - * invalidate the dentry. It's up to the caller to handle putting references - * to the path if necessary. + * If we had been in RCU mode, drop out of it and legitimize nd->path. + * Revalidate the final result, unless we'd already done that during + * the path walk or the filesystem doesn't ask for it. Return 0 on + * success, -error on failure. In case of failure caller does not + * need to drop nd->path. */ -static inline int handle_reval_path(struct nameidata *nd) +static int complete_walk(struct nameidata *nd) { struct dentry *dentry = nd->path.dentry; int status; + if (nd->flags & LOOKUP_RCU) { + nd->flags &= ~LOOKUP_RCU; + if (!(nd->flags & LOOKUP_ROOT)) + nd->root.mnt = NULL; + spin_lock(&dentry->d_lock); + if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { + spin_unlock(&dentry->d_lock); + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + return -ECHILD; + } + BUG_ON(nd->inode != dentry->d_inode); + spin_unlock(&dentry->d_lock); + mntget(nd->path.mnt); + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + } + if (likely(!(nd->flags & LOOKUP_JUMPED))) return 0; @@ -643,6 +561,7 @@ static inline int handle_reval_path(struct nameidata *nd) if (!status) status = -ESTALE; + path_put(&nd->path); return status; } @@ -1241,13 +1160,8 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, if (likely(__follow_mount_rcu(nd, path, inode, false))) return 0; unlazy: - if (dentry) { - if (nameidata_dentry_drop_rcu(nd, dentry)) - return -ECHILD; - } else { - if (nameidata_drop_rcu(nd)) - return -ECHILD; - } + if (unlazy_walk(nd, dentry)) + return -ECHILD; } else { dentry = __d_lookup(parent, name); } @@ -1303,7 +1217,7 @@ static inline int may_lookup(struct nameidata *nd) int err = exec_permission(nd->inode, IPERM_FLAG_RCU); if (err != -ECHILD) return err; - if (nameidata_drop_rcu(nd)) + if (unlazy_walk(nd, NULL)) return -ECHILD; } return exec_permission(nd->inode, 0); @@ -1357,8 +1271,12 @@ static inline int walk_component(struct nameidata *nd, struct path *path, return -ENOENT; } if (unlikely(inode->i_op->follow_link) && follow) { - if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry)) - return -ECHILD; + if (nd->flags & LOOKUP_RCU) { + if (unlikely(unlazy_walk(nd, path->dentry))) { + terminate_walk(nd); + return -ECHILD; + } + } BUG_ON(inode != path->dentry->d_inode); return 1; } @@ -1657,18 +1575,8 @@ static int path_lookupat(int dfd, const char *name, } } - if (nd->flags & LOOKUP_RCU) { - /* went all way through without dropping RCU */ - BUG_ON(err); - if (nameidata_drop_rcu_last(nd)) - err = -ECHILD; - } - - if (!err) { - err = handle_reval_path(nd); - if (err) - path_put(&nd->path); - } + if (!err) + err = complete_walk(nd); if (!err && nd->flags & LOOKUP_DIRECTORY) { if (!nd->inode->i_op->lookup) { @@ -2134,13 +2042,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, return ERR_PTR(error); /* fallthrough */ case LAST_ROOT: - if (nd->flags & LOOKUP_RCU) { - if (nameidata_drop_rcu_last(nd)) - return ERR_PTR(-ECHILD); - } - error = handle_reval_path(nd); + error = complete_walk(nd); if (error) - goto exit; + return ERR_PTR(error); audit_inode(pathname, nd->path.dentry); if (open_flag & O_CREAT) { error = -EISDIR; @@ -2148,10 +2052,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, } goto ok; case LAST_BIND: - /* can't be RCU mode here */ - error = handle_reval_path(nd); + error = complete_walk(nd); if (error) - goto exit; + return ERR_PTR(error); audit_inode(pathname, dir); goto ok; } @@ -2170,10 +2073,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (error) /* symlink */ return NULL; /* sayonara */ - if (nd->flags & LOOKUP_RCU) { - if (nameidata_drop_rcu_last(nd)) - return ERR_PTR(-ECHILD); - } + error = complete_walk(nd); + if (error) + return ERR_PTR(-ECHILD); error = -ENOTDIR; if (nd->flags & LOOKUP_DIRECTORY) { @@ -2185,11 +2087,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, } /* create side of things */ - - if (nd->flags & LOOKUP_RCU) { - if (nameidata_drop_rcu_last(nd)) - return ERR_PTR(-ECHILD); - } + error = complete_walk(nd); + if (error) + return ERR_PTR(error); audit_inode(pathname, dir); error = -EISDIR; @@ -2629,10 +2529,10 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, int, mode) } /* - * We try to drop the dentry early: we should have - * a usage count of 2 if we're the only user of this - * dentry, and if that is true (possibly after pruning - * the dcache), then we drop the dentry now. + * The dentry_unhash() helper will try to drop the dentry early: we + * should have a usage count of 2 if we're the only user of this + * dentry, and if that is true (possibly after pruning the dcache), + * then we drop the dentry now. * * A low-level filesystem can, if it choses, legally * do a @@ -2645,10 +2545,9 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, int, mode) */ void dentry_unhash(struct dentry *dentry) { - dget(dentry); shrink_dcache_parent(dentry); spin_lock(&dentry->d_lock); - if (dentry->d_count == 2) + if (dentry->d_count == 1) __d_drop(dentry); spin_unlock(&dentry->d_lock); } @@ -2664,25 +2563,26 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) return -EPERM; mutex_lock(&dentry->d_inode->i_mutex); - dentry_unhash(dentry); + + error = -EBUSY; if (d_mountpoint(dentry)) - error = -EBUSY; - else { - error = security_inode_rmdir(dir, dentry); - if (!error) { - error = dir->i_op->rmdir(dir, dentry); - if (!error) { - dentry->d_inode->i_flags |= S_DEAD; - dont_mount(dentry); - } - } - } + goto out; + + error = security_inode_rmdir(dir, dentry); + if (error) + goto out; + + error = dir->i_op->rmdir(dir, dentry); + if (error) + goto out; + + dentry->d_inode->i_flags |= S_DEAD; + dont_mount(dentry); + +out: mutex_unlock(&dentry->d_inode->i_mutex); - if (!error) { + if (!error) d_delete(dentry); - } - dput(dentry); - return error; } @@ -3053,12 +2953,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * HOWEVER, it relies on the assumption that any object with ->lookup() * has no more than 1 dentry. If "hybrid" objects will ever appear, * we'd better make sure that there's no link(2) for them. - * d) some filesystems don't support opened-but-unlinked directories, - * either because of layout or because they are not ready to deal with - * all cases correctly. The latter will be fixed (taking this sort of - * stuff into VFS), but the former is not going away. Solution: the same - * trick as in rmdir(). - * e) conversion from fhandle to dentry may come in the wrong moment - when + * d) conversion from fhandle to dentry may come in the wrong moment - when * we are removing the target. Solution: we will have to grab ->i_mutex * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on * ->i_mutex on parents, which works but leads to some truly excessive @@ -3068,7 +2963,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { int error = 0; - struct inode *target; + struct inode *target = new_dentry->d_inode; /* * If we are going to change the parent - check write permissions, @@ -3084,26 +2979,24 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, if (error) return error; - target = new_dentry->d_inode; if (target) mutex_lock(&target->i_mutex); - if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) - error = -EBUSY; - else { - if (target) - dentry_unhash(new_dentry); - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); - } + + error = -EBUSY; + if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) + goto out; + + error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + if (error) + goto out; + if (target) { - if (!error) { - target->i_flags |= S_DEAD; - dont_mount(new_dentry); - } - mutex_unlock(&target->i_mutex); - if (d_unhashed(new_dentry)) - d_rehash(new_dentry); - dput(new_dentry); + target->i_flags |= S_DEAD; + dont_mount(new_dentry); } +out: + if (target) + mutex_unlock(&target->i_mutex); if (!error) if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) d_move(old_dentry,new_dentry); @@ -3113,7 +3006,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { - struct inode *target; + struct inode *target = new_dentry->d_inode; int error; error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); @@ -3121,19 +3014,22 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, return error; dget(new_dentry); - target = new_dentry->d_inode; if (target) mutex_lock(&target->i_mutex); + + error = -EBUSY; if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) - error = -EBUSY; - else - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); - if (!error) { - if (target) - dont_mount(new_dentry); - if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) - d_move(old_dentry, new_dentry); - } + goto out; + + error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + if (error) + goto out; + + if (target) + dont_mount(new_dentry); + if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) + d_move(old_dentry, new_dentry); +out: if (target) mutex_unlock(&target->i_mutex); dput(new_dentry); diff --git a/fs/namespace.c b/fs/namespace.c index d99bcf59e4c2..fe59bd145d21 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1695,7 +1695,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path) static int flags_to_propagation_type(int flags) { - int type = flags & ~MS_REC; + int type = flags & ~(MS_REC | MS_SILENT); /* Fail if any non-propagation flags are set */ if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index f6946bb5cb55..e3e646b06404 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -1033,6 +1033,8 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry) DPRINTK("ncp_rmdir: removing %s/%s\n", dentry->d_parent->d_name.name, dentry->d_name.name); + dentry_unhash(dentry); + error = -EBUSY; if (!d_unhashed(dentry)) goto out; @@ -1139,6 +1141,9 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, old_dentry->d_parent->d_name.name, old_dentry->d_name.name, new_dentry->d_parent->d_name.name, new_dentry->d_name.name); + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + ncp_age_dentry(server, old_dentry); ncp_age_dentry(server, new_dentry); diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 546849b3e88f..1102a5fbb744 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -334,6 +334,8 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) struct nilfs_transaction_info ti; int err; + dentry_unhash(dentry); + err = nilfs_transaction_begin(dir->i_sb, &ti, 0); if (err) return err; @@ -369,6 +371,9 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct nilfs_transaction_info ti; int err; + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1); if (unlikely(err)) return err; diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index d8a0313e99e6..f17e58b32989 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile @@ -30,6 +30,7 @@ ocfs2-objs := \ namei.o \ refcounttree.o \ reservations.o \ + move_extents.o \ resize.o \ slot_map.o \ suballoc.o \ diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 48aa9c7401c7..ed553c60de82 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -29,6 +29,7 @@ #include <linux/highmem.h> #include <linux/swap.h> #include <linux/quotaops.h> +#include <linux/blkdev.h> #include <cluster/masklog.h> @@ -7184,3 +7185,168 @@ out_commit: out: return ret; } + +static int ocfs2_trim_extent(struct super_block *sb, + struct ocfs2_group_desc *gd, + u32 start, u32 count) +{ + u64 discard, bcount; + + bcount = ocfs2_clusters_to_blocks(sb, count); + discard = le64_to_cpu(gd->bg_blkno) + + ocfs2_clusters_to_blocks(sb, start); + + trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount); + + return sb_issue_discard(sb, discard, bcount, GFP_NOFS, 0); +} + +static int ocfs2_trim_group(struct super_block *sb, + struct ocfs2_group_desc *gd, + u32 start, u32 max, u32 minbits) +{ + int ret = 0, count = 0, next; + void *bitmap = gd->bg_bitmap; + + if (le16_to_cpu(gd->bg_free_bits_count) < minbits) + return 0; + + trace_ocfs2_trim_group((unsigned long long)le64_to_cpu(gd->bg_blkno), + start, max, minbits); + + while (start < max) { + start = ocfs2_find_next_zero_bit(bitmap, max, start); + if (start >= max) + break; + next = ocfs2_find_next_bit(bitmap, max, start); + + if ((next - start) >= minbits) { + ret = ocfs2_trim_extent(sb, gd, + start, next - start); + if (ret < 0) { + mlog_errno(ret); + break; + } + count += next - start; + } + start = next + 1; + + if (fatal_signal_pending(current)) { + count = -ERESTARTSYS; + break; + } + + if ((le16_to_cpu(gd->bg_free_bits_count) - count) < minbits) + break; + } + + if (ret < 0) + count = ret; + + return count; +} + +int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range) +{ + struct ocfs2_super *osb = OCFS2_SB(sb); + u64 start, len, trimmed, first_group, last_group, group; + int ret, cnt; + u32 first_bit, last_bit, minlen; + struct buffer_head *main_bm_bh = NULL; + struct inode *main_bm_inode = NULL; + struct buffer_head *gd_bh = NULL; + struct ocfs2_dinode *main_bm; + struct ocfs2_group_desc *gd = NULL; + + start = range->start >> osb->s_clustersize_bits; + len = range->len >> osb->s_clustersize_bits; + minlen = range->minlen >> osb->s_clustersize_bits; + trimmed = 0; + + if (!len) { + range->len = 0; + return 0; + } + + if (minlen >= osb->bitmap_cpg) + return -EINVAL; + + main_bm_inode = ocfs2_get_system_file_inode(osb, + GLOBAL_BITMAP_SYSTEM_INODE, + OCFS2_INVALID_SLOT); + if (!main_bm_inode) { + ret = -EIO; + mlog_errno(ret); + goto out; + } + + mutex_lock(&main_bm_inode->i_mutex); + + ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0); + if (ret < 0) { + mlog_errno(ret); + goto out_mutex; + } + main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data; + + if (start >= le32_to_cpu(main_bm->i_clusters)) { + ret = -EINVAL; + goto out_unlock; + } + + if (start + len > le32_to_cpu(main_bm->i_clusters)) + len = le32_to_cpu(main_bm->i_clusters) - start; + + trace_ocfs2_trim_fs(start, len, minlen); + + /* Determine first and last group to examine based on start and len */ + first_group = ocfs2_which_cluster_group(main_bm_inode, start); + if (first_group == osb->first_cluster_group_blkno) + first_bit = start; + else + first_bit = start - ocfs2_blocks_to_clusters(sb, first_group); + last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1); + last_bit = osb->bitmap_cpg; + + for (group = first_group; group <= last_group;) { + if (first_bit + len >= osb->bitmap_cpg) + last_bit = osb->bitmap_cpg; + else + last_bit = first_bit + len; + + ret = ocfs2_read_group_descriptor(main_bm_inode, + main_bm, group, + &gd_bh); + if (ret < 0) { + mlog_errno(ret); + break; + } + + gd = (struct ocfs2_group_desc *)gd_bh->b_data; + cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen); + brelse(gd_bh); + gd_bh = NULL; + if (cnt < 0) { + ret = cnt; + mlog_errno(ret); + break; + } + + trimmed += cnt; + len -= osb->bitmap_cpg - first_bit; + first_bit = 0; + if (group == osb->first_cluster_group_blkno) + group = ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg); + else + group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg); + } + range->len = trimmed * sb->s_blocksize; +out_unlock: + ocfs2_inode_unlock(main_bm_inode, 0); + brelse(main_bm_bh); +out_mutex: + mutex_unlock(&main_bm_inode->i_mutex); + iput(main_bm_inode); +out: + return ret; +} diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 3bd08a03251c..ca381c584127 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -239,6 +239,7 @@ int ocfs2_find_leaf(struct ocfs2_caching_info *ci, struct buffer_head **leaf_bh); int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster); +int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range); /* * Helper function to look at the # of clusters in an extent record. */ diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c index bc702dab5d1f..a4b07730b2e1 100644 --- a/fs/ocfs2/cluster/sys.c +++ b/fs/ocfs2/cluster/sys.c @@ -57,7 +57,6 @@ static struct kset *o2cb_kset; void o2cb_sys_shutdown(void) { mlog_sys_shutdown(); - sysfs_remove_link(NULL, "o2cb"); kset_unregister(o2cb_kset); } @@ -69,14 +68,6 @@ int o2cb_sys_init(void) if (!o2cb_kset) return -ENOMEM; - /* - * Create this symlink for backwards compatibility with old - * versions of ocfs2-tools which look for things in /sys/o2cb. - */ - ret = sysfs_create_link(NULL, &o2cb_kset->kobj, "o2cb"); - if (ret) - goto error; - ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group); if (ret) goto error; diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 4bdf7baee344..d602abb51b61 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -144,6 +144,7 @@ struct dlm_ctxt wait_queue_head_t dlm_join_events; unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; + unsigned long exit_domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; struct dlm_recovery_ctxt reco; spinlock_t master_lock; @@ -401,6 +402,18 @@ static inline int dlm_lvb_is_empty(char *lvb) return 1; } +static inline char *dlm_list_in_text(enum dlm_lockres_list idx) +{ + if (idx == DLM_GRANTED_LIST) + return "granted"; + else if (idx == DLM_CONVERTING_LIST) + return "converting"; + else if (idx == DLM_BLOCKED_LIST) + return "blocked"; + else + return "unknown"; +} + static inline struct list_head * dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx) { @@ -448,6 +461,7 @@ enum { DLM_FINALIZE_RECO_MSG = 518, DLM_QUERY_REGION = 519, DLM_QUERY_NODEINFO = 520, + DLM_BEGIN_EXIT_DOMAIN_MSG = 521, }; struct dlm_reco_node_data diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 04a32be0aeb9..56f82cb912e3 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -756,6 +756,12 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) buf + out, len - out); out += snprintf(buf + out, len - out, "\n"); + /* Exit Domain Map: xx xx xx */ + out += snprintf(buf + out, len - out, "Exit Domain Map: "); + out += stringify_nodemap(dlm->exit_domain_map, O2NM_MAX_NODES, + buf + out, len - out); + out += snprintf(buf + out, len - out, "\n"); + /* Live Map: xx xx xx */ out += snprintf(buf + out, len - out, "Live Map: "); out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES, diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 3b179d6cbde0..6ed6b95dcf93 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -132,10 +132,12 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); * New in version 1.1: * - Message DLM_QUERY_REGION added to support global heartbeat * - Message DLM_QUERY_NODEINFO added to allow online node removes + * New in version 1.2: + * - Message DLM_BEGIN_EXIT_DOMAIN_MSG added to mark start of exit domain */ static const struct dlm_protocol_version dlm_protocol = { .pv_major = 1, - .pv_minor = 1, + .pv_minor = 2, }; #define DLM_DOMAIN_BACKOFF_MS 200 @@ -449,14 +451,18 @@ redo_bucket: dropped = dlm_empty_lockres(dlm, res); spin_lock(&res->spinlock); - __dlm_lockres_calc_usage(dlm, res); - iter = res->hash_node.next; + if (dropped) + __dlm_lockres_calc_usage(dlm, res); + else + iter = res->hash_node.next; spin_unlock(&res->spinlock); dlm_lockres_put(res); - if (dropped) + if (dropped) { + cond_resched_lock(&dlm->spinlock); goto redo_bucket; + } } cond_resched_lock(&dlm->spinlock); num += n; @@ -486,6 +492,28 @@ static int dlm_no_joining_node(struct dlm_ctxt *dlm) return ret; } +static int dlm_begin_exit_domain_handler(struct o2net_msg *msg, u32 len, + void *data, void **ret_data) +{ + struct dlm_ctxt *dlm = data; + unsigned int node; + struct dlm_exit_domain *exit_msg = (struct dlm_exit_domain *) msg->buf; + + if (!dlm_grab(dlm)) + return 0; + + node = exit_msg->node_idx; + mlog(0, "%s: Node %u sent a begin exit domain message\n", dlm->name, node); + + spin_lock(&dlm->spinlock); + set_bit(node, dlm->exit_domain_map); + spin_unlock(&dlm->spinlock); + + dlm_put(dlm); + + return 0; +} + static void dlm_mark_domain_leaving(struct dlm_ctxt *dlm) { /* Yikes, a double spinlock! I need domain_lock for the dlm @@ -542,6 +570,7 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, spin_lock(&dlm->spinlock); clear_bit(node, dlm->domain_map); + clear_bit(node, dlm->exit_domain_map); __dlm_print_nodes(dlm); /* notify anything attached to the heartbeat events */ @@ -554,29 +583,56 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, return 0; } -static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm, +static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm, u32 msg_type, unsigned int node) { int status; struct dlm_exit_domain leave_msg; - mlog(0, "Asking node %u if we can leave the domain %s me = %u\n", - node, dlm->name, dlm->node_num); + mlog(0, "%s: Sending domain exit message %u to node %u\n", dlm->name, + msg_type, node); memset(&leave_msg, 0, sizeof(leave_msg)); leave_msg.node_idx = dlm->node_num; - status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key, - &leave_msg, sizeof(leave_msg), node, - NULL); + status = o2net_send_message(msg_type, dlm->key, &leave_msg, + sizeof(leave_msg), node, NULL); if (status < 0) - mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " - "node %u\n", status, DLM_EXIT_DOMAIN_MSG, dlm->key, node); - mlog(0, "status return %d from o2net_send_message\n", status); + mlog(ML_ERROR, "Error %d sending domain exit message %u " + "to node %u on domain %s\n", status, msg_type, node, + dlm->name); return status; } +static void dlm_begin_exit_domain(struct dlm_ctxt *dlm) +{ + int node = -1; + + /* Support for begin exit domain was added in 1.2 */ + if (dlm->dlm_locking_proto.pv_major == 1 && + dlm->dlm_locking_proto.pv_minor < 2) + return; + + /* + * Unlike DLM_EXIT_DOMAIN_MSG, DLM_BEGIN_EXIT_DOMAIN_MSG is purely + * informational. Meaning if a node does not receive the message, + * so be it. + */ + spin_lock(&dlm->spinlock); + while (1) { + node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, node + 1); + if (node >= O2NM_MAX_NODES) + break; + if (node == dlm->node_num) + continue; + + spin_unlock(&dlm->spinlock); + dlm_send_one_domain_exit(dlm, DLM_BEGIN_EXIT_DOMAIN_MSG, node); + spin_lock(&dlm->spinlock); + } + spin_unlock(&dlm->spinlock); +} static void dlm_leave_domain(struct dlm_ctxt *dlm) { @@ -602,7 +658,8 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm) clear_node = 1; - status = dlm_send_one_domain_exit(dlm, node); + status = dlm_send_one_domain_exit(dlm, DLM_EXIT_DOMAIN_MSG, + node); if (status < 0 && status != -ENOPROTOOPT && status != -ENOTCONN) { @@ -677,6 +734,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) if (leave) { mlog(0, "shutting down domain %s\n", dlm->name); + dlm_begin_exit_domain(dlm); /* We changed dlm state, notify the thread */ dlm_kick_thread(dlm, NULL); @@ -909,6 +967,7 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, * leftover join state. */ BUG_ON(dlm->joining_node != assert->node_idx); set_bit(assert->node_idx, dlm->domain_map); + clear_bit(assert->node_idx, dlm->exit_domain_map); __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n", @@ -1793,6 +1852,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm) if (status) goto bail; + status = o2net_register_handler(DLM_BEGIN_EXIT_DOMAIN_MSG, dlm->key, + sizeof(struct dlm_exit_domain), + dlm_begin_exit_domain_handler, + dlm, NULL, &dlm->dlm_domain_handlers); + if (status) + goto bail; + bail: if (status) dlm_unregister_domain_handlers(dlm); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 84d166328cf7..11eefb8c12e9 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2339,65 +2339,55 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) dlm_lockres_put(res); } -/* Checks whether the lockres can be migrated. Returns 0 if yes, < 0 - * if not. If 0, numlocks is set to the number of locks in the lockres. +/* + * A migrateable resource is one that is : + * 1. locally mastered, and, + * 2. zero local locks, and, + * 3. one or more non-local locks, or, one or more references + * Returns 1 if yes, 0 if not. */ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - int *numlocks, - int *hasrefs) + struct dlm_lock_resource *res) { - int ret; - int i; - int count = 0; + enum dlm_lockres_list idx; + int nonlocal = 0, node_ref; struct list_head *queue; struct dlm_lock *lock; + u64 cookie; assert_spin_locked(&res->spinlock); - *numlocks = 0; - *hasrefs = 0; - - ret = -EINVAL; - if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { - mlog(0, "cannot migrate lockres with unknown owner!\n"); - goto leave; - } - - if (res->owner != dlm->node_num) { - mlog(0, "cannot migrate lockres this node doesn't own!\n"); - goto leave; - } + if (res->owner != dlm->node_num) + return 0; - ret = 0; - queue = &res->granted; - for (i = 0; i < 3; i++) { + for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) { + queue = dlm_list_idx_to_ptr(res, idx); list_for_each_entry(lock, queue, list) { - ++count; - if (lock->ml.node == dlm->node_num) { - mlog(0, "found a lock owned by this node still " - "on the %s queue! will not migrate this " - "lockres\n", (i == 0 ? "granted" : - (i == 1 ? "converting" : - "blocked"))); - ret = -ENOTEMPTY; - goto leave; + if (lock->ml.node != dlm->node_num) { + nonlocal++; + continue; } + cookie = be64_to_cpu(lock->ml.cookie); + mlog(0, "%s: Not migrateable res %.*s, lock %u:%llu on " + "%s list\n", dlm->name, res->lockname.len, + res->lockname.name, + dlm_get_lock_cookie_node(cookie), + dlm_get_lock_cookie_seq(cookie), + dlm_list_in_text(idx)); + return 0; } - queue++; } - *numlocks = count; - - count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); - if (count < O2NM_MAX_NODES) - *hasrefs = 1; + if (!nonlocal) { + node_ref = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); + if (node_ref >= O2NM_MAX_NODES) + return 0; + } - mlog(0, "%s: res %.*s, Migrateable, locks %d, refs %d\n", dlm->name, - res->lockname.len, res->lockname.name, *numlocks, *hasrefs); + mlog(0, "%s: res %.*s, Migrateable\n", dlm->name, res->lockname.len, + res->lockname.name); -leave: - return ret; + return 1; } /* @@ -2406,8 +2396,7 @@ leave: static int dlm_migrate_lockres(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 target) + struct dlm_lock_resource *res, u8 target) { struct dlm_master_list_entry *mle = NULL; struct dlm_master_list_entry *oldmle = NULL; @@ -2416,37 +2405,20 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, const char *name; unsigned int namelen; int mle_added = 0; - int numlocks, hasrefs; int wake = 0; if (!dlm_grab(dlm)) return -EINVAL; + BUG_ON(target == O2NM_MAX_NODES); + name = res->lockname.name; namelen = res->lockname.len; - mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target); - - /* - * ensure this lockres is a proper candidate for migration - */ - spin_lock(&res->spinlock); - ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs); - if (ret < 0) { - spin_unlock(&res->spinlock); - goto leave; - } - spin_unlock(&res->spinlock); - - /* no work to do */ - if (numlocks == 0 && !hasrefs) - goto leave; - - /* - * preallocate up front - * if this fails, abort - */ + mlog(0, "%s: Migrating %.*s to node %u\n", dlm->name, namelen, name, + target); + /* preallocate up front. if this fails, abort */ ret = -ENOMEM; mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS); if (!mres) { @@ -2462,35 +2434,10 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, ret = 0; /* - * find a node to migrate the lockres to - */ - - spin_lock(&dlm->spinlock); - /* pick a new node */ - if (!test_bit(target, dlm->domain_map) || - target >= O2NM_MAX_NODES) { - target = dlm_pick_migration_target(dlm, res); - } - mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name, - namelen, name, target); - - if (target >= O2NM_MAX_NODES || - !test_bit(target, dlm->domain_map)) { - /* target chosen is not alive */ - ret = -EINVAL; - } - - if (ret) { - spin_unlock(&dlm->spinlock); - goto fail; - } - - mlog(0, "continuing with target = %u\n", target); - - /* * clear any existing master requests and * add the migration mle to the list */ + spin_lock(&dlm->spinlock); spin_lock(&dlm->master_lock); ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, namelen, target, dlm->node_num); @@ -2531,6 +2478,7 @@ fail: dlm_put_mle(mle); } else if (mle) { kmem_cache_free(dlm_mle_cache, mle); + mle = NULL; } goto leave; } @@ -2652,69 +2600,52 @@ leave: if (wake) wake_up(&res->wq); - /* TODO: cleanup */ if (mres) free_page((unsigned long)mres); dlm_put(dlm); - mlog(0, "returning %d\n", ret); + mlog(0, "%s: Migrating %.*s to %u, returns %d\n", dlm->name, namelen, + name, target, ret); return ret; } #define DLM_MIGRATION_RETRY_MS 100 -/* Should be called only after beginning the domain leave process. +/* + * Should be called only after beginning the domain leave process. * There should not be any remaining locks on nonlocal lock resources, * and there should be no local locks left on locally mastered resources. * * Called with the dlm spinlock held, may drop it to do migration, but * will re-acquire before exit. * - * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped */ + * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped + */ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { int ret; int lock_dropped = 0; - int numlocks, hasrefs; + u8 target = O2NM_MAX_NODES; + + assert_spin_locked(&dlm->spinlock); spin_lock(&res->spinlock); - if (res->owner != dlm->node_num) { - if (!__dlm_lockres_unused(res)) { - mlog(ML_ERROR, "%s:%.*s: this node is not master, " - "trying to free this but locks remain\n", - dlm->name, res->lockname.len, res->lockname.name); - } - spin_unlock(&res->spinlock); - goto leave; - } + if (dlm_is_lockres_migrateable(dlm, res)) + target = dlm_pick_migration_target(dlm, res); + spin_unlock(&res->spinlock); - /* No need to migrate a lockres having no locks */ - ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs); - if (ret >= 0 && numlocks == 0 && !hasrefs) { - spin_unlock(&res->spinlock); + if (target == O2NM_MAX_NODES) goto leave; - } - spin_unlock(&res->spinlock); /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ spin_unlock(&dlm->spinlock); lock_dropped = 1; - while (1) { - ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES); - if (ret >= 0) - break; - if (ret == -ENOTEMPTY) { - mlog(ML_ERROR, "lockres %.*s still has local locks!\n", - res->lockname.len, res->lockname.name); - BUG(); - } - - mlog(0, "lockres %.*s: migrate failed, " - "retrying\n", res->lockname.len, - res->lockname.name); - msleep(DLM_MIGRATION_RETRY_MS); - } + ret = dlm_migrate_lockres(dlm, res, target); + if (ret) + mlog(0, "%s: res %.*s, Migrate to node %u failed with %d\n", + dlm->name, res->lockname.len, res->lockname.name, + target, ret); spin_lock(&dlm->spinlock); leave: return lock_dropped; @@ -2898,61 +2829,55 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, } } -/* for now this is not too intelligent. we will - * need stats to make this do the right thing. - * this just finds the first lock on one of the - * queues and uses that node as the target. */ +/* + * Pick a node to migrate the lock resource to. This function selects a + * potential target based first on the locks and then on refmap. It skips + * nodes that are in the process of exiting the domain. + */ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { - int i; + enum dlm_lockres_list idx; struct list_head *queue = &res->granted; struct dlm_lock *lock; - int nodenum; + int noderef; + u8 nodenum = O2NM_MAX_NODES; assert_spin_locked(&dlm->spinlock); + assert_spin_locked(&res->spinlock); - spin_lock(&res->spinlock); - for (i=0; i<3; i++) { + /* Go through all the locks */ + for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) { + queue = dlm_list_idx_to_ptr(res, idx); list_for_each_entry(lock, queue, list) { - /* up to the caller to make sure this node - * is alive */ - if (lock->ml.node != dlm->node_num) { - spin_unlock(&res->spinlock); - return lock->ml.node; - } + if (lock->ml.node == dlm->node_num) + continue; + if (test_bit(lock->ml.node, dlm->exit_domain_map)) + continue; + nodenum = lock->ml.node; + goto bail; } - queue++; - } - - nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); - if (nodenum < O2NM_MAX_NODES) { - spin_unlock(&res->spinlock); - return nodenum; } - spin_unlock(&res->spinlock); - mlog(0, "have not found a suitable target yet! checking domain map\n"); - /* ok now we're getting desperate. pick anyone alive. */ - nodenum = -1; + /* Go thru the refmap */ + noderef = -1; while (1) { - nodenum = find_next_bit(dlm->domain_map, - O2NM_MAX_NODES, nodenum+1); - mlog(0, "found %d in domain map\n", nodenum); - if (nodenum >= O2NM_MAX_NODES) + noderef = find_next_bit(res->refmap, O2NM_MAX_NODES, + noderef + 1); + if (noderef >= O2NM_MAX_NODES) break; - if (nodenum != dlm->node_num) { - mlog(0, "picking %d\n", nodenum); - return nodenum; - } + if (noderef == dlm->node_num) + continue; + if (test_bit(noderef, dlm->exit_domain_map)) + continue; + nodenum = noderef; + goto bail; } - mlog(0, "giving up. no master to migrate to\n"); - return DLM_LOCK_RES_OWNER_UNKNOWN; +bail: + return nodenum; } - - /* this is called by the new master once all lockres * data has been received */ static int dlm_do_migrate_request(struct dlm_ctxt *dlm, diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index f1beb6fc254d..7efab6d28a21 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2393,6 +2393,7 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx) mlog(0, "node %u being removed from domain map!\n", idx); clear_bit(idx, dlm->domain_map); + clear_bit(idx, dlm->exit_domain_map); /* wake up migration waiters if a node goes down. * perhaps later we can genericize this for other waiters. */ wake_up(&dlm->migration_wq); diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 8c5c0eddc365..b42076797049 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -88,7 +88,7 @@ struct workqueue_struct *user_dlm_worker; * signifies a bast fired on the lock. */ #define DLMFS_CAPABILITIES "bast stackglue" -extern int param_set_dlmfs_capabilities(const char *val, +static int param_set_dlmfs_capabilities(const char *val, struct kernel_param *kp) { printk(KERN_ERR "%s: readonly parameter\n", kp->name); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 89659d6dc206..b1e35a392ca5 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2670,6 +2670,7 @@ const struct file_operations ocfs2_fops_no_plocks = { .flock = ocfs2_flock, .splice_read = ocfs2_file_splice_read, .splice_write = ocfs2_file_splice_write, + .fallocate = ocfs2_fallocate, }; const struct file_operations ocfs2_dops_no_plocks = { diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 8f13c5989eae..bc91072b7219 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -22,6 +22,11 @@ #include "ioctl.h" #include "resize.h" #include "refcounttree.h" +#include "sysfile.h" +#include "dir.h" +#include "buffer_head_io.h" +#include "suballoc.h" +#include "move_extents.h" #include <linux/ext2_fs.h> @@ -35,31 +40,27 @@ * be -EFAULT. The error will be returned from the ioctl(2) call. It's * just a best-effort to tell userspace that this request caused the error. */ -static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq, +static inline void o2info_set_request_error(struct ocfs2_info_request *kreq, struct ocfs2_info_request __user *req) { kreq->ir_flags |= OCFS2_INFO_FL_ERROR; (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags)); } -#define o2info_set_request_error(a, b) \ - __o2info_set_request_error((struct ocfs2_info_request *)&(a), b) - -static inline void __o2info_set_request_filled(struct ocfs2_info_request *req) +static inline void o2info_set_request_filled(struct ocfs2_info_request *req) { req->ir_flags |= OCFS2_INFO_FL_FILLED; } -#define o2info_set_request_filled(a) \ - __o2info_set_request_filled((struct ocfs2_info_request *)&(a)) - -static inline void __o2info_clear_request_filled(struct ocfs2_info_request *req) +static inline void o2info_clear_request_filled(struct ocfs2_info_request *req) { req->ir_flags &= ~OCFS2_INFO_FL_FILLED; } -#define o2info_clear_request_filled(a) \ - __o2info_clear_request_filled((struct ocfs2_info_request *)&(a)) +static inline int o2info_coherent(struct ocfs2_info_request *req) +{ + return (!(req->ir_flags & OCFS2_INFO_FL_NON_COHERENT)); +} static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) { @@ -153,7 +154,7 @@ int ocfs2_info_handle_blocksize(struct inode *inode, oib.ib_blocksize = inode->i_sb->s_blocksize; - o2info_set_request_filled(oib); + o2info_set_request_filled(&oib.ib_req); if (o2info_to_user(oib, req)) goto bail; @@ -161,7 +162,7 @@ int ocfs2_info_handle_blocksize(struct inode *inode, status = 0; bail: if (status) - o2info_set_request_error(oib, req); + o2info_set_request_error(&oib.ib_req, req); return status; } @@ -178,7 +179,7 @@ int ocfs2_info_handle_clustersize(struct inode *inode, oic.ic_clustersize = osb->s_clustersize; - o2info_set_request_filled(oic); + o2info_set_request_filled(&oic.ic_req); if (o2info_to_user(oic, req)) goto bail; @@ -186,7 +187,7 @@ int ocfs2_info_handle_clustersize(struct inode *inode, status = 0; bail: if (status) - o2info_set_request_error(oic, req); + o2info_set_request_error(&oic.ic_req, req); return status; } @@ -203,7 +204,7 @@ int ocfs2_info_handle_maxslots(struct inode *inode, oim.im_max_slots = osb->max_slots; - o2info_set_request_filled(oim); + o2info_set_request_filled(&oim.im_req); if (o2info_to_user(oim, req)) goto bail; @@ -211,7 +212,7 @@ int ocfs2_info_handle_maxslots(struct inode *inode, status = 0; bail: if (status) - o2info_set_request_error(oim, req); + o2info_set_request_error(&oim.im_req, req); return status; } @@ -228,7 +229,7 @@ int ocfs2_info_handle_label(struct inode *inode, memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); - o2info_set_request_filled(oil); + o2info_set_request_filled(&oil.il_req); if (o2info_to_user(oil, req)) goto bail; @@ -236,7 +237,7 @@ int ocfs2_info_handle_label(struct inode *inode, status = 0; bail: if (status) - o2info_set_request_error(oil, req); + o2info_set_request_error(&oil.il_req, req); return status; } @@ -253,7 +254,7 @@ int ocfs2_info_handle_uuid(struct inode *inode, memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); - o2info_set_request_filled(oiu); + o2info_set_request_filled(&oiu.iu_req); if (o2info_to_user(oiu, req)) goto bail; @@ -261,7 +262,7 @@ int ocfs2_info_handle_uuid(struct inode *inode, status = 0; bail: if (status) - o2info_set_request_error(oiu, req); + o2info_set_request_error(&oiu.iu_req, req); return status; } @@ -280,7 +281,7 @@ int ocfs2_info_handle_fs_features(struct inode *inode, oif.if_incompat_features = osb->s_feature_incompat; oif.if_ro_compat_features = osb->s_feature_ro_compat; - o2info_set_request_filled(oif); + o2info_set_request_filled(&oif.if_req); if (o2info_to_user(oif, req)) goto bail; @@ -288,7 +289,7 @@ int ocfs2_info_handle_fs_features(struct inode *inode, status = 0; bail: if (status) - o2info_set_request_error(oif, req); + o2info_set_request_error(&oif.if_req, req); return status; } @@ -305,7 +306,7 @@ int ocfs2_info_handle_journal_size(struct inode *inode, oij.ij_journal_size = osb->journal->j_inode->i_size; - o2info_set_request_filled(oij); + o2info_set_request_filled(&oij.ij_req); if (o2info_to_user(oij, req)) goto bail; @@ -313,7 +314,408 @@ int ocfs2_info_handle_journal_size(struct inode *inode, status = 0; bail: if (status) - o2info_set_request_error(oij, req); + o2info_set_request_error(&oij.ij_req, req); + + return status; +} + +int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb, + struct inode *inode_alloc, u64 blkno, + struct ocfs2_info_freeinode *fi, u32 slot) +{ + int status = 0, unlock = 0; + + struct buffer_head *bh = NULL; + struct ocfs2_dinode *dinode_alloc = NULL; + + if (inode_alloc) + mutex_lock(&inode_alloc->i_mutex); + + if (o2info_coherent(&fi->ifi_req)) { + status = ocfs2_inode_lock(inode_alloc, &bh, 0); + if (status < 0) { + mlog_errno(status); + goto bail; + } + unlock = 1; + } else { + status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh); + if (status < 0) { + mlog_errno(status); + goto bail; + } + } + + dinode_alloc = (struct ocfs2_dinode *)bh->b_data; + + fi->ifi_stat[slot].lfi_total = + le32_to_cpu(dinode_alloc->id1.bitmap1.i_total); + fi->ifi_stat[slot].lfi_free = + le32_to_cpu(dinode_alloc->id1.bitmap1.i_total) - + le32_to_cpu(dinode_alloc->id1.bitmap1.i_used); + +bail: + if (unlock) + ocfs2_inode_unlock(inode_alloc, 0); + + if (inode_alloc) + mutex_unlock(&inode_alloc->i_mutex); + + brelse(bh); + + return status; +} + +int ocfs2_info_handle_freeinode(struct inode *inode, + struct ocfs2_info_request __user *req) +{ + u32 i; + u64 blkno = -1; + char namebuf[40]; + int status = -EFAULT, type = INODE_ALLOC_SYSTEM_INODE; + struct ocfs2_info_freeinode *oifi = NULL; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct inode *inode_alloc = NULL; + + oifi = kzalloc(sizeof(struct ocfs2_info_freeinode), GFP_KERNEL); + if (!oifi) { + status = -ENOMEM; + mlog_errno(status); + goto bail; + } + + if (o2info_from_user(*oifi, req)) + goto bail; + + oifi->ifi_slotnum = osb->max_slots; + + for (i = 0; i < oifi->ifi_slotnum; i++) { + if (o2info_coherent(&oifi->ifi_req)) { + inode_alloc = ocfs2_get_system_file_inode(osb, type, i); + if (!inode_alloc) { + mlog(ML_ERROR, "unable to get alloc inode in " + "slot %u\n", i); + status = -EIO; + goto bail; + } + } else { + ocfs2_sprintf_system_inode_name(namebuf, + sizeof(namebuf), + type, i); + status = ocfs2_lookup_ino_from_name(osb->sys_root_inode, + namebuf, + strlen(namebuf), + &blkno); + if (status < 0) { + status = -ENOENT; + goto bail; + } + } + + status = ocfs2_info_scan_inode_alloc(osb, inode_alloc, blkno, oifi, i); + if (status < 0) + goto bail; + + iput(inode_alloc); + inode_alloc = NULL; + } + + o2info_set_request_filled(&oifi->ifi_req); + + if (o2info_to_user(*oifi, req)) + goto bail; + + status = 0; +bail: + if (status) + o2info_set_request_error(&oifi->ifi_req, req); + + kfree(oifi); + + return status; +} + +static void o2ffg_update_histogram(struct ocfs2_info_free_chunk_list *hist, + unsigned int chunksize) +{ + int index; + + index = __ilog2_u32(chunksize); + if (index >= OCFS2_INFO_MAX_HIST) + index = OCFS2_INFO_MAX_HIST - 1; + + hist->fc_chunks[index]++; + hist->fc_clusters[index] += chunksize; +} + +static void o2ffg_update_stats(struct ocfs2_info_freefrag_stats *stats, + unsigned int chunksize) +{ + if (chunksize > stats->ffs_max) + stats->ffs_max = chunksize; + + if (chunksize < stats->ffs_min) + stats->ffs_min = chunksize; + + stats->ffs_avg += chunksize; + stats->ffs_free_chunks_real++; +} + +void ocfs2_info_update_ffg(struct ocfs2_info_freefrag *ffg, + unsigned int chunksize) +{ + o2ffg_update_histogram(&(ffg->iff_ffs.ffs_fc_hist), chunksize); + o2ffg_update_stats(&(ffg->iff_ffs), chunksize); +} + +int ocfs2_info_freefrag_scan_chain(struct ocfs2_super *osb, + struct inode *gb_inode, + struct ocfs2_dinode *gb_dinode, + struct ocfs2_chain_rec *rec, + struct ocfs2_info_freefrag *ffg, + u32 chunks_in_group) +{ + int status = 0, used; + u64 blkno; + + struct buffer_head *bh = NULL; + struct ocfs2_group_desc *bg = NULL; + + unsigned int max_bits, num_clusters; + unsigned int offset = 0, cluster, chunk; + unsigned int chunk_free, last_chunksize = 0; + + if (!le32_to_cpu(rec->c_free)) + goto bail; + + do { + if (!bg) + blkno = le64_to_cpu(rec->c_blkno); + else + blkno = le64_to_cpu(bg->bg_next_group); + + if (bh) { + brelse(bh); + bh = NULL; + } + + if (o2info_coherent(&ffg->iff_req)) + status = ocfs2_read_group_descriptor(gb_inode, + gb_dinode, + blkno, &bh); + else + status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh); + + if (status < 0) { + mlog(ML_ERROR, "Can't read the group descriptor # " + "%llu from device.", (unsigned long long)blkno); + status = -EIO; + goto bail; + } + + bg = (struct ocfs2_group_desc *)bh->b_data; + + if (!le16_to_cpu(bg->bg_free_bits_count)) + continue; + + max_bits = le16_to_cpu(bg->bg_bits); + offset = 0; + + for (chunk = 0; chunk < chunks_in_group; chunk++) { + /* + * last chunk may be not an entire one. + */ + if ((offset + ffg->iff_chunksize) > max_bits) + num_clusters = max_bits - offset; + else + num_clusters = ffg->iff_chunksize; + + chunk_free = 0; + for (cluster = 0; cluster < num_clusters; cluster++) { + used = ocfs2_test_bit(offset, + (unsigned long *)bg->bg_bitmap); + /* + * - chunk_free counts free clusters in #N chunk. + * - last_chunksize records the size(in) clusters + * for the last real free chunk being counted. + */ + if (!used) { + last_chunksize++; + chunk_free++; + } + + if (used && last_chunksize) { + ocfs2_info_update_ffg(ffg, + last_chunksize); + last_chunksize = 0; + } + + offset++; + } + + if (chunk_free == ffg->iff_chunksize) + ffg->iff_ffs.ffs_free_chunks++; + } + + /* + * need to update the info for last free chunk. + */ + if (last_chunksize) + ocfs2_info_update_ffg(ffg, last_chunksize); + + } while (le64_to_cpu(bg->bg_next_group)); + +bail: + brelse(bh); + + return status; +} + +int ocfs2_info_freefrag_scan_bitmap(struct ocfs2_super *osb, + struct inode *gb_inode, u64 blkno, + struct ocfs2_info_freefrag *ffg) +{ + u32 chunks_in_group; + int status = 0, unlock = 0, i; + + struct buffer_head *bh = NULL; + struct ocfs2_chain_list *cl = NULL; + struct ocfs2_chain_rec *rec = NULL; + struct ocfs2_dinode *gb_dinode = NULL; + + if (gb_inode) + mutex_lock(&gb_inode->i_mutex); + + if (o2info_coherent(&ffg->iff_req)) { + status = ocfs2_inode_lock(gb_inode, &bh, 0); + if (status < 0) { + mlog_errno(status); + goto bail; + } + unlock = 1; + } else { + status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh); + if (status < 0) { + mlog_errno(status); + goto bail; + } + } + + gb_dinode = (struct ocfs2_dinode *)bh->b_data; + cl = &(gb_dinode->id2.i_chain); + + /* + * Chunksize(in) clusters from userspace should be + * less than clusters in a group. + */ + if (ffg->iff_chunksize > le16_to_cpu(cl->cl_cpg)) { + status = -EINVAL; + goto bail; + } + + memset(&ffg->iff_ffs, 0, sizeof(struct ocfs2_info_freefrag_stats)); + + ffg->iff_ffs.ffs_min = ~0U; + ffg->iff_ffs.ffs_clusters = + le32_to_cpu(gb_dinode->id1.bitmap1.i_total); + ffg->iff_ffs.ffs_free_clusters = ffg->iff_ffs.ffs_clusters - + le32_to_cpu(gb_dinode->id1.bitmap1.i_used); + + chunks_in_group = le16_to_cpu(cl->cl_cpg) / ffg->iff_chunksize + 1; + + for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) { + rec = &(cl->cl_recs[i]); + status = ocfs2_info_freefrag_scan_chain(osb, gb_inode, + gb_dinode, + rec, ffg, + chunks_in_group); + if (status) + goto bail; + } + + if (ffg->iff_ffs.ffs_free_chunks_real) + ffg->iff_ffs.ffs_avg = (ffg->iff_ffs.ffs_avg / + ffg->iff_ffs.ffs_free_chunks_real); +bail: + if (unlock) + ocfs2_inode_unlock(gb_inode, 0); + + if (gb_inode) + mutex_unlock(&gb_inode->i_mutex); + + if (gb_inode) + iput(gb_inode); + + brelse(bh); + + return status; +} + +int ocfs2_info_handle_freefrag(struct inode *inode, + struct ocfs2_info_request __user *req) +{ + u64 blkno = -1; + char namebuf[40]; + int status = -EFAULT, type = GLOBAL_BITMAP_SYSTEM_INODE; + + struct ocfs2_info_freefrag *oiff; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct inode *gb_inode = NULL; + + oiff = kzalloc(sizeof(struct ocfs2_info_freefrag), GFP_KERNEL); + if (!oiff) { + status = -ENOMEM; + mlog_errno(status); + goto bail; + } + + if (o2info_from_user(*oiff, req)) + goto bail; + /* + * chunksize from userspace should be power of 2. + */ + if ((oiff->iff_chunksize & (oiff->iff_chunksize - 1)) || + (!oiff->iff_chunksize)) { + status = -EINVAL; + goto bail; + } + + if (o2info_coherent(&oiff->iff_req)) { + gb_inode = ocfs2_get_system_file_inode(osb, type, + OCFS2_INVALID_SLOT); + if (!gb_inode) { + mlog(ML_ERROR, "unable to get global_bitmap inode\n"); + status = -EIO; + goto bail; + } + } else { + ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, + OCFS2_INVALID_SLOT); + status = ocfs2_lookup_ino_from_name(osb->sys_root_inode, + namebuf, + strlen(namebuf), + &blkno); + if (status < 0) { + status = -ENOENT; + goto bail; + } + } + + status = ocfs2_info_freefrag_scan_bitmap(osb, gb_inode, blkno, oiff); + if (status < 0) + goto bail; + + o2info_set_request_filled(&oiff->iff_req); + + if (o2info_to_user(*oiff, req)) + goto bail; + + status = 0; +bail: + if (status) + o2info_set_request_error(&oiff->iff_req, req); + + kfree(oiff); return status; } @@ -327,7 +729,7 @@ int ocfs2_info_handle_unknown(struct inode *inode, if (o2info_from_user(oir, req)) goto bail; - o2info_clear_request_filled(oir); + o2info_clear_request_filled(&oir); if (o2info_to_user(oir, req)) goto bail; @@ -335,7 +737,7 @@ int ocfs2_info_handle_unknown(struct inode *inode, status = 0; bail: if (status) - o2info_set_request_error(oir, req); + o2info_set_request_error(&oir, req); return status; } @@ -389,6 +791,14 @@ int ocfs2_info_handle_request(struct inode *inode, if (oir.ir_size == sizeof(struct ocfs2_info_journal_size)) status = ocfs2_info_handle_journal_size(inode, req); break; + case OCFS2_INFO_FREEINODE: + if (oir.ir_size == sizeof(struct ocfs2_info_freeinode)) + status = ocfs2_info_handle_freeinode(inode, req); + break; + case OCFS2_INFO_FREEFRAG: + if (oir.ir_size == sizeof(struct ocfs2_info_freefrag)) + status = ocfs2_info_handle_freefrag(inode, req); + break; default: status = ocfs2_info_handle_unknown(inode, req); break; @@ -542,6 +952,31 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -EFAULT; return ocfs2_info_handle(inode, &info, 0); + case FITRIM: + { + struct super_block *sb = inode->i_sb; + struct fstrim_range range; + int ret = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&range, (struct fstrim_range *)arg, + sizeof(range))) + return -EFAULT; + + ret = ocfs2_trim_fs(sb, &range); + if (ret < 0) + return ret; + + if (copy_to_user((struct fstrim_range *)arg, &range, + sizeof(range))) + return -EFAULT; + + return 0; + } + case OCFS2_IOC_MOVE_EXT: + return ocfs2_ioctl_move_extents(filp, (void __user *)arg); default: return -ENOTTY; } @@ -569,6 +1004,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) case OCFS2_IOC_GROUP_EXTEND: case OCFS2_IOC_GROUP_ADD: case OCFS2_IOC_GROUP_ADD64: + case FITRIM: break; case OCFS2_IOC_REFLINK: if (copy_from_user(&args, (struct reflink_arguments *)arg, @@ -584,6 +1020,8 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) return -EFAULT; return ocfs2_info_handle(inode, &info, 1); + case OCFS2_IOC_MOVE_EXT: + break; default: return -ENOIOCTLCMD; } diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c new file mode 100644 index 000000000000..4c5488468c14 --- /dev/null +++ b/fs/ocfs2/move_extents.c @@ -0,0 +1,1153 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * move_extents.c + * + * Copyright (C) 2011 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/fs.h> +#include <linux/types.h> +#include <linux/mount.h> +#include <linux/swap.h> + +#include <cluster/masklog.h> + +#include "ocfs2.h" +#include "ocfs2_ioctl.h" + +#include "alloc.h" +#include "aops.h" +#include "dlmglue.h" +#include "extent_map.h" +#include "inode.h" +#include "journal.h" +#include "suballoc.h" +#include "uptodate.h" +#include "super.h" +#include "dir.h" +#include "buffer_head_io.h" +#include "sysfile.h" +#include "suballoc.h" +#include "refcounttree.h" +#include "move_extents.h" + +struct ocfs2_move_extents_context { + struct inode *inode; + struct file *file; + int auto_defrag; + int partial; + int credits; + u32 new_phys_cpos; + u32 clusters_moved; + u64 refcount_loc; + struct ocfs2_move_extents *range; + struct ocfs2_extent_tree et; + struct ocfs2_alloc_context *meta_ac; + struct ocfs2_alloc_context *data_ac; + struct ocfs2_cached_dealloc_ctxt dealloc; +}; + +static int __ocfs2_move_extent(handle_t *handle, + struct ocfs2_move_extents_context *context, + u32 cpos, u32 len, u32 p_cpos, u32 new_p_cpos, + int ext_flags) +{ + int ret = 0, index; + struct inode *inode = context->inode; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_extent_rec *rec, replace_rec; + struct ocfs2_path *path = NULL; + struct ocfs2_extent_list *el; + u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci); + u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos); + + ret = ocfs2_duplicate_clusters_by_page(handle, context->file, cpos, + p_cpos, new_p_cpos, len); + if (ret) { + mlog_errno(ret); + goto out; + } + + memset(&replace_rec, 0, sizeof(replace_rec)); + replace_rec.e_cpos = cpu_to_le32(cpos); + replace_rec.e_leaf_clusters = cpu_to_le16(len); + replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb, + new_p_cpos)); + + path = ocfs2_new_path_from_et(&context->et); + if (!path) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_find_path(INODE_CACHE(inode), path, cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + el = path_leaf_el(path); + + index = ocfs2_search_extent_list(el, cpos); + if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { + ocfs2_error(inode->i_sb, + "Inode %llu has an extent at cpos %u which can no " + "longer be found.\n", + (unsigned long long)ino, cpos); + ret = -EROFS; + goto out; + } + + rec = &el->l_recs[index]; + + BUG_ON(ext_flags != rec->e_flags); + /* + * after moving/defraging to new location, the extent is not going + * to be refcounted anymore. + */ + replace_rec.e_flags = ext_flags & ~OCFS2_EXT_REFCOUNTED; + + ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), + context->et.et_root_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_split_extent(handle, &context->et, path, index, + &replace_rec, context->meta_ac, + &context->dealloc); + if (ret) { + mlog_errno(ret); + goto out; + } + + ocfs2_journal_dirty(handle, context->et.et_root_bh); + + context->new_phys_cpos = new_p_cpos; + + /* + * need I to append truncate log for old clusters? + */ + if (old_blkno) { + if (ext_flags & OCFS2_EXT_REFCOUNTED) + ret = ocfs2_decrease_refcount(inode, handle, + ocfs2_blocks_to_clusters(osb->sb, + old_blkno), + len, context->meta_ac, + &context->dealloc, 1); + else + ret = ocfs2_truncate_log_append(osb, handle, + old_blkno, len); + } + +out: + return ret; +} + +/* + * lock allocators, and reserving appropriate number of bits for + * meta blocks and data clusters. + * + * in some cases, we don't need to reserve clusters, just let data_ac + * be NULL. + */ +static int ocfs2_lock_allocators_move_extents(struct inode *inode, + struct ocfs2_extent_tree *et, + u32 clusters_to_move, + u32 extents_to_split, + struct ocfs2_alloc_context **meta_ac, + struct ocfs2_alloc_context **data_ac, + int extra_blocks, + int *credits) +{ + int ret, num_free_extents; + unsigned int max_recs_needed = 2 * extents_to_split + clusters_to_move; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + num_free_extents = ocfs2_num_free_extents(osb, et); + if (num_free_extents < 0) { + ret = num_free_extents; + mlog_errno(ret); + goto out; + } + + if (!num_free_extents || + (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) + extra_blocks += ocfs2_extend_meta_needed(et->et_root_el); + + ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, meta_ac); + if (ret) { + mlog_errno(ret); + goto out; + } + + if (data_ac) { + ret = ocfs2_reserve_clusters(osb, clusters_to_move, data_ac); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el, + clusters_to_move + 2); + + mlog(0, "reserve metadata_blocks: %d, data_clusters: %u, credits: %d\n", + extra_blocks, clusters_to_move, *credits); +out: + if (ret) { + if (*meta_ac) { + ocfs2_free_alloc_context(*meta_ac); + *meta_ac = NULL; + } + } + + return ret; +} + +/* + * Using one journal handle to guarantee the data consistency in case + * crash happens anywhere. + * + * XXX: defrag can end up with finishing partial extent as requested, + * due to not enough contiguous clusters can be found in allocator. + */ +static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, + u32 cpos, u32 phys_cpos, u32 *len, int ext_flags) +{ + int ret, credits = 0, extra_blocks = 0, partial = context->partial; + handle_t *handle; + struct inode *inode = context->inode; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct inode *tl_inode = osb->osb_tl_inode; + struct ocfs2_refcount_tree *ref_tree = NULL; + u32 new_phys_cpos, new_len; + u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); + + if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) { + + BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & + OCFS2_HAS_REFCOUNT_FL)); + + BUG_ON(!context->refcount_loc); + + ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1, + &ref_tree, NULL); + if (ret) { + mlog_errno(ret); + return ret; + } + + ret = ocfs2_prepare_refcount_change_for_del(inode, + context->refcount_loc, + phys_blkno, + *len, + &credits, + &extra_blocks); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1, + &context->meta_ac, + &context->data_ac, + extra_blocks, &credits); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* + * should be using allocation reservation strategy there? + * + * if (context->data_ac) + * context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv; + */ + + mutex_lock(&tl_inode->i_mutex); + + if (ocfs2_truncate_log_needs_flush(osb)) { + ret = __ocfs2_flush_truncate_log(osb); + if (ret < 0) { + mlog_errno(ret); + goto out_unlock_mutex; + } + } + + handle = ocfs2_start_trans(osb, credits); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto out_unlock_mutex; + } + + ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, *len, + &new_phys_cpos, &new_len); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + + /* + * allowing partial extent moving is kind of 'pros and cons', it makes + * whole defragmentation less likely to fail, on the contrary, the bad + * thing is it may make the fs even more fragmented after moving, let + * userspace make a good decision here. + */ + if (new_len != *len) { + mlog(0, "len_claimed: %u, len: %u\n", new_len, *len); + if (!partial) { + context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE; + ret = -ENOSPC; + goto out_commit; + } + } + + mlog(0, "cpos: %u, phys_cpos: %u, new_phys_cpos: %u\n", cpos, + phys_cpos, new_phys_cpos); + + ret = __ocfs2_move_extent(handle, context, cpos, new_len, phys_cpos, + new_phys_cpos, ext_flags); + if (ret) + mlog_errno(ret); + + if (partial && (new_len != *len)) + *len = new_len; + + /* + * Here we should write the new page out first if we are + * in write-back mode. + */ + ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, *len); + if (ret) + mlog_errno(ret); + +out_commit: + ocfs2_commit_trans(osb, handle); + +out_unlock_mutex: + mutex_unlock(&tl_inode->i_mutex); + + if (context->data_ac) { + ocfs2_free_alloc_context(context->data_ac); + context->data_ac = NULL; + } + + if (context->meta_ac) { + ocfs2_free_alloc_context(context->meta_ac); + context->meta_ac = NULL; + } + +out: + if (ref_tree) + ocfs2_unlock_refcount_tree(osb, ref_tree, 1); + + return ret; +} + +/* + * find the victim alloc group, where #blkno fits. + */ +static int ocfs2_find_victim_alloc_group(struct inode *inode, + u64 vict_blkno, + int type, int slot, + int *vict_bit, + struct buffer_head **ret_bh) +{ + int ret, i, blocks_per_unit = 1; + u64 blkno; + char namebuf[40]; + + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct buffer_head *ac_bh = NULL, *gd_bh = NULL; + struct ocfs2_chain_list *cl; + struct ocfs2_chain_rec *rec; + struct ocfs2_dinode *ac_dinode; + struct ocfs2_group_desc *bg; + + ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot); + ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf, + strlen(namebuf), &blkno); + if (ret) { + ret = -ENOENT; + goto out; + } + + ret = ocfs2_read_blocks_sync(osb, blkno, 1, &ac_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + ac_dinode = (struct ocfs2_dinode *)ac_bh->b_data; + cl = &(ac_dinode->id2.i_chain); + rec = &(cl->cl_recs[0]); + + if (type == GLOBAL_BITMAP_SYSTEM_INODE) + blocks_per_unit <<= (osb->s_clustersize_bits - + inode->i_sb->s_blocksize_bits); + /* + * 'vict_blkno' was out of the valid range. + */ + if ((vict_blkno < le64_to_cpu(rec->c_blkno)) || + (vict_blkno >= (le32_to_cpu(ac_dinode->id1.bitmap1.i_total) * + blocks_per_unit))) { + ret = -EINVAL; + goto out; + } + + for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) { + + rec = &(cl->cl_recs[i]); + if (!rec) + continue; + + bg = NULL; + + do { + if (!bg) + blkno = le64_to_cpu(rec->c_blkno); + else + blkno = le64_to_cpu(bg->bg_next_group); + + if (gd_bh) { + brelse(gd_bh); + gd_bh = NULL; + } + + ret = ocfs2_read_blocks_sync(osb, blkno, 1, &gd_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + bg = (struct ocfs2_group_desc *)gd_bh->b_data; + + if (vict_blkno < (le64_to_cpu(bg->bg_blkno) + + le16_to_cpu(bg->bg_bits))) { + + *ret_bh = gd_bh; + *vict_bit = (vict_blkno - blkno) / + blocks_per_unit; + mlog(0, "find the victim group: #%llu, " + "total_bits: %u, vict_bit: %u\n", + blkno, le16_to_cpu(bg->bg_bits), + *vict_bit); + goto out; + } + + } while (le64_to_cpu(bg->bg_next_group)); + } + + ret = -EINVAL; +out: + brelse(ac_bh); + + /* + * caller has to release the gd_bh properly. + */ + return ret; +} + +/* + * XXX: helper to validate and adjust moving goal. + */ +static int ocfs2_validate_and_adjust_move_goal(struct inode *inode, + struct ocfs2_move_extents *range) +{ + int ret, goal_bit = 0; + + struct buffer_head *gd_bh = NULL; + struct ocfs2_group_desc *bg; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + int c_to_b = 1 << (osb->s_clustersize_bits - + inode->i_sb->s_blocksize_bits); + + /* + * validate goal sits within global_bitmap, and return the victim + * group desc + */ + ret = ocfs2_find_victim_alloc_group(inode, range->me_goal, + GLOBAL_BITMAP_SYSTEM_INODE, + OCFS2_INVALID_SLOT, + &goal_bit, &gd_bh); + if (ret) + goto out; + + bg = (struct ocfs2_group_desc *)gd_bh->b_data; + + /* + * make goal become cluster aligned. + */ + if (range->me_goal % c_to_b) + range->me_goal = range->me_goal / c_to_b * c_to_b; + + /* + * moving goal is not allowd to start with a group desc blok(#0 blk) + * let's compromise to the latter cluster. + */ + if (range->me_goal == le64_to_cpu(bg->bg_blkno)) + range->me_goal += c_to_b; + + /* + * movement is not gonna cross two groups. + */ + if ((le16_to_cpu(bg->bg_bits) - goal_bit) * osb->s_clustersize < + range->me_len) { + ret = -EINVAL; + goto out; + } + /* + * more exact validations/adjustments will be performed later during + * moving operation for each extent range. + */ + mlog(0, "extents get ready to be moved to #%llu block\n", + range->me_goal); + +out: + brelse(gd_bh); + + return ret; +} + +static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh, + int *goal_bit, u32 move_len, u32 max_hop, + u32 *phys_cpos) +{ + int i, used, last_free_bits = 0, base_bit = *goal_bit; + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; + u32 base_cpos = ocfs2_blocks_to_clusters(inode->i_sb, + le64_to_cpu(gd->bg_blkno)); + + for (i = base_bit; i < le16_to_cpu(gd->bg_bits); i++) { + + used = ocfs2_test_bit(i, (unsigned long *)gd->bg_bitmap); + if (used) { + /* + * we even tried searching the free chunk by jumping + * a 'max_hop' distance, but still failed. + */ + if ((i - base_bit) > max_hop) { + *phys_cpos = 0; + break; + } + + if (last_free_bits) + last_free_bits = 0; + + continue; + } else + last_free_bits++; + + if (last_free_bits == move_len) { + *goal_bit = i; + *phys_cpos = base_cpos + i; + break; + } + } + + mlog(0, "found phys_cpos: %u to fit the wanted moving.\n", *phys_cpos); +} + +static int ocfs2_alloc_dinode_update_counts(struct inode *inode, + handle_t *handle, + struct buffer_head *di_bh, + u32 num_bits, + u16 chain) +{ + int ret; + u32 tmp_used; + struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; + struct ocfs2_chain_list *cl = + (struct ocfs2_chain_list *) &di->id2.i_chain; + + ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); + di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used); + le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits); + ocfs2_journal_dirty(handle, di_bh); + +out: + return ret; +} + +static inline int ocfs2_block_group_set_bits(handle_t *handle, + struct inode *alloc_inode, + struct ocfs2_group_desc *bg, + struct buffer_head *group_bh, + unsigned int bit_off, + unsigned int num_bits) +{ + int status; + void *bitmap = bg->bg_bitmap; + int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; + + /* All callers get the descriptor via + * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); + BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); + + mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, + num_bits); + + if (ocfs2_is_cluster_bitmap(alloc_inode)) + journal_type = OCFS2_JOURNAL_ACCESS_UNDO; + + status = ocfs2_journal_access_gd(handle, + INODE_CACHE(alloc_inode), + group_bh, + journal_type); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + le16_add_cpu(&bg->bg_free_bits_count, -num_bits); + if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { + ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit" + " count %u but claims %u are freed. num_bits %d", + (unsigned long long)le64_to_cpu(bg->bg_blkno), + le16_to_cpu(bg->bg_bits), + le16_to_cpu(bg->bg_free_bits_count), num_bits); + return -EROFS; + } + while (num_bits--) + ocfs2_set_bit(bit_off++, bitmap); + + ocfs2_journal_dirty(handle, group_bh); + +bail: + return status; +} + +static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, + u32 cpos, u32 phys_cpos, u32 *new_phys_cpos, + u32 len, int ext_flags) +{ + int ret, credits = 0, extra_blocks = 0, goal_bit = 0; + handle_t *handle; + struct inode *inode = context->inode; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct inode *tl_inode = osb->osb_tl_inode; + struct inode *gb_inode = NULL; + struct buffer_head *gb_bh = NULL; + struct buffer_head *gd_bh = NULL; + struct ocfs2_group_desc *gd; + struct ocfs2_refcount_tree *ref_tree = NULL; + u32 move_max_hop = ocfs2_blocks_to_clusters(inode->i_sb, + context->range->me_threshold); + u64 phys_blkno, new_phys_blkno; + + phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); + + if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) { + + BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & + OCFS2_HAS_REFCOUNT_FL)); + + BUG_ON(!context->refcount_loc); + + ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1, + &ref_tree, NULL); + if (ret) { + mlog_errno(ret); + return ret; + } + + ret = ocfs2_prepare_refcount_change_for_del(inode, + context->refcount_loc, + phys_blkno, + len, + &credits, + &extra_blocks); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1, + &context->meta_ac, + NULL, extra_blocks, &credits); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* + * need to count 2 extra credits for global_bitmap inode and + * group descriptor. + */ + credits += OCFS2_INODE_UPDATE_CREDITS + 1; + + /* + * ocfs2_move_extent() didn't reserve any clusters in lock_allocators() + * logic, while we still need to lock the global_bitmap. + */ + gb_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, + OCFS2_INVALID_SLOT); + if (!gb_inode) { + mlog(ML_ERROR, "unable to get global_bitmap inode\n"); + ret = -EIO; + goto out; + } + + mutex_lock(&gb_inode->i_mutex); + + ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1); + if (ret) { + mlog_errno(ret); + goto out_unlock_gb_mutex; + } + + mutex_lock(&tl_inode->i_mutex); + + handle = ocfs2_start_trans(osb, credits); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto out_unlock_tl_inode; + } + + new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos); + ret = ocfs2_find_victim_alloc_group(inode, new_phys_blkno, + GLOBAL_BITMAP_SYSTEM_INODE, + OCFS2_INVALID_SLOT, + &goal_bit, &gd_bh); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + + /* + * probe the victim cluster group to find a proper + * region to fit wanted movement, it even will perfrom + * a best-effort attempt by compromising to a threshold + * around the goal. + */ + ocfs2_probe_alloc_group(inode, gd_bh, &goal_bit, len, move_max_hop, + new_phys_cpos); + if (!new_phys_cpos) { + ret = -ENOSPC; + goto out_commit; + } + + ret = __ocfs2_move_extent(handle, context, cpos, len, phys_cpos, + *new_phys_cpos, ext_flags); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + + gd = (struct ocfs2_group_desc *)gd_bh->b_data; + ret = ocfs2_alloc_dinode_update_counts(gb_inode, handle, gb_bh, len, + le16_to_cpu(gd->bg_chain)); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + + ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh, + goal_bit, len); + if (ret) + mlog_errno(ret); + + /* + * Here we should write the new page out first if we are + * in write-back mode. + */ + ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, len); + if (ret) + mlog_errno(ret); + +out_commit: + ocfs2_commit_trans(osb, handle); + brelse(gd_bh); + +out_unlock_tl_inode: + mutex_unlock(&tl_inode->i_mutex); + + ocfs2_inode_unlock(gb_inode, 1); +out_unlock_gb_mutex: + mutex_unlock(&gb_inode->i_mutex); + brelse(gb_bh); + iput(gb_inode); + +out: + if (context->meta_ac) { + ocfs2_free_alloc_context(context->meta_ac); + context->meta_ac = NULL; + } + + if (ref_tree) + ocfs2_unlock_refcount_tree(osb, ref_tree, 1); + + return ret; +} + +/* + * Helper to calculate the defraging length in one run according to threshold. + */ +static void ocfs2_calc_extent_defrag_len(u32 *alloc_size, u32 *len_defraged, + u32 threshold, int *skip) +{ + if ((*alloc_size + *len_defraged) < threshold) { + /* + * proceed defragmentation until we meet the thresh + */ + *len_defraged += *alloc_size; + } else if (*len_defraged == 0) { + /* + * XXX: skip a large extent. + */ + *skip = 1; + } else { + /* + * split this extent to coalesce with former pieces as + * to reach the threshold. + * + * we're done here with one cycle of defragmentation + * in a size of 'thresh', resetting 'len_defraged' + * forces a new defragmentation. + */ + *alloc_size = threshold - *len_defraged; + *len_defraged = 0; + } +} + +static int __ocfs2_move_extents_range(struct buffer_head *di_bh, + struct ocfs2_move_extents_context *context) +{ + int ret = 0, flags, do_defrag, skip = 0; + u32 cpos, phys_cpos, move_start, len_to_move, alloc_size; + u32 len_defraged = 0, defrag_thresh = 0, new_phys_cpos = 0; + + struct inode *inode = context->inode; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; + struct ocfs2_move_extents *range = context->range; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + if ((inode->i_size == 0) || (range->me_len == 0)) + return 0; + + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) + return 0; + + context->refcount_loc = le64_to_cpu(di->i_refcount_loc); + + ocfs2_init_dinode_extent_tree(&context->et, INODE_CACHE(inode), di_bh); + ocfs2_init_dealloc_ctxt(&context->dealloc); + + /* + * TO-DO XXX: + * + * - xattr extents. + */ + + do_defrag = context->auto_defrag; + + /* + * extents moving happens in unit of clusters, for the sake + * of simplicity, we may ignore two clusters where 'byte_start' + * and 'byte_start + len' were within. + */ + move_start = ocfs2_clusters_for_bytes(osb->sb, range->me_start); + len_to_move = (range->me_start + range->me_len) >> + osb->s_clustersize_bits; + if (len_to_move >= move_start) + len_to_move -= move_start; + else + len_to_move = 0; + + if (do_defrag) { + defrag_thresh = range->me_threshold >> osb->s_clustersize_bits; + if (defrag_thresh <= 1) + goto done; + } else + new_phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, + range->me_goal); + + mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u, " + "thresh: %u\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)range->me_start, + (unsigned long long)range->me_len, + move_start, len_to_move, defrag_thresh); + + cpos = move_start; + while (len_to_move) { + ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &alloc_size, + &flags); + if (ret) { + mlog_errno(ret); + goto out; + } + + if (alloc_size > len_to_move) + alloc_size = len_to_move; + + /* + * XXX: how to deal with a hole: + * + * - skip the hole of course + * - force a new defragmentation + */ + if (!phys_cpos) { + if (do_defrag) + len_defraged = 0; + + goto next; + } + + if (do_defrag) { + ocfs2_calc_extent_defrag_len(&alloc_size, &len_defraged, + defrag_thresh, &skip); + /* + * skip large extents + */ + if (skip) { + skip = 0; + goto next; + } + + mlog(0, "#Defrag: cpos: %u, phys_cpos: %u, " + "alloc_size: %u, len_defraged: %u\n", + cpos, phys_cpos, alloc_size, len_defraged); + + ret = ocfs2_defrag_extent(context, cpos, phys_cpos, + &alloc_size, flags); + } else { + ret = ocfs2_move_extent(context, cpos, phys_cpos, + &new_phys_cpos, alloc_size, + flags); + + new_phys_cpos += alloc_size; + } + + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + context->clusters_moved += alloc_size; +next: + cpos += alloc_size; + len_to_move -= alloc_size; + } + +done: + range->me_flags |= OCFS2_MOVE_EXT_FL_COMPLETE; + +out: + range->me_moved_len = ocfs2_clusters_to_bytes(osb->sb, + context->clusters_moved); + range->me_new_offset = ocfs2_clusters_to_bytes(osb->sb, + context->new_phys_cpos); + + ocfs2_schedule_truncate_log_flush(osb, 1); + ocfs2_run_deallocs(osb, &context->dealloc); + + return ret; +} + +static int ocfs2_move_extents(struct ocfs2_move_extents_context *context) +{ + int status; + handle_t *handle; + struct inode *inode = context->inode; + struct ocfs2_dinode *di; + struct buffer_head *di_bh = NULL; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + if (!inode) + return -ENOENT; + + if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) + return -EROFS; + + mutex_lock(&inode->i_mutex); + + /* + * This prevents concurrent writes from other nodes + */ + status = ocfs2_rw_lock(inode, 1); + if (status) { + mlog_errno(status); + goto out; + } + + status = ocfs2_inode_lock(inode, &di_bh, 1); + if (status) { + mlog_errno(status); + goto out_rw_unlock; + } + + /* + * rememer ip_xattr_sem also needs to be held if necessary + */ + down_write(&OCFS2_I(inode)->ip_alloc_sem); + + status = __ocfs2_move_extents_range(di_bh, context); + + up_write(&OCFS2_I(inode)->ip_alloc_sem); + if (status) { + mlog_errno(status); + goto out_inode_unlock; + } + + /* + * We update ctime for these changes + */ + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto out_inode_unlock; + } + + status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status) { + mlog_errno(status); + goto out_commit; + } + + di = (struct ocfs2_dinode *)di_bh->b_data; + inode->i_ctime = CURRENT_TIME; + di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); + di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + + ocfs2_journal_dirty(handle, di_bh); + +out_commit: + ocfs2_commit_trans(osb, handle); + +out_inode_unlock: + brelse(di_bh); + ocfs2_inode_unlock(inode, 1); +out_rw_unlock: + ocfs2_rw_unlock(inode, 1); +out: + mutex_unlock(&inode->i_mutex); + + return status; +} + +int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) +{ + int status; + + struct inode *inode = filp->f_path.dentry->d_inode; + struct ocfs2_move_extents range; + struct ocfs2_move_extents_context *context = NULL; + + status = mnt_want_write(filp->f_path.mnt); + if (status) + return status; + + if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) + goto out; + + if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { + status = -EPERM; + goto out; + } + + context = kzalloc(sizeof(struct ocfs2_move_extents_context), GFP_NOFS); + if (!context) { + status = -ENOMEM; + mlog_errno(status); + goto out; + } + + context->inode = inode; + context->file = filp; + + if (argp) { + if (copy_from_user(&range, (struct ocfs2_move_extents *)argp, + sizeof(range))) { + status = -EFAULT; + goto out; + } + } else { + status = -EINVAL; + goto out; + } + + if (range.me_start > i_size_read(inode)) + goto out; + + if (range.me_start + range.me_len > i_size_read(inode)) + range.me_len = i_size_read(inode) - range.me_start; + + context->range = ⦥ + + if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) { + context->auto_defrag = 1; + /* + * ok, the default theshold for the defragmentation + * is 1M, since our maximum clustersize was 1M also. + * any thought? + */ + if (!range.me_threshold) + range.me_threshold = 1024 * 1024; + + if (range.me_threshold > i_size_read(inode)) + range.me_threshold = i_size_read(inode); + + if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG) + context->partial = 1; + } else { + /* + * first best-effort attempt to validate and adjust the goal + * (physical address in block), while it can't guarantee later + * operation can succeed all the time since global_bitmap may + * change a bit over time. + */ + + status = ocfs2_validate_and_adjust_move_goal(inode, &range); + if (status) + goto out; + } + + status = ocfs2_move_extents(context); + if (status) + mlog_errno(status); +out: + /* + * movement/defragmentation may end up being partially completed, + * that's the reason why we need to return userspace the finished + * length and new_offset even if failure happens somewhere. + */ + if (argp) { + if (copy_to_user((struct ocfs2_move_extents *)argp, &range, + sizeof(range))) + status = -EFAULT; + } + + kfree(context); + + mnt_drop_write(filp->f_path.mnt); + + return status; +} diff --git a/fs/ocfs2/move_extents.h b/fs/ocfs2/move_extents.h new file mode 100644 index 000000000000..4e143e811441 --- /dev/null +++ b/fs/ocfs2/move_extents.h @@ -0,0 +1,22 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * move_extents.h + * + * Copyright (C) 2011 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef OCFS2_MOVE_EXTENTS_H +#define OCFS2_MOVE_EXTENTS_H + +int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp); + +#endif /* OCFS2_MOVE_EXTENTS_H */ diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index b46f39bf7438..5b27ff1fa577 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h @@ -142,6 +142,38 @@ struct ocfs2_info_journal_size { __u64 ij_journal_size; }; +struct ocfs2_info_freeinode { + struct ocfs2_info_request ifi_req; + struct ocfs2_info_local_freeinode { + __u64 lfi_total; + __u64 lfi_free; + } ifi_stat[OCFS2_MAX_SLOTS]; + __u32 ifi_slotnum; /* out */ + __u32 ifi_pad; +}; + +#define OCFS2_INFO_MAX_HIST (32) + +struct ocfs2_info_freefrag { + struct ocfs2_info_request iff_req; + struct ocfs2_info_freefrag_stats { /* (out) */ + struct ocfs2_info_free_chunk_list { + __u32 fc_chunks[OCFS2_INFO_MAX_HIST]; + __u32 fc_clusters[OCFS2_INFO_MAX_HIST]; + } ffs_fc_hist; + __u32 ffs_clusters; + __u32 ffs_free_clusters; + __u32 ffs_free_chunks; + __u32 ffs_free_chunks_real; + __u32 ffs_min; /* Minimum free chunksize in clusters */ + __u32 ffs_max; + __u32 ffs_avg; + __u32 ffs_pad; + } iff_ffs; + __u32 iff_chunksize; /* chunksize in clusters(in) */ + __u32 iff_pad; +}; + /* Codes for ocfs2_info_request */ enum ocfs2_info_type { OCFS2_INFO_CLUSTERSIZE = 1, @@ -151,6 +183,8 @@ enum ocfs2_info_type { OCFS2_INFO_UUID, OCFS2_INFO_FS_FEATURES, OCFS2_INFO_JOURNAL_SIZE, + OCFS2_INFO_FREEINODE, + OCFS2_INFO_FREEFRAG, OCFS2_INFO_NUM_TYPES }; @@ -171,4 +205,38 @@ enum ocfs2_info_type { #define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info) +struct ocfs2_move_extents { +/* All values are in bytes */ + /* in */ + __u64 me_start; /* Virtual start in the file to move */ + __u64 me_len; /* Length of the extents to be moved */ + __u64 me_goal; /* Physical offset of the goal, + it's in block unit */ + __u64 me_threshold; /* Maximum distance from goal or threshold + for auto defragmentation */ + __u64 me_flags; /* Flags for the operation: + * - auto defragmentation. + * - refcount,xattr cases. + */ + /* out */ + __u64 me_moved_len; /* Moved/defraged length */ + __u64 me_new_offset; /* Resulting physical location */ + __u32 me_reserved[2]; /* Reserved for futhure */ +}; + +#define OCFS2_MOVE_EXT_FL_AUTO_DEFRAG (0x00000001) /* Kernel manages to + claim new clusters + as the goal place + for extents moving */ +#define OCFS2_MOVE_EXT_FL_PART_DEFRAG (0x00000002) /* Allow partial extent + moving, is to make + movement less likely + to fail, may make fs + even more fragmented */ +#define OCFS2_MOVE_EXT_FL_COMPLETE (0x00000004) /* Move or defragmenation + completely gets done. + */ + +#define OCFS2_IOC_MOVE_EXT _IOW('o', 6, struct ocfs2_move_extents) + #endif /* OCFS2_IOCTL_H */ diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index a1dae5bb54ac..3b481f490633 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h @@ -688,6 +688,31 @@ TRACE_EVENT(ocfs2_cache_block_dealloc, __entry->blkno, __entry->bit) ); +TRACE_EVENT(ocfs2_trim_extent, + TP_PROTO(struct super_block *sb, unsigned long long blk, + unsigned long long count), + TP_ARGS(sb, blk, count), + TP_STRUCT__entry( + __field(int, dev_major) + __field(int, dev_minor) + __field(unsigned long long, blk) + __field(__u64, count) + ), + TP_fast_assign( + __entry->dev_major = MAJOR(sb->s_dev); + __entry->dev_minor = MINOR(sb->s_dev); + __entry->blk = blk; + __entry->count = count; + ), + TP_printk("%d %d %llu %llu", + __entry->dev_major, __entry->dev_minor, + __entry->blk, __entry->count) +); + +DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_trim_group); + +DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_trim_fs); + /* End of trace events for fs/ocfs2/alloc.c. */ /* Trace events for fs/ocfs2/localalloc.c. */ diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 3c7606cff1ab..ebfd3825f12a 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -66,7 +66,7 @@ struct ocfs2_cow_context { u32 *num_clusters, unsigned int *extent_flags); int (*cow_duplicate_clusters)(handle_t *handle, - struct ocfs2_cow_context *context, + struct file *file, u32 cpos, u32 old_cluster, u32 new_cluster, u32 new_len); }; @@ -2921,20 +2921,21 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh) return 0; } -static int ocfs2_duplicate_clusters_by_page(handle_t *handle, - struct ocfs2_cow_context *context, - u32 cpos, u32 old_cluster, - u32 new_cluster, u32 new_len) +int ocfs2_duplicate_clusters_by_page(handle_t *handle, + struct file *file, + u32 cpos, u32 old_cluster, + u32 new_cluster, u32 new_len) { int ret = 0, partial; - struct ocfs2_caching_info *ci = context->data_et.et_ci; + struct inode *inode = file->f_path.dentry->d_inode; + struct ocfs2_caching_info *ci = INODE_CACHE(inode); struct super_block *sb = ocfs2_metadata_cache_get_super(ci); u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); struct page *page; pgoff_t page_index; unsigned int from, to, readahead_pages; loff_t offset, end, map_end; - struct address_space *mapping = context->inode->i_mapping; + struct address_space *mapping = inode->i_mapping; trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster, new_cluster, new_len); @@ -2948,8 +2949,8 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, * We only duplicate pages until we reach the page contains i_size - 1. * So trim 'end' to i_size. */ - if (end > i_size_read(context->inode)) - end = i_size_read(context->inode); + if (end > i_size_read(inode)) + end = i_size_read(inode); while (offset < end) { page_index = offset >> PAGE_CACHE_SHIFT; @@ -2972,10 +2973,9 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) BUG_ON(PageDirty(page)); - if (PageReadahead(page) && context->file) { + if (PageReadahead(page)) { page_cache_async_readahead(mapping, - &context->file->f_ra, - context->file, + &file->f_ra, file, page, page_index, readahead_pages); } @@ -2999,8 +2999,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, } } - ocfs2_map_and_dirty_page(context->inode, - handle, from, to, + ocfs2_map_and_dirty_page(inode, handle, from, to, page, 0, &new_block); mark_page_accessed(page); unlock: @@ -3015,14 +3014,15 @@ unlock: return ret; } -static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, - struct ocfs2_cow_context *context, - u32 cpos, u32 old_cluster, - u32 new_cluster, u32 new_len) +int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, + struct file *file, + u32 cpos, u32 old_cluster, + u32 new_cluster, u32 new_len) { int ret = 0; - struct super_block *sb = context->inode->i_sb; - struct ocfs2_caching_info *ci = context->data_et.et_ci; + struct inode *inode = file->f_path.dentry->d_inode; + struct super_block *sb = inode->i_sb; + struct ocfs2_caching_info *ci = INODE_CACHE(inode); int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster); u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); @@ -3145,8 +3145,8 @@ static int ocfs2_replace_clusters(handle_t *handle, /*If the old clusters is unwritten, no need to duplicate. */ if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { - ret = context->cow_duplicate_clusters(handle, context, cpos, - old, new, len); + ret = context->cow_duplicate_clusters(handle, context->file, + cpos, old, new, len); if (ret) { mlog_errno(ret); goto out; @@ -3162,22 +3162,22 @@ out: return ret; } -static int ocfs2_cow_sync_writeback(struct super_block *sb, - struct ocfs2_cow_context *context, - u32 cpos, u32 num_clusters) +int ocfs2_cow_sync_writeback(struct super_block *sb, + struct inode *inode, + u32 cpos, u32 num_clusters) { int ret = 0; loff_t offset, end, map_end; pgoff_t page_index; struct page *page; - if (ocfs2_should_order_data(context->inode)) + if (ocfs2_should_order_data(inode)) return 0; offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits); - ret = filemap_fdatawrite_range(context->inode->i_mapping, + ret = filemap_fdatawrite_range(inode->i_mapping, offset, end - 1); if (ret < 0) { mlog_errno(ret); @@ -3190,7 +3190,7 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb, if (map_end > end) map_end = end; - page = find_or_create_page(context->inode->i_mapping, + page = find_or_create_page(inode->i_mapping, page_index, GFP_NOFS); BUG_ON(!page); @@ -3349,7 +3349,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb, * in write-back mode. */ if (context->get_clusters == ocfs2_di_get_clusters) { - ret = ocfs2_cow_sync_writeback(sb, context, cpos, + ret = ocfs2_cow_sync_writeback(sb, context->inode, cpos, orig_num_clusters); if (ret) mlog_errno(ret); diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index c8ce46f7d8e3..7754608c83a4 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h @@ -84,6 +84,17 @@ int ocfs2_refcount_cow_xattr(struct inode *inode, struct buffer_head *ref_root_bh, u32 cpos, u32 write_len, struct ocfs2_post_refcount *post); +int ocfs2_duplicate_clusters_by_page(handle_t *handle, + struct file *file, + u32 cpos, u32 old_cluster, + u32 new_cluster, u32 new_len); +int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, + struct file *file, + u32 cpos, u32 old_cluster, + u32 new_cluster, u32 new_len); +int ocfs2_cow_sync_writeback(struct super_block *sb, + struct inode *inode, + u32 cpos, u32 num_clusters); int ocfs2_add_refcount_flag(struct inode *inode, struct ocfs2_extent_tree *data_et, struct ocfs2_caching_info *ref_ci, diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 5a521c748859..cdbaf5e97308 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -41,6 +41,7 @@ #include <linux/mount.h> #include <linux/seq_file.h> #include <linux/quotaops.h> +#include <linux/cleancache.h> #define CREATE_TRACE_POINTS #include "ocfs2_trace.h" @@ -1566,7 +1567,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) if (osb->preferred_slot != OCFS2_INVALID_SLOT) seq_printf(s, ",preferred_slot=%d", osb->preferred_slot); - if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM) + if (!(mnt->mnt_flags & MNT_NOATIME) && !(mnt->mnt_flags & MNT_RELATIME)) seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); if (osb->osb_commit_interval) @@ -2352,6 +2353,7 @@ static int ocfs2_initialize_super(struct super_block *sb, mlog_errno(status); goto bail; } + cleancache_init_shared_fs((char *)&uuid_net_key, sb); bail: return status; diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index de4ff29f1e05..c368360c35a1 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -240,8 +240,12 @@ static int omfs_remove(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; int ret; - if (S_ISDIR(inode->i_mode) && !omfs_dir_is_empty(inode)) - return -ENOTEMPTY; + + if (S_ISDIR(inode->i_mode)) { + dentry_unhash(dentry); + if (!omfs_dir_is_empty(inode)) + return -ENOTEMPTY; + } ret = omfs_delete_entry(dentry); if (ret) @@ -378,6 +382,9 @@ static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry, int err; if (new_inode) { + if (S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + /* overwriting existing file/dir */ err = omfs_remove(new_dir, new_dentry); if (err) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 2c9db29ea358..db15935fa757 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -211,7 +211,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) { struct mm_struct *mm = vma->vm_mm; struct file *file = vma->vm_file; - int flags = vma->vm_flags; + vm_flags_t flags = vma->vm_flags; unsigned long ino = 0; unsigned long long pgoff = 0; unsigned long start, end; diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 118662690cdf..76c8164d5651 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -831,6 +831,8 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) INITIALIZE_PATH(path); struct reiserfs_dir_entry de; + dentry_unhash(dentry); + /* we will be doing 2 balancings and update 2 stat data, we change quotas * of the owner of the directory and of the owner of the parent directory. * The quota structure is possibly deleted only on last iput => outside @@ -1225,6 +1227,9 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, unsigned long savelink = 1; struct timespec ctime; + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + dentry_unhash(new_dentry); + /* three balancings: (1) old name removal, (2) new name insertion and (3) maybe "save" link insertion stat data updates: (1) old directory, diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 47d2a4498b03..50f1abccd1cd 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -105,7 +105,6 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry) mutex_unlock(&dentry->d_inode->i_mutex); if (!error) d_delete(dentry); - dput(dentry); return error; } diff --git a/fs/super.c b/fs/super.c index c04f7e0b7ed2..c75593953c52 100644 --- a/fs/super.c +++ b/fs/super.c @@ -31,6 +31,7 @@ #include <linux/mutex.h> #include <linux/backing-dev.h> #include <linux/rculist_bl.h> +#include <linux/cleancache.h> #include "internal.h" @@ -112,6 +113,7 @@ static struct super_block *alloc_super(struct file_system_type *type) s->s_maxbytes = MAX_NON_LFS; s->s_op = &default_op; s->s_time_gran = 1000000000; + s->cleancache_poolid = -1; } out: return s; @@ -177,6 +179,7 @@ void deactivate_locked_super(struct super_block *s) { struct file_system_type *fs = s->s_type; if (atomic_dec_and_test(&s->s_active)) { + cleancache_flush_fs(s); fs->kill_sb(s); /* * We need to call rcu_barrier so all the delayed rcu free diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index e474fbcf8bde..e2cc6756f3b1 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -196,6 +196,8 @@ static int sysv_rmdir(struct inode * dir, struct dentry * dentry) struct inode *inode = dentry->d_inode; int err = -ENOTEMPTY; + dentry_unhash(dentry); + if (sysv_empty_dir(inode)) { err = sysv_unlink(dir, dentry); if (!err) { @@ -222,6 +224,9 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry, struct sysv_dir_entry * old_de; int err = -ENOENT; + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + old_de = sysv_find_entry(old_dentry, &old_page); if (!old_de) goto out; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index ef5abd38f0bf..c2b80943560d 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -656,6 +656,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) struct ubifs_inode *dir_ui = ubifs_inode(dir); struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; + dentry_unhash(dentry); + /* * Budget request settings: deletion direntry, deletion inode and * changing the parent inode. If budgeting fails, go ahead anyway @@ -976,6 +978,9 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; struct timespec time; + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + /* * Budget request settings: deletion direntry, new direntry, removing * the old inode, and changing old and new parent directory inodes. diff --git a/fs/udf/namei.c b/fs/udf/namei.c index f1dce848ef96..4d76594c2a8f 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -783,6 +783,8 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) struct fileIdentDesc *fi, cfi; struct kernel_lb_addr tloc; + dentry_unhash(dentry); + retval = -ENOENT; fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); if (!fi) @@ -1081,6 +1083,9 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, struct kernel_lb_addr tloc; struct udf_inode_info *old_iinfo = UDF_I(old_inode); + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); if (ofi) { if (ofibh.sbh != ofibh.ebh) diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 29309e25417f..953ebdfc5bf7 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -258,6 +258,8 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry) struct inode * inode = dentry->d_inode; int err= -ENOTEMPTY; + dentry_unhash(dentry); + lock_ufs(dir->i_sb); if (ufs_empty_dir (inode)) { err = ufs_unlink(dir, dentry); @@ -282,6 +284,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, struct ufs_dir_entry *old_de; int err = -ENOENT; + if (new_inode && S_ISDIR(new_inode->i_mode)) + dentry_unhash(new_dentry); + old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_de) goto out; diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c index d61611c88012..244e797dae32 100644 --- a/fs/xfs/linux-2.6/xfs_discard.c +++ b/fs/xfs/linux-2.6/xfs_discard.c @@ -191,3 +191,32 @@ xfs_ioc_trim( return -XFS_ERROR(EFAULT); return 0; } + +int +xfs_discard_extents( + struct xfs_mount *mp, + struct list_head *list) +{ + struct xfs_busy_extent *busyp; + int error = 0; + + list_for_each_entry(busyp, list, list) { + trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, + busyp->length); + + error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, + XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), + XFS_FSB_TO_BB(mp, busyp->length), + GFP_NOFS, 0); + if (error && error != EOPNOTSUPP) { + xfs_info(mp, + "discard failed for extent [0x%llu,%u], error %d", + (unsigned long long)busyp->bno, + busyp->length, + error); + return error; + } + } + + return 0; +} diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/linux-2.6/xfs_discard.h index e82b6dd3e127..344879aea646 100644 --- a/fs/xfs/linux-2.6/xfs_discard.h +++ b/fs/xfs/linux-2.6/xfs_discard.h @@ -2,7 +2,9 @@ #define XFS_DISCARD_H 1 struct fstrim_range; +struct list_head; extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); +extern int xfs_discard_extents(struct xfs_mount *, struct list_head *); #endif /* XFS_DISCARD_H */ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index b0aa59e51fd0..98b9c91fcdf1 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -110,8 +110,10 @@ mempool_t *xfs_ioend_pool; #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ #define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ #define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ -#define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */ -#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */ +#define MNTOPT_DELAYLOG "delaylog" /* Delayed logging enabled */ +#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */ +#define MNTOPT_DISCARD "discard" /* Discard unused blocks */ +#define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */ /* * Table driven mount option parser. @@ -355,6 +357,10 @@ xfs_parseargs( mp->m_flags |= XFS_MOUNT_DELAYLOG; } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { mp->m_flags &= ~XFS_MOUNT_DELAYLOG; + } else if (!strcmp(this_char, MNTOPT_DISCARD)) { + mp->m_flags |= XFS_MOUNT_DISCARD; + } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { + mp->m_flags &= ~XFS_MOUNT_DISCARD; } else if (!strcmp(this_char, "ihashsize")) { xfs_warn(mp, "ihashsize no longer used, option is deprecated."); @@ -388,6 +394,13 @@ xfs_parseargs( return EINVAL; } + if ((mp->m_flags & XFS_MOUNT_DISCARD) && + !(mp->m_flags & XFS_MOUNT_DELAYLOG)) { + xfs_warn(mp, + "the discard option is incompatible with the nodelaylog option"); + return EINVAL; + } + #ifndef CONFIG_XFS_QUOTA if (XFS_IS_QUOTA_RUNNING(mp)) { xfs_warn(mp, "quota support not available in this kernel."); @@ -488,6 +501,7 @@ xfs_showargs( { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, + { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, { 0, NULL } }; static struct proc_xfs_info xfs_info_unset[] = { diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index da0a561ffba2..6530769a999b 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -187,6 +187,9 @@ struct xfs_busy_extent { xfs_agnumber_t agno; xfs_agblock_t bno; xfs_extlen_t length; + unsigned int flags; +#define XFS_ALLOC_BUSY_DISCARDED 0x01 /* undergoing a discard op. */ +#define XFS_ALLOC_BUSY_SKIP_DISCARD 0x02 /* do not discard */ }; /* diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index acdced86413c..95862bbff56b 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -2469,7 +2469,7 @@ xfs_free_extent( error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); if (!error) - xfs_alloc_busy_insert(tp, args.agno, args.agbno, len); + xfs_alloc_busy_insert(tp, args.agno, args.agbno, len, 0); error0: xfs_perag_put(args.pag); return error; @@ -2480,7 +2480,8 @@ xfs_alloc_busy_insert( struct xfs_trans *tp, xfs_agnumber_t agno, xfs_agblock_t bno, - xfs_extlen_t len) + xfs_extlen_t len, + unsigned int flags) { struct xfs_busy_extent *new; struct xfs_busy_extent *busyp; @@ -2504,6 +2505,7 @@ xfs_alloc_busy_insert( new->bno = bno; new->length = len; INIT_LIST_HEAD(&new->list); + new->flags = flags; /* trace before insert to be able to see failed inserts */ trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len); @@ -2609,6 +2611,18 @@ xfs_alloc_busy_update_extent( xfs_agblock_t bend = bbno + busyp->length; /* + * This extent is currently being discarded. Give the thread + * performing the discard a chance to mark the extent unbusy + * and retry. + */ + if (busyp->flags & XFS_ALLOC_BUSY_DISCARDED) { + spin_unlock(&pag->pagb_lock); + delay(1); + spin_lock(&pag->pagb_lock); + return false; + } + + /* * If there is a busy extent overlapping a user allocation, we have * no choice but to force the log and retry the search. * @@ -2813,7 +2827,8 @@ restart: * If this is a metadata allocation, try to reuse the busy * extent instead of trimming the allocation. */ - if (!args->userdata) { + if (!args->userdata && + !(busyp->flags & XFS_ALLOC_BUSY_DISCARDED)) { if (!xfs_alloc_busy_update_extent(args->mp, args->pag, busyp, fbno, flen, false)) @@ -2979,10 +2994,16 @@ xfs_alloc_busy_clear_one( kmem_free(busyp); } +/* + * Remove all extents on the passed in list from the busy extents tree. + * If do_discard is set skip extents that need to be discarded, and mark + * these as undergoing a discard operation instead. + */ void xfs_alloc_busy_clear( struct xfs_mount *mp, - struct list_head *list) + struct list_head *list, + bool do_discard) { struct xfs_busy_extent *busyp, *n; struct xfs_perag *pag = NULL; @@ -2999,7 +3020,11 @@ xfs_alloc_busy_clear( agno = busyp->agno; } - xfs_alloc_busy_clear_one(mp, pag, busyp); + if (do_discard && busyp->length && + !(busyp->flags & XFS_ALLOC_BUSY_SKIP_DISCARD)) + busyp->flags = XFS_ALLOC_BUSY_DISCARDED; + else + xfs_alloc_busy_clear_one(mp, pag, busyp); } if (pag) { diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 240ad288f2f9..2f52b924be79 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -137,10 +137,11 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp, #ifdef __KERNEL__ void xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno, - xfs_agblock_t bno, xfs_extlen_t len); + xfs_agblock_t bno, xfs_extlen_t len, unsigned int flags); void -xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list); +xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list, + bool do_discard); int xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 8b469d53599f..2b3518826a69 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -120,7 +120,8 @@ xfs_allocbt_free_block( if (error) return error; - xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); + xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, + XFS_ALLOC_BUSY_SKIP_DISCARD); xfs_trans_agbtree_delta(cur->bc_tp, -1); return 0; } diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index fa00788de2f5..e546a33214c9 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -89,36 +89,19 @@ xfs_bmap_add_attrfork_local( int *flags); /* inode logging flags */ /* - * Called by xfs_bmapi to update file extent records and the btree - * after allocating space (or doing a delayed allocation). - */ -STATIC int /* error */ -xfs_bmap_add_extent( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ - xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to add to file extents */ - xfs_fsblock_t *first, /* pointer to firstblock variable */ - xfs_bmap_free_t *flist, /* list of extents to be freed */ - int *logflagsp, /* inode logging flags */ - int whichfork, /* data or attr fork */ - int rsvd); /* OK to allocate reserved blocks */ - -/* * Called by xfs_bmap_add_extent to handle cases converting a delayed * allocation to a real allocation. */ STATIC int /* error */ xfs_bmap_add_extent_delay_real( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ - int *logflagsp, /* inode logging flags */ - int rsvd); /* OK to allocate reserved blocks */ + int *logflagsp); /* inode logging flags */ /* * Called by xfs_bmap_add_extent to handle cases converting a hole @@ -127,10 +110,9 @@ xfs_bmap_add_extent_delay_real( STATIC int /* error */ xfs_bmap_add_extent_hole_delay( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ - int *logflagsp,/* inode logging flags */ - int rsvd); /* OK to allocate reserved blocks */ + int *logflagsp); /* inode logging flags */ /* * Called by xfs_bmap_add_extent to handle cases converting a hole @@ -139,7 +121,7 @@ xfs_bmap_add_extent_hole_delay( STATIC int /* error */ xfs_bmap_add_extent_hole_real( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp, /* inode logging flags */ @@ -152,7 +134,7 @@ xfs_bmap_add_extent_hole_real( STATIC int /* error */ xfs_bmap_add_extent_unwritten_real( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp); /* inode logging flags */ @@ -180,22 +162,6 @@ xfs_bmap_btree_to_extents( int whichfork); /* data or attr fork */ /* - * Called by xfs_bmapi to update file extent records and the btree - * after removing space (or undoing a delayed allocation). - */ -STATIC int /* error */ -xfs_bmap_del_extent( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_trans_t *tp, /* current trans pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ - xfs_bmap_free_t *flist, /* list of extents to be freed */ - xfs_btree_cur_t *cur, /* if null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to add to file extents */ - int *logflagsp,/* inode logging flags */ - int whichfork, /* data or attr fork */ - int rsvd); /* OK to allocate reserved blocks */ - -/* * Remove the entry "free" from the free item list. Prev points to the * previous entry, unless "free" is the head of the list. */ @@ -474,14 +440,13 @@ xfs_bmap_add_attrfork_local( STATIC int /* error */ xfs_bmap_add_extent( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ int *logflagsp, /* inode logging flags */ - int whichfork, /* data or attr fork */ - int rsvd) /* OK to use reserved data blocks */ + int whichfork) /* data or attr fork */ { xfs_btree_cur_t *cur; /* btree cursor or null */ xfs_filblks_t da_new; /* new count del alloc blocks used */ @@ -492,23 +457,27 @@ xfs_bmap_add_extent( xfs_extnum_t nextents; /* number of extents in file now */ XFS_STATS_INC(xs_add_exlist); + cur = *curp; ifp = XFS_IFORK_PTR(ip, whichfork); nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - ASSERT(idx <= nextents); da_old = da_new = 0; error = 0; + + ASSERT(*idx >= 0); + ASSERT(*idx <= nextents); + /* * This is the first extent added to a new/empty file. * Special case this one, so other routines get to assume there are * already extents in the list. */ if (nextents == 0) { - xfs_iext_insert(ip, 0, 1, new, + xfs_iext_insert(ip, *idx, 1, new, whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); ASSERT(cur == NULL); - ifp->if_lastex = 0; + if (!isnullstartblock(new->br_startblock)) { XFS_IFORK_NEXT_SET(ip, whichfork, 1); logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); @@ -522,27 +491,25 @@ xfs_bmap_add_extent( if (cur) ASSERT((cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL) == 0); - if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, new, - &logflags, rsvd))) - goto done; + error = xfs_bmap_add_extent_hole_delay(ip, idx, new, + &logflags); } /* * Real allocation off the end of the file. */ - else if (idx == nextents) { + else if (*idx == nextents) { if (cur) ASSERT((cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL) == 0); - if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new, - &logflags, whichfork))) - goto done; + error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new, + &logflags, whichfork); } else { xfs_bmbt_irec_t prev; /* old extent at offset idx */ /* * Get the record referred to by idx. */ - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &prev); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &prev); /* * If it's a real allocation record, and the new allocation ends * after the start of the referred to record, then we're filling @@ -557,22 +524,18 @@ xfs_bmap_add_extent( if (cur) ASSERT(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL); - if ((error = xfs_bmap_add_extent_delay_real(ip, - idx, &cur, new, &da_new, first, flist, - &logflags, rsvd))) - goto done; - } else if (new->br_state == XFS_EXT_NORM) { - ASSERT(new->br_state == XFS_EXT_NORM); - if ((error = xfs_bmap_add_extent_unwritten_real( - ip, idx, &cur, new, &logflags))) - goto done; + error = xfs_bmap_add_extent_delay_real(ip, + idx, &cur, new, &da_new, + first, flist, &logflags); } else { - ASSERT(new->br_state == XFS_EXT_UNWRITTEN); - if ((error = xfs_bmap_add_extent_unwritten_real( - ip, idx, &cur, new, &logflags))) + ASSERT(new->br_state == XFS_EXT_NORM || + new->br_state == XFS_EXT_UNWRITTEN); + + error = xfs_bmap_add_extent_unwritten_real(ip, + idx, &cur, new, &logflags); + if (error) goto done; } - ASSERT(*curp == cur || *curp == NULL); } /* * Otherwise we're filling in a hole with an allocation. @@ -581,13 +544,15 @@ xfs_bmap_add_extent( if (cur) ASSERT((cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL) == 0); - if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, - new, &logflags, whichfork))) - goto done; + error = xfs_bmap_add_extent_hole_real(ip, idx, cur, + new, &logflags, whichfork); } } + if (error) + goto done; ASSERT(*curp == cur || *curp == NULL); + /* * Convert to a btree if necessary. */ @@ -615,7 +580,7 @@ xfs_bmap_add_extent( ASSERT(nblks <= da_old); if (nblks < da_old) xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, - (int64_t)(da_old - nblks), rsvd); + (int64_t)(da_old - nblks), 0); } /* * Clear out the allocated field, done with it now in any case. @@ -640,14 +605,13 @@ done: STATIC int /* error */ xfs_bmap_add_extent_delay_real( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ - int *logflagsp, /* inode logging flags */ - int rsvd) /* OK to use reserved data block allocation */ + int *logflagsp) /* inode logging flags */ { xfs_btree_cur_t *cur; /* btree cursor */ int diff; /* temp value */ @@ -673,7 +637,7 @@ xfs_bmap_add_extent_delay_real( */ cur = *curp; ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - ep = xfs_iext_get_ext(ifp, idx); + ep = xfs_iext_get_ext(ifp, *idx); xfs_bmbt_get_all(ep, &PREV); new_endoff = new->br_startoff + new->br_blockcount; ASSERT(PREV.br_startoff <= new->br_startoff); @@ -692,9 +656,9 @@ xfs_bmap_add_extent_delay_real( * Check and set flags if this segment has a left neighbor. * Don't set contiguous if the combined extent would be too large. */ - if (idx > 0) { + if (*idx > 0) { state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT); if (isnullstartblock(LEFT.br_startblock)) state |= BMAP_LEFT_DELAY; @@ -712,9 +676,9 @@ xfs_bmap_add_extent_delay_real( * Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. */ - if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); if (isnullstartblock(RIGHT.br_startblock)) state |= BMAP_RIGHT_DELAY; @@ -745,14 +709,14 @@ xfs_bmap_add_extent_delay_real( * Filling in all of a previously delayed allocation extent. * The left and right neighbors are both contiguous with new. */ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + --*idx; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), LEFT.br_blockcount + PREV.br_blockcount + RIGHT.br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_remove(ip, idx, 2, state); - ip->i_df.if_lastex = idx - 1; + xfs_iext_remove(ip, *idx + 1, 2, state); ip->i_d.di_nextents--; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -784,13 +748,14 @@ xfs_bmap_add_extent_delay_real( * Filling in all of a previously delayed allocation extent. * The left neighbor is contiguous, the right is not. */ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + --*idx; + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), LEFT.br_blockcount + PREV.br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx - 1; - xfs_iext_remove(ip, idx, 1, state); + xfs_iext_remove(ip, *idx + 1, 1, state); if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -814,14 +779,13 @@ xfs_bmap_add_extent_delay_real( * Filling in all of a previously delayed allocation extent. * The right neighbor is contiguous, the left is not. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_startblock(ep, new->br_startblock); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount + RIGHT.br_blockcount); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx; - xfs_iext_remove(ip, idx + 1, 1, state); + xfs_iext_remove(ip, *idx + 1, 1, state); if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -837,6 +801,7 @@ xfs_bmap_add_extent_delay_real( RIGHT.br_blockcount, PREV.br_state))) goto done; } + *dnew = 0; break; @@ -846,11 +811,10 @@ xfs_bmap_add_extent_delay_real( * Neither the left nor right neighbors are contiguous with * the new one. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_startblock(ep, new->br_startblock); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx; ip->i_d.di_nextents++; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -866,6 +830,7 @@ xfs_bmap_add_extent_delay_real( goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } + *dnew = 0; break; @@ -874,17 +839,16 @@ xfs_bmap_add_extent_delay_real( * Filling in the first part of a previous delayed allocation. * The left neighbor is contiguous. */ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1), LEFT.br_blockcount + new->br_blockcount); xfs_bmbt_set_startoff(ep, PREV.br_startoff + new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_); temp = PREV.br_blockcount - new->br_blockcount; - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); - ip->i_df.if_lastex = idx - 1; if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -904,7 +868,9 @@ xfs_bmap_add_extent_delay_real( temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), startblockval(PREV.br_startblock)); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + --*idx; *dnew = temp; break; @@ -913,12 +879,11 @@ xfs_bmap_add_extent_delay_real( * Filling in the first part of a previous delayed allocation. * The left neighbor is not contiguous. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_startoff(ep, new_endoff); temp = PREV.br_blockcount - new->br_blockcount; xfs_bmbt_set_blockcount(ep, temp); - xfs_iext_insert(ip, idx, 1, new, state); - ip->i_df.if_lastex = idx; + xfs_iext_insert(ip, *idx, 1, new, state); ip->i_d.di_nextents++; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -946,9 +911,10 @@ xfs_bmap_add_extent_delay_real( temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), startblockval(PREV.br_startblock) - (cur ? cur->bc_private.b.allocated : 0)); - ep = xfs_iext_get_ext(ifp, idx + 1); + ep = xfs_iext_get_ext(ifp, *idx + 1); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx + 1, state, _THIS_IP_); + *dnew = temp; break; @@ -958,15 +924,13 @@ xfs_bmap_add_extent_delay_real( * The right neighbor is contiguous with the new allocation. */ temp = PREV.br_blockcount - new->br_blockcount; - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); - trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx + 1, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx + 1), new->br_startoff, new->br_startblock, new->br_blockcount + RIGHT.br_blockcount, RIGHT.br_state); - trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); - ip->i_df.if_lastex = idx + 1; + trace_xfs_bmap_post_update(ip, *idx + 1, state, _THIS_IP_); if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -983,10 +947,14 @@ xfs_bmap_add_extent_delay_real( RIGHT.br_state))) goto done; } + temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), startblockval(PREV.br_startblock)); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + ++*idx; *dnew = temp; break; @@ -996,10 +964,9 @@ xfs_bmap_add_extent_delay_real( * The right neighbor is not contiguous. */ temp = PREV.br_blockcount - new->br_blockcount; - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); - xfs_iext_insert(ip, idx + 1, 1, new, state); - ip->i_df.if_lastex = idx + 1; + xfs_iext_insert(ip, *idx + 1, 1, new, state); ip->i_d.di_nextents++; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1027,9 +994,11 @@ xfs_bmap_add_extent_delay_real( temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), startblockval(PREV.br_startblock) - (cur ? cur->bc_private.b.allocated : 0)); - ep = xfs_iext_get_ext(ifp, idx); + ep = xfs_iext_get_ext(ifp, *idx); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + ++*idx; *dnew = temp; break; @@ -1056,7 +1025,7 @@ xfs_bmap_add_extent_delay_real( */ temp = new->br_startoff - PREV.br_startoff; temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; - trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, 0, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ LEFT = *new; RIGHT.br_state = PREV.br_state; @@ -1065,8 +1034,7 @@ xfs_bmap_add_extent_delay_real( RIGHT.br_startoff = new_endoff; RIGHT.br_blockcount = temp2; /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ - xfs_iext_insert(ip, idx + 1, 2, &LEFT, state); - ip->i_df.if_lastex = idx + 1; + xfs_iext_insert(ip, *idx + 1, 2, &LEFT, state); ip->i_d.di_nextents++; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1097,7 +1065,7 @@ xfs_bmap_add_extent_delay_real( (cur ? cur->bc_private.b.allocated : 0)); if (diff > 0 && xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, - -((int64_t)diff), rsvd)) { + -((int64_t)diff), 0)) { /* * Ick gross gag me with a spoon. */ @@ -1109,7 +1077,7 @@ xfs_bmap_add_extent_delay_real( if (!diff || !xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, - -((int64_t)diff), rsvd)) + -((int64_t)diff), 0)) break; } if (temp2) { @@ -1118,18 +1086,20 @@ xfs_bmap_add_extent_delay_real( if (!diff || !xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, - -((int64_t)diff), rsvd)) + -((int64_t)diff), 0)) break; } } } - ep = xfs_iext_get_ext(ifp, idx); + ep = xfs_iext_get_ext(ifp, *idx); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); - trace_xfs_bmap_pre_update(ip, idx + 2, state, _THIS_IP_); - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2), + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx + 2, state, _THIS_IP_); + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx + 2), nullstartblock((int)temp2)); - trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx + 2, state, _THIS_IP_); + + ++*idx; *dnew = temp + temp2; break; @@ -1161,7 +1131,7 @@ done: STATIC int /* error */ xfs_bmap_add_extent_unwritten_real( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp) /* inode logging flags */ @@ -1188,7 +1158,7 @@ xfs_bmap_add_extent_unwritten_real( error = 0; cur = *curp; ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - ep = xfs_iext_get_ext(ifp, idx); + ep = xfs_iext_get_ext(ifp, *idx); xfs_bmbt_get_all(ep, &PREV); newext = new->br_state; oldext = (newext == XFS_EXT_UNWRITTEN) ? @@ -1211,9 +1181,9 @@ xfs_bmap_add_extent_unwritten_real( * Check and set flags if this segment has a left neighbor. * Don't set contiguous if the combined extent would be too large. */ - if (idx > 0) { + if (*idx > 0) { state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT); if (isnullstartblock(LEFT.br_startblock)) state |= BMAP_LEFT_DELAY; @@ -1231,9 +1201,9 @@ xfs_bmap_add_extent_unwritten_real( * Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. */ - if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); if (isnullstartblock(RIGHT.br_startblock)) state |= BMAP_RIGHT_DELAY; } @@ -1262,14 +1232,15 @@ xfs_bmap_add_extent_unwritten_real( * Setting all of a previous oldext extent to newext. * The left and right neighbors are both contiguous with new. */ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + --*idx; + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), LEFT.br_blockcount + PREV.br_blockcount + RIGHT.br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_remove(ip, idx, 2, state); - ip->i_df.if_lastex = idx - 1; + xfs_iext_remove(ip, *idx + 1, 2, state); ip->i_d.di_nextents -= 2; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1305,13 +1276,14 @@ xfs_bmap_add_extent_unwritten_real( * Setting all of a previous oldext extent to newext. * The left neighbor is contiguous, the right is not. */ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + --*idx; + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), LEFT.br_blockcount + PREV.br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx - 1; - xfs_iext_remove(ip, idx, 1, state); + xfs_iext_remove(ip, *idx + 1, 1, state); ip->i_d.di_nextents--; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1341,13 +1313,12 @@ xfs_bmap_add_extent_unwritten_real( * Setting all of a previous oldext extent to newext. * The right neighbor is contiguous, the left is not. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount + RIGHT.br_blockcount); xfs_bmbt_set_state(ep, newext); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx; - xfs_iext_remove(ip, idx + 1, 1, state); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + xfs_iext_remove(ip, *idx + 1, 1, state); ip->i_d.di_nextents--; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1378,11 +1349,10 @@ xfs_bmap_add_extent_unwritten_real( * Neither the left nor right neighbors are contiguous with * the new one. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_state(ep, newext); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx; if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -1404,21 +1374,22 @@ xfs_bmap_add_extent_unwritten_real( * Setting the first part of a previous oldext extent to newext. * The left neighbor is contiguous. */ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1), LEFT.br_blockcount + new->br_blockcount); xfs_bmbt_set_startoff(ep, PREV.br_startoff + new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_); - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_startblock(ep, new->br_startblock + new->br_blockcount); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + --*idx; - ip->i_df.if_lastex = idx - 1; if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -1449,17 +1420,16 @@ xfs_bmap_add_extent_unwritten_real( * Setting the first part of a previous oldext extent to newext. * The left neighbor is not contiguous. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); xfs_bmbt_set_startoff(ep, new_endoff); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); xfs_bmbt_set_startblock(ep, new->br_startblock + new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_insert(ip, idx, 1, new, state); - ip->i_df.if_lastex = idx; + xfs_iext_insert(ip, *idx, 1, new, state); ip->i_d.di_nextents++; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1488,17 +1458,19 @@ xfs_bmap_add_extent_unwritten_real( * Setting the last part of a previous oldext extent to newext. * The right neighbor is contiguous with the new allocation. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); - trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + ++*idx; + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), new->br_startoff, new->br_startblock, new->br_blockcount + RIGHT.br_blockcount, newext); - trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx + 1; if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -1528,13 +1500,14 @@ xfs_bmap_add_extent_unwritten_real( * Setting the last part of a previous oldext extent to newext. * The right neighbor is not contiguous. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + ++*idx; + xfs_iext_insert(ip, *idx, 1, new, state); - xfs_iext_insert(ip, idx + 1, 1, new, state); - ip->i_df.if_lastex = idx + 1; ip->i_d.di_nextents++; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1568,10 +1541,10 @@ xfs_bmap_add_extent_unwritten_real( * newext. Contiguity is impossible here. * One extent becomes three extents. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, new->br_startoff - PREV.br_startoff); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); r[0] = *new; r[1].br_startoff = new_endoff; @@ -1579,8 +1552,10 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_startoff + PREV.br_blockcount - new_endoff; r[1].br_startblock = new->br_startblock + new->br_blockcount; r[1].br_state = oldext; - xfs_iext_insert(ip, idx + 1, 2, &r[0], state); - ip->i_df.if_lastex = idx + 1; + + ++*idx; + xfs_iext_insert(ip, *idx, 2, &r[0], state); + ip->i_d.di_nextents += 2; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1650,12 +1625,10 @@ done: STATIC int /* error */ xfs_bmap_add_extent_hole_delay( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ - int *logflagsp, /* inode logging flags */ - int rsvd) /* OK to allocate reserved blocks */ + int *logflagsp) /* inode logging flags */ { - xfs_bmbt_rec_host_t *ep; /* extent record for idx */ xfs_ifork_t *ifp; /* inode fork pointer */ xfs_bmbt_irec_t left; /* left neighbor extent entry */ xfs_filblks_t newlen=0; /* new indirect size */ @@ -1665,16 +1638,15 @@ xfs_bmap_add_extent_hole_delay( xfs_filblks_t temp=0; /* temp for indirect calculations */ ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - ep = xfs_iext_get_ext(ifp, idx); state = 0; ASSERT(isnullstartblock(new->br_startblock)); /* * Check and set flags if this segment has a left neighbor */ - if (idx > 0) { + if (*idx > 0) { state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); if (isnullstartblock(left.br_startblock)) state |= BMAP_LEFT_DELAY; @@ -1684,9 +1656,9 @@ xfs_bmap_add_extent_hole_delay( * Check and set flags if the current (right) segment exists. * If it doesn't exist, we're converting the hole at end-of-file. */ - if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(ep, &right); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); if (isnullstartblock(right.br_startblock)) state |= BMAP_RIGHT_DELAY; @@ -1719,21 +1691,21 @@ xfs_bmap_add_extent_hole_delay( * on the left and on the right. * Merge all three into a single extent record. */ + --*idx; temp = left.br_blockcount + new->br_blockcount + right.br_blockcount; - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); oldlen = startblockval(left.br_startblock) + startblockval(new->br_startblock) + startblockval(right.br_startblock); newlen = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), nullstartblock((int)newlen)); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_remove(ip, idx, 1, state); - ip->i_df.if_lastex = idx - 1; + xfs_iext_remove(ip, *idx + 1, 1, state); break; case BMAP_LEFT_CONTIG: @@ -1742,17 +1714,17 @@ xfs_bmap_add_extent_hole_delay( * on the left. * Merge the new allocation with the left neighbor. */ + --*idx; temp = left.br_blockcount + new->br_blockcount; - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); oldlen = startblockval(left.br_startblock) + startblockval(new->br_startblock); newlen = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), nullstartblock((int)newlen)); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); - - ip->i_df.if_lastex = idx - 1; + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); break; case BMAP_RIGHT_CONTIG: @@ -1761,16 +1733,15 @@ xfs_bmap_add_extent_hole_delay( * on the right. * Merge the new allocation with the right neighbor. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); temp = new->br_blockcount + right.br_blockcount; oldlen = startblockval(new->br_startblock) + startblockval(right.br_startblock); newlen = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_allf(ep, new->br_startoff, + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), + new->br_startoff, nullstartblock((int)newlen), temp, right.br_state); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); - - ip->i_df.if_lastex = idx; + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); break; case 0: @@ -1780,14 +1751,13 @@ xfs_bmap_add_extent_hole_delay( * Insert a new entry. */ oldlen = newlen = 0; - xfs_iext_insert(ip, idx, 1, new, state); - ip->i_df.if_lastex = idx; + xfs_iext_insert(ip, *idx, 1, new, state); break; } if (oldlen != newlen) { ASSERT(oldlen > newlen); xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, - (int64_t)(oldlen - newlen), rsvd); + (int64_t)(oldlen - newlen), 0); /* * Nothing to do for disk quota accounting here. */ @@ -1803,13 +1773,12 @@ xfs_bmap_add_extent_hole_delay( STATIC int /* error */ xfs_bmap_add_extent_hole_real( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp, /* inode logging flags */ int whichfork) /* data or attr fork */ { - xfs_bmbt_rec_host_t *ep; /* pointer to extent entry ins. point */ int error; /* error return value */ int i; /* temp state */ xfs_ifork_t *ifp; /* inode fork pointer */ @@ -1819,8 +1788,7 @@ xfs_bmap_add_extent_hole_real( int state; /* state bits, accessed thru macros */ ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); - ep = xfs_iext_get_ext(ifp, idx); + ASSERT(*idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); state = 0; if (whichfork == XFS_ATTR_FORK) @@ -1829,9 +1797,9 @@ xfs_bmap_add_extent_hole_real( /* * Check and set flags if this segment has a left neighbor. */ - if (idx > 0) { + if (*idx > 0) { state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); if (isnullstartblock(left.br_startblock)) state |= BMAP_LEFT_DELAY; } @@ -1840,9 +1808,9 @@ xfs_bmap_add_extent_hole_real( * Check and set flags if this segment has a current value. * Not true if we're inserting into the "hole" at eof. */ - if (idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(ep, &right); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); if (isnullstartblock(right.br_startblock)) state |= BMAP_RIGHT_DELAY; } @@ -1879,14 +1847,15 @@ xfs_bmap_add_extent_hole_real( * left and on the right. * Merge all three into a single extent record. */ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + --*idx; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), left.br_blockcount + new->br_blockcount + right.br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + xfs_iext_remove(ip, *idx + 1, 1, state); - xfs_iext_remove(ip, idx, 1, state); - ifp->if_lastex = idx - 1; XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) - 1); if (cur == NULL) { @@ -1921,12 +1890,12 @@ xfs_bmap_add_extent_hole_real( * on the left. * Merge the new allocation with the left neighbor. */ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + --*idx; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), left.br_blockcount + new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ifp->if_lastex = idx - 1; if (cur == NULL) { rval = xfs_ilog_fext(whichfork); } else { @@ -1952,13 +1921,13 @@ xfs_bmap_add_extent_hole_real( * on the right. * Merge the new allocation with the right neighbor. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); - xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock, + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), + new->br_startoff, new->br_startblock, new->br_blockcount + right.br_blockcount, right.br_state); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ifp->if_lastex = idx; if (cur == NULL) { rval = xfs_ilog_fext(whichfork); } else { @@ -1984,8 +1953,7 @@ xfs_bmap_add_extent_hole_real( * real allocation. * Insert a new entry. */ - xfs_iext_insert(ip, idx, 1, new, state); - ifp->if_lastex = idx; + xfs_iext_insert(ip, *idx, 1, new, state); XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) + 1); if (cur == NULL) { @@ -2833,13 +2801,12 @@ STATIC int /* error */ xfs_bmap_del_extent( xfs_inode_t *ip, /* incore inode pointer */ xfs_trans_t *tp, /* current transaction pointer */ - xfs_extnum_t idx, /* extent number to update/delete */ + xfs_extnum_t *idx, /* extent number to update/delete */ xfs_bmap_free_t *flist, /* list of extents to be freed */ xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *del, /* data to remove from extents */ int *logflagsp, /* inode logging flags */ - int whichfork, /* data or attr fork */ - int rsvd) /* OK to allocate reserved blocks */ + int whichfork) /* data or attr fork */ { xfs_filblks_t da_new; /* new delay-alloc indirect blocks */ xfs_filblks_t da_old; /* old delay-alloc indirect blocks */ @@ -2870,10 +2837,10 @@ xfs_bmap_del_extent( mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT((idx >= 0) && (idx < ifp->if_bytes / + ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); ASSERT(del->br_blockcount > 0); - ep = xfs_iext_get_ext(ifp, idx); + ep = xfs_iext_get_ext(ifp, *idx); xfs_bmbt_get_all(ep, &got); ASSERT(got.br_startoff <= del->br_startoff); del_endoff = del->br_startoff + del->br_blockcount; @@ -2947,11 +2914,12 @@ xfs_bmap_del_extent( /* * Matches the whole extent. Delete the entry. */ - xfs_iext_remove(ip, idx, 1, + xfs_iext_remove(ip, *idx, 1, whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); - ifp->if_lastex = idx; + --*idx; if (delay) break; + XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) - 1); flags |= XFS_ILOG_CORE; @@ -2968,21 +2936,20 @@ xfs_bmap_del_extent( /* * Deleting the first part of the extent. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_startoff(ep, del_endoff); temp = got.br_blockcount - del->br_blockcount; xfs_bmbt_set_blockcount(ep, temp); - ifp->if_lastex = idx; if (delay) { temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), da_old); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); da_new = temp; break; } xfs_bmbt_set_startblock(ep, del_endblock); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); if (!cur) { flags |= xfs_ilog_fext(whichfork); break; @@ -2998,18 +2965,17 @@ xfs_bmap_del_extent( * Deleting the last part of the extent. */ temp = got.br_blockcount - del->br_blockcount; - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); - ifp->if_lastex = idx; if (delay) { temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), da_old); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); da_new = temp; break; } - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); if (!cur) { flags |= xfs_ilog_fext(whichfork); break; @@ -3026,7 +2992,7 @@ xfs_bmap_del_extent( * Deleting the middle of the extent. */ temp = del->br_startoff - got.br_startoff; - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); new.br_startoff = del_endoff; temp2 = got_endoff - del_endoff; @@ -3113,9 +3079,9 @@ xfs_bmap_del_extent( } } } - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); - xfs_iext_insert(ip, idx + 1, 1, &new, state); - ifp->if_lastex = idx + 1; + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + xfs_iext_insert(ip, *idx + 1, 1, &new, state); + ++*idx; break; } /* @@ -3142,7 +3108,7 @@ xfs_bmap_del_extent( ASSERT(da_old >= da_new); if (da_old > da_new) { xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - (int64_t)(da_old - da_new), rsvd); + (int64_t)(da_old - da_new), 0); } done: *logflagsp = flags; @@ -4562,29 +4528,24 @@ xfs_bmapi( if (rt) { error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, - -((int64_t)extsz), (flags & - XFS_BMAPI_RSVBLOCKS)); + -((int64_t)extsz), 0); } else { error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - -((int64_t)alen), (flags & - XFS_BMAPI_RSVBLOCKS)); + -((int64_t)alen), 0); } if (!error) { error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - -((int64_t)indlen), (flags & - XFS_BMAPI_RSVBLOCKS)); + -((int64_t)indlen), 0); if (error && rt) xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, - (int64_t)extsz, (flags & - XFS_BMAPI_RSVBLOCKS)); + (int64_t)extsz, 0); else if (error) xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - (int64_t)alen, (flags & - XFS_BMAPI_RSVBLOCKS)); + (int64_t)alen, 0); } if (error) { @@ -4701,13 +4662,12 @@ xfs_bmapi( if (!wasdelay && (flags & XFS_BMAPI_PREALLOC)) got.br_state = XFS_EXT_UNWRITTEN; } - error = xfs_bmap_add_extent(ip, lastx, &cur, &got, + error = xfs_bmap_add_extent(ip, &lastx, &cur, &got, firstblock, flist, &tmp_logflags, - whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); + whichfork); logflags |= tmp_logflags; if (error) goto error0; - lastx = ifp->if_lastex; ep = xfs_iext_get_ext(ifp, lastx); nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); xfs_bmbt_get_all(ep, &got); @@ -4803,13 +4763,12 @@ xfs_bmapi( mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(ip, lastx, &cur, mval, + error = xfs_bmap_add_extent(ip, &lastx, &cur, mval, firstblock, flist, &tmp_logflags, - whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); + whichfork); logflags |= tmp_logflags; if (error) goto error0; - lastx = ifp->if_lastex; ep = xfs_iext_get_ext(ifp, lastx); nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); xfs_bmbt_get_all(ep, &got); @@ -4868,14 +4827,14 @@ xfs_bmapi( /* * Else go on to the next record. */ - ep = xfs_iext_get_ext(ifp, ++lastx); prev = got; - if (lastx >= nextents) - eof = 1; - else + if (++lastx < nextents) { + ep = xfs_iext_get_ext(ifp, lastx); xfs_bmbt_get_all(ep, &got); + } else { + eof = 1; + } } - ifp->if_lastex = lastx; *nmap = n; /* * Transform from btree to extents, give it cur. @@ -4984,7 +4943,6 @@ xfs_bmapi_single( ASSERT(!isnullstartblock(got.br_startblock)); ASSERT(bno < got.br_startoff + got.br_blockcount); *fsb = got.br_startblock + (bno - got.br_startoff); - ifp->if_lastex = lastx; return 0; } @@ -5026,7 +4984,6 @@ xfs_bunmapi( int tmp_logflags; /* partial logging flags */ int wasdel; /* was a delayed alloc extent */ int whichfork; /* data or attribute fork */ - int rsvd; /* OK to allocate reserved blocks */ xfs_fsblock_t sum; trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); @@ -5044,7 +5001,7 @@ xfs_bunmapi( mp = ip->i_mount; if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0; + ASSERT(len > 0); ASSERT(nexts >= 0); ASSERT(ifp->if_ext_max == @@ -5160,9 +5117,9 @@ xfs_bunmapi( del.br_blockcount = mod; } del.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(ip, lastx, &cur, &del, + error = xfs_bmap_add_extent(ip, &lastx, &cur, &del, firstblock, flist, &logflags, - XFS_DATA_FORK, 0); + XFS_DATA_FORK); if (error) goto error0; goto nodelete; @@ -5188,9 +5145,12 @@ xfs_bunmapi( */ ASSERT(bno >= del.br_blockcount); bno -= del.br_blockcount; - if (bno < got.br_startoff) { - if (--lastx >= 0) - xfs_bmbt_get_all(--ep, &got); + if (got.br_startoff > bno) { + if (--lastx >= 0) { + ep = xfs_iext_get_ext(ifp, + lastx); + xfs_bmbt_get_all(ep, &got); + } } continue; } else if (del.br_state == XFS_EXT_UNWRITTEN) { @@ -5214,18 +5174,19 @@ xfs_bunmapi( prev.br_startoff = start; } prev.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(ip, lastx - 1, &cur, + lastx--; + error = xfs_bmap_add_extent(ip, &lastx, &cur, &prev, firstblock, flist, &logflags, - XFS_DATA_FORK, 0); + XFS_DATA_FORK); if (error) goto error0; goto nodelete; } else { ASSERT(del.br_state == XFS_EXT_NORM); del.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(ip, lastx, &cur, + error = xfs_bmap_add_extent(ip, &lastx, &cur, &del, firstblock, flist, &logflags, - XFS_DATA_FORK, 0); + XFS_DATA_FORK); if (error) goto error0; goto nodelete; @@ -5240,13 +5201,13 @@ xfs_bunmapi( rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); do_div(rtexts, mp->m_sb.sb_rextsize); xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, - (int64_t)rtexts, rsvd); + (int64_t)rtexts, 0); (void)xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_RTBLKS); } else { xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - (int64_t)del.br_blockcount, rsvd); + (int64_t)del.br_blockcount, 0); (void)xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_REGBLKS); @@ -5277,31 +5238,29 @@ xfs_bunmapi( error = XFS_ERROR(ENOSPC); goto error0; } - error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del, - &tmp_logflags, whichfork, rsvd); + error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del, + &tmp_logflags, whichfork); logflags |= tmp_logflags; if (error) goto error0; bno = del.br_startoff - 1; nodelete: - lastx = ifp->if_lastex; /* * If not done go on to the next (previous) record. - * Reset ep in case the extents array was re-alloced. */ - ep = xfs_iext_get_ext(ifp, lastx); if (bno != (xfs_fileoff_t)-1 && bno >= start) { - if (lastx >= XFS_IFORK_NEXTENTS(ip, whichfork) || - xfs_bmbt_get_startoff(ep) > bno) { - if (--lastx >= 0) - ep = xfs_iext_get_ext(ifp, lastx); - } - if (lastx >= 0) + if (lastx >= 0) { + ep = xfs_iext_get_ext(ifp, lastx); + if (xfs_bmbt_get_startoff(ep) > bno) { + if (--lastx >= 0) + ep = xfs_iext_get_ext(ifp, + lastx); + } xfs_bmbt_get_all(ep, &got); + } extno++; } } - ifp->if_lastex = lastx; *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0; ASSERT(ifp->if_ext_max == XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 3651191daea1..c62234bde053 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -69,7 +69,6 @@ typedef struct xfs_bmap_free #define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */ #define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */ #define XFS_BMAPI_ATTRFORK 0x010 /* use attribute fork not data */ -#define XFS_BMAPI_RSVBLOCKS 0x020 /* OK to alloc. reserved data blocks */ #define XFS_BMAPI_PREALLOC 0x040 /* preallocation op: unwritten space */ #define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */ /* combine contig. space */ @@ -87,7 +86,6 @@ typedef struct xfs_bmap_free { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \ { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \ - { XFS_BMAPI_RSVBLOCKS, "RSVBLOCKS" }, \ { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ { XFS_BMAPI_CONTIG, "CONTIG" }, \ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c8e3349c287c..a098a20ca63e 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -920,7 +920,6 @@ xfs_iread_extents( /* * We know that the size is valid (it's checked in iformat_btree) */ - ifp->if_lastex = NULLEXTNUM; ifp->if_bytes = ifp->if_real_bytes = 0; ifp->if_flags |= XFS_IFEXTENTS; xfs_iext_add(ifp, 0, nextents); @@ -2558,12 +2557,9 @@ xfs_iflush_fork( case XFS_DINODE_FMT_EXTENTS: ASSERT((ifp->if_flags & XFS_IFEXTENTS) || !(iip->ili_format.ilf_fields & extflag[whichfork])); - ASSERT((xfs_iext_get_ext(ifp, 0) != NULL) || - (ifp->if_bytes == 0)); - ASSERT((xfs_iext_get_ext(ifp, 0) == NULL) || - (ifp->if_bytes > 0)); if ((iip->ili_format.ilf_fields & extflag[whichfork]) && (ifp->if_bytes > 0)) { + ASSERT(xfs_iext_get_ext(ifp, 0)); ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, whichfork); @@ -3112,6 +3108,8 @@ xfs_iext_get_ext( xfs_extnum_t idx) /* index of target extent */ { ASSERT(idx >= 0); + ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); + if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { return ifp->if_u1.if_ext_irec->er_extbuf; } else if (ifp->if_flags & XFS_IFEXTIREC) { @@ -3191,7 +3189,6 @@ xfs_iext_add( } ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; ifp->if_real_bytes = 0; - ifp->if_lastex = nextents + ext_diff; } /* * Otherwise use a linear (direct) extent list. @@ -3886,8 +3883,10 @@ xfs_iext_idx_to_irec( xfs_extnum_t page_idx = *idxp; /* extent index in target list */ ASSERT(ifp->if_flags & XFS_IFEXTIREC); - ASSERT(page_idx >= 0 && page_idx <= - ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); + ASSERT(page_idx >= 0); + ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); + ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; erp_idx = 0; low = 0; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index ff4e2a30227d..3ae6d58e5473 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -67,7 +67,6 @@ typedef struct xfs_ifork { short if_broot_bytes; /* bytes allocated for root */ unsigned char if_flags; /* per-fork flags */ unsigned char if_ext_max; /* max # of extent records */ - xfs_extnum_t if_lastex; /* last if_extents used */ union { xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */ xfs_ext_irec_t *if_ext_irec; /* irec map file exts */ diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 7d56e88a3f0e..c7755d5a5fbe 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -29,6 +29,7 @@ #include "xfs_mount.h" #include "xfs_error.h" #include "xfs_alloc.h" +#include "xfs_discard.h" /* * Perform initial CIL structure initialisation. If the CIL is not @@ -361,18 +362,28 @@ xlog_cil_committed( int abort) { struct xfs_cil_ctx *ctx = args; + struct xfs_mount *mp = ctx->cil->xc_log->l_mp; xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, ctx->start_lsn, abort); xfs_alloc_busy_sort(&ctx->busy_extents); - xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, &ctx->busy_extents); + xfs_alloc_busy_clear(mp, &ctx->busy_extents, + (mp->m_flags & XFS_MOUNT_DISCARD) && !abort); spin_lock(&ctx->cil->xc_cil_lock); list_del(&ctx->committing); spin_unlock(&ctx->cil->xc_cil_lock); xlog_cil_free_logvec(ctx->lv_chain); + + if (!list_empty(&ctx->busy_extents)) { + ASSERT(mp->m_flags & XFS_MOUNT_DISCARD); + + xfs_discard_extents(mp, &ctx->busy_extents); + xfs_alloc_busy_clear(mp, &ctx->busy_extents, false); + } + kmem_free(ctx); } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 19af0ab0d0c6..3d68bb267c5f 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -224,6 +224,7 @@ typedef struct xfs_mount { #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem operations, typically for disk errors in metadata */ +#define XFS_MOUNT_DISCARD (1ULL << 5) /* discard unused blocks */ #define XFS_MOUNT_RETERR (1ULL << 6) /* return alignment errors to user */ #define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index d1f24858ccc4..7c7bc2b786bd 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -609,7 +609,7 @@ xfs_trans_free( struct xfs_trans *tp) { xfs_alloc_busy_sort(&tp->t_busy); - xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy); + xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy, false); atomic_dec(&tp->t_mountp->m_active_trans); xfs_trans_free_dqinfo(tp); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index f5df23561b96..503c8a6b3079 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -217,8 +217,24 @@ int cont_write_begin(struct file *, struct address_space *, loff_t, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); +int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, + get_block_t get_block); int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); +/* Convert errno to return value from ->page_mkwrite() call */ +static inline int block_page_mkwrite_return(int err) +{ + if (err == 0) + return VM_FAULT_LOCKED; + if (err == -EFAULT) + return VM_FAULT_NOPAGE; + if (err == -ENOMEM) + return VM_FAULT_OOM; + if (err == -EAGAIN) + return VM_FAULT_RETRY; + /* -ENOSPC, -EDQUOT, -EIO ... */ + return VM_FAULT_SIGBUS; +} sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *); int nobh_write_begin(struct address_space *, loff_t, unsigned, unsigned, diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h new file mode 100644 index 000000000000..04ffb2e6c9d0 --- /dev/null +++ b/include/linux/cleancache.h @@ -0,0 +1,122 @@ +#ifndef _LINUX_CLEANCACHE_H +#define _LINUX_CLEANCACHE_H + +#include <linux/fs.h> +#include <linux/exportfs.h> +#include <linux/mm.h> + +#define CLEANCACHE_KEY_MAX 6 + +/* + * cleancache requires every file with a page in cleancache to have a + * unique key unless/until the file is removed/truncated. For some + * filesystems, the inode number is unique, but for "modern" filesystems + * an exportable filehandle is required (see exportfs.h) + */ +struct cleancache_filekey { + union { + ino_t ino; + __u32 fh[CLEANCACHE_KEY_MAX]; + u32 key[CLEANCACHE_KEY_MAX]; + } u; +}; + +struct cleancache_ops { + int (*init_fs)(size_t); + int (*init_shared_fs)(char *uuid, size_t); + int (*get_page)(int, struct cleancache_filekey, + pgoff_t, struct page *); + void (*put_page)(int, struct cleancache_filekey, + pgoff_t, struct page *); + void (*flush_page)(int, struct cleancache_filekey, pgoff_t); + void (*flush_inode)(int, struct cleancache_filekey); + void (*flush_fs)(int); +}; + +extern struct cleancache_ops + cleancache_register_ops(struct cleancache_ops *ops); +extern void __cleancache_init_fs(struct super_block *); +extern void __cleancache_init_shared_fs(char *, struct super_block *); +extern int __cleancache_get_page(struct page *); +extern void __cleancache_put_page(struct page *); +extern void __cleancache_flush_page(struct address_space *, struct page *); +extern void __cleancache_flush_inode(struct address_space *); +extern void __cleancache_flush_fs(struct super_block *); +extern int cleancache_enabled; + +#ifdef CONFIG_CLEANCACHE +static inline bool cleancache_fs_enabled(struct page *page) +{ + return page->mapping->host->i_sb->cleancache_poolid >= 0; +} +static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping) +{ + return mapping->host->i_sb->cleancache_poolid >= 0; +} +#else +#define cleancache_enabled (0) +#define cleancache_fs_enabled(_page) (0) +#define cleancache_fs_enabled_mapping(_page) (0) +#endif + +/* + * The shim layer provided by these inline functions allows the compiler + * to reduce all cleancache hooks to nothingness if CONFIG_CLEANCACHE + * is disabled, to a single global variable check if CONFIG_CLEANCACHE + * is enabled but no cleancache "backend" has dynamically enabled it, + * and, for the most frequent cleancache ops, to a single global variable + * check plus a superblock element comparison if CONFIG_CLEANCACHE is enabled + * and a cleancache backend has dynamically enabled cleancache, but the + * filesystem referenced by that cleancache op has not enabled cleancache. + * As a result, CONFIG_CLEANCACHE can be enabled by default with essentially + * no measurable performance impact. + */ + +static inline void cleancache_init_fs(struct super_block *sb) +{ + if (cleancache_enabled) + __cleancache_init_fs(sb); +} + +static inline void cleancache_init_shared_fs(char *uuid, struct super_block *sb) +{ + if (cleancache_enabled) + __cleancache_init_shared_fs(uuid, sb); +} + +static inline int cleancache_get_page(struct page *page) +{ + int ret = -1; + + if (cleancache_enabled && cleancache_fs_enabled(page)) + ret = __cleancache_get_page(page); + return ret; +} + +static inline void cleancache_put_page(struct page *page) +{ + if (cleancache_enabled && cleancache_fs_enabled(page)) + __cleancache_put_page(page); +} + +static inline void cleancache_flush_page(struct address_space *mapping, + struct page *page) +{ + /* careful... page->mapping is NULL sometimes when this is called */ + if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) + __cleancache_flush_page(mapping, page); +} + +static inline void cleancache_flush_inode(struct address_space *mapping) +{ + if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) + __cleancache_flush_inode(mapping); +} + +static inline void cleancache_flush_fs(struct super_block *sb) +{ + if (cleancache_enabled) + __cleancache_flush_fs(sb); +} + +#endif /* _LINUX_CLEANCACHE_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f9d3251790d..241609346dfb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1428,6 +1428,11 @@ struct super_block { */ char __rcu *s_options; const struct dentry_operations *s_d_op; /* default d_op for dentries */ + + /* + * Saved pool identifier for cleancache (-1 means none) + */ + int cleancache_poolid; }; extern struct timespec current_fs_time(struct super_block *sb); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 943c76b3d4bb..59225ef27d15 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -1,6 +1,7 @@ #ifndef _LINUX_HUGETLB_H #define _LINUX_HUGETLB_H +#include <linux/mm_types.h> #include <linux/fs.h> #include <linux/hugetlb_inline.h> @@ -41,7 +42,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); int hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, - int acctflags); + vm_flags_t vm_flags); void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); int dequeue_hwpoisoned_huge_page(struct page *page); void copy_huge_page(struct page *dst, struct page *src); @@ -168,7 +169,7 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) extern const struct file_operations hugetlbfs_file_operations; extern const struct vm_operations_struct hugetlb_vm_ops; -struct file *hugetlb_file_setup(const char *name, size_t size, int acct, +struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, struct user_struct **user, int creat_flags); int hugetlb_get_quota(struct address_space *mapping, long delta); void hugetlb_put_quota(struct address_space *mapping, long delta); @@ -192,7 +193,7 @@ static inline void set_file_hugepages(struct file *file) #define is_file_hugepages(file) 0 #define set_file_hugepages(file) BUG() static inline struct file *hugetlb_file_setup(const char *name, size_t size, - int acctflag, struct user_struct **user, int creat_flags) + vm_flags_t acctflag, struct user_struct **user, int creat_flags) { return ERR_PTR(-ENOSYS); } diff --git a/include/linux/hugetlb_inline.h b/include/linux/hugetlb_inline.h index 6931489a5c14..2bb681fbeb35 100644 --- a/include/linux/hugetlb_inline.h +++ b/include/linux/hugetlb_inline.h @@ -7,7 +7,7 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) { - return vma->vm_flags & VM_HUGETLB; + return !!(vma->vm_flags & VM_HUGETLB); } #else diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index a32dcaec04e1..4ecb7b16b278 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -529,9 +529,10 @@ struct transaction_s enum { T_RUNNING, T_LOCKED, - T_RUNDOWN, T_FLUSH, T_COMMIT, + T_COMMIT_DFLUSH, + T_COMMIT_JFLUSH, T_FINISHED } t_state; @@ -658,7 +659,9 @@ struct transaction_s * waiting for it to finish. */ unsigned int t_synchronous_commit:1; - unsigned int t_flushed_data_blocks:1; + + /* Disk flush needs to be sent to fs partition [no locking] */ + int t_need_data_flush; /* * For use by the filesystem to store fs-specific data @@ -1228,6 +1231,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); int jbd2_journal_force_commit_nested(journal_t *journal); int jbd2_log_wait_commit(journal_t *journal, tid_t tid); int jbd2_log_do_checkpoint(journal_t *journal); +int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid); void __jbd2_log_wait_for_space(journal_t *journal); extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); diff --git a/include/linux/mm.h b/include/linux/mm.h index 8eb969ebf904..fb8e814f78dc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -165,12 +165,12 @@ extern pgprot_t protection_map[16]; */ static inline int is_linear_pfn_mapping(struct vm_area_struct *vma) { - return (vma->vm_flags & VM_PFN_AT_MMAP); + return !!(vma->vm_flags & VM_PFN_AT_MMAP); } static inline int is_pfn_mapping(struct vm_area_struct *vma) { - return (vma->vm_flags & VM_PFNMAP); + return !!(vma->vm_flags & VM_PFNMAP); } /* @@ -1432,7 +1432,7 @@ extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long flag, unsigned long pgoff); extern unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, unsigned long flags, - unsigned int vm_flags, unsigned long pgoff); + vm_flags_t vm_flags, unsigned long pgoff); static inline unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 071d459e866b..6fe96c19f85e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -102,6 +102,8 @@ struct page { #endif }; +typedef unsigned long __nocast vm_flags_t; + /* * A region containing a mapping of a non-memory backed file under NOMMU * conditions. These are held in a global tree and are pinned by the VMAs that @@ -109,7 +111,7 @@ struct page { */ struct vm_region { struct rb_node vm_rb; /* link in global region tree */ - unsigned long vm_flags; /* VMA vm_flags */ + vm_flags_t vm_flags; /* VMA vm_flags */ unsigned long vm_start; /* start address of region */ unsigned long vm_end; /* region initialised to here */ unsigned long vm_top; /* region allocated to here */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 4c4ac3f3ce5a..a9dd89552f9c 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -24,6 +24,7 @@ /* leave room for NETLINK_DM (DM Events) */ #define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */ #define NETLINK_ECRYPTFS 19 +#define NETLINK_RDMA 20 #define MAX_LINKS 32 diff --git a/include/linux/smp.h b/include/linux/smp.h index 74243c86ba39..7ad824d510a2 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -98,16 +98,6 @@ void ipi_call_unlock_irq(void); */ int on_each_cpu(smp_call_func_t func, void *info, int wait); -#define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */ -#define MSG_ALL 0x8001 - -#define MSG_INVALIDATE_TLB 0x0001 /* Remote processor TLB invalidate */ -#define MSG_STOP_CPU 0x0002 /* Sent to shut down slave CPU's - * when rebooting - */ -#define MSG_RESCHEDULE 0x0003 /* Reschedule request from master CPU*/ -#define MSG_CALL_FUNCTION 0x0004 /* Call function on all other CPUs */ - /* * Mark the boot cpu "online" so that it can call console drivers in * printk() and can access its per-cpu storage. diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index b4d7710bc38d..bb4f5fbbbd8e 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -581,7 +581,7 @@ extern int spi_bus_unlock(struct spi_master *master); * Callable only from contexts that can sleep. */ static inline int -spi_write(struct spi_device *spi, const u8 *buf, size_t len) +spi_write(struct spi_device *spi, const void *buf, size_t len) { struct spi_transfer t = { .tx_buf = buf, @@ -605,7 +605,7 @@ spi_write(struct spi_device *spi, const u8 *buf, size_t len) * Callable only from contexts that can sleep. */ static inline int -spi_read(struct spi_device *spi, u8 *buf, size_t len) +spi_read(struct spi_device *spi, void *buf, size_t len) { struct spi_transfer t = { .rx_buf = buf, @@ -620,8 +620,8 @@ spi_read(struct spi_device *spi, u8 *buf, size_t len) /* this copies txbuf and rxbuf data; for small transfers only! */ extern int spi_write_then_read(struct spi_device *spi, - const u8 *txbuf, unsigned n_tx, - u8 *rxbuf, unsigned n_rx); + const void *txbuf, unsigned n_tx, + void *rxbuf, unsigned n_rx); /** * spi_w8r8 - SPI synchronous 8 bit write followed by 8 bit read diff --git a/include/rdma/Kbuild b/include/rdma/Kbuild index e7c043216558..ea56f76c0c22 100644 --- a/include/rdma/Kbuild +++ b/include/rdma/Kbuild @@ -1 +1,6 @@ +header-y += ib_user_cm.h header-y += ib_user_mad.h +header-y += ib_user_sa.h +header-y += ib_user_verbs.h +header-y += rdma_netlink.h +header-y += rdma_user_cm.h diff --git a/include/rdma/ib_user_cm.h b/include/rdma/ib_user_cm.h index bd3d380781e0..f79014aa28f9 100644 --- a/include/rdma/ib_user_cm.h +++ b/include/rdma/ib_user_cm.h @@ -34,6 +34,7 @@ #ifndef IB_USER_CM_H #define IB_USER_CM_H +#include <linux/types.h> #include <rdma/ib_user_sa.h> #define IB_USER_CM_ABI_VERSION 5 diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 169f7a53fb0c..26977c149c41 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -111,6 +111,20 @@ struct rdma_cm_event { } param; }; +enum rdma_cm_state { + RDMA_CM_IDLE, + RDMA_CM_ADDR_QUERY, + RDMA_CM_ADDR_RESOLVED, + RDMA_CM_ROUTE_QUERY, + RDMA_CM_ROUTE_RESOLVED, + RDMA_CM_CONNECT, + RDMA_CM_DISCONNECT, + RDMA_CM_ADDR_BOUND, + RDMA_CM_LISTEN, + RDMA_CM_DEVICE_REMOVAL, + RDMA_CM_DESTROYING +}; + struct rdma_cm_id; /** @@ -130,6 +144,7 @@ struct rdma_cm_id { rdma_cm_event_handler event_handler; struct rdma_route route; enum rdma_port_space ps; + enum ib_qp_type qp_type; u8 port_num; }; @@ -140,9 +155,11 @@ struct rdma_cm_id { * returned rdma_id. * @context: User specified context associated with the id. * @ps: RDMA port space. + * @qp_type: type of queue pair associated with the id. */ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, - void *context, enum rdma_port_space ps); + void *context, enum rdma_port_space ps, + enum ib_qp_type qp_type); /** * rdma_destroy_id - Destroys an RDMA identifier. diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h new file mode 100644 index 000000000000..3c5363ab867b --- /dev/null +++ b/include/rdma/rdma_netlink.h @@ -0,0 +1,92 @@ +#ifndef _RDMA_NETLINK_H +#define _RDMA_NETLINK_H + +#include <linux/types.h> + +enum { + RDMA_NL_RDMA_CM = 1 +}; + +#define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10) +#define RDMA_NL_GET_OP(type) (type & ((1 << 10) - 1)) +#define RDMA_NL_GET_TYPE(client, op) ((client << 10) + op) + +enum { + RDMA_NL_RDMA_CM_ID_STATS = 0, + RDMA_NL_RDMA_CM_NUM_OPS +}; + +enum { + RDMA_NL_RDMA_CM_ATTR_SRC_ADDR = 1, + RDMA_NL_RDMA_CM_ATTR_DST_ADDR, + RDMA_NL_RDMA_CM_NUM_ATTR, +}; + +struct rdma_cm_id_stats { + __u32 qp_num; + __u32 bound_dev_if; + __u32 port_space; + __s32 pid; + __u8 cm_state; + __u8 node_type; + __u8 port_num; + __u8 qp_type; +}; + +#ifdef __KERNEL__ + +#include <linux/netlink.h> + +struct ibnl_client_cbs { + int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb); +}; + +int ibnl_init(void); +void ibnl_cleanup(void); + +/** + * Add a a client to the list of IB netlink exporters. + * @index: Index of the added client + * @nops: Number of supported ops by the added client. + * @cb_table: A table for op->callback + * + * Returns 0 on success or a negative error code. + */ +int ibnl_add_client(int index, int nops, + const struct ibnl_client_cbs cb_table[]); + +/** + * Remove a client from IB netlink. + * @index: Index of the removed IB client. + * + * Returns 0 on success or a negative error code. + */ +int ibnl_remove_client(int index); + +/** + * Put a new message in a supplied skb. + * @skb: The netlink skb. + * @nlh: Pointer to put the header of the new netlink message. + * @seq: The message sequence number. + * @len: The requested message length to allocate. + * @client: Calling IB netlink client. + * @op: message content op. + * Returns the allocated buffer on success and NULL on failure. + */ +void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq, + int len, int client, int op); +/** + * Put a new attribute in a supplied skb. + * @skb: The netlink skb. + * @nlh: Header of the netlink message to append the attribute to. + * @len: The length of the attribute data. + * @data: The attribute data to put. + * @type: The attribute type. + * Returns the 0 and a negative error code on failure. + */ +int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, + int len, void *data, int type); + +#endif /* __KERNEL__ */ + +#endif /* _RDMA_NETLINK_H */ diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index b33257bc7e83..70213b4515eb 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -58,6 +58,7 @@ #define __HYPERVISOR_event_channel_op 32 #define __HYPERVISOR_physdev_op 33 #define __HYPERVISOR_hvm_op 34 +#define __HYPERVISOR_tmem_op 38 /* Architecture-specific hypercall definitions. */ #define __HYPERVISOR_arch_0 48 @@ -461,6 +462,27 @@ typedef uint8_t xen_domain_handle_t[16]; #define __mk_unsigned_long(x) x ## UL #define mk_unsigned_long(x) __mk_unsigned_long(x) +#define TMEM_SPEC_VERSION 1 + +struct tmem_op { + uint32_t cmd; + int32_t pool_id; + union { + struct { /* for cmd == TMEM_NEW_POOL */ + uint64_t uuid[2]; + uint32_t flags; + } new; + struct { + uint64_t oid[3]; + uint32_t index; + uint32_t tmem_offset; + uint32_t pfn_offset; + uint32_t len; + GUEST_HANDLE(void) gmfn; /* guest machine page frame */ + } gen; + } u; +}; + #else /* __ASSEMBLY__ */ /* In assembly code we cannot use C numeric constant suffixes. */ diff --git a/ipc/shm.c b/ipc/shm.c index 729acb7e3148..ab3385a21b27 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -347,7 +347,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) struct file * file; char name[13]; int id; - int acctflag = 0; + vm_flags_t acctflag = 0; if (size < SHMMIN || size > ns->shm_ctlmax) return -EINVAL; diff --git a/mm/Kconfig b/mm/Kconfig index e9c0c61f2ddd..8ca47a5ee9c8 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -347,3 +347,26 @@ config NEED_PER_CPU_KM depends on !SMP bool default y + +config CLEANCACHE + bool "Enable cleancache driver to cache clean pages if tmem is present" + default n + help + Cleancache can be thought of as a page-granularity victim cache + for clean pages that the kernel's pageframe replacement algorithm + (PFRA) would like to keep around, but can't since there isn't enough + memory. So when the PFRA "evicts" a page, it first attempts to use + cleancacne code to put the data contained in that page into + "transcendent memory", memory that is not directly accessible or + addressable by the kernel and is of unknown and possibly + time-varying size. And when a cleancache-enabled + filesystem wishes to access a page in a file on disk, it first + checks cleancache to see if it already contains it; if it does, + the page is copied into the kernel and a disk access is avoided. + When a transcendent memory driver is available (such as zcache or + Xen transcendent memory), a significant I/O reduction + may be achieved. When none is available, all cleancache calls + are reduced to a single pointer-compare-against-NULL resulting + in a negligible performance hit. + + If unsure, say Y to enable cleancache diff --git a/mm/Makefile b/mm/Makefile index 42a8326c3e3d..836e4163c1bf 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -49,3 +49,4 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o +obj-$(CONFIG_CLEANCACHE) += cleancache.o diff --git a/mm/cleancache.c b/mm/cleancache.c new file mode 100644 index 000000000000..bcaae4c2a770 --- /dev/null +++ b/mm/cleancache.c @@ -0,0 +1,244 @@ +/* + * Cleancache frontend + * + * This code provides the generic "frontend" layer to call a matching + * "backend" driver implementation of cleancache. See + * Documentation/vm/cleancache.txt for more information. + * + * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. + * Author: Dan Magenheimer + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/exportfs.h> +#include <linux/mm.h> +#include <linux/cleancache.h> + +/* + * This global enablement flag may be read thousands of times per second + * by cleancache_get/put/flush even on systems where cleancache_ops + * is not claimed (e.g. cleancache is config'ed on but remains + * disabled), so is preferred to the slower alternative: a function + * call that checks a non-global. + */ +int cleancache_enabled; +EXPORT_SYMBOL(cleancache_enabled); + +/* + * cleancache_ops is set by cleancache_ops_register to contain the pointers + * to the cleancache "backend" implementation functions. + */ +static struct cleancache_ops cleancache_ops; + +/* useful stats available in /sys/kernel/mm/cleancache */ +static unsigned long cleancache_succ_gets; +static unsigned long cleancache_failed_gets; +static unsigned long cleancache_puts; +static unsigned long cleancache_flushes; + +/* + * register operations for cleancache, returning previous thus allowing + * detection of multiple backends and possible nesting + */ +struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops) +{ + struct cleancache_ops old = cleancache_ops; + + cleancache_ops = *ops; + cleancache_enabled = 1; + return old; +} +EXPORT_SYMBOL(cleancache_register_ops); + +/* Called by a cleancache-enabled filesystem at time of mount */ +void __cleancache_init_fs(struct super_block *sb) +{ + sb->cleancache_poolid = (*cleancache_ops.init_fs)(PAGE_SIZE); +} +EXPORT_SYMBOL(__cleancache_init_fs); + +/* Called by a cleancache-enabled clustered filesystem at time of mount */ +void __cleancache_init_shared_fs(char *uuid, struct super_block *sb) +{ + sb->cleancache_poolid = + (*cleancache_ops.init_shared_fs)(uuid, PAGE_SIZE); +} +EXPORT_SYMBOL(__cleancache_init_shared_fs); + +/* + * If the filesystem uses exportable filehandles, use the filehandle as + * the key, else use the inode number. + */ +static int cleancache_get_key(struct inode *inode, + struct cleancache_filekey *key) +{ + int (*fhfn)(struct dentry *, __u32 *fh, int *, int); + int len = 0, maxlen = CLEANCACHE_KEY_MAX; + struct super_block *sb = inode->i_sb; + + key->u.ino = inode->i_ino; + if (sb->s_export_op != NULL) { + fhfn = sb->s_export_op->encode_fh; + if (fhfn) { + struct dentry d; + d.d_inode = inode; + len = (*fhfn)(&d, &key->u.fh[0], &maxlen, 0); + if (len <= 0 || len == 255) + return -1; + if (maxlen > CLEANCACHE_KEY_MAX) + return -1; + } + } + return 0; +} + +/* + * "Get" data from cleancache associated with the poolid/inode/index + * that were specified when the data was put to cleanache and, if + * successful, use it to fill the specified page with data and return 0. + * The pageframe is unchanged and returns -1 if the get fails. + * Page must be locked by caller. + */ +int __cleancache_get_page(struct page *page) +{ + int ret = -1; + int pool_id; + struct cleancache_filekey key = { .u.key = { 0 } }; + + VM_BUG_ON(!PageLocked(page)); + pool_id = page->mapping->host->i_sb->cleancache_poolid; + if (pool_id < 0) + goto out; + + if (cleancache_get_key(page->mapping->host, &key) < 0) + goto out; + + ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page); + if (ret == 0) + cleancache_succ_gets++; + else + cleancache_failed_gets++; +out: + return ret; +} +EXPORT_SYMBOL(__cleancache_get_page); + +/* + * "Put" data from a page to cleancache and associate it with the + * (previously-obtained per-filesystem) poolid and the page's, + * inode and page index. Page must be locked. Note that a put_page + * always "succeeds", though a subsequent get_page may succeed or fail. + */ +void __cleancache_put_page(struct page *page) +{ + int pool_id; + struct cleancache_filekey key = { .u.key = { 0 } }; + + VM_BUG_ON(!PageLocked(page)); + pool_id = page->mapping->host->i_sb->cleancache_poolid; + if (pool_id >= 0 && + cleancache_get_key(page->mapping->host, &key) >= 0) { + (*cleancache_ops.put_page)(pool_id, key, page->index, page); + cleancache_puts++; + } +} +EXPORT_SYMBOL(__cleancache_put_page); + +/* + * Flush any data from cleancache associated with the poolid and the + * page's inode and page index so that a subsequent "get" will fail. + */ +void __cleancache_flush_page(struct address_space *mapping, struct page *page) +{ + /* careful... page->mapping is NULL sometimes when this is called */ + int pool_id = mapping->host->i_sb->cleancache_poolid; + struct cleancache_filekey key = { .u.key = { 0 } }; + + if (pool_id >= 0) { + VM_BUG_ON(!PageLocked(page)); + if (cleancache_get_key(mapping->host, &key) >= 0) { + (*cleancache_ops.flush_page)(pool_id, key, page->index); + cleancache_flushes++; + } + } +} +EXPORT_SYMBOL(__cleancache_flush_page); + +/* + * Flush all data from cleancache associated with the poolid and the + * mappings's inode so that all subsequent gets to this poolid/inode + * will fail. + */ +void __cleancache_flush_inode(struct address_space *mapping) +{ + int pool_id = mapping->host->i_sb->cleancache_poolid; + struct cleancache_filekey key = { .u.key = { 0 } }; + + if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) + (*cleancache_ops.flush_inode)(pool_id, key); +} +EXPORT_SYMBOL(__cleancache_flush_inode); + +/* + * Called by any cleancache-enabled filesystem at time of unmount; + * note that pool_id is surrendered and may be reutrned by a subsequent + * cleancache_init_fs or cleancache_init_shared_fs + */ +void __cleancache_flush_fs(struct super_block *sb) +{ + if (sb->cleancache_poolid >= 0) { + int old_poolid = sb->cleancache_poolid; + sb->cleancache_poolid = -1; + (*cleancache_ops.flush_fs)(old_poolid); + } +} +EXPORT_SYMBOL(__cleancache_flush_fs); + +#ifdef CONFIG_SYSFS + +/* see Documentation/ABI/xxx/sysfs-kernel-mm-cleancache */ + +#define CLEANCACHE_SYSFS_RO(_name) \ + static ssize_t cleancache_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf) \ + { \ + return sprintf(buf, "%lu\n", cleancache_##_name); \ + } \ + static struct kobj_attribute cleancache_##_name##_attr = { \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .show = cleancache_##_name##_show, \ + } + +CLEANCACHE_SYSFS_RO(succ_gets); +CLEANCACHE_SYSFS_RO(failed_gets); +CLEANCACHE_SYSFS_RO(puts); +CLEANCACHE_SYSFS_RO(flushes); + +static struct attribute *cleancache_attrs[] = { + &cleancache_succ_gets_attr.attr, + &cleancache_failed_gets_attr.attr, + &cleancache_puts_attr.attr, + &cleancache_flushes_attr.attr, + NULL, +}; + +static struct attribute_group cleancache_attr_group = { + .attrs = cleancache_attrs, + .name = "cleancache", +}; + +#endif /* CONFIG_SYSFS */ + +static int __init init_cleancache(void) +{ +#ifdef CONFIG_SYSFS + int err; + + err = sysfs_create_group(mm_kobj, &cleancache_attr_group); +#endif /* CONFIG_SYSFS */ + return 0; +} +module_init(init_cleancache) diff --git a/mm/filemap.c b/mm/filemap.c index 68e782b3d3de..7455ccd8bda8 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -34,6 +34,7 @@ #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ #include <linux/memcontrol.h> #include <linux/mm_inline.h> /* for page_is_file_cache() */ +#include <linux/cleancache.h> #include "internal.h" /* @@ -118,6 +119,16 @@ void __delete_from_page_cache(struct page *page) { struct address_space *mapping = page->mapping; + /* + * if we're uptodate, flush out into the cleancache, otherwise + * invalidate any existing cleancache entries. We can't leave + * stale data around in the cleancache once our page is gone + */ + if (PageUptodate(page) && PageMappedToDisk(page)) + cleancache_put_page(page); + else + cleancache_flush_page(mapping, page); + radix_tree_delete(&mapping->page_tree, page->index); page->mapping = NULL; mapping->nrpages--; diff --git a/mm/fremap.c b/mm/fremap.c index 7f4123056e06..b8e0e2d468af 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -224,7 +224,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, /* * drop PG_Mlocked flag for over-mapped range */ - unsigned int saved_flags = vma->vm_flags; + vm_flags_t saved_flags = vma->vm_flags; munlock_vma_pages_range(vma, start, start + size); vma->vm_flags = saved_flags; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5fd68b95c671..f33bb319b73f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2833,7 +2833,7 @@ void hugetlb_change_protection(struct vm_area_struct *vma, int hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, - int acctflag) + vm_flags_t vm_flags) { long ret, chg; struct hstate *h = hstate_inode(inode); @@ -2843,7 +2843,7 @@ int hugetlb_reserve_pages(struct inode *inode, * attempt will be made for VM_NORESERVE to allocate a page * and filesystem quota without using reserves */ - if (acctflag & VM_NORESERVE) + if (vm_flags & VM_NORESERVE) return 0; /* diff --git a/mm/memory.c b/mm/memory.c index b73f677f0bb1..fc24f7d788bd 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -730,7 +730,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, add_taint(TAINT_BAD_PAGE); } -static inline int is_cow_mapping(unsigned int flags) +static inline int is_cow_mapping(vm_flags_t flags) { return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; } diff --git a/mm/mlock.c b/mm/mlock.c index 516b2c2ddd5a..048260c4e02e 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -307,13 +307,13 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, * For vmas that pass the filters, merge/split as appropriate. */ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, - unsigned long start, unsigned long end, unsigned int newflags) + unsigned long start, unsigned long end, vm_flags_t newflags) { struct mm_struct *mm = vma->vm_mm; pgoff_t pgoff; int nr_pages; int ret = 0; - int lock = newflags & VM_LOCKED; + int lock = !!(newflags & VM_LOCKED); if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm)) @@ -385,7 +385,7 @@ static int do_mlock(unsigned long start, size_t len, int on) prev = vma; for (nstart = start ; ; ) { - unsigned int newflags; + vm_flags_t newflags; /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ @@ -524,7 +524,7 @@ static int do_mlockall(int flags) goto out; for (vma = current->mm->mmap; vma ; vma = prev->vm_next) { - unsigned int newflags; + vm_flags_t newflags; newflags = vma->vm_flags | VM_LOCKED; if (!(flags & MCL_CURRENT)) diff --git a/mm/mmap.c b/mm/mmap.c index ac2631b7477f..bbdc9af5e117 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -960,7 +960,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, { struct mm_struct * mm = current->mm; struct inode *inode; - unsigned int vm_flags; + vm_flags_t vm_flags; int error; unsigned long reqprot = prot; @@ -1165,7 +1165,7 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) */ int vma_wants_writenotify(struct vm_area_struct *vma) { - unsigned int vm_flags = vma->vm_flags; + vm_flags_t vm_flags = vma->vm_flags; /* If it was private or non-writable, the write bit is already clear */ if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED))) @@ -1193,7 +1193,7 @@ int vma_wants_writenotify(struct vm_area_struct *vma) * We account for memory if it's a private writeable mapping, * not hugepages and VM_NORESERVE wasn't set. */ -static inline int accountable_mapping(struct file *file, unsigned int vm_flags) +static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags) { /* * hugetlb has its own accounting separate from the core VM @@ -1207,7 +1207,7 @@ static inline int accountable_mapping(struct file *file, unsigned int vm_flags) unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, unsigned long flags, - unsigned int vm_flags, unsigned long pgoff) + vm_flags_t vm_flags, unsigned long pgoff) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; diff --git a/mm/truncate.c b/mm/truncate.c index a95667529135..3a29a6180212 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -19,6 +19,7 @@ #include <linux/task_io_accounting_ops.h> #include <linux/buffer_head.h> /* grr. try_to_release_page, do_invalidatepage */ +#include <linux/cleancache.h> #include "internal.h" @@ -51,6 +52,7 @@ void do_invalidatepage(struct page *page, unsigned long offset) static inline void truncate_partial_page(struct page *page, unsigned partial) { zero_user_segment(page, partial, PAGE_CACHE_SIZE); + cleancache_flush_page(page->mapping, page); if (page_has_private(page)) do_invalidatepage(page, partial); } @@ -214,6 +216,7 @@ void truncate_inode_pages_range(struct address_space *mapping, pgoff_t next; int i; + cleancache_flush_inode(mapping); if (mapping->nrpages == 0) return; @@ -291,6 +294,7 @@ void truncate_inode_pages_range(struct address_space *mapping, pagevec_release(&pvec); mem_cgroup_uncharge_end(); } + cleancache_flush_inode(mapping); } EXPORT_SYMBOL(truncate_inode_pages_range); @@ -440,6 +444,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, int did_range_unmap = 0; int wrapped = 0; + cleancache_flush_inode(mapping); pagevec_init(&pvec, 0); next = start; while (next <= end && !wrapped && @@ -498,6 +503,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, mem_cgroup_uncharge_end(); cond_resched(); } + cleancache_flush_inode(mapping); return ret; } EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 844a7a5607e3..159c50f1c6bf 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -589,7 +589,8 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) return -ENOMEM; /* Create the RDMA CM ID */ - rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP); + rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP, + IB_QPT_RC); if (IS_ERR(rdma->cm_id)) goto error; diff --git a/net/rds/ib.c b/net/rds/ib.c index cce19f95c624..3b83086bcc30 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -325,7 +325,7 @@ static int rds_ib_laddr_check(__be32 addr) /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. */ - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); + cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index ee369d201a65..fd453dd5124b 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -587,7 +587,7 @@ int rds_ib_conn_connect(struct rds_connection *conn) /* XXX I wonder what affect the port space has */ /* delegate cm event handler to rdma_transport */ ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, - RDMA_PS_TCP); + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ic->i_cm_id)) { ret = PTR_ERR(ic->i_cm_id); ic->i_cm_id = NULL; diff --git a/net/rds/iw.c b/net/rds/iw.c index 5a9676fe594f..f7474844f096 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -226,7 +226,7 @@ static int rds_iw_laddr_check(__be32 addr) /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. */ - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); + cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 3a60a15d1b4a..c12db66f24c7 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -522,7 +522,7 @@ int rds_iw_conn_connect(struct rds_connection *conn) /* XXX I wonder what affect the port space has */ /* delegate cm event handler to rdma_transport */ ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, - RDMA_PS_TCP); + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ic->i_cm_id)) { ret = PTR_ERR(ic->i_cm_id); ic->i_cm_id = NULL; diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 4195a0539829..f8760e1b6688 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -158,7 +158,8 @@ static int rds_rdma_listen_init(void) struct rdma_cm_id *cm_id; int ret; - cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP); + cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP, + IB_QPT_RC); if (IS_ERR(cm_id)) { ret = PTR_ERR(cm_id); printk(KERN_ERR "RDS/RDMA: failed to setup listener, " diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 6c014dd3a20b..c3c232a88d94 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -695,7 +695,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, return ERR_PTR(-ENOMEM); xprt = &cma_xprt->sc_xprt; - listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP); + listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP, + IB_QPT_RC); if (IS_ERR(listen_id)) { ret = PTR_ERR(listen_id); dprintk("svcrdma: rdma_create_id failed = %d\n", ret); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index d4297dc43dc4..80f8da344df5 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -387,7 +387,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, init_completion(&ia->ri_done); - id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); + id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(id)) { rc = PTR_ERR(id); dprintk("RPC: %s: rdma_create_id() failed %i\n", diff --git a/sound/soc/omap/Kconfig b/sound/soc/omap/Kconfig index b5922984eac6..99054cf1f68f 100644 --- a/sound/soc/omap/Kconfig +++ b/sound/soc/omap/Kconfig @@ -65,14 +65,6 @@ config SND_OMAP_SOC_OVERO Say Y if you want to add support for SoC audio on the Gumstix Overo or CompuLab CM-T35 -config SND_OMAP_SOC_OMAP2EVM - tristate "SoC Audio support for OMAP2EVM board" - depends on TWL4030_CORE && SND_OMAP_SOC && MACH_OMAP2EVM - select SND_OMAP_SOC_MCBSP - select SND_SOC_TWL4030 - help - Say Y if you want to add support for SoC audio on the omap2evm board. - config SND_OMAP_SOC_OMAP3EVM tristate "SoC Audio support for OMAP3EVM board" depends on TWL4030_CORE && SND_OMAP_SOC && MACH_OMAP3EVM diff --git a/sound/soc/omap/Makefile b/sound/soc/omap/Makefile index ba9fc650db28..6c2c87eed5bb 100644 --- a/sound/soc/omap/Makefile +++ b/sound/soc/omap/Makefile @@ -13,7 +13,6 @@ snd-soc-rx51-objs := rx51.o snd-soc-ams-delta-objs := ams-delta.o snd-soc-osk5912-objs := osk5912.o snd-soc-overo-objs := overo.o -snd-soc-omap2evm-objs := omap2evm.o snd-soc-omap3evm-objs := omap3evm.o snd-soc-am3517evm-objs := am3517evm.o snd-soc-sdp3430-objs := sdp3430.o diff --git a/sound/soc/omap/omap2evm.c b/sound/soc/omap/omap2evm.c deleted file mode 100644 index 29b60d6796e7..000000000000 --- a/sound/soc/omap/omap2evm.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - * omap2evm.c -- SoC audio machine driver for omap2evm board - * - * Author: Arun KS <arunks@mistralsolutions.com> - * - * Based on sound/soc/omap/overo.c by Steve Sakoman - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - * - */ - -#include <linux/clk.h> -#include <linux/platform_device.h> -#include <sound/core.h> -#include <sound/pcm.h> -#include <sound/soc.h> - -#include <asm/mach-types.h> -#include <mach/hardware.h> -#include <mach/gpio.h> -#include <plat/mcbsp.h> - -#include "omap-mcbsp.h" -#include "omap-pcm.h" - -static int omap2evm_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) -{ - struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct snd_soc_dai *codec_dai = rtd->codec_dai; - struct snd_soc_dai *cpu_dai = rtd->cpu_dai; - int ret; - - /* Set codec DAI configuration */ - ret = snd_soc_dai_set_fmt(codec_dai, - SND_SOC_DAIFMT_I2S | - SND_SOC_DAIFMT_NB_NF | - SND_SOC_DAIFMT_CBM_CFM); - if (ret < 0) { - printk(KERN_ERR "can't set codec DAI configuration\n"); - return ret; - } - - /* Set cpu DAI configuration */ - ret = snd_soc_dai_set_fmt(cpu_dai, - SND_SOC_DAIFMT_I2S | - SND_SOC_DAIFMT_NB_NF | - SND_SOC_DAIFMT_CBM_CFM); - if (ret < 0) { - printk(KERN_ERR "can't set cpu DAI configuration\n"); - return ret; - } - - /* Set the codec system clock for DAC and ADC */ - ret = snd_soc_dai_set_sysclk(codec_dai, 0, 26000000, - SND_SOC_CLOCK_IN); - if (ret < 0) { - printk(KERN_ERR "can't set codec system clock\n"); - return ret; - } - - return 0; -} - -static struct snd_soc_ops omap2evm_ops = { - .hw_params = omap2evm_hw_params, -}; - -/* Digital audio interface glue - connects codec <--> CPU */ -static struct snd_soc_dai_link omap2evm_dai = { - .name = "TWL4030", - .stream_name = "TWL4030", - .cpu_dai_name = "omap-mcbsp-dai.1", - .codec_dai_name = "twl4030-hifi", - .platform_name = "omap-pcm-audio", - .codec_name = "twl4030-codec", - .ops = &omap2evm_ops, -}; - -/* Audio machine driver */ -static struct snd_soc_card snd_soc_omap2evm = { - .name = "omap2evm", - .dai_link = &omap2evm_dai, - .num_links = 1, -}; - -static struct platform_device *omap2evm_snd_device; - -static int __init omap2evm_soc_init(void) -{ - int ret; - - if (!machine_is_omap2evm()) - return -ENODEV; - printk(KERN_INFO "omap2evm SoC init\n"); - - omap2evm_snd_device = platform_device_alloc("soc-audio", -1); - if (!omap2evm_snd_device) { - printk(KERN_ERR "Platform device allocation failed\n"); - return -ENOMEM; - } - - platform_set_drvdata(omap2evm_snd_device, &snd_soc_omap2evm); - - ret = platform_device_add(omap2evm_snd_device); - if (ret) - goto err1; - - return 0; - -err1: - printk(KERN_ERR "Unable to add platform device\n"); - platform_device_put(omap2evm_snd_device); - - return ret; -} -module_init(omap2evm_soc_init); - -static void __exit omap2evm_soc_exit(void) -{ - platform_device_unregister(omap2evm_snd_device); -} -module_exit(omap2evm_soc_exit); - -MODULE_AUTHOR("Arun KS <arunks@mistralsolutions.com>"); -MODULE_DESCRIPTION("ALSA SoC omap2evm"); -MODULE_LICENSE("GPL"); |