436 files changed, 22829 insertions, 7557 deletions
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
index 589b40cc5eb5..2a272275c81b 100644
--- a/Documentation/DocBook/kernel-hacking.tmpl
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -483,7 +483,7 @@ printk(KERN_INFO "my ip: %pI4\n", &ipaddress);
    <function>get_user()</function>
    /
    <function>put_user()</function>
-   <filename class="headerfile">include/asm/uaccess.h</filename>
+   <filename class="headerfile">include/linux/uaccess.h</filename>
   </title>

   <para>
diff --git a/Documentation/devicetree/bindings/pwm/pwm-meson.txt b/Documentation/devicetree/bindings/pwm/pwm-meson.txt
new file mode 100644
index 000000000000..5376a4468cb6
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-meson.txt
@@ -0,0 +1,23 @@
+Amlogic Meson PWM Controller
+============================
+
+Required properties:
+- compatible: Shall contain "amlogic,meson8b-pwm" or "amlogic,meson-gxbb-pwm".
+- #pwm-cells: Should be 3. See pwm.txt in this directory for a description of
+  the cells format.
+
+Optional properties:
+- clocks: May contain one or two parent clock phandles, one for each of the
+  two PWM channels.
+- clock-names: May contain the "clkin0" and/or "clkin1" clock names.
+
+Example:
+
+	pwm_ab: pwm@8550 {
+		compatible = "amlogic,meson-gxbb-pwm";
+		reg = <0x0 0x08550 0x0 0x10>;
+		#pwm-cells = <3>;
+		status = "disabled";
+		clocks = <&xtal>, <&xtal>;
+		clock-names = "clkin0", "clkin1";
+	};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-mtk-disp.txt b/Documentation/devicetree/bindings/pwm/pwm-mtk-disp.txt
index f8f59baf6b67..6f8af2bcc7b7 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-mtk-disp.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-mtk-disp.txt
@@ -2,8 +2,9 @@ MediaTek display PWM controller

 Required properties:
 - compatible: should be "mediatek,<name>-disp-pwm":
-  - "mediatek,mt8173-disp-pwm": found on mt8173 SoC.
+  - "mediatek,mt2701-disp-pwm": found on mt2701 SoC.
   - "mediatek,mt6595-disp-pwm": found on mt6595 SoC.
+  - "mediatek,mt8173-disp-pwm": found on mt8173 SoC.
 - reg: physical base address and length of the controller's registers.
 - #pwm-cells: must be 2. See pwm.txt in this directory for a description of
   the cell format.
diff --git a/Documentation/devicetree/bindings/pwm/pwm-st.txt b/Documentation/devicetree/bindings/pwm/pwm-st.txt
index 84d2fb807d3c..19fce774cafa 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-st.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-st.txt
@@ -13,13 +13,14 @@ Required parameters:
 - pinctrl-0:		List of phandles pointing to pin configuration nodes
 			for PWM module.
 			For Pinctrl properties, please refer to [1].
-- clock-names:		Set to "pwm".
+- clock-names:		Valid entries are "pwm" and/or "capture".
 - clocks:		phandle of the clock used by the PWM module.
 			For Clk properties, please refer to [2].
+- interrupts:		IRQ for the Capture device

 Optional properties:
-- st,pwm-num-chan:	Number of available channels. If not passed, the driver
-			will consider single channel by default.
+- st,pwm-num-chan:	Number of available PWM channels. Default is 0.
+- st,capture-num-chan:	Number of available Capture channels. Default is 0.

 [1] Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
 [2] Documentation/devicetree/bindings/clock/clock-bindings.txt
@@ -38,4 +39,5 @@ pwm1: pwm@fe510000 {
 	clocks = <&clk_sysin>;
 	clock-names = "pwm";
 	st,pwm-num-chan = <4>;
+	st,capture-num-chan = <2>;
 };
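The pwm-st capture support above has a matching consumer-side API in the PWM core. A minimal, hypothetical sketch of a driver measuring an input signal with it follows; example_measure() and its device are invented for illustration, while pwm_get(), pwm_capture(), struct pwm_capture and pwm_put() are the generic PWM interfaces:

	#include <linux/device.h>
	#include <linux/err.h>
	#include <linux/jiffies.h>
	#include <linux/pwm.h>

	/* Hypothetical consumer: measure period/duty of a captured signal. */
	static int example_measure(struct device *dev)
	{
		struct pwm_device *pwm;
		struct pwm_capture result;
		int ret;

		pwm = pwm_get(dev, NULL);	/* channel from the "pwms" DT property */
		if (IS_ERR(pwm))
			return PTR_ERR(pwm);

		/* Wait up to one second for the capture unit to latch a result. */
		ret = pwm_capture(pwm, &result, msecs_to_jiffies(1000));
		if (!ret)
			dev_info(dev, "period %u ns, duty %u ns\n",
				 result.period, result.duty_cycle);

		pwm_put(pwm);
		return ret;
	}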
diff --git a/Documentation/devicetree/bindings/pwm/pwm-sun4i.txt b/Documentation/devicetree/bindings/pwm/pwm-sun4i.txt
index cf6068b8e974..f1cbeefb3087 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-sun4i.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-sun4i.txt
@@ -6,6 +6,7 @@ Required properties:
    - "allwinner,sun5i-a10s-pwm"
    - "allwinner,sun5i-a13-pwm"
    - "allwinner,sun7i-a20-pwm"
+   - "allwinner,sun8i-h3-pwm"
 - reg: physical base address and length of the controller's registers
 - #pwm-cells: should be 3. See pwm.txt in this directory for a description of
   the cells format.
diff --git a/Documentation/devicetree/bindings/thermal/max77620_thermal.txt b/Documentation/devicetree/bindings/thermal/max77620_thermal.txt
new file mode 100644
index 000000000000..323a3b3822aa
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/max77620_thermal.txt
@@ -0,0 +1,70 @@
+Thermal driver for MAX77620 Power management IC from Maxim Semiconductor.
+
+Maxim Semiconductor MAX77620 supports alarm interrupts when its
+die temperature crosses 120C and 140C. These threshold temperatures
+are not configurable. The device does not report the actual die
+temperature; it only indicates whether the temperature is above or
+below the threshold level.
+
+Required properties:
+-------------------
+#thermal-sensor-cells:	Please refer to <devicetree/bindings/thermal/thermal.txt>
+			for more details.
+			The value must be 0.
+
+For more details, please refer to the generic thermal DT binding document
+<devicetree/bindings/thermal/thermal.txt>.
+
+Please refer to <devicetree/bindings/mfd/max77620.txt> for the MFD DT binding
+document for the MAX77620.
+
+Example:
+--------
+#include <dt-bindings/mfd/max77620.h>
+#include <dt-bindings/thermal/thermal.h>
+...
+
+i2c@7000d000 {
+	spmic: max77620@3c {
+		compatible = "maxim,max77620";
+		:::::
+		#thermal-sensor-cells = <0>;
+		:::
+	};
+};
+
+cool_dev: cool-dev {
+	compatible = "cooling-dev";
+	#cooling-cells = <2>;
+};
+
+thermal-zones {
+	PMIC-Die {
+		polling-delay = <0>;
+		polling-delay-passive = <0>;
+		thermal-sensors = <&spmic>;
+
+		trips {
+			pmic_die_warn_temp_thresh: hot-die {
+				temperature = <120000>;
+				type = "hot";
+				hysteresis = <0>;
+			};
+
+			pmic_die_crit_temp_thresh: critical-die {
+				temperature = <140000>;
+				type = "critical";
+				hysteresis = <0>;
+			};
+		};
+
+		cooling-maps {
+			map0 {
+				trip = <&pmic_die_warn_temp_thresh>;
+				cooling-device = <&cool_dev THERMAL_NO_LIMIT
+						  THERMAL_NO_LIMIT>;
+				contribution = <100>;
+			};
+		};
+	};
+};
diff --git a/Documentation/devicetree/bindings/thermal/mediatek-thermal.txt b/Documentation/devicetree/bindings/thermal/mediatek-thermal.txt
index 81f9a512bc2a..e2f494d74d8a 100644
--- a/Documentation/devicetree/bindings/thermal/mediatek-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/mediatek-thermal.txt
@@ -8,7 +8,9 @@ apmixedsys register space via AHB bus accesses, so a phandle to the APMIXEDSYS
 is also needed.

 Required properties:
-- compatible: "mediatek,mt8173-thermal"
+- compatible:
+  - "mediatek,mt8173-thermal" : For MT8173 family of SoCs
+  - "mediatek,mt2701-thermal" : For MT2701 family of SoCs
 - reg: Address range of the thermal controller
 - interrupts: IRQ for the thermal controller
 - clocks, clock-names: Clocks needed for the thermal controller. required
diff --git a/Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.txt b/Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.txt
index edebfa0a985e..b6c0ae53d4dc 100644
--- a/Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.txt
+++ b/Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.txt
@@ -10,8 +10,14 @@ Required properties :
 - compatible : For Tegra124, must contain "nvidia,tegra124-soctherm".
   For Tegra132, must contain "nvidia,tegra132-soctherm".
   For Tegra210, must contain "nvidia,tegra210-soctherm".
-- reg : Should contain 1 entry:
+- reg : Should contain at least 2 entries, one for each entry in reg-names:
   - SOCTHERM register set
+  - Tegra CAR register set: Required for Tegra124 and Tegra210.
+  - CCROC register set: Required for Tegra132.
+- reg-names : Should contain at least 2 entries:
+  - soctherm-reg
+  - car-reg
+  - ccroc-reg
 - interrupts : Defines the interrupt used by SOCTHERM
 - clocks : Must contain an entry for each entry in clock-names.
   See ../clocks/clock-bindings.txt for details.
@@ -25,17 +31,45 @@ Required properties :
 - #thermal-sensor-cells : Should be 1. See ./thermal.txt for a description
   of this property. See <dt-bindings/thermal/tegra124-soctherm.h> for a
   list of valid values when referring to thermal sensors.
+- throttle-cfgs: A sub-node which is a container of configuration for each
+  hardware throttle event. These events can be set as cooling devices.
+  * throttle events: Sub-nodes must be named "light" or "heavy".
+    Properties:
+    - nvidia,priority: Each throttle has its own settings, so the SW needs
+      to set a priority for each throttle; the HW arbiter then selects the
+      final throttle settings.
+      A bigger value indicates a higher priority. In general, higher priority
+      translates to lower target frequency. SW needs to ensure that critical
+      thermal alarms are given higher priority, and ensure that there is
+      no race if the priority of two vectors is set to the same value.
+      The range of this value is 1~100.
+    - nvidia,cpu-throt-percent: This property is for Tegra124 and Tegra210.
+      It is the throttling depth of the pulse skippers, expressed as a
+      percentage.
+    - nvidia,cpu-throt-level: This property is only for Tegra132; it is the
+      level of the pulse skippers, which is used to throttle clock
+      frequencies. It indicates the cpu clock throttling depth, and the
+      depth can be programmed. It must be set to one of the following values:
+      TEGRA_SOCTHERM_THROT_LEVEL_LOW, TEGRA_SOCTHERM_THROT_LEVEL_MED
+      TEGRA_SOCTHERM_THROT_LEVEL_HIGH, TEGRA_SOCTHERM_THROT_LEVEL_NONE
+    - #cooling-cells: Should be 1. This cooling device only supports the
+      on/off state. See ./thermal.txt for a description of this property.

 Note:
 - the "critical" type trip points will be set to SOC_THERM hardware as the
 shut down temperature. Once the temperature of this thermal zone is
 higher than it, the system will be shutdown or reset by hardware.
+- the "hot" type trip points will be set to SOC_THERM hardware as the throttle
+temperature. Once the temperature of this thermal zone is higher
+than it, it will trigger the HW throttle event.
Example :

	soctherm@700e2000 {
		compatible = "nvidia,tegra124-soctherm";
-		reg = <0x0 0x700e2000 0x0 0x1000>;
+		reg = <0x0 0x700e2000 0x0 0x600 /* SOC_THERM reg_base */
+			0x0 0x60006000 0x0 0x400>; /* CAR reg_base */
+		reg-names = "soctherm-reg", "car-reg";
		interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
		clocks = <&tegra_car TEGRA124_CLK_TSENSOR>,
			<&tegra_car TEGRA124_CLK_SOC_THERM>;
@@ -44,6 +78,76 @@ Example :
		reset-names = "soctherm";
		#thermal-sensor-cells = <1>;
+
+		throttle-cfgs {
+			/*
+			 * When the "heavy" cooling device is triggered,
+			 * the HW will skip cpu clock pulses at 85% depth.
+			 */
+			throttle_heavy: heavy {
+				nvidia,priority = <100>;
+				nvidia,cpu-throt-percent = <85>;
+
+				#cooling-cells = <1>;
+			};
+
+			/*
+			 * When the "light" cooling device is triggered,
+			 * the HW will skip cpu clock pulses at 50% depth.
+			 */
+			throttle_light: light {
+				nvidia,priority = <80>;
+				nvidia,cpu-throt-percent = <50>;
+
+				#cooling-cells = <1>;
+			};
+
+			/*
+			 * If these two devices are triggered at the same time,
+			 * the HW throttle arbiter will select the highest
+			 * priority as the final throttle settings to skip
+			 * cpu pulses.
+			 */
+		};
+	};
+
+Example: referring to Tegra132's "reg", "reg-names" and "throttle-cfgs" :
+
+	soctherm@700e2000 {
+		compatible = "nvidia,tegra132-soctherm";
+		reg = <0x0 0x700e2000 0x0 0x600 /* SOC_THERM reg_base */
+			0x0 0x70040000 0x0 0x200>; /* CCROC reg_base */
+		reg-names = "soctherm-reg", "ccroc-reg";
+
+		throttle-cfgs {
+			/*
+			 * When the "heavy" cooling device is triggered,
+			 * the HW will throttle the cpu clock at the HIGH level.
+			 */
+			throttle_heavy: heavy {
+				nvidia,priority = <100>;
+				nvidia,cpu-throt-level = <TEGRA_SOCTHERM_THROT_LEVEL_HIGH>;
+
+				#cooling-cells = <1>;
+			};
+
+			/*
+			 * When the "light" cooling device is triggered,
+			 * the HW will throttle the cpu clock at the MED level.
+			 */
+			throttle_light: light {
+				nvidia,priority = <80>;
+				nvidia,cpu-throt-level = <TEGRA_SOCTHERM_THROT_LEVEL_MED>;
+
+				#cooling-cells = <1>;
+			};
+
+			/*
+			 * If these two devices are triggered at the same time,
+			 * the HW throttle arbiter will select the highest
+			 * priority as the final throttle settings.
+			 */
+
+		};
+	};

 Example: referring to thermal sensors :
@@ -62,6 +166,19 @@ Example: referring to thermal sensors :
 				hysteresis = <1000>;
 				type = "critical";
 			};
+
+			cpu_throttle_trip: throttle-trip {
+				temperature = <100000>;
+				hysteresis = <1000>;
+				type = "hot";
+			};
+		};
+
+		cooling-maps {
+			map0 {
+				trip = <&cpu_throttle_trip>;
+				cooling-device = <&throttle_heavy 1 1>;
+			};
 		};
 	};
 };
diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.txt b/Documentation/devicetree/bindings/thermal/qcom-tsens.txt
new file mode 100644
index 000000000000..292ed89d900b
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.txt
@@ -0,0 +1,21 @@
+* QCOM SoC Temperature Sensor (TSENS)
+
+Required properties:
+- compatible :
+ - "qcom,msm8916-tsens" : For 8916 Family of SoCs
+ - "qcom,msm8974-tsens" : For 8974 Family of SoCs
+ - "qcom,msm8996-tsens" : For 8996 Family of SoCs
+
+- reg: Address range of the thermal registers
+- #thermal-sensor-cells : Should be 1. See ./thermal.txt for a description.
+- Refer to Documentation/devicetree/bindings/nvmem/nvmem.txt to know how to specify +nvmem cells + +Example: +tsens: thermal-sensor@900000 { + compatible = "qcom,msm8916-tsens"; + reg = <0x4a8000 0x2000>; + nvmem-cells = <&tsens_caldata>, <&tsens_calsel>; + nvmem-cell-names = "caldata", "calsel"; + #thermal-sensor-cells = <1>; + }; diff --git a/Documentation/devicetree/bindings/watchdog/of-xilinx-wdt.txt b/Documentation/devicetree/bindings/watchdog/of-xilinx-wdt.txt index 6d63782a7378..c6ae9c9d5e3e 100644 --- a/Documentation/devicetree/bindings/watchdog/of-xilinx-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/of-xilinx-wdt.txt @@ -7,6 +7,8 @@ Required properties: - reg : Physical base address and size Optional properties: +- clocks : Input clock specifier. Refer to common clock + bindings. - clock-frequency : Frequency of clock in Hz - xlnx,wdt-enable-once : 0 - Watchdog can be restarted 1 - Watchdog can be enabled just once @@ -17,6 +19,7 @@ Example: axi-timebase-wdt@40100000 { clock-frequency = <50000000>; compatible = "xlnx,xps-timebase-wdt-1.00.a"; + clocks = <&clkc 15>; reg = <0x40100000 0x10000>; xlnx,wdt-enable-once = <0x0>; xlnx,wdt-interval = <0x1b>; diff --git a/Documentation/devicetree/bindings/watchdog/st_lpc_wdt.txt b/Documentation/devicetree/bindings/watchdog/st_lpc_wdt.txt index 039c5ca45577..b949039bc502 100644 --- a/Documentation/devicetree/bindings/watchdog/st_lpc_wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/st_lpc_wdt.txt @@ -9,8 +9,7 @@ functionality. Required properties -- compatible : Must be one of: "st,stih407-lpc" "st,stih416-lpc" - "st,stih415-lpc" "st,stid127-lpc" +- compatible : Should be: "st,stih407-lpc" - reg : LPC registers base address + size - interrupts : LPC interrupt line number and associated flags - clocks : Clock used by LPC device (See: ../clock/clock-bindings.txt) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a1489e14f8ee..58f3c1041759 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2470,6 +2470,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nfsrootdebug [NFS] enable nfsroot debugging messages. See Documentation/filesystems/nfs/nfsroot.txt. + nfs.callback_nr_threads= + [NFSv4] set the total number of threads that the + NFS client will assign to service NFSv4 callback + requests. + nfs.callback_tcpport= [NFS] set the TCP port on which the NFSv4 callback channel should listen. @@ -2493,6 +2498,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. of returning the full 64-bit number. The default is to return 64-bit inode numbers. + nfs.max_session_cb_slots= + [NFSv4.1] Sets the maximum number of session + slots the client will assign to the callback + channel. This determines the maximum number of + callbacks the client will process in parallel for + a particular server. + nfs.max_session_slots= [NFSv4.1] Sets the maximum number of session slots the client will attempt to negotiate with the server. diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt index efc3f3d293c4..ef473dc7f55e 100644 --- a/Documentation/thermal/sysfs-api.txt +++ b/Documentation/thermal/sysfs-api.txt @@ -49,6 +49,9 @@ temperature) and throttle appropriate devices. .bind: bind the thermal zone device with a thermal cooling device. .unbind: unbind the thermal zone device with a thermal cooling device. .get_temp: get the current temperature of the thermal zone. 
+ .set_trips: set the trip points window. Whenever the current temperature + is updated, the trip points immediately below and above the + current temperature are found. .get_mode: get the current mode (enabled/disabled) of the thermal zone. - "enabled" means the kernel thermal management is enabled. - "disabled" will prevent kernel thermal driver action upon trip points @@ -95,6 +98,10 @@ temperature) and throttle appropriate devices. get_temp: a pointer to a function that reads the sensor temperature. This is mandatory callback provided by sensor driver. + set_trips: a pointer to a function that sets a + temperature window. When this window is + left the driver must inform the thermal + core via thermal_zone_device_update. get_trend: a pointer to a function that reads the sensor temperature trend. set_emul_temp: a pointer to a function that sets @@ -140,6 +147,18 @@ temperature) and throttle appropriate devices. Normally this function will not need to be called and the resource management code will ensure that the resource is freed. +1.1.7 int thermal_zone_get_slope(struct thermal_zone_device *tz) + + This interface is used to read the slope attribute value + for the thermal zone device, which might be useful for platform + drivers for temperature calculations. + +1.1.8 int thermal_zone_get_offset(struct thermal_zone_device *tz) + + This interface is used to read the offset attribute value + for the thermal zone device, which might be useful for platform + drivers for temperature calculations. + 1.2 thermal cooling device interface 1.2.1 struct thermal_cooling_device *thermal_cooling_device_register(char *name, void *devdata, struct thermal_cooling_device_ops *) diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt index 7f31125c123e..ea277478982f 100644 --- a/Documentation/watchdog/watchdog-kernel-api.txt +++ b/Documentation/watchdog/watchdog-kernel-api.txt @@ -48,8 +48,10 @@ struct watchdog_device { const struct attribute_group **groups; const struct watchdog_info *info; const struct watchdog_ops *ops; + const struct watchdog_governor *gov; unsigned int bootstatus; unsigned int timeout; + unsigned int pretimeout; unsigned int min_timeout; unsigned int max_timeout; unsigned int min_hw_heartbeat_ms; @@ -74,9 +76,11 @@ It contains following fields: * info: a pointer to a watchdog_info structure. This structure gives some additional information about the watchdog timer itself. (Like it's unique name) * ops: a pointer to the list of watchdog operations that the watchdog supports. +* gov: a pointer to the assigned watchdog device pretimeout governor or NULL. * timeout: the watchdog timer's timeout value (in seconds). This is the time after which the system will reboot if user space does not send a heartbeat request if WDOG_ACTIVE is set. +* pretimeout: the watchdog timer's pretimeout value (in seconds). * min_timeout: the watchdog timer's minimum timeout value (in seconds). If set, the minimum configurable value for 'timeout'. * max_timeout: the watchdog timer's maximum timeout value (in seconds), @@ -121,6 +125,7 @@ struct watchdog_ops { int (*ping)(struct watchdog_device *); unsigned int (*status)(struct watchdog_device *); int (*set_timeout)(struct watchdog_device *, unsigned int); + int (*set_pretimeout)(struct watchdog_device *, unsigned int); unsigned int (*get_timeleft)(struct watchdog_device *); int (*restart)(struct watchdog_device *); void (*ref)(struct watchdog_device *) __deprecated; @@ -188,6 +193,23 @@ they are supported. 
These optional routines/operations are:
  If set_timeout is not provided but WDIOF_SETTIMEOUT is set, the watchdog
  infrastructure updates the timeout value of the watchdog_device internally
  to the requested value.
+ If the pretimeout feature is used (WDIOF_PRETIMEOUT), then set_timeout must
+ also take care of checking if pretimeout is still valid and set up the timer
+ accordingly. This can't be done in the core without races, so it is the
+ duty of the driver.
+* set_pretimeout: this routine checks and changes the pretimeout value of
+ the watchdog. It is optional because not all watchdogs support pretimeout
+ notification. The timeout value is not an absolute time, but the number of
+ seconds before the actual timeout would happen. It returns 0 on success,
+ -EINVAL for "parameter out of range" and -EIO for "could not write value to
+ the watchdog". A value of 0 disables pretimeout notification.
+ (Note: WDIOF_PRETIMEOUT needs to be set in the options field of the
+ watchdog's info structure).
+ If the watchdog driver does not have to perform any action but setting the
+ watchdog_device.pretimeout, this callback can be omitted. That means if
+ set_pretimeout is not provided but WDIOF_PRETIMEOUT is set, the watchdog
+ infrastructure updates the pretimeout value of the watchdog_device internally
+ to the requested value.
* get_timeleft: this routine returns the time that's left before a reset.
* restart: this routine restarts the machine. It returns 0 on success or a
  negative errno code for failure.
@@ -268,3 +290,14 @@ User should follow the following guidelines for setting the priority:
* 128: default restart handler, use if no other handler is expected to be
  available, and/or if restart is sufficient to restart the entire system
* 255: highest priority, will preempt all other restart handlers
+
+To raise a pretimeout notification, the following function should be used:
+
+void watchdog_notify_pretimeout(struct watchdog_device *wdd)
+
+The function can be called from interrupt context. If the watchdog pretimeout
+governor framework (kbuild CONFIG_WATCHDOG_PRETIMEOUT_GOV symbol) is enabled,
+an action is taken by a preconfigured pretimeout governor preassigned to
+the watchdog device. If the watchdog pretimeout governor framework is not
+enabled, watchdog_notify_pretimeout() prints a notification message to
+the kernel log buffer.
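To make the pretimeout plumbing described above concrete, here is a minimal sketch of how a driver with a hardware pre-IRQ might wire it up. The example_* names and the implied register programming are assumptions for illustration only; the set_pretimeout operation and watchdog_notify_pretimeout() are the interfaces documented above:

	#include <linux/interrupt.h>
	#include <linux/module.h>
	#include <linux/watchdog.h>

	static int example_wdt_set_pretimeout(struct watchdog_device *wdd,
					      unsigned int timeout)
	{
		/* Program the hardware so an IRQ fires 'timeout' seconds
		 * before the reset would occur (register write assumed). */
		wdd->pretimeout = timeout;
		return 0;
	}

	static irqreturn_t example_wdt_pretimeout_irq(int irq, void *data)
	{
		struct watchdog_device *wdd = data;

		/* Safe from interrupt context; dispatched to a governor when
		 * CONFIG_WATCHDOG_PRETIMEOUT_GOV is enabled, otherwise logged. */
		watchdog_notify_pretimeout(wdd);
		return IRQ_HANDLED;
	}

	static const struct watchdog_ops example_wdt_ops = {
		.owner		= THIS_MODULE,
		/* .start, .stop, .ping, .set_timeout, ... */
		.set_pretimeout	= example_wdt_set_pretimeout,
	};

At probe time such a driver would request the pre-IRQ with the handler above and advertise WDIOF_PRETIMEOUT in its watchdog_info options.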
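The new .set_trips callback from the thermal sysfs-api changes earlier in this series follows the same pattern: the core computes the trip window, the driver programs it into hardware. A hedged sketch for an of-thermal sensor driver (the register writes are placeholders; the ops signatures are those of the of-thermal interface this series extends):

	#include <linux/thermal.h>

	static int example_get_temp(void *data, int *temp)
	{
		*temp = 45000;	/* placeholder: read millicelsius from hardware */
		return 0;
	}

	static int example_set_trips(void *data, int low, int high)
	{
		/*
		 * Program the hardware alarm thresholds (millicelsius) so an
		 * interrupt fires when the temperature leaves [low, high];
		 * the core recomputes the window on every temperature update.
		 */
		/* writel(..., LOW_THRESHOLD_REG);  -- hardware-specific */
		/* writel(..., HIGH_THRESHOLD_REG); -- hardware-specific */
		return 0;
	}

	static const struct thermal_zone_of_device_ops example_ops = {
		.get_temp  = example_get_temp,	/* mandatory */
		.set_trips = example_set_trips,
	};

Registration would go through devm_thermal_zone_of_sensor_register(), and the interrupt raised when the window is left should end in a call to thermal_zone_device_update(), as the documentation above requires.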
diff --git a/MAINTAINERS b/MAINTAINERS index 5e925a25e77d..1fc66f0aceb5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4775,15 +4775,6 @@ L: iommu@lists.linux-foundation.org S: Maintained F: drivers/iommu/exynos-iommu.c -EXYNOS MIPI DISPLAY DRIVERS -M: Inki Dae <inki.dae@samsung.com> -M: Donghwa Lee <dh09.lee@samsung.com> -M: Kyungmin Park <kyungmin.park@samsung.com> -L: linux-fbdev@vger.kernel.org -S: Maintained -F: drivers/video/fbdev/exynos/exynos_mipi* -F: include/video/exynos_mipi* - EZchip NPS platform support M: Noam Camus <noamc@ezchip.com> S: Supported @@ -4962,12 +4953,9 @@ F: drivers/net/wan/dlci.c F: drivers/net/wan/sdla.c FRAMEBUFFER LAYER -M: Jean-Christophe Plagniol-Villard <plagnioj@jcrosoft.com> M: Tomi Valkeinen <tomi.valkeinen@ti.com> L: linux-fbdev@vger.kernel.org -W: http://linux-fbdev.sourceforge.net/ Q: http://patchwork.kernel.org/project/linux-fbdev/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/plagnioj/linux-fbdev.git S: Maintained F: Documentation/fb/ F: drivers/video/ @@ -9201,6 +9189,14 @@ S: Maintained F: Documentation/devicetree/bindings/pci/versatile.txt F: drivers/pci/host/pci-versatile.c +PCI DRIVER FOR ARMADA 8K +M: Thomas Petazzoni <thomas.petazzoni@free-electrons.com> +L: linux-pci@vger.kernel.org +L: linux-arm-kernel@lists.infradead.org +S: Maintained +F: Documentation/devicetree/bindings/pci/pci-armada8k.txt +F: drivers/pci/host/pcie-armada8k.c + PCI DRIVER FOR APPLIEDMICRO XGENE M: Tanmay Inamdar <tinamdar@apm.com> L: linux-pci@vger.kernel.org @@ -9247,6 +9243,7 @@ M: Thomas Petazzoni <thomas.petazzoni@free-electrons.com> L: linux-pci@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained +F: Documentation/devicetree/bindings/pci/aardvark-pci.txt F: drivers/pci/host/pci-aardvark.c PCI DRIVER FOR NVIDIA TEGRA @@ -621,6 +621,7 @@ include arch/$(SRCARCH)/Makefile KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,) KBUILD_CFLAGS += $(call cc-disable-warning,maybe-uninitialized,) +KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os diff --git a/arch/arm/boot/dts/tegra124-jetson-tk1.dts b/arch/arm/boot/dts/tegra124-jetson-tk1.dts index e52b82449a79..53994f9fbbcc 100644 --- a/arch/arm/boot/dts/tegra124-jetson-tk1.dts +++ b/arch/arm/boot/dts/tegra124-jetson-tk1.dts @@ -2045,44 +2045,32 @@ thermal-zones { cpu { trips { - trip { + cpu-shutdown-trip { temperature = <101000>; hysteresis = <0>; type = "critical"; }; }; - - cooling-maps { - /* There are currently no cooling maps because there are no cooling devices */ - }; }; mem { trips { - trip { + mem-shutdown-trip { temperature = <101000>; hysteresis = <0>; type = "critical"; }; }; - - cooling-maps { - /* There are currently no cooling maps because there are no cooling devices */ - }; }; gpu { trips { - trip { + gpu-shutdown-trip { temperature = <101000>; hysteresis = <0>; type = "critical"; }; }; - - cooling-maps { - /* There are currently no cooling maps because there are no cooling devices */ - }; }; }; }; diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index ea340f9de448..187a36c6d0fc 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -851,7 +851,9 @@ soctherm: thermal-sensor@700e2000 { compatible = "nvidia,tegra124-soctherm"; - reg = <0x0 0x700e2000 0x0 0x1000>; + reg = <0x0 0x700e2000 0x0 0x600 /* SOC_THERM reg_base */ + 0x0 0x60006000 0x0 0x400>; /* CAR reg_base */ + reg-names = "soctherm-reg", 
"car-reg"; interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>; clocks = <&tegra_car TEGRA124_CLK_TSENSOR>, <&tegra_car TEGRA124_CLK_SOC_THERM>; @@ -859,6 +861,15 @@ resets = <&tegra_car 78>; reset-names = "soctherm"; #thermal-sensor-cells = <1>; + + throttle-cfgs { + throttle_heavy: heavy { + nvidia,priority = <100>; + nvidia,cpu-throt-percent = <85>; + + #cooling-cells = <2>; + }; + }; }; dfll: clock@70110000 { @@ -1154,6 +1165,26 @@ thermal-sensors = <&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>; + + trips { + cpu-shutdown-trip { + temperature = <103000>; + hysteresis = <0>; + type = "critical"; + }; + cpu_throttle_trip: throttle-trip { + temperature = <100000>; + hysteresis = <1000>; + type = "hot"; + }; + }; + + cooling-maps { + map0 { + trip = <&cpu_throttle_trip>; + cooling-device = <&throttle_heavy 1 1>; + }; + }; }; mem { @@ -1162,6 +1193,21 @@ thermal-sensors = <&soctherm TEGRA124_SOCTHERM_SENSOR_MEM>; + + trips { + mem-shutdown-trip { + temperature = <103000>; + hysteresis = <0>; + type = "critical"; + }; + }; + + cooling-maps { + /* + * There are currently no cooling maps, + * because there are no cooling devices. + */ + }; }; gpu { @@ -1170,6 +1216,26 @@ thermal-sensors = <&soctherm TEGRA124_SOCTHERM_SENSOR_GPU>; + + trips { + gpu-shutdown-trip { + temperature = <101000>; + hysteresis = <0>; + type = "critical"; + }; + gpu_throttle_trip: throttle-trip { + temperature = <99000>; + hysteresis = <1000>; + type = "hot"; + }; + }; + + cooling-maps { + map0 { + trip = <&gpu_throttle_trip>; + cooling-device = <&throttle_heavy 1 1>; + }; + }; }; pllx { @@ -1178,6 +1244,21 @@ thermal-sensors = <&soctherm TEGRA124_SOCTHERM_SENSOR_PLLX>; + + trips { + pllx-shutdown-trip { + temperature = <103000>; + hysteresis = <0>; + type = "critical"; + }; + }; + + cooling-maps { + /* + * There are currently no cooling maps, + * because there are no cooling devices. 
+ */ + }; }; }; diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index 4e484f406419..c58f6841f8aa 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -168,8 +168,6 @@ CONFIG_DRM_PANEL_SAMSUNG_LD9040=y CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0=y CONFIG_DRM_NXP_PTN3460=y CONFIG_DRM_PARADE_PS8622=y -CONFIG_EXYNOS_VIDEO=y -CONFIG_EXYNOS_MIPI_DSI=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_PLATFORM=y CONFIG_BACKLIGHT_PWM=y diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h index 05ec5e0df32d..67532f242271 100644 --- a/arch/arm/mm/fault.h +++ b/arch/arm/mm/fault.h @@ -23,7 +23,6 @@ static inline int fsr_fs(unsigned int fsr) #endif void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs); -unsigned long search_exception_table(unsigned long addr); void early_abt_enable(void); #endif /* __ARCH_ARM_FAULT_H */ diff --git a/arch/arm64/boot/dts/nvidia/tegra132.dtsi b/arch/arm64/boot/dts/nvidia/tegra132.dtsi index 2013f8916084..3f3a46a4bd01 100644 --- a/arch/arm64/boot/dts/nvidia/tegra132.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra132.dtsi @@ -4,6 +4,7 @@ #include <dt-bindings/pinctrl/pinctrl-tegra.h> #include <dt-bindings/pinctrl/pinctrl-tegra-xusb.h> #include <dt-bindings/interrupt-controller/arm-gic.h> +#include <dt-bindings/thermal/tegra124-soctherm.h> / { compatible = "nvidia,tegra132", "nvidia,tegra124"; @@ -727,8 +728,10 @@ }; soctherm: thermal-sensor@700e2000 { - compatible = "nvidia,tegra124-soctherm"; - reg = <0x0 0x700e2000 0x0 0x1000>; + compatible = "nvidia,tegra132-soctherm"; + reg = <0x0 0x700e2000 0x0 0x600 /* 0: SOC_THERM reg_base */ + 0x0 0x70040000 0x0 0x200>; /* 2: CCROC reg_base */ + reg-names = "soctherm-reg", "ccroc-reg"; interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>; clocks = <&tegra_car TEGRA124_CLK_TSENSOR>, <&tegra_car TEGRA124_CLK_SOC_THERM>; @@ -736,6 +739,118 @@ resets = <&tegra_car 78>; reset-names = "soctherm"; #thermal-sensor-cells = <1>; + + throttle-cfgs { + throttle_heavy: heavy { + nvidia,priority = <100>; + nvidia,cpu-throt-level = <TEGRA_SOCTHERM_THROT_LEVEL_HIGH>; + + #cooling-cells = <2>; + }; + }; + }; + + thermal-zones { + cpu { + polling-delay-passive = <1000>; + polling-delay = <0>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>; + + trips { + cpu_shutdown_trip { + temperature = <105000>; + hysteresis = <1000>; + type = "critical"; + }; + + cpu_throttle_trip: throttle-trip { + temperature = <102000>; + hysteresis = <1000>; + type = "hot"; + }; + }; + + cooling-maps { + map0 { + trip = <&cpu_throttle_trip>; + cooling-device = <&throttle_heavy 1 1>; + }; + }; + }; + mem { + polling-delay-passive = <0>; + polling-delay = <0>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_MEM>; + + trips { + mem_shutdown_trip { + temperature = <101000>; + hysteresis = <1000>; + type = "critical"; + }; + }; + + cooling-maps { + /* + * There are currently no cooling maps, + * because there are no cooling devices. 
+ */ + }; + }; + gpu { + polling-delay-passive = <1000>; + polling-delay = <0>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_GPU>; + + trips { + gpu_shutdown_trip { + temperature = <101000>; + hysteresis = <1000>; + type = "critical"; + }; + + gpu_throttle_trip: throttle-trip { + temperature = <99000>; + hysteresis = <1000>; + type = "hot"; + }; + }; + + cooling-maps { + map0 { + trip = <&gpu_throttle_trip>; + cooling-device = <&throttle_heavy 1 1>; + }; + }; + }; + pllx { + polling-delay-passive = <0>; + polling-delay = <0>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_PLLX>; + + trips { + pllx_shutdown_trip { + temperature = <105000>; + hysteresis = <1000>; + type = "critical"; + }; + }; + + cooling-maps { + /* + * There are currently no cooling maps, + * because there are no cooling devices. + */ + }; + }; }; ahub@70300000 { diff --git a/arch/arm64/boot/dts/nvidia/tegra210.dtsi b/arch/arm64/boot/dts/nvidia/tegra210.dtsi index f6739797150a..46045fe719da 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210.dtsi @@ -3,6 +3,7 @@ #include <dt-bindings/memory/tegra210-mc.h> #include <dt-bindings/pinctrl/pinctrl-tegra.h> #include <dt-bindings/interrupt-controller/arm-gic.h> +#include <dt-bindings/thermal/tegra124-soctherm.h> / { compatible = "nvidia,tegra210"; @@ -1159,4 +1160,130 @@ (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>; interrupt-parent = <&gic>; }; + + soctherm: thermal-sensor@700e2000 { + compatible = "nvidia,tegra210-soctherm"; + reg = <0x0 0x700e2000 0x0 0x600 /* SOC_THERM reg_base */ + 0x0 0x60006000 0x0 0x400>; /* CAR reg_base */ + reg-names = "soctherm-reg", "car-reg"; + interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&tegra_car TEGRA210_CLK_TSENSOR>, + <&tegra_car TEGRA210_CLK_SOC_THERM>; + clock-names = "tsensor", "soctherm"; + resets = <&tegra_car 78>; + reset-names = "soctherm"; + #thermal-sensor-cells = <1>; + + throttle-cfgs { + throttle_heavy: heavy { + nvidia,priority = <100>; + nvidia,cpu-throt-percent = <85>; + + #cooling-cells = <2>; + }; + }; + }; + + thermal-zones { + cpu { + polling-delay-passive = <1000>; + polling-delay = <0>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>; + + trips { + cpu-shutdown-trip { + temperature = <102500>; + hysteresis = <0>; + type = "critical"; + }; + + cpu_throttle_trip: throttle-trip { + temperature = <98500>; + hysteresis = <1000>; + type = "hot"; + }; + }; + + cooling-maps { + map0 { + trip = <&cpu_throttle_trip>; + cooling-device = <&throttle_heavy 1 1>; + }; + }; + }; + mem { + polling-delay-passive = <0>; + polling-delay = <0>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_MEM>; + + trips { + mem-shutdown-trip { + temperature = <103000>; + hysteresis = <0>; + type = "critical"; + }; + }; + + cooling-maps { + /* + * There are currently no cooling maps, + * because there are no cooling devices. 
+ */ + }; + }; + gpu { + polling-delay-passive = <1000>; + polling-delay = <0>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_GPU>; + + trips { + gpu-shutdown-trip { + temperature = <103000>; + hysteresis = <0>; + type = "critical"; + }; + + gpu_throttle_trip: throttle-trip { + temperature = <100000>; + hysteresis = <1000>; + type = "hot"; + }; + }; + + cooling-maps { + map0 { + trip = <&gpu_throttle_trip>; + cooling-device = <&throttle_heavy 1 1>; + }; + }; + }; + pllx { + polling-delay-passive = <0>; + polling-delay = <0>; + + thermal-sensors = + <&soctherm TEGRA124_SOCTHERM_SENSOR_PLLX>; + + trips { + pllx-shutdown-trip { + temperature = <103000>; + hysteresis = <0>; + type = "critical"; + }; + }; + + cooling-maps { + /* + * There are currently no cooling maps, + * because there are no cooling devices. + */ + }; + }; + }; }; diff --git a/arch/frv/include/asm/pgtable.h b/arch/frv/include/asm/pgtable.h index 07d7a7ef8bd5..a0513d463a1f 100644 --- a/arch/frv/include/asm/pgtable.h +++ b/arch/frv/include/asm/pgtable.h @@ -522,5 +522,6 @@ extern void __init pgtable_cache_init(void); #ifndef __ASSEMBLY__ extern void __init paging_init(void); #endif /* !__ASSEMBLY__ */ +#define HAVE_ARCH_UNMAPPED_AREA #endif /* _ASM_PGTABLE_H */ diff --git a/arch/frv/include/asm/segment.h b/arch/frv/include/asm/segment.h index 4377c89a57f5..2305142d4cf8 100644 --- a/arch/frv/include/asm/segment.h +++ b/arch/frv/include/asm/segment.h @@ -32,7 +32,6 @@ typedef struct { #define get_ds() (KERNEL_DS) #define get_fs() (__current_thread_info->addr_limit) #define segment_eq(a, b) ((a).seg == (b).seg) -#define __kernel_ds_p() segment_eq(get_fs(), KERNEL_DS) #define get_addr_limit() (get_fs().seg) #define set_fs(_x) \ diff --git a/arch/frv/include/asm/uaccess.h b/arch/frv/include/asm/uaccess.h index 87d9e34c5df8..c0f4057eab60 100644 --- a/arch/frv/include/asm/uaccess.h +++ b/arch/frv/include/asm/uaccess.h @@ -20,8 +20,6 @@ #include <asm/segment.h> #include <asm/sections.h> -#define HAVE_ARCH_UNMAPPED_AREA /* we decide where to put mmaps */ - #define __ptr(x) ((unsigned long __force *)(x)) #define VERIFY_READ 0 diff --git a/arch/m68k/include/asm/uaccess_no.h b/arch/m68k/include/asm/uaccess_no.h index 1bdf15263754..36deeb36503b 100644 --- a/arch/m68k/include/asm/uaccess_no.h +++ b/arch/m68k/include/asm/uaccess_no.h @@ -44,9 +44,6 @@ struct exception_table_entry unsigned long insn, fixup; }; -/* Returns 0 if exception not found and fixup otherwise. */ -extern unsigned long search_exception_table(unsigned long); - /* * These are the main single-value transfer routines. They automatically diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 826676778094..253a67e275ad 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -71,9 +71,6 @@ struct exception_table_entry { unsigned long insn, fixup; }; -/* Returns 0 if exception not found and fixup otherwise. 
*/ -extern unsigned long search_exception_table(unsigned long); - #ifndef CONFIG_MMU /* Check against bounds of physical memory */ diff --git a/arch/mips/include/asm/extable.h b/arch/mips/include/asm/extable.h new file mode 100644 index 000000000000..dce7a627a925 --- /dev/null +++ b/arch/mips/include/asm/extable.h @@ -0,0 +1,13 @@ +#ifndef _ASM_EXTABLE_H +#define _ASM_EXTABLE_H + +struct exception_table_entry +{ + unsigned long insn; + unsigned long nextinsn; +}; + +struct pt_regs; +extern int fixup_exception(struct pt_regs *regs); + +#endif diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h index 0aaf9a01ea50..702c273e67a9 100644 --- a/arch/mips/include/asm/module.h +++ b/arch/mips/include/asm/module.h @@ -3,7 +3,7 @@ #include <linux/list.h> #include <linux/elf.h> -#include <asm/uaccess.h> +#include <asm/extable.h> struct mod_arch_specific { /* Data Bus Error exception tables */ diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index 21a2aaba20d5..4daf839cd8a8 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -16,6 +16,7 @@ #include <linux/thread_info.h> #include <linux/string.h> #include <asm/asm-eva.h> +#include <asm/extable.h> /* * The fs value determines whether argument validity checking should be @@ -1485,12 +1486,4 @@ static inline long strnlen_user(const char __user *s, long n) return res; } -struct exception_table_entry -{ - unsigned long insn; - unsigned long nextinsn; -}; - -extern int fixup_exception(struct pt_regs *regs); - #endif /* _ASM_UACCESS_H */ diff --git a/arch/mips/lasat/picvue_proc.c b/arch/mips/lasat/picvue_proc.c index 27533c109f92..dd292dcec684 100644 --- a/arch/mips/lasat/picvue_proc.c +++ b/arch/mips/lasat/picvue_proc.c @@ -16,6 +16,7 @@ #include <linux/timer.h> #include <linux/mutex.h> +#include <linux/uaccess.h> #include "picvue.h" diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h index 769d5ed8e992..b10ba121c849 100644 --- a/arch/mn10300/include/asm/processor.h +++ b/arch/mn10300/include/asm/processor.h @@ -18,7 +18,6 @@ #include <asm/page.h> #include <asm/ptrace.h> #include <asm/cpu-regs.h> -#include <asm/uaccess.h> #include <asm/current.h> /* Forward declaration, a strange C thing */ diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h index d012e877a95a..2eedf6f46a57 100644 --- a/arch/mn10300/include/asm/uaccess.h +++ b/arch/mn10300/include/asm/uaccess.h @@ -38,7 +38,6 @@ #define get_ds() (KERNEL_DS) #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define __kernel_ds_p() (current_thread_info()->addr_limit.seg == 0x9FFFFFFF) #define segment_eq(a, b) ((a).seg == (b).seg) @@ -72,12 +71,6 @@ static inline int ___range_ok(unsigned long addr, unsigned int size) #define access_ok(type, addr, size) (__range_ok((addr), (size)) == 0) #define __access_ok(addr, size) (__range_ok((addr), (size)) == 0) -static inline int verify_area(int type, const void *addr, unsigned long size) -{ - return access_ok(type, addr, size) ? 
0 : -EFAULT; -} - - /* * The exception table consists of pairs of addresses: the first is the * address of an instruction that is allowed to fault, and the second is diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c index dfd0301cf200..cd8cb1d1176b 100644 --- a/arch/mn10300/kernel/signal.c +++ b/arch/mn10300/kernel/signal.c @@ -75,7 +75,7 @@ static int restore_sigcontext(struct pt_regs *regs, struct fpucontext *buf; err |= __get_user(buf, &sc->fpucontext); if (buf) { - if (verify_area(VERIFY_READ, buf, sizeof(*buf))) + if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) goto badframe; err |= fpu_restore_sigcontext(buf); } @@ -98,7 +98,7 @@ asmlinkage long sys_sigreturn(void) long d0; frame = (struct sigframe __user *) current_frame()->sp; - if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; if (__get_user(set.sig[0], &frame->sc.oldmask)) goto badframe; @@ -130,7 +130,7 @@ asmlinkage long sys_rt_sigreturn(void) long d0; frame = (struct rt_sigframe __user *) current_frame()->sp; - if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index 5cc6b4f1b795..140faa16685a 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -82,10 +82,6 @@ struct exception_table_entry { unsigned long insn, fixup; }; -/* Returns 0 if exception not found and fixup otherwise. */ -extern unsigned long search_exception_table(unsigned long); -extern void sort_exception_table(void); - /* * These are the main single-value transfer routines. They automatically * use the right size if we just have the right pointer type. 
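One detail of the mn10300 signal.c hunks above is easy to misread: verify_area() returned 0 on success and -EFAULT on failure, whereas access_ok() is a predicate that is non-zero when the access is allowed, so each bail-out test gains a negation. A minimal sketch of the equivalence, mirroring the wrapper the patch removes:

	#include <linux/types.h>
	#include <linux/uaccess.h>

	/* The removed wrapper was effectively: */
	static inline int verify_area(int type, const void __user *addr,
				      unsigned long size)
	{
		return access_ok(type, addr, size) ? 0 : -EFAULT;
	}

	static int example_check(const void __user *buf, size_t len)
	{
		/* old style: non-zero return meant "not accessible" */
		if (verify_area(VERIFY_READ, buf, len))
			return -EFAULT;

		/* new style: access_ok() is true when access is allowed */
		if (!access_ok(VERIFY_READ, buf, len))
			return -EFAULT;

		return 0;
	}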
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index e44bdb9078a5..c2c43f714684 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -83,10 +83,10 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, (unsigned long)pgd_val(e)) /* This is the size of the initially mapped kernel memory */ -#if defined(CONFIG_64BIT) || defined(CONFIG_SMP) -#define KERNEL_INITIAL_ORDER 25 /* 1<<25 = 32MB */ +#if defined(CONFIG_64BIT) +#define KERNEL_INITIAL_ORDER 26 /* 1<<26 = 64MB */ #else -#define KERNEL_INITIAL_ORDER 24 /* 1<<24 = 16MB */ +#define KERNEL_INITIAL_ORDER 25 /* 1<<25 = 32MB */ #endif #define KERNEL_INITIAL_SIZE (1 << KERNEL_INITIAL_ORDER) diff --git a/arch/parisc/include/asm/traps.h b/arch/parisc/include/asm/traps.h index 5e953ab4530d..63670231f48a 100644 --- a/arch/parisc/include/asm/traps.h +++ b/arch/parisc/include/asm/traps.h @@ -11,6 +11,7 @@ void parisc_terminate(char *msg, struct pt_regs *regs, void die_if_kernel(char *str, struct pt_regs *regs, long err); /* mm/fault.c */ +const char *trap_name(unsigned long code); void do_page_fault(struct pt_regs *regs, unsigned long code, unsigned long address); #endif diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 97d6b208e129..378df9207406 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -458,8 +458,8 @@ void parisc_terminate(char *msg, struct pt_regs *regs, int code, unsigned long o } printk("\n"); - printk(KERN_CRIT "%s: Code=%d regs=%p (Addr=" RFMT ")\n", - msg, code, regs, offset); + pr_crit("%s: Code=%d (%s) regs=%p (Addr=" RFMT ")\n", + msg, code, trap_name(code), regs, offset); show_regs(regs); spin_unlock(&terminate_lock); diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index b37787dbe775..3d6ef1b29c6a 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -90,8 +90,9 @@ SECTIONS /* Start of data section */ _sdata = .; - RO_DATA_SECTION(8) - + /* Architecturally we need to keep __gp below 0x1000000 and thus + * in front of RO_DATA_SECTION() which stores lots of tracepoint + * and ftrace symbols. */ #ifdef CONFIG_64BIT . = ALIGN(16); /* Linkage tables */ @@ -106,6 +107,12 @@ SECTIONS } #endif + RO_DATA_SECTION(8) + + /* RO because of BUILDTIME_EXTABLE_SORT */ + EXCEPTION_TABLE(8) + NOTES + /* unwind info */ .PARISC.unwind : { __start___unwind = .; @@ -121,9 +128,6 @@ SECTIONS . = ALIGN(HUGEPAGE_SIZE); data_start = .; - EXCEPTION_TABLE(8) - NOTES - /* Data */ RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, PAGE_SIZE) diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 47a6ca4c9e40..8ff9253930af 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -14,7 +14,7 @@ #include <linux/ptrace.h> #include <linux/sched.h> #include <linux/interrupt.h> -#include <linux/module.h> +#include <linux/extable.h> #include <linux/uaccess.h> #include <asm/traps.h> @@ -204,6 +204,16 @@ static const char * const trap_description[] = { [28] "Unaligned data reference trap", }; +const char *trap_name(unsigned long code) +{ + const char *t = NULL; + + if (code < ARRAY_SIZE(trap_description)) + t = trap_description[code]; + + return t ? 
t : "Unknown trap"; +} + /* * Print out info about fatal segfaults, if the show_unhandled_signals * sysctl is set: @@ -213,8 +223,6 @@ show_signal_msg(struct pt_regs *regs, unsigned long code, unsigned long address, struct task_struct *tsk, struct vm_area_struct *vma) { - const char *trap_name = NULL; - if (!unhandled_signal(tsk, SIGSEGV)) return; @@ -226,10 +234,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long code, tsk->comm, code, address); print_vma_addr(KERN_CONT " in ", regs->iaoq[0]); - if (code < ARRAY_SIZE(trap_description)) - trap_name = trap_description[code]; - pr_warn(KERN_CONT " trap #%lu: %s%c", code, - trap_name ? trap_name : "unknown", + pr_cont(" trap #%lu: %s%c", code, trap_name(code), vma ? ',':'\n'); if (vma) diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 356f38473b5d..e02ada312be8 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -105,6 +105,8 @@ static void * __init get_memblock(unsigned long size) else panic("get_memblock() failed.\n"); + memset(__va(phys), 0, size); + return __va(phys); } diff --git a/arch/score/include/asm/extable.h b/arch/score/include/asm/extable.h new file mode 100644 index 000000000000..c4423ccf830d --- /dev/null +++ b/arch/score/include/asm/extable.h @@ -0,0 +1,11 @@ +#ifndef _ASM_SCORE_EXTABLE_H +#define _ASM_SCORE_EXTABLE_H + +struct exception_table_entry { + unsigned long insn; + unsigned long fixup; +}; + +struct pt_regs; +extern int fixup_exception(struct pt_regs *regs); +#endif diff --git a/arch/score/include/asm/module.h b/arch/score/include/asm/module.h index abf395bbfaba..6dc1f2935eef 100644 --- a/arch/score/include/asm/module.h +++ b/arch/score/include/asm/module.h @@ -2,7 +2,7 @@ #define _ASM_SCORE_MODULE_H #include <linux/list.h> -#include <asm/uaccess.h> +#include <asm/extable.h> #include <asm-generic/module.h> struct mod_arch_specific { diff --git a/arch/score/include/asm/uaccess.h b/arch/score/include/asm/uaccess.h index 01aec8ccde83..db58ab98ec4b 100644 --- a/arch/score/include/asm/uaccess.h +++ b/arch/score/include/asm/uaccess.h @@ -4,6 +4,7 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/thread_info.h> +#include <asm/extable.h> #define VERIFY_READ 0 #define VERIFY_WRITE 1 @@ -420,12 +421,5 @@ static inline long strnlen_user(const char __user *str, long len) return __strnlen_user(str, len); } -struct exception_table_entry { - unsigned long insn; - unsigned long fixup; -}; - -extern int fixup_exception(struct pt_regs *regs); - #endif /* __SCORE_UACCESS_H */ diff --git a/arch/sh/include/asm/uaccess.h b/arch/sh/include/asm/uaccess.h index 92ade79ac427..a38d0c7b818f 100644 --- a/arch/sh/include/asm/uaccess.h +++ b/arch/sh/include/asm/uaccess.h @@ -192,8 +192,6 @@ struct exception_table_entry { #endif int fixup_exception(struct pt_regs *regs); -/* Returns 0 if exception not found and fixup.unit otherwise. 
*/ -unsigned long search_exception_table(unsigned long addr); const struct exception_table_entry *search_exception_tables(unsigned long addr); extern void *set_exception_table_vec(unsigned int vec, void *handler); diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h index 93310837c2df..3f2d403873bd 100644 --- a/arch/sparc/include/asm/elf_64.h +++ b/arch/sparc/include/asm/elf_64.h @@ -7,7 +7,7 @@ #include <asm/ptrace.h> #include <asm/processor.h> -#include <asm/uaccess.h> +#include <asm/extable_64.h> #include <asm/spitfire.h> /* diff --git a/arch/sparc/include/asm/extable_64.h b/arch/sparc/include/asm/extable_64.h new file mode 100644 index 000000000000..1121cb056ffb --- /dev/null +++ b/arch/sparc/include/asm/extable_64.h @@ -0,0 +1,20 @@ +#ifndef __ASM_EXTABLE64_H +#define __ASM_EXTABLE64_H +/* + * The exception table consists of pairs of addresses: the first is the + * address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. No registers are + * modified, so it is entirely up to the continuation code to figure out + * what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry { + unsigned int insn, fixup; +}; + +#endif diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index 37a315d0ddd4..b68acc563235 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -13,6 +13,7 @@ #include <asm/asi.h> #include <asm/spitfire.h> #include <asm-generic/uaccess-unaligned.h> +#include <asm/extable_64.h> #endif #ifndef __ASSEMBLY__ @@ -81,23 +82,6 @@ static inline int access_ok(int type, const void __user * addr, unsigned long si return 1; } -/* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is - * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. - * - * All the routines below use bits of fixup code that are out of line - * with the main instruction path. This means when everything is well, - * we don't even have to jump over them. Further, they do not intrude - * on our cache or tlb entries. - */ - -struct exception_table_entry { - unsigned int insn, fixup; -}; - void __ret_efault(void); void __retl_efault(void); diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 61518cf79437..872877d930de 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -4,7 +4,6 @@ /* Caches aren't brain-dead on the intel. */ #include <asm-generic/cacheflush.h> #include <asm/special_insns.h> -#include <asm/uaccess.h> /* * The set_memory_* API can be used to change various attributes of a virtual diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h new file mode 100644 index 000000000000..b8ad261d11dc --- /dev/null +++ b/arch/x86/include/asm/extable.h @@ -0,0 +1,35 @@ +#ifndef _ASM_X86_EXTABLE_H +#define _ASM_X86_EXTABLE_H +/* + * The exception table consists of triples of addresses relative to the + * exception table entry itself. 
The first address is of an instruction + * that is allowed to fault, the second is the target at which the program + * should continue. The third is a handler function to deal with the fault + * caused by the instruction in the first field. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry { + int insn, fixup, handler; +}; +struct pt_regs; + +#define ARCH_HAS_RELATIVE_EXTABLE + +#define swap_ex_entry_fixup(a, b, tmp, delta) \ + do { \ + (a)->fixup = (b)->fixup + (delta); \ + (b)->fixup = (tmp).fixup - (delta); \ + (a)->handler = (b)->handler + (delta); \ + (b)->handler = (tmp).handler - (delta); \ + } while (0) + +extern int fixup_exception(struct pt_regs *regs, int trapnr); +extern bool ex_has_fault_handler(unsigned long ip); +extern void early_fixup_exception(struct pt_regs *regs, int trapnr); + +#endif diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 13b6cdd0af57..2f75f30cb2f6 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -2,7 +2,7 @@ #define _ASM_X86_SECTIONS_H #include <asm-generic/sections.h> -#include <asm/uaccess.h> +#include <asm/extable.h> extern char __brk_base[], __brk_limit[]; extern struct exception_table_entry __stop___ex_table[]; diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 2131c4ce7d8a..faf3687f1035 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -11,6 +11,7 @@ #include <asm/asm.h> #include <asm/page.h> #include <asm/smap.h> +#include <asm/extable.h> #define VERIFY_READ 0 #define VERIFY_WRITE 1 @@ -91,37 +92,6 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un likely(!__range_not_ok(addr, size, user_addr_max())) /* - * The exception table consists of triples of addresses relative to the - * exception table entry itself. The first address is of an instruction - * that is allowed to fault, the second is the target at which the program - * should continue. The third is a handler function to deal with the fault - * caused by the instruction in the first field. - * - * All the routines below use bits of fixup code that are out of line - * with the main instruction path. This means when everything is well, - * we don't even have to jump over them. Further, they do not intrude - * on our cache or tlb entries. - */ - -struct exception_table_entry { - int insn, fixup, handler; -}; - -#define ARCH_HAS_RELATIVE_EXTABLE - -#define swap_ex_entry_fixup(a, b, tmp, delta) \ - do { \ - (a)->fixup = (b)->fixup + (delta); \ - (b)->fixup = (tmp).fixup - (delta); \ - (a)->handler = (b)->handler + (delta); \ - (b)->handler = (tmp).handler - (delta); \ - } while (0) - -extern int fixup_exception(struct pt_regs *regs, int trapnr); -extern bool ex_has_fault_handler(unsigned long ip); -extern void early_fixup_exception(struct pt_regs *regs, int trapnr); - -/* * These are the main single-value transfer routines. They automatically * use the right size if we just have the right pointer type. * diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 4dc13340653e..9f72ca3b2669 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -5,7 +5,7 @@ */ #include <linux/sched.h> /* test_thread_flag(), ... */ #include <linux/kdebug.h> /* oops_begin/end, ... 
*/ -#include <linux/extable.h> /* search_exception_table */ +#include <linux/extable.h> /* search_exception_tables */ #include <linux/bootmem.h> /* max_low_pfn */ #include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */ #include <linux/mmiotrace.h> /* kmmio_handler, ... */ diff --git a/arch/xtensa/include/asm/asm-uaccess.h b/arch/xtensa/include/asm/asm-uaccess.h new file mode 100644 index 000000000000..a7a110039786 --- /dev/null +++ b/arch/xtensa/include/asm/asm-uaccess.h @@ -0,0 +1,160 @@ +/* + * arch/xtensa/include/asm/asm-uaccess.h + * + * User space memory access functions + * + * These routines provide basic accessing functions to the user memory + * space for the kernel. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2001 - 2005 Tensilica Inc. + */ + +#ifndef _XTENSA_ASM_UACCESS_H +#define _XTENSA_ASM_UACCESS_H + +#include <linux/errno.h> +#include <asm/types.h> + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +#include <asm/current.h> +#include <asm/asm-offsets.h> +#include <asm/processor.h> + +/* + * These assembly macros mirror the C macros in asm/uaccess.h. They + * should always have identical functionality. See + * arch/xtensa/kernel/sys.S for usage. + */ + +#define KERNEL_DS 0 +#define USER_DS 1 + +#define get_ds (KERNEL_DS) + +/* + * get_fs reads current->thread.current_ds into a register. + * On Entry: + * <ad> anything + * <sp> stack + * On Exit: + * <ad> contains current->thread.current_ds + */ + .macro get_fs ad, sp + GET_CURRENT(\ad,\sp) +#if THREAD_CURRENT_DS > 1020 + addi \ad, \ad, TASK_THREAD + l32i \ad, \ad, THREAD_CURRENT_DS - TASK_THREAD +#else + l32i \ad, \ad, THREAD_CURRENT_DS +#endif + .endm + +/* + * set_fs sets current->thread.current_ds to some value. + * On Entry: + * <at> anything (temp register) + * <av> value to write + * <sp> stack + * On Exit: + * <at> destroyed (actually, current) + * <av> preserved, value to write + */ + .macro set_fs at, av, sp + GET_CURRENT(\at,\sp) + s32i \av, \at, THREAD_CURRENT_DS + .endm + +/* + * kernel_ok determines whether we should bypass addr/size checking. + * See the equivalent C-macro version in asm/uaccess.h for clarity. + * On success, kernel_ok branches to a label indicated by parameter + * <success>. This implies that the macro falls through to the next + * instruction on an error. + * + * Note that while this macro can be used independently, we designed + * it for optimal use in the access_ok macro below (i.e., we fall + * through on error). + * + * On Entry: + * <at> anything (temp register) + * <success> label to branch to on success; implies + * fall-through macro on error + * <sp> stack pointer + * On Exit: + * <at> destroyed (actually, current->thread.current_ds) + */ + +#if ((KERNEL_DS != 0) || (USER_DS == 0)) +# error Assembly macro kernel_ok fails +#endif + .macro kernel_ok at, sp, success + get_fs \at, \sp + beqz \at, \success + .endm + +/* + * user_ok determines whether the access to user-space memory is allowed. + * See the equivalent C-macro version in asm/uaccess.h for clarity. + * + * On error, user_ok branches to a label indicated by parameter + * <error>. This implies that the macro falls through to the next + * instruction on success. + * + * Note that while this macro can be used independently, we designed + * it for optimal use in the access_ok macro below (i.e., we fall + * through on success). 
+ * + * On Entry: + * <aa> register containing memory address + * <as> register containing memory size + * <at> temp register + * <error> label to branch to on error; implies fall-through + * macro on success + * On Exit: + * <aa> preserved + * <as> preserved + * <at> destroyed (actually, (TASK_SIZE + 1 - size)) + */ + .macro user_ok aa, as, at, error + movi \at, __XTENSA_UL_CONST(TASK_SIZE) + bgeu \as, \at, \error + sub \at, \at, \as + bgeu \aa, \at, \error + .endm + +/* + * access_ok determines whether a memory access is allowed. See the + * equivalent C-macro version in asm/uaccess.h for clarity. + * + * On error, access_ok branches to a label indicated by parameter + * <error>. This implies that the macro falls through to the next + * instruction on success. + * + * Note that we assume success is the common case, and we optimize the + * branch fall-through case on success. + * + * On Entry: + * <aa> register containing memory address + * <as> register containing memory size + * <at> temp register + * <sp> stack pointer + * <error> label to branch to on error; implies fall-through + * macro on success + * On Exit: + * <aa> preserved + * <as> preserved + * <at> destroyed + */ + .macro access_ok aa, as, at, sp, error + kernel_ok \at, \sp, .Laccess_ok_\@ + user_ok \aa, \as, \at, \error +.Laccess_ok_\@: + .endm + +#endif /* _XTENSA_ASM_UACCESS_H */ diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index 147b26ed9c91..848a3d736bcb 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -17,153 +17,12 @@ #define _XTENSA_UACCESS_H #include <linux/errno.h> -#ifndef __ASSEMBLY__ #include <linux/prefetch.h> -#endif #include <asm/types.h> #define VERIFY_READ 0 #define VERIFY_WRITE 1 -#ifdef __ASSEMBLY__ - -#include <asm/current.h> -#include <asm/asm-offsets.h> -#include <asm/processor.h> - -/* - * These assembly macros mirror the C macros that follow below. They - * should always have identical functionality. See - * arch/xtensa/kernel/sys.S for usage. - */ - -#define KERNEL_DS 0 -#define USER_DS 1 - -#define get_ds (KERNEL_DS) - -/* - * get_fs reads current->thread.current_ds into a register. - * On Entry: - * <ad> anything - * <sp> stack - * On Exit: - * <ad> contains current->thread.current_ds - */ - .macro get_fs ad, sp - GET_CURRENT(\ad,\sp) -#if THREAD_CURRENT_DS > 1020 - addi \ad, \ad, TASK_THREAD - l32i \ad, \ad, THREAD_CURRENT_DS - TASK_THREAD -#else - l32i \ad, \ad, THREAD_CURRENT_DS -#endif - .endm - -/* - * set_fs sets current->thread.current_ds to some value. - * On Entry: - * <at> anything (temp register) - * <av> value to write - * <sp> stack - * On Exit: - * <at> destroyed (actually, current) - * <av> preserved, value to write - */ - .macro set_fs at, av, sp - GET_CURRENT(\at,\sp) - s32i \av, \at, THREAD_CURRENT_DS - .endm - -/* - * kernel_ok determines whether we should bypass addr/size checking. - * See the equivalent C-macro version below for clarity. - * On success, kernel_ok branches to a label indicated by parameter - * <success>. This implies that the macro falls through to the next - * insruction on an error. - * - * Note that while this macro can be used independently, we designed - * in for optimal use in the access_ok macro below (i.e., we fall - * through on error).
- * - * On Entry: - * <at> anything (temp register) - * <success> label to branch to on success; implies - * fall-through macro on error - * <sp> stack pointer - * On Exit: - * <at> destroyed (actually, current->thread.current_ds) - */ - -#if ((KERNEL_DS != 0) || (USER_DS == 0)) -# error Assembly macro kernel_ok fails -#endif - .macro kernel_ok at, sp, success - get_fs \at, \sp - beqz \at, \success - .endm - -/* - * user_ok determines whether the access to user-space memory is allowed. - * See the equivalent C-macro version below for clarity. - * - * On error, user_ok branches to a label indicated by parameter - * <error>. This implies that the macro falls through to the next - * instruction on success. - * - * Note that while this macro can be used independently, we designed - * in for optimal use in the access_ok macro below (i.e., we fall - * through on success). - * - * On Entry: - * <aa> register containing memory address - * <as> register containing memory size - * <at> temp register - * <error> label to branch to on error; implies fall-through - * macro on success - * On Exit: - * <aa> preserved - * <as> preserved - * <at> destroyed (actually, (TASK_SIZE + 1 - size)) - */ - .macro user_ok aa, as, at, error - movi \at, __XTENSA_UL_CONST(TASK_SIZE) - bgeu \as, \at, \error - sub \at, \at, \as - bgeu \aa, \at, \error - .endm - -/* - * access_ok determines whether a memory access is allowed. See the - * equivalent C-macro version below for clarity. - * - * On error, access_ok branches to a label indicated by parameter - * <error>. This implies that the macro falls through to the next - * instruction on success. - * - * Note that we assume success is the common case, and we optimize the - * branch fall-through case on success. - * - * On Entry: - * <aa> register containing memory address - * <as> register containing memory size - * <at> temp register - * <sp> - * <error> label to branch to on error; implies fall-through - * macro on success - * On Exit: - * <aa> preserved - * <as> preserved - * <at> destroyed - */ - .macro access_ok aa, as, at, sp, error - kernel_ok \at, \sp, .Laccess_ok_\@ - user_ok \aa, \as, \at, \error -.Laccess_ok_\@: - .endm - -#else /* __ASSEMBLY__ not defined */ - #include <linux/sched.h> /* @@ -495,16 +354,4 @@ struct exception_table_entry unsigned long insn, fixup; }; -/* Returns 0 if exception not found and fixup.unit otherwise. 
*/ - -extern unsigned long search_exception_table(unsigned long addr); -extern void sort_exception_table(void); - -/* Returns the new pc */ -#define fixup_exception(map_reg, fixup_unit, pc) \ -({ \ - fixup_unit; \ -}) - -#endif /* __ASSEMBLY__ */ #endif /* _XTENSA_UACCESS_H */ diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S index a482df5df2b2..6911e384f608 100644 --- a/arch/xtensa/kernel/coprocessor.S +++ b/arch/xtensa/kernel/coprocessor.S @@ -17,7 +17,7 @@ #include <asm/processor.h> #include <asm/coprocessor.h> #include <asm/thread_info.h> -#include <asm/uaccess.h> +#include <asm/asm-uaccess.h> #include <asm/unistd.h> #include <asm/ptrace.h> #include <asm/current.h> diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index fa04d9d368a7..f5ef3cc0497c 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -17,7 +17,7 @@ #include <asm/processor.h> #include <asm/coprocessor.h> #include <asm/thread_info.h> -#include <asm/uaccess.h> +#include <asm/asm-uaccess.h> #include <asm/unistd.h> #include <asm/ptrace.h> #include <asm/current.h> diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index f4ebe39539af..35e8fbca10ad 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -520,7 +520,8 @@ static void acpi_thermal_check(void *data) if (!tz->tz_enabled) return; - thermal_zone_device_update(tz->thermal_zone); + thermal_zone_device_update(tz->thermal_zone, + THERMAL_EVENT_UNSPECIFIED); } /* sys I/F for generic thermal sysfs support */ diff --git a/drivers/char/tb0219.c b/drivers/char/tb0219.c index 480a777db577..7c19d9b22785 100644 --- a/drivers/char/tb0219.c +++ b/drivers/char/tb0219.c @@ -21,6 +21,7 @@ #include <linux/fs.h> #include <linux/init.h> #include <linux/module.h> +#include <linux/uaccess.h> #include <asm/io.h> #include <asm/reboot.h> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 3f31ca32f52b..5fa36ebc0640 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -471,9 +471,9 @@ static int bond_check_dev_link(struct bonding *bond, /* Yes, the mii is overlaid on the ifreq.ifr_ifru */ strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); mii = if_mii(&ifr); - if (IOCTL(slave_dev, &ifr, SIOCGMIIPHY) == 0) { + if (ioctl(slave_dev, &ifr, SIOCGMIIPHY) == 0) { mii->reg_num = MII_BMSR; - if (IOCTL(slave_dev, &ifr, SIOCGMIIREG) == 0) + if (ioctl(slave_dev, &ifr, SIOCGMIIREG) == 0) return mii->val_out & BMSR_LSTATUS; } } diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index bddb198c0b74..380a64115a98 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -693,7 +693,7 @@ static int cn23xx_enable_io_queues(struct octeon_device *oct) while ((reg_val & CN23XX_PKT_INPUT_CTL_RST) && !(reg_val & CN23XX_PKT_INPUT_CTL_QUIET) && - loop--) { + --loop) { reg_val = octeon_read_csr64( oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index d4585154151d..cc4fd61914d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -287,7 +287,7 @@ retry: goto retry; } - MLX5_SET64(manage_pages_in, in, pas[i], addr); + MLX5_ARRAY_SET64(manage_pages_in, in, pas, i, addr); } MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); 
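The MLX5_SET64 -> MLX5_ARRAY_SET64 conversions in this file concern writes into a 64-bit array field: a fixed-field setter bakes in a single offset, so storing element i requires the index to be folded into the offset explicitly. A minimal sketch of that pattern, using hypothetical set64()/array_set64() helpers rather than the actual mlx5 macros:

#include <stdint.h>
#include <string.h>

/* Fixed-field case: store one 64-bit value big-endian at a byte
 * offset into a command buffer (assumes a little-endian host). */
static inline void set64(void *buf, size_t off, uint64_t val)
{
	uint64_t be = __builtin_bswap64(val);

	memcpy((unsigned char *)buf + off, &be, sizeof(be));
}

/* Array-field case: the same store, but the element index scales the
 * offset -- the job MLX5_ARRAY_SET64(typ, p, pas, i, addr) performs
 * for the "pas" arrays in the hunks above and below. */
static inline void array_set64(void *buf, size_t base, unsigned int idx,
			       uint64_t val)
{
	set64(buf, base + idx * sizeof(uint64_t), val);
}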
@@ -344,7 +344,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, if (fwp->func_id != func_id) continue; - MLX5_SET64(manage_pages_out, out, pas[i], fwp->addr); + MLX5_ARRAY_SET64(manage_pages_out, out, pas, i, fwp->addr); i++; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index a6db10717d5c..02a8be2faed7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -1517,7 +1517,7 @@ static void qed_ll2_register_cb_ops(struct qed_dev *cdev, static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) { struct qed_ll2_info ll2_info; - struct qed_ll2_buffer *buffer; + struct qed_ll2_buffer *buffer, *tmp_buffer; enum qed_ll2_conn_type conn_type; struct qed_ptt *p_ptt; int rc, i; @@ -1587,7 +1587,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) /* Post all Rx buffers to FW */ spin_lock_bh(&cdev->ll2->lock); - list_for_each_entry(buffer, &cdev->ll2->list, list) { + list_for_each_entry_safe(buffer, tmp_buffer, &cdev->ll2->list, list) { rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev), cdev->ll2->handle, buffer->phys_addr, 0, buffer, 1); diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index 23430059471c..76831a398bed 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -2947,7 +2947,7 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = { .roce_ll2_stats = &qed_roce_ll2_stats, }; -const struct qed_rdma_ops *qed_get_rdma_ops() +const struct qed_rdma_ops *qed_get_rdma_ops(void) { return &qed_rdma_ops_pass; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 1a06c87e3935..da0b80a1917a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -652,20 +652,27 @@ static int stmmac_init_ptp(struct stmmac_priv *priv) if (IS_ERR(priv->clk_ptp_ref)) { priv->clk_ptp_rate = clk_get_rate(priv->stmmac_clk); priv->clk_ptp_ref = NULL; + netdev_dbg(priv->dev, "PTP uses main clock\n"); } else { clk_prepare_enable(priv->clk_ptp_ref); priv->clk_ptp_rate = clk_get_rate(priv->clk_ptp_ref); + netdev_dbg(priv->dev, "PTP rate %d\n", priv->clk_ptp_rate); } priv->adv_ts = 0; - if (priv->dma_cap.atime_stamp && priv->extend_desc) + /* Check if adv_ts can be enabled for dwmac 4.x core */ + if (priv->plat->has_gmac4 && priv->dma_cap.atime_stamp) + priv->adv_ts = 1; + /* Dwmac 3.x core with extend_desc can support adv_ts */ + else if (priv->extend_desc && priv->dma_cap.atime_stamp) priv->adv_ts = 1; - if (netif_msg_hw(priv) && priv->dma_cap.time_stamp) - pr_debug("IEEE 1588-2002 Time Stamp supported\n"); + if (priv->dma_cap.time_stamp) + netdev_info(priv->dev, "IEEE 1588-2002 Timestamp supported\n"); - if (netif_msg_hw(priv) && priv->adv_ts) - pr_debug("IEEE 1588-2008 Advanced Time Stamp supported\n"); + if (priv->adv_ts) + netdev_info(priv->dev, + "IEEE 1588-2008 Advanced Timestamp supported\n"); priv->hw->ptp = &stmmac_ptp; priv->hwts_tx_en = 0; @@ -1702,8 +1709,8 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) if (init_ptp) { ret = stmmac_init_ptp(priv); - if (ret && ret != -EOPNOTSUPP) - pr_warn("%s: failed PTP initialisation\n", __func__); + if (ret) + netdev_warn(priv->dev, "failed to initialize PTP\n"); } #ifdef CONFIG_DEBUG_FS diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 6e3b82972ce8..289d52725a6c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -186,10 +186,12 @@ int stmmac_ptp_register(struct stmmac_priv *priv) priv->device); if (IS_ERR(priv->ptp_clock)) { + int ret = PTR_ERR(priv->ptp_clock); + priv->ptp_clock = NULL; - pr_err("ptp_clock_register() failed on %s\n", priv->dev->name); - } else if (priv->ptp_clock) - pr_debug("Added PTP HW clock successfully on %s\n", - priv->dev->name); + return ret; + } + + spin_lock_init(&priv->ptp_lock); + + netdev_dbg(priv->dev, "Added PTP HW clock successfully\n"); return 0; } diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c index 4a3eeb10d45b..c8d53d8c83ee 100644 --- a/drivers/net/ethernet/ti/tlan.c +++ b/drivers/net/ethernet/ti/tlan.c @@ -610,8 +610,8 @@ err_out_regions: #ifdef CONFIG_PCI if (pdev) pci_release_regions(pdev); -#endif err_out: +#endif if (pdev) pci_disable_device(pdev); return rc; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 35f9f9742a48..c688d68c39aa 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -431,8 +431,7 @@ static void axienet_setoptions(struct net_device *ndev, u32 options) lp->options |= options; } -static void __axienet_device_reset(struct axienet_local *lp, - struct device *dev, off_t offset) +static void __axienet_device_reset(struct axienet_local *lp, off_t offset) { u32 timeout; /* Reset Axi DMA. This would reset Axi Ethernet core as well. The reset @@ -468,8 +467,8 @@ static void axienet_device_reset(struct net_device *ndev) u32 axienet_status; struct axienet_local *lp = netdev_priv(ndev); - __axienet_device_reset(lp, &ndev->dev, XAXIDMA_TX_CR_OFFSET); - __axienet_device_reset(lp, &ndev->dev, XAXIDMA_RX_CR_OFFSET); + __axienet_device_reset(lp, XAXIDMA_TX_CR_OFFSET); + __axienet_device_reset(lp, XAXIDMA_RX_CR_OFFSET); lp->max_frm_size = XAE_MAX_VLAN_FRAME_SIZE; lp->options |= XAE_OPTION_VLAN; @@ -1338,8 +1337,8 @@ static void axienet_dma_err_handler(unsigned long data) axienet_iow(lp, XAE_MDIO_MC_OFFSET, (mdio_mcreg & ~XAE_MDIO_MC_MDIOEN_MASK)); - __axienet_device_reset(lp, &ndev->dev, XAXIDMA_TX_CR_OFFSET); - __axienet_device_reset(lp, &ndev->dev, XAXIDMA_RX_CR_OFFSET); + __axienet_device_reset(lp, XAXIDMA_TX_CR_OFFSET); + __axienet_device_reset(lp, XAXIDMA_RX_CR_OFFSET); axienet_iow(lp, XAE_MDIO_MC_OFFSET, mdio_mcreg); axienet_mdio_wait_until_ready(lp); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 52eeb2f67276..f0919bd3a563 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -442,8 +442,6 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) } net_trans_info = get_net_transport_info(skb, &hdr_offset); - if (net_trans_info == TRANSPORT_INFO_NOT_IP) - goto do_send; /* * Setup the sendside checksum offload only if this is not a @@ -478,56 +476,29 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) } lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset; lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size; - goto do_send; - } - - if ((skb->ip_summed == CHECKSUM_NONE) || - (skb->ip_summed == CHECKSUM_UNNECESSARY)) - goto do_send; - - rndis_msg_size += NDIS_CSUM_PPI_SIZE; - ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE, - TCPIP_CHKSUM_PKTINFO); - - csum_info = (struct
ndis_tcp_ip_checksum_info *)((void *)ppi + - ppi->ppi_offset); - - if (net_trans_info & (INFO_IPV4 << 16)) - csum_info->transmit.is_ipv4 = 1; - else - csum_info->transmit.is_ipv6 = 1; - - if (net_trans_info & INFO_TCP) { - csum_info->transmit.tcp_checksum = 1; - csum_info->transmit.tcp_header_offset = hdr_offset; - } else if (net_trans_info & INFO_UDP) { - /* UDP checksum offload is not supported on ws2008r2. - * Furthermore, on ws2012 and ws2012r2, there are some - * issues with udp checksum offload from Linux guests. - * (these are host issues). - * For now compute the checksum here. - */ - struct udphdr *uh; - u16 udp_len; - - ret = skb_cow_head(skb, 0); - if (ret) - goto no_memory; - - uh = udp_hdr(skb); - udp_len = ntohs(uh->len); - uh->check = 0; - uh->check = csum_tcpudp_magic(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, - udp_len, IPPROTO_UDP, - csum_partial(uh, udp_len, 0)); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - - csum_info->transmit.udp_checksum = 0; + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (net_trans_info & INFO_TCP) { + rndis_msg_size += NDIS_CSUM_PPI_SIZE; + ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE, + TCPIP_CHKSUM_PKTINFO); + + csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi + + ppi->ppi_offset); + + if (net_trans_info & (INFO_IPV4 << 16)) + csum_info->transmit.is_ipv4 = 1; + else + csum_info->transmit.is_ipv6 = 1; + + csum_info->transmit.tcp_checksum = 1; + csum_info->transmit.tcp_header_offset = hdr_offset; + } else { + /* UDP checksum (and other) offload is not supported. */ + if (skb_checksum_help(skb)) + goto drop; + } } -do_send: /* Start filling in the page buffers with the rndis hdr */ rndis_msg->msg_len += rndis_msg_size; packet->total_data_buflen = rndis_msg->msg_len; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index c6f66832a1a6..f424b867f73e 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -608,6 +608,21 @@ void phy_start_machine(struct phy_device *phydev) } /** + * phy_trigger_machine - trigger the state machine to run + * @phydev: the phy_device struct + * + * Description: There has been a change in state which requires that the + * state machine run.
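+ *
+ * Example (mirroring what phy_error() below does after forcing a new
+ * state, so the change is acted on immediately rather than on the
+ * next scheduled state-machine poll):
+ *
+ *	phydev->state = PHY_HALTED;
+ *	phy_trigger_machine(phydev);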
+ */ + +static void phy_trigger_machine(struct phy_device *phydev) +{ + cancel_delayed_work_sync(&phydev->state_queue); + queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, 0); +} + +/** * phy_stop_machine - stop the PHY state machine tracking * @phydev: target phy_device struct * @@ -639,6 +654,8 @@ static void phy_error(struct phy_device *phydev) mutex_lock(&phydev->lock); phydev->state = PHY_HALTED; mutex_unlock(&phydev->lock); + + phy_trigger_machine(phydev); } /** @@ -800,8 +817,7 @@ void phy_change(struct work_struct *work) } /* reschedule state queue work to run as soon as possible */ - cancel_delayed_work_sync(&phydev->state_queue); - queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, 0); + phy_trigger_machine(phydev); return; ignore: @@ -890,6 +906,8 @@ void phy_start(struct phy_device *phydev) /* if phy was suspended, bring the physical link up again */ if (do_resume) phy_resume(phydev); + + phy_trigger_machine(phydev); } EXPORT_SYMBOL(phy_start); diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 9d1fce8a6e84..3ff76c6db4f6 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -59,6 +59,10 @@ enum qmi_wwan_flags { QMI_WWAN_FLAG_RAWIP = 1 << 0, }; +enum qmi_wwan_quirks { + QMI_WWAN_QUIRK_DTR = 1 << 0, /* needs "set DTR" request */ +}; + static void qmi_wwan_netdev_setup(struct net_device *net) { struct usbnet *dev = netdev_priv(net); @@ -411,9 +415,14 @@ static int qmi_wwan_bind(struct usbnet *dev, struct usb_interface *intf) * clearing out state the clients might need. * * MDM9x30 is the first QMI chipset with USB3 support. Abuse - * this fact to enable the quirk. + * this fact to enable the quirk for all USB3 devices. + * + * There are also chipsets with the same "set DTR" requirement + * but without USB3 support. Devices based on these chips + * need a quirk flag in the device ID table. */ - if (le16_to_cpu(dev->udev->descriptor.bcdUSB) >= 0x0201) { + if (dev->driver_info->data & QMI_WWAN_QUIRK_DTR || + le16_to_cpu(dev->udev->descriptor.bcdUSB) >= 0x0201) { qmi_wwan_manage_power(dev, 1); qmi_wwan_change_dtr(dev, true); } @@ -526,6 +535,16 @@ static const struct driver_info qmi_wwan_info = { .rx_fixup = qmi_wwan_rx_fixup, }; +static const struct driver_info qmi_wwan_info_quirk_dtr = { + .description = "WWAN/QMI device", + .flags = FLAG_WWAN, + .bind = qmi_wwan_bind, + .unbind = qmi_wwan_unbind, + .manage_power = qmi_wwan_manage_power, + .rx_fixup = qmi_wwan_rx_fixup, + .data = QMI_WWAN_QUIRK_DTR, +}; + #define HUAWEI_VENDOR_ID 0x12D1 /* map QMI/wwan function by a fixed interface number */ @@ -533,6 +552,11 @@ static const struct driver_info qmi_wwan_info = { USB_DEVICE_INTERFACE_NUMBER(vend, prod, num), \ .driver_info = (unsigned long)&qmi_wwan_info +/* devices requiring "set DTR" quirk */ +#define QMI_QUIRK_SET_DTR(vend, prod, num) \ + USB_DEVICE_INTERFACE_NUMBER(vend, prod, num), \ + .driver_info = (unsigned long)&qmi_wwan_info_quirk_dtr + /* Gobi 1000 QMI/wwan interface number is 3 according to qcserial */ #define QMI_GOBI1K_DEVICE(vend, prod) \ QMI_FIXED_INTF(vend, prod, 3) @@ -895,6 +919,8 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ {QMI_FIXED_INTF(0x1e0e, 0x9001, 5)}, /* SIMCom 7230E */ + {QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ + {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ /* 4. 
Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index cf68149cbb55..3ce1f7da8647 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -407,4 +407,8 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, void xenvif_set_skb_hash(struct xenvif *vif, struct sk_buff *skb); +#ifdef CONFIG_DEBUG_FS +void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m); +#endif + #endif /* __XEN_NETBACK__COMMON_H__ */ diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index 613bac057650..e8c5dddc54ba 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -360,6 +360,74 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, return XEN_NETIF_CTRL_STATUS_SUCCESS; } +#ifdef CONFIG_DEBUG_FS +void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m) +{ + unsigned int i; + + switch (vif->hash.alg) { + case XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ: + seq_puts(m, "Hash Algorithm: TOEPLITZ\n"); + break; + + case XEN_NETIF_CTRL_HASH_ALGORITHM_NONE: + seq_puts(m, "Hash Algorithm: NONE\n"); + /* FALLTHRU */ + default: + return; + } + + if (vif->hash.flags) { + seq_puts(m, "\nHash Flags:\n"); + + if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV4) + seq_puts(m, "- IPv4\n"); + if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP) + seq_puts(m, "- IPv4 + TCP\n"); + if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV6) + seq_puts(m, "- IPv6\n"); + if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP) + seq_puts(m, "- IPv6 + TCP\n"); + } + + seq_puts(m, "\nHash Key:\n"); + + for (i = 0; i < XEN_NETBK_MAX_HASH_KEY_SIZE; ) { + unsigned int j, n; + + n = 8; + if (i + n >= XEN_NETBK_MAX_HASH_KEY_SIZE) + n = XEN_NETBK_MAX_HASH_KEY_SIZE - i; + + seq_printf(m, "[%2u - %2u]: ", i, i + n - 1); + + for (j = 0; j < n; j++, i++) + seq_printf(m, "%02x ", vif->hash.key[i]); + + seq_puts(m, "\n"); + } + + if (vif->hash.size != 0) { + seq_puts(m, "\nHash Mapping:\n"); + + for (i = 0; i < vif->hash.size; ) { + unsigned int j, n; + + n = 8; + if (i + n >= vif->hash.size) + n = vif->hash.size - i; + + seq_printf(m, "[%4u - %4u]: ", i, i + n - 1); + + for (j = 0; j < n; j++, i++) + seq_printf(m, "%4u ", vif->hash.mapping[i]); + + seq_puts(m, "\n"); + } + } +} +#endif /* CONFIG_DEBUG_FS */ + void xenvif_init_hash(struct xenvif *vif) { if (xenvif_hash_cache_size == 0) diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index 8e9ade6ccf18..b1cf7c6f407a 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -337,9 +337,9 @@ static void xenvif_rx_next_chunk(struct xenvif_queue *queue, frag_data += pkt->frag_offset; frag_len -= pkt->frag_offset; - chunk_len = min(frag_len, XEN_PAGE_SIZE - offset); - chunk_len = min(chunk_len, - XEN_PAGE_SIZE - xen_offset_in_page(frag_data)); + chunk_len = min_t(size_t, frag_len, XEN_PAGE_SIZE - offset); + chunk_len = min_t(size_t, chunk_len, XEN_PAGE_SIZE - + xen_offset_in_page(frag_data)); pkt->frag_offset += chunk_len; @@ -425,6 +425,8 @@ void xenvif_rx_skb(struct xenvif_queue *queue) xenvif_rx_next_skb(queue, &pkt); + queue->last_rx_time = jiffies; + do { struct xen_netif_rx_request *req; struct xen_netif_rx_response *rsp; diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 7056404e3cb8..8674e188b697 100644 --- a/drivers/net/xen-netback/xenbus.c +++ 
b/drivers/net/xen-netback/xenbus.c @@ -165,7 +165,7 @@ xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count, return count; } -static int xenvif_dump_open(struct inode *inode, struct file *filp) +static int xenvif_io_ring_open(struct inode *inode, struct file *filp) { int ret; void *queue = NULL; @@ -179,13 +179,35 @@ static int xenvif_dump_open(struct inode *inode, struct file *filp) static const struct file_operations xenvif_dbg_io_ring_ops_fops = { .owner = THIS_MODULE, - .open = xenvif_dump_open, + .open = xenvif_io_ring_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, .write = xenvif_write_io_ring, }; +static int xenvif_read_ctrl(struct seq_file *m, void *v) +{ + struct xenvif *vif = m->private; + + xenvif_dump_hash_info(vif, m); + + return 0; +} + +static int xenvif_ctrl_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, xenvif_read_ctrl, inode->i_private); +} + +static const struct file_operations xenvif_dbg_ctrl_ops_fops = { + .owner = THIS_MODULE, + .open = xenvif_ctrl_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static void xenvif_debugfs_addif(struct xenvif *vif) { struct dentry *pfile; @@ -210,6 +232,17 @@ static void xenvif_debugfs_addif(struct xenvif *vif) pr_warn("Creation of io_ring file returned %ld!\n", PTR_ERR(pfile)); } + + if (vif->ctrl_irq) { + pfile = debugfs_create_file("ctrl", + S_IRUSR, + vif->xenvif_dbg_root, + vif, + &xenvif_dbg_ctrl_ops_fops); + if (IS_ERR_OR_NULL(pfile)) + pr_warn("Creation of ctrl file returned %ld!\n", + PTR_ERR(pfile)); + } } else netdev_warn(vif->dev, "Creation of vif debugfs dir returned %ld!\n", diff --git a/drivers/pci/host/pci-aardvark.c b/drivers/pci/host/pci-aardvark.c index e4a5b7ee90cf..4fce494271cc 100644 --- a/drivers/pci/host/pci-aardvark.c +++ b/drivers/pci/host/pci-aardvark.c @@ -230,20 +230,20 @@ static int advk_pcie_link_up(struct advk_pcie *pcie) static int advk_pcie_wait_for_link(struct advk_pcie *pcie) { + struct device *dev = &pcie->pdev->dev; int retries; /* check if the link is up or not */ for (retries = 0; retries < LINK_WAIT_MAX_RETRIES; retries++) { if (advk_pcie_link_up(pcie)) { - dev_info(&pcie->pdev->dev, "link up\n"); + dev_info(dev, "link up\n"); return 0; } usleep_range(LINK_WAIT_USLEEP_MIN, LINK_WAIT_USLEEP_MAX); } - dev_err(&pcie->pdev->dev, "link never came up\n"); - + dev_err(dev, "link never came up\n"); return -ETIMEDOUT; } @@ -376,6 +376,7 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie) static void advk_pcie_check_pio_status(struct advk_pcie *pcie) { + struct device *dev = &pcie->pdev->dev; u32 reg; unsigned int status; char *strcomp_status, *str_posted; @@ -407,12 +408,13 @@ static void advk_pcie_check_pio_status(struct advk_pcie *pcie) else str_posted = "Posted"; - dev_err(&pcie->pdev->dev, "%s PIO Response Status: %s, %#x @ %#x\n", + dev_err(dev, "%s PIO Response Status: %s, %#x @ %#x\n", str_posted, strcomp_status, reg, advk_readl(pcie, PIO_ADDR_LS)); } static int advk_pcie_wait_pio(struct advk_pcie *pcie) { + struct device *dev = &pcie->pdev->dev; unsigned long timeout; timeout = jiffies + msecs_to_jiffies(PIO_TIMEOUT_MS); @@ -426,7 +428,7 @@ static int advk_pcie_wait_pio(struct advk_pcie *pcie) return 0; } - dev_err(&pcie->pdev->dev, "config read/write timed out\n"); + dev_err(dev, "config read/write timed out\n"); return -ETIMEDOUT; } @@ -560,10 +562,11 @@ static int advk_pcie_alloc_msi(struct advk_pcie *pcie) static void advk_pcie_free_msi(struct advk_pcie *pcie, int hwirq) { + 
struct device *dev = &pcie->pdev->dev; + mutex_lock(&pcie->msi_used_lock); if (!test_bit(hwirq, pcie->msi_irq_in_use)) - dev_err(&pcie->pdev->dev, "trying to free unused MSI#%d\n", - hwirq); + dev_err(dev, "trying to free unused MSI#%d\n", hwirq); else clear_bit(hwirq, pcie->msi_irq_in_use); mutex_unlock(&pcie->msi_used_lock); @@ -910,6 +913,7 @@ out_release_res: static int advk_pcie_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; struct advk_pcie *pcie; struct resource *res; struct pci_bus *bus, *child; @@ -917,31 +921,29 @@ static int advk_pcie_probe(struct platform_device *pdev) struct device_node *msi_node; int ret, irq; - pcie = devm_kzalloc(&pdev->dev, sizeof(struct advk_pcie), - GFP_KERNEL); + pcie = devm_kzalloc(dev, sizeof(struct advk_pcie), GFP_KERNEL); if (!pcie) return -ENOMEM; pcie->pdev = pdev; - platform_set_drvdata(pdev, pcie); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - pcie->base = devm_ioremap_resource(&pdev->dev, res); + pcie->base = devm_ioremap_resource(dev, res); if (IS_ERR(pcie->base)) return PTR_ERR(pcie->base); irq = platform_get_irq(pdev, 0); - ret = devm_request_irq(&pdev->dev, irq, advk_pcie_irq_handler, + ret = devm_request_irq(dev, irq, advk_pcie_irq_handler, IRQF_SHARED | IRQF_NO_THREAD, "advk-pcie", pcie); if (ret) { - dev_err(&pdev->dev, "Failed to register interrupt\n"); + dev_err(dev, "Failed to register interrupt\n"); return ret; } ret = advk_pcie_parse_request_of_pci_ranges(pcie); if (ret) { - dev_err(&pdev->dev, "Failed to parse resources\n"); + dev_err(dev, "Failed to parse resources\n"); return ret; } @@ -949,24 +951,24 @@ static int advk_pcie_probe(struct platform_device *pdev) ret = advk_pcie_init_irq_domain(pcie); if (ret) { - dev_err(&pdev->dev, "Failed to initialize irq\n"); + dev_err(dev, "Failed to initialize irq\n"); return ret; } ret = advk_pcie_init_msi_irq_domain(pcie); if (ret) { - dev_err(&pdev->dev, "Failed to initialize irq\n"); + dev_err(dev, "Failed to initialize irq\n"); advk_pcie_remove_irq_domain(pcie); return ret; } - msi_node = of_parse_phandle(pdev->dev.of_node, "msi-parent", 0); + msi_node = of_parse_phandle(dev->of_node, "msi-parent", 0); if (msi_node) msi = of_pci_find_msi_chip_by_node(msi_node); else msi = NULL; - bus = pci_scan_root_bus_msi(&pdev->dev, 0, &advk_pcie_ops, + bus = pci_scan_root_bus_msi(dev, 0, &advk_pcie_ops, pcie, &pcie->resources, &pcie->msi); if (!bus) { advk_pcie_remove_msi_irq_domain(pcie); @@ -980,7 +982,6 @@ static int advk_pcie_probe(struct platform_device *pdev) pcie_bus_configure_settings(child); pci_bus_add_devices(bus); - return 0; } diff --git a/drivers/pci/host/pci-dra7xx.c b/drivers/pci/host/pci-dra7xx.c index 19223ed2e619..9595fad63f6f 100644 --- a/drivers/pci/host/pci-dra7xx.c +++ b/drivers/pci/host/pci-dra7xx.c @@ -64,11 +64,10 @@ #define DRA7XX_CPU_TO_BUS_ADDR 0x0FFFFFFF struct dra7xx_pcie { - void __iomem *base; - struct phy **phy; - int phy_count; - struct device *dev; struct pcie_port pp; + void __iomem *base; /* DT ti_conf */ + int phy_count; /* DT phy-names count */ + struct phy **phy; }; #define to_dra7xx_pcie(x) container_of((x), struct dra7xx_pcie, pp) @@ -84,17 +83,6 @@ static inline void dra7xx_pcie_writel(struct dra7xx_pcie *pcie, u32 offset, writel(value, pcie->base + offset); } -static inline u32 dra7xx_pcie_readl_rc(struct pcie_port *pp, u32 offset) -{ - return readl(pp->dbi_base + offset); -} - -static inline void dra7xx_pcie_writel_rc(struct pcie_port *pp, u32 offset, - u32 value) -{ - writel(value, pp->dbi_base + offset); -} - static 
int dra7xx_pcie_link_up(struct pcie_port *pp) { struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pp); @@ -103,13 +91,14 @@ static int dra7xx_pcie_link_up(struct pcie_port *pp) return !!(reg & LINK_UP); } -static int dra7xx_pcie_establish_link(struct pcie_port *pp) +static int dra7xx_pcie_establish_link(struct dra7xx_pcie *dra7xx) { - struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pp); + struct pcie_port *pp = &dra7xx->pp; + struct device *dev = pp->dev; u32 reg; if (dw_pcie_link_up(pp)) { - dev_err(pp->dev, "link is already up\n"); + dev_err(dev, "link is already up\n"); return 0; } @@ -120,10 +109,8 @@ static int dra7xx_pcie_establish_link(struct pcie_port *pp) return dw_pcie_wait_for_link(pp); } -static void dra7xx_pcie_enable_interrupts(struct pcie_port *pp) +static void dra7xx_pcie_enable_interrupts(struct dra7xx_pcie *dra7xx) { - struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pp); - dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MAIN, ~INTERRUPTS); dra7xx_pcie_writel(dra7xx, @@ -142,6 +129,8 @@ static void dra7xx_pcie_enable_interrupts(struct pcie_port *pp) static void dra7xx_pcie_host_init(struct pcie_port *pp) { + struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pp); + pp->io_base &= DRA7XX_CPU_TO_BUS_ADDR; pp->mem_base &= DRA7XX_CPU_TO_BUS_ADDR; pp->cfg0_base &= DRA7XX_CPU_TO_BUS_ADDR; @@ -149,10 +138,10 @@ static void dra7xx_pcie_host_init(struct pcie_port *pp) dw_pcie_setup_rc(pp); - dra7xx_pcie_establish_link(pp); + dra7xx_pcie_establish_link(dra7xx); if (IS_ENABLED(CONFIG_PCI_MSI)) dw_pcie_msi_init(pp); - dra7xx_pcie_enable_interrupts(pp); + dra7xx_pcie_enable_interrupts(dra7xx); } static struct pcie_host_ops dra7xx_pcie_host_ops = { @@ -196,8 +185,8 @@ static int dra7xx_pcie_init_irq_domain(struct pcie_port *pp) static irqreturn_t dra7xx_pcie_msi_irq_handler(int irq, void *arg) { - struct pcie_port *pp = arg; - struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pp); + struct dra7xx_pcie *dra7xx = arg; + struct pcie_port *pp = &dra7xx->pp; u32 reg; reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI); @@ -223,51 +212,51 @@ static irqreturn_t dra7xx_pcie_msi_irq_handler(int irq, void *arg) static irqreturn_t dra7xx_pcie_irq_handler(int irq, void *arg) { struct dra7xx_pcie *dra7xx = arg; + struct device *dev = dra7xx->pp.dev; u32 reg; reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MAIN); if (reg & ERR_SYS) - dev_dbg(dra7xx->dev, "System Error\n"); + dev_dbg(dev, "System Error\n"); if (reg & ERR_FATAL) - dev_dbg(dra7xx->dev, "Fatal Error\n"); + dev_dbg(dev, "Fatal Error\n"); if (reg & ERR_NONFATAL) - dev_dbg(dra7xx->dev, "Non Fatal Error\n"); + dev_dbg(dev, "Non Fatal Error\n"); if (reg & ERR_COR) - dev_dbg(dra7xx->dev, "Correctable Error\n"); + dev_dbg(dev, "Correctable Error\n"); if (reg & ERR_AXI) - dev_dbg(dra7xx->dev, "AXI tag lookup fatal Error\n"); + dev_dbg(dev, "AXI tag lookup fatal Error\n"); if (reg & ERR_ECRC) - dev_dbg(dra7xx->dev, "ECRC Error\n"); + dev_dbg(dev, "ECRC Error\n"); if (reg & PME_TURN_OFF) - dev_dbg(dra7xx->dev, + dev_dbg(dev, "Power Management Event Turn-Off message received\n"); if (reg & PME_TO_ACK) - dev_dbg(dra7xx->dev, + dev_dbg(dev, "Power Management Turn-Off Ack message received\n"); if (reg & PM_PME) - dev_dbg(dra7xx->dev, - "PM Power Management Event message received\n"); + dev_dbg(dev, "PM Power Management Event message received\n"); if (reg & LINK_REQ_RST) - dev_dbg(dra7xx->dev, "Link Request Reset\n"); + dev_dbg(dev, "Link Request Reset\n"); if (reg & LINK_UP_EVT) - dev_dbg(dra7xx->dev, "Link-up state change\n"); + 
dev_dbg(dev, "Link-up state change\n"); if (reg & CFG_BME_EVT) - dev_dbg(dra7xx->dev, "CFG 'Bus Master Enable' change\n"); + dev_dbg(dev, "CFG 'Bus Master Enable' change\n"); if (reg & CFG_MSE_EVT) - dev_dbg(dra7xx->dev, "CFG 'Memory Space Enable' change\n"); + dev_dbg(dev, "CFG 'Memory Space Enable' change\n"); dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MAIN, reg); @@ -278,13 +267,9 @@ static int __init dra7xx_add_pcie_port(struct dra7xx_pcie *dra7xx, struct platform_device *pdev) { int ret; - struct pcie_port *pp; + struct pcie_port *pp = &dra7xx->pp; + struct device *dev = pp->dev; struct resource *res; - struct device *dev = &pdev->dev; - - pp = &dra7xx->pp; - pp->dev = dev; - pp->ops = &dra7xx_pcie_host_ops; pp->irq = platform_get_irq(pdev, 1); if (pp->irq < 0) { @@ -292,12 +277,11 @@ static int __init dra7xx_add_pcie_port(struct dra7xx_pcie *dra7xx, return -EINVAL; } - ret = devm_request_irq(&pdev->dev, pp->irq, - dra7xx_pcie_msi_irq_handler, + ret = devm_request_irq(dev, pp->irq, dra7xx_pcie_msi_irq_handler, IRQF_SHARED | IRQF_NO_THREAD, - "dra7-pcie-msi", pp); + "dra7-pcie-msi", dra7xx); if (ret) { - dev_err(&pdev->dev, "failed to request irq\n"); + dev_err(dev, "failed to request irq\n"); return ret; } @@ -314,7 +298,7 @@ static int __init dra7xx_add_pcie_port(struct dra7xx_pcie *dra7xx, ret = dw_pcie_host_init(pp); if (ret) { - dev_err(dra7xx->dev, "failed to initialize host\n"); + dev_err(dev, "failed to initialize host\n"); return ret; } @@ -332,6 +316,7 @@ static int __init dra7xx_pcie_probe(struct platform_device *pdev) void __iomem *base; struct resource *res; struct dra7xx_pcie *dra7xx; + struct pcie_port *pp; struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; char name[10]; @@ -343,6 +328,10 @@ static int __init dra7xx_pcie_probe(struct platform_device *pdev) if (!dra7xx) return -ENOMEM; + pp = &dra7xx->pp; + pp->dev = dev; + pp->ops = &dra7xx_pcie_host_ops; + irq = platform_get_irq(pdev, 0); if (irq < 0) { dev_err(dev, "missing IRQ resource\n"); @@ -390,7 +379,6 @@ static int __init dra7xx_pcie_probe(struct platform_device *pdev) dra7xx->base = base; dra7xx->phy = phy; - dra7xx->dev = dev; dra7xx->phy_count = phy_count; pm_runtime_enable(dev); @@ -407,7 +395,7 @@ static int __init dra7xx_pcie_probe(struct platform_device *pdev) ret = devm_gpio_request_one(dev, gpio_sel, gpio_flags, "pcie_reset"); if (ret) { - dev_err(&pdev->dev, "gpio%d request failed, ret %d\n", + dev_err(dev, "gpio%d request failed, ret %d\n", gpio_sel, ret); goto err_gpio; } @@ -420,12 +408,11 @@ static int __init dra7xx_pcie_probe(struct platform_device *pdev) reg &= ~LTSSM_EN; dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_DEVICE_CMD, reg); - platform_set_drvdata(pdev, dra7xx); - ret = dra7xx_add_pcie_port(dra7xx, pdev); if (ret < 0) goto err_gpio; + platform_set_drvdata(pdev, dra7xx); return 0; err_gpio: @@ -451,9 +438,9 @@ static int dra7xx_pcie_suspend(struct device *dev) u32 val; /* clear MSE */ - val = dra7xx_pcie_readl_rc(pp, PCI_COMMAND); + val = dw_pcie_readl_rc(pp, PCI_COMMAND); val &= ~PCI_COMMAND_MEMORY; - dra7xx_pcie_writel_rc(pp, PCI_COMMAND, val); + dw_pcie_writel_rc(pp, PCI_COMMAND, val); return 0; } @@ -465,9 +452,9 @@ static int dra7xx_pcie_resume(struct device *dev) u32 val; /* set MSE */ - val = dra7xx_pcie_readl_rc(pp, PCI_COMMAND); + val = dw_pcie_readl_rc(pp, PCI_COMMAND); val |= PCI_COMMAND_MEMORY; - dra7xx_pcie_writel_rc(pp, PCI_COMMAND, val); + dw_pcie_writel_rc(pp, PCI_COMMAND, val); return 0; } diff --git a/drivers/pci/host/pci-exynos.c 
b/drivers/pci/host/pci-exynos.c index 2e2d7f00b9e8..f1c544bb8b68 100644 --- a/drivers/pci/host/pci-exynos.c +++ b/drivers/pci/host/pci-exynos.c @@ -29,13 +29,13 @@ #define to_exynos_pcie(x) container_of(x, struct exynos_pcie, pp) struct exynos_pcie { - void __iomem *elbi_base; - void __iomem *phy_base; - void __iomem *block_base; + struct pcie_port pp; + void __iomem *elbi_base; /* DT 0th resource */ + void __iomem *phy_base; /* DT 1st resource */ + void __iomem *block_base; /* DT 2nd resource */ int reset_gpio; struct clk *clk; struct clk *bus_clk; - struct pcie_port pp; }; /* PCIe ELBI registers */ @@ -102,40 +102,40 @@ struct exynos_pcie { #define PCIE_PHY_TRSV3_PD_TSV (0x1 << 7) #define PCIE_PHY_TRSV3_LVCC 0x31c -static inline void exynos_elb_writel(struct exynos_pcie *pcie, u32 val, u32 reg) +static void exynos_elb_writel(struct exynos_pcie *exynos_pcie, u32 val, u32 reg) { - writel(val, pcie->elbi_base + reg); + writel(val, exynos_pcie->elbi_base + reg); } -static inline u32 exynos_elb_readl(struct exynos_pcie *pcie, u32 reg) +static u32 exynos_elb_readl(struct exynos_pcie *exynos_pcie, u32 reg) { - return readl(pcie->elbi_base + reg); + return readl(exynos_pcie->elbi_base + reg); } -static inline void exynos_phy_writel(struct exynos_pcie *pcie, u32 val, u32 reg) +static void exynos_phy_writel(struct exynos_pcie *exynos_pcie, u32 val, u32 reg) { - writel(val, pcie->phy_base + reg); + writel(val, exynos_pcie->phy_base + reg); } -static inline u32 exynos_phy_readl(struct exynos_pcie *pcie, u32 reg) +static u32 exynos_phy_readl(struct exynos_pcie *exynos_pcie, u32 reg) { - return readl(pcie->phy_base + reg); + return readl(exynos_pcie->phy_base + reg); } -static inline void exynos_blk_writel(struct exynos_pcie *pcie, u32 val, u32 reg) +static void exynos_blk_writel(struct exynos_pcie *exynos_pcie, u32 val, u32 reg) { - writel(val, pcie->block_base + reg); + writel(val, exynos_pcie->block_base + reg); } -static inline u32 exynos_blk_readl(struct exynos_pcie *pcie, u32 reg) +static u32 exynos_blk_readl(struct exynos_pcie *exynos_pcie, u32 reg) { - return readl(pcie->block_base + reg); + return readl(exynos_pcie->block_base + reg); } -static void exynos_pcie_sideband_dbi_w_mode(struct pcie_port *pp, bool on) +static void exynos_pcie_sideband_dbi_w_mode(struct exynos_pcie *exynos_pcie, + bool on) { u32 val; - struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); if (on) { val = exynos_elb_readl(exynos_pcie, PCIE_ELBI_SLV_AWMISC); @@ -148,10 +148,10 @@ static void exynos_pcie_sideband_dbi_w_mode(struct pcie_port *pp, bool on) } } -static void exynos_pcie_sideband_dbi_r_mode(struct pcie_port *pp, bool on) +static void exynos_pcie_sideband_dbi_r_mode(struct exynos_pcie *exynos_pcie, + bool on) { u32 val; - struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); if (on) { val = exynos_elb_readl(exynos_pcie, PCIE_ELBI_SLV_ARMISC); @@ -164,10 +164,9 @@ static void exynos_pcie_sideband_dbi_r_mode(struct pcie_port *pp, bool on) } } -static void exynos_pcie_assert_core_reset(struct pcie_port *pp) +static void exynos_pcie_assert_core_reset(struct exynos_pcie *exynos_pcie) { u32 val; - struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); val = exynos_elb_readl(exynos_pcie, PCIE_CORE_RESET); val &= ~PCIE_CORE_RESET_ENABLE; @@ -177,10 +176,9 @@ static void exynos_pcie_assert_core_reset(struct pcie_port *pp) exynos_elb_writel(exynos_pcie, 0, PCIE_NONSTICKY_RESET); } -static void exynos_pcie_deassert_core_reset(struct pcie_port *pp) +static void exynos_pcie_deassert_core_reset(struct exynos_pcie 
*exynos_pcie) { u32 val; - struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); val = exynos_elb_readl(exynos_pcie, PCIE_CORE_RESET); val |= PCIE_CORE_RESET_ENABLE; @@ -193,18 +191,14 @@ static void exynos_pcie_deassert_core_reset(struct pcie_port *pp) exynos_blk_writel(exynos_pcie, 1, PCIE_PHY_MAC_RESET); } -static void exynos_pcie_assert_phy_reset(struct pcie_port *pp) +static void exynos_pcie_assert_phy_reset(struct exynos_pcie *exynos_pcie) { - struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); - exynos_blk_writel(exynos_pcie, 0, PCIE_PHY_MAC_RESET); exynos_blk_writel(exynos_pcie, 1, PCIE_PHY_GLOBAL_RESET); } -static void exynos_pcie_deassert_phy_reset(struct pcie_port *pp) +static void exynos_pcie_deassert_phy_reset(struct exynos_pcie *exynos_pcie) { - struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); - exynos_blk_writel(exynos_pcie, 0, PCIE_PHY_GLOBAL_RESET); exynos_elb_writel(exynos_pcie, 1, PCIE_PWR_RESET); exynos_blk_writel(exynos_pcie, 0, PCIE_PHY_COMMON_RESET); @@ -213,10 +207,9 @@ static void exynos_pcie_deassert_phy_reset(struct pcie_port *pp) exynos_blk_writel(exynos_pcie, 0, PCIE_PHY_TRSV_RESET); } -static void exynos_pcie_power_on_phy(struct pcie_port *pp) +static void exynos_pcie_power_on_phy(struct exynos_pcie *exynos_pcie) { u32 val; - struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); val = exynos_phy_readl(exynos_pcie, PCIE_PHY_COMMON_POWER); val &= ~PCIE_PHY_COMMON_PD_CMN; @@ -239,10 +232,9 @@ static void exynos_pcie_power_on_phy(struct pcie_port *pp) exynos_phy_writel(exynos_pcie, val, PCIE_PHY_TRSV3_POWER); } -static void exynos_pcie_power_off_phy(struct pcie_port *pp) +static void exynos_pcie_power_off_phy(struct exynos_pcie *exynos_pcie) { u32 val; - struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); val = exynos_phy_readl(exynos_pcie, PCIE_PHY_COMMON_POWER); val |= PCIE_PHY_COMMON_PD_CMN; @@ -265,10 +257,8 @@ static void exynos_pcie_power_off_phy(struct pcie_port *pp) exynos_phy_writel(exynos_pcie, val, PCIE_PHY_TRSV3_POWER); } -static void exynos_pcie_init_phy(struct pcie_port *pp) +static void exynos_pcie_init_phy(struct exynos_pcie *exynos_pcie) { - struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); - /* DCC feedback control off */ exynos_phy_writel(exynos_pcie, 0x29, PCIE_PHY_DCC_FEEDBACK); @@ -305,51 +295,41 @@ static void exynos_pcie_init_phy(struct pcie_port *pp) exynos_phy_writel(exynos_pcie, 0xa0, PCIE_PHY_TRSV3_LVCC); } -static void exynos_pcie_assert_reset(struct pcie_port *pp) +static void exynos_pcie_assert_reset(struct exynos_pcie *exynos_pcie) { - struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); + struct pcie_port *pp = &exynos_pcie->pp; + struct device *dev = pp->dev; if (exynos_pcie->reset_gpio >= 0) - devm_gpio_request_one(pp->dev, exynos_pcie->reset_gpio, + devm_gpio_request_one(dev, exynos_pcie->reset_gpio, GPIOF_OUT_INIT_HIGH, "RESET"); } -static int exynos_pcie_establish_link(struct pcie_port *pp) +static int exynos_pcie_establish_link(struct exynos_pcie *exynos_pcie) { - struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); + struct pcie_port *pp = &exynos_pcie->pp; + struct device *dev = pp->dev; u32 val; if (dw_pcie_link_up(pp)) { - dev_err(pp->dev, "Link already up\n"); + dev_err(dev, "Link already up\n"); return 0; } - /* assert reset signals */ - exynos_pcie_assert_core_reset(pp); - exynos_pcie_assert_phy_reset(pp); - - /* de-assert phy reset */ - exynos_pcie_deassert_phy_reset(pp); - - /* power on phy */ - exynos_pcie_power_on_phy(pp); - - /* initialize phy */ - exynos_pcie_init_phy(pp); + 
exynos_pcie_assert_core_reset(exynos_pcie); + exynos_pcie_assert_phy_reset(exynos_pcie); + exynos_pcie_deassert_phy_reset(exynos_pcie); + exynos_pcie_power_on_phy(exynos_pcie); + exynos_pcie_init_phy(exynos_pcie); /* pulse for common reset */ exynos_blk_writel(exynos_pcie, 1, PCIE_PHY_COMMON_RESET); udelay(500); exynos_blk_writel(exynos_pcie, 0, PCIE_PHY_COMMON_RESET); - /* de-assert core reset */ - exynos_pcie_deassert_core_reset(pp); - - /* setup root complex */ + exynos_pcie_deassert_core_reset(exynos_pcie); dw_pcie_setup_rc(pp); - - /* assert reset signal */ - exynos_pcie_assert_reset(pp); + exynos_pcie_assert_reset(exynos_pcie); /* assert LTSSM enable */ exynos_elb_writel(exynos_pcie, PCIE_ELBI_LTSSM_ENABLE, @@ -361,27 +341,23 @@ static int exynos_pcie_establish_link(struct pcie_port *pp) while (exynos_phy_readl(exynos_pcie, PCIE_PHY_PLL_LOCKED) == 0) { val = exynos_blk_readl(exynos_pcie, PCIE_PHY_PLL_LOCKED); - dev_info(pp->dev, "PLL Locked: 0x%x\n", val); + dev_info(dev, "PLL Locked: 0x%x\n", val); } - /* power off phy */ - exynos_pcie_power_off_phy(pp); - + exynos_pcie_power_off_phy(exynos_pcie); return -ETIMEDOUT; } -static void exynos_pcie_clear_irq_pulse(struct pcie_port *pp) +static void exynos_pcie_clear_irq_pulse(struct exynos_pcie *exynos_pcie) { u32 val; - struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); val = exynos_elb_readl(exynos_pcie, PCIE_IRQ_PULSE); exynos_elb_writel(exynos_pcie, val, PCIE_IRQ_PULSE); } -static void exynos_pcie_enable_irq_pulse(struct pcie_port *pp) +static void exynos_pcie_enable_irq_pulse(struct exynos_pcie *exynos_pcie) { u32 val; - struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); /* enable INTX interrupt */ val = IRQ_INTA_ASSERT | IRQ_INTB_ASSERT | @@ -391,23 +367,24 @@ static void exynos_pcie_enable_irq_pulse(struct pcie_port *pp) static irqreturn_t exynos_pcie_irq_handler(int irq, void *arg) { - struct pcie_port *pp = arg; + struct exynos_pcie *exynos_pcie = arg; - exynos_pcie_clear_irq_pulse(pp); + exynos_pcie_clear_irq_pulse(exynos_pcie); return IRQ_HANDLED; } static irqreturn_t exynos_pcie_msi_irq_handler(int irq, void *arg) { - struct pcie_port *pp = arg; + struct exynos_pcie *exynos_pcie = arg; + struct pcie_port *pp = &exynos_pcie->pp; return dw_handle_msi_irq(pp); } -static void exynos_pcie_msi_init(struct pcie_port *pp) +static void exynos_pcie_msi_init(struct exynos_pcie *exynos_pcie) { + struct pcie_port *pp = &exynos_pcie->pp; u32 val; - struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); dw_pcie_msi_init(pp); @@ -417,60 +394,64 @@ static void exynos_pcie_msi_init(struct pcie_port *pp) exynos_elb_writel(exynos_pcie, val, PCIE_IRQ_EN_LEVEL); } -static void exynos_pcie_enable_interrupts(struct pcie_port *pp) +static void exynos_pcie_enable_interrupts(struct exynos_pcie *exynos_pcie) { - exynos_pcie_enable_irq_pulse(pp); + exynos_pcie_enable_irq_pulse(exynos_pcie); if (IS_ENABLED(CONFIG_PCI_MSI)) - exynos_pcie_msi_init(pp); + exynos_pcie_msi_init(exynos_pcie); } -static inline u32 exynos_pcie_readl_rc(struct pcie_port *pp, - void __iomem *dbi_base) +static u32 exynos_pcie_readl_rc(struct pcie_port *pp, u32 reg) { + struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); u32 val; - exynos_pcie_sideband_dbi_r_mode(pp, true); - val = readl(dbi_base); - exynos_pcie_sideband_dbi_r_mode(pp, false); + exynos_pcie_sideband_dbi_r_mode(exynos_pcie, true); + val = readl(pp->dbi_base + reg); + exynos_pcie_sideband_dbi_r_mode(exynos_pcie, false); return val; } -static inline void exynos_pcie_writel_rc(struct pcie_port *pp, - u32 val, void 
__iomem *dbi_base) +static void exynos_pcie_writel_rc(struct pcie_port *pp, u32 reg, u32 val) { - exynos_pcie_sideband_dbi_w_mode(pp, true); - writel(val, dbi_base); - exynos_pcie_sideband_dbi_w_mode(pp, false); + struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); + + exynos_pcie_sideband_dbi_w_mode(exynos_pcie, true); + writel(val, pp->dbi_base + reg); + exynos_pcie_sideband_dbi_w_mode(exynos_pcie, false); } static int exynos_pcie_rd_own_conf(struct pcie_port *pp, int where, int size, u32 *val) { + struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); int ret; - exynos_pcie_sideband_dbi_r_mode(pp, true); + exynos_pcie_sideband_dbi_r_mode(exynos_pcie, true); ret = dw_pcie_cfg_read(pp->dbi_base + where, size, val); - exynos_pcie_sideband_dbi_r_mode(pp, false); + exynos_pcie_sideband_dbi_r_mode(exynos_pcie, false); return ret; } static int exynos_pcie_wr_own_conf(struct pcie_port *pp, int where, int size, u32 val) { + struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); int ret; - exynos_pcie_sideband_dbi_w_mode(pp, true); + exynos_pcie_sideband_dbi_w_mode(exynos_pcie, true); ret = dw_pcie_cfg_write(pp->dbi_base + where, size, val); - exynos_pcie_sideband_dbi_w_mode(pp, false); + exynos_pcie_sideband_dbi_w_mode(exynos_pcie, false); return ret; } static int exynos_pcie_link_up(struct pcie_port *pp) { struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); - u32 val = exynos_elb_readl(exynos_pcie, PCIE_ELBI_RDLH_LINKUP); + u32 val; + val = exynos_elb_readl(exynos_pcie, PCIE_ELBI_RDLH_LINKUP); if (val == PCIE_ELBI_LTSSM_ENABLE) return 1; @@ -479,8 +460,10 @@ static int exynos_pcie_link_up(struct pcie_port *pp) static void exynos_pcie_host_init(struct pcie_port *pp) { - exynos_pcie_establish_link(pp); - exynos_pcie_enable_interrupts(pp); + struct exynos_pcie *exynos_pcie = to_exynos_pcie(pp); + + exynos_pcie_establish_link(exynos_pcie); + exynos_pcie_enable_interrupts(exynos_pcie); } static struct pcie_host_ops exynos_pcie_host_ops = { @@ -492,36 +475,38 @@ static struct pcie_host_ops exynos_pcie_host_ops = { .host_init = exynos_pcie_host_init, }; -static int __init exynos_add_pcie_port(struct pcie_port *pp, +static int __init exynos_add_pcie_port(struct exynos_pcie *exynos_pcie, struct platform_device *pdev) { + struct pcie_port *pp = &exynos_pcie->pp; + struct device *dev = pp->dev; int ret; pp->irq = platform_get_irq(pdev, 1); if (!pp->irq) { - dev_err(&pdev->dev, "failed to get irq\n"); + dev_err(dev, "failed to get irq\n"); return -ENODEV; } - ret = devm_request_irq(&pdev->dev, pp->irq, exynos_pcie_irq_handler, - IRQF_SHARED, "exynos-pcie", pp); + ret = devm_request_irq(dev, pp->irq, exynos_pcie_irq_handler, + IRQF_SHARED, "exynos-pcie", exynos_pcie); if (ret) { - dev_err(&pdev->dev, "failed to request irq\n"); + dev_err(dev, "failed to request irq\n"); return ret; } if (IS_ENABLED(CONFIG_PCI_MSI)) { pp->msi_irq = platform_get_irq(pdev, 0); if (!pp->msi_irq) { - dev_err(&pdev->dev, "failed to get msi irq\n"); + dev_err(dev, "failed to get msi irq\n"); return -ENODEV; } - ret = devm_request_irq(&pdev->dev, pp->msi_irq, + ret = devm_request_irq(dev, pp->msi_irq, exynos_pcie_msi_irq_handler, IRQF_SHARED | IRQF_NO_THREAD, - "exynos-pcie", pp); + "exynos-pcie", exynos_pcie); if (ret) { - dev_err(&pdev->dev, "failed to request msi irq\n"); + dev_err(dev, "failed to request msi irq\n"); return ret; } } @@ -531,7 +516,7 @@ static int __init exynos_add_pcie_port(struct pcie_port *pp, ret = dw_pcie_host_init(pp); if (ret) { - dev_err(&pdev->dev, "failed to initialize host\n"); + dev_err(dev, 
"failed to initialize host\n"); return ret; } @@ -540,37 +525,36 @@ static int __init exynos_add_pcie_port(struct pcie_port *pp, static int __init exynos_pcie_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; struct exynos_pcie *exynos_pcie; struct pcie_port *pp; - struct device_node *np = pdev->dev.of_node; + struct device_node *np = dev->of_node; struct resource *elbi_base; struct resource *phy_base; struct resource *block_base; int ret; - exynos_pcie = devm_kzalloc(&pdev->dev, sizeof(*exynos_pcie), - GFP_KERNEL); + exynos_pcie = devm_kzalloc(dev, sizeof(*exynos_pcie), GFP_KERNEL); if (!exynos_pcie) return -ENOMEM; pp = &exynos_pcie->pp; - - pp->dev = &pdev->dev; + pp->dev = dev; exynos_pcie->reset_gpio = of_get_named_gpio(np, "reset-gpio", 0); - exynos_pcie->clk = devm_clk_get(&pdev->dev, "pcie"); + exynos_pcie->clk = devm_clk_get(dev, "pcie"); if (IS_ERR(exynos_pcie->clk)) { - dev_err(&pdev->dev, "Failed to get pcie rc clock\n"); + dev_err(dev, "Failed to get pcie rc clock\n"); return PTR_ERR(exynos_pcie->clk); } ret = clk_prepare_enable(exynos_pcie->clk); if (ret) return ret; - exynos_pcie->bus_clk = devm_clk_get(&pdev->dev, "pcie_bus"); + exynos_pcie->bus_clk = devm_clk_get(dev, "pcie_bus"); if (IS_ERR(exynos_pcie->bus_clk)) { - dev_err(&pdev->dev, "Failed to get pcie bus clock\n"); + dev_err(dev, "Failed to get pcie bus clock\n"); ret = PTR_ERR(exynos_pcie->bus_clk); goto fail_clk; } @@ -579,27 +563,27 @@ static int __init exynos_pcie_probe(struct platform_device *pdev) goto fail_clk; elbi_base = platform_get_resource(pdev, IORESOURCE_MEM, 0); - exynos_pcie->elbi_base = devm_ioremap_resource(&pdev->dev, elbi_base); + exynos_pcie->elbi_base = devm_ioremap_resource(dev, elbi_base); if (IS_ERR(exynos_pcie->elbi_base)) { ret = PTR_ERR(exynos_pcie->elbi_base); goto fail_bus_clk; } phy_base = platform_get_resource(pdev, IORESOURCE_MEM, 1); - exynos_pcie->phy_base = devm_ioremap_resource(&pdev->dev, phy_base); + exynos_pcie->phy_base = devm_ioremap_resource(dev, phy_base); if (IS_ERR(exynos_pcie->phy_base)) { ret = PTR_ERR(exynos_pcie->phy_base); goto fail_bus_clk; } block_base = platform_get_resource(pdev, IORESOURCE_MEM, 2); - exynos_pcie->block_base = devm_ioremap_resource(&pdev->dev, block_base); + exynos_pcie->block_base = devm_ioremap_resource(dev, block_base); if (IS_ERR(exynos_pcie->block_base)) { ret = PTR_ERR(exynos_pcie->block_base); goto fail_bus_clk; } - ret = exynos_add_pcie_port(pp, pdev); + ret = exynos_add_pcie_port(exynos_pcie, pdev); if (ret < 0) goto fail_bus_clk; diff --git a/drivers/pci/host/pci-imx6.c b/drivers/pci/host/pci-imx6.c index ead4a5c3480b..c8cefb078218 100644 --- a/drivers/pci/host/pci-imx6.c +++ b/drivers/pci/host/pci-imx6.c @@ -39,16 +39,15 @@ enum imx6_pcie_variants { }; struct imx6_pcie { + struct pcie_port pp; /* pp.dbi_base is DT 0th resource */ int reset_gpio; bool gpio_active_high; struct clk *pcie_bus; struct clk *pcie_phy; struct clk *pcie_inbound_axi; struct clk *pcie; - struct pcie_port pp; struct regmap *iomuxc_gpr; enum imx6_pcie_variants variant; - void __iomem *mem_base; u32 tx_deemph_gen1; u32 tx_deemph_gen2_3p5db; u32 tx_deemph_gen2_6db; @@ -96,14 +95,15 @@ struct imx6_pcie { #define PHY_RX_OVRD_IN_LO_RX_DATA_EN (1 << 5) #define PHY_RX_OVRD_IN_LO_RX_PLL_EN (1 << 3) -static int pcie_phy_poll_ack(void __iomem *dbi_base, int exp_val) +static int pcie_phy_poll_ack(struct imx6_pcie *imx6_pcie, int exp_val) { + struct pcie_port *pp = &imx6_pcie->pp; u32 val; u32 max_iterations = 10; u32 wait_counter = 0; do { - val = 
readl(dbi_base + PCIE_PHY_STAT); + val = dw_pcie_readl_rc(pp, PCIE_PHY_STAT); val = (val >> PCIE_PHY_STAT_ACK_LOC) & 0x1; wait_counter++; @@ -116,123 +116,126 @@ static int pcie_phy_poll_ack(void __iomem *dbi_base, int exp_val) return -ETIMEDOUT; } -static int pcie_phy_wait_ack(void __iomem *dbi_base, int addr) +static int pcie_phy_wait_ack(struct imx6_pcie *imx6_pcie, int addr) { + struct pcie_port *pp = &imx6_pcie->pp; u32 val; int ret; val = addr << PCIE_PHY_CTRL_DATA_LOC; - writel(val, dbi_base + PCIE_PHY_CTRL); + dw_pcie_writel_rc(pp, PCIE_PHY_CTRL, val); val |= (0x1 << PCIE_PHY_CTRL_CAP_ADR_LOC); - writel(val, dbi_base + PCIE_PHY_CTRL); + dw_pcie_writel_rc(pp, PCIE_PHY_CTRL, val); - ret = pcie_phy_poll_ack(dbi_base, 1); + ret = pcie_phy_poll_ack(imx6_pcie, 1); if (ret) return ret; val = addr << PCIE_PHY_CTRL_DATA_LOC; - writel(val, dbi_base + PCIE_PHY_CTRL); + dw_pcie_writel_rc(pp, PCIE_PHY_CTRL, val); - return pcie_phy_poll_ack(dbi_base, 0); + return pcie_phy_poll_ack(imx6_pcie, 0); } /* Read from the 16-bit PCIe PHY control registers (not memory-mapped) */ -static int pcie_phy_read(void __iomem *dbi_base, int addr, int *data) +static int pcie_phy_read(struct imx6_pcie *imx6_pcie, int addr, int *data) { + struct pcie_port *pp = &imx6_pcie->pp; u32 val, phy_ctl; int ret; - ret = pcie_phy_wait_ack(dbi_base, addr); + ret = pcie_phy_wait_ack(imx6_pcie, addr); if (ret) return ret; /* assert Read signal */ phy_ctl = 0x1 << PCIE_PHY_CTRL_RD_LOC; - writel(phy_ctl, dbi_base + PCIE_PHY_CTRL); + dw_pcie_writel_rc(pp, PCIE_PHY_CTRL, phy_ctl); - ret = pcie_phy_poll_ack(dbi_base, 1); + ret = pcie_phy_poll_ack(imx6_pcie, 1); if (ret) return ret; - val = readl(dbi_base + PCIE_PHY_STAT); + val = dw_pcie_readl_rc(pp, PCIE_PHY_STAT); *data = val & 0xffff; /* deassert Read signal */ - writel(0x00, dbi_base + PCIE_PHY_CTRL); + dw_pcie_writel_rc(pp, PCIE_PHY_CTRL, 0x00); - return pcie_phy_poll_ack(dbi_base, 0); + return pcie_phy_poll_ack(imx6_pcie, 0); } -static int pcie_phy_write(void __iomem *dbi_base, int addr, int data) +static int pcie_phy_write(struct imx6_pcie *imx6_pcie, int addr, int data) { + struct pcie_port *pp = &imx6_pcie->pp; u32 var; int ret; /* write addr */ /* cap addr */ - ret = pcie_phy_wait_ack(dbi_base, addr); + ret = pcie_phy_wait_ack(imx6_pcie, addr); if (ret) return ret; var = data << PCIE_PHY_CTRL_DATA_LOC; - writel(var, dbi_base + PCIE_PHY_CTRL); + dw_pcie_writel_rc(pp, PCIE_PHY_CTRL, var); /* capture data */ var |= (0x1 << PCIE_PHY_CTRL_CAP_DAT_LOC); - writel(var, dbi_base + PCIE_PHY_CTRL); + dw_pcie_writel_rc(pp, PCIE_PHY_CTRL, var); - ret = pcie_phy_poll_ack(dbi_base, 1); + ret = pcie_phy_poll_ack(imx6_pcie, 1); if (ret) return ret; /* deassert cap data */ var = data << PCIE_PHY_CTRL_DATA_LOC; - writel(var, dbi_base + PCIE_PHY_CTRL); + dw_pcie_writel_rc(pp, PCIE_PHY_CTRL, var); /* wait for ack de-assertion */ - ret = pcie_phy_poll_ack(dbi_base, 0); + ret = pcie_phy_poll_ack(imx6_pcie, 0); if (ret) return ret; /* assert wr signal */ var = 0x1 << PCIE_PHY_CTRL_WR_LOC; - writel(var, dbi_base + PCIE_PHY_CTRL); + dw_pcie_writel_rc(pp, PCIE_PHY_CTRL, var); /* wait for ack */ - ret = pcie_phy_poll_ack(dbi_base, 1); + ret = pcie_phy_poll_ack(imx6_pcie, 1); if (ret) return ret; /* deassert wr signal */ var = data << PCIE_PHY_CTRL_DATA_LOC; - writel(var, dbi_base + PCIE_PHY_CTRL); + dw_pcie_writel_rc(pp, PCIE_PHY_CTRL, var); /* wait for ack de-assertion */ - ret = pcie_phy_poll_ack(dbi_base, 0); + ret = pcie_phy_poll_ack(imx6_pcie, 0); if (ret) return ret; - writel(0x0, dbi_base + 
PCIE_PHY_CTRL); + dw_pcie_writel_rc(pp, PCIE_PHY_CTRL, 0x0); return 0; } -static void imx6_pcie_reset_phy(struct pcie_port *pp) +static void imx6_pcie_reset_phy(struct imx6_pcie *imx6_pcie) { u32 tmp; - pcie_phy_read(pp->dbi_base, PHY_RX_OVRD_IN_LO, &tmp); + pcie_phy_read(imx6_pcie, PHY_RX_OVRD_IN_LO, &tmp); tmp |= (PHY_RX_OVRD_IN_LO_RX_DATA_EN | PHY_RX_OVRD_IN_LO_RX_PLL_EN); - pcie_phy_write(pp->dbi_base, PHY_RX_OVRD_IN_LO, tmp); + pcie_phy_write(imx6_pcie, PHY_RX_OVRD_IN_LO, tmp); usleep_range(2000, 3000); - pcie_phy_read(pp->dbi_base, PHY_RX_OVRD_IN_LO, &tmp); + pcie_phy_read(imx6_pcie, PHY_RX_OVRD_IN_LO, &tmp); tmp &= ~(PHY_RX_OVRD_IN_LO_RX_DATA_EN | PHY_RX_OVRD_IN_LO_RX_PLL_EN); - pcie_phy_write(pp->dbi_base, PHY_RX_OVRD_IN_LO, tmp); + pcie_phy_write(imx6_pcie, PHY_RX_OVRD_IN_LO, tmp); } /* Added for PCI abort handling */ @@ -242,9 +245,9 @@ static int imx6q_pcie_abort_handler(unsigned long addr, return 0; } -static int imx6_pcie_assert_core_reset(struct pcie_port *pp) +static void imx6_pcie_assert_core_reset(struct imx6_pcie *imx6_pcie) { - struct imx6_pcie *imx6_pcie = to_imx6_pcie(pp); + struct pcie_port *pp = &imx6_pcie->pp; u32 val, gpr1, gpr12; switch (imx6_pcie->variant) { @@ -281,10 +284,10 @@ static int imx6_pcie_assert_core_reset(struct pcie_port *pp) if ((gpr1 & IMX6Q_GPR1_PCIE_REF_CLK_EN) && (gpr12 & IMX6Q_GPR12_PCIE_CTL_2)) { - val = readl(pp->dbi_base + PCIE_PL_PFLR); + val = dw_pcie_readl_rc(pp, PCIE_PL_PFLR); val &= ~PCIE_PL_PFLR_LINK_STATE_MASK; val |= PCIE_PL_PFLR_FORCE_LINK; - writel(val, pp->dbi_base + PCIE_PL_PFLR); + dw_pcie_writel_rc(pp, PCIE_PL_PFLR, val); regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, IMX6Q_GPR12_PCIE_CTL_2, 0 << 10); @@ -296,20 +299,19 @@ static int imx6_pcie_assert_core_reset(struct pcie_port *pp) IMX6Q_GPR1_PCIE_REF_CLK_EN, 0 << 16); break; } - - return 0; } static int imx6_pcie_enable_ref_clk(struct imx6_pcie *imx6_pcie) { struct pcie_port *pp = &imx6_pcie->pp; + struct device *dev = pp->dev; int ret = 0; switch (imx6_pcie->variant) { case IMX6SX: ret = clk_prepare_enable(imx6_pcie->pcie_inbound_axi); if (ret) { - dev_err(pp->dev, "unable to enable pcie_axi clock\n"); + dev_err(dev, "unable to enable pcie_axi clock\n"); break; } @@ -336,32 +338,33 @@ static int imx6_pcie_enable_ref_clk(struct imx6_pcie *imx6_pcie) return ret; } -static int imx6_pcie_deassert_core_reset(struct pcie_port *pp) +static void imx6_pcie_deassert_core_reset(struct imx6_pcie *imx6_pcie) { - struct imx6_pcie *imx6_pcie = to_imx6_pcie(pp); + struct pcie_port *pp = &imx6_pcie->pp; + struct device *dev = pp->dev; int ret; ret = clk_prepare_enable(imx6_pcie->pcie_phy); if (ret) { - dev_err(pp->dev, "unable to enable pcie_phy clock\n"); - goto err_pcie_phy; + dev_err(dev, "unable to enable pcie_phy clock\n"); + return; } ret = clk_prepare_enable(imx6_pcie->pcie_bus); if (ret) { - dev_err(pp->dev, "unable to enable pcie_bus clock\n"); + dev_err(dev, "unable to enable pcie_bus clock\n"); goto err_pcie_bus; } ret = clk_prepare_enable(imx6_pcie->pcie); if (ret) { - dev_err(pp->dev, "unable to enable pcie clock\n"); + dev_err(dev, "unable to enable pcie clock\n"); goto err_pcie; } ret = imx6_pcie_enable_ref_clk(imx6_pcie); if (ret) { - dev_err(pp->dev, "unable to enable pcie ref clock\n"); + dev_err(dev, "unable to enable pcie ref clock\n"); goto err_ref_clk; } @@ -392,7 +395,7 @@ static int imx6_pcie_deassert_core_reset(struct pcie_port *pp) break; } - return 0; + return; err_ref_clk: clk_disable_unprepare(imx6_pcie->pcie); @@ -400,14 +403,10 @@ err_pcie: 
clk_disable_unprepare(imx6_pcie->pcie_bus); err_pcie_bus: clk_disable_unprepare(imx6_pcie->pcie_phy); -err_pcie_phy: - return ret; } -static void imx6_pcie_init_phy(struct pcie_port *pp) +static void imx6_pcie_init_phy(struct imx6_pcie *imx6_pcie) { - struct imx6_pcie *imx6_pcie = to_imx6_pcie(pp); - if (imx6_pcie->variant == IMX6SX) regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, IMX6SX_GPR12_PCIE_RX_EQ_MASK, @@ -439,45 +438,52 @@ static void imx6_pcie_init_phy(struct pcie_port *pp) imx6_pcie->tx_swing_low << 25); } -static int imx6_pcie_wait_for_link(struct pcie_port *pp) +static int imx6_pcie_wait_for_link(struct imx6_pcie *imx6_pcie) { + struct pcie_port *pp = &imx6_pcie->pp; + struct device *dev = pp->dev; + /* check if the link is up or not */ if (!dw_pcie_wait_for_link(pp)) return 0; - dev_dbg(pp->dev, "DEBUG_R0: 0x%08x, DEBUG_R1: 0x%08x\n", - readl(pp->dbi_base + PCIE_PHY_DEBUG_R0), - readl(pp->dbi_base + PCIE_PHY_DEBUG_R1)); + dev_dbg(dev, "DEBUG_R0: 0x%08x, DEBUG_R1: 0x%08x\n", + dw_pcie_readl_rc(pp, PCIE_PHY_DEBUG_R0), + dw_pcie_readl_rc(pp, PCIE_PHY_DEBUG_R1)); return -ETIMEDOUT; } -static int imx6_pcie_wait_for_speed_change(struct pcie_port *pp) +static int imx6_pcie_wait_for_speed_change(struct imx6_pcie *imx6_pcie) { + struct pcie_port *pp = &imx6_pcie->pp; + struct device *dev = pp->dev; u32 tmp; unsigned int retries; for (retries = 0; retries < 200; retries++) { - tmp = readl(pp->dbi_base + PCIE_LINK_WIDTH_SPEED_CONTROL); + tmp = dw_pcie_readl_rc(pp, PCIE_LINK_WIDTH_SPEED_CONTROL); /* Test if the speed change finished. */ if (!(tmp & PORT_LOGIC_SPEED_CHANGE)) return 0; usleep_range(100, 1000); } - dev_err(pp->dev, "Speed change timeout\n"); + dev_err(dev, "Speed change timeout\n"); return -EINVAL; } static irqreturn_t imx6_pcie_msi_handler(int irq, void *arg) { - struct pcie_port *pp = arg; + struct imx6_pcie *imx6_pcie = arg; + struct pcie_port *pp = &imx6_pcie->pp; return dw_handle_msi_irq(pp); } -static int imx6_pcie_establish_link(struct pcie_port *pp) +static int imx6_pcie_establish_link(struct imx6_pcie *imx6_pcie) { - struct imx6_pcie *imx6_pcie = to_imx6_pcie(pp); + struct pcie_port *pp = &imx6_pcie->pp; + struct device *dev = pp->dev; u32 tmp; int ret; @@ -486,76 +492,73 @@ static int imx6_pcie_establish_link(struct pcie_port *pp) * started in Gen2 mode, there is a possibility the devices on the * bus will not be detected at all. This happens with PCIe switches. */ - tmp = readl(pp->dbi_base + PCIE_RC_LCR); + tmp = dw_pcie_readl_rc(pp, PCIE_RC_LCR); tmp &= ~PCIE_RC_LCR_MAX_LINK_SPEEDS_MASK; tmp |= PCIE_RC_LCR_MAX_LINK_SPEEDS_GEN1; - writel(tmp, pp->dbi_base + PCIE_RC_LCR); + dw_pcie_writel_rc(pp, PCIE_RC_LCR, tmp); /* Start LTSSM. */ regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, IMX6Q_GPR12_PCIE_CTL_2, 1 << 10); - ret = imx6_pcie_wait_for_link(pp); + ret = imx6_pcie_wait_for_link(imx6_pcie); if (ret) { - dev_info(pp->dev, "Link never came up\n"); + dev_info(dev, "Link never came up\n"); goto err_reset_phy; } if (imx6_pcie->link_gen == 2) { /* Allow Gen2 mode after the link is up. */ - tmp = readl(pp->dbi_base + PCIE_RC_LCR); + tmp = dw_pcie_readl_rc(pp, PCIE_RC_LCR); tmp &= ~PCIE_RC_LCR_MAX_LINK_SPEEDS_MASK; tmp |= PCIE_RC_LCR_MAX_LINK_SPEEDS_GEN2; - writel(tmp, pp->dbi_base + PCIE_RC_LCR); + dw_pcie_writel_rc(pp, PCIE_RC_LCR, tmp); } else { - dev_info(pp->dev, "Link: Gen2 disabled\n"); + dev_info(dev, "Link: Gen2 disabled\n"); } /* * Start Directed Speed Change so the best possible speed both link * partners support can be negotiated. 
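 *
 * Setting PORT_LOGIC_SPEED_CHANGE sends the LTSSM through Recovery to
 * retrain at the highest rate both partners advertise; the core then
 * clears the bit again on completion, which is exactly the condition
 * imx6_pcie_wait_for_speed_change() above polls for.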
*/ - tmp = readl(pp->dbi_base + PCIE_LINK_WIDTH_SPEED_CONTROL); + tmp = dw_pcie_readl_rc(pp, PCIE_LINK_WIDTH_SPEED_CONTROL); tmp |= PORT_LOGIC_SPEED_CHANGE; - writel(tmp, pp->dbi_base + PCIE_LINK_WIDTH_SPEED_CONTROL); + dw_pcie_writel_rc(pp, PCIE_LINK_WIDTH_SPEED_CONTROL, tmp); - ret = imx6_pcie_wait_for_speed_change(pp); + ret = imx6_pcie_wait_for_speed_change(imx6_pcie); if (ret) { - dev_err(pp->dev, "Failed to bring link up!\n"); + dev_err(dev, "Failed to bring link up!\n"); goto err_reset_phy; } /* Make sure link training is finished as well! */ - ret = imx6_pcie_wait_for_link(pp); + ret = imx6_pcie_wait_for_link(imx6_pcie); if (ret) { - dev_err(pp->dev, "Failed to bring link up!\n"); + dev_err(dev, "Failed to bring link up!\n"); goto err_reset_phy; } - tmp = readl(pp->dbi_base + PCIE_RC_LCSR); - dev_info(pp->dev, "Link up, Gen%i\n", (tmp >> 16) & 0xf); + tmp = dw_pcie_readl_rc(pp, PCIE_RC_LCSR); + dev_info(dev, "Link up, Gen%i\n", (tmp >> 16) & 0xf); return 0; err_reset_phy: - dev_dbg(pp->dev, "PHY DEBUG_R0=0x%08x DEBUG_R1=0x%08x\n", - readl(pp->dbi_base + PCIE_PHY_DEBUG_R0), - readl(pp->dbi_base + PCIE_PHY_DEBUG_R1)); - imx6_pcie_reset_phy(pp); - + dev_dbg(dev, "PHY DEBUG_R0=0x%08x DEBUG_R1=0x%08x\n", + dw_pcie_readl_rc(pp, PCIE_PHY_DEBUG_R0), + dw_pcie_readl_rc(pp, PCIE_PHY_DEBUG_R1)); + imx6_pcie_reset_phy(imx6_pcie); return ret; } static void imx6_pcie_host_init(struct pcie_port *pp) { - imx6_pcie_assert_core_reset(pp); - - imx6_pcie_init_phy(pp); - - imx6_pcie_deassert_core_reset(pp); + struct imx6_pcie *imx6_pcie = to_imx6_pcie(pp); + imx6_pcie_assert_core_reset(imx6_pcie); + imx6_pcie_init_phy(imx6_pcie); + imx6_pcie_deassert_core_reset(imx6_pcie); dw_pcie_setup_rc(pp); - - imx6_pcie_establish_link(pp); + imx6_pcie_establish_link(imx6_pcie); if (IS_ENABLED(CONFIG_PCI_MSI)) dw_pcie_msi_init(pp); @@ -563,7 +566,7 @@ static void imx6_pcie_host_init(struct pcie_port *pp) static int imx6_pcie_link_up(struct pcie_port *pp) { - return readl(pp->dbi_base + PCIE_PHY_DEBUG_R1) & + return dw_pcie_readl_rc(pp, PCIE_PHY_DEBUG_R1) & PCIE_PHY_DEBUG_R1_XMLH_LINK_UP; } @@ -572,24 +575,26 @@ static struct pcie_host_ops imx6_pcie_host_ops = { .host_init = imx6_pcie_host_init, }; -static int __init imx6_add_pcie_port(struct pcie_port *pp, - struct platform_device *pdev) +static int __init imx6_add_pcie_port(struct imx6_pcie *imx6_pcie, + struct platform_device *pdev) { + struct pcie_port *pp = &imx6_pcie->pp; + struct device *dev = pp->dev; int ret; if (IS_ENABLED(CONFIG_PCI_MSI)) { pp->msi_irq = platform_get_irq_byname(pdev, "msi"); if (pp->msi_irq <= 0) { - dev_err(&pdev->dev, "failed to get MSI irq\n"); + dev_err(dev, "failed to get MSI irq\n"); return -ENODEV; } - ret = devm_request_irq(&pdev->dev, pp->msi_irq, + ret = devm_request_irq(dev, pp->msi_irq, imx6_pcie_msi_handler, IRQF_SHARED | IRQF_NO_THREAD, - "mx6-pcie-msi", pp); + "mx6-pcie-msi", imx6_pcie); if (ret) { - dev_err(&pdev->dev, "failed to request MSI irq\n"); + dev_err(dev, "failed to request MSI irq\n"); return ret; } } @@ -599,7 +604,7 @@ static int __init imx6_add_pcie_port(struct pcie_port *pp, ret = dw_pcie_host_init(pp); if (ret) { - dev_err(&pdev->dev, "failed to initialize host\n"); + dev_err(dev, "failed to initialize host\n"); return ret; } @@ -608,75 +613,72 @@ static int __init imx6_add_pcie_port(struct pcie_port *pp, static int __init imx6_pcie_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; struct imx6_pcie *imx6_pcie; struct pcie_port *pp; - struct device_node *np = pdev->dev.of_node; struct 
resource *dbi_base; - struct device_node *node = pdev->dev.of_node; + struct device_node *node = dev->of_node; int ret; - imx6_pcie = devm_kzalloc(&pdev->dev, sizeof(*imx6_pcie), GFP_KERNEL); + imx6_pcie = devm_kzalloc(dev, sizeof(*imx6_pcie), GFP_KERNEL); if (!imx6_pcie) return -ENOMEM; pp = &imx6_pcie->pp; - pp->dev = &pdev->dev; + pp->dev = dev; imx6_pcie->variant = - (enum imx6_pcie_variants)of_device_get_match_data(&pdev->dev); + (enum imx6_pcie_variants)of_device_get_match_data(dev); /* Added for PCI abort handling */ hook_fault_code(16 + 6, imx6q_pcie_abort_handler, SIGBUS, 0, "imprecise external abort"); dbi_base = platform_get_resource(pdev, IORESOURCE_MEM, 0); - pp->dbi_base = devm_ioremap_resource(&pdev->dev, dbi_base); + pp->dbi_base = devm_ioremap_resource(dev, dbi_base); if (IS_ERR(pp->dbi_base)) return PTR_ERR(pp->dbi_base); /* Fetch GPIOs */ - imx6_pcie->reset_gpio = of_get_named_gpio(np, "reset-gpio", 0); - imx6_pcie->gpio_active_high = of_property_read_bool(np, + imx6_pcie->reset_gpio = of_get_named_gpio(node, "reset-gpio", 0); + imx6_pcie->gpio_active_high = of_property_read_bool(node, "reset-gpio-active-high"); if (gpio_is_valid(imx6_pcie->reset_gpio)) { - ret = devm_gpio_request_one(&pdev->dev, imx6_pcie->reset_gpio, + ret = devm_gpio_request_one(dev, imx6_pcie->reset_gpio, imx6_pcie->gpio_active_high ? GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW, "PCIe reset"); if (ret) { - dev_err(&pdev->dev, "unable to get reset gpio\n"); + dev_err(dev, "unable to get reset gpio\n"); return ret; } } /* Fetch clocks */ - imx6_pcie->pcie_phy = devm_clk_get(&pdev->dev, "pcie_phy"); + imx6_pcie->pcie_phy = devm_clk_get(dev, "pcie_phy"); if (IS_ERR(imx6_pcie->pcie_phy)) { - dev_err(&pdev->dev, - "pcie_phy clock source missing or invalid\n"); + dev_err(dev, "pcie_phy clock source missing or invalid\n"); return PTR_ERR(imx6_pcie->pcie_phy); } - imx6_pcie->pcie_bus = devm_clk_get(&pdev->dev, "pcie_bus"); + imx6_pcie->pcie_bus = devm_clk_get(dev, "pcie_bus"); if (IS_ERR(imx6_pcie->pcie_bus)) { - dev_err(&pdev->dev, - "pcie_bus clock source missing or invalid\n"); + dev_err(dev, "pcie_bus clock source missing or invalid\n"); return PTR_ERR(imx6_pcie->pcie_bus); } - imx6_pcie->pcie = devm_clk_get(&pdev->dev, "pcie"); + imx6_pcie->pcie = devm_clk_get(dev, "pcie"); if (IS_ERR(imx6_pcie->pcie)) { - dev_err(&pdev->dev, - "pcie clock source missing or invalid\n"); + dev_err(dev, "pcie clock source missing or invalid\n"); return PTR_ERR(imx6_pcie->pcie); } if (imx6_pcie->variant == IMX6SX) { - imx6_pcie->pcie_inbound_axi = devm_clk_get(&pdev->dev, + imx6_pcie->pcie_inbound_axi = devm_clk_get(dev, "pcie_inbound_axi"); if (IS_ERR(imx6_pcie->pcie_inbound_axi)) { - dev_err(&pdev->dev, + dev_err(dev, "pcie_incbound_axi clock missing or invalid\n"); return PTR_ERR(imx6_pcie->pcie_inbound_axi); } @@ -686,7 +688,7 @@ static int __init imx6_pcie_probe(struct platform_device *pdev) imx6_pcie->iomuxc_gpr = syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr"); if (IS_ERR(imx6_pcie->iomuxc_gpr)) { - dev_err(&pdev->dev, "unable to find iomuxc registers\n"); + dev_err(dev, "unable to find iomuxc registers\n"); return PTR_ERR(imx6_pcie->iomuxc_gpr); } @@ -712,12 +714,12 @@ static int __init imx6_pcie_probe(struct platform_device *pdev) imx6_pcie->tx_swing_low = 127; /* Limit link speed */ - ret = of_property_read_u32(pp->dev->of_node, "fsl,max-link-speed", + ret = of_property_read_u32(node, "fsl,max-link-speed", &imx6_pcie->link_gen); if (ret) imx6_pcie->link_gen = 1; - ret = imx6_add_pcie_port(pp, pdev); + ret 
= imx6_add_pcie_port(imx6_pcie, pdev); if (ret < 0) return ret; @@ -730,7 +732,7 @@ static void imx6_pcie_shutdown(struct platform_device *pdev) struct imx6_pcie *imx6_pcie = platform_get_drvdata(pdev); /* bring down link, so bootloader gets clean state in case of reboot */ - imx6_pcie_assert_core_reset(&imx6_pcie->pp); + imx6_pcie_assert_core_reset(imx6_pcie); } static const struct of_device_id imx6_pcie_of_match[] = { diff --git a/drivers/pci/host/pci-keystone-dw.c b/drivers/pci/host/pci-keystone-dw.c index 41515092eb0d..9397c4667106 100644 --- a/drivers/pci/host/pci-keystone-dw.c +++ b/drivers/pci/host/pci-keystone-dw.c @@ -88,13 +88,24 @@ phys_addr_t ks_dw_pcie_get_msi_addr(struct pcie_port *pp) return ks_pcie->app.start + MSI_IRQ; } +static u32 ks_dw_app_readl(struct keystone_pcie *ks_pcie, u32 offset) +{ + return readl(ks_pcie->va_app_base + offset); +} + +static void ks_dw_app_writel(struct keystone_pcie *ks_pcie, u32 offset, u32 val) +{ + writel(val, ks_pcie->va_app_base + offset); +} + void ks_dw_pcie_handle_msi_irq(struct keystone_pcie *ks_pcie, int offset) { struct pcie_port *pp = &ks_pcie->pp; + struct device *dev = pp->dev; u32 pending, vector; int src, virq; - pending = readl(ks_pcie->va_app_base + MSI0_IRQ_STATUS + (offset << 4)); + pending = ks_dw_app_readl(ks_pcie, MSI0_IRQ_STATUS + (offset << 4)); /* * MSI0 status bit 0-3 shows vectors 0, 8, 16, 24, MSI1 status bit @@ -104,7 +115,7 @@ void ks_dw_pcie_handle_msi_irq(struct keystone_pcie *ks_pcie, int offset) if (BIT(src) & pending) { vector = offset + (src << 3); virq = irq_linear_revmap(pp->irq_domain, vector); - dev_dbg(pp->dev, "irq: bit %d, vector %d, virq %d\n", + dev_dbg(dev, "irq: bit %d, vector %d, virq %d\n", src, vector, virq); generic_handle_irq(virq); } @@ -124,9 +135,9 @@ static void ks_dw_pcie_msi_irq_ack(struct irq_data *d) offset = d->irq - irq_linear_revmap(pp->irq_domain, 0); update_reg_offset_bit_pos(offset, ®_offset, &bit_pos); - writel(BIT(bit_pos), - ks_pcie->va_app_base + MSI0_IRQ_STATUS + (reg_offset << 4)); - writel(reg_offset + MSI_IRQ_OFFSET, ks_pcie->va_app_base + IRQ_EOI); + ks_dw_app_writel(ks_pcie, MSI0_IRQ_STATUS + (reg_offset << 4), + BIT(bit_pos)); + ks_dw_app_writel(ks_pcie, IRQ_EOI, reg_offset + MSI_IRQ_OFFSET); } void ks_dw_pcie_msi_set_irq(struct pcie_port *pp, int irq) @@ -135,8 +146,8 @@ void ks_dw_pcie_msi_set_irq(struct pcie_port *pp, int irq) struct keystone_pcie *ks_pcie = to_keystone_pcie(pp); update_reg_offset_bit_pos(irq, ®_offset, &bit_pos); - writel(BIT(bit_pos), - ks_pcie->va_app_base + MSI0_IRQ_ENABLE_SET + (reg_offset << 4)); + ks_dw_app_writel(ks_pcie, MSI0_IRQ_ENABLE_SET + (reg_offset << 4), + BIT(bit_pos)); } void ks_dw_pcie_msi_clear_irq(struct pcie_port *pp, int irq) @@ -145,8 +156,8 @@ void ks_dw_pcie_msi_clear_irq(struct pcie_port *pp, int irq) struct keystone_pcie *ks_pcie = to_keystone_pcie(pp); update_reg_offset_bit_pos(irq, ®_offset, &bit_pos); - writel(BIT(bit_pos), - ks_pcie->va_app_base + MSI0_IRQ_ENABLE_CLR + (reg_offset << 4)); + ks_dw_app_writel(ks_pcie, MSI0_IRQ_ENABLE_CLR + (reg_offset << 4), + BIT(bit_pos)); } static void ks_dw_pcie_msi_irq_mask(struct irq_data *d) @@ -215,6 +226,7 @@ static const struct irq_domain_ops ks_dw_pcie_msi_domain_ops = { int ks_dw_pcie_msi_host_init(struct pcie_port *pp, struct msi_controller *chip) { struct keystone_pcie *ks_pcie = to_keystone_pcie(pp); + struct device *dev = pp->dev; int i; pp->irq_domain = irq_domain_add_linear(ks_pcie->msi_intc_np, @@ -222,7 +234,7 @@ int ks_dw_pcie_msi_host_init(struct pcie_port *pp, 
struct msi_controller *chip) &ks_dw_pcie_msi_domain_ops, chip); if (!pp->irq_domain) { - dev_err(pp->dev, "irq domain init failed\n"); + dev_err(dev, "irq domain init failed\n"); return -ENXIO; } @@ -237,47 +249,47 @@ void ks_dw_pcie_enable_legacy_irqs(struct keystone_pcie *ks_pcie) int i; for (i = 0; i < MAX_LEGACY_IRQS; i++) - writel(0x1, ks_pcie->va_app_base + IRQ_ENABLE_SET + (i << 4)); + ks_dw_app_writel(ks_pcie, IRQ_ENABLE_SET + (i << 4), 0x1); } void ks_dw_pcie_handle_legacy_irq(struct keystone_pcie *ks_pcie, int offset) { struct pcie_port *pp = &ks_pcie->pp; + struct device *dev = pp->dev; u32 pending; int virq; - pending = readl(ks_pcie->va_app_base + IRQ_STATUS + (offset << 4)); + pending = ks_dw_app_readl(ks_pcie, IRQ_STATUS + (offset << 4)); if (BIT(0) & pending) { virq = irq_linear_revmap(ks_pcie->legacy_irq_domain, offset); - dev_dbg(pp->dev, ": irq: irq_offset %d, virq %d\n", offset, - virq); + dev_dbg(dev, ": irq: irq_offset %d, virq %d\n", offset, virq); generic_handle_irq(virq); } /* EOI the INTx interrupt */ - writel(offset, ks_pcie->va_app_base + IRQ_EOI); + ks_dw_app_writel(ks_pcie, IRQ_EOI, offset); } -void ks_dw_pcie_enable_error_irq(void __iomem *reg_base) +void ks_dw_pcie_enable_error_irq(struct keystone_pcie *ks_pcie) { - writel(ERR_IRQ_ALL, reg_base + ERR_IRQ_ENABLE_SET); + ks_dw_app_writel(ks_pcie, ERR_IRQ_ENABLE_SET, ERR_IRQ_ALL); } -irqreturn_t ks_dw_pcie_handle_error_irq(struct device *dev, - void __iomem *reg_base) +irqreturn_t ks_dw_pcie_handle_error_irq(struct keystone_pcie *ks_pcie) { u32 status; - status = readl(reg_base + ERR_IRQ_STATUS_RAW) & ERR_IRQ_ALL; + status = ks_dw_app_readl(ks_pcie, ERR_IRQ_STATUS_RAW) & ERR_IRQ_ALL; if (!status) return IRQ_NONE; if (status & ERR_FATAL_IRQ) - dev_err(dev, "fatal error (status %#010x)\n", status); + dev_err(ks_pcie->pp.dev, "fatal error (status %#010x)\n", + status); /* Ack the IRQ; status bits are RW1C */ - writel(status, reg_base + ERR_IRQ_STATUS); + ks_dw_app_writel(ks_pcie, ERR_IRQ_STATUS, status); return IRQ_HANDLED; } @@ -322,15 +334,15 @@ static const struct irq_domain_ops ks_dw_pcie_legacy_irq_domain_ops = { * Since modification of dbi_cs2 involves different clock domain, read the * status back to ensure the transition is complete. */ -static void ks_dw_pcie_set_dbi_mode(void __iomem *reg_virt) +static void ks_dw_pcie_set_dbi_mode(struct keystone_pcie *ks_pcie) { u32 val; - writel(DBI_CS2_EN_VAL | readl(reg_virt + CMD_STATUS), - reg_virt + CMD_STATUS); + val = ks_dw_app_readl(ks_pcie, CMD_STATUS); + ks_dw_app_writel(ks_pcie, CMD_STATUS, DBI_CS2_EN_VAL | val); do { - val = readl(reg_virt + CMD_STATUS); + val = ks_dw_app_readl(ks_pcie, CMD_STATUS); } while (!(val & DBI_CS2_EN_VAL)); } @@ -340,15 +352,15 @@ static void ks_dw_pcie_set_dbi_mode(void __iomem *reg_virt) * Since modification of dbi_cs2 involves different clock domain, read the * status back to ensure the transition is complete. 
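 *
 * Both dbi_mode helpers follow one pattern: flip a CMD_STATUS bit, then
 * spin on the read-back. A possible consolidation, sketched with the
 * ks_dw_app_readl()/ks_dw_app_writel() accessors introduced above (the
 * helper below is illustrative only, not part of this patch):
 */

static void ks_dw_app_update_bit(struct keystone_pcie *ks_pcie, u32 bit,
				 bool set)
{
	u32 val = ks_dw_app_readl(ks_pcie, CMD_STATUS);

	ks_dw_app_writel(ks_pcie, CMD_STATUS, set ? (val | bit) : (val & ~bit));

	/* read back until the other clock domain has latched the change */
	do {
		val = ks_dw_app_readl(ks_pcie, CMD_STATUS);
	} while (set ? !(val & bit) : (val & bit));
}

/*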
*/ -static void ks_dw_pcie_clear_dbi_mode(void __iomem *reg_virt) +static void ks_dw_pcie_clear_dbi_mode(struct keystone_pcie *ks_pcie) { u32 val; - writel(~DBI_CS2_EN_VAL & readl(reg_virt + CMD_STATUS), - reg_virt + CMD_STATUS); + val = ks_dw_app_readl(ks_pcie, CMD_STATUS); + ks_dw_app_writel(ks_pcie, CMD_STATUS, ~DBI_CS2_EN_VAL & val); do { - val = readl(reg_virt + CMD_STATUS); + val = ks_dw_app_readl(ks_pcie, CMD_STATUS); } while (val & DBI_CS2_EN_VAL); } @@ -357,28 +369,29 @@ void ks_dw_pcie_setup_rc_app_regs(struct keystone_pcie *ks_pcie) struct pcie_port *pp = &ks_pcie->pp; u32 start = pp->mem->start, end = pp->mem->end; int i, tr_size; + u32 val; /* Disable BARs for inbound access */ - ks_dw_pcie_set_dbi_mode(ks_pcie->va_app_base); - writel(0, pp->dbi_base + PCI_BASE_ADDRESS_0); - writel(0, pp->dbi_base + PCI_BASE_ADDRESS_1); - ks_dw_pcie_clear_dbi_mode(ks_pcie->va_app_base); + ks_dw_pcie_set_dbi_mode(ks_pcie); + dw_pcie_writel_rc(pp, PCI_BASE_ADDRESS_0, 0); + dw_pcie_writel_rc(pp, PCI_BASE_ADDRESS_1, 0); + ks_dw_pcie_clear_dbi_mode(ks_pcie); /* Set outbound translation size per window division */ - writel(CFG_PCIM_WIN_SZ_IDX & 0x7, ks_pcie->va_app_base + OB_SIZE); + ks_dw_app_writel(ks_pcie, OB_SIZE, CFG_PCIM_WIN_SZ_IDX & 0x7); tr_size = (1 << (CFG_PCIM_WIN_SZ_IDX & 0x7)) * SZ_1M; /* Using Direct 1:1 mapping of RC <-> PCI memory space */ for (i = 0; (i < CFG_PCIM_WIN_CNT) && (start < end); i++) { - writel(start | 1, ks_pcie->va_app_base + OB_OFFSET_INDEX(i)); - writel(0, ks_pcie->va_app_base + OB_OFFSET_HI(i)); + ks_dw_app_writel(ks_pcie, OB_OFFSET_INDEX(i), start | 1); + ks_dw_app_writel(ks_pcie, OB_OFFSET_HI(i), 0); start += tr_size; } /* Enable OB translation */ - writel(OB_XLAT_EN_VAL | readl(ks_pcie->va_app_base + CMD_STATUS), - ks_pcie->va_app_base + CMD_STATUS); + val = ks_dw_app_readl(ks_pcie, CMD_STATUS); + ks_dw_app_writel(ks_pcie, CMD_STATUS, OB_XLAT_EN_VAL | val); } /** @@ -418,7 +431,7 @@ static void __iomem *ks_pcie_cfg_setup(struct keystone_pcie *ks_pcie, u8 bus, if (bus != 1) regval |= BIT(24); - writel(regval, ks_pcie->va_app_base + CFG_SETUP); + ks_dw_app_writel(ks_pcie, CFG_SETUP, regval); return pp->va_cfg0_base; } @@ -456,19 +469,19 @@ void ks_dw_pcie_v3_65_scan_bus(struct pcie_port *pp) struct keystone_pcie *ks_pcie = to_keystone_pcie(pp); /* Configure and set up BAR0 */ - ks_dw_pcie_set_dbi_mode(ks_pcie->va_app_base); + ks_dw_pcie_set_dbi_mode(ks_pcie); /* Enable BAR0 */ - writel(1, pp->dbi_base + PCI_BASE_ADDRESS_0); - writel(SZ_4K - 1, pp->dbi_base + PCI_BASE_ADDRESS_0); + dw_pcie_writel_rc(pp, PCI_BASE_ADDRESS_0, 1); + dw_pcie_writel_rc(pp, PCI_BASE_ADDRESS_0, SZ_4K - 1); - ks_dw_pcie_clear_dbi_mode(ks_pcie->va_app_base); + ks_dw_pcie_clear_dbi_mode(ks_pcie); /* * For BAR0, just setting bus address for inbound writes (MSI) should * be sufficient. Use physical address to avoid any conflicts. 
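 *
 * This is the same address ks_dw_pcie_get_msi_addr() near the top of
 * this file hands out as the MSI doorbell (ks_pcie->app.start +
 * MSI_IRQ), so inbound MSI writes from endpoints land in the MSI0_IRQ_*
 * registers that ks_dw_pcie_handle_msi_irq() above demultiplexes.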
*/ - writel(ks_pcie->app.start, pp->dbi_base + PCI_BASE_ADDRESS_0); + dw_pcie_writel_rc(pp, PCI_BASE_ADDRESS_0, ks_pcie->app.start); } /** @@ -476,8 +489,9 @@ void ks_dw_pcie_v3_65_scan_bus(struct pcie_port *pp) */ int ks_dw_pcie_link_up(struct pcie_port *pp) { - u32 val = readl(pp->dbi_base + DEBUG0); + u32 val; + val = dw_pcie_readl_rc(pp, DEBUG0); return (val & LTSSM_STATE_MASK) == LTSSM_STATE_L0; } @@ -486,13 +500,13 @@ void ks_dw_pcie_initiate_link_train(struct keystone_pcie *ks_pcie) u32 val; /* Disable Link training */ - val = readl(ks_pcie->va_app_base + CMD_STATUS); + val = ks_dw_app_readl(ks_pcie, CMD_STATUS); val &= ~LTSSM_EN_VAL; - writel(LTSSM_EN_VAL | val, ks_pcie->va_app_base + CMD_STATUS); + ks_dw_app_writel(ks_pcie, CMD_STATUS, LTSSM_EN_VAL | val); /* Initiate Link Training */ - val = readl(ks_pcie->va_app_base + CMD_STATUS); - writel(LTSSM_EN_VAL | val, ks_pcie->va_app_base + CMD_STATUS); + val = ks_dw_app_readl(ks_pcie, CMD_STATUS); + ks_dw_app_writel(ks_pcie, CMD_STATUS, LTSSM_EN_VAL | val); } /** @@ -506,12 +520,13 @@ int __init ks_dw_pcie_host_init(struct keystone_pcie *ks_pcie, struct device_node *msi_intc_np) { struct pcie_port *pp = &ks_pcie->pp; - struct platform_device *pdev = to_platform_device(pp->dev); + struct device *dev = pp->dev; + struct platform_device *pdev = to_platform_device(dev); struct resource *res; /* Index 0 is the config reg. space address */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - pp->dbi_base = devm_ioremap_resource(pp->dev, res); + pp->dbi_base = devm_ioremap_resource(dev, res); if (IS_ERR(pp->dbi_base)) return PTR_ERR(pp->dbi_base); @@ -524,7 +539,7 @@ int __init ks_dw_pcie_host_init(struct keystone_pcie *ks_pcie, /* Index 1 is the application reg. space address */ res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - ks_pcie->va_app_base = devm_ioremap_resource(pp->dev, res); + ks_pcie->va_app_base = devm_ioremap_resource(dev, res); if (IS_ERR(ks_pcie->va_app_base)) return PTR_ERR(ks_pcie->va_app_base); @@ -537,7 +552,7 @@ int __init ks_dw_pcie_host_init(struct keystone_pcie *ks_pcie, &ks_dw_pcie_legacy_irq_domain_ops, NULL); if (!ks_pcie->legacy_irq_domain) { - dev_err(pp->dev, "Failed to add irq domain for legacy irqs\n"); + dev_err(dev, "Failed to add irq domain for legacy irqs\n"); return -EINVAL; } diff --git a/drivers/pci/host/pci-keystone.c b/drivers/pci/host/pci-keystone.c index 82b461b5b08a..043c19a05da1 100644 --- a/drivers/pci/host/pci-keystone.c +++ b/drivers/pci/host/pci-keystone.c @@ -89,12 +89,13 @@ DECLARE_PCI_FIXUP_ENABLE(PCI_ANY_ID, PCI_ANY_ID, quirk_limit_mrrs); static int ks_pcie_establish_link(struct keystone_pcie *ks_pcie) { struct pcie_port *pp = &ks_pcie->pp; + struct device *dev = pp->dev; unsigned int retries; dw_pcie_setup_rc(pp); if (dw_pcie_link_up(pp)) { - dev_err(pp->dev, "Link already up\n"); + dev_err(dev, "Link already up\n"); return 0; } @@ -105,7 +106,7 @@ static int ks_pcie_establish_link(struct keystone_pcie *ks_pcie) return 0; } - dev_err(pp->dev, "phy link never came up\n"); + dev_err(dev, "phy link never came up\n"); return -ETIMEDOUT; } @@ -115,9 +116,10 @@ static void ks_pcie_msi_irq_handler(struct irq_desc *desc) struct keystone_pcie *ks_pcie = irq_desc_get_handler_data(desc); u32 offset = irq - ks_pcie->msi_host_irqs[0]; struct pcie_port *pp = &ks_pcie->pp; + struct device *dev = pp->dev; struct irq_chip *chip = irq_desc_get_chip(desc); - dev_dbg(pp->dev, "%s, irq %d\n", __func__, irq); + dev_dbg(dev, "%s, irq %d\n", __func__, irq); /* * The chained irq handler installation 
would have replaced normal @@ -142,10 +144,11 @@ static void ks_pcie_legacy_irq_handler(struct irq_desc *desc) unsigned int irq = irq_desc_get_irq(desc); struct keystone_pcie *ks_pcie = irq_desc_get_handler_data(desc); struct pcie_port *pp = &ks_pcie->pp; + struct device *dev = pp->dev; u32 irq_offset = irq - ks_pcie->legacy_host_irqs[0]; struct irq_chip *chip = irq_desc_get_chip(desc); - dev_dbg(pp->dev, ": Handling legacy irq %d\n", irq); + dev_dbg(dev, ": Handling legacy irq %d\n", irq); /* * The chained irq handler installation would have replaced normal @@ -234,7 +237,7 @@ static void ks_pcie_setup_interrupts(struct keystone_pcie *ks_pcie) } if (ks_pcie->error_irq > 0) - ks_dw_pcie_enable_error_irq(ks_pcie->va_app_base); + ks_dw_pcie_enable_error_irq(ks_pcie); } /* @@ -302,14 +305,14 @@ static irqreturn_t pcie_err_irq_handler(int irq, void *priv) { struct keystone_pcie *ks_pcie = priv; - return ks_dw_pcie_handle_error_irq(ks_pcie->pp.dev, - ks_pcie->va_app_base); + return ks_dw_pcie_handle_error_irq(ks_pcie); } static int __init ks_add_pcie_port(struct keystone_pcie *ks_pcie, struct platform_device *pdev) { struct pcie_port *pp = &ks_pcie->pp; + struct device *dev = pp->dev; int ret; ret = ks_pcie_get_irq_controller_info(ks_pcie, @@ -332,12 +335,12 @@ static int __init ks_add_pcie_port(struct keystone_pcie *ks_pcie, */ ks_pcie->error_irq = irq_of_parse_and_map(ks_pcie->np, 0); if (ks_pcie->error_irq <= 0) - dev_info(&pdev->dev, "no error IRQ defined\n"); + dev_info(dev, "no error IRQ defined\n"); else { ret = request_irq(ks_pcie->error_irq, pcie_err_irq_handler, IRQF_SHARED, "pcie-error-irq", ks_pcie); if (ret < 0) { - dev_err(&pdev->dev, "failed to request error IRQ %d\n", + dev_err(dev, "failed to request error IRQ %d\n", ks_pcie->error_irq); return ret; } @@ -347,7 +350,7 @@ static int __init ks_add_pcie_port(struct keystone_pcie *ks_pcie, pp->ops = &keystone_pcie_host_ops; ret = ks_dw_pcie_host_init(ks_pcie, ks_pcie->msi_intc_np); if (ret) { - dev_err(&pdev->dev, "failed to initialize host\n"); + dev_err(dev, "failed to initialize host\n"); return ret; } @@ -381,12 +384,12 @@ static int __init ks_pcie_probe(struct platform_device *pdev) struct phy *phy; int ret; - ks_pcie = devm_kzalloc(&pdev->dev, sizeof(*ks_pcie), - GFP_KERNEL); + ks_pcie = devm_kzalloc(dev, sizeof(*ks_pcie), GFP_KERNEL); if (!ks_pcie) return -ENOMEM; pp = &ks_pcie->pp; + pp->dev = dev; /* initialize SerDes Phy if present */ phy = devm_phy_get(dev, "pcie-phy"); @@ -408,7 +411,6 @@ static int __init ks_pcie_probe(struct platform_device *pdev) devm_iounmap(dev, reg_p); devm_release_mem_region(dev, res->start, resource_size(res)); - pp->dev = dev; ks_pcie->np = dev->of_node; platform_set_drvdata(pdev, ks_pcie); ks_pcie->clk = devm_clk_get(dev, "pcie"); diff --git a/drivers/pci/host/pci-keystone.h b/drivers/pci/host/pci-keystone.h index a5b0cb2ba4d7..bc54bafda068 100644 --- a/drivers/pci/host/pci-keystone.h +++ b/drivers/pci/host/pci-keystone.h @@ -17,8 +17,8 @@ #define MAX_LEGACY_HOST_IRQS 4 struct keystone_pcie { + struct pcie_port pp; /* pp.dbi_base is DT 0th res */ struct clk *clk; - struct pcie_port pp; /* PCI Device ID */ u32 device_id; int num_legacy_host_irqs; @@ -34,7 +34,7 @@ struct keystone_pcie { int error_irq; /* Application register space */ - void __iomem *va_app_base; + void __iomem *va_app_base; /* DT 1st resource */ struct resource app; }; @@ -45,9 +45,8 @@ phys_addr_t ks_dw_pcie_get_msi_addr(struct pcie_port *pp); /* Keystone specific PCI controller APIs */ void 
ks_dw_pcie_enable_legacy_irqs(struct keystone_pcie *ks_pcie); void ks_dw_pcie_handle_legacy_irq(struct keystone_pcie *ks_pcie, int offset); -void ks_dw_pcie_enable_error_irq(void __iomem *reg_base); -irqreturn_t ks_dw_pcie_handle_error_irq(struct device *dev, - void __iomem *reg_base); +void ks_dw_pcie_enable_error_irq(struct keystone_pcie *ks_pcie); +irqreturn_t ks_dw_pcie_handle_error_irq(struct keystone_pcie *ks_pcie); int ks_dw_pcie_host_init(struct keystone_pcie *ks_pcie, struct device_node *msi_intc_np); int ks_dw_pcie_wr_other_conf(struct pcie_port *pp, struct pci_bus *bus, diff --git a/drivers/pci/host/pci-layerscape.c b/drivers/pci/host/pci-layerscape.c index 114ba819277a..2cb7315e26d0 100644 --- a/drivers/pci/host/pci-layerscape.c +++ b/drivers/pci/host/pci-layerscape.c @@ -45,10 +45,9 @@ struct ls_pcie_drvdata { }; struct ls_pcie { - void __iomem *dbi; + struct pcie_port pp; /* pp.dbi_base is DT regs */ void __iomem *lut; struct regmap *scfg; - struct pcie_port pp; const struct ls_pcie_drvdata *drvdata; int index; }; @@ -59,7 +58,7 @@ static bool ls_pcie_is_bridge(struct ls_pcie *pcie) { u32 header_type; - header_type = ioread8(pcie->dbi + PCI_HEADER_TYPE); + header_type = ioread8(pcie->pp.dbi_base + PCI_HEADER_TYPE); header_type &= 0x7f; return header_type == PCI_HEADER_TYPE_BRIDGE; @@ -68,13 +67,13 @@ static bool ls_pcie_is_bridge(struct ls_pcie *pcie) /* Clear multi-function bit */ static void ls_pcie_clear_multifunction(struct ls_pcie *pcie) { - iowrite8(PCI_HEADER_TYPE_BRIDGE, pcie->dbi + PCI_HEADER_TYPE); + iowrite8(PCI_HEADER_TYPE_BRIDGE, pcie->pp.dbi_base + PCI_HEADER_TYPE); } /* Fix class value */ static void ls_pcie_fix_class(struct ls_pcie *pcie) { - iowrite16(PCI_CLASS_BRIDGE_PCI, pcie->dbi + PCI_CLASS_DEVICE); + iowrite16(PCI_CLASS_BRIDGE_PCI, pcie->pp.dbi_base + PCI_CLASS_DEVICE); } /* Drop MSG TLP except for Vendor MSG */ @@ -82,9 +81,9 @@ static void ls_pcie_drop_msg_tlp(struct ls_pcie *pcie) { u32 val; - val = ioread32(pcie->dbi + PCIE_STRFMR1); + val = ioread32(pcie->pp.dbi_base + PCIE_STRFMR1); val &= 0xDFFFFFFF; - iowrite32(val, pcie->dbi + PCIE_STRFMR1); + iowrite32(val, pcie->pp.dbi_base + PCIE_STRFMR1); } static int ls1021_pcie_link_up(struct pcie_port *pp) @@ -106,18 +105,19 @@ static int ls1021_pcie_link_up(struct pcie_port *pp) static void ls1021_pcie_host_init(struct pcie_port *pp) { + struct device *dev = pp->dev; struct ls_pcie *pcie = to_ls_pcie(pp); u32 index[2]; - pcie->scfg = syscon_regmap_lookup_by_phandle(pp->dev->of_node, + pcie->scfg = syscon_regmap_lookup_by_phandle(dev->of_node, "fsl,pcie-scfg"); if (IS_ERR(pcie->scfg)) { - dev_err(pp->dev, "No syscfg phandle specified\n"); + dev_err(dev, "No syscfg phandle specified\n"); pcie->scfg = NULL; return; } - if (of_property_read_u32_array(pp->dev->of_node, + if (of_property_read_u32_array(dev->of_node, "fsl,pcie-scfg", index, 2)) { pcie->scfg = NULL; return; @@ -148,18 +148,19 @@ static void ls_pcie_host_init(struct pcie_port *pp) { struct ls_pcie *pcie = to_ls_pcie(pp); - iowrite32(1, pcie->dbi + PCIE_DBI_RO_WR_EN); + iowrite32(1, pcie->pp.dbi_base + PCIE_DBI_RO_WR_EN); ls_pcie_fix_class(pcie); ls_pcie_clear_multifunction(pcie); ls_pcie_drop_msg_tlp(pcie); - iowrite32(0, pcie->dbi + PCIE_DBI_RO_WR_EN); + iowrite32(0, pcie->pp.dbi_base + PCIE_DBI_RO_WR_EN); } static int ls_pcie_msi_host_init(struct pcie_port *pp, struct msi_controller *chip) { + struct device *dev = pp->dev; + struct device_node *np = dev->of_node; struct device_node *msi_node; - struct device_node *np = pp->dev->of_node; /* * 
The MSI domain is set by the generic of_msi_configure(). This
@@ -169,7 +170,7 @@ static int ls_pcie_msi_host_init(struct pcie_port *pp,
 	 */
 	msi_node = of_parse_phandle(np, "msi-parent", 0);
 	if (!msi_node) {
-		dev_err(pp->dev, "failed to find msi-parent\n");
+		dev_err(dev, "failed to find msi-parent\n");
 		return -EINVAL;
 	}
 
@@ -212,19 +213,15 @@ static const struct of_device_id ls_pcie_of_match[] = {
 	{ },
 };
 
-static int __init ls_add_pcie_port(struct pcie_port *pp,
-				   struct platform_device *pdev)
+static int __init ls_add_pcie_port(struct ls_pcie *pcie)
 {
+	struct pcie_port *pp = &pcie->pp;
+	struct device *dev = pp->dev;
 	int ret;
-	struct ls_pcie *pcie = to_ls_pcie(pp);
-
-	pp->dev = &pdev->dev;
-	pp->dbi_base = pcie->dbi;
-	pp->ops = pcie->drvdata->ops;
 
 	ret = dw_pcie_host_init(pp);
 	if (ret) {
-		dev_err(pp->dev, "failed to initialize host\n");
+		dev_err(dev, "failed to initialize host\n");
 		return ret;
 	}
 
@@ -233,38 +230,44 @@ static int __init ls_add_pcie_port(struct pcie_port *pp,
 
 static int __init ls_pcie_probe(struct platform_device *pdev)
 {
+	struct device *dev = &pdev->dev;
 	const struct of_device_id *match;
 	struct ls_pcie *pcie;
+	struct pcie_port *pp;
 	struct resource *dbi_base;
 	int ret;
 
-	match = of_match_device(ls_pcie_of_match, &pdev->dev);
+	match = of_match_device(ls_pcie_of_match, dev);
 	if (!match)
 		return -ENODEV;
 
-	pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL);
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
 	if (!pcie)
 		return -ENOMEM;
 
+	/* set drvdata first: pp->ops dereferences it just below */
+	pcie->drvdata = match->data;
+
+	pp = &pcie->pp;
+	pp->dev = dev;
+	pp->ops = pcie->drvdata->ops;
+
 	dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
-	pcie->dbi = devm_ioremap_resource(&pdev->dev, dbi_base);
-	if (IS_ERR(pcie->dbi)) {
-		dev_err(&pdev->dev, "missing *regs* space\n");
-		return PTR_ERR(pcie->dbi);
+	pcie->pp.dbi_base = devm_ioremap_resource(dev, dbi_base);
+	if (IS_ERR(pcie->pp.dbi_base)) {
+		dev_err(dev, "missing *regs* space\n");
+		return PTR_ERR(pcie->pp.dbi_base);
	}
 
-	pcie->drvdata = match->data;
-	pcie->lut = pcie->dbi + pcie->drvdata->lut_offset;
+	pcie->lut = pcie->pp.dbi_base + pcie->drvdata->lut_offset;
 
 	if (!ls_pcie_is_bridge(pcie))
 		return -ENODEV;
 
-	ret = ls_add_pcie_port(&pcie->pp, pdev);
+	ret = ls_add_pcie_port(pcie);
 	if (ret < 0)
 		return ret;
 
-	platform_set_drvdata(pdev, pcie);
-
 	return 0;
 }
 
diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c
index 307f81d6b479..45a89d969700 100644
--- a/drivers/pci/host/pci-mvebu.c
+++ b/drivers/pci/host/pci-mvebu.c
@@ -1190,13 +1190,13 @@ static void mvebu_pcie_powerdown(struct mvebu_pcie_port *port)
 
 static int mvebu_pcie_probe(struct platform_device *pdev)
 {
+	struct device *dev = &pdev->dev;
 	struct mvebu_pcie *pcie;
-	struct device_node *np = pdev->dev.of_node;
+	struct device_node *np = dev->of_node;
 	struct device_node *child;
 	int num, i, ret;
 
-	pcie = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_pcie),
-			    GFP_KERNEL);
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
 	if (!pcie)
 		return -ENOMEM;
 
@@ -1206,7 +1206,7 @@ static int mvebu_pcie_probe(struct platform_device *pdev)
 	/* Get the PCIe memory and I/O aperture */
 	mvebu_mbus_get_pcie_mem_aperture(&pcie->mem);
 	if (resource_size(&pcie->mem) == 0) {
-		dev_err(&pdev->dev, "invalid memory aperture size\n");
+		dev_err(dev, "invalid memory aperture size\n");
 		return -EINVAL;
 	}
 
@@ -1224,20 +1224,18 @@ static int mvebu_pcie_probe(struct platform_device *pdev)
 	/* Get the bus range */
 	ret = of_pci_parse_bus_range(np, &pcie->busn);
 	if (ret) {
-		dev_err(&pdev->dev, "failed to parse bus-range property: %d\n",
-
ret); + dev_err(dev, "failed to parse bus-range property: %d\n", ret); return ret; } - num = of_get_available_child_count(pdev->dev.of_node); + num = of_get_available_child_count(np); - pcie->ports = devm_kcalloc(&pdev->dev, num, sizeof(*pcie->ports), - GFP_KERNEL); + pcie->ports = devm_kcalloc(dev, num, sizeof(*pcie->ports), GFP_KERNEL); if (!pcie->ports) return -ENOMEM; i = 0; - for_each_available_child_of_node(pdev->dev.of_node, child) { + for_each_available_child_of_node(np, child) { struct mvebu_pcie_port *port = &pcie->ports[i]; ret = mvebu_pcie_parse_port(pcie, port, child); @@ -1266,8 +1264,7 @@ static int mvebu_pcie_probe(struct platform_device *pdev) port->base = mvebu_pcie_map_registers(pdev, child, port); if (IS_ERR(port->base)) { - dev_err(&pdev->dev, "%s: cannot map registers\n", - port->name); + dev_err(dev, "%s: cannot map registers\n", port->name); port->base = NULL; mvebu_pcie_powerdown(port); continue; diff --git a/drivers/pci/host/pci-rcar-gen2.c b/drivers/pci/host/pci-rcar-gen2.c index 597566f96f5e..1eeefa4df64c 100644 --- a/drivers/pci/host/pci-rcar-gen2.c +++ b/drivers/pci/host/pci-rcar-gen2.c @@ -154,10 +154,11 @@ static int rcar_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) static irqreturn_t rcar_pci_err_irq(int irq, void *pw) { struct rcar_pci_priv *priv = pw; + struct device *dev = priv->dev; u32 status = ioread32(priv->reg + RCAR_PCI_INT_STATUS_REG); if (status & RCAR_PCI_INT_ALLERRORS) { - dev_err(priv->dev, "error irq: status %08x\n", status); + dev_err(dev, "error irq: status %08x\n", status); /* clear the error(s) */ iowrite32(status & RCAR_PCI_INT_ALLERRORS, @@ -170,13 +171,14 @@ static irqreturn_t rcar_pci_err_irq(int irq, void *pw) static void rcar_pci_setup_errirq(struct rcar_pci_priv *priv) { + struct device *dev = priv->dev; int ret; u32 val; - ret = devm_request_irq(priv->dev, priv->irq, rcar_pci_err_irq, + ret = devm_request_irq(dev, priv->irq, rcar_pci_err_irq, IRQF_SHARED, "error irq", priv); if (ret) { - dev_err(priv->dev, "cannot claim IRQ for error handling\n"); + dev_err(dev, "cannot claim IRQ for error handling\n"); return; } @@ -192,15 +194,16 @@ static inline void rcar_pci_setup_errirq(struct rcar_pci_priv *priv) { } static int rcar_pci_setup(int nr, struct pci_sys_data *sys) { struct rcar_pci_priv *priv = sys->private_data; + struct device *dev = priv->dev; void __iomem *reg = priv->reg; u32 val; int ret; - pm_runtime_enable(priv->dev); - pm_runtime_get_sync(priv->dev); + pm_runtime_enable(dev); + pm_runtime_get_sync(dev); val = ioread32(reg + RCAR_PCI_UNIT_REV_REG); - dev_info(priv->dev, "PCI: bus%u revision %x\n", sys->busnr, val); + dev_info(dev, "PCI: bus%u revision %x\n", sys->busnr, val); /* Disable Direct Power Down State and assert reset */ val = ioread32(reg + RCAR_USBCTR_REG) & ~RCAR_USBCTR_DIRPD; @@ -275,7 +278,7 @@ static int rcar_pci_setup(int nr, struct pci_sys_data *sys) /* Add PCI resources */ pci_add_resource(&sys->resources, &priv->mem_res); - ret = devm_request_pci_bus_resources(priv->dev, &sys->resources); + ret = devm_request_pci_bus_resources(dev, &sys->resources); if (ret < 0) return ret; @@ -311,6 +314,7 @@ static int pci_dma_range_parser_init(struct of_pci_range_parser *parser, static int rcar_pci_parse_map_dma_ranges(struct rcar_pci_priv *pci, struct device_node *np) { + struct device *dev = pci->dev; struct of_pci_range range; struct of_pci_range_parser parser; int index = 0; @@ -331,14 +335,14 @@ static int rcar_pci_parse_map_dma_ranges(struct rcar_pci_priv *pci, /* Catch HW limitations */ if 
(!(range.flags & IORESOURCE_PREFETCH)) { - dev_err(pci->dev, "window must be prefetchable\n"); + dev_err(dev, "window must be prefetchable\n"); return -EINVAL; } if (pci->window_addr) { u32 lowaddr = 1 << (ffs(pci->window_addr) - 1); if (lowaddr < pci->window_size) { - dev_err(pci->dev, "invalid window size/addr\n"); + dev_err(dev, "invalid window size/addr\n"); return -EINVAL; } } @@ -350,6 +354,7 @@ static int rcar_pci_parse_map_dma_ranges(struct rcar_pci_priv *pci, static int rcar_pci_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; struct resource *cfg_res, *mem_res; struct rcar_pci_priv *priv; void __iomem *reg; @@ -357,7 +362,7 @@ static int rcar_pci_probe(struct platform_device *pdev) void *hw_private[1]; cfg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - reg = devm_ioremap_resource(&pdev->dev, cfg_res); + reg = devm_ioremap_resource(dev, cfg_res); if (IS_ERR(reg)) return PTR_ERR(reg); @@ -368,8 +373,7 @@ static int rcar_pci_probe(struct platform_device *pdev) if (mem_res->start & 0xFFFF) return -EINVAL; - priv = devm_kzalloc(&pdev->dev, - sizeof(struct rcar_pci_priv), GFP_KERNEL); + priv = devm_kzalloc(dev, sizeof(struct rcar_pci_priv), GFP_KERNEL); if (!priv) return -ENOMEM; @@ -378,10 +382,10 @@ static int rcar_pci_probe(struct platform_device *pdev) priv->irq = platform_get_irq(pdev, 0); priv->reg = reg; - priv->dev = &pdev->dev; + priv->dev = dev; if (priv->irq < 0) { - dev_err(&pdev->dev, "no valid irq found\n"); + dev_err(dev, "no valid irq found\n"); return priv->irq; } @@ -390,23 +394,23 @@ static int rcar_pci_probe(struct platform_device *pdev) priv->window_pci = 0x40000000; priv->window_size = SZ_1G; - if (pdev->dev.of_node) { + if (dev->of_node) { struct resource busnr; int ret; - ret = of_pci_parse_bus_range(pdev->dev.of_node, &busnr); + ret = of_pci_parse_bus_range(dev->of_node, &busnr); if (ret < 0) { - dev_err(&pdev->dev, "failed to parse bus-range\n"); + dev_err(dev, "failed to parse bus-range\n"); return ret; } priv->busnr = busnr.start; if (busnr.end != busnr.start) - dev_warn(&pdev->dev, "only one bus number supported\n"); + dev_warn(dev, "only one bus number supported\n"); - ret = rcar_pci_parse_map_dma_ranges(priv, pdev->dev.of_node); + ret = rcar_pci_parse_map_dma_ranges(priv, dev->of_node); if (ret < 0) { - dev_err(&pdev->dev, "failed to parse dma-range\n"); + dev_err(dev, "failed to parse dma-range\n"); return ret; } } else { @@ -421,7 +425,7 @@ static int rcar_pci_probe(struct platform_device *pdev) hw.map_irq = rcar_pci_map_irq; hw.ops = &rcar_pci_ops; hw.setup = rcar_pci_setup; - pci_common_init_dev(&pdev->dev, &hw); + pci_common_init_dev(dev, &hw); return 0; } diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c index e2a8e4cab22e..8dfccf733241 100644 --- a/drivers/pci/host/pci-tegra.c +++ b/drivers/pci/host/pci-tegra.c @@ -384,6 +384,7 @@ static unsigned long tegra_pcie_conf_offset(unsigned int devfn, int where) static struct tegra_pcie_bus *tegra_pcie_bus_alloc(struct tegra_pcie *pcie, unsigned int busnr) { + struct device *dev = pcie->dev; pgprot_t prot = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_XN | L_PTE_MT_DEV_SHARED | L_PTE_SHARED); phys_addr_t cs = pcie->cs->start; @@ -413,8 +414,7 @@ static struct tegra_pcie_bus *tegra_pcie_bus_alloc(struct tegra_pcie *pcie, err = ioremap_page_range(virt, virt + SZ_64K, phys, prot); if (err < 0) { - dev_err(pcie->dev, "ioremap_page_range() failed: %d\n", - err); + dev_err(dev, "ioremap_page_range() failed: %d\n", err); goto unmap; } } @@ 
-462,6 +462,7 @@ static void __iomem *tegra_pcie_map_bus(struct pci_bus *bus, int where) { struct tegra_pcie *pcie = sys_to_pcie(bus->sysdata); + struct device *dev = pcie->dev; void __iomem *addr = NULL; if (bus->number == 0) { @@ -482,8 +483,7 @@ static void __iomem *tegra_pcie_map_bus(struct pci_bus *bus, addr = (void __iomem *)b->area->addr; if (!addr) { - dev_err(pcie->dev, - "failed to map cfg. space for bus %u\n", + dev_err(dev, "failed to map cfg. space for bus %u\n", bus->number); return NULL; } @@ -584,12 +584,13 @@ static void tegra_pcie_port_disable(struct tegra_pcie_port *port) static void tegra_pcie_port_free(struct tegra_pcie_port *port) { struct tegra_pcie *pcie = port->pcie; + struct device *dev = pcie->dev; - devm_iounmap(pcie->dev, port->base); - devm_release_mem_region(pcie->dev, port->regs.start, + devm_iounmap(dev, port->base); + devm_release_mem_region(dev, port->regs.start, resource_size(&port->regs)); list_del(&port->list); - devm_kfree(pcie->dev, port); + devm_kfree(dev, port); } /* Tegra PCIE root complex wrongly reports device class */ @@ -612,12 +613,13 @@ DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, tegra_pcie_relax_enable); static int tegra_pcie_setup(int nr, struct pci_sys_data *sys) { struct tegra_pcie *pcie = sys_to_pcie(sys); + struct device *dev = pcie->dev; int err; sys->mem_offset = pcie->offset.mem; sys->io_offset = pcie->offset.io; - err = devm_request_resource(pcie->dev, &iomem_resource, &pcie->io); + err = devm_request_resource(dev, &iomem_resource, &pcie->io); if (err < 0) return err; @@ -631,7 +633,7 @@ static int tegra_pcie_setup(int nr, struct pci_sys_data *sys) sys->mem_offset); pci_add_resource(&sys->resources, &pcie->busn); - err = devm_request_pci_bus_resources(pcie->dev, &sys->resources); + err = devm_request_pci_bus_resources(dev, &sys->resources); if (err < 0) return err; @@ -672,6 +674,7 @@ static irqreturn_t tegra_pcie_isr(int irq, void *arg) "Peer2Peer error", }; struct tegra_pcie *pcie = arg; + struct device *dev = pcie->dev; u32 code, signature; code = afi_readl(pcie, AFI_INTR_CODE) & AFI_INTR_CODE_MASK; @@ -689,11 +692,9 @@ static irqreturn_t tegra_pcie_isr(int irq, void *arg) * happen a lot during enumeration */ if (code == AFI_INTR_MASTER_ABORT) - dev_dbg(pcie->dev, "%s, signature: %08x\n", err_msg[code], - signature); + dev_dbg(dev, "%s, signature: %08x\n", err_msg[code], signature); else - dev_err(pcie->dev, "%s, signature: %08x\n", err_msg[code], - signature); + dev_err(dev, "%s, signature: %08x\n", err_msg[code], signature); if (code == AFI_INTR_TARGET_ABORT || code == AFI_INTR_MASTER_ABORT || code == AFI_INTR_FPCI_DECODE_ERROR) { @@ -701,9 +702,9 @@ static irqreturn_t tegra_pcie_isr(int irq, void *arg) u64 address = (u64)fpci << 32 | (signature & 0xfffffffc); if (code == AFI_INTR_MASTER_ABORT) - dev_dbg(pcie->dev, " FPCI address: %10llx\n", address); + dev_dbg(dev, " FPCI address: %10llx\n", address); else - dev_err(pcie->dev, " FPCI address: %10llx\n", address); + dev_err(dev, " FPCI address: %10llx\n", address); } return IRQ_HANDLED; @@ -793,6 +794,7 @@ static int tegra_pcie_pll_wait(struct tegra_pcie *pcie, unsigned long timeout) static int tegra_pcie_phy_enable(struct tegra_pcie *pcie) { + struct device *dev = pcie->dev; const struct tegra_pcie_soc *soc = pcie->soc; u32 value; int err; @@ -829,7 +831,7 @@ static int tegra_pcie_phy_enable(struct tegra_pcie *pcie) /* wait for the PLL to lock */ err = tegra_pcie_pll_wait(pcie, 500); if (err < 0) { - dev_err(pcie->dev, "PLL failed to lock: %d\n", err); + dev_err(dev, "PLL 
failed to lock: %d\n", err); return err; } @@ -859,7 +861,7 @@ static int tegra_pcie_phy_disable(struct tegra_pcie *pcie) /* override IDDQ */ value = pads_readl(pcie, PADS_CTL); value |= PADS_CTL_IDDQ_1L; - pads_writel(pcie, PADS_CTL, value); + pads_writel(pcie, value, PADS_CTL); /* reset PLL */ value = pads_readl(pcie, soc->pads_pll_ctl); @@ -880,8 +882,7 @@ static int tegra_pcie_port_phy_power_on(struct tegra_pcie_port *port) for (i = 0; i < port->lanes; i++) { err = phy_power_on(port->phys[i]); if (err < 0) { - dev_err(dev, "failed to power on PHY#%u: %d\n", i, - err); + dev_err(dev, "failed to power on PHY#%u: %d\n", i, err); return err; } } @@ -909,6 +910,7 @@ static int tegra_pcie_port_phy_power_off(struct tegra_pcie_port *port) static int tegra_pcie_phy_power_on(struct tegra_pcie *pcie) { + struct device *dev = pcie->dev; const struct tegra_pcie_soc *soc = pcie->soc; struct tegra_pcie_port *port; int err; @@ -920,7 +922,7 @@ static int tegra_pcie_phy_power_on(struct tegra_pcie *pcie) err = tegra_pcie_phy_enable(pcie); if (err < 0) - dev_err(pcie->dev, "failed to power on PHY: %d\n", err); + dev_err(dev, "failed to power on PHY: %d\n", err); return err; } @@ -928,7 +930,7 @@ static int tegra_pcie_phy_power_on(struct tegra_pcie *pcie) list_for_each_entry(port, &pcie->ports, list) { err = tegra_pcie_port_phy_power_on(port); if (err < 0) { - dev_err(pcie->dev, + dev_err(dev, "failed to power on PCIe port %u PHY: %d\n", port->index, err); return err; @@ -946,6 +948,7 @@ static int tegra_pcie_phy_power_on(struct tegra_pcie *pcie) static int tegra_pcie_phy_power_off(struct tegra_pcie *pcie) { + struct device *dev = pcie->dev; struct tegra_pcie_port *port; int err; @@ -956,8 +959,7 @@ static int tegra_pcie_phy_power_off(struct tegra_pcie *pcie) err = tegra_pcie_phy_disable(pcie); if (err < 0) - dev_err(pcie->dev, "failed to power off PHY: %d\n", - err); + dev_err(dev, "failed to power off PHY: %d\n", err); return err; } @@ -965,7 +967,7 @@ static int tegra_pcie_phy_power_off(struct tegra_pcie *pcie) list_for_each_entry(port, &pcie->ports, list) { err = tegra_pcie_port_phy_power_off(port); if (err < 0) { - dev_err(pcie->dev, + dev_err(dev, "failed to power off PCIe port %u PHY: %d\n", port->index, err); return err; @@ -977,6 +979,7 @@ static int tegra_pcie_phy_power_off(struct tegra_pcie *pcie) static int tegra_pcie_enable_controller(struct tegra_pcie *pcie) { + struct device *dev = pcie->dev; const struct tegra_pcie_soc *soc = pcie->soc; struct tegra_pcie_port *port; unsigned long value; @@ -1016,7 +1019,7 @@ static int tegra_pcie_enable_controller(struct tegra_pcie *pcie) err = tegra_pcie_phy_power_on(pcie); if (err < 0) { - dev_err(pcie->dev, "failed to power on PHY(s): %d\n", err); + dev_err(dev, "failed to power on PHY(s): %d\n", err); return err; } @@ -1049,13 +1052,14 @@ static int tegra_pcie_enable_controller(struct tegra_pcie *pcie) static void tegra_pcie_power_off(struct tegra_pcie *pcie) { + struct device *dev = pcie->dev; int err; /* TODO: disable and unprepare clocks? 
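 *
 * A sketch of what that could look like, mirroring the enable order
 * in tegra_pcie_power_on() in reverse (illustrative only, the driver
 * does not do this yet):
 *
 *	clk_disable_unprepare(pcie->pll_e);
 *	if (pcie->soc->has_cml_clk)
 *		clk_disable_unprepare(pcie->cml_clk);
 *	clk_disable_unprepare(pcie->afi_clk);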
*/ err = tegra_pcie_phy_power_off(pcie); if (err < 0) - dev_err(pcie->dev, "failed to power off PHY(s): %d\n", err); + dev_err(dev, "failed to power off PHY(s): %d\n", err); reset_control_assert(pcie->pcie_xrst); reset_control_assert(pcie->afi_rst); @@ -1065,11 +1069,12 @@ static void tegra_pcie_power_off(struct tegra_pcie *pcie) err = regulator_bulk_disable(pcie->num_supplies, pcie->supplies); if (err < 0) - dev_warn(pcie->dev, "failed to disable regulators: %d\n", err); + dev_warn(dev, "failed to disable regulators: %d\n", err); } static int tegra_pcie_power_on(struct tegra_pcie *pcie) { + struct device *dev = pcie->dev; const struct tegra_pcie_soc *soc = pcie->soc; int err; @@ -1082,13 +1087,13 @@ static int tegra_pcie_power_on(struct tegra_pcie *pcie) /* enable regulators */ err = regulator_bulk_enable(pcie->num_supplies, pcie->supplies); if (err < 0) - dev_err(pcie->dev, "failed to enable regulators: %d\n", err); + dev_err(dev, "failed to enable regulators: %d\n", err); err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_PCIE, pcie->pex_clk, pcie->pex_rst); if (err) { - dev_err(pcie->dev, "powerup sequence failed: %d\n", err); + dev_err(dev, "powerup sequence failed: %d\n", err); return err; } @@ -1096,22 +1101,21 @@ static int tegra_pcie_power_on(struct tegra_pcie *pcie) err = clk_prepare_enable(pcie->afi_clk); if (err < 0) { - dev_err(pcie->dev, "failed to enable AFI clock: %d\n", err); + dev_err(dev, "failed to enable AFI clock: %d\n", err); return err; } if (soc->has_cml_clk) { err = clk_prepare_enable(pcie->cml_clk); if (err < 0) { - dev_err(pcie->dev, "failed to enable CML clock: %d\n", - err); + dev_err(dev, "failed to enable CML clock: %d\n", err); return err; } } err = clk_prepare_enable(pcie->pll_e); if (err < 0) { - dev_err(pcie->dev, "failed to enable PLLE clock: %d\n", err); + dev_err(dev, "failed to enable PLLE clock: %d\n", err); return err; } @@ -1120,22 +1124,23 @@ static int tegra_pcie_power_on(struct tegra_pcie *pcie) static int tegra_pcie_clocks_get(struct tegra_pcie *pcie) { + struct device *dev = pcie->dev; const struct tegra_pcie_soc *soc = pcie->soc; - pcie->pex_clk = devm_clk_get(pcie->dev, "pex"); + pcie->pex_clk = devm_clk_get(dev, "pex"); if (IS_ERR(pcie->pex_clk)) return PTR_ERR(pcie->pex_clk); - pcie->afi_clk = devm_clk_get(pcie->dev, "afi"); + pcie->afi_clk = devm_clk_get(dev, "afi"); if (IS_ERR(pcie->afi_clk)) return PTR_ERR(pcie->afi_clk); - pcie->pll_e = devm_clk_get(pcie->dev, "pll_e"); + pcie->pll_e = devm_clk_get(dev, "pll_e"); if (IS_ERR(pcie->pll_e)) return PTR_ERR(pcie->pll_e); if (soc->has_cml_clk) { - pcie->cml_clk = devm_clk_get(pcie->dev, "cml"); + pcie->cml_clk = devm_clk_get(dev, "cml"); if (IS_ERR(pcie->cml_clk)) return PTR_ERR(pcie->cml_clk); } @@ -1145,15 +1150,17 @@ static int tegra_pcie_clocks_get(struct tegra_pcie *pcie) static int tegra_pcie_resets_get(struct tegra_pcie *pcie) { - pcie->pex_rst = devm_reset_control_get(pcie->dev, "pex"); + struct device *dev = pcie->dev; + + pcie->pex_rst = devm_reset_control_get(dev, "pex"); if (IS_ERR(pcie->pex_rst)) return PTR_ERR(pcie->pex_rst); - pcie->afi_rst = devm_reset_control_get(pcie->dev, "afi"); + pcie->afi_rst = devm_reset_control_get(dev, "afi"); if (IS_ERR(pcie->afi_rst)) return PTR_ERR(pcie->afi_rst); - pcie->pcie_xrst = devm_reset_control_get(pcie->dev, "pcie_x"); + pcie->pcie_xrst = devm_reset_control_get(dev, "pcie_x"); if (IS_ERR(pcie->pcie_xrst)) return PTR_ERR(pcie->pcie_xrst); @@ -1162,18 +1169,19 @@ static int tegra_pcie_resets_get(struct tegra_pcie *pcie) static int 
tegra_pcie_phys_get_legacy(struct tegra_pcie *pcie) { + struct device *dev = pcie->dev; int err; - pcie->phy = devm_phy_optional_get(pcie->dev, "pcie"); + pcie->phy = devm_phy_optional_get(dev, "pcie"); if (IS_ERR(pcie->phy)) { err = PTR_ERR(pcie->phy); - dev_err(pcie->dev, "failed to get PHY: %d\n", err); + dev_err(dev, "failed to get PHY: %d\n", err); return err; } err = phy_init(pcie->phy); if (err < 0) { - dev_err(pcie->dev, "failed to initialize PHY: %d\n", err); + dev_err(dev, "failed to initialize PHY: %d\n", err); return err; } @@ -1256,43 +1264,44 @@ static int tegra_pcie_phys_get(struct tegra_pcie *pcie) static int tegra_pcie_get_resources(struct tegra_pcie *pcie) { - struct platform_device *pdev = to_platform_device(pcie->dev); + struct device *dev = pcie->dev; + struct platform_device *pdev = to_platform_device(dev); struct resource *pads, *afi, *res; int err; err = tegra_pcie_clocks_get(pcie); if (err) { - dev_err(&pdev->dev, "failed to get clocks: %d\n", err); + dev_err(dev, "failed to get clocks: %d\n", err); return err; } err = tegra_pcie_resets_get(pcie); if (err) { - dev_err(&pdev->dev, "failed to get resets: %d\n", err); + dev_err(dev, "failed to get resets: %d\n", err); return err; } err = tegra_pcie_phys_get(pcie); if (err < 0) { - dev_err(&pdev->dev, "failed to get PHYs: %d\n", err); + dev_err(dev, "failed to get PHYs: %d\n", err); return err; } err = tegra_pcie_power_on(pcie); if (err) { - dev_err(&pdev->dev, "failed to power up: %d\n", err); + dev_err(dev, "failed to power up: %d\n", err); return err; } pads = platform_get_resource_byname(pdev, IORESOURCE_MEM, "pads"); - pcie->pads = devm_ioremap_resource(&pdev->dev, pads); + pcie->pads = devm_ioremap_resource(dev, pads); if (IS_ERR(pcie->pads)) { err = PTR_ERR(pcie->pads); goto poweroff; } afi = platform_get_resource_byname(pdev, IORESOURCE_MEM, "afi"); - pcie->afi = devm_ioremap_resource(&pdev->dev, afi); + pcie->afi = devm_ioremap_resource(dev, afi); if (IS_ERR(pcie->afi)) { err = PTR_ERR(pcie->afi); goto poweroff; @@ -1305,7 +1314,7 @@ static int tegra_pcie_get_resources(struct tegra_pcie *pcie) goto poweroff; } - pcie->cs = devm_request_mem_region(pcie->dev, res->start, + pcie->cs = devm_request_mem_region(dev, res->start, resource_size(res), res->name); if (!pcie->cs) { err = -EADDRNOTAVAIL; @@ -1315,7 +1324,7 @@ static int tegra_pcie_get_resources(struct tegra_pcie *pcie) /* request interrupt */ err = platform_get_irq_byname(pdev, "intr"); if (err < 0) { - dev_err(&pdev->dev, "failed to get IRQ: %d\n", err); + dev_err(dev, "failed to get IRQ: %d\n", err); goto poweroff; } @@ -1323,7 +1332,7 @@ static int tegra_pcie_get_resources(struct tegra_pcie *pcie) err = request_irq(pcie->irq, tegra_pcie_isr, IRQF_SHARED, "PCIE", pcie); if (err) { - dev_err(&pdev->dev, "failed to register IRQ: %d\n", err); + dev_err(dev, "failed to register IRQ: %d\n", err); goto poweroff; } @@ -1336,6 +1345,7 @@ poweroff: static int tegra_pcie_put_resources(struct tegra_pcie *pcie) { + struct device *dev = pcie->dev; int err; if (pcie->irq > 0) @@ -1345,7 +1355,7 @@ static int tegra_pcie_put_resources(struct tegra_pcie *pcie) err = phy_exit(pcie->phy); if (err < 0) - dev_err(pcie->dev, "failed to teardown PHY: %d\n", err); + dev_err(dev, "failed to teardown PHY: %d\n", err); return 0; } @@ -1384,6 +1394,7 @@ static void tegra_msi_free(struct tegra_msi *chip, unsigned long irq) static irqreturn_t tegra_pcie_msi_irq(int irq, void *data) { struct tegra_pcie *pcie = data; + struct device *dev = pcie->dev; struct tegra_msi *msi = &pcie->msi; 
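Nearly every tegra hunk in this stretch of the series is the same mechanical transformation: hoist the repeated pcie->dev (or &pdev->dev) dereference into a local struct device *dev at the top of the function, then route every dev_err()/dev_info()/dev_warn() call through it. A minimal user-space model of that pattern follows; struct device, struct tegra_pcie and the dev_err() macro are stand-ins invented for the sketch (it uses the GNU ##__VA_ARGS__ extension), not the kernel's definitions.

#include <stdio.h>

struct device { const char *name; };
struct tegra_pcie { struct device *dev; };

/* stand-in for the kernel's dev_err(); GNU ##__VA_ARGS__ extension */
#define dev_err(dev, fmt, ...) \
	fprintf(stderr, "%s: " fmt, (dev)->name, ##__VA_ARGS__)

static int tegra_pcie_phy_power_on_model(struct tegra_pcie *pcie)
{
	struct device *dev = pcie->dev;	/* cached once, used in every log call */
	int err = -19;			/* pretend a PHY call returned -ENODEV */

	if (err < 0)
		dev_err(dev, "failed to power on PHY: %d\n", err);

	return err;
}

int main(void)
{
	struct device d = { "tegra-pcie" };
	struct tegra_pcie pcie = { &d };

	tegra_pcie_phy_power_on_model(&pcie);
	return 0;
}

Beyond saving a dereference, the local lets long diagnostics fit on one line, which is what most of the re-wrapped dev_err() lines in these hunks are about.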
unsigned int i, processed = 0; @@ -1403,13 +1414,13 @@ static irqreturn_t tegra_pcie_msi_irq(int irq, void *data) if (test_bit(index, msi->used)) generic_handle_irq(irq); else - dev_info(pcie->dev, "unhandled MSI\n"); + dev_info(dev, "unhandled MSI\n"); } else { /* * that's weird who triggered this? * just clear it */ - dev_info(pcie->dev, "unexpected MSI\n"); + dev_info(dev, "unexpected MSI\n"); } /* see if there's any more pending in this vector */ @@ -1488,7 +1499,8 @@ static const struct irq_domain_ops msi_domain_ops = { static int tegra_pcie_enable_msi(struct tegra_pcie *pcie) { - struct platform_device *pdev = to_platform_device(pcie->dev); + struct device *dev = pcie->dev; + struct platform_device *pdev = to_platform_device(dev); const struct tegra_pcie_soc *soc = pcie->soc; struct tegra_msi *msi = &pcie->msi; unsigned long base; @@ -1497,20 +1509,20 @@ static int tegra_pcie_enable_msi(struct tegra_pcie *pcie) mutex_init(&msi->lock); - msi->chip.dev = pcie->dev; + msi->chip.dev = dev; msi->chip.setup_irq = tegra_msi_setup_irq; msi->chip.teardown_irq = tegra_msi_teardown_irq; - msi->domain = irq_domain_add_linear(pcie->dev->of_node, INT_PCI_MSI_NR, + msi->domain = irq_domain_add_linear(dev->of_node, INT_PCI_MSI_NR, &msi_domain_ops, &msi->chip); if (!msi->domain) { - dev_err(&pdev->dev, "failed to create IRQ domain\n"); + dev_err(dev, "failed to create IRQ domain\n"); return -ENOMEM; } err = platform_get_irq_byname(pdev, "msi"); if (err < 0) { - dev_err(&pdev->dev, "failed to get IRQ: %d\n", err); + dev_err(dev, "failed to get IRQ: %d\n", err); goto err; } @@ -1519,7 +1531,7 @@ static int tegra_pcie_enable_msi(struct tegra_pcie *pcie) err = request_irq(msi->irq, tegra_pcie_msi_irq, IRQF_NO_THREAD, tegra_msi_irq_chip.name, pcie); if (err < 0) { - dev_err(&pdev->dev, "failed to request IRQ: %d\n", err); + dev_err(dev, "failed to request IRQ: %d\n", err); goto err; } @@ -1594,46 +1606,47 @@ static int tegra_pcie_disable_msi(struct tegra_pcie *pcie) static int tegra_pcie_get_xbar_config(struct tegra_pcie *pcie, u32 lanes, u32 *xbar) { - struct device_node *np = pcie->dev->of_node; + struct device *dev = pcie->dev; + struct device_node *np = dev->of_node; if (of_device_is_compatible(np, "nvidia,tegra124-pcie")) { switch (lanes) { case 0x0000104: - dev_info(pcie->dev, "4x1, 1x1 configuration\n"); + dev_info(dev, "4x1, 1x1 configuration\n"); *xbar = AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_X4_X1; return 0; case 0x0000102: - dev_info(pcie->dev, "2x1, 1x1 configuration\n"); + dev_info(dev, "2x1, 1x1 configuration\n"); *xbar = AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_X2_X1; return 0; } } else if (of_device_is_compatible(np, "nvidia,tegra30-pcie")) { switch (lanes) { case 0x00000204: - dev_info(pcie->dev, "4x1, 2x1 configuration\n"); + dev_info(dev, "4x1, 2x1 configuration\n"); *xbar = AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_420; return 0; case 0x00020202: - dev_info(pcie->dev, "2x3 configuration\n"); + dev_info(dev, "2x3 configuration\n"); *xbar = AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_222; return 0; case 0x00010104: - dev_info(pcie->dev, "4x1, 1x2 configuration\n"); + dev_info(dev, "4x1, 1x2 configuration\n"); *xbar = AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_411; return 0; } } else if (of_device_is_compatible(np, "nvidia,tegra20-pcie")) { switch (lanes) { case 0x00000004: - dev_info(pcie->dev, "single-mode configuration\n"); + dev_info(dev, "single-mode configuration\n"); *xbar = AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_SINGLE; return 0; case 0x00000202: - dev_info(pcie->dev, "dual-mode configuration\n"); + 
dev_info(dev, "dual-mode configuration\n"); *xbar = AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_DUAL; return 0; } @@ -1673,7 +1686,8 @@ static bool of_regulator_bulk_available(struct device_node *np, */ static int tegra_pcie_get_legacy_regulators(struct tegra_pcie *pcie) { - struct device_node *np = pcie->dev->of_node; + struct device *dev = pcie->dev; + struct device_node *np = dev->of_node; if (of_device_is_compatible(np, "nvidia,tegra30-pcie")) pcie->num_supplies = 3; @@ -1681,12 +1695,12 @@ static int tegra_pcie_get_legacy_regulators(struct tegra_pcie *pcie) pcie->num_supplies = 2; if (pcie->num_supplies == 0) { - dev_err(pcie->dev, "device %s not supported in legacy mode\n", + dev_err(dev, "device %s not supported in legacy mode\n", np->full_name); return -ENODEV; } - pcie->supplies = devm_kcalloc(pcie->dev, pcie->num_supplies, + pcie->supplies = devm_kcalloc(dev, pcie->num_supplies, sizeof(*pcie->supplies), GFP_KERNEL); if (!pcie->supplies) @@ -1698,8 +1712,7 @@ static int tegra_pcie_get_legacy_regulators(struct tegra_pcie *pcie) if (pcie->num_supplies > 2) pcie->supplies[2].supply = "avdd"; - return devm_regulator_bulk_get(pcie->dev, pcie->num_supplies, - pcie->supplies); + return devm_regulator_bulk_get(dev, pcie->num_supplies, pcie->supplies); } /* @@ -1713,13 +1726,14 @@ static int tegra_pcie_get_legacy_regulators(struct tegra_pcie *pcie) */ static int tegra_pcie_get_regulators(struct tegra_pcie *pcie, u32 lane_mask) { - struct device_node *np = pcie->dev->of_node; + struct device *dev = pcie->dev; + struct device_node *np = dev->of_node; unsigned int i = 0; if (of_device_is_compatible(np, "nvidia,tegra124-pcie")) { pcie->num_supplies = 7; - pcie->supplies = devm_kcalloc(pcie->dev, pcie->num_supplies, + pcie->supplies = devm_kcalloc(dev, pcie->num_supplies, sizeof(*pcie->supplies), GFP_KERNEL); if (!pcie->supplies) @@ -1746,7 +1760,7 @@ static int tegra_pcie_get_regulators(struct tegra_pcie *pcie, u32 lane_mask) pcie->num_supplies = 4 + (need_pexa ? 2 : 0) + (need_pexb ? 2 : 0); - pcie->supplies = devm_kcalloc(pcie->dev, pcie->num_supplies, + pcie->supplies = devm_kcalloc(dev, pcie->num_supplies, sizeof(*pcie->supplies), GFP_KERNEL); if (!pcie->supplies) @@ -1769,7 +1783,7 @@ static int tegra_pcie_get_regulators(struct tegra_pcie *pcie, u32 lane_mask) } else if (of_device_is_compatible(np, "nvidia,tegra20-pcie")) { pcie->num_supplies = 5; - pcie->supplies = devm_kcalloc(pcie->dev, pcie->num_supplies, + pcie->supplies = devm_kcalloc(dev, pcie->num_supplies, sizeof(*pcie->supplies), GFP_KERNEL); if (!pcie->supplies) @@ -1782,9 +1796,9 @@ static int tegra_pcie_get_regulators(struct tegra_pcie *pcie, u32 lane_mask) pcie->supplies[4].supply = "vddio-pex-clk"; } - if (of_regulator_bulk_available(pcie->dev->of_node, pcie->supplies, + if (of_regulator_bulk_available(dev->of_node, pcie->supplies, pcie->num_supplies)) - return devm_regulator_bulk_get(pcie->dev, pcie->num_supplies, + return devm_regulator_bulk_get(dev, pcie->num_supplies, pcie->supplies); /* @@ -1792,9 +1806,9 @@ static int tegra_pcie_get_regulators(struct tegra_pcie *pcie, u32 lane_mask) * that the device tree complies with an older version of the device * tree binding. 
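The lane configurations matched in the tegra_pcie_get_xbar_config() hunk above are packed keys: reading the visible cases, each root port contributes its "nvidia,num-lanes" value shifted left by eight bits per zero-based port index, so 0x00020202 is three x2 ports and 0x0000104 is a x4 port plus a x1 port. The parse loop further below accumulates that key before the lookup. A small self-contained check of this reading; the helper name and test vectors are mine, not the driver's:

#include <assert.h>
#include <stdint.h>

/* builds the lookup key from per-port lane counts, one byte per port */
static uint32_t xbar_key(const unsigned *num_lanes, unsigned nports)
{
	uint32_t lanes = 0;
	unsigned index;

	for (index = 0; index < nports; index++)
		lanes |= (uint32_t)num_lanes[index] << (index << 3);

	return lanes;
}

int main(void)
{
	const unsigned x4_x1[] = { 4, 1 };		/* "4x1, 1x1 configuration" */
	const unsigned x2_x2_x2[] = { 2, 2, 2 };	/* "2x3 configuration" */

	assert(xbar_key(x4_x1, 2) == 0x0000104);
	assert(xbar_key(x2_x2_x2, 3) == 0x00020202);
	return 0;
}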
*/ - dev_info(pcie->dev, "using legacy DT binding for power supplies\n"); + dev_info(dev, "using legacy DT binding for power supplies\n"); - devm_kfree(pcie->dev, pcie->supplies); + devm_kfree(dev, pcie->supplies); pcie->num_supplies = 0; return tegra_pcie_get_legacy_regulators(pcie); @@ -1802,7 +1816,8 @@ static int tegra_pcie_get_regulators(struct tegra_pcie *pcie, u32 lane_mask) static int tegra_pcie_parse_dt(struct tegra_pcie *pcie) { - struct device_node *np = pcie->dev->of_node, *port; + struct device *dev = pcie->dev; + struct device_node *np = dev->of_node, *port; const struct tegra_pcie_soc *soc = pcie->soc; struct of_pci_range_parser parser; struct of_pci_range range; @@ -1812,7 +1827,7 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie) int err; if (of_pci_range_parser_init(&parser, np)) { - dev_err(pcie->dev, "missing \"ranges\" property\n"); + dev_err(dev, "missing \"ranges\" property\n"); return -EINVAL; } @@ -1867,8 +1882,7 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie) err = of_pci_parse_bus_range(np, &pcie->busn); if (err < 0) { - dev_err(pcie->dev, "failed to parse ranges property: %d\n", - err); + dev_err(dev, "failed to parse ranges property: %d\n", err); pcie->busn.name = np->name; pcie->busn.start = 0; pcie->busn.end = 0xff; @@ -1883,15 +1897,14 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie) err = of_pci_get_devfn(port); if (err < 0) { - dev_err(pcie->dev, "failed to parse address: %d\n", - err); + dev_err(dev, "failed to parse address: %d\n", err); return err; } index = PCI_SLOT(err); if (index < 1 || index > soc->num_ports) { - dev_err(pcie->dev, "invalid port number: %d\n", index); + dev_err(dev, "invalid port number: %d\n", index); return -EINVAL; } @@ -1899,13 +1912,13 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie) err = of_property_read_u32(port, "nvidia,num-lanes", &value); if (err < 0) { - dev_err(pcie->dev, "failed to parse # of lanes: %d\n", + dev_err(dev, "failed to parse # of lanes: %d\n", err); return err; } if (value > 16) { - dev_err(pcie->dev, "invalid # of lanes: %u\n", value); + dev_err(dev, "invalid # of lanes: %u\n", value); return -EINVAL; } @@ -1919,14 +1932,13 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie) mask |= ((1 << value) - 1) << lane; lane += value; - rp = devm_kzalloc(pcie->dev, sizeof(*rp), GFP_KERNEL); + rp = devm_kzalloc(dev, sizeof(*rp), GFP_KERNEL); if (!rp) return -ENOMEM; err = of_address_to_resource(port, 0, &rp->regs); if (err < 0) { - dev_err(pcie->dev, "failed to parse address: %d\n", - err); + dev_err(dev, "failed to parse address: %d\n", err); return err; } @@ -1936,7 +1948,7 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie) rp->pcie = pcie; rp->np = port; - rp->base = devm_ioremap_resource(pcie->dev, &rp->regs); + rp->base = devm_ioremap_resource(dev, &rp->regs); if (IS_ERR(rp->base)) return PTR_ERR(rp->base); @@ -1945,7 +1957,7 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie) err = tegra_pcie_get_xbar_config(pcie, lanes, &pcie->xbar_config); if (err < 0) { - dev_err(pcie->dev, "invalid lane configuration\n"); + dev_err(dev, "invalid lane configuration\n"); return err; } @@ -1964,6 +1976,7 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie) #define TEGRA_PCIE_LINKUP_TIMEOUT 200 /* up to 1.2 seconds */ static bool tegra_pcie_port_check_link(struct tegra_pcie_port *port) { + struct device *dev = port->pcie->dev; unsigned int retries = 3; unsigned long value; @@ -1986,8 +1999,7 @@ static bool tegra_pcie_port_check_link(struct tegra_pcie_port 
*port) } while (--timeout); if (!timeout) { - dev_err(port->pcie->dev, "link %u down, retrying\n", - port->index); + dev_err(dev, "link %u down, retrying\n", port->index); goto retry; } @@ -2011,11 +2023,12 @@ retry: static int tegra_pcie_enable(struct tegra_pcie *pcie) { + struct device *dev = pcie->dev; struct tegra_pcie_port *port, *tmp; struct hw_pci hw; list_for_each_entry_safe(port, tmp, &pcie->ports, list) { - dev_info(pcie->dev, "probing port %u, using %u lanes\n", + dev_info(dev, "probing port %u, using %u lanes\n", port->index, port->lanes); tegra_pcie_port_enable(port); @@ -2023,7 +2036,7 @@ static int tegra_pcie_enable(struct tegra_pcie *pcie) if (tegra_pcie_port_check_link(port)) continue; - dev_info(pcie->dev, "link %u down, ignoring\n", port->index); + dev_info(dev, "link %u down, ignoring\n", port->index); tegra_pcie_port_disable(port); tegra_pcie_port_free(port); @@ -2041,8 +2054,7 @@ static int tegra_pcie_enable(struct tegra_pcie *pcie) hw.map_irq = tegra_pcie_map_irq; hw.ops = &tegra_pcie_ops; - pci_common_init_dev(pcie->dev, &hw); - + pci_common_init_dev(dev, &hw); return 0; } @@ -2204,17 +2216,18 @@ remove: static int tegra_pcie_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; struct tegra_pcie *pcie; int err; - pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL); + pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); if (!pcie) return -ENOMEM; - pcie->soc = of_device_get_match_data(&pdev->dev); + pcie->soc = of_device_get_match_data(dev); INIT_LIST_HEAD(&pcie->buses); INIT_LIST_HEAD(&pcie->ports); - pcie->dev = &pdev->dev; + pcie->dev = dev; err = tegra_pcie_parse_dt(pcie); if (err < 0) @@ -2222,7 +2235,7 @@ static int tegra_pcie_probe(struct platform_device *pdev) err = tegra_pcie_get_resources(pcie); if (err < 0) { - dev_err(&pdev->dev, "failed to request resources: %d\n", err); + dev_err(dev, "failed to request resources: %d\n", err); return err; } @@ -2236,27 +2249,23 @@ static int tegra_pcie_probe(struct platform_device *pdev) if (IS_ENABLED(CONFIG_PCI_MSI)) { err = tegra_pcie_enable_msi(pcie); if (err < 0) { - dev_err(&pdev->dev, - "failed to enable MSI support: %d\n", - err); + dev_err(dev, "failed to enable MSI support: %d\n", err); goto put_resources; } } err = tegra_pcie_enable(pcie); if (err < 0) { - dev_err(&pdev->dev, "failed to enable PCIe ports: %d\n", err); + dev_err(dev, "failed to enable PCIe ports: %d\n", err); goto disable_msi; } if (IS_ENABLED(CONFIG_DEBUG_FS)) { err = tegra_pcie_debugfs_init(pcie); if (err < 0) - dev_err(&pdev->dev, "failed to setup debugfs: %d\n", - err); + dev_err(dev, "failed to setup debugfs: %d\n", err); } - platform_set_drvdata(pdev, pcie); return 0; disable_msi: diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c index a81273c23341..1de23d74783f 100644 --- a/drivers/pci/host/pci-xgene.c +++ b/drivers/pci/host/pci-xgene.c @@ -76,6 +76,16 @@ struct xgene_pcie_port { u32 version; }; +static u32 xgene_pcie_readl(struct xgene_pcie_port *port, u32 reg) +{ + return readl(port->csr_base + reg); +} + +static void xgene_pcie_writel(struct xgene_pcie_port *port, u32 reg, u32 val) +{ + writel(val, port->csr_base + reg); +} + static inline u32 pcie_bar_low_val(u32 addr, u32 flags) { return (addr & PCI_BASE_ADDRESS_MEM_MASK) | flags; @@ -112,9 +122,9 @@ static void xgene_pcie_set_rtdid_reg(struct pci_bus *bus, uint devfn) if (!pci_is_root_bus(bus)) rtdid_val = (b << 8) | (d << 3) | f; - writel(rtdid_val, port->csr_base + RTDID); + xgene_pcie_writel(port, RTDID, rtdid_val); /* read the 
register back to ensure flush */ - readl(port->csr_base + RTDID); + xgene_pcie_readl(port, RTDID); } /* @@ -179,28 +189,28 @@ static struct pci_ops xgene_pcie_ops = { .write = pci_generic_config_write32, }; -static u64 xgene_pcie_set_ib_mask(void __iomem *csr_base, u32 addr, +static u64 xgene_pcie_set_ib_mask(struct xgene_pcie_port *port, u32 addr, u32 flags, u64 size) { u64 mask = (~(size - 1) & PCI_BASE_ADDRESS_MEM_MASK) | flags; u32 val32 = 0; u32 val; - val32 = readl(csr_base + addr); + val32 = xgene_pcie_readl(port, addr); val = (val32 & 0x0000ffff) | (lower_32_bits(mask) << 16); - writel(val, csr_base + addr); + xgene_pcie_writel(port, addr, val); - val32 = readl(csr_base + addr + 0x04); + val32 = xgene_pcie_readl(port, addr + 0x04); val = (val32 & 0xffff0000) | (lower_32_bits(mask) >> 16); - writel(val, csr_base + addr + 0x04); + xgene_pcie_writel(port, addr + 0x04, val); - val32 = readl(csr_base + addr + 0x04); + val32 = xgene_pcie_readl(port, addr + 0x04); val = (val32 & 0x0000ffff) | (upper_32_bits(mask) << 16); - writel(val, csr_base + addr + 0x04); + xgene_pcie_writel(port, addr + 0x04, val); - val32 = readl(csr_base + addr + 0x08); + val32 = xgene_pcie_readl(port, addr + 0x08); val = (val32 & 0xffff0000) | (upper_32_bits(mask) >> 16); - writel(val, csr_base + addr + 0x08); + xgene_pcie_writel(port, addr + 0x08, val); return mask; } @@ -208,32 +218,32 @@ static u64 xgene_pcie_set_ib_mask(void __iomem *csr_base, u32 addr, static void xgene_pcie_linkup(struct xgene_pcie_port *port, u32 *lanes, u32 *speed) { - void __iomem *csr_base = port->csr_base; u32 val32; port->link_up = false; - val32 = readl(csr_base + PCIECORE_CTLANDSTATUS); + val32 = xgene_pcie_readl(port, PCIECORE_CTLANDSTATUS); if (val32 & LINK_UP_MASK) { port->link_up = true; *speed = PIPE_PHY_RATE_RD(val32); - val32 = readl(csr_base + BRIDGE_STATUS_0); + val32 = xgene_pcie_readl(port, BRIDGE_STATUS_0); *lanes = val32 >> 26; } } static int xgene_pcie_init_port(struct xgene_pcie_port *port) { + struct device *dev = port->dev; int rc; - port->clk = clk_get(port->dev, NULL); + port->clk = clk_get(dev, NULL); if (IS_ERR(port->clk)) { - dev_err(port->dev, "clock not available\n"); + dev_err(dev, "clock not available\n"); return -ENODEV; } rc = clk_prepare_enable(port->clk); if (rc) { - dev_err(port->dev, "clock enable failed\n"); + dev_err(dev, "clock enable failed\n"); return rc; } @@ -243,15 +253,16 @@ static int xgene_pcie_init_port(struct xgene_pcie_port *port) static int xgene_pcie_map_reg(struct xgene_pcie_port *port, struct platform_device *pdev) { + struct device *dev = port->dev; struct resource *res; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "csr"); - port->csr_base = devm_ioremap_resource(port->dev, res); + port->csr_base = devm_ioremap_resource(dev, res); if (IS_ERR(port->csr_base)) return PTR_ERR(port->csr_base); res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cfg"); - port->cfg_base = devm_ioremap_resource(port->dev, res); + port->cfg_base = devm_ioremap_resource(dev, res); if (IS_ERR(port->cfg_base)) return PTR_ERR(port->cfg_base); port->cfg_addr = res->start; @@ -263,7 +274,7 @@ static void xgene_pcie_setup_ob_reg(struct xgene_pcie_port *port, struct resource *res, u32 offset, u64 cpu_addr, u64 pci_addr) { - void __iomem *base = port->csr_base + offset; + struct device *dev = port->dev; resource_size_t size = resource_size(res); u64 restype = resource_type(res); u64 mask = 0; @@ -280,22 +291,24 @@ static void xgene_pcie_setup_ob_reg(struct xgene_pcie_port *port, if (size >= 
min_size) mask = ~(size - 1) | flag; else - dev_warn(port->dev, "res size 0x%llx less than minimum 0x%x\n", + dev_warn(dev, "res size 0x%llx less than minimum 0x%x\n", (u64)size, min_size); - writel(lower_32_bits(cpu_addr), base); - writel(upper_32_bits(cpu_addr), base + 0x04); - writel(lower_32_bits(mask), base + 0x08); - writel(upper_32_bits(mask), base + 0x0c); - writel(lower_32_bits(pci_addr), base + 0x10); - writel(upper_32_bits(pci_addr), base + 0x14); + xgene_pcie_writel(port, offset, lower_32_bits(cpu_addr)); + xgene_pcie_writel(port, offset + 0x04, upper_32_bits(cpu_addr)); + xgene_pcie_writel(port, offset + 0x08, lower_32_bits(mask)); + xgene_pcie_writel(port, offset + 0x0c, upper_32_bits(mask)); + xgene_pcie_writel(port, offset + 0x10, lower_32_bits(pci_addr)); + xgene_pcie_writel(port, offset + 0x14, upper_32_bits(pci_addr)); } -static void xgene_pcie_setup_cfg_reg(void __iomem *csr_base, u64 addr) +static void xgene_pcie_setup_cfg_reg(struct xgene_pcie_port *port) { - writel(lower_32_bits(addr), csr_base + CFGBARL); - writel(upper_32_bits(addr), csr_base + CFGBARH); - writel(EN_REG, csr_base + CFGCTL); + u64 addr = port->cfg_addr; + + xgene_pcie_writel(port, CFGBARL, lower_32_bits(addr)); + xgene_pcie_writel(port, CFGBARH, upper_32_bits(addr)); + xgene_pcie_writel(port, CFGCTL, EN_REG); } static int xgene_pcie_map_ranges(struct xgene_pcie_port *port, @@ -310,7 +323,7 @@ static int xgene_pcie_map_ranges(struct xgene_pcie_port *port, struct resource *res = window->res; u64 restype = resource_type(res); - dev_dbg(port->dev, "%pR\n", res); + dev_dbg(dev, "%pR\n", res); switch (restype) { case IORESOURCE_IO: @@ -339,17 +352,18 @@ static int xgene_pcie_map_ranges(struct xgene_pcie_port *port, return -EINVAL; } } - xgene_pcie_setup_cfg_reg(port->csr_base, port->cfg_addr); - + xgene_pcie_setup_cfg_reg(port); return 0; } -static void xgene_pcie_setup_pims(void *addr, u64 pim, u64 size) +static void xgene_pcie_setup_pims(struct xgene_pcie_port *port, u32 pim_reg, + u64 pim, u64 size) { - writel(lower_32_bits(pim), addr); - writel(upper_32_bits(pim) | EN_COHERENCY, addr + 0x04); - writel(lower_32_bits(size), addr + 0x10); - writel(upper_32_bits(size), addr + 0x14); + xgene_pcie_writel(port, pim_reg, lower_32_bits(pim)); + xgene_pcie_writel(port, pim_reg + 0x04, + upper_32_bits(pim) | EN_COHERENCY); + xgene_pcie_writel(port, pim_reg + 0x10, lower_32_bits(size)); + xgene_pcie_writel(port, pim_reg + 0x14, upper_32_bits(size)); } /* @@ -379,10 +393,10 @@ static int xgene_pcie_select_ib_reg(u8 *ib_reg_mask, u64 size) static void xgene_pcie_setup_ib_reg(struct xgene_pcie_port *port, struct of_pci_range *range, u8 *ib_reg_mask) { - void __iomem *csr_base = port->csr_base; void __iomem *cfg_base = port->cfg_base; + struct device *dev = port->dev; void *bar_addr; - void *pim_addr; + u32 pim_reg; u64 cpu_addr = range->cpu_addr; u64 pci_addr = range->pci_addr; u64 size = range->size; @@ -393,7 +407,7 @@ static void xgene_pcie_setup_ib_reg(struct xgene_pcie_port *port, region = xgene_pcie_select_ib_reg(ib_reg_mask, range->size); if (region < 0) { - dev_warn(port->dev, "invalid pcie dma-range config\n"); + dev_warn(dev, "invalid pcie dma-range config\n"); return; } @@ -403,29 +417,27 @@ static void xgene_pcie_setup_ib_reg(struct xgene_pcie_port *port, bar_low = pcie_bar_low_val((u32)cpu_addr, flags); switch (region) { case 0: - xgene_pcie_set_ib_mask(csr_base, BRIDGE_CFG_4, flags, size); + xgene_pcie_set_ib_mask(port, BRIDGE_CFG_4, flags, size); bar_addr = cfg_base + PCI_BASE_ADDRESS_0; 
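The xgene conversion in these hunks replaces open-coded readl()/writel() pointer arithmetic with the new xgene_pcie_readl()/xgene_pcie_writel() helpers keyed by CSR offset, so routines such as xgene_pcie_setup_pims() now take a register offset rather than a mapped address. A user-space model of the shape of that change, with the CSR window reduced to a plain array and the EN_COHERENCY OR-in elided; all names here are illustrative:

#include <stdint.h>
#include <stdio.h>

struct xgene_port_model {
	uint32_t csr[64];	/* stands in for the ioremapped csr_base */
};

static uint32_t model_readl(struct xgene_port_model *port, uint32_t reg)
{
	return port->csr[reg / 4];
}

static void model_writel(struct xgene_port_model *port, uint32_t reg,
			 uint32_t val)
{
	port->csr[reg / 4] = val;
}

/* mirrors the reworked xgene_pcie_setup_pims(): offsets, not pointers */
static void model_setup_pims(struct xgene_port_model *port, uint32_t pim_reg,
			     uint64_t pim, uint64_t size)
{
	model_writel(port, pim_reg, (uint32_t)pim);
	model_writel(port, pim_reg + 0x04, (uint32_t)(pim >> 32));
	model_writel(port, pim_reg + 0x10, (uint32_t)size);
	model_writel(port, pim_reg + 0x14, (uint32_t)(size >> 32));
}

int main(void)
{
	struct xgene_port_model port = { { 0 } };

	model_setup_pims(&port, 0x20, 0x80000000ULL, ~(0x10000000ULL - 1));
	printf("PIM low 0x%x high 0x%x\n",
	       (unsigned)model_readl(&port, 0x20),
	       (unsigned)model_readl(&port, 0x24));
	return 0;
}

The payoff is visible in the case 1 and case 2 hunks nearby: what was a mix of bar_addr pointers and csr_base arithmetic becomes a uniform list of (register, value) writes.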
writel(bar_low, bar_addr); writel(upper_32_bits(cpu_addr), bar_addr + 0x4); - pim_addr = csr_base + PIM1_1L; + pim_reg = PIM1_1L; break; case 1: - bar_addr = csr_base + IBAR2; - writel(bar_low, bar_addr); - writel(lower_32_bits(mask), csr_base + IR2MSK); - pim_addr = csr_base + PIM2_1L; + xgene_pcie_writel(port, IBAR2, bar_low); + xgene_pcie_writel(port, IR2MSK, lower_32_bits(mask)); + pim_reg = PIM2_1L; break; case 2: - bar_addr = csr_base + IBAR3L; - writel(bar_low, bar_addr); - writel(upper_32_bits(cpu_addr), bar_addr + 0x4); - writel(lower_32_bits(mask), csr_base + IR3MSKL); - writel(upper_32_bits(mask), csr_base + IR3MSKL + 0x4); - pim_addr = csr_base + PIM3_1L; + xgene_pcie_writel(port, IBAR3L, bar_low); + xgene_pcie_writel(port, IBAR3L + 0x4, upper_32_bits(cpu_addr)); + xgene_pcie_writel(port, IR3MSKL, lower_32_bits(mask)); + xgene_pcie_writel(port, IR3MSKL + 0x4, upper_32_bits(mask)); + pim_reg = PIM3_1L; break; } - xgene_pcie_setup_pims(pim_addr, pci_addr, ~(size - 1)); + xgene_pcie_setup_pims(port, pim_reg, pci_addr, ~(size - 1)); } static int pci_dma_range_parser_init(struct of_pci_range_parser *parser, @@ -463,7 +475,7 @@ static int xgene_pcie_parse_map_dma_ranges(struct xgene_pcie_port *port) for_each_of_pci_range(&parser, &range) { u64 end = range.cpu_addr + range.size - 1; - dev_dbg(port->dev, "0x%08x 0x%016llx..0x%016llx -> 0x%016llx\n", + dev_dbg(dev, "0x%08x 0x%016llx..0x%016llx -> 0x%016llx\n", range.flags, range.cpu_addr, end, range.pci_addr); xgene_pcie_setup_ib_reg(port, &range, &ib_reg_mask); } @@ -476,13 +488,14 @@ static void xgene_pcie_clear_config(struct xgene_pcie_port *port) int i; for (i = PIM1_1L; i <= CFGCTL; i += 4) - writel(0x0, port->csr_base + i); + xgene_pcie_writel(port, i, 0); } static int xgene_pcie_setup(struct xgene_pcie_port *port, struct list_head *res, resource_size_t io_base) { + struct device *dev = port->dev; u32 val, lanes = 0, speed = 0; int ret; @@ -490,7 +503,7 @@ static int xgene_pcie_setup(struct xgene_pcie_port *port, /* setup the vendor and device IDs correctly */ val = (XGENE_PCIE_DEVICEID << 16) | XGENE_PCIE_VENDORID; - writel(val, port->csr_base + BRIDGE_CFG_0); + xgene_pcie_writel(port, BRIDGE_CFG_0, val); ret = xgene_pcie_map_ranges(port, res, io_base); if (ret) @@ -502,27 +515,28 @@ static int xgene_pcie_setup(struct xgene_pcie_port *port, xgene_pcie_linkup(port, &lanes, &speed); if (!port->link_up) - dev_info(port->dev, "(rc) link down\n"); + dev_info(dev, "(rc) link down\n"); else - dev_info(port->dev, "(rc) x%d gen-%d link up\n", - lanes, speed + 1); + dev_info(dev, "(rc) x%d gen-%d link up\n", lanes, speed + 1); return 0; } static int xgene_pcie_probe_bridge(struct platform_device *pdev) { - struct device_node *dn = pdev->dev.of_node; + struct device *dev = &pdev->dev; + struct device_node *dn = dev->of_node; struct xgene_pcie_port *port; resource_size_t iobase = 0; struct pci_bus *bus; int ret; LIST_HEAD(res); - port = devm_kzalloc(&pdev->dev, sizeof(*port), GFP_KERNEL); + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL); if (!port) return -ENOMEM; - port->node = of_node_get(pdev->dev.of_node); - port->dev = &pdev->dev; + + port->node = of_node_get(dn); + port->dev = dev; port->version = XGENE_PCIE_IP_VER_UNKN; if (of_device_is_compatible(port->node, "apm,xgene-pcie")) @@ -540,7 +554,7 @@ static int xgene_pcie_probe_bridge(struct platform_device *pdev) if (ret) return ret; - ret = devm_request_pci_bus_resources(&pdev->dev, &res); + ret = devm_request_pci_bus_resources(dev, &res); if (ret) goto error; @@ -548,8 +562,7 @@ 
static int xgene_pcie_probe_bridge(struct platform_device *pdev) if (ret) goto error; - bus = pci_create_root_bus(&pdev->dev, 0, - &xgene_pcie_ops, port, &res); + bus = pci_create_root_bus(dev, 0, &xgene_pcie_ops, port, &res); if (!bus) { ret = -ENOMEM; goto error; @@ -558,8 +571,6 @@ static int xgene_pcie_probe_bridge(struct platform_device *pdev) pci_scan_child_bus(bus); pci_assign_unassigned_bus_resources(bus); pci_bus_add_devices(bus); - - platform_set_drvdata(pdev, port); return 0; error: diff --git a/drivers/pci/host/pcie-altera.c b/drivers/pci/host/pcie-altera.c index c24e96559cbb..b0ac4dfafa0b 100644 --- a/drivers/pci/host/pcie-altera.c +++ b/drivers/pci/host/pcie-altera.c @@ -55,15 +55,19 @@ #define TLP_PAYLOAD_SIZE 0x01 #define TLP_READ_TAG 0x1d #define TLP_WRITE_TAG 0x10 -#define TLP_CFG_DW0(fmttype) (((fmttype) << 24) | TLP_PAYLOAD_SIZE) -#define TLP_CFG_DW1(reqid, tag, be) (((reqid) << 16) | (tag << 8) | (be)) +#define RP_DEVFN 0 +#define TLP_REQ_ID(bus, devfn) (((bus) << 8) | (devfn)) +#define TLP_CFG_DW0(pcie, bus) \ + ((((bus == pcie->root_bus_nr) ? TLP_FMTTYPE_CFGRD0 \ + : TLP_FMTTYPE_CFGRD1) << 24) | \ + TLP_PAYLOAD_SIZE) +#define TLP_CFG_DW1(pcie, tag, be) \ + (((TLP_REQ_ID(pcie->root_bus_nr, RP_DEVFN)) << 16) | (tag << 8) | (be)) #define TLP_CFG_DW2(bus, devfn, offset) \ (((bus) << 24) | ((devfn) << 16) | (offset)) -#define TLP_REQ_ID(bus, devfn) (((bus) << 8) | (devfn)) #define TLP_COMP_STATUS(s) (((s) >> 12) & 7) #define TLP_HDR_SIZE 3 #define TLP_LOOP 500 -#define RP_DEVFN 0 #define LINK_UP_TIMEOUT HZ #define LINK_RETRAIN_TIMEOUT HZ @@ -74,7 +78,7 @@ struct altera_pcie { struct platform_device *pdev; - void __iomem *cra_base; + void __iomem *cra_base; /* DT Cra */ int irq; u8 root_bus_nr; struct irq_domain *irq_domain; @@ -131,7 +135,7 @@ static void tlp_write_tx(struct altera_pcie *pcie, cra_writel(pcie, tlp_rp_regdata->ctrl, RP_TX_CNTRL); } -static bool altera_pcie_valid_config(struct altera_pcie *pcie, +static bool altera_pcie_valid_device(struct altera_pcie *pcie, struct pci_bus *bus, int dev) { /* If there is no link, then there is no device */ @@ -218,13 +222,8 @@ static int tlp_cfg_dword_read(struct altera_pcie *pcie, u8 bus, u32 devfn, { u32 headers[TLP_HDR_SIZE]; - if (bus == pcie->root_bus_nr) - headers[0] = TLP_CFG_DW0(TLP_FMTTYPE_CFGRD0); - else - headers[0] = TLP_CFG_DW0(TLP_FMTTYPE_CFGRD1); - - headers[1] = TLP_CFG_DW1(TLP_REQ_ID(pcie->root_bus_nr, RP_DEVFN), - TLP_READ_TAG, byte_en); + headers[0] = TLP_CFG_DW0(pcie, bus); + headers[1] = TLP_CFG_DW1(pcie, TLP_READ_TAG, byte_en); headers[2] = TLP_CFG_DW2(bus, devfn, where); tlp_write_packet(pcie, headers, 0, false); @@ -238,13 +237,8 @@ static int tlp_cfg_dword_write(struct altera_pcie *pcie, u8 bus, u32 devfn, u32 headers[TLP_HDR_SIZE]; int ret; - if (bus == pcie->root_bus_nr) - headers[0] = TLP_CFG_DW0(TLP_FMTTYPE_CFGWR0); - else - headers[0] = TLP_CFG_DW0(TLP_FMTTYPE_CFGWR1); - - headers[1] = TLP_CFG_DW1(TLP_REQ_ID(pcie->root_bus_nr, RP_DEVFN), - TLP_WRITE_TAG, byte_en); + headers[0] = TLP_CFG_DW0(pcie, bus); + headers[1] = TLP_CFG_DW1(pcie, TLP_WRITE_TAG, byte_en); headers[2] = TLP_CFG_DW2(bus, devfn, where); /* check alignment to Qword */ @@ -342,7 +336,7 @@ static int altera_pcie_cfg_read(struct pci_bus *bus, unsigned int devfn, if (altera_pcie_hide_rc_bar(bus, devfn, where)) return PCIBIOS_BAD_REGISTER_NUMBER; - if (!altera_pcie_valid_config(pcie, bus, PCI_SLOT(devfn))) { + if (!altera_pcie_valid_device(pcie, bus, PCI_SLOT(devfn))) { *value = 0xffffffff; return PCIBIOS_DEVICE_NOT_FOUND; } @@ 
-359,7 +353,7 @@ static int altera_pcie_cfg_write(struct pci_bus *bus, unsigned int devfn, if (altera_pcie_hide_rc_bar(bus, devfn, where)) return PCIBIOS_BAD_REGISTER_NUMBER; - if (!altera_pcie_valid_config(pcie, bus, PCI_SLOT(devfn))) + if (!altera_pcie_valid_device(pcie, bus, PCI_SLOT(devfn))) return PCIBIOS_DEVICE_NOT_FOUND; return _altera_pcie_cfg_write(pcie, bus->number, devfn, where, size, @@ -394,6 +388,7 @@ static int altera_write_cap_word(struct altera_pcie *pcie, u8 busno, static void altera_wait_link_retrain(struct altera_pcie *pcie) { + struct device *dev = &pcie->pdev->dev; u16 reg16; unsigned long start_jiffies; @@ -406,7 +401,7 @@ static void altera_wait_link_retrain(struct altera_pcie *pcie) break; if (time_after(jiffies, start_jiffies + LINK_RETRAIN_TIMEOUT)) { - dev_err(&pcie->pdev->dev, "link retrain timeout\n"); + dev_err(dev, "link retrain timeout\n"); break; } udelay(100); @@ -419,7 +414,7 @@ static void altera_wait_link_retrain(struct altera_pcie *pcie) break; if (time_after(jiffies, start_jiffies + LINK_UP_TIMEOUT)) { - dev_err(&pcie->pdev->dev, "link up timeout\n"); + dev_err(dev, "link up timeout\n"); break; } udelay(100); @@ -460,7 +455,6 @@ static int altera_pcie_intx_map(struct irq_domain *domain, unsigned int irq, { irq_set_chip_and_handler(irq, &dummy_irq_chip, handle_simple_irq); irq_set_chip_data(irq, domain->host_data); - return 0; } @@ -472,12 +466,14 @@ static void altera_pcie_isr(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct altera_pcie *pcie; + struct device *dev; unsigned long status; u32 bit; u32 virq; chained_irq_enter(chip, desc); pcie = irq_desc_get_handler_data(desc); + dev = &pcie->pdev->dev; while ((status = cra_readl(pcie, P2A_INT_STATUS) & P2A_INT_STS_ALL) != 0) { @@ -489,8 +485,7 @@ static void altera_pcie_isr(struct irq_desc *desc) if (virq) generic_handle_irq(virq); else - dev_err(&pcie->pdev->dev, - "unexpected IRQ, INT%d\n", bit); + dev_err(dev, "unexpected IRQ, INT%d\n", bit); } } @@ -549,30 +544,25 @@ static int altera_pcie_init_irq_domain(struct altera_pcie *pcie) static int altera_pcie_parse_dt(struct altera_pcie *pcie) { - struct resource *cra; + struct device *dev = &pcie->pdev->dev; struct platform_device *pdev = pcie->pdev; + struct resource *cra; cra = platform_get_resource_byname(pdev, IORESOURCE_MEM, "Cra"); - if (!cra) { - dev_err(&pdev->dev, "no Cra memory resource defined\n"); - return -ENODEV; - } - - pcie->cra_base = devm_ioremap_resource(&pdev->dev, cra); + pcie->cra_base = devm_ioremap_resource(dev, cra); if (IS_ERR(pcie->cra_base)) { - dev_err(&pdev->dev, "failed to map cra memory\n"); + dev_err(dev, "failed to map cra memory\n"); return PTR_ERR(pcie->cra_base); } /* setup IRQ */ pcie->irq = platform_get_irq(pdev, 0); if (pcie->irq <= 0) { - dev_err(&pdev->dev, "failed to get IRQ: %d\n", pcie->irq); + dev_err(dev, "failed to get IRQ: %d\n", pcie->irq); return -EINVAL; } irq_set_chained_handler_and_data(pcie->irq, altera_pcie_isr, pcie); - return 0; } @@ -583,12 +573,13 @@ static void altera_pcie_host_init(struct altera_pcie *pcie) static int altera_pcie_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; struct altera_pcie *pcie; struct pci_bus *bus; struct pci_bus *child; int ret; - pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL); + pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); if (!pcie) return -ENOMEM; @@ -596,7 +587,7 @@ static int altera_pcie_probe(struct platform_device *pdev) ret = altera_pcie_parse_dt(pcie); if (ret) { - 
dev_err(&pdev->dev, "Parsing DT failed\n"); + dev_err(dev, "Parsing DT failed\n"); return ret; } @@ -604,13 +595,13 @@ static int altera_pcie_probe(struct platform_device *pdev) ret = altera_pcie_parse_request_of_pci_ranges(pcie); if (ret) { - dev_err(&pdev->dev, "Failed add resources\n"); + dev_err(dev, "Failed add resources\n"); return ret; } ret = altera_pcie_init_irq_domain(pcie); if (ret) { - dev_err(&pdev->dev, "Failed creating IRQ Domain\n"); + dev_err(dev, "Failed creating IRQ Domain\n"); return ret; } @@ -620,7 +611,7 @@ static int altera_pcie_probe(struct platform_device *pdev) cra_writel(pcie, P2A_INT_ENA_ALL, P2A_INT_ENABLE); altera_pcie_host_init(pcie); - bus = pci_scan_root_bus(&pdev->dev, pcie->root_bus_nr, &altera_pcie_ops, + bus = pci_scan_root_bus(dev, pcie->root_bus_nr, &altera_pcie_ops, pcie, &pcie->resources); if (!bus) return -ENOMEM; @@ -633,8 +624,6 @@ static int altera_pcie_probe(struct platform_device *pdev) pcie_bus_configure_settings(child); pci_bus_add_devices(bus); - - platform_set_drvdata(pdev, pcie); return ret; } diff --git a/drivers/pci/host/pcie-armada8k.c b/drivers/pci/host/pcie-armada8k.c index 0f4f570068e3..0ac0f18690f2 100644 --- a/drivers/pci/host/pcie-armada8k.c +++ b/drivers/pci/host/pcie-armada8k.c @@ -29,34 +29,33 @@ #include "pcie-designware.h" struct armada8k_pcie { - void __iomem *base; + struct pcie_port pp; /* pp.dbi_base is DT ctrl */ struct clk *clk; - struct pcie_port pp; }; #define PCIE_VENDOR_REGS_OFFSET 0x8000 -#define PCIE_GLOBAL_CONTROL_REG 0x0 +#define PCIE_GLOBAL_CONTROL_REG (PCIE_VENDOR_REGS_OFFSET + 0x0) #define PCIE_APP_LTSSM_EN BIT(2) #define PCIE_DEVICE_TYPE_SHIFT 4 #define PCIE_DEVICE_TYPE_MASK 0xF #define PCIE_DEVICE_TYPE_RC 0x4 /* Root complex */ -#define PCIE_GLOBAL_STATUS_REG 0x8 +#define PCIE_GLOBAL_STATUS_REG (PCIE_VENDOR_REGS_OFFSET + 0x8) #define PCIE_GLB_STS_RDLH_LINK_UP BIT(1) #define PCIE_GLB_STS_PHY_LINK_UP BIT(9) -#define PCIE_GLOBAL_INT_CAUSE1_REG 0x1C -#define PCIE_GLOBAL_INT_MASK1_REG 0x20 +#define PCIE_GLOBAL_INT_CAUSE1_REG (PCIE_VENDOR_REGS_OFFSET + 0x1C) +#define PCIE_GLOBAL_INT_MASK1_REG (PCIE_VENDOR_REGS_OFFSET + 0x20) #define PCIE_INT_A_ASSERT_MASK BIT(9) #define PCIE_INT_B_ASSERT_MASK BIT(10) #define PCIE_INT_C_ASSERT_MASK BIT(11) #define PCIE_INT_D_ASSERT_MASK BIT(12) -#define PCIE_ARCACHE_TRC_REG 0x50 -#define PCIE_AWCACHE_TRC_REG 0x54 -#define PCIE_ARUSER_REG 0x5C -#define PCIE_AWUSER_REG 0x60 +#define PCIE_ARCACHE_TRC_REG (PCIE_VENDOR_REGS_OFFSET + 0x50) +#define PCIE_AWCACHE_TRC_REG (PCIE_VENDOR_REGS_OFFSET + 0x54) +#define PCIE_ARUSER_REG (PCIE_VENDOR_REGS_OFFSET + 0x5C) +#define PCIE_AWUSER_REG (PCIE_VENDOR_REGS_OFFSET + 0x60) /* * AR/AW Cache defauls: Normal memory, Write-Back, Read / Write * allocate @@ -72,11 +71,10 @@ struct armada8k_pcie { static int armada8k_pcie_link_up(struct pcie_port *pp) { - struct armada8k_pcie *pcie = to_armada8k_pcie(pp); u32 reg; u32 mask = PCIE_GLB_STS_RDLH_LINK_UP | PCIE_GLB_STS_PHY_LINK_UP; - reg = readl(pcie->base + PCIE_GLOBAL_STATUS_REG); + reg = dw_pcie_readl_rc(pp, PCIE_GLOBAL_STATUS_REG); if ((reg & mask) == mask) return 1; @@ -85,51 +83,50 @@ static int armada8k_pcie_link_up(struct pcie_port *pp) return 0; } -static void armada8k_pcie_establish_link(struct pcie_port *pp) +static void armada8k_pcie_establish_link(struct armada8k_pcie *pcie) { - struct armada8k_pcie *pcie = to_armada8k_pcie(pp); - void __iomem *base = pcie->base; + struct pcie_port *pp = &pcie->pp; u32 reg; if (!dw_pcie_link_up(pp)) { /* Disable LTSSM state machine to enable configuration 
*/ - reg = readl(base + PCIE_GLOBAL_CONTROL_REG); + reg = dw_pcie_readl_rc(pp, PCIE_GLOBAL_CONTROL_REG); reg &= ~(PCIE_APP_LTSSM_EN); - writel(reg, base + PCIE_GLOBAL_CONTROL_REG); + dw_pcie_writel_rc(pp, PCIE_GLOBAL_CONTROL_REG, reg); } /* Set the device to root complex mode */ - reg = readl(base + PCIE_GLOBAL_CONTROL_REG); + reg = dw_pcie_readl_rc(pp, PCIE_GLOBAL_CONTROL_REG); reg &= ~(PCIE_DEVICE_TYPE_MASK << PCIE_DEVICE_TYPE_SHIFT); reg |= PCIE_DEVICE_TYPE_RC << PCIE_DEVICE_TYPE_SHIFT; - writel(reg, base + PCIE_GLOBAL_CONTROL_REG); + dw_pcie_writel_rc(pp, PCIE_GLOBAL_CONTROL_REG, reg); /* Set the PCIe master AxCache attributes */ - writel(ARCACHE_DEFAULT_VALUE, base + PCIE_ARCACHE_TRC_REG); - writel(AWCACHE_DEFAULT_VALUE, base + PCIE_AWCACHE_TRC_REG); + dw_pcie_writel_rc(pp, PCIE_ARCACHE_TRC_REG, ARCACHE_DEFAULT_VALUE); + dw_pcie_writel_rc(pp, PCIE_AWCACHE_TRC_REG, AWCACHE_DEFAULT_VALUE); /* Set the PCIe master AxDomain attributes */ - reg = readl(base + PCIE_ARUSER_REG); + reg = dw_pcie_readl_rc(pp, PCIE_ARUSER_REG); reg &= ~(AX_USER_DOMAIN_MASK << AX_USER_DOMAIN_SHIFT); reg |= DOMAIN_OUTER_SHAREABLE << AX_USER_DOMAIN_SHIFT; - writel(reg, base + PCIE_ARUSER_REG); + dw_pcie_writel_rc(pp, PCIE_ARUSER_REG, reg); - reg = readl(base + PCIE_AWUSER_REG); + reg = dw_pcie_readl_rc(pp, PCIE_AWUSER_REG); reg &= ~(AX_USER_DOMAIN_MASK << AX_USER_DOMAIN_SHIFT); reg |= DOMAIN_OUTER_SHAREABLE << AX_USER_DOMAIN_SHIFT; - writel(reg, base + PCIE_AWUSER_REG); + dw_pcie_writel_rc(pp, PCIE_AWUSER_REG, reg); /* Enable INT A-D interrupts */ - reg = readl(base + PCIE_GLOBAL_INT_MASK1_REG); + reg = dw_pcie_readl_rc(pp, PCIE_GLOBAL_INT_MASK1_REG); reg |= PCIE_INT_A_ASSERT_MASK | PCIE_INT_B_ASSERT_MASK | PCIE_INT_C_ASSERT_MASK | PCIE_INT_D_ASSERT_MASK; - writel(reg, base + PCIE_GLOBAL_INT_MASK1_REG); + dw_pcie_writel_rc(pp, PCIE_GLOBAL_INT_MASK1_REG, reg); if (!dw_pcie_link_up(pp)) { /* Configuration done. Start LTSSM */ - reg = readl(base + PCIE_GLOBAL_CONTROL_REG); + reg = dw_pcie_readl_rc(pp, PCIE_GLOBAL_CONTROL_REG); reg |= PCIE_APP_LTSSM_EN; - writel(reg, base + PCIE_GLOBAL_CONTROL_REG); + dw_pcie_writel_rc(pp, PCIE_GLOBAL_CONTROL_REG, reg); } /* Wait until the link becomes active again */ @@ -139,15 +136,16 @@ static void armada8k_pcie_establish_link(struct pcie_port *pp) static void armada8k_pcie_host_init(struct pcie_port *pp) { + struct armada8k_pcie *pcie = to_armada8k_pcie(pp); + dw_pcie_setup_rc(pp); - armada8k_pcie_establish_link(pp); + armada8k_pcie_establish_link(pcie); } static irqreturn_t armada8k_pcie_irq_handler(int irq, void *arg) { - struct pcie_port *pp = arg; - struct armada8k_pcie *pcie = to_armada8k_pcie(pp); - void __iomem *base = pcie->base; + struct armada8k_pcie *pcie = arg; + struct pcie_port *pp = &pcie->pp; u32 val; /* @@ -155,8 +153,8 @@ static irqreturn_t armada8k_pcie_irq_handler(int irq, void *arg) * PCI device. However, they are also latched into the PCIe * controller, so we simply discard them. 
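Note the inverted lookup in the armada8k IRQ path here: devm_request_irq() now registers the armada8k_pcie wrapper as the cookie, and the handler derives the embedded pcie_port with &pcie->pp, where it previously received the pcie_port and climbed back out via to_armada8k_pcie(). A stub-typed sketch of both directions; the types and the offsetof() macro below are a hand-rolled illustration of what the kernel expresses with container_of():

#include <stddef.h>
#include <stdio.h>

struct pcie_port_model { int irq; };
struct armada8k_model { struct pcie_port_model pp; };

/* hand-rolled equivalent of container_of(p, struct armada8k_model, pp) */
#define to_armada8k(p) \
	((struct armada8k_model *)((char *)(p) - \
				   offsetof(struct armada8k_model, pp)))

static void irq_handler(void *arg)
{
	struct armada8k_model *pcie = arg;	/* new style: wrapper is the cookie */
	struct pcie_port_model *pp = &pcie->pp;

	printf("handled irq %d\n", pp->irq);
}

int main(void)
{
	struct armada8k_model pcie = { .pp = { .irq = 42 } };

	/* old style passed &pcie.pp and recovered the wrapper like this: */
	if (to_armada8k(&pcie.pp) == &pcie)
		irq_handler(&pcie);
	return 0;
}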
*/ - val = readl(base + PCIE_GLOBAL_INT_CAUSE1_REG); - writel(val, base + PCIE_GLOBAL_INT_CAUSE1_REG); + val = dw_pcie_readl_rc(pp, PCIE_GLOBAL_INT_CAUSE1_REG); + dw_pcie_writel_rc(pp, PCIE_GLOBAL_INT_CAUSE1_REG, val); return IRQ_HANDLED; } @@ -166,9 +164,10 @@ static struct pcie_host_ops armada8k_pcie_host_ops = { .host_init = armada8k_pcie_host_init, }; -static int armada8k_add_pcie_port(struct pcie_port *pp, +static int armada8k_add_pcie_port(struct armada8k_pcie *pcie, struct platform_device *pdev) { + struct pcie_port *pp = &pcie->pp; struct device *dev = &pdev->dev; int ret; @@ -182,7 +181,7 @@ static int armada8k_add_pcie_port(struct pcie_port *pp, } ret = devm_request_irq(dev, pp->irq, armada8k_pcie_irq_handler, - IRQF_SHARED, "armada8k-pcie", pp); + IRQF_SHARED, "armada8k-pcie", pcie); if (ret) { dev_err(dev, "failed to request irq %d\n", pp->irq); return ret; @@ -217,7 +216,6 @@ static int armada8k_pcie_probe(struct platform_device *pdev) pp = &pcie->pp; pp->dev = dev; - platform_set_drvdata(pdev, pcie); /* Get the dw-pcie unit configuration/control registers base. */ base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ctrl"); @@ -228,9 +226,7 @@ static int armada8k_pcie_probe(struct platform_device *pdev) goto fail; } - pcie->base = pp->dbi_base + PCIE_VENDOR_REGS_OFFSET; - - ret = armada8k_add_pcie_port(pp, pdev); + ret = armada8k_add_pcie_port(pcie, pdev); if (ret) goto fail; diff --git a/drivers/pci/host/pcie-artpec6.c b/drivers/pci/host/pcie-artpec6.c index 39bf1a6df463..212786b27f1a 100644 --- a/drivers/pci/host/pcie-artpec6.c +++ b/drivers/pci/host/pcie-artpec6.c @@ -27,9 +27,9 @@ #define to_artpec6_pcie(x) container_of(x, struct artpec6_pcie, pp) struct artpec6_pcie { - struct pcie_port pp; - struct regmap *regmap; - void __iomem *phy_base; + struct pcie_port pp; /* pp.dbi_base is DT dbi */ + struct regmap *regmap; /* DT axis,syscon-pcie */ + void __iomem *phy_base; /* DT phy */ }; /* PCIe Port Logic registers (memory-mapped) */ @@ -65,18 +65,31 @@ struct artpec6_pcie { #define ARTPEC6_CPU_TO_BUS_ADDR 0x0fffffff -static int artpec6_pcie_establish_link(struct pcie_port *pp) +static u32 artpec6_pcie_readl(struct artpec6_pcie *artpec6_pcie, u32 offset) { - struct artpec6_pcie *artpec6_pcie = to_artpec6_pcie(pp); + u32 val; + + regmap_read(artpec6_pcie->regmap, offset, &val); + return val; +} + +static void artpec6_pcie_writel(struct artpec6_pcie *artpec6_pcie, u32 offset, u32 val) +{ + regmap_write(artpec6_pcie->regmap, offset, val); +} + +static int artpec6_pcie_establish_link(struct artpec6_pcie *artpec6_pcie) +{ + struct pcie_port *pp = &artpec6_pcie->pp; u32 val; unsigned int retries; /* Hold DW core in reset */ - regmap_read(artpec6_pcie->regmap, PCIECFG, &val); + val = artpec6_pcie_readl(artpec6_pcie, PCIECFG); val |= PCIECFG_CORE_RESET_REQ; - regmap_write(artpec6_pcie->regmap, PCIECFG, val); + artpec6_pcie_writel(artpec6_pcie, PCIECFG, val); - regmap_read(artpec6_pcie->regmap, PCIECFG, &val); + val = artpec6_pcie_readl(artpec6_pcie, PCIECFG); val |= PCIECFG_RISRCREN | /* Receiver term. 50 Ohm */ PCIECFG_MODE_TX_DRV_EN | PCIECFG_CISRREN | /* Reference clock term. 
100 Ohm */ @@ -84,27 +97,27 @@ static int artpec6_pcie_establish_link(struct pcie_port *pp) val |= PCIECFG_REFCLK_ENABLE; val &= ~PCIECFG_DBG_OEN; val &= ~PCIECFG_CLKREQ_B; - regmap_write(artpec6_pcie->regmap, PCIECFG, val); + artpec6_pcie_writel(artpec6_pcie, PCIECFG, val); usleep_range(5000, 6000); - regmap_read(artpec6_pcie->regmap, NOCCFG, &val); + val = artpec6_pcie_readl(artpec6_pcie, NOCCFG); val |= NOCCFG_ENABLE_CLK_PCIE; - regmap_write(artpec6_pcie->regmap, NOCCFG, val); + artpec6_pcie_writel(artpec6_pcie, NOCCFG, val); usleep_range(20, 30); - regmap_read(artpec6_pcie->regmap, PCIECFG, &val); + val = artpec6_pcie_readl(artpec6_pcie, PCIECFG); val |= PCIECFG_PCLK_ENABLE | PCIECFG_PLL_ENABLE; - regmap_write(artpec6_pcie->regmap, PCIECFG, val); + artpec6_pcie_writel(artpec6_pcie, PCIECFG, val); usleep_range(6000, 7000); - regmap_read(artpec6_pcie->regmap, NOCCFG, &val); + val = artpec6_pcie_readl(artpec6_pcie, NOCCFG); val &= ~NOCCFG_POWER_PCIE_IDLEREQ; - regmap_write(artpec6_pcie->regmap, NOCCFG, val); + artpec6_pcie_writel(artpec6_pcie, NOCCFG, val); retries = 50; do { usleep_range(1000, 2000); - regmap_read(artpec6_pcie->regmap, NOCCFG, &val); + val = artpec6_pcie_readl(artpec6_pcie, NOCCFG); retries--; } while (retries && (val & (NOCCFG_POWER_PCIE_IDLEACK | NOCCFG_POWER_PCIE_IDLE))); @@ -117,16 +130,16 @@ static int artpec6_pcie_establish_link(struct pcie_port *pp) } while (retries && !(val & PHY_COSPLLLOCK)); /* Take DW core out of reset */ - regmap_read(artpec6_pcie->regmap, PCIECFG, &val); + val = artpec6_pcie_readl(artpec6_pcie, PCIECFG); val &= ~PCIECFG_CORE_RESET_REQ; - regmap_write(artpec6_pcie->regmap, PCIECFG, val); + artpec6_pcie_writel(artpec6_pcie, PCIECFG, val); usleep_range(100, 200); /* * Enable writing to config regs. This is required as the Synopsys * driver changes the class code. That register needs DBI write enable. 
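The artpec6_pcie_readl()/artpec6_pcie_writel() wrappers introduced above turn regmap_read()'s out-parameter style into a value-returning accessor, which keeps the many read-modify-write sequences in this function to one visual step per register; like the wrapper in the diff, the sketch below discards regmap_read()'s status. Regmap is modeled as an array and the register and bit names are hypothetical:

#include <stdint.h>
#include <stdio.h>

#define PCIECFG_MODEL		0x0
#define LTSSM_ENABLE_MODEL	(1u << 1)	/* hypothetical bit position */

static uint32_t fake_regmap[16];

static int regmap_read_model(uint32_t off, uint32_t *val)
{
	*val = fake_regmap[off / 4];
	return 0;
}

static void regmap_write_model(uint32_t off, uint32_t val)
{
	fake_regmap[off / 4] = val;
}

/* value-returning wrapper; the read status is dropped, as in the driver */
static uint32_t pcie_readl_model(uint32_t off)
{
	uint32_t val;

	regmap_read_model(off, &val);
	return val;
}

int main(void)
{
	uint32_t val;

	val = pcie_readl_model(PCIECFG_MODEL);
	val |= LTSSM_ENABLE_MODEL;
	regmap_write_model(PCIECFG_MODEL, val);

	printf("PCIECFG = 0x%x\n", (unsigned)pcie_readl_model(PCIECFG_MODEL));
	return 0;
}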
*/ - writel(DBI_RO_WR_EN, pp->dbi_base + MISC_CONTROL_1_OFF); + dw_pcie_writel_rc(pp, MISC_CONTROL_1_OFF, DBI_RO_WR_EN); pp->io_base &= ARTPEC6_CPU_TO_BUS_ADDR; pp->mem_base &= ARTPEC6_CPU_TO_BUS_ADDR; @@ -137,78 +150,69 @@ static int artpec6_pcie_establish_link(struct pcie_port *pp) dw_pcie_setup_rc(pp); /* assert LTSSM enable */ - regmap_read(artpec6_pcie->regmap, PCIECFG, &val); + val = artpec6_pcie_readl(artpec6_pcie, PCIECFG); val |= PCIECFG_LTSSM_ENABLE; - regmap_write(artpec6_pcie->regmap, PCIECFG, val); + artpec6_pcie_writel(artpec6_pcie, PCIECFG, val); /* check if the link is up or not */ if (!dw_pcie_wait_for_link(pp)) return 0; dev_dbg(pp->dev, "DEBUG_R0: 0x%08x, DEBUG_R1: 0x%08x\n", - readl(pp->dbi_base + PCIE_PHY_DEBUG_R0), - readl(pp->dbi_base + PCIE_PHY_DEBUG_R1)); + dw_pcie_readl_rc(pp, PCIE_PHY_DEBUG_R0), + dw_pcie_readl_rc(pp, PCIE_PHY_DEBUG_R1)); return -ETIMEDOUT; } -static void artpec6_pcie_enable_interrupts(struct pcie_port *pp) +static void artpec6_pcie_enable_interrupts(struct artpec6_pcie *artpec6_pcie) { + struct pcie_port *pp = &artpec6_pcie->pp; + if (IS_ENABLED(CONFIG_PCI_MSI)) dw_pcie_msi_init(pp); } static void artpec6_pcie_host_init(struct pcie_port *pp) { - artpec6_pcie_establish_link(pp); - artpec6_pcie_enable_interrupts(pp); -} - -static int artpec6_pcie_link_up(struct pcie_port *pp) -{ - u32 rc; - - /* - * Get status from Synopsys IP - * link is debug bit 36, debug register 1 starts at bit 32 - */ - rc = readl(pp->dbi_base + PCIE_PHY_DEBUG_R1) & (0x1 << (36 - 32)); - if (rc) - return 1; + struct artpec6_pcie *artpec6_pcie = to_artpec6_pcie(pp); - return 0; + artpec6_pcie_establish_link(artpec6_pcie); + artpec6_pcie_enable_interrupts(artpec6_pcie); } static struct pcie_host_ops artpec6_pcie_host_ops = { - .link_up = artpec6_pcie_link_up, .host_init = artpec6_pcie_host_init, }; static irqreturn_t artpec6_pcie_msi_handler(int irq, void *arg) { - struct pcie_port *pp = arg; + struct artpec6_pcie *artpec6_pcie = arg; + struct pcie_port *pp = &artpec6_pcie->pp; return dw_handle_msi_irq(pp); } -static int artpec6_add_pcie_port(struct pcie_port *pp, +static int artpec6_add_pcie_port(struct artpec6_pcie *artpec6_pcie, struct platform_device *pdev) { + struct pcie_port *pp = &artpec6_pcie->pp; + struct device *dev = pp->dev; int ret; if (IS_ENABLED(CONFIG_PCI_MSI)) { pp->msi_irq = platform_get_irq_byname(pdev, "msi"); if (pp->msi_irq <= 0) { - dev_err(&pdev->dev, "failed to get MSI irq\n"); + dev_err(dev, "failed to get MSI irq\n"); return -ENODEV; } - ret = devm_request_irq(&pdev->dev, pp->msi_irq, + ret = devm_request_irq(dev, pp->msi_irq, artpec6_pcie_msi_handler, IRQF_SHARED | IRQF_NO_THREAD, - "artpec6-pcie-msi", pp); + "artpec6-pcie-msi", artpec6_pcie); if (ret) { - dev_err(&pdev->dev, "failed to request MSI irq\n"); + dev_err(dev, "failed to request MSI irq\n"); return ret; } } @@ -218,7 +222,7 @@ static int artpec6_add_pcie_port(struct pcie_port *pp, ret = dw_pcie_host_init(pp); if (ret) { - dev_err(&pdev->dev, "failed to initialize host\n"); + dev_err(dev, "failed to initialize host\n"); return ret; } @@ -227,41 +231,40 @@ static int artpec6_add_pcie_port(struct pcie_port *pp, static int artpec6_pcie_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; struct artpec6_pcie *artpec6_pcie; struct pcie_port *pp; struct resource *dbi_base; struct resource *phy_base; int ret; - artpec6_pcie = devm_kzalloc(&pdev->dev, sizeof(*artpec6_pcie), - GFP_KERNEL); + artpec6_pcie = devm_kzalloc(dev, sizeof(*artpec6_pcie), GFP_KERNEL); if 
(!artpec6_pcie) return -ENOMEM; pp = &artpec6_pcie->pp; - pp->dev = &pdev->dev; + pp->dev = dev; dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi"); - pp->dbi_base = devm_ioremap_resource(&pdev->dev, dbi_base); + pp->dbi_base = devm_ioremap_resource(dev, dbi_base); if (IS_ERR(pp->dbi_base)) return PTR_ERR(pp->dbi_base); phy_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "phy"); - artpec6_pcie->phy_base = devm_ioremap_resource(&pdev->dev, phy_base); + artpec6_pcie->phy_base = devm_ioremap_resource(dev, phy_base); if (IS_ERR(artpec6_pcie->phy_base)) return PTR_ERR(artpec6_pcie->phy_base); artpec6_pcie->regmap = - syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + syscon_regmap_lookup_by_phandle(dev->of_node, "axis,syscon-pcie"); if (IS_ERR(artpec6_pcie->regmap)) return PTR_ERR(artpec6_pcie->regmap); - ret = artpec6_add_pcie_port(pp, pdev); + ret = artpec6_add_pcie_port(artpec6_pcie, pdev); if (ret < 0) return ret; - platform_set_drvdata(pdev, artpec6_pcie); return 0; } diff --git a/drivers/pci/host/pcie-designware-plat.c b/drivers/pci/host/pcie-designware-plat.c index 17da005497a5..537f58a664fa 100644 --- a/drivers/pci/host/pcie-designware-plat.c +++ b/drivers/pci/host/pcie-designware-plat.c @@ -25,8 +25,7 @@ #include "pcie-designware.h" struct dw_plat_pcie { - void __iomem *mem_base; - struct pcie_port pp; + struct pcie_port pp; /* pp.dbi_base is DT 0th resource */ }; static irqreturn_t dw_plat_pcie_msi_irq_handler(int irq, void *arg) @@ -52,6 +51,7 @@ static struct pcie_host_ops dw_plat_pcie_host_ops = { static int dw_plat_add_pcie_port(struct pcie_port *pp, struct platform_device *pdev) { + struct device *dev = pp->dev; int ret; pp->irq = platform_get_irq(pdev, 1); @@ -63,11 +63,11 @@ static int dw_plat_add_pcie_port(struct pcie_port *pp, if (pp->msi_irq < 0) return pp->msi_irq; - ret = devm_request_irq(&pdev->dev, pp->msi_irq, + ret = devm_request_irq(dev, pp->msi_irq, dw_plat_pcie_msi_irq_handler, IRQF_SHARED, "dw-plat-pcie-msi", pp); if (ret) { - dev_err(&pdev->dev, "failed to request MSI IRQ\n"); + dev_err(dev, "failed to request MSI IRQ\n"); return ret; } } @@ -77,7 +77,7 @@ static int dw_plat_add_pcie_port(struct pcie_port *pp, ret = dw_pcie_host_init(pp); if (ret) { - dev_err(&pdev->dev, "failed to initialize host\n"); + dev_err(dev, "failed to initialize host\n"); return ret; } @@ -86,31 +86,28 @@ static int dw_plat_add_pcie_port(struct pcie_port *pp, static int dw_plat_pcie_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; struct dw_plat_pcie *dw_plat_pcie; struct pcie_port *pp; struct resource *res; /* Resource from DT */ int ret; - dw_plat_pcie = devm_kzalloc(&pdev->dev, sizeof(*dw_plat_pcie), - GFP_KERNEL); + dw_plat_pcie = devm_kzalloc(dev, sizeof(*dw_plat_pcie), GFP_KERNEL); if (!dw_plat_pcie) return -ENOMEM; pp = &dw_plat_pcie->pp; - pp->dev = &pdev->dev; + pp->dev = dev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - dw_plat_pcie->mem_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(dw_plat_pcie->mem_base)) - return PTR_ERR(dw_plat_pcie->mem_base); - - pp->dbi_base = dw_plat_pcie->mem_base; + pp->dbi_base = devm_ioremap_resource(dev, res); + if (IS_ERR(pp->dbi_base)) + return PTR_ERR(pp->dbi_base); ret = dw_plat_add_pcie_port(pp, pdev); if (ret < 0) return ret; - platform_set_drvdata(pdev, dw_plat_pcie); return 0; } diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c index 74da71ea544a..035f50c03281 100644 --- a/drivers/pci/host/pcie-designware.c +++ 
b/drivers/pci/host/pcie-designware.c @@ -141,41 +141,35 @@ int dw_pcie_cfg_write(void __iomem *addr, int size, u32 val) return PCIBIOS_SUCCESSFUL; } -static inline u32 dw_pcie_readl_rc(struct pcie_port *pp, u32 reg) +u32 dw_pcie_readl_rc(struct pcie_port *pp, u32 reg) { if (pp->ops->readl_rc) - return pp->ops->readl_rc(pp, pp->dbi_base + reg); + return pp->ops->readl_rc(pp, reg); return readl(pp->dbi_base + reg); } -static inline void dw_pcie_writel_rc(struct pcie_port *pp, u32 val, u32 reg) +void dw_pcie_writel_rc(struct pcie_port *pp, u32 reg, u32 val) { if (pp->ops->writel_rc) - pp->ops->writel_rc(pp, val, pp->dbi_base + reg); + pp->ops->writel_rc(pp, reg, val); else writel(val, pp->dbi_base + reg); } -static inline u32 dw_pcie_readl_unroll(struct pcie_port *pp, u32 index, u32 reg) +static u32 dw_pcie_readl_unroll(struct pcie_port *pp, u32 index, u32 reg) { u32 offset = PCIE_GET_ATU_OUTB_UNR_REG_OFFSET(index); - if (pp->ops->readl_rc) - return pp->ops->readl_rc(pp, pp->dbi_base + offset + reg); - - return readl(pp->dbi_base + offset + reg); + return dw_pcie_readl_rc(pp, offset + reg); } -static inline void dw_pcie_writel_unroll(struct pcie_port *pp, u32 index, - u32 val, u32 reg) +static void dw_pcie_writel_unroll(struct pcie_port *pp, u32 index, u32 reg, + u32 val) { u32 offset = PCIE_GET_ATU_OUTB_UNR_REG_OFFSET(index); - if (pp->ops->writel_rc) - pp->ops->writel_rc(pp, val, pp->dbi_base + offset + reg); - else - writel(val, pp->dbi_base + offset + reg); + dw_pcie_writel_rc(pp, offset + reg, val); } static int dw_pcie_rd_own_conf(struct pcie_port *pp, int where, int size, @@ -202,35 +196,35 @@ static void dw_pcie_prog_outbound_atu(struct pcie_port *pp, int index, u32 retries, val; if (pp->iatu_unroll_enabled) { - dw_pcie_writel_unroll(pp, index, - lower_32_bits(cpu_addr), PCIE_ATU_UNR_LOWER_BASE); - dw_pcie_writel_unroll(pp, index, - upper_32_bits(cpu_addr), PCIE_ATU_UNR_UPPER_BASE); - dw_pcie_writel_unroll(pp, index, - lower_32_bits(cpu_addr + size - 1), PCIE_ATU_UNR_LIMIT); - dw_pcie_writel_unroll(pp, index, - lower_32_bits(pci_addr), PCIE_ATU_UNR_LOWER_TARGET); - dw_pcie_writel_unroll(pp, index, - upper_32_bits(pci_addr), PCIE_ATU_UNR_UPPER_TARGET); - dw_pcie_writel_unroll(pp, index, - type, PCIE_ATU_UNR_REGION_CTRL1); - dw_pcie_writel_unroll(pp, index, - PCIE_ATU_ENABLE, PCIE_ATU_UNR_REGION_CTRL2); + dw_pcie_writel_unroll(pp, index, PCIE_ATU_UNR_LOWER_BASE, + lower_32_bits(cpu_addr)); + dw_pcie_writel_unroll(pp, index, PCIE_ATU_UNR_UPPER_BASE, + upper_32_bits(cpu_addr)); + dw_pcie_writel_unroll(pp, index, PCIE_ATU_UNR_LIMIT, + lower_32_bits(cpu_addr + size - 1)); + dw_pcie_writel_unroll(pp, index, PCIE_ATU_UNR_LOWER_TARGET, + lower_32_bits(pci_addr)); + dw_pcie_writel_unroll(pp, index, PCIE_ATU_UNR_UPPER_TARGET, + upper_32_bits(pci_addr)); + dw_pcie_writel_unroll(pp, index, PCIE_ATU_UNR_REGION_CTRL1, + type); + dw_pcie_writel_unroll(pp, index, PCIE_ATU_UNR_REGION_CTRL2, + PCIE_ATU_ENABLE); } else { - dw_pcie_writel_rc(pp, PCIE_ATU_REGION_OUTBOUND | index, - PCIE_ATU_VIEWPORT); - dw_pcie_writel_rc(pp, lower_32_bits(cpu_addr), - PCIE_ATU_LOWER_BASE); - dw_pcie_writel_rc(pp, upper_32_bits(cpu_addr), - PCIE_ATU_UPPER_BASE); - dw_pcie_writel_rc(pp, lower_32_bits(cpu_addr + size - 1), - PCIE_ATU_LIMIT); - dw_pcie_writel_rc(pp, lower_32_bits(pci_addr), - PCIE_ATU_LOWER_TARGET); - dw_pcie_writel_rc(pp, upper_32_bits(pci_addr), - PCIE_ATU_UPPER_TARGET); - dw_pcie_writel_rc(pp, type, PCIE_ATU_CR1); - dw_pcie_writel_rc(pp, PCIE_ATU_ENABLE, PCIE_ATU_CR2); + dw_pcie_writel_rc(pp, 
PCIE_ATU_VIEWPORT, + PCIE_ATU_REGION_OUTBOUND | index); + dw_pcie_writel_rc(pp, PCIE_ATU_LOWER_BASE, + lower_32_bits(cpu_addr)); + dw_pcie_writel_rc(pp, PCIE_ATU_UPPER_BASE, + upper_32_bits(cpu_addr)); + dw_pcie_writel_rc(pp, PCIE_ATU_LIMIT, + lower_32_bits(cpu_addr + size - 1)); + dw_pcie_writel_rc(pp, PCIE_ATU_LOWER_TARGET, + lower_32_bits(pci_addr)); + dw_pcie_writel_rc(pp, PCIE_ATU_UPPER_TARGET, + upper_32_bits(pci_addr)); + dw_pcie_writel_rc(pp, PCIE_ATU_CR1, type); + dw_pcie_writel_rc(pp, PCIE_ATU_CR2, PCIE_ATU_ENABLE); } /* @@ -760,8 +754,8 @@ static int dw_pcie_wr_other_conf(struct pcie_port *pp, struct pci_bus *bus, return ret; } -static int dw_pcie_valid_config(struct pcie_port *pp, - struct pci_bus *bus, int dev) +static int dw_pcie_valid_device(struct pcie_port *pp, struct pci_bus *bus, + int dev) { /* If there is no link, then there is no device */ if (bus->number != pp->root_bus_nr) { @@ -781,7 +775,7 @@ static int dw_pcie_rd_conf(struct pci_bus *bus, u32 devfn, int where, { struct pcie_port *pp = bus->sysdata; - if (dw_pcie_valid_config(pp, bus, PCI_SLOT(devfn)) == 0) { + if (!dw_pcie_valid_device(pp, bus, PCI_SLOT(devfn))) { *val = 0xffffffff; return PCIBIOS_DEVICE_NOT_FOUND; } @@ -797,7 +791,7 @@ static int dw_pcie_wr_conf(struct pci_bus *bus, u32 devfn, { struct pcie_port *pp = bus->sysdata; - if (dw_pcie_valid_config(pp, bus, PCI_SLOT(devfn)) == 0) + if (!dw_pcie_valid_device(pp, bus, PCI_SLOT(devfn))) return PCIBIOS_DEVICE_NOT_FOUND; if (bus->number == pp->root_bus_nr) @@ -835,7 +829,7 @@ void dw_pcie_setup_rc(struct pcie_port *pp) dev_err(pp->dev, "num-lanes %u: invalid value\n", pp->lanes); return; } - dw_pcie_writel_rc(pp, val, PCIE_PORT_LINK_CONTROL); + dw_pcie_writel_rc(pp, PCIE_PORT_LINK_CONTROL, val); /* set link width speed control register */ val = dw_pcie_readl_rc(pp, PCIE_LINK_WIDTH_SPEED_CONTROL); @@ -854,30 +848,30 @@ void dw_pcie_setup_rc(struct pcie_port *pp) val |= PORT_LOGIC_LINK_WIDTH_8_LANES; break; } - dw_pcie_writel_rc(pp, val, PCIE_LINK_WIDTH_SPEED_CONTROL); + dw_pcie_writel_rc(pp, PCIE_LINK_WIDTH_SPEED_CONTROL, val); /* setup RC BARs */ - dw_pcie_writel_rc(pp, 0x00000004, PCI_BASE_ADDRESS_0); - dw_pcie_writel_rc(pp, 0x00000000, PCI_BASE_ADDRESS_1); + dw_pcie_writel_rc(pp, PCI_BASE_ADDRESS_0, 0x00000004); + dw_pcie_writel_rc(pp, PCI_BASE_ADDRESS_1, 0x00000000); /* setup interrupt pins */ val = dw_pcie_readl_rc(pp, PCI_INTERRUPT_LINE); val &= 0xffff00ff; val |= 0x00000100; - dw_pcie_writel_rc(pp, val, PCI_INTERRUPT_LINE); + dw_pcie_writel_rc(pp, PCI_INTERRUPT_LINE, val); /* setup bus numbers */ val = dw_pcie_readl_rc(pp, PCI_PRIMARY_BUS); val &= 0xff000000; val |= 0x00010100; - dw_pcie_writel_rc(pp, val, PCI_PRIMARY_BUS); + dw_pcie_writel_rc(pp, PCI_PRIMARY_BUS, val); /* setup command register */ val = dw_pcie_readl_rc(pp, PCI_COMMAND); val &= 0xffff0000; val |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | PCI_COMMAND_SERR; - dw_pcie_writel_rc(pp, val, PCI_COMMAND); + dw_pcie_writel_rc(pp, PCI_COMMAND, val); /* * If the platform provides ->rd_other_conf, it means the platform diff --git a/drivers/pci/host/pcie-designware.h b/drivers/pci/host/pcie-designware.h index c8e5bc647f49..a567ea288ee2 100644 --- a/drivers/pci/host/pcie-designware.h +++ b/drivers/pci/host/pcie-designware.h @@ -54,9 +54,8 @@ struct pcie_port { }; struct pcie_host_ops { - u32 (*readl_rc)(struct pcie_port *pp, void __iomem *dbi_base); - void (*writel_rc)(struct pcie_port *pp, - u32 val, void __iomem *dbi_base); + u32 (*readl_rc)(struct pcie_port *pp, u32 reg); + 
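/*
 * The host-ops contract narrows here: both accessors now take a plain
 * register offset instead of a pre-computed dbi address, and writel_rc
 * follows the same (pp, reg, val) order as readl_rc. A minimal sketch of
 * a platform override under the new signatures (the my_pcie_* names are
 * hypothetical, not from this patch):
 *
 *	static u32 my_pcie_readl_rc(struct pcie_port *pp, u32 reg)
 *	{
 *		// hook point for SoC-specific access rules, then the
 *		// same MMIO read the core does by default
 *		return readl(pp->dbi_base + reg);
 *	}
 *
 *	static void my_pcie_writel_rc(struct pcie_port *pp, u32 reg, u32 val)
 *	{
 *		writel(val, pp->dbi_base + reg);
 *	}
 */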
void (*writel_rc)(struct pcie_port *pp, u32 reg, u32 val); int (*rd_own_conf)(struct pcie_port *pp, int where, int size, u32 *val); int (*wr_own_conf)(struct pcie_port *pp, int where, int size, u32 val); int (*rd_other_conf)(struct pcie_port *pp, struct pci_bus *bus, @@ -73,6 +72,8 @@ struct pcie_host_ops { int (*msi_host_init)(struct pcie_port *pp, struct msi_controller *chip); }; +u32 dw_pcie_readl_rc(struct pcie_port *pp, u32 reg); +void dw_pcie_writel_rc(struct pcie_port *pp, u32 reg, u32 val); int dw_pcie_cfg_read(void __iomem *addr, int size, u32 *val); int dw_pcie_cfg_write(void __iomem *addr, int size, u32 val); irqreturn_t dw_handle_msi_irq(struct pcie_port *pp); diff --git a/drivers/pci/host/pcie-hisi.c b/drivers/pci/host/pcie-hisi.c index 7ee9dfcc45fb..56154c25980c 100644 --- a/drivers/pci/host/pcie-hisi.c +++ b/drivers/pci/host/pcie-hisi.c @@ -22,51 +22,38 @@ #include "pcie-designware.h" -#define PCIE_LTSSM_LINKUP_STATE 0x11 -#define PCIE_LTSSM_STATE_MASK 0x3F -#define PCIE_SUBCTRL_SYS_STATE4_REG 0x6818 -#define PCIE_SYS_STATE4 0x31c -#define PCIE_HIP06_CTRL_OFF 0x1000 +#define PCIE_SUBCTRL_SYS_STATE4_REG 0x6818 +#define PCIE_HIP06_CTRL_OFF 0x1000 +#define PCIE_SYS_STATE4 (PCIE_HIP06_CTRL_OFF + 0x31c) +#define PCIE_LTSSM_LINKUP_STATE 0x11 +#define PCIE_LTSSM_STATE_MASK 0x3F #define to_hisi_pcie(x) container_of(x, struct hisi_pcie, pp) struct hisi_pcie; struct pcie_soc_ops { - int (*hisi_pcie_link_up)(struct hisi_pcie *pcie); + int (*hisi_pcie_link_up)(struct hisi_pcie *hisi_pcie); }; struct hisi_pcie { + struct pcie_port pp; /* pp.dbi_base is DT rc_dbi */ struct regmap *subctrl; - void __iomem *reg_base; u32 port_id; - struct pcie_port pp; struct pcie_soc_ops *soc_ops; }; -static inline void hisi_pcie_apb_writel(struct hisi_pcie *pcie, - u32 val, u32 reg) -{ - writel(val, pcie->reg_base + reg); -} - -static inline u32 hisi_pcie_apb_readl(struct hisi_pcie *pcie, u32 reg) -{ - return readl(pcie->reg_base + reg); -} - /* HipXX PCIe host only supports 32-bit config access */ static int hisi_pcie_cfg_read(struct pcie_port *pp, int where, int size, u32 *val) { u32 reg; u32 reg_val; - struct hisi_pcie *pcie = to_hisi_pcie(pp); void *walker = ®_val; walker += (where & 0x3); reg = where & ~0x3; - reg_val = hisi_pcie_apb_readl(pcie, reg); + reg_val = dw_pcie_readl_rc(pp, reg); if (size == 1) *val = *(u8 __force *) walker; @@ -86,21 +73,20 @@ static int hisi_pcie_cfg_write(struct pcie_port *pp, int where, int size, { u32 reg_val; u32 reg; - struct hisi_pcie *pcie = to_hisi_pcie(pp); void *walker = ®_val; walker += (where & 0x3); reg = where & ~0x3; if (size == 4) - hisi_pcie_apb_writel(pcie, val, reg); + dw_pcie_writel_rc(pp, reg, val); else if (size == 2) { - reg_val = hisi_pcie_apb_readl(pcie, reg); + reg_val = dw_pcie_readl_rc(pp, reg); *(u16 __force *) walker = val; - hisi_pcie_apb_writel(pcie, reg_val, reg); + dw_pcie_writel_rc(pp, reg, reg_val); } else if (size == 1) { - reg_val = hisi_pcie_apb_readl(pcie, reg); + reg_val = dw_pcie_readl_rc(pp, reg); *(u8 __force *) walker = val; - hisi_pcie_apb_writel(pcie, reg_val, reg); + dw_pcie_writel_rc(pp, reg, reg_val); } else return PCIBIOS_BAD_REGISTER_NUMBER; @@ -119,10 +105,10 @@ static int hisi_pcie_link_up_hip05(struct hisi_pcie *hisi_pcie) static int hisi_pcie_link_up_hip06(struct hisi_pcie *hisi_pcie) { + struct pcie_port *pp = &hisi_pcie->pp; u32 val; - val = hisi_pcie_apb_readl(hisi_pcie, PCIE_HIP06_CTRL_OFF + - PCIE_SYS_STATE4); + val = dw_pcie_readl_rc(pp, PCIE_SYS_STATE4); return ((val & PCIE_LTSSM_STATE_MASK) == 
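/*
 * The HiSilicon dbi region supports only 32-bit accesses, so the
 * cfg_write path above emulates 1- and 2-byte config writes by
 * read-modify-write of the aligned dword; that is exactly why probe
 * warns that narrow writes may corrupt adjacent RW1C (write-1-to-clear)
 * status bits: writing the read-back 1s clears those bits. The same
 * pattern as a standalone sketch (rmw_write16 is a hypothetical name;
 * it mirrors the driver's little-endian walker):
 *
 *	static void rmw_write16(void __iomem *base, int where, u16 val)
 *	{
 *		int reg = where & ~0x3;			// aligned dword
 *		u32 dword = readl(base + reg);
 *		void *slot = (void *)&dword + (where & 0x3);
 *
 *		*(u16 *)slot = val;		// patch two bytes in place
 *		writel(dword, base + reg);	// write whole dword back
 *	}
 */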
PCIE_LTSSM_LINKUP_STATE); } @@ -140,19 +126,20 @@ static struct pcie_host_ops hisi_pcie_host_ops = { .link_up = hisi_pcie_link_up, }; -static int hisi_add_pcie_port(struct pcie_port *pp, - struct platform_device *pdev) +static int hisi_add_pcie_port(struct hisi_pcie *hisi_pcie, + struct platform_device *pdev) { + struct pcie_port *pp = &hisi_pcie->pp; + struct device *dev = pp->dev; int ret; u32 port_id; - struct hisi_pcie *hisi_pcie = to_hisi_pcie(pp); - if (of_property_read_u32(pdev->dev.of_node, "port-id", &port_id)) { - dev_err(&pdev->dev, "failed to read port-id\n"); + if (of_property_read_u32(dev->of_node, "port-id", &port_id)) { + dev_err(dev, "failed to read port-id\n"); return -EINVAL; } if (port_id > 3) { - dev_err(&pdev->dev, "Invalid port-id: %d\n", port_id); + dev_err(dev, "Invalid port-id: %d\n", port_id); return -EINVAL; } hisi_pcie->port_id = port_id; @@ -161,7 +148,7 @@ static int hisi_add_pcie_port(struct pcie_port *pp, ret = dw_pcie_host_init(pp); if (ret) { - dev_err(&pdev->dev, "failed to initialize host\n"); + dev_err(dev, "failed to initialize host\n"); return ret; } @@ -170,6 +157,7 @@ static int hisi_add_pcie_port(struct pcie_port *pp, static int hisi_pcie_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; struct hisi_pcie *hisi_pcie; struct pcie_port *pp; const struct of_device_id *match; @@ -177,40 +165,36 @@ static int hisi_pcie_probe(struct platform_device *pdev) struct device_driver *driver; int ret; - hisi_pcie = devm_kzalloc(&pdev->dev, sizeof(*hisi_pcie), GFP_KERNEL); + hisi_pcie = devm_kzalloc(dev, sizeof(*hisi_pcie), GFP_KERNEL); if (!hisi_pcie) return -ENOMEM; pp = &hisi_pcie->pp; - pp->dev = &pdev->dev; - driver = (pdev->dev).driver; + pp->dev = dev; + driver = dev->driver; - match = of_match_device(driver->of_match_table, &pdev->dev); + match = of_match_device(driver->of_match_table, dev); hisi_pcie->soc_ops = (struct pcie_soc_ops *) match->data; hisi_pcie->subctrl = syscon_regmap_lookup_by_compatible("hisilicon,pcie-sas-subctrl"); if (IS_ERR(hisi_pcie->subctrl)) { - dev_err(pp->dev, "cannot get subctrl base\n"); + dev_err(dev, "cannot get subctrl base\n"); return PTR_ERR(hisi_pcie->subctrl); } reg = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rc_dbi"); - hisi_pcie->reg_base = devm_ioremap_resource(&pdev->dev, reg); - if (IS_ERR(hisi_pcie->reg_base)) { - dev_err(pp->dev, "cannot get rc_dbi base\n"); - return PTR_ERR(hisi_pcie->reg_base); + pp->dbi_base = devm_ioremap_resource(dev, reg); + if (IS_ERR(pp->dbi_base)) { + dev_err(dev, "cannot get rc_dbi base\n"); + return PTR_ERR(pp->dbi_base); } - hisi_pcie->pp.dbi_base = hisi_pcie->reg_base; - - ret = hisi_add_pcie_port(pp, pdev); + ret = hisi_add_pcie_port(hisi_pcie, pdev); if (ret) return ret; - platform_set_drvdata(pdev, hisi_pcie); - - dev_warn(pp->dev, "only 32-bit config accesses supported; smaller writes may corrupt adjacent RW1C fields\n"); + dev_warn(dev, "only 32-bit config accesses supported; smaller writes may corrupt adjacent RW1C fields\n"); return 0; } diff --git a/drivers/pci/host/pcie-iproc-bcma.c b/drivers/pci/host/pcie-iproc-bcma.c index 0d7bee4a0d26..8ce089043a27 100644 --- a/drivers/pci/host/pcie-iproc-bcma.c +++ b/drivers/pci/host/pcie-iproc-bcma.c @@ -42,19 +42,24 @@ static int iproc_pcie_bcma_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) static int iproc_pcie_bcma_probe(struct bcma_device *bdev) { + struct device *dev = &bdev->dev; struct iproc_pcie *pcie; LIST_HEAD(res); struct resource res_mem; int ret; - pcie = devm_kzalloc(&bdev->dev, 
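/*
 * The recurring cleanup in this series is to cache the struct device
 * pointer once per function and route all devm_*() and dev_*() calls
 * through it, instead of repeating &pdev->dev or pcie->dev at every
 * call site. A minimal sketch of the idiom (struct and function names
 * hypothetical):
 *
 *	struct foo_pcie { struct device *dev; };
 *
 *	static int foo_pcie_probe(struct platform_device *pdev)
 *	{
 *		struct device *dev = &pdev->dev;
 *		struct foo_pcie *pcie;
 *
 *		pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
 *		if (!pcie)
 *			return -ENOMEM;
 *		pcie->dev = dev;
 *		return 0;
 *	}
 */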
sizeof(*pcie), GFP_KERNEL); + pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); if (!pcie) return -ENOMEM; - pcie->dev = &bdev->dev; - bcma_set_drvdata(bdev, pcie); + pcie->dev = dev; pcie->base = bdev->io_addr; + if (!pcie->base) { + dev_err(dev, "no controller registers\n"); + return -ENOMEM; + } + pcie->base_addr = bdev->addr; res_mem.start = bdev->addr_s[0]; @@ -67,10 +72,11 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev) ret = iproc_pcie_setup(pcie, &res); if (ret) - dev_err(pcie->dev, "PCIe controller setup failed\n"); + dev_err(dev, "PCIe controller setup failed\n"); pci_free_resource_list(&res); + bcma_set_drvdata(bdev, pcie); return ret; } diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c index 1738c5288eb6..a3de087976b3 100644 --- a/drivers/pci/host/pcie-iproc-platform.c +++ b/drivers/pci/host/pcie-iproc-platform.c @@ -40,35 +40,35 @@ MODULE_DEVICE_TABLE(of, iproc_pcie_of_match_table); static int iproc_pcie_pltfm_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; const struct of_device_id *of_id; struct iproc_pcie *pcie; - struct device_node *np = pdev->dev.of_node; + struct device_node *np = dev->of_node; struct resource reg; resource_size_t iobase = 0; LIST_HEAD(res); int ret; - of_id = of_match_device(iproc_pcie_of_match_table, &pdev->dev); + of_id = of_match_device(iproc_pcie_of_match_table, dev); if (!of_id) return -EINVAL; - pcie = devm_kzalloc(&pdev->dev, sizeof(struct iproc_pcie), GFP_KERNEL); + pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); if (!pcie) return -ENOMEM; - pcie->dev = &pdev->dev; + pcie->dev = dev; pcie->type = (enum iproc_pcie_type)of_id->data; - platform_set_drvdata(pdev, pcie); ret = of_address_to_resource(np, 0, ®); if (ret < 0) { - dev_err(pcie->dev, "unable to obtain controller resources\n"); + dev_err(dev, "unable to obtain controller resources\n"); return ret; } - pcie->base = devm_ioremap(pcie->dev, reg.start, resource_size(®)); + pcie->base = devm_ioremap(dev, reg.start, resource_size(®)); if (!pcie->base) { - dev_err(pcie->dev, "unable to map controller registers\n"); + dev_err(dev, "unable to map controller registers\n"); return -ENOMEM; } pcie->base_addr = reg.start; @@ -79,7 +79,7 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) ret = of_property_read_u32(np, "brcm,pcie-ob-axi-offset", &val); if (ret) { - dev_err(pcie->dev, + dev_err(dev, "missing brcm,pcie-ob-axi-offset property\n"); return ret; } @@ -88,7 +88,7 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) ret = of_property_read_u32(np, "brcm,pcie-ob-window-size", &val); if (ret) { - dev_err(pcie->dev, + dev_err(dev, "missing brcm,pcie-ob-window-size property\n"); return ret; } @@ -101,7 +101,7 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) } /* PHY use is optional */ - pcie->phy = devm_phy_get(&pdev->dev, "pcie-phy"); + pcie->phy = devm_phy_get(dev, "pcie-phy"); if (IS_ERR(pcie->phy)) { if (PTR_ERR(pcie->phy) == -EPROBE_DEFER) return -EPROBE_DEFER; @@ -110,7 +110,7 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &res, &iobase); if (ret) { - dev_err(pcie->dev, + dev_err(dev, "unable to get PCI host bridge resources\n"); return ret; } @@ -119,10 +119,11 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) ret = iproc_pcie_setup(pcie, &res); if (ret) - dev_err(pcie->dev, "PCIe controller setup failed\n"); + dev_err(dev, "PCIe controller setup failed\n"); 
pci_free_resource_list(&res); + platform_set_drvdata(pdev, pcie); return ret; } diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c index e167b2f0098d..0b999a9fb843 100644 --- a/drivers/pci/host/pcie-iproc.c +++ b/drivers/pci/host/pcie-iproc.c @@ -63,6 +63,8 @@ #define OARR_SIZE_CFG_SHIFT 1 #define OARR_SIZE_CFG BIT(OARR_SIZE_CFG_SHIFT) +#define PCI_EXP_CAP 0xac + #define MAX_NUM_OB_WINDOWS 2 #define IPROC_PCIE_REG_INVALID 0xffff @@ -258,9 +260,10 @@ static void iproc_pcie_reset(struct iproc_pcie *pcie) static int iproc_pcie_check_link(struct iproc_pcie *pcie, struct pci_bus *bus) { + struct device *dev = pcie->dev; u8 hdr_type; u32 link_ctrl, class, val; - u16 pos, link_status; + u16 pos = PCI_EXP_CAP, link_status; bool link_is_active = false; /* @@ -272,14 +275,14 @@ static int iproc_pcie_check_link(struct iproc_pcie *pcie, struct pci_bus *bus) val = iproc_pcie_read_reg(pcie, IPROC_PCIE_LINK_STATUS); if (!(val & PCIE_PHYLINKUP) || !(val & PCIE_DL_ACTIVE)) { - dev_err(pcie->dev, "PHY or data link is INACTIVE!\n"); + dev_err(dev, "PHY or data link is INACTIVE!\n"); return -ENODEV; } /* make sure we are not in EP mode */ pci_bus_read_config_byte(bus, 0, PCI_HEADER_TYPE, &hdr_type); if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE) { - dev_err(pcie->dev, "in EP mode, hdr=%#02x\n", hdr_type); + dev_err(dev, "in EP mode, hdr=%#02x\n", hdr_type); return -EFAULT; } @@ -293,30 +296,27 @@ static int iproc_pcie_check_link(struct iproc_pcie *pcie, struct pci_bus *bus) pci_bus_write_config_dword(bus, 0, PCI_BRIDGE_CTRL_REG_OFFSET, class); /* check link status to see if link is active */ - pos = pci_bus_find_capability(bus, 0, PCI_CAP_ID_EXP); pci_bus_read_config_word(bus, 0, pos + PCI_EXP_LNKSTA, &link_status); if (link_status & PCI_EXP_LNKSTA_NLW) link_is_active = true; if (!link_is_active) { /* try GEN 1 link speed */ -#define PCI_LINK_STATUS_CTRL_2_OFFSET 0x0dc #define PCI_TARGET_LINK_SPEED_MASK 0xf #define PCI_TARGET_LINK_SPEED_GEN2 0x2 #define PCI_TARGET_LINK_SPEED_GEN1 0x1 pci_bus_read_config_dword(bus, 0, - PCI_LINK_STATUS_CTRL_2_OFFSET, + pos + PCI_EXP_LNKCTL2, &link_ctrl); if ((link_ctrl & PCI_TARGET_LINK_SPEED_MASK) == PCI_TARGET_LINK_SPEED_GEN2) { link_ctrl &= ~PCI_TARGET_LINK_SPEED_MASK; link_ctrl |= PCI_TARGET_LINK_SPEED_GEN1; pci_bus_write_config_dword(bus, 0, - PCI_LINK_STATUS_CTRL_2_OFFSET, + pos + PCI_EXP_LNKCTL2, link_ctrl); msleep(100); - pos = pci_bus_find_capability(bus, 0, PCI_CAP_ID_EXP); pci_bus_read_config_word(bus, 0, pos + PCI_EXP_LNKSTA, &link_status); if (link_status & PCI_EXP_LNKSTA_NLW) @@ -324,7 +324,7 @@ static int iproc_pcie_check_link(struct iproc_pcie *pcie, struct pci_bus *bus) } } - dev_info(pcie->dev, "link: %s\n", link_is_active ? "UP" : "DOWN"); + dev_info(dev, "link: %s\n", link_is_active ? "UP" : "DOWN"); return link_is_active ? 
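/*
 * iproc now hardcodes the PCIe capability offset (PCI_EXP_CAP, 0xac)
 * instead of re-walking the capability list, and replaces the private
 * PCI_LINK_STATUS_CTRL_2_OFFSET with the standard pos + PCI_EXP_LNKCTL2;
 * presumably the offset is fixed in this root complex's config space.
 * The GEN1 fallback above boils down to: if no link width was
 * negotiated, cap the target speed at 2.5 GT/s and re-check. Sketch of
 * that register flow, reusing the defines from the hunk above:
 *
 *	u32 lnkctl2;
 *
 *	pci_bus_read_config_dword(bus, 0, pos + PCI_EXP_LNKCTL2, &lnkctl2);
 *	lnkctl2 &= ~PCI_TARGET_LINK_SPEED_MASK;
 *	lnkctl2 |= PCI_TARGET_LINK_SPEED_GEN1;	// force 2.5 GT/s
 *	pci_bus_write_config_dword(bus, 0, pos + PCI_EXP_LNKCTL2, lnkctl2);
 *	msleep(100);				// let the LTSSM retrain
 */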
0 : -ENODEV; } @@ -349,12 +349,13 @@ static int iproc_pcie_setup_ob(struct iproc_pcie *pcie, u64 axi_addr, u64 pci_addr, resource_size_t size) { struct iproc_pcie_ob *ob = &pcie->ob; + struct device *dev = pcie->dev; unsigned i; u64 max_size = (u64)ob->window_size * MAX_NUM_OB_WINDOWS; u64 remainder; if (size > max_size) { - dev_err(pcie->dev, + dev_err(dev, "res size %pap exceeds max supported size 0x%llx\n", &size, max_size); return -EINVAL; @@ -362,15 +363,14 @@ static int iproc_pcie_setup_ob(struct iproc_pcie *pcie, u64 axi_addr, div64_u64_rem(size, ob->window_size, &remainder); if (remainder) { - dev_err(pcie->dev, + dev_err(dev, "res size %pap needs to be multiple of window size %pap\n", &size, &ob->window_size); return -EINVAL; } if (axi_addr < ob->axi_offset) { - dev_err(pcie->dev, - "axi address %pap less than offset %pap\n", + dev_err(dev, "axi address %pap less than offset %pap\n", &axi_addr, &ob->axi_offset); return -EINVAL; } @@ -406,6 +406,7 @@ static int iproc_pcie_setup_ob(struct iproc_pcie *pcie, u64 axi_addr, static int iproc_pcie_map_ranges(struct iproc_pcie *pcie, struct list_head *resources) { + struct device *dev = pcie->dev; struct resource_entry *window; int ret; @@ -425,7 +426,7 @@ static int iproc_pcie_map_ranges(struct iproc_pcie *pcie, return ret; break; default: - dev_err(pcie->dev, "invalid resource %pR\n", res); + dev_err(dev, "invalid resource %pR\n", res); return -EINVAL; } } @@ -455,26 +456,25 @@ static void iproc_pcie_msi_disable(struct iproc_pcie *pcie) int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res) { + struct device *dev; int ret; void *sysdata; struct pci_bus *bus; - if (!pcie || !pcie->dev || !pcie->base) - return -EINVAL; - - ret = devm_request_pci_bus_resources(pcie->dev, res); + dev = pcie->dev; + ret = devm_request_pci_bus_resources(dev, res); if (ret) return ret; ret = phy_init(pcie->phy); if (ret) { - dev_err(pcie->dev, "unable to initialize PCIe PHY\n"); + dev_err(dev, "unable to initialize PCIe PHY\n"); return ret; } ret = phy_power_on(pcie->phy); if (ret) { - dev_err(pcie->dev, "unable to power on PCIe PHY\n"); + dev_err(dev, "unable to power on PCIe PHY\n"); goto err_exit_phy; } @@ -486,7 +486,7 @@ int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res) pcie->reg_offsets = iproc_pcie_reg_paxc; break; default: - dev_err(pcie->dev, "incompatible iProc PCIe interface\n"); + dev_err(dev, "incompatible iProc PCIe interface\n"); ret = -EINVAL; goto err_power_off_phy; } @@ -496,7 +496,7 @@ int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res) if (pcie->need_ob_cfg) { ret = iproc_pcie_map_ranges(pcie, res); if (ret) { - dev_err(pcie->dev, "map failed\n"); + dev_err(dev, "map failed\n"); goto err_power_off_phy; } } @@ -508,9 +508,9 @@ int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res) sysdata = pcie; #endif - bus = pci_create_root_bus(pcie->dev, 0, &iproc_pcie_ops, sysdata, res); + bus = pci_create_root_bus(dev, 0, &iproc_pcie_ops, sysdata, res); if (!bus) { - dev_err(pcie->dev, "unable to create PCI root bus\n"); + dev_err(dev, "unable to create PCI root bus\n"); ret = -ENOMEM; goto err_power_off_phy; } @@ -518,7 +518,7 @@ int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res) ret = iproc_pcie_check_link(pcie, bus); if (ret) { - dev_err(pcie->dev, "no PCIe EP device detected\n"); + dev_err(dev, "no PCIe EP device detected\n"); goto err_rm_root_bus; } @@ -526,7 +526,7 @@ int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res) if (IS_ENABLED(CONFIG_PCI_MSI)) 
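/*
 * iproc_pcie_setup() drops its defensive !pcie || !pcie->dev ||
 * !pcie->base check because both probe paths (bcma and platform) now
 * guarantee those fields before calling in, and it claims every bridge
 * window with a single devm_request_pci_bus_resources() call, so the
 * error paths need no manual release. Typical call shape:
 *
 *	ret = devm_request_pci_bus_resources(dev, res);
 *	if (ret)
 *		return ret;	// claimed resources unwind via devm
 */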
if (iproc_pcie_msi_enable(pcie)) - dev_info(pcie->dev, "not using iProc MSI\n"); + dev_info(dev, "not using iProc MSI\n"); pci_scan_child_bus(bus); pci_assign_unassigned_bus_resources(bus); diff --git a/drivers/pci/host/pcie-qcom.c b/drivers/pci/host/pcie-qcom.c index 5ec2d440a6b7..ef0a84c7a588 100644 --- a/drivers/pci/host/pcie-qcom.c +++ b/drivers/pci/host/pcie-qcom.c @@ -86,12 +86,10 @@ struct qcom_pcie_ops { }; struct qcom_pcie { - struct pcie_port pp; - struct device *dev; + struct pcie_port pp; /* pp.dbi_base is DT dbi */ + void __iomem *parf; /* DT parf */ + void __iomem *elbi; /* DT elbi */ union qcom_pcie_resources res; - void __iomem *parf; - void __iomem *dbi; - void __iomem *elbi; struct phy *phy; struct gpio_desc *reset; struct qcom_pcie_ops *ops; @@ -136,7 +134,7 @@ static int qcom_pcie_establish_link(struct qcom_pcie *pcie) static int qcom_pcie_get_resources_v0(struct qcom_pcie *pcie) { struct qcom_pcie_resources_v0 *res = &pcie->res.v0; - struct device *dev = pcie->dev; + struct device *dev = pcie->pp.dev; res->vdda = devm_regulator_get(dev, "vdda"); if (IS_ERR(res->vdda)) @@ -188,7 +186,7 @@ static int qcom_pcie_get_resources_v0(struct qcom_pcie *pcie) static int qcom_pcie_get_resources_v1(struct qcom_pcie *pcie) { struct qcom_pcie_resources_v1 *res = &pcie->res.v1; - struct device *dev = pcie->dev; + struct device *dev = pcie->pp.dev; res->vdda = devm_regulator_get(dev, "vdda"); if (IS_ERR(res->vdda)) @@ -237,7 +235,7 @@ static void qcom_pcie_deinit_v0(struct qcom_pcie *pcie) static int qcom_pcie_init_v0(struct qcom_pcie *pcie) { struct qcom_pcie_resources_v0 *res = &pcie->res.v0; - struct device *dev = pcie->dev; + struct device *dev = pcie->pp.dev; u32 val; int ret; @@ -359,7 +357,7 @@ static void qcom_pcie_deinit_v1(struct qcom_pcie *pcie) static int qcom_pcie_init_v1(struct qcom_pcie *pcie) { struct qcom_pcie_resources_v1 *res = &pcie->res.v1; - struct device *dev = pcie->dev; + struct device *dev = pcie->pp.dev; int ret; ret = reset_control_deassert(res->core); @@ -426,7 +424,7 @@ err_res: static int qcom_pcie_link_up(struct pcie_port *pp) { struct qcom_pcie *pcie = to_qcom_pcie(pp); - u16 val = readw(pcie->dbi + PCIE20_CAP + PCI_EXP_LNKSTA); + u16 val = readw(pcie->pp.dbi_base + PCIE20_CAP + PCI_EXP_LNKSTA); return !!(val & PCI_EXP_LNKSTA_DLLLA); } @@ -509,8 +507,8 @@ static int qcom_pcie_probe(struct platform_device *pdev) if (!pcie) return -ENOMEM; + pp = &pcie->pp; pcie->ops = (struct qcom_pcie_ops *)of_device_get_match_data(dev); - pcie->dev = dev; pcie->reset = devm_gpiod_get_optional(dev, "perst", GPIOD_OUT_LOW); if (IS_ERR(pcie->reset)) @@ -522,9 +520,9 @@ static int qcom_pcie_probe(struct platform_device *pdev) return PTR_ERR(pcie->parf); res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi"); - pcie->dbi = devm_ioremap_resource(dev, res); - if (IS_ERR(pcie->dbi)) - return PTR_ERR(pcie->dbi); + pp->dbi_base = devm_ioremap_resource(dev, res); + if (IS_ERR(pp->dbi_base)) + return PTR_ERR(pp->dbi_base); res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "elbi"); pcie->elbi = devm_ioremap_resource(dev, res); @@ -539,9 +537,7 @@ static int qcom_pcie_probe(struct platform_device *pdev) if (ret) return ret; - pp = &pcie->pp; pp->dev = dev; - pp->dbi_base = pcie->dbi; pp->root_bus_nr = -1; pp->ops = &qcom_pcie_dw_ops; @@ -569,8 +565,6 @@ static int qcom_pcie_probe(struct platform_device *pdev) return ret; } - platform_set_drvdata(pdev, pcie); - return 0; } diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c index 
e06b1d3b4dea..62700d1896f4 100644 --- a/drivers/pci/host/pcie-rcar.c +++ b/drivers/pci/host/pcie-rcar.c @@ -31,8 +31,6 @@ #include <linux/pm_runtime.h> #include <linux/slab.h> -#define DRV_NAME "rcar-pcie" - #define PCIECAR 0x000010 #define PCIECCTLR 0x000018 #define CONFIG_SEND_ENABLE (1 << 31) @@ -397,6 +395,7 @@ static int rcar_pcie_setup(struct list_head *resource, struct rcar_pcie *pci) static void rcar_pcie_force_speedup(struct rcar_pcie *pcie) { + struct device *dev = pcie->dev; unsigned int timeout = 1000; u32 macsr; @@ -404,7 +403,7 @@ static void rcar_pcie_force_speedup(struct rcar_pcie *pcie) return; if (rcar_pci_read_reg(pcie, MACCTLR) & SPEED_CHANGE) { - dev_err(pcie->dev, "Speed change already in progress\n"); + dev_err(dev, "Speed change already in progress\n"); return; } @@ -433,7 +432,7 @@ static void rcar_pcie_force_speedup(struct rcar_pcie *pcie) rcar_pci_write_reg(pcie, macsr, MACSR); if (macsr & SPCHGFAIL) - dev_err(pcie->dev, "Speed change failed\n"); + dev_err(dev, "Speed change failed\n"); goto done; } @@ -441,15 +440,16 @@ static void rcar_pcie_force_speedup(struct rcar_pcie *pcie) msleep(1); }; - dev_err(pcie->dev, "Speed change timed out\n"); + dev_err(dev, "Speed change timed out\n"); done: - dev_info(pcie->dev, "Current link speed is %s GT/s\n", + dev_info(dev, "Current link speed is %s GT/s\n", (macsr & LINK_SPEED) == LINK_SPEED_5_0GTS ? "5" : "2.5"); } static int rcar_pcie_enable(struct rcar_pcie *pcie) { + struct device *dev = pcie->dev; struct pci_bus *bus, *child; LIST_HEAD(res); @@ -461,14 +461,14 @@ static int rcar_pcie_enable(struct rcar_pcie *pcie) pci_add_flags(PCI_REASSIGN_ALL_RSRC | PCI_REASSIGN_ALL_BUS); if (IS_ENABLED(CONFIG_PCI_MSI)) - bus = pci_scan_root_bus_msi(pcie->dev, pcie->root_bus_nr, + bus = pci_scan_root_bus_msi(dev, pcie->root_bus_nr, &rcar_pcie_ops, pcie, &res, &pcie->msi.chip); else - bus = pci_scan_root_bus(pcie->dev, pcie->root_bus_nr, + bus = pci_scan_root_bus(dev, pcie->root_bus_nr, &rcar_pcie_ops, pcie, &res); if (!bus) { - dev_err(pcie->dev, "Scanning rootbus failed"); + dev_err(dev, "Scanning rootbus failed"); return -ENODEV; } @@ -487,6 +487,7 @@ static int rcar_pcie_enable(struct rcar_pcie *pcie) static int phy_wait_for_ack(struct rcar_pcie *pcie) { + struct device *dev = pcie->dev; unsigned int timeout = 100; while (timeout--) { @@ -496,7 +497,7 @@ static int phy_wait_for_ack(struct rcar_pcie *pcie) udelay(100); } - dev_err(pcie->dev, "Access to PCIe phy timed out\n"); + dev_err(dev, "Access to PCIe phy timed out\n"); return -ETIMEDOUT; } @@ -697,6 +698,7 @@ static irqreturn_t rcar_pcie_msi_irq(int irq, void *data) { struct rcar_pcie *pcie = data; struct rcar_msi *msi = &pcie->msi; + struct device *dev = pcie->dev; unsigned long reg; reg = rcar_pci_read_reg(pcie, PCIEMSIFR); @@ -717,10 +719,10 @@ static irqreturn_t rcar_pcie_msi_irq(int irq, void *data) if (test_bit(index, msi->used)) generic_handle_irq(irq); else - dev_info(pcie->dev, "unhandled MSI\n"); + dev_info(dev, "unhandled MSI\n"); } else { /* Unknown MSI, just clear it */ - dev_dbg(pcie->dev, "unexpected MSI\n"); + dev_dbg(dev, "unexpected MSI\n"); } /* see if there's any more pending in this vector */ @@ -843,22 +845,22 @@ static const struct irq_domain_ops msi_domain_ops = { static int rcar_pcie_enable_msi(struct rcar_pcie *pcie) { - struct platform_device *pdev = to_platform_device(pcie->dev); + struct device *dev = pcie->dev; struct rcar_msi *msi = &pcie->msi; unsigned long base; int err, i; mutex_init(&msi->lock); - msi->chip.dev = pcie->dev; + msi->chip.dev 
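/*
 * The rcar MSI interrupts are requested below with
 * IRQF_SHARED | IRQF_NO_THREAD: shared because the same lines also
 * carry non-MSI events, and NO_THREAD because the handler demultiplexes
 * with generic_handle_irq(), which must not run from a forced IRQ
 * thread. General shape of such a demux handler (the foo_* names and
 * the ->domain member are hypothetical):
 *
 *	static irqreturn_t foo_msi_irq(int irq, void *data)
 *	{
 *		struct foo_pcie *pcie = data;
 *		unsigned long status = foo_read_msi_status(pcie);
 *		unsigned int bit;
 *
 *		if (!status)
 *			return IRQ_NONE;
 *		for_each_set_bit(bit, &status, 32) {
 *			foo_ack_msi(pcie, bit);	// ack before handling
 *			generic_handle_irq(irq_find_mapping(pcie->domain,
 *							    bit));
 *		}
 *		return IRQ_HANDLED;
 *	}
 */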
= dev; msi->chip.setup_irq = rcar_msi_setup_irq; msi->chip.setup_irqs = rcar_msi_setup_irqs; msi->chip.teardown_irq = rcar_msi_teardown_irq; - msi->domain = irq_domain_add_linear(pcie->dev->of_node, INT_PCI_MSI_NR, + msi->domain = irq_domain_add_linear(dev->of_node, INT_PCI_MSI_NR, &msi_domain_ops, &msi->chip); if (!msi->domain) { - dev_err(&pdev->dev, "failed to create IRQ domain\n"); + dev_err(dev, "failed to create IRQ domain\n"); return -ENOMEM; } @@ -866,19 +868,19 @@ static int rcar_pcie_enable_msi(struct rcar_pcie *pcie) irq_create_mapping(msi->domain, i); /* Two irqs are for MSI, but they are also used for non-MSI irqs */ - err = devm_request_irq(&pdev->dev, msi->irq1, rcar_pcie_msi_irq, + err = devm_request_irq(dev, msi->irq1, rcar_pcie_msi_irq, IRQF_SHARED | IRQF_NO_THREAD, rcar_msi_irq_chip.name, pcie); if (err < 0) { - dev_err(&pdev->dev, "failed to request IRQ: %d\n", err); + dev_err(dev, "failed to request IRQ: %d\n", err); goto err; } - err = devm_request_irq(&pdev->dev, msi->irq2, rcar_pcie_msi_irq, + err = devm_request_irq(dev, msi->irq2, rcar_pcie_msi_irq, IRQF_SHARED | IRQF_NO_THREAD, rcar_msi_irq_chip.name, pcie); if (err < 0) { - dev_err(&pdev->dev, "failed to request IRQ: %d\n", err); + dev_err(dev, "failed to request IRQ: %d\n", err); goto err; } @@ -899,32 +901,32 @@ err: return err; } -static int rcar_pcie_get_resources(struct platform_device *pdev, - struct rcar_pcie *pcie) +static int rcar_pcie_get_resources(struct rcar_pcie *pcie) { + struct device *dev = pcie->dev; struct resource res; int err, i; - err = of_address_to_resource(pdev->dev.of_node, 0, &res); + err = of_address_to_resource(dev->of_node, 0, &res); if (err) return err; - pcie->base = devm_ioremap_resource(&pdev->dev, &res); + pcie->base = devm_ioremap_resource(dev, &res); if (IS_ERR(pcie->base)) return PTR_ERR(pcie->base); - pcie->clk = devm_clk_get(&pdev->dev, "pcie"); + pcie->clk = devm_clk_get(dev, "pcie"); if (IS_ERR(pcie->clk)) { - dev_err(pcie->dev, "cannot get platform clock\n"); + dev_err(dev, "cannot get platform clock\n"); return PTR_ERR(pcie->clk); } err = clk_prepare_enable(pcie->clk); if (err) return err; - pcie->bus_clk = devm_clk_get(&pdev->dev, "pcie_bus"); + pcie->bus_clk = devm_clk_get(dev, "pcie_bus"); if (IS_ERR(pcie->bus_clk)) { - dev_err(pcie->dev, "cannot get pcie bus clock\n"); + dev_err(dev, "cannot get pcie bus clock\n"); err = PTR_ERR(pcie->bus_clk); goto fail_clk; } @@ -932,17 +934,17 @@ static int rcar_pcie_get_resources(struct platform_device *pdev, if (err) goto fail_clk; - i = irq_of_parse_and_map(pdev->dev.of_node, 0); + i = irq_of_parse_and_map(dev->of_node, 0); if (!i) { - dev_err(pcie->dev, "cannot get platform resources for msi interrupt\n"); + dev_err(dev, "cannot get platform resources for msi interrupt\n"); err = -ENOENT; goto err_map_reg; } pcie->msi.irq1 = i; - i = irq_of_parse_and_map(pdev->dev.of_node, 1); + i = irq_of_parse_and_map(dev->of_node, 1); if (!i) { - dev_err(pcie->dev, "cannot get platform resources for msi interrupt\n"); + dev_err(dev, "cannot get platform resources for msi interrupt\n"); err = -ENOENT; goto err_map_reg; } @@ -1119,60 +1121,60 @@ out_release_res: static int rcar_pcie_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; struct rcar_pcie *pcie; unsigned int data; const struct of_device_id *of_id; int err; int (*hw_init_fn)(struct rcar_pcie *); - pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL); + pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); if (!pcie) return -ENOMEM; - pcie->dev = 
&pdev->dev; - platform_set_drvdata(pdev, pcie); + pcie->dev = dev; INIT_LIST_HEAD(&pcie->resources); rcar_pcie_parse_request_of_pci_ranges(pcie); - err = rcar_pcie_get_resources(pdev, pcie); + err = rcar_pcie_get_resources(pcie); if (err < 0) { - dev_err(&pdev->dev, "failed to request resources: %d\n", err); + dev_err(dev, "failed to request resources: %d\n", err); return err; } - err = rcar_pcie_parse_map_dma_ranges(pcie, pdev->dev.of_node); + err = rcar_pcie_parse_map_dma_ranges(pcie, dev->of_node); if (err) return err; - of_id = of_match_device(rcar_pcie_of_match, pcie->dev); + of_id = of_match_device(rcar_pcie_of_match, dev); if (!of_id || !of_id->data) return -EINVAL; hw_init_fn = of_id->data; - pm_runtime_enable(pcie->dev); - err = pm_runtime_get_sync(pcie->dev); + pm_runtime_enable(dev); + err = pm_runtime_get_sync(dev); if (err < 0) { - dev_err(pcie->dev, "pm_runtime_get_sync failed\n"); + dev_err(dev, "pm_runtime_get_sync failed\n"); goto err_pm_disable; } /* Failure to get a link might just be that no cards are inserted */ err = hw_init_fn(pcie); if (err) { - dev_info(&pdev->dev, "PCIe link down\n"); + dev_info(dev, "PCIe link down\n"); err = 0; goto err_pm_put; } data = rcar_pci_read_reg(pcie, MACSR); - dev_info(&pdev->dev, "PCIe x%d: link up\n", (data >> 20) & 0x3f); + dev_info(dev, "PCIe x%d: link up\n", (data >> 20) & 0x3f); if (IS_ENABLED(CONFIG_PCI_MSI)) { err = rcar_pcie_enable_msi(pcie); if (err < 0) { - dev_err(&pdev->dev, + dev_err(dev, "failed to enable MSI support: %d\n", err); goto err_pm_put; @@ -1186,16 +1188,16 @@ static int rcar_pcie_probe(struct platform_device *pdev) return 0; err_pm_put: - pm_runtime_put(pcie->dev); + pm_runtime_put(dev); err_pm_disable: - pm_runtime_disable(pcie->dev); + pm_runtime_disable(dev); return err; } static struct platform_driver rcar_pcie_driver = { .driver = { - .name = DRV_NAME, + .name = "rcar-pcie", .of_match_table = rcar_pcie_of_match, .suppress_bind_attrs = true, }, diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c index b8c82fc812dc..e0b22dab9b7a 100644 --- a/drivers/pci/host/pcie-rockchip.c +++ b/drivers/pci/host/pcie-rockchip.c @@ -972,7 +972,7 @@ static int rockchip_pcie_prog_ob_atu(struct rockchip_pcie *rockchip, return -EINVAL; if (region_no == 0) { if (AXI_REGION_0_SIZE < (2ULL << num_pass_bits)) - return -EINVAL; + return -EINVAL; } if (region_no != 0) { if (AXI_REGION_SIZE < (2ULL << num_pass_bits)) @@ -1091,8 +1091,6 @@ static int rockchip_pcie_probe(struct platform_device *pdev) if (err) goto err_vpcie; - platform_set_drvdata(pdev, rockchip); - rockchip_pcie_enable_interrupts(rockchip); err = rockchip_pcie_init_irq_domain(rockchip); diff --git a/drivers/pci/host/pcie-spear13xx.c b/drivers/pci/host/pcie-spear13xx.c index 09aed85f275a..3cf197ba7f37 100644 --- a/drivers/pci/host/pcie-spear13xx.c +++ b/drivers/pci/host/pcie-spear13xx.c @@ -25,10 +25,10 @@ #include "pcie-designware.h" struct spear13xx_pcie { + struct pcie_port pp; /* DT dbi is pp.dbi_base */ void __iomem *app_base; struct phy *phy; struct clk *clk; - struct pcie_port pp; bool is_gen1; }; @@ -57,96 +57,26 @@ struct pcie_app_reg { }; /* CR0 ID */ -#define RX_LANE_FLIP_EN_ID 0 -#define TX_LANE_FLIP_EN_ID 1 -#define SYS_AUX_PWR_DET_ID 2 #define APP_LTSSM_ENABLE_ID 3 -#define SYS_ATTEN_BUTTON_PRESSED_ID 4 -#define SYS_MRL_SENSOR_STATE_ID 5 -#define SYS_PWR_FAULT_DET_ID 6 -#define SYS_MRL_SENSOR_CHGED_ID 7 -#define SYS_PRE_DET_CHGED_ID 8 -#define SYS_CMD_CPLED_INT_ID 9 -#define APP_INIT_RST_0_ID 11 -#define APP_REQ_ENTR_L1_ID 12 
-#define APP_READY_ENTR_L23_ID 13 -#define APP_REQ_EXIT_L1_ID 14 -#define DEVICE_TYPE_EP (0 << 25) -#define DEVICE_TYPE_LEP (1 << 25) #define DEVICE_TYPE_RC (4 << 25) -#define SYS_INT_ID 29 #define MISCTRL_EN_ID 30 #define REG_TRANSLATION_ENABLE 31 -/* CR1 ID */ -#define APPS_PM_XMT_TURNOFF_ID 2 -#define APPS_PM_XMT_PME_ID 5 - /* CR3 ID */ -#define XMLH_LTSSM_STATE_DETECT_QUIET 0x00 -#define XMLH_LTSSM_STATE_DETECT_ACT 0x01 -#define XMLH_LTSSM_STATE_POLL_ACTIVE 0x02 -#define XMLH_LTSSM_STATE_POLL_COMPLIANCE 0x03 -#define XMLH_LTSSM_STATE_POLL_CONFIG 0x04 -#define XMLH_LTSSM_STATE_PRE_DETECT_QUIET 0x05 -#define XMLH_LTSSM_STATE_DETECT_WAIT 0x06 -#define XMLH_LTSSM_STATE_CFG_LINKWD_START 0x07 -#define XMLH_LTSSM_STATE_CFG_LINKWD_ACEPT 0x08 -#define XMLH_LTSSM_STATE_CFG_LANENUM_WAIT 0x09 -#define XMLH_LTSSM_STATE_CFG_LANENUM_ACEPT 0x0A -#define XMLH_LTSSM_STATE_CFG_COMPLETE 0x0B -#define XMLH_LTSSM_STATE_CFG_IDLE 0x0C -#define XMLH_LTSSM_STATE_RCVRY_LOCK 0x0D -#define XMLH_LTSSM_STATE_RCVRY_SPEED 0x0E -#define XMLH_LTSSM_STATE_RCVRY_RCVRCFG 0x0F -#define XMLH_LTSSM_STATE_RCVRY_IDLE 0x10 -#define XMLH_LTSSM_STATE_L0 0x11 -#define XMLH_LTSSM_STATE_L0S 0x12 -#define XMLH_LTSSM_STATE_L123_SEND_EIDLE 0x13 -#define XMLH_LTSSM_STATE_L1_IDLE 0x14 -#define XMLH_LTSSM_STATE_L2_IDLE 0x15 -#define XMLH_LTSSM_STATE_L2_WAKE 0x16 -#define XMLH_LTSSM_STATE_DISABLED_ENTRY 0x17 -#define XMLH_LTSSM_STATE_DISABLED_IDLE 0x18 -#define XMLH_LTSSM_STATE_DISABLED 0x19 -#define XMLH_LTSSM_STATE_LPBK_ENTRY 0x1A -#define XMLH_LTSSM_STATE_LPBK_ACTIVE 0x1B -#define XMLH_LTSSM_STATE_LPBK_EXIT 0x1C -#define XMLH_LTSSM_STATE_LPBK_EXIT_TIMEOUT 0x1D -#define XMLH_LTSSM_STATE_HOT_RESET_ENTRY 0x1E -#define XMLH_LTSSM_STATE_HOT_RESET 0x1F -#define XMLH_LTSSM_STATE_MASK 0x3F #define XMLH_LINK_UP (1 << 6) -/* CR4 ID */ -#define CFG_MSI_EN_ID 18 - /* CR6 */ -#define INTA_CTRL_INT (1 << 7) -#define INTB_CTRL_INT (1 << 8) -#define INTC_CTRL_INT (1 << 9) -#define INTD_CTRL_INT (1 << 10) #define MSI_CTRL_INT (1 << 26) -/* CR19 ID */ -#define VEN_MSI_REQ_ID 11 -#define VEN_MSI_FUN_NUM_ID 8 -#define VEN_MSI_TC_ID 5 -#define VEN_MSI_VECTOR_ID 0 -#define VEN_MSI_REQ_EN ((u32)0x1 << VEN_MSI_REQ_ID) -#define VEN_MSI_FUN_NUM_MASK ((u32)0x7 << VEN_MSI_FUN_NUM_ID) -#define VEN_MSI_TC_MASK ((u32)0x7 << VEN_MSI_TC_ID) -#define VEN_MSI_VECTOR_MASK ((u32)0x1F << VEN_MSI_VECTOR_ID) - #define EXP_CAP_ID_OFFSET 0x70 #define to_spear13xx_pcie(x) container_of(x, struct spear13xx_pcie, pp) -static int spear13xx_pcie_establish_link(struct pcie_port *pp) +static int spear13xx_pcie_establish_link(struct spear13xx_pcie *spear13xx_pcie) { - u32 val; - struct spear13xx_pcie *spear13xx_pcie = to_spear13xx_pcie(pp); + struct pcie_port *pp = &spear13xx_pcie->pp; struct pcie_app_reg *app_reg = spear13xx_pcie->app_base; + u32 val; u32 exp_cap_off = EXP_CAP_ID_OFFSET; if (dw_pcie_link_up(pp)) { @@ -203,9 +133,9 @@ static int spear13xx_pcie_establish_link(struct pcie_port *pp) static irqreturn_t spear13xx_pcie_irq_handler(int irq, void *arg) { - struct pcie_port *pp = arg; - struct spear13xx_pcie *spear13xx_pcie = to_spear13xx_pcie(pp); + struct spear13xx_pcie *spear13xx_pcie = arg; struct pcie_app_reg *app_reg = spear13xx_pcie->app_base; + struct pcie_port *pp = &spear13xx_pcie->pp; unsigned int status; status = readl(&app_reg->int_sts); @@ -220,9 +150,9 @@ static irqreturn_t spear13xx_pcie_irq_handler(int irq, void *arg) return IRQ_HANDLED; } -static void spear13xx_pcie_enable_interrupts(struct pcie_port *pp) +static void spear13xx_pcie_enable_interrupts(struct 
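/*
 * spear13xx now follows the series-wide pattern of passing the
 * driver-private struct to internal functions and to the IRQ handler,
 * reaching the embedded pcie_port with &foo->pp and going the other way
 * only through container_of(). The two directions of the embedding, as
 * a sketch (foo_pcie is hypothetical):
 *
 *	struct foo_pcie {
 *		struct pcie_port pp;	// embedded member
 *	};
 *	#define to_foo_pcie(port) container_of(port, struct foo_pcie, pp)
 *
 *	struct pcie_port *pp = &foo->pp;	// outer to inner
 *	struct foo_pcie *foo = to_foo_pcie(pp);	// inner to outer
 */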
spear13xx_pcie *spear13xx_pcie) { - struct spear13xx_pcie *spear13xx_pcie = to_spear13xx_pcie(pp); + struct pcie_port *pp = &spear13xx_pcie->pp; struct pcie_app_reg *app_reg = spear13xx_pcie->app_base; /* Enable MSI interrupt */ @@ -246,8 +176,10 @@ static int spear13xx_pcie_link_up(struct pcie_port *pp) static void spear13xx_pcie_host_init(struct pcie_port *pp) { - spear13xx_pcie_establish_link(pp); - spear13xx_pcie_enable_interrupts(pp); + struct spear13xx_pcie *spear13xx_pcie = to_spear13xx_pcie(pp); + + spear13xx_pcie_establish_link(spear13xx_pcie); + spear13xx_pcie_enable_interrupts(spear13xx_pcie); } static struct pcie_host_ops spear13xx_pcie_host_ops = { @@ -255,10 +187,11 @@ static struct pcie_host_ops spear13xx_pcie_host_ops = { .host_init = spear13xx_pcie_host_init, }; -static int spear13xx_add_pcie_port(struct pcie_port *pp, - struct platform_device *pdev) +static int spear13xx_add_pcie_port(struct spear13xx_pcie *spear13xx_pcie, + struct platform_device *pdev) { - struct device *dev = &pdev->dev; + struct pcie_port *pp = &spear13xx_pcie->pp; + struct device *dev = pp->dev; int ret; pp->irq = platform_get_irq(pdev, 0); @@ -268,7 +201,7 @@ static int spear13xx_add_pcie_port(struct pcie_port *pp, } ret = devm_request_irq(dev, pp->irq, spear13xx_pcie_irq_handler, IRQF_SHARED | IRQF_NO_THREAD, - "spear1340-pcie", pp); + "spear1340-pcie", spear13xx_pcie); if (ret) { dev_err(dev, "failed to request irq %d\n", pp->irq); return ret; @@ -288,10 +221,10 @@ static int spear13xx_add_pcie_port(struct pcie_port *pp, static int spear13xx_pcie_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; struct spear13xx_pcie *spear13xx_pcie; struct pcie_port *pp; - struct device *dev = &pdev->dev; - struct device_node *np = pdev->dev.of_node; + struct device_node *np = dev->of_node; struct resource *dbi_base; int ret; @@ -323,7 +256,6 @@ static int spear13xx_pcie_probe(struct platform_device *pdev) } pp = &spear13xx_pcie->pp; - pp->dev = dev; dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi"); @@ -338,7 +270,7 @@ static int spear13xx_pcie_probe(struct platform_device *pdev) if (of_property_read_bool(np, "st,pcie-is-gen1")) spear13xx_pcie->is_gen1 = true; - ret = spear13xx_add_pcie_port(pp, pdev); + ret = spear13xx_add_pcie_port(spear13xx_pcie, pdev); if (ret < 0) goto fail_clk; diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c index 67eae4179290..43eaa4afab94 100644 --- a/drivers/pci/host/pcie-xilinx-nwl.c +++ b/drivers/pci/host/pcie-xilinx-nwl.c @@ -212,6 +212,7 @@ static bool nwl_phy_link_up(struct nwl_pcie *pcie) static int nwl_wait_for_link(struct nwl_pcie *pcie) { + struct device *dev = pcie->dev; int retries; /* check if the link is up or not */ @@ -221,7 +222,7 @@ static int nwl_wait_for_link(struct nwl_pcie *pcie) usleep_range(LINK_WAIT_USLEEP_MIN, LINK_WAIT_USLEEP_MAX); } - dev_err(pcie->dev, "PHY link never came up\n"); + dev_err(dev, "PHY link never came up\n"); return -ETIMEDOUT; } @@ -277,6 +278,7 @@ static struct pci_ops nwl_pcie_ops = { static irqreturn_t nwl_pcie_misc_handler(int irq, void *data) { struct nwl_pcie *pcie = data; + struct device *dev = pcie->dev; u32 misc_stat; /* Checking for misc interrupts */ @@ -286,45 +288,43 @@ static irqreturn_t nwl_pcie_misc_handler(int irq, void *data) return IRQ_NONE; if (misc_stat & MSGF_MISC_SR_RXMSG_OVER) - dev_err(pcie->dev, "Received Message FIFO Overflow\n"); + dev_err(dev, "Received Message FIFO Overflow\n"); if (misc_stat & MSGF_MISC_SR_SLAVE_ERR) - 
dev_err(pcie->dev, "Slave error\n"); + dev_err(dev, "Slave error\n"); if (misc_stat & MSGF_MISC_SR_MASTER_ERR) - dev_err(pcie->dev, "Master error\n"); + dev_err(dev, "Master error\n"); if (misc_stat & MSGF_MISC_SR_I_ADDR_ERR) - dev_err(pcie->dev, - "In Misc Ingress address translation error\n"); + dev_err(dev, "In Misc Ingress address translation error\n"); if (misc_stat & MSGF_MISC_SR_E_ADDR_ERR) - dev_err(pcie->dev, - "In Misc Egress address translation error\n"); + dev_err(dev, "In Misc Egress address translation error\n"); if (misc_stat & MSGF_MISC_SR_FATAL_AER) - dev_err(pcie->dev, "Fatal Error in AER Capability\n"); + dev_err(dev, "Fatal Error in AER Capability\n"); if (misc_stat & MSGF_MISC_SR_NON_FATAL_AER) - dev_err(pcie->dev, "Non-Fatal Error in AER Capability\n"); + dev_err(dev, "Non-Fatal Error in AER Capability\n"); if (misc_stat & MSGF_MISC_SR_CORR_AER) - dev_err(pcie->dev, "Correctable Error in AER Capability\n"); + dev_err(dev, "Correctable Error in AER Capability\n"); if (misc_stat & MSGF_MISC_SR_UR_DETECT) - dev_err(pcie->dev, "Unsupported request Detected\n"); + dev_err(dev, "Unsupported request Detected\n"); if (misc_stat & MSGF_MISC_SR_NON_FATAL_DEV) - dev_err(pcie->dev, "Non-Fatal Error Detected\n"); + dev_err(dev, "Non-Fatal Error Detected\n"); if (misc_stat & MSGF_MISC_SR_FATAL_DEV) - dev_err(pcie->dev, "Fatal Error Detected\n"); + dev_err(dev, "Fatal Error Detected\n"); if (misc_stat & MSGF_MSIC_SR_LINK_AUTO_BWIDTH) - dev_info(pcie->dev, "Link Autonomous Bandwidth Management Status bit set\n"); + dev_info(dev, "Link Autonomous Bandwidth Management Status bit set\n"); if (misc_stat & MSGF_MSIC_SR_LINK_BWIDTH) - dev_info(pcie->dev, "Link Bandwidth Management Status bit set\n"); + dev_info(dev, "Link Bandwidth Management Status bit set\n"); /* Clear misc interrupt status */ nwl_bridge_writel(pcie, misc_stat, MSGF_MISC_STATUS); @@ -494,20 +494,21 @@ static const struct irq_domain_ops dev_msi_domain_ops = { static int nwl_pcie_init_msi_irq_domain(struct nwl_pcie *pcie) { #ifdef CONFIG_PCI_MSI - struct fwnode_handle *fwnode = of_node_to_fwnode(pcie->dev->of_node); + struct device *dev = pcie->dev; + struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node); struct nwl_msi *msi = &pcie->msi; msi->dev_domain = irq_domain_add_linear(NULL, INT_PCI_MSI_NR, &dev_msi_domain_ops, pcie); if (!msi->dev_domain) { - dev_err(pcie->dev, "failed to create dev IRQ domain\n"); + dev_err(dev, "failed to create dev IRQ domain\n"); return -ENOMEM; } msi->msi_domain = pci_msi_create_irq_domain(fwnode, &nwl_msi_domain_info, msi->dev_domain); if (!msi->msi_domain) { - dev_err(pcie->dev, "failed to create msi IRQ domain\n"); + dev_err(dev, "failed to create msi IRQ domain\n"); irq_domain_remove(msi->dev_domain); return -ENOMEM; } @@ -517,12 +518,13 @@ static int nwl_pcie_init_msi_irq_domain(struct nwl_pcie *pcie) static int nwl_pcie_init_irq_domain(struct nwl_pcie *pcie) { - struct device_node *node = pcie->dev->of_node; + struct device *dev = pcie->dev; + struct device_node *node = dev->of_node; struct device_node *legacy_intc_node; legacy_intc_node = of_get_next_child(node, NULL); if (!legacy_intc_node) { - dev_err(pcie->dev, "No legacy intc node found\n"); + dev_err(dev, "No legacy intc node found\n"); return -EINVAL; } @@ -532,7 +534,7 @@ static int nwl_pcie_init_irq_domain(struct nwl_pcie *pcie) pcie); if (!pcie->legacy_irq_domain) { - dev_err(pcie->dev, "failed to create IRQ domain\n"); + dev_err(dev, "failed to create IRQ domain\n"); return -ENOMEM; } @@ -542,7 +544,8 @@ static int 
nwl_pcie_init_irq_domain(struct nwl_pcie *pcie) static int nwl_pcie_enable_msi(struct nwl_pcie *pcie, struct pci_bus *bus) { - struct platform_device *pdev = to_platform_device(pcie->dev); + struct device *dev = pcie->dev; + struct platform_device *pdev = to_platform_device(dev); struct nwl_msi *msi = &pcie->msi; unsigned long base; int ret; @@ -557,7 +560,7 @@ static int nwl_pcie_enable_msi(struct nwl_pcie *pcie, struct pci_bus *bus) /* Get msi_1 IRQ number */ msi->irq_msi1 = platform_get_irq_byname(pdev, "msi1"); if (msi->irq_msi1 < 0) { - dev_err(&pdev->dev, "failed to get IRQ#%d\n", msi->irq_msi1); + dev_err(dev, "failed to get IRQ#%d\n", msi->irq_msi1); ret = -EINVAL; goto err; } @@ -568,7 +571,7 @@ static int nwl_pcie_enable_msi(struct nwl_pcie *pcie, struct pci_bus *bus) /* Get msi_0 IRQ number */ msi->irq_msi0 = platform_get_irq_byname(pdev, "msi0"); if (msi->irq_msi0 < 0) { - dev_err(&pdev->dev, "failed to get IRQ#%d\n", msi->irq_msi0); + dev_err(dev, "failed to get IRQ#%d\n", msi->irq_msi0); ret = -EINVAL; goto err; } @@ -579,7 +582,7 @@ static int nwl_pcie_enable_msi(struct nwl_pcie *pcie, struct pci_bus *bus) /* Check for msii_present bit */ ret = nwl_bridge_readl(pcie, I_MSII_CAPABILITIES) & MSII_PRESENT; if (!ret) { - dev_err(pcie->dev, "MSI not present\n"); + dev_err(dev, "MSI not present\n"); ret = -EIO; goto err; } @@ -628,13 +631,14 @@ err: static int nwl_pcie_bridge_init(struct nwl_pcie *pcie) { - struct platform_device *pdev = to_platform_device(pcie->dev); + struct device *dev = pcie->dev; + struct platform_device *pdev = to_platform_device(dev); u32 breg_val, ecam_val, first_busno = 0; int err; breg_val = nwl_bridge_readl(pcie, E_BREG_CAPABILITIES) & BREG_PRESENT; if (!breg_val) { - dev_err(pcie->dev, "BREG is not present\n"); + dev_err(dev, "BREG is not present\n"); return breg_val; } @@ -665,7 +669,7 @@ static int nwl_pcie_bridge_init(struct nwl_pcie *pcie) ecam_val = nwl_bridge_readl(pcie, E_ECAM_CAPABILITIES) & E_ECAM_PRESENT; if (!ecam_val) { - dev_err(pcie->dev, "ECAM is not present\n"); + dev_err(dev, "ECAM is not present\n"); return ecam_val; } @@ -692,23 +696,23 @@ static int nwl_pcie_bridge_init(struct nwl_pcie *pcie) writel(ecam_val, (pcie->ecam_base + PCI_PRIMARY_BUS)); if (nwl_pcie_link_up(pcie)) - dev_info(pcie->dev, "Link is UP\n"); + dev_info(dev, "Link is UP\n"); else - dev_info(pcie->dev, "Link is DOWN\n"); + dev_info(dev, "Link is DOWN\n"); /* Get misc IRQ number */ pcie->irq_misc = platform_get_irq_byname(pdev, "misc"); if (pcie->irq_misc < 0) { - dev_err(&pdev->dev, "failed to get misc IRQ %d\n", + dev_err(dev, "failed to get misc IRQ %d\n", pcie->irq_misc); return -EINVAL; } - err = devm_request_irq(pcie->dev, pcie->irq_misc, + err = devm_request_irq(dev, pcie->irq_misc, nwl_pcie_misc_handler, IRQF_SHARED, "nwl_pcie:misc", pcie); if (err) { - dev_err(pcie->dev, "fail to register misc IRQ#%d\n", + dev_err(dev, "fail to register misc IRQ#%d\n", pcie->irq_misc); return err; } @@ -744,31 +748,32 @@ static int nwl_pcie_bridge_init(struct nwl_pcie *pcie) static int nwl_pcie_parse_dt(struct nwl_pcie *pcie, struct platform_device *pdev) { - struct device_node *node = pcie->dev->of_node; + struct device *dev = pcie->dev; + struct device_node *node = dev->of_node; struct resource *res; const char *type; /* Check for device type */ type = of_get_property(node, "device_type", NULL); if (!type || strcmp(type, "pci")) { - dev_err(pcie->dev, "invalid \"device_type\" %s\n", type); + dev_err(dev, "invalid \"device_type\" %s\n", type); return -EINVAL; } res = 
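/*
 * With dev cached, nwl recovers the platform device only where the IRQ
 * lookup needs it, via to_platform_device(dev); allocation, mapping, and
 * logging all go through dev. The named-IRQ pattern used throughout this
 * driver, as a sketch ("misc" is taken from the hunk below;
 * foo_misc_handler is hypothetical):
 *
 *	int irq = platform_get_irq_byname(pdev, "misc");
 *
 *	if (irq < 0)
 *		return irq;	// a common variant; this driver maps
 *				// the failure to -EINVAL instead
 *	err = devm_request_irq(dev, irq, foo_misc_handler, IRQF_SHARED,
 *			       "nwl_pcie:misc", pcie);
 */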
platform_get_resource_byname(pdev, IORESOURCE_MEM, "breg"); - pcie->breg_base = devm_ioremap_resource(pcie->dev, res); + pcie->breg_base = devm_ioremap_resource(dev, res); if (IS_ERR(pcie->breg_base)) return PTR_ERR(pcie->breg_base); pcie->phys_breg_base = res->start; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "pcireg"); - pcie->pcireg_base = devm_ioremap_resource(pcie->dev, res); + pcie->pcireg_base = devm_ioremap_resource(dev, res); if (IS_ERR(pcie->pcireg_base)) return PTR_ERR(pcie->pcireg_base); pcie->phys_pcie_reg_base = res->start; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cfg"); - pcie->ecam_base = devm_ioremap_resource(pcie->dev, res); + pcie->ecam_base = devm_ioremap_resource(dev, res); if (IS_ERR(pcie->ecam_base)) return PTR_ERR(pcie->ecam_base); pcie->phys_ecam_base = res->start; @@ -776,8 +781,7 @@ static int nwl_pcie_parse_dt(struct nwl_pcie *pcie, /* Get intx IRQ number */ pcie->irq_intx = platform_get_irq_byname(pdev, "intx"); if (pcie->irq_intx < 0) { - dev_err(&pdev->dev, "failed to get intx IRQ %d\n", - pcie->irq_intx); + dev_err(dev, "failed to get intx IRQ %d\n", pcie->irq_intx); return -EINVAL; } @@ -794,7 +798,8 @@ static const struct of_device_id nwl_pcie_of_match[] = { static int nwl_pcie_probe(struct platform_device *pdev) { - struct device_node *node = pdev->dev.of_node; + struct device *dev = &pdev->dev; + struct device_node *node = dev->of_node; struct nwl_pcie *pcie; struct pci_bus *bus; struct pci_bus *child; @@ -802,42 +807,42 @@ static int nwl_pcie_probe(struct platform_device *pdev) resource_size_t iobase = 0; LIST_HEAD(res); - pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL); + pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); if (!pcie) return -ENOMEM; - pcie->dev = &pdev->dev; + pcie->dev = dev; pcie->ecam_value = NWL_ECAM_VALUE_DEFAULT; err = nwl_pcie_parse_dt(pcie, pdev); if (err) { - dev_err(pcie->dev, "Parsing DT failed\n"); + dev_err(dev, "Parsing DT failed\n"); return err; } err = nwl_pcie_bridge_init(pcie); if (err) { - dev_err(pcie->dev, "HW Initialization failed\n"); + dev_err(dev, "HW Initialization failed\n"); return err; } err = of_pci_get_host_bridge_resources(node, 0, 0xff, &res, &iobase); if (err) { - dev_err(pcie->dev, "Getting bridge resources failed\n"); + dev_err(dev, "Getting bridge resources failed\n"); return err; } - err = devm_request_pci_bus_resources(pcie->dev, &res); + err = devm_request_pci_bus_resources(dev, &res); if (err) goto error; err = nwl_pcie_init_irq_domain(pcie); if (err) { - dev_err(pcie->dev, "Failed creating IRQ Domain\n"); + dev_err(dev, "Failed creating IRQ Domain\n"); goto error; } - bus = pci_create_root_bus(&pdev->dev, pcie->root_busno, + bus = pci_create_root_bus(dev, pcie->root_busno, &nwl_pcie_ops, pcie, &res); if (!bus) { err = -ENOMEM; @@ -847,8 +852,7 @@ static int nwl_pcie_probe(struct platform_device *pdev) if (IS_ENABLED(CONFIG_PCI_MSI)) { err = nwl_pcie_enable_msi(pcie, bus); if (err < 0) { - dev_err(&pdev->dev, - "failed to enable MSI support: %d\n", err); + dev_err(dev, "failed to enable MSI support: %d\n", err); goto error; } } @@ -857,7 +861,6 @@ static int nwl_pcie_probe(struct platform_device *pdev) list_for_each_entry(child, &bus->children, node) pcie_bus_configure_settings(child); pci_bus_add_devices(bus); - platform_set_drvdata(pdev, pcie); return 0; error: diff --git a/drivers/pci/host/pcie-xilinx.c b/drivers/pci/host/pcie-xilinx.c index be568039d9d0..c8616fadccf1 100644 --- a/drivers/pci/host/pcie-xilinx.c +++ b/drivers/pci/host/pcie-xilinx.c @@ 
-140,10 +140,11 @@ static inline bool xilinx_pcie_link_is_up(struct xilinx_pcie_port *port) */ static void xilinx_pcie_clear_err_interrupts(struct xilinx_pcie_port *port) { + struct device *dev = port->dev; unsigned long val = pcie_read(port, XILINX_PCIE_REG_RPEFR); if (val & XILINX_PCIE_RPEFR_ERR_VALID) { - dev_dbg(port->dev, "Requester ID %lu\n", + dev_dbg(dev, "Requester ID %lu\n", val & XILINX_PCIE_RPEFR_REQ_ID); pcie_write(port, XILINX_PCIE_RPEFR_ALL_MASK, XILINX_PCIE_REG_RPEFR); @@ -228,11 +229,10 @@ static void xilinx_pcie_destroy_msi(unsigned int irq) /** * xilinx_pcie_assign_msi - Allocate MSI number - * @port: PCIe port structure * * Return: A valid IRQ on success and error value on failure. */ -static int xilinx_pcie_assign_msi(struct xilinx_pcie_port *port) +static int xilinx_pcie_assign_msi(void) { int pos; @@ -275,7 +275,7 @@ static int xilinx_pcie_msi_setup_irq(struct msi_controller *chip, struct msi_msg msg; phys_addr_t msg_addr; - hwirq = xilinx_pcie_assign_msi(port); + hwirq = xilinx_pcie_assign_msi(); if (hwirq < 0) return hwirq; @@ -383,6 +383,7 @@ static const struct irq_domain_ops intx_domain_ops = { static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data) { struct xilinx_pcie_port *port = (struct xilinx_pcie_port *)data; + struct device *dev = port->dev; u32 val, mask, status, msi_data; /* Read interrupt decode and mask registers */ @@ -394,32 +395,32 @@ static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data) return IRQ_NONE; if (status & XILINX_PCIE_INTR_LINK_DOWN) - dev_warn(port->dev, "Link Down\n"); + dev_warn(dev, "Link Down\n"); if (status & XILINX_PCIE_INTR_ECRC_ERR) - dev_warn(port->dev, "ECRC failed\n"); + dev_warn(dev, "ECRC failed\n"); if (status & XILINX_PCIE_INTR_STR_ERR) - dev_warn(port->dev, "Streaming error\n"); + dev_warn(dev, "Streaming error\n"); if (status & XILINX_PCIE_INTR_HOT_RESET) - dev_info(port->dev, "Hot reset\n"); + dev_info(dev, "Hot reset\n"); if (status & XILINX_PCIE_INTR_CFG_TIMEOUT) - dev_warn(port->dev, "ECAM access timeout\n"); + dev_warn(dev, "ECAM access timeout\n"); if (status & XILINX_PCIE_INTR_CORRECTABLE) { - dev_warn(port->dev, "Correctable error message\n"); + dev_warn(dev, "Correctable error message\n"); xilinx_pcie_clear_err_interrupts(port); } if (status & XILINX_PCIE_INTR_NONFATAL) { - dev_warn(port->dev, "Non fatal error message\n"); + dev_warn(dev, "Non fatal error message\n"); xilinx_pcie_clear_err_interrupts(port); } if (status & XILINX_PCIE_INTR_FATAL) { - dev_warn(port->dev, "Fatal error message\n"); + dev_warn(dev, "Fatal error message\n"); xilinx_pcie_clear_err_interrupts(port); } @@ -429,7 +430,7 @@ static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data) /* Check whether interrupt valid */ if (!(val & XILINX_PCIE_RPIFR1_INTR_VALID)) { - dev_warn(port->dev, "RP Intr FIFO1 read error\n"); + dev_warn(dev, "RP Intr FIFO1 read error\n"); goto error; } @@ -451,7 +452,7 @@ static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data) val = pcie_read(port, XILINX_PCIE_REG_RPIFR1); if (!(val & XILINX_PCIE_RPIFR1_INTR_VALID)) { - dev_warn(port->dev, "RP Intr FIFO1 read error\n"); + dev_warn(dev, "RP Intr FIFO1 read error\n"); goto error; } @@ -471,31 +472,31 @@ static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data) } if (status & XILINX_PCIE_INTR_SLV_UNSUPP) - dev_warn(port->dev, "Slave unsupported request\n"); + dev_warn(dev, "Slave unsupported request\n"); if (status & XILINX_PCIE_INTR_SLV_UNEXP) - dev_warn(port->dev, "Slave unexpected completion\n"); + dev_warn(dev, "Slave 
unexpected completion\n"); if (status & XILINX_PCIE_INTR_SLV_COMPL) - dev_warn(port->dev, "Slave completion timeout\n"); + dev_warn(dev, "Slave completion timeout\n"); if (status & XILINX_PCIE_INTR_SLV_ERRP) - dev_warn(port->dev, "Slave Error Poison\n"); + dev_warn(dev, "Slave Error Poison\n"); if (status & XILINX_PCIE_INTR_SLV_CMPABT) - dev_warn(port->dev, "Slave Completer Abort\n"); + dev_warn(dev, "Slave Completer Abort\n"); if (status & XILINX_PCIE_INTR_SLV_ILLBUR) - dev_warn(port->dev, "Slave Illegal Burst\n"); + dev_warn(dev, "Slave Illegal Burst\n"); if (status & XILINX_PCIE_INTR_MST_DECERR) - dev_warn(port->dev, "Master decode error\n"); + dev_warn(dev, "Master decode error\n"); if (status & XILINX_PCIE_INTR_MST_SLVERR) - dev_warn(port->dev, "Master slave error\n"); + dev_warn(dev, "Master slave error\n"); if (status & XILINX_PCIE_INTR_MST_ERRP) - dev_warn(port->dev, "Master error poison\n"); + dev_warn(dev, "Master error poison\n"); error: /* Clear the Interrupt Decode register */ @@ -554,10 +555,12 @@ static int xilinx_pcie_init_irq_domain(struct xilinx_pcie_port *port) */ static void xilinx_pcie_init_port(struct xilinx_pcie_port *port) { + struct device *dev = port->dev; + if (xilinx_pcie_link_is_up(port)) - dev_info(port->dev, "PCIe Link is UP\n"); + dev_info(dev, "PCIe Link is UP\n"); else - dev_info(port->dev, "PCIe Link is DOWN\n"); + dev_info(dev, "PCIe Link is DOWN\n"); /* Disable all interrupts */ pcie_write(port, ~XILINX_PCIE_IDR_ALL_MASK, @@ -627,8 +630,8 @@ static int xilinx_pcie_parse_dt(struct xilinx_pcie_port *port) */ static int xilinx_pcie_probe(struct platform_device *pdev) { - struct xilinx_pcie_port *port; struct device *dev = &pdev->dev; + struct xilinx_pcie_port *port; struct pci_bus *bus; int err; resource_size_t iobase = 0; @@ -668,15 +671,14 @@ static int xilinx_pcie_probe(struct platform_device *pdev) if (err) goto error; - bus = pci_create_root_bus(&pdev->dev, 0, - &xilinx_pcie_ops, port, &res); + bus = pci_create_root_bus(dev, 0, &xilinx_pcie_ops, port, &res); if (!bus) { err = -ENOMEM; goto error; } #ifdef CONFIG_PCI_MSI - xilinx_pcie_msi_chip.dev = port->dev; + xilinx_pcie_msi_chip.dev = dev; bus->msi = &xilinx_pcie_msi_chip; #endif pci_scan_child_bus(bus); @@ -685,8 +687,6 @@ static int xilinx_pcie_probe(struct platform_device *pdev) pci_fixup_irqs(pci_common_swizzle, of_irq_parse_and_map_pci); #endif pci_bus_add_devices(bus); - platform_set_drvdata(pdev, port); - return 0; error: diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c index 460fa6708bfc..2acdb0d6ea89 100644 --- a/drivers/platform/x86/acerhdf.c +++ b/drivers/platform/x86/acerhdf.c @@ -405,7 +405,7 @@ static inline void acerhdf_enable_kernelmode(void) kernelmode = 1; thz_dev->polling_delay = interval*1000; - thermal_zone_device_update(thz_dev); + thermal_zone_device_update(thz_dev, THERMAL_EVENT_UNSPECIFIED); pr_notice("kernel mode fan control ON\n"); } diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c index 15f131146501..28551f5a2e07 100644 --- a/drivers/platform/x86/asus-laptop.c +++ b/drivers/platform/x86/asus-laptop.c @@ -932,30 +932,19 @@ static ssize_t infos_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(infos); -static int parse_arg(const char *buf, unsigned long count, int *val) -{ - if (!count) - return 0; - if (count > 31) - return -EINVAL; - if (sscanf(buf, "%i", val) != 1) - return -EINVAL; - return count; -} - static ssize_t sysfs_acpi_set(struct asus_laptop *asus, const char *buf, 
size_t count, const char *method) { int rv, value; - rv = parse_arg(buf, count, &value); - if (rv <= 0) + rv = kstrtoint(buf, 0, &value); + if (rv < 0) return rv; if (write_acpi_int(asus->handle, method, value)) return -ENODEV; - return rv; + return count; } /* @@ -975,15 +964,17 @@ static ssize_t ledd_store(struct device *dev, struct device_attribute *attr, struct asus_laptop *asus = dev_get_drvdata(dev); int rv, value; - rv = parse_arg(buf, count, &value); - if (rv > 0) { - if (write_acpi_int(asus->handle, METHOD_LEDD, value)) { - pr_warn("LED display write failed\n"); - return -ENODEV; - } - asus->ledd_status = (u32) value; + rv = kstrtoint(buf, 0, &value); + if (rv < 0) + return rv; + + if (write_acpi_int(asus->handle, METHOD_LEDD, value)) { + pr_warn("LED display write failed\n"); + return -ENODEV; } - return rv; + + asus->ledd_status = (u32) value; + return count; } static DEVICE_ATTR_RW(ledd); @@ -1148,10 +1139,12 @@ static ssize_t display_store(struct device *dev, struct device_attribute *attr, struct asus_laptop *asus = dev_get_drvdata(dev); int rv, value; - rv = parse_arg(buf, count, &value); - if (rv > 0) - asus_set_display(asus, value); - return rv; + rv = kstrtoint(buf, 0, &value); + if (rv < 0) + return rv; + + asus_set_display(asus, value); + return count; } static DEVICE_ATTR_WO(display); @@ -1190,11 +1183,12 @@ static ssize_t ls_switch_store(struct device *dev, struct asus_laptop *asus = dev_get_drvdata(dev); int rv, value; - rv = parse_arg(buf, count, &value); - if (rv > 0) - asus_als_switch(asus, value ? 1 : 0); + rv = kstrtoint(buf, 0, &value); + if (rv < 0) + return rv; - return rv; + asus_als_switch(asus, value ? 1 : 0); + return count; } static DEVICE_ATTR_RW(ls_switch); @@ -1219,14 +1213,15 @@ static ssize_t ls_level_store(struct device *dev, struct device_attribute *attr, struct asus_laptop *asus = dev_get_drvdata(dev); int rv, value; - rv = parse_arg(buf, count, &value); - if (rv > 0) { - value = (0 < value) ? ((15 < value) ? 15 : value) : 0; - /* 0 <= value <= 15 */ - asus_als_level(asus, value); - } + rv = kstrtoint(buf, 0, &value); + if (rv < 0) + return rv; + + value = (0 < value) ? ((15 < value) ? 
15 : value) : 0; + /* 0 <= value <= 15 */ + asus_als_level(asus, value); - return rv; + return count; } static DEVICE_ATTR_RW(ls_level); @@ -1301,14 +1296,14 @@ static ssize_t gps_store(struct device *dev, struct device_attribute *attr, int rv, value; int ret; - rv = parse_arg(buf, count, &value); - if (rv <= 0) - return -EINVAL; + rv = kstrtoint(buf, 0, &value); + if (rv < 0) + return rv; ret = asus_gps_switch(asus, !!value); if (ret) return ret; rfkill_set_sw_state(asus->gps.rfkill, !value); - return rv; + return count; } static DEVICE_ATTR_RW(gps); diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index adecc1c555f0..26e4cbc34db8 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -27,6 +27,7 @@ #include <linux/input/sparse-keymap.h> #include <linux/fb.h> #include <linux/dmi.h> +#include <linux/i8042.h> #include "asus-wmi.h" @@ -55,10 +56,34 @@ MODULE_PARM_DESC(wapf, "WAPF value"); static struct quirk_entry *quirks; +static bool asus_q500a_i8042_filter(unsigned char data, unsigned char str, + struct serio *port) +{ + static bool extended; + bool ret = false; + + if (str & I8042_STR_AUXDATA) + return false; + + if (unlikely(data == 0xe1)) { + extended = true; + ret = true; + } else if (unlikely(extended)) { + extended = false; + ret = true; + } + + return ret; +} + static struct quirk_entry quirk_asus_unknown = { .wapf = 0, }; +static struct quirk_entry quirk_asus_q500a = { + .i8042_filter = asus_q500a_i8042_filter, +}; + /* * For those machines that need software to control bt/wifi status * and can't adjust brightness through ACPI interface @@ -87,6 +112,10 @@ static struct quirk_entry quirk_no_rfkill_wapf4 = { .no_rfkill = true, }; +static struct quirk_entry quirk_asus_ux303ub = { + .wmi_backlight_native = true, +}; + static int dmi_matched(const struct dmi_system_id *dmi) { quirks = dmi->driver_data; @@ -96,6 +125,15 @@ static int dmi_matched(const struct dmi_system_id *dmi) static const struct dmi_system_id asus_quirks[] = { { .callback = dmi_matched, + .ident = "ASUSTeK COMPUTER INC. Q500A", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "Q500A"), + }, + .driver_data = &quirk_asus_q500a, + }, + { + .callback = dmi_matched, .ident = "ASUSTeK COMPUTER INC. U32U", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), @@ -351,11 +389,22 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_no_rfkill, }, + { + .callback = dmi_matched, + .ident = "ASUSTeK COMPUTER INC. 
UX303UB", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "UX303UB"), + }, + .driver_data = &quirk_asus_ux303ub, + }, {}, }; static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver) { + int ret; + quirks = &quirk_asus_unknown; dmi_check_system(asus_quirks); @@ -367,6 +416,15 @@ static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver) quirks->wapf = wapf; else wapf = quirks->wapf; + + if (quirks->i8042_filter) { + ret = i8042_install_filter(quirks->i8042_filter); + if (ret) { + pr_warn("Unable to install key filter\n"); + return; + } + pr_info("Using i8042 filter function for receiving events\n"); + } } static const struct key_entry asus_nb_wmi_keymap[] = { diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 7c093a0b78bb..ce6ca31a2d09 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -2084,6 +2084,9 @@ static int asus_wmi_add(struct platform_device *pdev) if (asus->driver->quirks->wmi_backlight_power) acpi_video_set_dmi_backlight_type(acpi_backlight_vendor); + if (asus->driver->quirks->wmi_backlight_native) + acpi_video_set_dmi_backlight_type(acpi_backlight_native); + if (acpi_video_get_backlight_type() == acpi_backlight_vendor) { err = asus_wmi_backlight_init(asus); if (err && err != -ENODEV) diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h index 5de1df510ebd..0e19014e9f54 100644 --- a/drivers/platform/x86/asus-wmi.h +++ b/drivers/platform/x86/asus-wmi.h @@ -28,6 +28,7 @@ #define _ASUS_WMI_H_ #include <linux/platform_device.h> +#include <linux/i8042.h> #define ASUS_WMI_KEY_IGNORE (-1) #define ASUS_WMI_BRN_DOWN 0x20 @@ -43,6 +44,7 @@ struct quirk_entry { bool scalar_panel_brightness; bool store_backlight_power; bool wmi_backlight_power; + bool wmi_backlight_native; int wapf; /* * For machines with AMD graphic chips, it will send out WMI event @@ -51,6 +53,9 @@ struct quirk_entry { * and let the ACPI interrupt to send out the key event. 
*/ int no_display_toggle; + + bool (*i8042_filter)(unsigned char data, unsigned char str, + struct serio *serio); }; struct asus_wmi_driver { diff --git a/drivers/platform/x86/dell-smo8800.c b/drivers/platform/x86/dell-smo8800.c index 0aec4fd4c48e..37e646034ef8 100644 --- a/drivers/platform/x86/dell-smo8800.c +++ b/drivers/platform/x86/dell-smo8800.c @@ -24,6 +24,7 @@ #include <linux/acpi.h> #include <linux/interrupt.h> #include <linux/miscdevice.h> +#include <linux/uaccess.h> struct smo8800_device { u32 irq; /* acpi device irq */ diff --git a/drivers/platform/x86/intel_pmc_core.c b/drivers/platform/x86/intel_pmc_core.c index 520b58a04daa..e8b1b836ca2d 100644 --- a/drivers/platform/x86/intel_pmc_core.c +++ b/drivers/platform/x86/intel_pmc_core.c @@ -100,7 +100,7 @@ static int pmc_core_dbgfs_register(struct pmc_dev *pmcdev) struct dentry *dir, *file; dir = debugfs_create_dir("pmc_core", NULL); - if (IS_ERR_OR_NULL(dir)) + if (!dir) return -ENOMEM; pmcdev->dbgfs_dir = dir; diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c index a511d518206b..0bf51d574fa9 100644 --- a/drivers/platform/x86/intel_pmc_ipc.c +++ b/drivers/platform/x86/intel_pmc_ipc.c @@ -522,48 +522,36 @@ static struct resource telemetry_res[] = { static int ipc_create_punit_device(void) { struct platform_device *pdev; - int ret; - - pdev = platform_device_alloc(PUNIT_DEVICE_NAME, -1); - if (!pdev) { - dev_err(ipcdev.dev, "Failed to alloc punit platform device\n"); - return -ENOMEM; - } - - pdev->dev.parent = ipcdev.dev; - ret = platform_device_add_resources(pdev, punit_res_array, - ARRAY_SIZE(punit_res_array)); - if (ret) { - dev_err(ipcdev.dev, "Failed to add platform punit resources\n"); - goto err; - } + const struct platform_device_info pdevinfo = { + .parent = ipcdev.dev, + .name = PUNIT_DEVICE_NAME, + .id = -1, + .res = punit_res_array, + .num_res = ARRAY_SIZE(punit_res_array), + }; + + pdev = platform_device_register_full(&pdevinfo); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); - ret = platform_device_add(pdev); - if (ret) { - dev_err(ipcdev.dev, "Failed to add punit platform device\n"); - goto err; - } ipcdev.punit_dev = pdev; return 0; -err: - platform_device_put(pdev); - return ret; } static int ipc_create_tco_device(void) { struct platform_device *pdev; struct resource *res; - int ret; - - pdev = platform_device_alloc(TCO_DEVICE_NAME, -1); - if (!pdev) { - dev_err(ipcdev.dev, "Failed to alloc tco platform device\n"); - return -ENOMEM; - } - - pdev->dev.parent = ipcdev.dev; + const struct platform_device_info pdevinfo = { + .parent = ipcdev.dev, + .name = TCO_DEVICE_NAME, + .id = -1, + .res = tco_res, + .num_res = ARRAY_SIZE(tco_res), + .data = &tco_info, + .size_data = sizeof(tco_info), + }; res = tco_res + TCO_RESOURCE_ACPI_IO; res->start = ipcdev.acpi_io_base + TCO_BASE_OFFSET; @@ -577,45 +565,26 @@ static int ipc_create_tco_device(void) res->start = ipcdev.gcr_base + TCO_PMC_OFFSET; res->end = res->start + TCO_PMC_SIZE - 1; - ret = platform_device_add_resources(pdev, tco_res, ARRAY_SIZE(tco_res)); - if (ret) { - dev_err(ipcdev.dev, "Failed to add tco platform resources\n"); - goto err; - } + pdev = platform_device_register_full(&pdevinfo); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); - ret = platform_device_add_data(pdev, &tco_info, sizeof(tco_info)); - if (ret) { - dev_err(ipcdev.dev, "Failed to add tco platform data\n"); - goto err; - } - - ret = platform_device_add(pdev); - if (ret) { - dev_err(ipcdev.dev, "Failed to add tco platform device\n"); - goto err; - } ipcdev.tco_dev 
= pdev; return 0; -err: - platform_device_put(pdev); - return ret; } static int ipc_create_telemetry_device(void) { struct platform_device *pdev; struct resource *res; - int ret; - - pdev = platform_device_alloc(TELEMETRY_DEVICE_NAME, -1); - if (!pdev) { - dev_err(ipcdev.dev, - "Failed to allocate telemetry platform device\n"); - return -ENOMEM; - } - - pdev->dev.parent = ipcdev.dev; + const struct platform_device_info pdevinfo = { + .parent = ipcdev.dev, + .name = TELEMETRY_DEVICE_NAME, + .id = -1, + .res = telemetry_res, + .num_res = ARRAY_SIZE(telemetry_res), + }; res = telemetry_res + TELEMETRY_RESOURCE_PUNIT_SSRAM; res->start = ipcdev.telem_punit_ssram_base; @@ -625,26 +594,13 @@ static int ipc_create_telemetry_device(void) res->start = ipcdev.telem_pmc_ssram_base; res->end = res->start + ipcdev.telem_pmc_ssram_size - 1; - ret = platform_device_add_resources(pdev, telemetry_res, - ARRAY_SIZE(telemetry_res)); - if (ret) { - dev_err(ipcdev.dev, - "Failed to add telemetry platform resources\n"); - goto err; - } + pdev = platform_device_register_full(&pdevinfo); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); - ret = platform_device_add(pdev); - if (ret) { - dev_err(ipcdev.dev, - "Failed to add telemetry platform device\n"); - goto err; - } ipcdev.telemetry_dev = pdev; return 0; -err: - platform_device_put(pdev); - return ret; } static int ipc_create_pmc_devices(void) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 9d60a40d8b3f..074bf2fa1c55 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -321,10 +321,9 @@ static int write_acpi_int(const char *methodName, int val) static acpi_status tci_raw(struct toshiba_acpi_dev *dev, const u32 in[TCI_WORDS], u32 out[TCI_WORDS]) { + union acpi_object in_objs[TCI_WORDS], out_objs[TCI_WORDS + 1]; struct acpi_object_list params; - union acpi_object in_objs[TCI_WORDS]; struct acpi_buffer results; - union acpi_object out_objs[TCI_WORDS + 1]; acpi_status status; int i; @@ -387,9 +386,8 @@ static int sci_open(struct toshiba_acpi_dev *dev) { u32 in[TCI_WORDS] = { SCI_OPEN, 0, 0, 0, 0, 0 }; u32 out[TCI_WORDS]; - acpi_status status; + acpi_status status = tci_raw(dev, in, out); - status = tci_raw(dev, in, out); if (ACPI_FAILURE(status)) { pr_err("ACPI call to open SCI failed\n"); return 0; @@ -425,9 +423,8 @@ static void sci_close(struct toshiba_acpi_dev *dev) { u32 in[TCI_WORDS] = { SCI_CLOSE, 0, 0, 0, 0, 0 }; u32 out[TCI_WORDS]; - acpi_status status; + acpi_status status = tci_raw(dev, in, out); - status = tci_raw(dev, in, out); if (ACPI_FAILURE(status)) { pr_err("ACPI call to close SCI failed\n"); return; @@ -479,10 +476,15 @@ static void toshiba_illumination_available(struct toshiba_acpi_dev *dev) status = tci_raw(dev, in, out); sci_close(dev); - if (ACPI_FAILURE(status)) + if (ACPI_FAILURE(status)) { pr_err("ACPI call to query Illumination support failed\n"); - else if (out[0] == TOS_SUCCESS) - dev->illumination_supported = 1; + return; + } + + if (out[0] != TOS_SUCCESS) + return; + + dev->illumination_supported = 1; } static void toshiba_illumination_set(struct led_classdev *cdev, @@ -509,7 +511,8 @@ static enum led_brightness toshiba_illumination_get(struct led_classdev *cdev) { struct toshiba_acpi_dev *dev = container_of(cdev, struct toshiba_acpi_dev, led_dev); - u32 state, result; + u32 result; + u32 state; /* First request : initialize communication. 
*/ if (!sci_open(dev)) @@ -546,24 +549,28 @@ static void toshiba_kbd_illum_available(struct toshiba_acpi_dev *dev) sci_close(dev); if (ACPI_FAILURE(status)) { pr_err("ACPI call to query kbd illumination support failed\n"); - } else if (out[0] == TOS_SUCCESS) { - /* - * Check for keyboard backlight timeout max value, - * previous kbd backlight implementation set this to - * 0x3c0003, and now the new implementation set this - * to 0x3c001a, use this to distinguish between them. - */ - if (out[3] == SCI_KBD_TIME_MAX) - dev->kbd_type = 2; - else - dev->kbd_type = 1; - /* Get the current keyboard backlight mode */ - dev->kbd_mode = out[2] & SCI_KBD_MODE_MASK; - /* Get the current time (1-60 seconds) */ - dev->kbd_time = out[2] >> HCI_MISC_SHIFT; - /* Flag as supported */ - dev->kbd_illum_supported = 1; + return; } + + if (out[0] != TOS_SUCCESS) + return; + + /* + * Check for keyboard backlight timeout max value, + * previous kbd backlight implementation set this to + * 0x3c0003, and now the new implementation set this + * to 0x3c001a, use this to distinguish between them. + */ + if (out[3] == SCI_KBD_TIME_MAX) + dev->kbd_type = 2; + else + dev->kbd_type = 1; + /* Get the current keyboard backlight mode */ + dev->kbd_mode = out[2] & SCI_KBD_MODE_MASK; + /* Get the current time (1-60 seconds) */ + dev->kbd_time = out[2] >> HCI_MISC_SHIFT; + /* Flag as supported */ + dev->kbd_illum_supported = 1; } static int toshiba_kbd_illum_status_set(struct toshiba_acpi_dev *dev, u32 time) @@ -672,9 +679,9 @@ static int toshiba_touchpad_get(struct toshiba_acpi_dev *dev, u32 *state) /* Eco Mode support */ static void toshiba_eco_mode_available(struct toshiba_acpi_dev *dev) { - acpi_status status; u32 in[TCI_WORDS] = { HCI_GET, HCI_ECO_MODE, 0, 0, 0, 0 }; u32 out[TCI_WORDS]; + acpi_status status; dev->eco_supported = 0; dev->eco_led_registered = false; @@ -682,7 +689,10 @@ static void toshiba_eco_mode_available(struct toshiba_acpi_dev *dev) status = tci_raw(dev, in, out); if (ACPI_FAILURE(status)) { pr_err("ACPI call to get ECO led failed\n"); - } else if (out[0] == TOS_INPUT_DATA_ERROR) { + return; + } + + if (out[0] == TOS_INPUT_DATA_ERROR) { /* * If we receive 0x8300 (Input Data Error), it means that the * LED device is present, but that we just screwed the input @@ -694,10 +704,15 @@ static void toshiba_eco_mode_available(struct toshiba_acpi_dev *dev) */ in[3] = 1; status = tci_raw(dev, in, out); - if (ACPI_FAILURE(status)) + if (ACPI_FAILURE(status)) { pr_err("ACPI call to get ECO led failed\n"); - else if (out[0] == TOS_SUCCESS) - dev->eco_supported = 1; + return; + } + + if (out[0] != TOS_SUCCESS) + return; + + dev->eco_supported = 1; } } @@ -714,10 +729,11 @@ toshiba_eco_mode_get_status(struct led_classdev *cdev) if (ACPI_FAILURE(status)) { pr_err("ACPI call to get ECO led failed\n"); return LED_OFF; - } else if (out[0] != TOS_SUCCESS) { - return LED_OFF; } + if (out[0] != TOS_SUCCESS) + return LED_OFF; + return out[2] ? 
LED_FULL : LED_OFF; } @@ -751,10 +767,15 @@ static void toshiba_accelerometer_available(struct toshiba_acpi_dev *dev) * this call also serves as initialization */ status = tci_raw(dev, in, out); - if (ACPI_FAILURE(status)) + if (ACPI_FAILURE(status)) { pr_err("ACPI call to query the accelerometer failed\n"); - else if (out[0] == TOS_SUCCESS) - dev->accelerometer_supported = 1; + return; + } + + if (out[0] != TOS_SUCCESS) + return; + + dev->accelerometer_supported = 1; } static int toshiba_accelerometer_get(struct toshiba_acpi_dev *dev, @@ -769,15 +790,18 @@ static int toshiba_accelerometer_get(struct toshiba_acpi_dev *dev, if (ACPI_FAILURE(status)) { pr_err("ACPI call to query the accelerometer failed\n"); return -EIO; - } else if (out[0] == TOS_NOT_SUPPORTED) { - return -ENODEV; - } else if (out[0] == TOS_SUCCESS) { - *xy = out[2]; - *z = out[4]; - return 0; } - return -EIO; + if (out[0] == TOS_NOT_SUPPORTED) + return -ENODEV; + + if (out[0] != TOS_SUCCESS) + return -EIO; + + *xy = out[2]; + *z = out[4]; + + return 0; } /* Sleep (Charge and Music) utilities support */ @@ -797,24 +821,29 @@ static void toshiba_usb_sleep_charge_available(struct toshiba_acpi_dev *dev) pr_err("ACPI call to get USB Sleep and Charge mode failed\n"); sci_close(dev); return; - } else if (out[0] == TOS_NOT_SUPPORTED) { + } + + if (out[0] != TOS_SUCCESS) { sci_close(dev); return; - } else if (out[0] == TOS_SUCCESS) { - dev->usbsc_mode_base = out[4]; } + dev->usbsc_mode_base = out[4]; + in[5] = SCI_USB_CHARGE_BAT_LVL; status = tci_raw(dev, in, out); sci_close(dev); if (ACPI_FAILURE(status)) { pr_err("ACPI call to get USB Sleep and Charge mode failed\n"); - } else if (out[0] == TOS_SUCCESS) { - dev->usbsc_bat_level = out[2]; - /* Flag as supported */ - dev->usb_sleep_charge_supported = 1; + return; } + if (out[0] != TOS_SUCCESS) + return; + + dev->usbsc_bat_level = out[2]; + /* Flag as supported */ + dev->usb_sleep_charge_supported = 1; } static int toshiba_usb_sleep_charge_get(struct toshiba_acpi_dev *dev, @@ -868,14 +897,19 @@ static int toshiba_sleep_functions_status_get(struct toshiba_acpi_dev *dev, sci_close(dev); if (ACPI_FAILURE(status)) { pr_err("ACPI call to get USB S&C battery level failed\n"); - } else if (out[0] == TOS_NOT_SUPPORTED) { - return -ENODEV; - } else if (out[0] == TOS_SUCCESS) { - *mode = out[2]; - return 0; + return -EIO; } - return -EIO; + if (out[0] == TOS_NOT_SUPPORTED) + return -ENODEV; + + if (out[0] != TOS_SUCCESS) + return -EIO; + + *mode = out[2]; + + return 0; + } static int toshiba_sleep_functions_status_set(struct toshiba_acpi_dev *dev, @@ -892,9 +926,12 @@ static int toshiba_sleep_functions_status_set(struct toshiba_acpi_dev *dev, in[5] = SCI_USB_CHARGE_BAT_LVL; status = tci_raw(dev, in, out); sci_close(dev); - if (ACPI_FAILURE(status)) + if (ACPI_FAILURE(status)) { pr_err("ACPI call to set USB S&C battery level failed\n"); - else if (out[0] == TOS_NOT_SUPPORTED) + return -EIO; + } + + if (out[0] == TOS_NOT_SUPPORTED) return -ENODEV; return out[0] == TOS_SUCCESS ? 
0 : -EIO; @@ -915,14 +952,18 @@ static int toshiba_usb_rapid_charge_get(struct toshiba_acpi_dev *dev, sci_close(dev); if (ACPI_FAILURE(status)) { pr_err("ACPI call to get USB Rapid Charge failed\n"); - } else if (out[0] == TOS_NOT_SUPPORTED) { - return -ENODEV; - } else if (out[0] == TOS_SUCCESS || out[0] == TOS_SUCCESS2) { - *state = out[2]; - return 0; + return -EIO; } - return -EIO; + if (out[0] == TOS_NOT_SUPPORTED) + return -ENODEV; + + if (out[0] != TOS_SUCCESS && out[0] != TOS_SUCCESS2) + return -EIO; + + *state = out[2]; + + return 0; } static int toshiba_usb_rapid_charge_set(struct toshiba_acpi_dev *dev, @@ -939,9 +980,12 @@ static int toshiba_usb_rapid_charge_set(struct toshiba_acpi_dev *dev, in[5] = SCI_USB_CHARGE_RAPID_DSP; status = tci_raw(dev, in, out); sci_close(dev); - if (ACPI_FAILURE(status)) + if (ACPI_FAILURE(status)) { pr_err("ACPI call to set USB Rapid Charge failed\n"); - else if (out[0] == TOS_NOT_SUPPORTED) + return -EIO; + } + + if (out[0] == TOS_NOT_SUPPORTED) return -ENODEV; return (out[0] == TOS_SUCCESS || out[0] == TOS_SUCCESS2) ? 0 : -EIO; @@ -1097,14 +1141,18 @@ static int toshiba_hotkey_event_type_get(struct toshiba_acpi_dev *dev, status = tci_raw(dev, in, out); if (ACPI_FAILURE(status)) { pr_err("ACPI call to get System type failed\n"); - } else if (out[0] == TOS_NOT_SUPPORTED) { - return -ENODEV; - } else if (out[0] == TOS_SUCCESS) { - *type = out[3]; - return 0; + return -EIO; } - return -EIO; + if (out[0] == TOS_NOT_SUPPORTED) + return -ENODEV; + + if (out[0] != TOS_SUCCESS) + return -EIO; + + *type = out[3]; + + return 0; } /* Wireless status (RFKill, WLAN, BT, WWAN) */ @@ -1154,7 +1202,6 @@ static void toshiba_wwan_available(struct toshiba_acpi_dev *dev) */ in[3] = HCI_WIRELESS_WWAN; status = tci_raw(dev, in, out); - if (ACPI_FAILURE(status)) { pr_err("ACPI call to get WWAN status failed\n"); return; @@ -1174,7 +1221,6 @@ static int toshiba_wwan_set(struct toshiba_acpi_dev *dev, u32 state) in[3] = HCI_WIRELESS_WWAN_STATUS; status = tci_raw(dev, in, out); - if (ACPI_FAILURE(status)) { pr_err("ACPI call to set WWAN status failed\n"); return -EIO; @@ -1193,7 +1239,6 @@ static int toshiba_wwan_set(struct toshiba_acpi_dev *dev, u32 state) */ in[3] = HCI_WIRELESS_WWAN_POWER; status = tci_raw(dev, in, out); - if (ACPI_FAILURE(status)) { pr_err("ACPI call to set WWAN power failed\n"); return -EIO; @@ -1216,8 +1261,10 @@ static void toshiba_cooling_method_available(struct toshiba_acpi_dev *dev) dev->max_cooling_method = 0; status = tci_raw(dev, in, out); - if (ACPI_FAILURE(status)) + if (ACPI_FAILURE(status)) { pr_err("ACPI call to get Cooling Method failed\n"); + return; + } if (out[0] != TOS_SUCCESS && out[0] != TOS_SUCCESS2) return; @@ -1244,7 +1291,7 @@ static int toshiba_cooling_method_set(struct toshiba_acpi_dev *dev, u32 state) u32 result = hci_write(dev, HCI_COOLING_METHOD, state); if (result == TOS_FAILURE) - pr_err("ACPI call to get Cooling Method failed\n"); + pr_err("ACPI call to set Cooling Method failed\n"); if (result == TOS_NOT_SUPPORTED) return -ENODEV; @@ -1282,9 +1329,9 @@ static struct proc_dir_entry *toshiba_proc_dir; /* LCD Brightness */ static int __get_lcd_brightness(struct toshiba_acpi_dev *dev) { + int brightness = 0; u32 result; u32 value; - int brightness = 0; if (dev->tr_backlight_supported) { int ret = get_tr_backlight_status(dev, &value); @@ -1301,10 +1348,10 @@ static int __get_lcd_brightness(struct toshiba_acpi_dev *dev) pr_err("ACPI call to get LCD Brightness failed\n"); else if (result == TOS_NOT_SUPPORTED) return -ENODEV; - 
if (result == TOS_SUCCESS) - return brightness + (value >> HCI_LCD_BRIGHTNESS_SHIFT); - return -EIO; + return result == TOS_SUCCESS ? + brightness + (value >> HCI_LCD_BRIGHTNESS_SHIFT) : + -EIO; } static int get_lcd_brightness(struct backlight_device *bd) @@ -1325,15 +1372,15 @@ static int lcd_proc_show(struct seq_file *m, void *v) levels = dev->backlight_dev->props.max_brightness + 1; value = get_lcd_brightness(dev->backlight_dev); - if (value >= 0) { - seq_printf(m, "brightness: %d\n", value); - seq_printf(m, "brightness_levels: %d\n", levels); - return 0; + if (value < 0) { + pr_err("Error reading LCD brightness\n"); + return value; } - pr_err("Error reading LCD brightness\n"); + seq_printf(m, "brightness: %d\n", value); + seq_printf(m, "brightness_levels: %d\n", levels); - return -EIO; + return 0; } static int lcd_proc_open(struct inode *inode, struct file *file) @@ -1377,7 +1424,7 @@ static ssize_t lcd_proc_write(struct file *file, const char __user *buf, struct toshiba_acpi_dev *dev = PDE_DATA(file_inode(file)); char cmd[42]; size_t len; - int levels = dev->backlight_dev->props.max_brightness + 1; + int levels; int value; len = min(count, sizeof(cmd) - 1); @@ -1385,6 +1432,7 @@ static ssize_t lcd_proc_write(struct file *file, const char __user *buf, return -EFAULT; cmd[len] = '\0'; + levels = dev->backlight_dev->props.max_brightness + 1; if (sscanf(cmd, " brightness : %i", &value) != 1 && value < 0 && value > levels) return -EINVAL; @@ -1420,20 +1468,21 @@ static int get_video_status(struct toshiba_acpi_dev *dev, u32 *status) static int video_proc_show(struct seq_file *m, void *v) { struct toshiba_acpi_dev *dev = m->private; + int is_lcd, is_crt, is_tv; u32 value; - if (!get_video_status(dev, &value)) { - int is_lcd = (value & HCI_VIDEO_OUT_LCD) ? 1 : 0; - int is_crt = (value & HCI_VIDEO_OUT_CRT) ? 1 : 0; - int is_tv = (value & HCI_VIDEO_OUT_TV) ? 1 : 0; + if (get_video_status(dev, &value)) + return -EIO; - seq_printf(m, "lcd_out: %d\n", is_lcd); - seq_printf(m, "crt_out: %d\n", is_crt); - seq_printf(m, "tv_out: %d\n", is_tv); - return 0; - } + is_lcd = (value & HCI_VIDEO_OUT_LCD) ? 1 : 0; + is_crt = (value & HCI_VIDEO_OUT_CRT) ? 1 : 0; + is_tv = (value & HCI_VIDEO_OUT_TV) ? 
1 : 0; - return -EIO; + seq_printf(m, "lcd_out: %d\n", is_lcd); + seq_printf(m, "crt_out: %d\n", is_crt); + seq_printf(m, "tv_out: %d\n", is_tv); + + return 0; } static int video_proc_open(struct inode *inode, struct file *file) @@ -1447,10 +1496,8 @@ static ssize_t video_proc_write(struct file *file, const char __user *buf, struct toshiba_acpi_dev *dev = PDE_DATA(file_inode(file)); char *buffer; char *cmd; + int lcd_out, crt_out, tv_out; int remain = count; - int lcd_out = -1; - int crt_out = -1; - int tv_out = -1; int value; int ret; u32 video_out; @@ -1486,6 +1533,7 @@ static ssize_t video_proc_write(struct file *file, const char __user *buf, kfree(cmd); + lcd_out = crt_out = tv_out = -1; ret = get_video_status(dev, &video_out); if (!ret) { unsigned int new_video_out = video_out; @@ -1980,8 +2028,8 @@ static ssize_t usb_sleep_charge_store(struct device *dev, const char *buf, size_t count) { struct toshiba_acpi_dev *toshiba = dev_get_drvdata(dev); - u32 mode; int state; + u32 mode; int ret; ret = kstrtoint(buf, 0, &state); @@ -2021,9 +2069,8 @@ static ssize_t sleep_functions_on_battery_show(struct device *dev, char *buf) { struct toshiba_acpi_dev *toshiba = dev_get_drvdata(dev); + int bat_lvl, status; u32 state; - int bat_lvl; - int status; int ret; int tmp; diff --git a/drivers/platform/x86/toshiba_bluetooth.c b/drivers/platform/x86/toshiba_bluetooth.c index 5db495dd018e..be1d137c6079 100644 --- a/drivers/platform/x86/toshiba_bluetooth.c +++ b/drivers/platform/x86/toshiba_bluetooth.c @@ -80,7 +80,9 @@ static int toshiba_bluetooth_present(acpi_handle handle) if (ACPI_FAILURE(result)) { pr_err("ACPI call to query Bluetooth presence failed\n"); return -ENXIO; - } else if (!bt_present) { + } + + if (!bt_present) { pr_info("Bluetooth device not present\n"); return -ENODEV; } diff --git a/drivers/platform/x86/toshiba_haps.c b/drivers/platform/x86/toshiba_haps.c index 7f2afc6b5eb9..b3dec521e2b6 100644 --- a/drivers/platform/x86/toshiba_haps.c +++ b/drivers/platform/x86/toshiba_haps.c @@ -59,7 +59,7 @@ static int toshiba_haps_protection_level(acpi_handle handle, int level) return -EIO; } - pr_info("HDD protection level set to: %d\n", level); + pr_debug("HDD protection level set to: %d\n", level); return 0; } @@ -141,7 +141,7 @@ static struct attribute_group haps_attr_group = { */ static void toshiba_haps_notify(struct acpi_device *device, u32 event) { - pr_info("Received event: 0x%x", event); + pr_debug("Received event: 0x%x", event); acpi_bus_generate_netlink_event(device->pnp.device_class, dev_name(&device->dev), @@ -168,9 +168,13 @@ static int toshiba_haps_available(acpi_handle handle) * A non existent device as well as having (only) * Solid State Drives can cause the call to fail. 
*/ - status = acpi_evaluate_integer(handle, "_STA", NULL, - &hdd_present); - if (ACPI_FAILURE(status) || !hdd_present) { + status = acpi_evaluate_integer(handle, "_STA", NULL, &hdd_present); + if (ACPI_FAILURE(status)) { + pr_err("ACPI call to query HDD protection failed\n"); + return 0; + } + + if (!hdd_present) { pr_info("HDD protection not available or using SSD\n"); return 0; } diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index d637c933c8a9..58a97d420572 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -193,6 +193,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) if (err) break; + memset(&precise_offset, 0, sizeof(precise_offset)); ts = ktime_to_timespec64(xtstamp.device); precise_offset.device.sec = ts.tv_sec; precise_offset.device.nsec = ts.tv_nsec; diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index 80a566a00d04..bf0128899c09 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -262,6 +262,15 @@ config PWM_LPSS_PLATFORM To compile this driver as a module, choose M here: the module will be called pwm-lpss-platform. +config PWM_MESON + tristate "Amlogic Meson PWM driver" + depends on ARCH_MESON + help + The platform driver for Amlogic Meson PWM controller. + + To compile this driver as a module, choose M here: the module + will be called pwm-meson. + config PWM_MTK_DISP tristate "MediaTek display PWM driver" depends on ARCH_MEDIATEK || COMPILE_TEST diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile index feef1dd29f73..1194c54efcc2 100644 --- a/drivers/pwm/Makefile +++ b/drivers/pwm/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_PWM_LPC32XX) += pwm-lpc32xx.o obj-$(CONFIG_PWM_LPSS) += pwm-lpss.o obj-$(CONFIG_PWM_LPSS_PCI) += pwm-lpss-pci.o obj-$(CONFIG_PWM_LPSS_PLATFORM) += pwm-lpss-platform.o +obj-$(CONFIG_PWM_MESON) += pwm-meson.o obj-$(CONFIG_PWM_MTK_DISP) += pwm-mtk-disp.o obj-$(CONFIG_PWM_MXS) += pwm-mxs.o obj-$(CONFIG_PWM_OMAP_DMTIMER) += pwm-omap-dmtimer.o diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 0dbd29e287db..172ef8245811 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -339,6 +339,8 @@ int pwmchip_remove(struct pwm_chip *chip) unsigned int i; int ret = 0; + pwmchip_sysfs_unexport_children(chip); + mutex_lock(&pwm_lock); for (i = 0; i < chip->npwm; i++) { diff --git a/drivers/pwm/pwm-berlin.c b/drivers/pwm/pwm-berlin.c index 65108129d505..01339c152ab0 100644 --- a/drivers/pwm/pwm-berlin.c +++ b/drivers/pwm/pwm-berlin.c @@ -16,6 +16,7 @@ #include <linux/module.h> #include <linux/platform_device.h> #include <linux/pwm.h> +#include <linux/slab.h> #define BERLIN_PWM_EN 0x0 #define BERLIN_PWM_ENABLE BIT(0) @@ -27,6 +28,13 @@ #define BERLIN_PWM_TCNT 0xc #define BERLIN_PWM_MAX_TCNT 65535 +struct berlin_pwm_channel { + u32 enable; + u32 ctrl; + u32 duty; + u32 tcnt; +}; + struct berlin_pwm_chip { struct pwm_chip chip; struct clk *clk; @@ -55,6 +63,25 @@ static inline void berlin_pwm_writel(struct berlin_pwm_chip *chip, writel_relaxed(value, chip->base + channel * 0x10 + offset); } +static int berlin_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct berlin_pwm_channel *channel; + + channel = kzalloc(sizeof(*channel), GFP_KERNEL); + if (!channel) + return -ENOMEM; + + return pwm_set_chip_data(pwm, channel); +} + +static void berlin_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct berlin_pwm_channel *channel = pwm_get_chip_data(pwm); + + pwm_set_chip_data(pwm, NULL); + kfree(channel); +} + static int berlin_pwm_config(struct 
pwm_chip *chip, struct pwm_device *pwm_dev, int duty_ns, int period_ns) { @@ -137,6 +164,8 @@ static void berlin_pwm_disable(struct pwm_chip *chip, } static const struct pwm_ops berlin_pwm_ops = { + .request = berlin_pwm_request, + .free = berlin_pwm_free, .config = berlin_pwm_config, .set_polarity = berlin_pwm_set_polarity, .enable = berlin_pwm_enable, @@ -204,12 +233,67 @@ static int berlin_pwm_remove(struct platform_device *pdev) return ret; } +#ifdef CONFIG_PM_SLEEP +static int berlin_pwm_suspend(struct device *dev) +{ + struct berlin_pwm_chip *pwm = dev_get_drvdata(dev); + unsigned int i; + + for (i = 0; i < pwm->chip.npwm; i++) { + struct berlin_pwm_channel *channel; + + channel = pwm_get_chip_data(&pwm->chip.pwms[i]); + if (!channel) + continue; + + channel->enable = berlin_pwm_readl(pwm, i, BERLIN_PWM_ENABLE); + channel->ctrl = berlin_pwm_readl(pwm, i, BERLIN_PWM_CONTROL); + channel->duty = berlin_pwm_readl(pwm, i, BERLIN_PWM_DUTY); + channel->tcnt = berlin_pwm_readl(pwm, i, BERLIN_PWM_TCNT); + } + + clk_disable_unprepare(pwm->clk); + + return 0; +} + +static int berlin_pwm_resume(struct device *dev) +{ + struct berlin_pwm_chip *pwm = dev_get_drvdata(dev); + unsigned int i; + int ret; + + ret = clk_prepare_enable(pwm->clk); + if (ret) + return ret; + + for (i = 0; i < pwm->chip.npwm; i++) { + struct berlin_pwm_channel *channel; + + channel = pwm_get_chip_data(&pwm->chip.pwms[i]); + if (!channel) + continue; + + berlin_pwm_writel(pwm, i, channel->ctrl, BERLIN_PWM_CONTROL); + berlin_pwm_writel(pwm, i, channel->duty, BERLIN_PWM_DUTY); + berlin_pwm_writel(pwm, i, channel->tcnt, BERLIN_PWM_TCNT); + berlin_pwm_writel(pwm, i, channel->enable, BERLIN_PWM_ENABLE); + } + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(berlin_pwm_pm_ops, berlin_pwm_suspend, + berlin_pwm_resume); + static struct platform_driver berlin_pwm_driver = { .probe = berlin_pwm_probe, .remove = berlin_pwm_remove, .driver = { .name = "berlin-pwm", .of_match_table = berlin_pwm_match, + .pm = &berlin_pwm_pm_ops, }, }; module_platform_driver(berlin_pwm_driver); diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c index 99b9acc1a420..f6ca4e8c6253 100644 --- a/drivers/pwm/pwm-cros-ec.c +++ b/drivers/pwm/pwm-cros-ec.c @@ -38,7 +38,7 @@ static int cros_ec_pwm_set_duty(struct cros_ec_device *ec, u8 index, u16 duty) struct { struct cros_ec_command msg; struct ec_params_pwm_set_duty params; - } buf; + } __packed buf; struct ec_params_pwm_set_duty *params = &buf.params; struct cros_ec_command *msg = &buf.msg; @@ -65,7 +65,7 @@ static int __cros_ec_pwm_get_duty(struct cros_ec_device *ec, u8 index, struct ec_params_pwm_get_duty params; struct ec_response_pwm_get_duty resp; }; - } buf; + } __packed buf; struct ec_params_pwm_get_duty *params = &buf.params; struct ec_response_pwm_get_duty *resp = &buf.resp; struct cros_ec_command *msg = &buf.msg; diff --git a/drivers/pwm/pwm-lpc18xx-sct.c b/drivers/pwm/pwm-lpc18xx-sct.c index 19dc64cab2f0..d7f5f7de030d 100644 --- a/drivers/pwm/pwm-lpc18xx-sct.c +++ b/drivers/pwm/pwm-lpc18xx-sct.c @@ -413,14 +413,18 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev) } for (i = 0; i < lpc18xx_pwm->chip.npwm; i++) { + struct lpc18xx_pwm_data *data; + pwm = &lpc18xx_pwm->chip.pwms[i]; - pwm->chip_data = devm_kzalloc(lpc18xx_pwm->dev, - sizeof(struct lpc18xx_pwm_data), - GFP_KERNEL); - if (!pwm->chip_data) { + + data = devm_kzalloc(lpc18xx_pwm->dev, sizeof(*data), + GFP_KERNEL); + if (!data) { ret = -ENOMEM; goto remove_pwmchip; } + + pwm_set_chip_data(pwm, data); } 
platform_set_drvdata(pdev, lpc18xx_pwm); diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c new file mode 100644 index 000000000000..381871b2bb46 --- /dev/null +++ b/drivers/pwm/pwm-meson.c @@ -0,0 +1,529 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright (c) 2016 BayLibre, SAS. + * Author: Neil Armstrong <narmstrong@baylibre.com> + * Copyright (C) 2014 Amlogic, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + * The full GNU General Public License is included in this distribution + * in the file called COPYING. + * + * BSD LICENSE + * + * Copyright (c) 2016 BayLibre, SAS. + * Author: Neil Armstrong <narmstrong@baylibre.com> + * Copyright (C) 2014 Amlogic, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <linux/clk.h> +#include <linux/clk-provider.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/pwm.h> +#include <linux/slab.h> +#include <linux/spinlock.h> + +#define REG_PWM_A 0x0 +#define REG_PWM_B 0x4 +#define PWM_HIGH_SHIFT 16 + +#define REG_MISC_AB 0x8 +#define MISC_B_CLK_EN BIT(23) +#define MISC_A_CLK_EN BIT(15) +#define MISC_CLK_DIV_MASK 0x7f +#define MISC_B_CLK_DIV_SHIFT 16 +#define MISC_A_CLK_DIV_SHIFT 8 +#define MISC_B_CLK_SEL_SHIFT 6 +#define MISC_A_CLK_SEL_SHIFT 4 +#define MISC_CLK_SEL_WIDTH 2 +#define MISC_B_EN BIT(1) +#define MISC_A_EN BIT(0) + +static const unsigned int mux_reg_shifts[] = { + MISC_A_CLK_SEL_SHIFT, + MISC_B_CLK_SEL_SHIFT +}; + +struct meson_pwm_channel { + unsigned int hi; + unsigned int lo; + u8 pre_div; + + struct pwm_state state; + + struct clk *clk_parent; + struct clk_mux mux; + struct clk *clk; +}; + +struct meson_pwm_data { + const char * const *parent_names; +}; + +struct meson_pwm { + struct pwm_chip chip; + const struct meson_pwm_data *data; + void __iomem *base; + u8 inverter_mask; + spinlock_t lock; +}; + +static inline struct meson_pwm *to_meson_pwm(struct pwm_chip *chip) +{ + return container_of(chip, struct meson_pwm, chip); +} + +static int meson_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct meson_pwm_channel *channel = pwm_get_chip_data(pwm); + struct device *dev = chip->dev; + int err; + + if (!channel) + return -ENODEV; + + if (channel->clk_parent) { + err = clk_set_parent(channel->clk, channel->clk_parent); + if (err < 0) { + dev_err(dev, "failed to set parent %s for %s: %d\n", + __clk_get_name(channel->clk_parent), + __clk_get_name(channel->clk), err); + return err; + } + } + + err = clk_prepare_enable(channel->clk); + if (err < 0) { + dev_err(dev, "failed to enable clock %s: %d\n", + __clk_get_name(channel->clk), err); + return err; + } + + chip->ops->get_state(chip, pwm, &channel->state); + + return 0; +} + +static void meson_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct meson_pwm_channel *channel = pwm_get_chip_data(pwm); + + if (channel) + clk_disable_unprepare(channel->clk); +} + +static int meson_pwm_calc(struct meson_pwm *meson, + struct meson_pwm_channel *channel, unsigned int id, + unsigned int duty, unsigned int period) +{ + unsigned int pre_div, cnt, duty_cnt; + unsigned long fin_freq = -1, fin_ns; + + if (~(meson->inverter_mask >> id) & 0x1) + duty = period - duty; + + if (period == channel->state.period && + duty == channel->state.duty_cycle) + return 0; + + fin_freq = clk_get_rate(channel->clk); + if (fin_freq == 0) { + dev_err(meson->chip.dev, "invalid source clock frequency\n"); + return -EINVAL; + } + + dev_dbg(meson->chip.dev, "fin_freq: %lu Hz\n", fin_freq); + fin_ns = NSEC_PER_SEC / fin_freq; + + /* Calc pre_div with the period */ + for (pre_div = 0; pre_div < MISC_CLK_DIV_MASK; pre_div++) { + cnt = DIV_ROUND_CLOSEST(period, fin_ns * (pre_div + 1)); + dev_dbg(meson->chip.dev, "fin_ns=%lu pre_div=%u cnt=%u\n", + fin_ns, pre_div, cnt); + if (cnt <= 0xffff) + break; + } + + if (pre_div == MISC_CLK_DIV_MASK) { + dev_err(meson->chip.dev, "unable to get period pre_div\n"); + return -EINVAL; + } + + dev_dbg(meson->chip.dev, "period=%u pre_div=%u cnt=%u\n", period, + pre_div, cnt); + + if (duty == period) { + channel->pre_div = pre_div; + channel->hi = cnt; + channel->lo = 0; + } else if (duty == 0) { + 
channel->pre_div = pre_div; + channel->hi = 0; + channel->lo = cnt; + } else { + /* Then check if we can have the duty with the same pre_div */ + duty_cnt = DIV_ROUND_CLOSEST(duty, fin_ns * (pre_div + 1)); + if (duty_cnt > 0xffff) { + dev_err(meson->chip.dev, "unable to get duty cycle\n"); + return -EINVAL; + } + + dev_dbg(meson->chip.dev, "duty=%u pre_div=%u duty_cnt=%u\n", + duty, pre_div, duty_cnt); + + channel->pre_div = pre_div; + channel->hi = duty_cnt; + channel->lo = cnt - duty_cnt; + } + + return 0; +} + +static void meson_pwm_enable(struct meson_pwm *meson, + struct meson_pwm_channel *channel, + unsigned int id) +{ + u32 value, clk_shift, clk_enable, enable; + unsigned int offset; + + switch (id) { + case 0: + clk_shift = MISC_A_CLK_DIV_SHIFT; + clk_enable = MISC_A_CLK_EN; + enable = MISC_A_EN; + offset = REG_PWM_A; + break; + + case 1: + clk_shift = MISC_B_CLK_DIV_SHIFT; + clk_enable = MISC_B_CLK_EN; + enable = MISC_B_EN; + offset = REG_PWM_B; + break; + + default: + return; + } + + value = readl(meson->base + REG_MISC_AB); + value &= ~(MISC_CLK_DIV_MASK << clk_shift); + value |= channel->pre_div << clk_shift; + value |= clk_enable; + writel(value, meson->base + REG_MISC_AB); + + value = (channel->hi << PWM_HIGH_SHIFT) | channel->lo; + writel(value, meson->base + offset); + + value = readl(meson->base + REG_MISC_AB); + value |= enable; + writel(value, meson->base + REG_MISC_AB); +} + +static void meson_pwm_disable(struct meson_pwm *meson, unsigned int id) +{ + u32 value, enable; + + switch (id) { + case 0: + enable = MISC_A_EN; + break; + + case 1: + enable = MISC_B_EN; + break; + + default: + return; + } + + value = readl(meson->base + REG_MISC_AB); + value &= ~enable; + writel(value, meson->base + REG_MISC_AB); +} + +static int meson_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) +{ + struct meson_pwm_channel *channel = pwm_get_chip_data(pwm); + struct meson_pwm *meson = to_meson_pwm(chip); + unsigned long flags; + int err = 0; + + if (!state) + return -EINVAL; + + spin_lock_irqsave(&meson->lock, flags); + + if (!state->enabled) { + meson_pwm_disable(meson, pwm->hwpwm); + channel->state.enabled = false; + + goto unlock; + } + + if (state->period != channel->state.period || + state->duty_cycle != channel->state.duty_cycle || + state->polarity != channel->state.polarity) { + if (channel->state.enabled) { + meson_pwm_disable(meson, pwm->hwpwm); + channel->state.enabled = false; + } + + if (state->polarity != channel->state.polarity) { + if (state->polarity == PWM_POLARITY_NORMAL) + meson->inverter_mask |= BIT(pwm->hwpwm); + else + meson->inverter_mask &= ~BIT(pwm->hwpwm); + } + + err = meson_pwm_calc(meson, channel, pwm->hwpwm, + state->duty_cycle, state->period); + if (err < 0) + goto unlock; + + channel->state.polarity = state->polarity; + channel->state.period = state->period; + channel->state.duty_cycle = state->duty_cycle; + } + + if (state->enabled && !channel->state.enabled) { + meson_pwm_enable(meson, channel, pwm->hwpwm); + channel->state.enabled = true; + } + +unlock: + spin_unlock_irqrestore(&meson->lock, flags); + return err; +} + +static void meson_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) +{ + struct meson_pwm *meson = to_meson_pwm(chip); + u32 value, mask; + + if (!state) + return; + + switch (pwm->hwpwm) { + case 0: + mask = MISC_A_EN; + break; + + case 1: + mask = MISC_B_EN; + break; + + default: + return; + } + + value = readl(meson->base + REG_MISC_AB); + state->enabled = (value 
& mask) != 0; +} + +static const struct pwm_ops meson_pwm_ops = { + .request = meson_pwm_request, + .free = meson_pwm_free, + .apply = meson_pwm_apply, + .get_state = meson_pwm_get_state, + .owner = THIS_MODULE, +}; + +static const char * const pwm_meson8b_parent_names[] = { + "xtal", "vid_pll", "fclk_div4", "fclk_div3" +}; + +static const struct meson_pwm_data pwm_meson8b_data = { + .parent_names = pwm_meson8b_parent_names, +}; + +static const char * const pwm_gxbb_parent_names[] = { + "xtal", "hdmi_pll", "fclk_div4", "fclk_div3" +}; + +static const struct meson_pwm_data pwm_gxbb_data = { + .parent_names = pwm_gxbb_parent_names, +}; + +static const struct of_device_id meson_pwm_matches[] = { + { .compatible = "amlogic,meson8b-pwm", .data = &pwm_meson8b_data }, + { .compatible = "amlogic,meson-gxbb-pwm", .data = &pwm_gxbb_data }, + {}, +}; +MODULE_DEVICE_TABLE(of, meson_pwm_matches); + +static int meson_pwm_init_channels(struct meson_pwm *meson, + struct meson_pwm_channel *channels) +{ + struct device *dev = meson->chip.dev; + struct device_node *np = dev->of_node; + struct clk_init_data init; + unsigned int i; + char name[255]; + int err; + + for (i = 0; i < meson->chip.npwm; i++) { + struct meson_pwm_channel *channel = &channels[i]; + + snprintf(name, sizeof(name), "%s#mux%u", np->full_name, i); + + init.name = name; + init.ops = &clk_mux_ops; + init.flags = CLK_IS_BASIC; + init.parent_names = meson->data->parent_names; + init.num_parents = 1 << MISC_CLK_SEL_WIDTH; + + channel->mux.reg = meson->base + REG_MISC_AB; + channel->mux.shift = mux_reg_shifts[i]; + channel->mux.mask = BIT(MISC_CLK_SEL_WIDTH) - 1; + channel->mux.flags = 0; + channel->mux.lock = &meson->lock; + channel->mux.table = NULL; + channel->mux.hw.init = &init; + + channel->clk = devm_clk_register(dev, &channel->mux.hw); + if (IS_ERR(channel->clk)) { + err = PTR_ERR(channel->clk); + dev_err(dev, "failed to register %s: %d\n", name, err); + return err; + } + + snprintf(name, sizeof(name), "clkin%u", i); + + channel->clk_parent = devm_clk_get(dev, name); + if (IS_ERR(channel->clk_parent)) { + err = PTR_ERR(channel->clk_parent); + if (err == -EPROBE_DEFER) + return err; + + channel->clk_parent = NULL; + } + } + + return 0; +} + +static void meson_pwm_add_channels(struct meson_pwm *meson, + struct meson_pwm_channel *channels) +{ + unsigned int i; + + for (i = 0; i < meson->chip.npwm; i++) + pwm_set_chip_data(&meson->chip.pwms[i], &channels[i]); +} + +static int meson_pwm_probe(struct platform_device *pdev) +{ + struct meson_pwm_channel *channels; + struct meson_pwm *meson; + struct resource *regs; + int err; + + meson = devm_kzalloc(&pdev->dev, sizeof(*meson), GFP_KERNEL); + if (!meson) + return -ENOMEM; + + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + meson->base = devm_ioremap_resource(&pdev->dev, regs); + if (IS_ERR(meson->base)) + return PTR_ERR(meson->base); + + meson->chip.dev = &pdev->dev; + meson->chip.ops = &meson_pwm_ops; + meson->chip.base = -1; + meson->chip.npwm = 2; + meson->chip.of_xlate = of_pwm_xlate_with_flags; + meson->chip.of_pwm_n_cells = 3; + + meson->data = of_device_get_match_data(&pdev->dev); + meson->inverter_mask = BIT(meson->chip.npwm) - 1; + + channels = devm_kcalloc(&pdev->dev, meson->chip.npwm, sizeof(*channels), + GFP_KERNEL); + if (!channels) + return -ENOMEM; + + err = meson_pwm_init_channels(meson, channels); + if (err < 0) + return err; + + err = pwmchip_add(&meson->chip); + if (err < 0) { + dev_err(&pdev->dev, "failed to register PWM chip: %d\n", err); + return err; + } + + 
meson_pwm_add_channels(meson, channels); + + platform_set_drvdata(pdev, meson); + + return 0; +} + +static int meson_pwm_remove(struct platform_device *pdev) +{ + struct meson_pwm *meson = platform_get_drvdata(pdev); + + return pwmchip_remove(&meson->chip); +} + +static struct platform_driver meson_pwm_driver = { + .driver = { + .name = "meson-pwm", + .of_match_table = meson_pwm_matches, + }, + .probe = meson_pwm_probe, + .remove = meson_pwm_remove, +}; +module_platform_driver(meson_pwm_driver); + +MODULE_ALIAS("platform:meson-pwm"); +MODULE_DESCRIPTION("Amlogic Meson PWM Generator driver"); +MODULE_AUTHOR("Neil Armstrong <narmstrong@baylibre.com>"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/pwm/pwm-mtk-disp.c b/drivers/pwm/pwm-mtk-disp.c index 0ad3385298c0..893940d45f0d 100644 --- a/drivers/pwm/pwm-mtk-disp.c +++ b/drivers/pwm/pwm-mtk-disp.c @@ -18,30 +18,40 @@ #include <linux/io.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_device.h> #include <linux/platform_device.h> #include <linux/pwm.h> #include <linux/slab.h> #define DISP_PWM_EN 0x00 -#define PWM_ENABLE_MASK BIT(0) -#define DISP_PWM_COMMIT 0x08 -#define PWM_COMMIT_MASK BIT(0) - -#define DISP_PWM_CON_0 0x10 #define PWM_CLKDIV_SHIFT 16 #define PWM_CLKDIV_MAX 0x3ff #define PWM_CLKDIV_MASK (PWM_CLKDIV_MAX << PWM_CLKDIV_SHIFT) -#define DISP_PWM_CON_1 0x14 #define PWM_PERIOD_BIT_WIDTH 12 #define PWM_PERIOD_MASK ((1 << PWM_PERIOD_BIT_WIDTH) - 1) #define PWM_HIGH_WIDTH_SHIFT 16 #define PWM_HIGH_WIDTH_MASK (0x1fff << PWM_HIGH_WIDTH_SHIFT) +struct mtk_pwm_data { + u32 enable_mask; + unsigned int con0; + u32 con0_sel; + unsigned int con1; + + bool has_commit; + unsigned int commit; + unsigned int commit_mask; + + unsigned int bls_debug; + u32 bls_debug_mask; +}; + struct mtk_disp_pwm { struct pwm_chip chip; + const struct mtk_pwm_data *data; struct clk *clk_main; struct clk *clk_mm; void __iomem *base; @@ -106,12 +116,21 @@ static int mtk_disp_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, return err; } - mtk_disp_pwm_update_bits(mdp, DISP_PWM_CON_0, PWM_CLKDIV_MASK, + mtk_disp_pwm_update_bits(mdp, mdp->data->con0, + PWM_CLKDIV_MASK, clk_div << PWM_CLKDIV_SHIFT); - mtk_disp_pwm_update_bits(mdp, DISP_PWM_CON_1, - PWM_PERIOD_MASK | PWM_HIGH_WIDTH_MASK, value); - mtk_disp_pwm_update_bits(mdp, DISP_PWM_COMMIT, PWM_COMMIT_MASK, 1); - mtk_disp_pwm_update_bits(mdp, DISP_PWM_COMMIT, PWM_COMMIT_MASK, 0); + mtk_disp_pwm_update_bits(mdp, mdp->data->con1, + PWM_PERIOD_MASK | PWM_HIGH_WIDTH_MASK, + value); + + if (mdp->data->has_commit) { + mtk_disp_pwm_update_bits(mdp, mdp->data->commit, + mdp->data->commit_mask, + mdp->data->commit_mask); + mtk_disp_pwm_update_bits(mdp, mdp->data->commit, + mdp->data->commit_mask, + 0x0); + } clk_disable(mdp->clk_mm); clk_disable(mdp->clk_main); @@ -134,7 +153,8 @@ static int mtk_disp_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) return err; } - mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, PWM_ENABLE_MASK, 1); + mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, mdp->data->enable_mask, + mdp->data->enable_mask); return 0; } @@ -143,7 +163,8 @@ static void mtk_disp_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) { struct mtk_disp_pwm *mdp = to_mtk_disp_pwm(chip); - mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, PWM_ENABLE_MASK, 0); + mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, mdp->data->enable_mask, + 0x0); clk_disable(mdp->clk_mm); clk_disable(mdp->clk_main); @@ -166,6 +187,8 @@ static int mtk_disp_pwm_probe(struct platform_device *pdev) if (!mdp) return -ENOMEM; + 
mdp->data = of_device_get_match_data(&pdev->dev); + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); mdp->base = devm_ioremap_resource(&pdev->dev, r); if (IS_ERR(mdp->base)) @@ -200,6 +223,19 @@ static int mtk_disp_pwm_probe(struct platform_device *pdev) platform_set_drvdata(pdev, mdp); + /* + * For MT2701, disable double buffer before writing register + * and select manual mode and use PWM_PERIOD/PWM_HIGH_WIDTH. + */ + if (!mdp->data->has_commit) { + mtk_disp_pwm_update_bits(mdp, mdp->data->bls_debug, + mdp->data->bls_debug_mask, + mdp->data->bls_debug_mask); + mtk_disp_pwm_update_bits(mdp, mdp->data->con0, + mdp->data->con0_sel, + mdp->data->con0_sel); + } + return 0; disable_clk_mm: @@ -221,9 +257,30 @@ static int mtk_disp_pwm_remove(struct platform_device *pdev) return ret; } +static const struct mtk_pwm_data mt2701_pwm_data = { + .enable_mask = BIT(16), + .con0 = 0xa8, + .con0_sel = 0x2, + .con1 = 0xac, + .has_commit = false, + .bls_debug = 0xb0, + .bls_debug_mask = 0x3, +}; + +static const struct mtk_pwm_data mt8173_pwm_data = { + .enable_mask = BIT(0), + .con0 = 0x10, + .con0_sel = 0x0, + .con1 = 0x14, + .has_commit = true, + .commit = 0x8, + .commit_mask = 0x1, +}; + static const struct of_device_id mtk_disp_pwm_of_match[] = { - { .compatible = "mediatek,mt8173-disp-pwm" }, - { .compatible = "mediatek,mt6595-disp-pwm" }, + { .compatible = "mediatek,mt2701-disp-pwm", .data = &mt2701_pwm_data}, + { .compatible = "mediatek,mt6595-disp-pwm", .data = &mt8173_pwm_data}, + { .compatible = "mediatek,mt8173-disp-pwm", .data = &mt8173_pwm_data}, { } }; MODULE_DEVICE_TABLE(of, mtk_disp_pwm_of_match); diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c index ada2d326dc3e..f113cda47032 100644 --- a/drivers/pwm/pwm-samsung.c +++ b/drivers/pwm/pwm-samsung.c @@ -193,9 +193,18 @@ static unsigned long pwm_samsung_calc_tin(struct samsung_pwm_chip *chip, * divider settings and choose the lowest divisor that can generate * frequencies lower than requested. */ - for (div = variant->div_base; div < 4; ++div) - if ((rate >> (variant->bits + div)) < freq) - break; + if (variant->bits < 32) { + /* Only for s3c24xx */ + for (div = variant->div_base; div < 4; ++div) + if ((rate >> (variant->bits + div)) < freq) + break; + } else { + /* + * Other variants have enough counter bits to generate any + * requested rate, so no need to check higher divisors. + */ + div = variant->div_base; + } pwm_samsung_set_divisor(chip, chan, BIT(div)); diff --git a/drivers/pwm/pwm-sti.c b/drivers/pwm/pwm-sti.c index 92abbd56b9f7..dd82dc840af9 100644 --- a/drivers/pwm/pwm-sti.c +++ b/drivers/pwm/pwm-sti.c @@ -1,8 +1,10 @@ /* - * PWM device driver for ST SoCs. 
- * Author: Ajit Pal Singh <ajitpal.singh@st.com> + * PWM device driver for ST SoCs + * + * Copyright (C) 2013-2016 STMicroelectronics (R&D) Limited * - * Copyright (C) 2013-2014 STMicroelectronics (R&D) Limited + * Author: Ajit Pal Singh <ajitpal.singh@st.com> + * Lee Jones <lee.jones@linaro.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -11,6 +13,7 @@ */ #include <linux/clk.h> +#include <linux/interrupt.h> #include <linux/math64.h> #include <linux/mfd/syscon.h> #include <linux/module.h> @@ -18,43 +21,82 @@ #include <linux/platform_device.h> #include <linux/pwm.h> #include <linux/regmap.h> +#include <linux/sched.h> #include <linux/slab.h> #include <linux/time.h> +#include <linux/wait.h> + +#define PWM_OUT_VAL(x) (0x00 + (4 * (x))) /* Device's Duty Cycle register */ +#define PWM_CPT_VAL(x) (0x10 + (4 * (x))) /* Capture value */ +#define PWM_CPT_EDGE(x) (0x30 + (4 * (x))) /* Edge to capture on */ -#define STI_DS_REG(ch) (4 * (ch)) /* Channel's Duty Cycle register */ -#define STI_PWMCR 0x50 /* Control/Config register */ -#define STI_INTEN 0x54 /* Interrupt Enable/Disable register */ -#define PWM_PRESCALE_LOW_MASK 0x0f -#define PWM_PRESCALE_HIGH_MASK 0xf0 +#define STI_PWM_CTRL 0x50 /* Control/Config register */ +#define STI_INT_EN 0x54 /* Interrupt Enable/Disable register */ +#define STI_INT_STA 0x58 /* Interrupt Status register */ +#define PWM_INT_ACK 0x5c +#define PWM_PRESCALE_LOW_MASK 0x0f +#define PWM_PRESCALE_HIGH_MASK 0xf0 +#define PWM_CPT_EDGE_MASK 0x03 +#define PWM_INT_ACK_MASK 0x1ff +
+#define STI_MAX_CPT_DEVS 4 +#define CPT_DC_MAX 0xff /* Regfield IDs */ enum { + /* Bits in PWM_CTRL */ PWMCLK_PRESCALE_LOW, PWMCLK_PRESCALE_HIGH, - PWM_EN, - PWM_INT_EN, + CPTCLK_PRESCALE, + + PWM_OUT_EN, + PWM_CPT_EN, + + PWM_CPT_INT_EN, + PWM_CPT_INT_STAT, /* Keep last */ MAX_REGFIELDS }; +/* + * Each capture input can be programmed to detect rising-edge, falling-edge, + * either edge or neither edge. 
+ */ +enum sti_cpt_edge { + CPT_EDGE_DISABLED, + CPT_EDGE_RISING, + CPT_EDGE_FALLING, + CPT_EDGE_BOTH, +}; + +struct sti_cpt_ddata { + u32 snapshot[3]; + unsigned int index; + struct mutex lock; + wait_queue_head_t wait; +}; + struct sti_pwm_compat_data { const struct reg_field *reg_fields; - unsigned int num_chan; + unsigned int pwm_num_devs; + unsigned int cpt_num_devs; unsigned int max_pwm_cnt; unsigned int max_prescale; }; struct sti_pwm_chip { struct device *dev; - struct clk *clk; - unsigned long clk_rate; + struct clk *pwm_clk; + struct clk *cpt_clk; struct regmap *regmap; struct sti_pwm_compat_data *cdata; struct regmap_field *prescale_low; struct regmap_field *prescale_high; - struct regmap_field *pwm_en; - struct regmap_field *pwm_int_en; + struct regmap_field *pwm_out_en; + struct regmap_field *pwm_cpt_en; + struct regmap_field *pwm_cpt_int_en; + struct regmap_field *pwm_cpt_int_stat; struct pwm_chip chip; struct pwm_device *cur; unsigned long configured; @@ -64,10 +106,13 @@ struct sti_pwm_chip { }; static const struct reg_field sti_pwm_regfields[MAX_REGFIELDS] = { - [PWMCLK_PRESCALE_LOW] = REG_FIELD(STI_PWMCR, 0, 3), - [PWMCLK_PRESCALE_HIGH] = REG_FIELD(STI_PWMCR, 11, 14), - [PWM_EN] = REG_FIELD(STI_PWMCR, 9, 9), - [PWM_INT_EN] = REG_FIELD(STI_INTEN, 0, 0), + [PWMCLK_PRESCALE_LOW] = REG_FIELD(STI_PWM_CTRL, 0, 3), + [PWMCLK_PRESCALE_HIGH] = REG_FIELD(STI_PWM_CTRL, 11, 14), + [CPTCLK_PRESCALE] = REG_FIELD(STI_PWM_CTRL, 4, 8), + [PWM_OUT_EN] = REG_FIELD(STI_PWM_CTRL, 9, 9), + [PWM_CPT_EN] = REG_FIELD(STI_PWM_CTRL, 10, 10), + [PWM_CPT_INT_EN] = REG_FIELD(STI_INT_EN, 1, 4), + [PWM_CPT_INT_STAT] = REG_FIELD(STI_INT_STA, 1, 4), }; static inline struct sti_pwm_chip *to_sti_pwmchip(struct pwm_chip *chip) @@ -82,61 +127,68 @@ static int sti_pwm_get_prescale(struct sti_pwm_chip *pc, unsigned long period, unsigned int *prescale) { struct sti_pwm_compat_data *cdata = pc->cdata; - unsigned long val; + unsigned long clk_rate; + unsigned long value; unsigned int ps; + clk_rate = clk_get_rate(pc->pwm_clk); + if (!clk_rate) { + dev_err(pc->dev, "failed to get clock rate\n"); + return -EINVAL; + } + /* - * prescale = ((period_ns * clk_rate) / (10^9 * (max_pwm_count + 1)) - 1 + * prescale = (period_ns * clk_rate) / (10^9 * (max_pwm_cnt + 1)) - 1 */ - val = NSEC_PER_SEC / pc->clk_rate; - val *= cdata->max_pwm_cnt + 1; + value = NSEC_PER_SEC / clk_rate; + value *= cdata->max_pwm_cnt + 1; - if (period % val) { + if (period % value) return -EINVAL; - } else { - ps = period / val - 1; - if (ps > cdata->max_prescale) - return -EINVAL; - } + + ps = period / value - 1; + if (ps > cdata->max_prescale) + return -EINVAL; + *prescale = ps; return 0; } /* - * For STiH4xx PWM IP, the PWM period is fixed to 256 local clock cycles. - * The only way to change the period (apart from changing the PWM input clock) - * is to change the PWM clock prescaler. - * The prescaler is of 8 bits, so 256 prescaler values and hence - * 256 possible period values are supported (for a particular clock rate). - * The requested period will be applied only if it matches one of these - * 256 values. + * For STiH4xx PWM IP, the PWM period is fixed to 256 local clock cycles. The + * only way to change the period (apart from changing the PWM input clock) is + * to change the PWM clock prescaler. + * + * The prescaler is 8 bits wide, so 256 prescaler values and hence 256 possible + * period values are supported (for a particular clock rate). The requested + * period will be applied only if it matches one of these 256 values.
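The prescale formula above is easier to follow with concrete numbers. The sketch below is a minimal, stand-alone rework of sti_pwm_get_prescale() with an assumed 10 MHz input clock (the real driver reads the rate from the "pwm" clock); the chosen period is illustrative only.

#include <stdio.h>

#define NSEC_PER_SEC	1000000000UL
#define MAX_PWM_CNT	255	/* STiH4xx period is fixed at 256 cycles */
#define MAX_PRESCALE	0xff

/* User-space sketch of the driver's prescale lookup; values assumed. */
static int get_prescale(unsigned long clk_rate, unsigned long period_ns,
			unsigned int *prescale)
{
	unsigned long value = NSEC_PER_SEC / clk_rate; /* ns per input tick */

	value *= MAX_PWM_CNT + 1;	/* ns per 256-tick PWM period */
	if (period_ns % value)		/* only exact multiples are valid */
		return -1;
	if (period_ns / value - 1 > MAX_PRESCALE)
		return -1;
	*prescale = period_ns / value - 1;
	return 0;
}

int main(void)
{
	unsigned int ps;

	/* 10 MHz clock: one undivided 256-cycle period is 25600 ns */
	if (!get_prescale(10000000UL, 51200UL, &ps))
		printf("prescale = %u\n", ps);	/* prints 1 */
	return 0;
}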
 */ static int sti_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, - int duty_ns, int period_ns) + int duty_ns, int period_ns) { struct sti_pwm_chip *pc = to_sti_pwmchip(chip); struct sti_pwm_compat_data *cdata = pc->cdata; + unsigned int ncfg, value, prescale = 0; struct pwm_device *cur = pc->cur; struct device *dev = pc->dev; - unsigned int prescale = 0, pwmvalx; - int ret; - unsigned int ncfg; bool period_same = false; + int ret; ncfg = hweight_long(pc->configured); if (ncfg) period_same = (period_ns == pwm_get_period(cur)); - /* Allow configuration changes if one of the - * following conditions satisfy. - * 1. No channels have been configured. - * 2. Only one channel has been configured and the new request - * is for the same channel. - * 3. Only one channel has been configured and the new request is - * for a new channel and period of the new channel is same as - * the current configured period. - * 4. More than one channels are configured and period of the new + /* + * Allow configuration changes if one of the following conditions + * is satisfied. + * 1. No devices have been configured. + * 2. Only one device has been configured and the new request is for + * the same device. + * 3. Only one device has been configured and the new request is for + * a new device and the period of the new device is the same as the + * current configured period. + * 4. More than one device is configured and the period of the new * request is the same as the current period. */ if (!ncfg || @@ -144,7 +196,11 @@ static int sti_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, ((ncfg == 1) && (pwm->hwpwm != cur->hwpwm) && period_same) || ((ncfg > 1) && period_same)) { /* Enable clock before writing to PWM registers. */ - ret = clk_enable(pc->clk); + ret = clk_enable(pc->pwm_clk); + if (ret) + return ret; + + ret = clk_enable(pc->cpt_clk); if (ret) return ret; @@ -153,15 +209,15 @@ static int sti_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, if (ret) goto clk_dis; - ret = - regmap_field_write(pc->prescale_low, - prescale & PWM_PRESCALE_LOW_MASK); + value = prescale & PWM_PRESCALE_LOW_MASK; + + ret = regmap_field_write(pc->prescale_low, value); if (ret) goto clk_dis; - ret = - regmap_field_write(pc->prescale_high, - (prescale & PWM_PRESCALE_HIGH_MASK) >> 4); + value = (prescale & PWM_PRESCALE_HIGH_MASK) >> 4; + + ret = regmap_field_write(pc->prescale_high, value); if (ret) goto clk_dis; } @@ -172,25 +228,26 @@ static int sti_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, * PWM pulse = (max_pwm_count + 1) local cycles, * that is continuous pulse: signal never goes low.
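The duty-cycle register written just below is a plain ratio against max_pwm_cnt; a worked example with assumed values:

	/*
	 * Worked example of the PWM_OUT_VAL computation that follows
	 * (assumed values: 256-count period, 50% duty request).
	 */
	unsigned int max_pwm_cnt = 255;
	unsigned int duty_ns = 500000, period_ns = 1000000;
	unsigned int value = max_pwm_cnt * duty_ns / period_ns; /* = 127 */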
 */ - pwmvalx = cdata->max_pwm_cnt * duty_ns / period_ns; + value = cdata->max_pwm_cnt * duty_ns / period_ns; - ret = regmap_write(pc->regmap, STI_DS_REG(pwm->hwpwm), pwmvalx); + ret = regmap_write(pc->regmap, PWM_OUT_VAL(pwm->hwpwm), value); if (ret) goto clk_dis; - ret = regmap_field_write(pc->pwm_int_en, 0); + ret = regmap_field_write(pc->pwm_cpt_int_en, 0); set_bit(pwm->hwpwm, &pc->configured); pc->cur = pwm; - dev_dbg(dev, "prescale:%u, period:%i, duty:%i, pwmvalx:%u\n", - prescale, period_ns, duty_ns, pwmvalx); + dev_dbg(dev, "prescale:%u, period:%i, duty:%i, value:%u\n", + prescale, period_ns, duty_ns, value); } else { return -EINVAL; } clk_dis: - clk_disable(pc->clk); + clk_disable(pc->pwm_clk); + clk_disable(pc->cpt_clk); return ret; } @@ -201,23 +258,30 @@ static int sti_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) int ret = 0; /* - * Since we have a common enable for all PWM channels, - * do not enable if already enabled. + * Since we have a common enable for all PWM devices, do not enable if + * already enabled. */ mutex_lock(&pc->sti_pwm_lock); + if (!pc->en_count) { - ret = clk_enable(pc->clk); + ret = clk_enable(pc->pwm_clk); + if (ret) + goto out; + + ret = clk_enable(pc->cpt_clk); if (ret) goto out; - ret = regmap_field_write(pc->pwm_en, 1); + ret = regmap_field_write(pc->pwm_out_en, 1); if (ret) { - dev_err(dev, "failed to enable PWM device:%d\n", - pwm->hwpwm); + dev_err(dev, "failed to enable PWM device %u: %d\n", + pwm->hwpwm, ret); goto out; } } + pc->en_count++; + out: mutex_unlock(&pc->sti_pwm_lock); return ret; @@ -228,13 +292,17 @@ static void sti_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) { struct sti_pwm_chip *pc = to_sti_pwmchip(chip); mutex_lock(&pc->sti_pwm_lock); + if (--pc->en_count) { mutex_unlock(&pc->sti_pwm_lock); return; } - regmap_field_write(pc->pwm_en, 0); - clk_disable(pc->clk); + regmap_field_write(pc->pwm_out_en, 0); + + clk_disable(pc->pwm_clk); + clk_disable(pc->cpt_clk); + mutex_unlock(&pc->sti_pwm_lock); } @@ -245,7 +313,90 @@ static void sti_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) clear_bit(pwm->hwpwm, &pc->configured); } +static int sti_pwm_capture(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_capture *result, unsigned long timeout) +{ + struct sti_pwm_chip *pc = to_sti_pwmchip(chip); + struct sti_pwm_compat_data *cdata = pc->cdata; + struct sti_cpt_ddata *ddata = pwm_get_chip_data(pwm); + struct device *dev = pc->dev; + unsigned int effective_ticks; + unsigned long long high, low; + int ret; + + if (pwm->hwpwm >= cdata->cpt_num_devs) { + dev_err(dev, "device %u is not valid\n", pwm->hwpwm); + return -EINVAL; + } + + mutex_lock(&ddata->lock); + ddata->index = 0; + + /* Prepare capture measurement */ + regmap_write(pc->regmap, PWM_CPT_EDGE(pwm->hwpwm), CPT_EDGE_RISING); + regmap_field_write(pc->pwm_cpt_int_en, BIT(pwm->hwpwm)); + + /* Enable capture */ + ret = regmap_field_write(pc->pwm_cpt_en, 1); + if (ret) { + dev_err(dev, "failed to enable PWM capture %u: %d\n", + pwm->hwpwm, ret); + goto out; + } + + ret = wait_event_interruptible_timeout(ddata->wait, ddata->index > 1, + msecs_to_jiffies(timeout)); + + regmap_write(pc->regmap, PWM_CPT_EDGE(pwm->hwpwm), CPT_EDGE_DISABLED); + + if (ret == -ERESTARTSYS) + goto out; + + switch (ddata->index) { + case 0: + case 1: + /* + * Getting here could mean: + * - input signal is constant or slower than 1 Hz + * - there is no input signal at all + * + * In such a case the frequency is rounded down to 0 + */ + result->period = 0; + result->duty_cycle =
0; + + break; + + case 2: + /* We have everything we need */ + high = ddata->snapshot[1] - ddata->snapshot[0]; + low = ddata->snapshot[2] - ddata->snapshot[1]; + + effective_ticks = clk_get_rate(pc->cpt_clk); + + result->period = (high + low) * NSEC_PER_SEC; + result->period /= effective_ticks; + + result->duty_cycle = high * NSEC_PER_SEC; + result->duty_cycle /= effective_ticks; + + break; + + default: + dev_err(dev, "internal error\n"); + break; + } + +out: + /* Disable capture */ + regmap_field_write(pc->pwm_cpt_en, 0); + + mutex_unlock(&ddata->lock); + return ret; +} + static const struct pwm_ops sti_pwm_ops = { + .capture = sti_pwm_capture, .config = sti_pwm_config, .enable = sti_pwm_enable, .disable = sti_pwm_disable, @@ -253,17 +404,98 @@ static const struct pwm_ops sti_pwm_ops = { .owner = THIS_MODULE, }; +static irqreturn_t sti_pwm_interrupt(int irq, void *data) +{ + struct sti_pwm_chip *pc = data; + struct device *dev = pc->dev; + struct sti_cpt_ddata *ddata; + int devicenum; + unsigned int cpt_int_stat; + unsigned int reg; + int ret = IRQ_NONE; + + ret = regmap_field_read(pc->pwm_cpt_int_stat, &cpt_int_stat); + if (ret) + return ret; + + while (cpt_int_stat) { + devicenum = ffs(cpt_int_stat) - 1; + + ddata = pwm_get_chip_data(&pc->chip.pwms[devicenum]); + + /* + * Capture input: + * _______ _______ + * | | | | + * __| |_________________| |________ + * ^0 ^1 ^2 + * + * Capture starts on the first available rising edge. When a + * capture event occurs, capture value (CPT_VALx) is stored, + * index incremented, capture edge changed. + * + * After the capture, if the index > 1, we have collected the + * necessary data so we signal the thread waiting for it and + * disable the capture by setting capture edge to none. + */ + + regmap_read(pc->regmap, + PWM_CPT_VAL(devicenum), + &ddata->snapshot[ddata->index]); + + switch (ddata->index) { + case 0: + case 1: + regmap_read(pc->regmap, PWM_CPT_EDGE(devicenum), &reg); + reg ^= PWM_CPT_EDGE_MASK; + regmap_write(pc->regmap, PWM_CPT_EDGE(devicenum), reg); + + ddata->index++; + break; + + case 2: + regmap_write(pc->regmap, + PWM_CPT_EDGE(devicenum), + CPT_EDGE_DISABLED); + wake_up(&ddata->wait); + break; + + default: + dev_err(dev, "Internal error\n"); + } + + cpt_int_stat &= ~BIT_MASK(devicenum); + + ret = IRQ_HANDLED; + } + + /* Just ACK everything */ + regmap_write(pc->regmap, PWM_INT_ACK, PWM_INT_ACK_MASK); + + return ret; +} + static int sti_pwm_probe_dt(struct sti_pwm_chip *pc) { struct device *dev = pc->dev; const struct reg_field *reg_fields; struct device_node *np = dev->of_node; struct sti_pwm_compat_data *cdata = pc->cdata; - u32 num_chan; + u32 num_devs; + int ret; + + ret = of_property_read_u32(np, "st,pwm-num-chan", &num_devs); + if (!ret) + cdata->pwm_num_devs = num_devs; + + ret = of_property_read_u32(np, "st,capture-num-chan", &num_devs); + if (!ret) + cdata->cpt_num_devs = num_devs; - of_property_read_u32(np, "st,pwm-num-chan", &num_chan); - if (num_chan) - cdata->num_chan = num_chan; + if (!cdata->pwm_num_devs && !cdata->cpt_num_devs) { + dev_err(dev, "No channels configured\n"); + return -EINVAL; + } reg_fields = cdata->reg_fields; @@ -277,15 +509,26 @@ static int sti_pwm_probe_dt(struct sti_pwm_chip *pc) if (IS_ERR(pc->prescale_high)) return PTR_ERR(pc->prescale_high); - pc->pwm_en = devm_regmap_field_alloc(dev, pc->regmap, - reg_fields[PWM_EN]); - if (IS_ERR(pc->pwm_en)) - return PTR_ERR(pc->pwm_en); - pc->pwm_int_en = devm_regmap_field_alloc(dev, pc->regmap, - reg_fields[PWM_INT_EN]); - if (IS_ERR(pc->pwm_int_en)) - return
PTR_ERR(pc->pwm_int_en); + pc->pwm_out_en = devm_regmap_field_alloc(dev, pc->regmap, + reg_fields[PWM_OUT_EN]); + if (IS_ERR(pc->pwm_out_en)) + return PTR_ERR(pc->pwm_out_en); + + pc->pwm_cpt_en = devm_regmap_field_alloc(dev, pc->regmap, + reg_fields[PWM_CPT_EN]); + if (IS_ERR(pc->pwm_cpt_en)) + return PTR_ERR(pc->pwm_cpt_en); + + pc->pwm_cpt_int_en = devm_regmap_field_alloc(dev, pc->regmap, + reg_fields[PWM_CPT_INT_EN]); + if (IS_ERR(pc->pwm_cpt_int_en)) + return PTR_ERR(pc->pwm_cpt_int_en); + + pc->pwm_cpt_int_stat = devm_regmap_field_alloc(dev, pc->regmap, + reg_fields[PWM_CPT_INT_STAT]); + if (PTR_ERR_OR_ZERO(pc->pwm_cpt_int_stat)) + return PTR_ERR(pc->pwm_cpt_int_stat); return 0; } @@ -302,7 +545,8 @@ static int sti_pwm_probe(struct platform_device *pdev) struct sti_pwm_compat_data *cdata; struct sti_pwm_chip *pc; struct resource *res; - int ret; + unsigned int i; + int irq, ret; pc = devm_kzalloc(dev, sizeof(*pc), GFP_KERNEL); if (!pc) @@ -323,14 +567,28 @@ static int sti_pwm_probe(struct platform_device *pdev) if (IS_ERR(pc->regmap)) return PTR_ERR(pc->regmap); + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "Failed to obtain IRQ\n"); + return irq; + } + + ret = devm_request_irq(&pdev->dev, irq, sti_pwm_interrupt, 0, + pdev->name, pc); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to request IRQ\n"); + return ret; + } + /* * Setup PWM data with default values: some values could be replaced * with specific ones provided from Device Tree. */ - cdata->reg_fields = &sti_pwm_regfields[0]; + cdata->reg_fields = sti_pwm_regfields; cdata->max_prescale = 0xff; - cdata->max_pwm_cnt = 255; - cdata->num_chan = 1; + cdata->max_pwm_cnt = 255; + cdata->pwm_num_devs = 0; + cdata->cpt_num_devs = 0; pc->cdata = cdata; pc->dev = dev; @@ -341,36 +599,64 @@ static int sti_pwm_probe(struct platform_device *pdev) if (ret) return ret; - pc->clk = of_clk_get_by_name(dev->of_node, "pwm"); - if (IS_ERR(pc->clk)) { + if (!cdata->pwm_num_devs) + goto skip_pwm; + + pc->pwm_clk = of_clk_get_by_name(dev->of_node, "pwm"); + if (IS_ERR(pc->pwm_clk)) { dev_err(dev, "failed to get PWM clock\n"); - return PTR_ERR(pc->clk); + return PTR_ERR(pc->pwm_clk); } - pc->clk_rate = clk_get_rate(pc->clk); - if (!pc->clk_rate) { - dev_err(dev, "failed to get clock rate\n"); - return -EINVAL; + ret = clk_prepare(pc->pwm_clk); + if (ret) { + dev_err(dev, "failed to prepare clock\n"); + return ret; } - ret = clk_prepare(pc->clk); +skip_pwm: + if (!cdata->cpt_num_devs) + goto skip_cpt; + + pc->cpt_clk = of_clk_get_by_name(dev->of_node, "capture"); + if (IS_ERR(pc->cpt_clk)) { + dev_err(dev, "failed to get PWM capture clock\n"); + return PTR_ERR(pc->cpt_clk); + } + + ret = clk_prepare(pc->cpt_clk); if (ret) { dev_err(dev, "failed to prepare clock\n"); return ret; } +skip_cpt: pc->chip.dev = dev; pc->chip.ops = &sti_pwm_ops; pc->chip.base = -1; - pc->chip.npwm = pc->cdata->num_chan; + pc->chip.npwm = pc->cdata->pwm_num_devs; pc->chip.can_sleep = true; ret = pwmchip_add(&pc->chip); if (ret < 0) { - clk_unprepare(pc->clk); + clk_unprepare(pc->pwm_clk); + clk_unprepare(pc->cpt_clk); return ret; } + for (i = 0; i < cdata->cpt_num_devs; i++) { + struct sti_cpt_ddata *ddata; + + ddata = devm_kzalloc(dev, sizeof(*ddata), GFP_KERNEL); + if (!ddata) + return -ENOMEM; + + init_waitqueue_head(&ddata->wait); + mutex_init(&ddata->lock); + + pwm_set_chip_data(&pc->chip.pwms[i], ddata); + } + platform_set_drvdata(pdev, pc); return 0; @@ -381,10 +667,11 @@ static int sti_pwm_remove(struct platform_device *pdev) struct 
sti_pwm_chip *pc = platform_get_drvdata(pdev); unsigned int i; - for (i = 0; i < pc->cdata->num_chan; i++) + for (i = 0; i < pc->cdata->pwm_num_devs; i++) pwm_disable(&pc->chip.pwms[i]); - clk_unprepare(pc->clk); + clk_unprepare(pc->pwm_clk); + clk_unprepare(pc->cpt_clk); return pwmchip_remove(&pc->chip); } diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c index 03a99a53c39e..b0803f6c64d9 100644 --- a/drivers/pwm/pwm-sun4i.c +++ b/drivers/pwm/pwm-sun4i.c @@ -284,6 +284,12 @@ static const struct sun4i_pwm_data sun4i_pwm_data_a20 = { .npwm = 2, }; +static const struct sun4i_pwm_data sun4i_pwm_data_h3 = { + .has_prescaler_bypass = true, + .has_rdy = true, + .npwm = 1, +}; + static const struct of_device_id sun4i_pwm_dt_ids[] = { { .compatible = "allwinner,sun4i-a10-pwm", @@ -298,6 +304,9 @@ static const struct of_device_id sun4i_pwm_dt_ids[] = { .compatible = "allwinner,sun7i-a20-pwm", .data = &sun4i_pwm_data_a20, }, { + .compatible = "allwinner,sun8i-h3-pwm", + .data = &sun4i_pwm_data_h3, + }, { /* sentinel */ }, }; diff --git a/drivers/pwm/pwm-tipwmss.c b/drivers/pwm/pwm-tipwmss.c index 829f4991c96f..7fa85a1604da 100644 --- a/drivers/pwm/pwm-tipwmss.c +++ b/drivers/pwm/pwm-tipwmss.c @@ -34,7 +34,6 @@ static int pwmss_probe(struct platform_device *pdev) struct device_node *node = pdev->dev.of_node; pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); /* Populate all the child nodes here... */ ret = of_platform_populate(node, NULL, NULL, &pdev->dev); @@ -46,31 +45,13 @@ static int pwmss_probe(struct platform_device *pdev) static int pwmss_remove(struct platform_device *pdev) { - pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); return 0; } -#ifdef CONFIG_PM_SLEEP -static int pwmss_suspend(struct device *dev) -{ - pm_runtime_put_sync(dev); - return 0; -} - -static int pwmss_resume(struct device *dev) -{ - pm_runtime_get_sync(dev); - return 0; -} -#endif - -static SIMPLE_DEV_PM_OPS(pwmss_pm_ops, pwmss_suspend, pwmss_resume); - static struct platform_driver pwmss_driver = { .driver = { .name = "pwmss", - .pm = &pwmss_pm_ops, .of_match_table = pwmss_of_match, }, .probe = pwmss_probe, diff --git a/drivers/pwm/pwm-twl.c b/drivers/pwm/pwm-twl.c index 04f76725d591..7a993b056638 100644 --- a/drivers/pwm/pwm-twl.c +++ b/drivers/pwm/pwm-twl.c @@ -269,6 +269,22 @@ static void twl6030_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) goto out; } + val |= TWL6030_PWM_TOGGLE(pwm->hwpwm, TWL6030_PWMXEN); + + ret = twl_i2c_write_u8(TWL6030_MODULE_ID1, val, TWL6030_TOGGLE3_REG); + if (ret < 0) { + dev_err(chip->dev, "%s: Failed to disable PWM\n", pwm->label); + goto out; + } + + val &= ~TWL6030_PWM_TOGGLE(pwm->hwpwm, TWL6030_PWMXEN); + + ret = twl_i2c_write_u8(TWL6030_MODULE_ID1, val, TWL6030_TOGGLE3_REG); + if (ret < 0) { + dev_err(chip->dev, "%s: Failed to disable PWM\n", pwm->label); + goto out; + } + twl->twl6030_toggle3 = val; out: mutex_unlock(&twl->mutex); diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c index 18ed725594c3..0296d8178ae2 100644 --- a/drivers/pwm/sysfs.c +++ b/drivers/pwm/sysfs.c @@ -409,6 +409,24 @@ void pwmchip_sysfs_unexport(struct pwm_chip *chip) } } +void pwmchip_sysfs_unexport_children(struct pwm_chip *chip) +{ + struct device *parent; + unsigned int i; + + parent = class_find_device(&pwm_class, NULL, chip, + pwmchip_sysfs_match); + if (!parent) + return; + + for (i = 0; i < chip->npwm; i++) { + struct pwm_device *pwm = &chip->pwms[i]; + + if (test_bit(PWMF_EXPORTED, &pwm->flags)) + pwm_unexport_child(parent, pwm); + } +} + 
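pwmchip_sysfs_unexport_children() complements pwmchip_sysfs_unexport(): it drops only the exported per-PWM children while keeping the chip's class device alive. The call site is not part of this hunk, so the following caller is a hypothetical sketch of the intended use in a chip teardown path:

/* Hypothetical caller; the real call site is outside this hunk. */
static void example_pwmchip_teardown(struct pwm_chip *chip)
{
	/* Remove the pwmN sysfs nodes before the PWM devices go away... */
	pwmchip_sysfs_unexport_children(chip);

	/* ...then continue with the usual pwmchip_remove()-style cleanup. */
}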
static int __init pwm_sysfs_init(void) { return class_register(&pwm_class); } diff --git a/drivers/regulator/max8973-regulator.c b/drivers/regulator/max8973-regulator.c index 3958f50c5975..e0c747aa9f85 100644 --- a/drivers/regulator/max8973-regulator.c +++ b/drivers/regulator/max8973-regulator.c @@ -495,7 +495,8 @@ static irqreturn_t max8973_thermal_irq(int irq, void *data) { struct max8973_chip *mchip = data; - thermal_zone_device_update(mchip->tz_device); + thermal_zone_device_update(mchip->tz_device, + THERMAL_EVENT_UNSPECIFIED); return IRQ_HANDLED; } diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 2d702ca6556f..a13541bdc726 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -186,7 +186,7 @@ config HISI_THERMAL config IMX_THERMAL tristate "Temperature sensor driver for Freescale i.MX SoCs" - depends on CPU_THERMAL + depends on (ARCH_MXC && CPU_THERMAL) || COMPILE_TEST depends on MFD_SYSCON depends on OF help @@ -195,6 +195,26 @@ config IMX_THERMAL cpufreq is used as the cooling device to throttle CPUs when the passive trip is crossed. +config MAX77620_THERMAL + tristate "Temperature sensor driver for Maxim MAX77620 PMIC" + depends on MFD_MAX77620 + depends on OF + help + Support for die junction temperature warning alarm for Maxim + Semiconductor PMIC MAX77620 device. The device generates two alarm + interrupts when the PMIC die temperature crosses the thresholds of + 120 degC and 140 degC. + +config QORIQ_THERMAL + tristate "QorIQ Thermal Monitoring Unit" + depends on THERMAL_OF + depends on HAS_IOMEM + help + Support for Thermal Monitoring Unit (TMU) found on QorIQ platforms. + It supports one critical trip point and one passive trip point. The + cpufreq is used as the cooling device to throttle CPUs when the + passive trip is crossed. + config SPEAR_THERMAL tristate "SPEAr thermal sensor driver" depends on PLAT_SPEAR || COMPILE_TEST @@ -332,6 +352,16 @@ menu "ACPI INT340X thermal drivers" source drivers/thermal/int340x_thermal/Kconfig endmenu +config INTEL_BXT_PMIC_THERMAL + tristate "Intel Broxton PMIC thermal driver" + depends on X86 && INTEL_SOC_PMIC && REGMAP + help + Select this driver for Intel Broxton PMIC with ADC channels monitoring + system temperature measurements and alerts. + This driver is used for monitoring the ADC channels of PMIC and handles + the alert trip point interrupts and notifies the thermal framework with + the trip point and temperature details of the zone. + config INTEL_PCH_THERMAL tristate "Intel PCH Thermal Reporting Driver" depends on X86 && PCI @@ -399,4 +429,9 @@ config GENERIC_ADC_THERMAL to this driver. This driver reports the temperature by reading ADC channel and converts it to temperature based on lookup table.
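For testing, the entries added above (together with the Qualcomm one introduced below) can be enabled with a config fragment along these lines; building them as modules rather than built-in is an illustrative choice only:

CONFIG_MAX77620_THERMAL=m
CONFIG_QORIQ_THERMAL=m
CONFIG_INTEL_BXT_PMIC_THERMAL=m
CONFIG_QCOM_TSENS=m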
+menu "Qualcomm thermal drivers" +depends on (ARCH_QCOM && OF) || COMPILE_TEST +source "drivers/thermal/qcom/Kconfig" +endmenu + endif diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 10b07c14f8a9..c92eb22a41ff 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -37,6 +37,8 @@ obj-$(CONFIG_DB8500_THERMAL) += db8500_thermal.o obj-$(CONFIG_ARMADA_THERMAL) += armada_thermal.o obj-$(CONFIG_TANGO_THERMAL) += tango_thermal.o obj-$(CONFIG_IMX_THERMAL) += imx_thermal.o +obj-$(CONFIG_MAX77620_THERMAL) += max77620_thermal.o +obj-$(CONFIG_QORIQ_THERMAL) += qoriq_thermal.o obj-$(CONFIG_DB8500_CPUFREQ_COOLING) += db8500_cpufreq_cooling.o obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o obj-$(CONFIG_X86_PKG_TEMP_THERMAL) += x86_pkg_temp_thermal.o @@ -45,8 +47,10 @@ obj-$(CONFIG_INTEL_SOC_DTS_THERMAL) += intel_soc_dts_thermal.o obj-$(CONFIG_INTEL_QUARK_DTS_THERMAL) += intel_quark_dts_thermal.o obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal/ obj-$(CONFIG_INT340X_THERMAL) += int340x_thermal/ +obj-$(CONFIG_INTEL_BXT_PMIC_THERMAL) += intel_bxt_pmic_thermal.o obj-$(CONFIG_INTEL_PCH_THERMAL) += intel_pch_thermal.o obj-$(CONFIG_ST_THERMAL) += st/ +obj-$(CONFIG_QCOM_TSENS) += qcom/ obj-$(CONFIG_TEGRA_SOCTHERM) += tegra/ obj-$(CONFIG_HISI_THERMAL) += hisi_thermal.o obj-$(CONFIG_MTK_THERMAL) += mtk_thermal.o diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index a32b41783b77..9ce0e9eef923 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -74,7 +74,7 @@ struct power_table { * cpufreq frequencies. * @allowed_cpus: all the cpus involved for this cpufreq_cooling_device. * @node: list_head to link all cpufreq_cooling_device together. - * @last_load: load measured by the latest call to cpufreq_get_actual_power() + * @last_load: load measured by the latest call to cpufreq_get_requested_power() * @time_in_idle: previous reading of the absolute time that this cpu was idle * @time_in_idle_timestamp: wall time of the last invocation of * get_cpu_idle_time_us() diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c index 652acd8fbe48..e776cea80cfc 100644 --- a/drivers/thermal/db8500_thermal.c +++ b/drivers/thermal/db8500_thermal.c @@ -306,7 +306,7 @@ static void db8500_thermal_work(struct work_struct *work) if (cur_mode == THERMAL_DEVICE_DISABLED) return; - thermal_zone_device_update(pzone->therm_dev); + thermal_zone_device_update(pzone->therm_dev, THERMAL_EVENT_UNSPECIFIED); dev_dbg(&pzone->therm_dev->device, "thermal work finished.\n"); } diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index 01f0015f80dc..81631b110e17 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -312,7 +312,7 @@ static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev, unsigned long freq; u32 static_power; - if (state < 0 || state >= dfc->freq_table_size) + if (state >= dfc->freq_table_size) return -EINVAL; freq = dfc->freq_table[state]; diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c index bb118a152cbb..fc5e5057f0de 100644 --- a/drivers/thermal/gov_bang_bang.c +++ b/drivers/thermal/gov_bang_bang.c @@ -65,7 +65,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) if (instance->target == 0 && tz->temperature >= trip_temp) instance->target = 1; else if (instance->target == 1 && - tz->temperature < trip_temp - trip_hyst) + tz->temperature <= trip_temp - trip_hyst) 
instance->target = 0; dev_dbg(&instance->cdev->device, "target=%d\n", diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 97fad8f51e1c..f6429666a1cf 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -237,7 +237,8 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) if (!data->sensors[i].tzd) continue; - thermal_zone_device_update(data->sensors[i].tzd); + thermal_zone_device_update(data->sensors[i].tzd, + THERMAL_EVENT_UNSPECIFIED); } return IRQ_HANDLED; diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c index e473548b5d28..06912f0602b7 100644 --- a/drivers/thermal/imx_thermal.c +++ b/drivers/thermal/imx_thermal.c @@ -246,7 +246,7 @@ static int imx_set_mode(struct thermal_zone_device *tz, } data->mode = mode; - thermal_zone_device_update(tz); + thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); return 0; } @@ -457,7 +457,7 @@ static irqreturn_t imx_thermal_alarm_irq_thread(int irq, void *dev) dev_dbg(&data->tz->device, "THERMAL ALARM: T > %d\n", data->alarm_temp / 1000); - thermal_zone_device_update(data->tz); + thermal_zone_device_update(data->tz, THERMAL_EVENT_UNSPECIFIED); return IRQ_HANDLED; } diff --git a/drivers/thermal/int340x_thermal/int3402_thermal.c b/drivers/thermal/int340x_thermal/int3402_thermal.c index 69df3d960303..8e90b3151a42 100644 --- a/drivers/thermal/int340x_thermal/int3402_thermal.c +++ b/drivers/thermal/int340x_thermal/int3402_thermal.c @@ -35,7 +35,8 @@ static void int3402_notify(acpi_handle handle, u32 event, void *data) case INT3402_PERF_CHANGED_EVENT: break; case INT3402_THERMAL_EVENT: - int340x_thermal_zone_device_update(priv->int340x_zone); + int340x_thermal_zone_device_update(priv->int340x_zone, + THERMAL_TRIP_VIOLATED); break; default: break; diff --git a/drivers/thermal/int340x_thermal/int3403_thermal.c b/drivers/thermal/int340x_thermal/int3403_thermal.c index 50a7a08e3a15..c4890c9437eb 100644 --- a/drivers/thermal/int340x_thermal/int3403_thermal.c +++ b/drivers/thermal/int340x_thermal/int3403_thermal.c @@ -25,6 +25,7 @@ #define INT3403_TYPE_CHARGER 0x0B #define INT3403_TYPE_BATTERY 0x0C #define INT3403_PERF_CHANGED_EVENT 0x80 +#define INT3403_PERF_TRIP_POINT_CHANGED 0x81 #define INT3403_THERMAL_EVENT 0x90 /* Preserved structure for future expandbility */ @@ -72,7 +73,13 @@ static void int3403_notify(acpi_handle handle, case INT3403_PERF_CHANGED_EVENT: break; case INT3403_THERMAL_EVENT: - int340x_thermal_zone_device_update(obj->int340x_zone); + int340x_thermal_zone_device_update(obj->int340x_zone, + THERMAL_TRIP_VIOLATED); + break; + case INT3403_PERF_TRIP_POINT_CHANGED: + int340x_thermal_read_trips(obj->int340x_zone); + int340x_thermal_zone_device_update(obj->int340x_zone, + THERMAL_TRIP_CHANGED); break; default: dev_err(&priv->pdev->dev, "Unsupported event [0x%x]\n", event); diff --git a/drivers/thermal/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/int340x_thermal/int340x_thermal_zone.c index b9b2666aa94c..145a5c53ff5c 100644 --- a/drivers/thermal/int340x_thermal/int340x_thermal_zone.c +++ b/drivers/thermal/int340x_thermal/int340x_thermal_zone.c @@ -177,6 +177,42 @@ static int int340x_thermal_get_trip_config(acpi_handle handle, char *name, return 0; } +int int340x_thermal_read_trips(struct int34x_thermal_zone *int34x_zone) +{ + int trip_cnt = int34x_zone->aux_trip_nr; + int i; + + int34x_zone->crt_trip_id = -1; + if (!int340x_thermal_get_trip_config(int34x_zone->adev->handle, "_CRT", + &int34x_zone->crt_temp)) + 
int34x_zone->crt_trip_id = trip_cnt++; + + int34x_zone->hot_trip_id = -1; + if (!int340x_thermal_get_trip_config(int34x_zone->adev->handle, "_HOT", + &int34x_zone->hot_temp)) + int34x_zone->hot_trip_id = trip_cnt++; + + int34x_zone->psv_trip_id = -1; + if (!int340x_thermal_get_trip_config(int34x_zone->adev->handle, "_PSV", + &int34x_zone->psv_temp)) + int34x_zone->psv_trip_id = trip_cnt++; + + for (i = 0; i < INT340X_THERMAL_MAX_ACT_TRIP_COUNT; i++) { + char name[5] = { '_', 'A', 'C', '0' + i, '\0' }; + + if (int340x_thermal_get_trip_config(int34x_zone->adev->handle, + name, + &int34x_zone->act_trips[i].temp)) + break; + + int34x_zone->act_trips[i].id = trip_cnt++; + int34x_zone->act_trips[i].valid = true; + } + + return trip_cnt; +} +EXPORT_SYMBOL_GPL(int340x_thermal_read_trips); + static struct thermal_zone_params int340x_thermal_params = { .governor_name = "user_space", .no_hwmon = true, @@ -188,7 +224,7 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, struct int34x_thermal_zone *int34x_thermal_zone; acpi_status status; unsigned long long trip_cnt; - int trip_mask = 0, i; + int trip_mask = 0; int ret; int34x_thermal_zone = kzalloc(sizeof(*int34x_thermal_zone), @@ -214,28 +250,8 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, int34x_thermal_zone->aux_trip_nr = trip_cnt; } - int34x_thermal_zone->crt_trip_id = -1; - if (!int340x_thermal_get_trip_config(adev->handle, "_CRT", - &int34x_thermal_zone->crt_temp)) - int34x_thermal_zone->crt_trip_id = trip_cnt++; - int34x_thermal_zone->hot_trip_id = -1; - if (!int340x_thermal_get_trip_config(adev->handle, "_HOT", - &int34x_thermal_zone->hot_temp)) - int34x_thermal_zone->hot_trip_id = trip_cnt++; - int34x_thermal_zone->psv_trip_id = -1; - if (!int340x_thermal_get_trip_config(adev->handle, "_PSV", - &int34x_thermal_zone->psv_temp)) - int34x_thermal_zone->psv_trip_id = trip_cnt++; - for (i = 0; i < INT340X_THERMAL_MAX_ACT_TRIP_COUNT; i++) { - char name[5] = { '_', 'A', 'C', '0' + i, '\0' }; + trip_cnt = int340x_thermal_read_trips(int34x_thermal_zone); - if (int340x_thermal_get_trip_config(adev->handle, name, - &int34x_thermal_zone->act_trips[i].temp)) - break; - - int34x_thermal_zone->act_trips[i].id = trip_cnt++; - int34x_thermal_zone->act_trips[i].valid = true; - } int34x_thermal_zone->lpat_table = acpi_lpat_get_conversion_table( adev->handle); diff --git a/drivers/thermal/int340x_thermal/int340x_thermal_zone.h b/drivers/thermal/int340x_thermal/int340x_thermal_zone.h index aaadf724ff2e..5f3ba4775c5c 100644 --- a/drivers/thermal/int340x_thermal/int340x_thermal_zone.h +++ b/drivers/thermal/int340x_thermal/int340x_thermal_zone.h @@ -46,6 +46,7 @@ struct int34x_thermal_zone { struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *, struct thermal_zone_device_ops *override_ops); void int340x_thermal_zone_remove(struct int34x_thermal_zone *); +int int340x_thermal_read_trips(struct int34x_thermal_zone *int34x_zone); static inline void int340x_thermal_zone_set_priv_data( struct int34x_thermal_zone *tzone, void *priv_data) @@ -60,9 +61,10 @@ static inline void *int340x_thermal_zone_get_priv_data( } static inline void int340x_thermal_zone_device_update( - struct int34x_thermal_zone *tzone) + struct int34x_thermal_zone *tzone, + enum thermal_notify_event event) { - thermal_zone_device_update(tzone->zone); + thermal_zone_device_update(tzone->zone, event); } #endif diff --git a/drivers/thermal/int340x_thermal/processor_thermal_device.c 
b/drivers/thermal/int340x_thermal/processor_thermal_device.c index 42c1ac057bad..ff3b36f339e3 100644 --- a/drivers/thermal/int340x_thermal/processor_thermal_device.c +++ b/drivers/thermal/int340x_thermal/processor_thermal_device.c @@ -258,7 +258,8 @@ static void proc_thermal_notify(acpi_handle handle, u32 event, void *data) switch (event) { case PROC_POWER_CAPABILITY_CHANGED: proc_thermal_read_ppcc(proc_priv); - int340x_thermal_zone_device_update(proc_priv->int340x_zone); + int340x_thermal_zone_device_update(proc_priv->int340x_zone, + THERMAL_DEVICE_POWER_CAPABILITY_CHANGED); break; default: dev_err(proc_priv->dev, "Unsupported event [0x%x]\n", event); diff --git a/drivers/thermal/intel_bxt_pmic_thermal.c b/drivers/thermal/intel_bxt_pmic_thermal.c new file mode 100644 index 000000000000..0f19a393ddd8 --- /dev/null +++ b/drivers/thermal/intel_bxt_pmic_thermal.c @@ -0,0 +1,300 @@ +/* + * Intel Broxton PMIC thermal driver + * + * Copyright (C) 2016 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/device.h> +#include <linux/thermal.h> +#include <linux/platform_device.h> +#include <linux/sched.h> +#include <linux/mfd/intel_soc_pmic.h> + +#define BXTWC_THRM0IRQ 0x4E04 +#define BXTWC_THRM1IRQ 0x4E05 +#define BXTWC_THRM2IRQ 0x4E06 +#define BXTWC_MTHRM0IRQ 0x4E12 +#define BXTWC_MTHRM1IRQ 0x4E13 +#define BXTWC_MTHRM2IRQ 0x4E14 +#define BXTWC_STHRM0IRQ 0x4F19 +#define BXTWC_STHRM1IRQ 0x4F1A +#define BXTWC_STHRM2IRQ 0x4F1B + +struct trip_config_map { + u16 irq_reg; + u16 irq_en; + u16 evt_stat; + u8 irq_mask; + u8 irq_en_mask; + u8 evt_mask; + u8 trip_num; +}; + +struct thermal_irq_map { + char handle[20]; + int num_trips; + const struct trip_config_map *trip_config; +}; + +struct pmic_thermal_data { + const struct thermal_irq_map *maps; + int num_maps; +}; + +static const struct trip_config_map bxtwc_str0_trip_config[] = { + { + .irq_reg = BXTWC_THRM0IRQ, + .irq_mask = 0x01, + .irq_en = BXTWC_MTHRM0IRQ, + .irq_en_mask = 0x01, + .evt_stat = BXTWC_STHRM0IRQ, + .evt_mask = 0x01, + .trip_num = 0 + }, + { + .irq_reg = BXTWC_THRM0IRQ, + .irq_mask = 0x10, + .irq_en = BXTWC_MTHRM0IRQ, + .irq_en_mask = 0x10, + .evt_stat = BXTWC_STHRM0IRQ, + .evt_mask = 0x10, + .trip_num = 1 + } +}; + +static const struct trip_config_map bxtwc_str1_trip_config[] = { + { + .irq_reg = BXTWC_THRM0IRQ, + .irq_mask = 0x02, + .irq_en = BXTWC_MTHRM0IRQ, + .irq_en_mask = 0x02, + .evt_stat = BXTWC_STHRM0IRQ, + .evt_mask = 0x02, + .trip_num = 0 + }, + { + .irq_reg = BXTWC_THRM0IRQ, + .irq_mask = 0x20, + .irq_en = BXTWC_MTHRM0IRQ, + .irq_en_mask = 0x20, + .evt_stat = BXTWC_STHRM0IRQ, + .evt_mask = 0x20, + .trip_num = 1 + }, +}; + +static const struct trip_config_map bxtwc_str2_trip_config[] = { + { + .irq_reg = BXTWC_THRM0IRQ, + .irq_mask = 0x04, + .irq_en = BXTWC_MTHRM0IRQ, + .irq_en_mask = 0x04, + .evt_stat = BXTWC_STHRM0IRQ, + .evt_mask = 0x04, + .trip_num = 0 + }, + { + .irq_reg = BXTWC_THRM0IRQ, + .irq_mask = 0x40, + .irq_en = BXTWC_MTHRM0IRQ, 
+ .irq_en_mask = 0x40, + .evt_stat = BXTWC_STHRM0IRQ, + .evt_mask = 0x40, + .trip_num = 1 + }, +}; + +static const struct trip_config_map bxtwc_str3_trip_config[] = { + { + .irq_reg = BXTWC_THRM2IRQ, + .irq_mask = 0x10, + .irq_en = BXTWC_MTHRM2IRQ, + .irq_en_mask = 0x10, + .evt_stat = BXTWC_STHRM2IRQ, + .evt_mask = 0x10, + .trip_num = 0 + }, +}; + +static const struct thermal_irq_map bxtwc_thermal_irq_map[] = { + { + .handle = "STR0", + .trip_config = bxtwc_str0_trip_config, + .num_trips = ARRAY_SIZE(bxtwc_str0_trip_config), + }, + { + .handle = "STR1", + .trip_config = bxtwc_str1_trip_config, + .num_trips = ARRAY_SIZE(bxtwc_str1_trip_config), + }, + { + .handle = "STR2", + .trip_config = bxtwc_str2_trip_config, + .num_trips = ARRAY_SIZE(bxtwc_str2_trip_config), + }, + { + .handle = "STR3", + .trip_config = bxtwc_str3_trip_config, + .num_trips = ARRAY_SIZE(bxtwc_str3_trip_config), + }, +}; + +static const struct pmic_thermal_data bxtwc_thermal_data = { + .maps = bxtwc_thermal_irq_map, + .num_maps = ARRAY_SIZE(bxtwc_thermal_irq_map), +}; + +static irqreturn_t pmic_thermal_irq_handler(int irq, void *data) +{ + struct platform_device *pdev = data; + struct thermal_zone_device *tzd; + struct pmic_thermal_data *td; + struct intel_soc_pmic *pmic; + struct regmap *regmap; + u8 reg_val, mask, irq_stat, trip; + u16 reg, evt_stat_reg; + int i, j, ret; + + pmic = dev_get_drvdata(pdev->dev.parent); + regmap = pmic->regmap; + td = (struct pmic_thermal_data *) + platform_get_device_id(pdev)->driver_data; + + /* Resolve thermal irqs */ + for (i = 0; i < td->num_maps; i++) { + for (j = 0; j < td->maps[i].num_trips; j++) { + reg = td->maps[i].trip_config[j].irq_reg; + mask = td->maps[i].trip_config[j].irq_mask; + /* + * Read the irq register to resolve whether the + * interrupt was triggered for this sensor + */ + if (regmap_read(regmap, reg, &ret)) + return IRQ_HANDLED; + + reg_val = (u8)ret; + irq_stat = ((u8)ret & mask); + + if (!irq_stat) + continue; + + /* + * Read the status register to find out what + * event occurred i.e a high or a low + */ + evt_stat_reg = td->maps[i].trip_config[j].evt_stat; + if (regmap_read(regmap, evt_stat_reg, &ret)) + return IRQ_HANDLED; + + trip = td->maps[i].trip_config[j].trip_num; + tzd = thermal_zone_get_zone_by_name(td->maps[i].handle); + if (!IS_ERR(tzd)) + thermal_zone_device_update(tzd, + THERMAL_EVENT_UNSPECIFIED); + + /* Clear the appropriate irq */ + regmap_write(regmap, reg, reg_val & mask); + } + } + + return IRQ_HANDLED; +} + +static int pmic_thermal_probe(struct platform_device *pdev) +{ + struct regmap_irq_chip_data *regmap_irq_chip; + struct pmic_thermal_data *thermal_data; + int ret, irq, virq, i, j, pmic_irq_count; + struct intel_soc_pmic *pmic; + struct regmap *regmap; + struct device *dev; + u16 reg; + u8 mask; + + dev = &pdev->dev; + pmic = dev_get_drvdata(pdev->dev.parent); + if (!pmic) { + dev_err(dev, "Failed to get struct intel_soc_pmic pointer\n"); + return -ENODEV; + } + + thermal_data = (struct pmic_thermal_data *) + platform_get_device_id(pdev)->driver_data; + if (!thermal_data) { + dev_err(dev, "No thermal data initialized!!\n"); + return -ENODEV; + } + + regmap = pmic->regmap; + regmap_irq_chip = pmic->irq_chip_data_level2; + + pmic_irq_count = 0; + while ((irq = platform_get_irq(pdev, pmic_irq_count)) != -ENXIO) { + virq = regmap_irq_get_virq(regmap_irq_chip, irq); + if (virq < 0) { + dev_err(dev, "failed to get virq by irq %d\n", irq); + return virq; + } + + ret = devm_request_threaded_irq(&pdev->dev, virq, + NULL, 
pmic_thermal_irq_handler, + IRQF_ONESHOT, "pmic_thermal", pdev); + + if (ret) { + dev_err(dev, "request irq(%d) failed: %d\n", virq, ret); + return ret; + } + pmic_irq_count++; + } + + /* Enable thermal interrupts */ + for (i = 0; i < thermal_data->num_maps; i++) { + for (j = 0; j < thermal_data->maps[i].num_trips; j++) { + reg = thermal_data->maps[i].trip_config[j].irq_en; + mask = thermal_data->maps[i].trip_config[j].irq_en_mask; + ret = regmap_update_bits(regmap, reg, mask, 0x00); + if (ret) + return ret; + } + } + + return 0; +} + +static const struct platform_device_id pmic_thermal_id_table[] = { + { + .name = "bxt_wcove_thermal", + .driver_data = (kernel_ulong_t)&bxtwc_thermal_data, + }, + {}, +}; + +static struct platform_driver pmic_thermal_driver = { + .probe = pmic_thermal_probe, + .driver = { + .name = "pmic_thermal", + }, + .id_table = pmic_thermal_id_table, +}; + +MODULE_DEVICE_TABLE(platform, pmic_thermal_id_table); +module_platform_driver(pmic_thermal_driver); + +MODULE_AUTHOR("Yegnesh S Iyer <yegnesh.s.iyer@intel.com>"); +MODULE_DESCRIPTION("Intel Broxton PMIC Thermal Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/thermal/intel_soc_dts_iosf.c b/drivers/thermal/intel_soc_dts_iosf.c index f72e1db3216f..e0813dfaa278 100644 --- a/drivers/thermal/intel_soc_dts_iosf.c +++ b/drivers/thermal/intel_soc_dts_iosf.c @@ -391,7 +391,8 @@ void intel_soc_dts_iosf_interrupt_handler(struct intel_soc_dts_sensors *sensors) for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) { pr_debug("TZD update for zone %d\n", i); - thermal_zone_device_update(sensors->soc_dts[i].tzone); + thermal_zone_device_update(sensors->soc_dts[i].tzone, + THERMAL_EVENT_UNSPECIFIED); } } else spin_unlock_irqrestore(&sensors->intr_notify_lock, flags); diff --git a/drivers/thermal/max77620_thermal.c b/drivers/thermal/max77620_thermal.c new file mode 100644 index 000000000000..83905ff46e40 --- /dev/null +++ b/drivers/thermal/max77620_thermal.c @@ -0,0 +1,166 @@ +/* + * Junction temperature thermal driver for Maxim Max77620. + * + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * Author: Laxman Dewangan <ldewangan@nvidia.com> + * Mallikarjun Kasoju <mkasoju@nvidia.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + */ + +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/mfd/max77620.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/slab.h> +#include <linux/thermal.h> + +#define MAX77620_NORMAL_OPERATING_TEMP 100000 +#define MAX77620_TJALARM1_TEMP 120000 +#define MAX77620_TJALARM2_TEMP 140000 + +struct max77620_therm_info { + struct device *dev; + struct regmap *rmap; + struct thermal_zone_device *tz_device; + int irq_tjalarm1; + int irq_tjalarm2; +}; + +/** + * max77620_thermal_read_temp: Read PMIC die temperature. + * @data: Device specific data. + * @temp: Temperature in millidegrees Celsius + * + * The actual temperature of the PMIC die is not available from the PMIC. + * The PMIC only reports whether or not the die temperature has crossed the + * threshold level of 120degC or 140degC. + * If the threshold has not been crossed then the die temperature is assumed + * to be 100degC, else 120degC or 140degC based on the PMIC die temperature + * threshold status. + * + * Return 0 on success, otherwise an error number to show the reason of failure.
+ */ + +static int max77620_thermal_read_temp(void *data, int *temp) +{ + struct max77620_therm_info *mtherm = data; + unsigned int val; + int ret; + + ret = regmap_read(mtherm->rmap, MAX77620_REG_STATLBT, &val); + if (ret < 0) { + dev_err(mtherm->dev, "Failed to read STATLBT: %d\n", ret); + return ret; + } + + if (val & MAX77620_IRQ_TJALRM2_MASK) + *temp = MAX77620_TJALARM2_TEMP; + else if (val & MAX77620_IRQ_TJALRM1_MASK) + *temp = MAX77620_TJALARM1_TEMP; + else + *temp = MAX77620_NORMAL_OPERATING_TEMP; + + return 0; +} + +static const struct thermal_zone_of_device_ops max77620_thermal_ops = { + .get_temp = max77620_thermal_read_temp, +}; + +static irqreturn_t max77620_thermal_irq(int irq, void *data) +{ + struct max77620_therm_info *mtherm = data; + + if (irq == mtherm->irq_tjalarm1) + dev_warn(mtherm->dev, "Junction Temp Alarm1(120C) occurred\n"); + else if (irq == mtherm->irq_tjalarm2) + dev_crit(mtherm->dev, "Junction Temp Alarm2(140C) occurred\n"); + + thermal_zone_device_update(mtherm->tz_device, + THERMAL_EVENT_UNSPECIFIED); + + return IRQ_HANDLED; +} + +static int max77620_thermal_probe(struct platform_device *pdev) +{ + struct max77620_therm_info *mtherm; + int ret; + + mtherm = devm_kzalloc(&pdev->dev, sizeof(*mtherm), GFP_KERNEL); + if (!mtherm) + return -ENOMEM; + + mtherm->irq_tjalarm1 = platform_get_irq(pdev, 0); + mtherm->irq_tjalarm2 = platform_get_irq(pdev, 1); + if ((mtherm->irq_tjalarm1 < 0) || (mtherm->irq_tjalarm2 < 0)) { + dev_err(&pdev->dev, "Alarm irq number not available\n"); + return -EINVAL; + } + + pdev->dev.of_node = pdev->dev.parent->of_node; + + mtherm->dev = &pdev->dev; + mtherm->rmap = dev_get_regmap(pdev->dev.parent, NULL); + if (!mtherm->rmap) { + dev_err(&pdev->dev, "Failed to get parent regmap\n"); + return -ENODEV; + } + + mtherm->tz_device = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, + mtherm, &max77620_thermal_ops); + if (IS_ERR(mtherm->tz_device)) { + ret = PTR_ERR(mtherm->tz_device); + dev_err(&pdev->dev, "Failed to register thermal zone: %d\n", + ret); + return ret; + } + + ret = devm_request_threaded_irq(&pdev->dev, mtherm->irq_tjalarm1, NULL, + max77620_thermal_irq, + IRQF_ONESHOT | IRQF_SHARED, + dev_name(&pdev->dev), mtherm); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to request irq1: %d\n", ret); + return ret; + } + + ret = devm_request_threaded_irq(&pdev->dev, mtherm->irq_tjalarm2, NULL, + max77620_thermal_irq, + IRQF_ONESHOT | IRQF_SHARED, + dev_name(&pdev->dev), mtherm); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to request irq2: %d\n", ret); + return ret; + } + + platform_set_drvdata(pdev, mtherm); + + return 0; +} + +static struct platform_device_id max77620_thermal_devtype[] = { + { .name = "max77620-thermal", }, + {}, +}; + +static struct platform_driver max77620_thermal_driver = { + .driver = { + .name = "max77620-thermal", + }, + .probe = max77620_thermal_probe, + .id_table = max77620_thermal_devtype, +}; + +module_platform_driver(max77620_thermal_driver); + +MODULE_DESCRIPTION("Max77620 Junction temperature Thermal driver"); +MODULE_AUTHOR("Laxman Dewangan <ldewangan@nvidia.com>"); +MODULE_AUTHOR("Mallikarjun Kasoju <mkasoju@nvidia.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/thermal/mtk_thermal.c b/drivers/thermal/mtk_thermal.c index 262ab0a2266f..34169c32d495 100644 --- a/drivers/thermal/mtk_thermal.c +++ b/drivers/thermal/mtk_thermal.c @@ -2,6 +2,7 @@ * Copyright (c) 2015 MediaTek Inc. 
* Author: Hanyi Wu <hanyi.wu@mediatek.com> * Sascha Hauer <s.hauer@pengutronix.de> + * Dawei Chien <dawei.chien@mediatek.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -21,6 +22,7 @@ #include <linux/nvmem-consumer.h> #include <linux/of.h> #include <linux/of_address.h> +#include <linux/of_device.h> #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/io.h> @@ -88,6 +90,7 @@ #define TEMP_ADCVALIDMASK_VALID_HIGH BIT(5) #define TEMP_ADCVALIDMASK_VALID_POS(bit) (bit) +/* MT8173 thermal sensors */ #define MT8173_TS1 0 #define MT8173_TS2 1 #define MT8173_TS3 2 @@ -106,7 +109,12 @@ /* The number of sensing points per bank */ #define MT8173_NUM_SENSORS_PER_ZONE 4 -/* Layout of the fuses providing the calibration data */ +/* + * Layout of the fuses providing the calibration data + * These macros could be used for both MT8173 and MT2701. + * MT8173 has five sensors and needs five VTS calibration data, + * and MT2701 has three sensors and needs three VTS calibration data. + */ #define MT8173_CALIB_BUF0_VALID BIT(0) #define MT8173_CALIB_BUF1_ADC_GE(x) (((x) >> 22) & 0x3ff) #define MT8173_CALIB_BUF0_VTS_TS1(x) (((x) >> 17) & 0x1ff) @@ -117,24 +125,50 @@ #define MT8173_CALIB_BUF0_DEGC_CALI(x) (((x) >> 1) & 0x3f) #define MT8173_CALIB_BUF0_O_SLOPE(x) (((x) >> 26) & 0x3f) +/* MT2701 thermal sensors */ +#define MT2701_TS1 0 +#define MT2701_TS2 1 +#define MT2701_TSABB 2 + +/* AUXADC channel 11 is used for the temperature sensors */ +#define MT2701_TEMP_AUXADC_CHANNEL 11 + +/* The total number of temperature sensors in the MT2701 */ +#define MT2701_NUM_SENSORS 3 + #define THERMAL_NAME "mtk-thermal" +/* The number of sensing points per bank */ +#define MT2701_NUM_SENSORS_PER_ZONE 3 + struct mtk_thermal; +struct thermal_bank_cfg { + unsigned int num_sensors; + const int *sensors; +}; + struct mtk_thermal_bank { struct mtk_thermal *mt; int id; }; +struct mtk_thermal_data { + s32 num_banks; + s32 num_sensors; + s32 auxadc_channel; + const int *sensor_mux_values; + const int *msr; + const int *adcpnp; + struct thermal_bank_cfg bank_data[]; +}; + struct mtk_thermal { struct device *dev; void __iomem *thermal_base; struct clk *clk_peri_therm; struct clk *clk_auxadc; - - struct mtk_thermal_bank banks[MT8173_NUM_ZONES]; - /* lock: for getting and putting banks */ struct mutex lock; @@ -144,16 +178,44 @@ struct mtk_thermal { s32 o_slope; s32 vts[MT8173_NUM_SENSORS]; + const struct mtk_thermal_data *conf; + struct mtk_thermal_bank banks[]; }; -struct mtk_thermal_bank_cfg { - unsigned int num_sensors; - unsigned int sensors[MT8173_NUM_SENSORS_PER_ZONE]; +/* MT8173 thermal sensor data */ +const int mt8173_bank_data[MT8173_NUM_ZONES][3] = { + { MT8173_TS2, MT8173_TS3 }, + { MT8173_TS2, MT8173_TS4 }, + { MT8173_TS1, MT8173_TS2, MT8173_TSABB }, + { MT8173_TS2 }, }; -static const int sensor_mux_values[MT8173_NUM_SENSORS] = { 0, 1, 2, 3, 16 }; +const int mt8173_msr[MT8173_NUM_SENSORS_PER_ZONE] = { + TEMP_MSR0, TEMP_MSR1, TEMP_MSR2, TEMP_MSR3 }; -/* +const int mt8173_adcpnp[MT8173_NUM_SENSORS_PER_ZONE] = { + TEMP_ADCPNP0, TEMP_ADCPNP1, TEMP_ADCPNP2, TEMP_ADCPNP3 +}; + +const int mt8173_mux_values[MT8173_NUM_SENSORS] = { 0, 1, 2, 3, 16 }; + +/* MT2701 thermal sensor data */ +const int mt2701_bank_data[MT2701_NUM_SENSORS] = { + MT2701_TS1, MT2701_TS2, MT2701_TSABB +}; + +const int mt2701_msr[MT2701_NUM_SENSORS_PER_ZONE] = { + TEMP_MSR0, TEMP_MSR1, TEMP_MSR2 +}; + +const int
mt2701_adcpnp[MT2701_NUM_SENSORS_PER_ZONE] = { + TEMP_ADCPNP0, TEMP_ADCPNP1, TEMP_ADCPNP2 +}; + +const int mt2701_mux_values[MT2701_NUM_SENSORS] = { 0, 1, 16 }; + +/** * The MT8173 thermal controller has four banks. Each bank can read up to * four temperature sensors simultaneously. The MT8173 has a total of 5 * temperature sensors. We use each bank to measure a certain area of the @@ -166,42 +228,53 @@ static const int sensor_mux_values[MT8173_NUM_SENSORS] = { 0, 1, 2, 3, 16 }; * data, and this indeed needs the temperatures of the individual banks * for making better decisions. */ -static const struct mtk_thermal_bank_cfg bank_data[] = { - { - .num_sensors = 2, - .sensors = { MT8173_TS2, MT8173_TS3 }, - }, { - .num_sensors = 2, - .sensors = { MT8173_TS2, MT8173_TS4 }, - }, { - .num_sensors = 3, - .sensors = { MT8173_TS1, MT8173_TS2, MT8173_TSABB }, - }, { - .num_sensors = 1, - .sensors = { MT8173_TS2 }, +static const struct mtk_thermal_data mt8173_thermal_data = { + .auxadc_channel = MT8173_TEMP_AUXADC_CHANNEL, + .num_banks = MT8173_NUM_ZONES, + .num_sensors = MT8173_NUM_SENSORS, + .bank_data = { + { + .num_sensors = 2, + .sensors = mt8173_bank_data[0], + }, { + .num_sensors = 2, + .sensors = mt8173_bank_data[1], + }, { + .num_sensors = 3, + .sensors = mt8173_bank_data[2], + }, { + .num_sensors = 1, + .sensors = mt8173_bank_data[3], + }, }, + .msr = mt8173_msr, + .adcpnp = mt8173_adcpnp, + .sensor_mux_values = mt8173_mux_values, }; -struct mtk_thermal_sense_point { - int msr; - int adcpnp; -}; - -static const struct mtk_thermal_sense_point - sensing_points[MT8173_NUM_SENSORS_PER_ZONE] = { - { - .msr = TEMP_MSR0, - .adcpnp = TEMP_ADCPNP0, - }, { - .msr = TEMP_MSR1, - .adcpnp = TEMP_ADCPNP1, - }, { - .msr = TEMP_MSR2, - .adcpnp = TEMP_ADCPNP2, - }, { - .msr = TEMP_MSR3, - .adcpnp = TEMP_ADCPNP3, +/** + * The MT2701 thermal controller has one bank, which can read up to + * three temperature sensors simultaneously. The MT2701 has a total of 3 + * temperature sensors. + * + * The thermal core only gets the maximum temperature of this one bank, + * so the bank concept wouldn't be necessary here. However, the SVS (Smart + * Voltage Scaling) unit makes its decisions based on the same bank + * data. 
+ */ +static const struct mtk_thermal_data mt2701_thermal_data = { + .auxadc_channel = MT2701_TEMP_AUXADC_CHANNEL, + .num_banks = 1, + .num_sensors = MT2701_NUM_SENSORS, + .bank_data = { + { + .num_sensors = 3, + .sensors = mt2701_bank_data, + }, }, + .msr = mt2701_msr, + .adcpnp = mt2701_adcpnp, + .sensor_mux_values = mt2701_mux_values, }; /** @@ -270,13 +343,16 @@ static void mtk_thermal_put_bank(struct mtk_thermal_bank *bank) static int mtk_thermal_bank_temperature(struct mtk_thermal_bank *bank) { struct mtk_thermal *mt = bank->mt; + const struct mtk_thermal_data *conf = mt->conf; int i, temp = INT_MIN, max = INT_MIN; u32 raw; - for (i = 0; i < bank_data[bank->id].num_sensors; i++) { - raw = readl(mt->thermal_base + sensing_points[i].msr); + for (i = 0; i < conf->bank_data[bank->id].num_sensors; i++) { + raw = readl(mt->thermal_base + conf->msr[i]); - temp = raw_to_mcelsius(mt, bank_data[bank->id].sensors[i], raw); + temp = raw_to_mcelsius(mt, + conf->bank_data[bank->id].sensors[i], + raw); /* * The first read of a sensor often contains very high bogus @@ -299,7 +375,7 @@ static int mtk_read_temp(void *data, int *temperature) int i; int tempmax = INT_MIN; - for (i = 0; i < MT8173_NUM_ZONES; i++) { + for (i = 0; i < mt->conf->num_banks; i++) { struct mtk_thermal_bank *bank = &mt->banks[i]; mtk_thermal_get_bank(bank); @@ -322,7 +398,7 @@ static void mtk_thermal_init_bank(struct mtk_thermal *mt, int num, u32 apmixed_phys_base, u32 auxadc_phys_base) { struct mtk_thermal_bank *bank = &mt->banks[num]; - const struct mtk_thermal_bank_cfg *cfg = &bank_data[num]; + const struct mtk_thermal_data *conf = mt->conf; int i; bank->id = num; @@ -368,7 +444,7 @@ static void mtk_thermal_init_bank(struct mtk_thermal *mt, int num, * this value will be stored to TEMP_PNPMUXADDR (TEMP_SPARE0) * automatically by hw */ - writel(BIT(MT8173_TEMP_AUXADC_CHANNEL), mt->thermal_base + TEMP_ADCMUX); + writel(BIT(conf->auxadc_channel), mt->thermal_base + TEMP_ADCMUX); /* AHB address for auxadc mux selection */ writel(auxadc_phys_base + AUXADC_CON1_CLR_V, @@ -379,18 +455,18 @@ static void mtk_thermal_init_bank(struct mtk_thermal *mt, int num, mt->thermal_base + TEMP_PNPMUXADDR); /* AHB value for auxadc enable */ - writel(BIT(MT8173_TEMP_AUXADC_CHANNEL), mt->thermal_base + TEMP_ADCEN); + writel(BIT(conf->auxadc_channel), mt->thermal_base + TEMP_ADCEN); /* AHB address for auxadc enable (channel 0 immediate mode selected) */ writel(auxadc_phys_base + AUXADC_CON1_SET_V, mt->thermal_base + TEMP_ADCENADDR); /* AHB address for auxadc valid bit */ - writel(auxadc_phys_base + AUXADC_DATA(MT8173_TEMP_AUXADC_CHANNEL), + writel(auxadc_phys_base + AUXADC_DATA(conf->auxadc_channel), mt->thermal_base + TEMP_ADCVALIDADDR); /* AHB address for auxadc voltage output */ - writel(auxadc_phys_base + AUXADC_DATA(MT8173_TEMP_AUXADC_CHANNEL), + writel(auxadc_phys_base + AUXADC_DATA(conf->auxadc_channel), mt->thermal_base + TEMP_ADCVOLTADDR); /* read valid & voltage are at the same register */ @@ -407,11 +483,12 @@ static void mtk_thermal_init_bank(struct mtk_thermal *mt, int num, writel(TEMP_ADCWRITECTRL_ADC_MUX_WRITE, mt->thermal_base + TEMP_ADCWRITECTRL); - for (i = 0; i < cfg->num_sensors; i++) - writel(sensor_mux_values[cfg->sensors[i]], - mt->thermal_base + sensing_points[i].adcpnp); + for (i = 0; i < conf->bank_data[num].num_sensors; i++) + writel(conf->sensor_mux_values[conf->bank_data[num].sensors[i]], + mt->thermal_base + conf->adcpnp[i]); - writel((1 << cfg->num_sensors) - 1, mt->thermal_base + TEMP_MONCTL0); + writel((1 << 
conf->bank_data[num].num_sensors) - 1, + mt->thermal_base + TEMP_MONCTL0); writel(TEMP_ADCWRITECTRL_ADC_PNP_WRITE | TEMP_ADCWRITECTRL_ADC_MUX_WRITE, @@ -442,7 +519,7 @@ static int mtk_thermal_get_calibration_data(struct device *dev, /* Start with default values */ mt->adc_ge = 512; - for (i = 0; i < MT8173_NUM_SENSORS; i++) + for (i = 0; i < mt->conf->num_sensors; i++) mt->vts[i] = 260; mt->degc_cali = 40; mt->o_slope = 0; @@ -486,18 +563,37 @@ out: return ret; } +static const struct of_device_id mtk_thermal_of_match[] = { + { + .compatible = "mediatek,mt8173-thermal", + .data = (void *)&mt8173_thermal_data, + }, + { + .compatible = "mediatek,mt2701-thermal", + .data = (void *)&mt2701_thermal_data, + }, { + }, +}; +MODULE_DEVICE_TABLE(of, mtk_thermal_of_match); + static int mtk_thermal_probe(struct platform_device *pdev) { int ret, i; struct device_node *auxadc, *apmixedsys, *np = pdev->dev.of_node; struct mtk_thermal *mt; struct resource *res; + const struct of_device_id *of_id; u64 auxadc_phys_base, apmixed_phys_base; + struct thermal_zone_device *tzdev; mt = devm_kzalloc(&pdev->dev, sizeof(*mt), GFP_KERNEL); if (!mt) return -ENOMEM; + of_id = of_match_device(mtk_thermal_of_match, &pdev->dev); + if (of_id) + mt->conf = (const struct mtk_thermal_data *)of_id->data; + mt->clk_peri_therm = devm_clk_get(&pdev->dev, "therm"); if (IS_ERR(mt->clk_peri_therm)) return PTR_ERR(mt->clk_peri_therm); @@ -565,17 +661,23 @@ static int mtk_thermal_probe(struct platform_device *pdev) goto err_disable_clk_auxadc; } - for (i = 0; i < MT8173_NUM_ZONES; i++) + for (i = 0; i < mt->conf->num_banks; i++) mtk_thermal_init_bank(mt, i, apmixed_phys_base, auxadc_phys_base); platform_set_drvdata(pdev, mt); - devm_thermal_zone_of_sensor_register(&pdev->dev, 0, mt, - &mtk_thermal_ops); + tzdev = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, mt, + &mtk_thermal_ops); + if (IS_ERR(tzdev)) { + ret = PTR_ERR(tzdev); + goto err_disable_clk_peri_therm; + } return 0; +err_disable_clk_peri_therm: + clk_disable_unprepare(mt->clk_peri_therm); err_disable_clk_auxadc: clk_disable_unprepare(mt->clk_auxadc); @@ -592,13 +694,6 @@ static int mtk_thermal_remove(struct platform_device *pdev) return 0; } -static const struct of_device_id mtk_thermal_of_match[] = { - { - .compatible = "mediatek,mt8173-thermal", - }, { - }, -}; - static struct platform_driver mtk_thermal_driver = { .probe = mtk_thermal_probe, .remove = mtk_thermal_remove, @@ -610,6 +705,7 @@ static struct platform_driver mtk_thermal_driver = { module_platform_driver(mtk_thermal_driver); +MODULE_AUTHOR("Dawei Chien <dawei.chien@mediatek.com>"); MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>"); MODULE_AUTHOR("Hanyi Wu <hanyi.wu@mediatek.com>"); MODULE_DESCRIPTION("Mediatek thermal driver"); diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c index b8e509c60848..d04ec3b9e5ff 100644 --- a/drivers/thermal/of-thermal.c +++ b/drivers/thermal/of-thermal.c @@ -101,6 +101,17 @@ static int of_thermal_get_temp(struct thermal_zone_device *tz, return data->ops->get_temp(data->sensor_data, temp); } +static int of_thermal_set_trips(struct thermal_zone_device *tz, + int low, int high) +{ + struct __thermal_zone *data = tz->devdata; + + if (!data->ops || !data->ops->set_trips) + return -EINVAL; + + return data->ops->set_trips(data->sensor_data, low, high); +} + /** * of_thermal_get_ntrips - function to export number of available trip * points. 
@@ -181,9 +192,6 @@ static int of_thermal_set_emul_temp(struct thermal_zone_device *tz, { struct __thermal_zone *data = tz->devdata; - if (!data->ops || !data->ops->set_emul_temp) - return -EINVAL; - return data->ops->set_emul_temp(data->sensor_data, temp); } @@ -191,25 +199,11 @@ static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip, enum thermal_trend *trend) { struct __thermal_zone *data = tz->devdata; - long dev_trend; - int r; if (!data->ops->get_trend) return -EINVAL; - r = data->ops->get_trend(data->sensor_data, &dev_trend); - if (r) - return r; - - /* TODO: These intervals might have some thresholds, but in core code */ - if (dev_trend > 0) - *trend = THERMAL_TREND_RAISING; - else if (dev_trend < 0) - *trend = THERMAL_TREND_DROPPING; - else - *trend = THERMAL_TREND_STABLE; - - return 0; + return data->ops->get_trend(data->sensor_data, trip, trend); } static int of_thermal_bind(struct thermal_zone_device *thermal, @@ -292,7 +286,7 @@ static int of_thermal_set_mode(struct thermal_zone_device *tz, mutex_unlock(&tz->lock); data->mode = mode; - thermal_zone_device_update(tz); + thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); return 0; } @@ -427,7 +421,17 @@ thermal_zone_of_add_sensor(struct device_node *zone, tzd->ops->get_temp = of_thermal_get_temp; tzd->ops->get_trend = of_thermal_get_trend; - tzd->ops->set_emul_temp = of_thermal_set_emul_temp; + + /* + * The thermal zone core will calculate the window if the driver has + * set the optional set_trips pointer. + */ + if (ops->set_trips) + tzd->ops->set_trips = of_thermal_set_trips; + + if (ops->set_emul_temp) + tzd->ops->set_emul_temp = of_thermal_set_emul_temp; + + mutex_unlock(&tzd->lock); return tzd; @@ -596,7 +600,7 @@ static int devm_thermal_zone_of_sensor_match(struct device *dev, void *res, * Return: On success returns a valid struct thermal_zone_device, * otherwise, it returns a corresponding ERR_PTR(). Caller must * check the return value with help of IS_ERR() helper. - * Registered hermal_zone_device device will automatically be + * Registered thermal_zone_device device will automatically be * released when device is unbounded. */ struct thermal_zone_device *devm_thermal_zone_of_sensor_register( diff --git a/drivers/thermal/qcom-spmi-temp-alarm.c b/drivers/thermal/qcom-spmi-temp-alarm.c index f8a3c60bef94..819c6d5d7aa7 100644 --- a/drivers/thermal/qcom-spmi-temp-alarm.c +++ b/drivers/thermal/qcom-spmi-temp-alarm.c @@ -150,7 +150,7 @@ static irqreturn_t qpnp_tm_isr(int irq, void *data) { struct qpnp_tm_chip *chip = data; - thermal_zone_device_update(chip->tz_dev); + thermal_zone_device_update(chip->tz_dev, THERMAL_EVENT_UNSPECIFIED); return IRQ_HANDLED; } diff --git a/drivers/thermal/qcom/Kconfig b/drivers/thermal/qcom/Kconfig new file mode 100644 index 000000000000..be32e5abce3c --- /dev/null +++ b/drivers/thermal/qcom/Kconfig @@ -0,0 +1,11 @@ +config QCOM_TSENS + tristate "Qualcomm TSENS Temperature Alarm" + depends on THERMAL + depends on QCOM_QFPROM + depends on ARCH_QCOM || COMPILE_TEST + help + This enables the thermal sysfs driver for the TSENS device. It shows + up in sysfs as a thermal zone with multiple trip points. Disabling the + thermal zone device via the mode file results in disabling the sensor. + It can also set threshold temperatures for both hot and cold trips and + update the thermal zone when a threshold is reached.
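The TSENS calibration files that follow unpack per-sensor fuse fields with mask/shift pairs read through qfprom nvmem cells. The pattern, reduced to a standalone user-space sketch (the S0_P1 layout is the 8916 one from tsens-8916.c; the fuse word value is invented):

	#include <stdio.h>
	#include <stdint.h>

	#define S0_P1_MASK	0x00000f80	/* sensor 0, point-1 code, bits 11:7 */
	#define S0_P1_SHIFT	7

	static uint32_t field_get(uint32_t word, uint32_t mask, unsigned int shift)
	{
		return (word & mask) >> shift;
	}

	int main(void)
	{
		uint32_t fuse = 0x00000a80;	/* invented calibration word */

		/* 0xa80 & 0xf80 = 0xa80, shifted right by 7 gives 0x15 */
		printf("s0_p1 = %#x\n", field_get(fuse, S0_P1_MASK, S0_P1_SHIFT));
		return 0;
	}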
diff --git a/drivers/thermal/qcom/Makefile b/drivers/thermal/qcom/Makefile new file mode 100644 index 000000000000..2cc2193637e7 --- /dev/null +++ b/drivers/thermal/qcom/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_QCOM_TSENS) += qcom_tsens.o +qcom_tsens-y += tsens.o tsens-common.o tsens-8916.o tsens-8974.o tsens-8960.o tsens-8996.o diff --git a/drivers/thermal/qcom/tsens-8916.c b/drivers/thermal/qcom/tsens-8916.c new file mode 100644 index 000000000000..fdf561b8b81d --- /dev/null +++ b/drivers/thermal/qcom/tsens-8916.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/platform_device.h> +#include "tsens.h" + +/* eeprom layout data for 8916 */ +#define BASE0_MASK 0x0000007f +#define BASE1_MASK 0xfe000000 +#define BASE0_SHIFT 0 +#define BASE1_SHIFT 25 + +#define S0_P1_MASK 0x00000f80 +#define S1_P1_MASK 0x003e0000 +#define S2_P1_MASK 0xf8000000 +#define S3_P1_MASK 0x000003e0 +#define S4_P1_MASK 0x000f8000 + +#define S0_P2_MASK 0x0001f000 +#define S1_P2_MASK 0x07c00000 +#define S2_P2_MASK 0x0000001f +#define S3_P2_MASK 0x00007c00 +#define S4_P2_MASK 0x01f00000 + +#define S0_P1_SHIFT 7 +#define S1_P1_SHIFT 17 +#define S2_P1_SHIFT 27 +#define S3_P1_SHIFT 5 +#define S4_P1_SHIFT 15 + +#define S0_P2_SHIFT 12 +#define S1_P2_SHIFT 22 +#define S2_P2_SHIFT 0 +#define S3_P2_SHIFT 10 +#define S4_P2_SHIFT 20 + +#define CAL_SEL_MASK 0xe0000000 +#define CAL_SEL_SHIFT 29 + +static int calibrate_8916(struct tsens_device *tmdev) +{ + int base0 = 0, base1 = 0, i; + u32 p1[5], p2[5]; + int mode = 0; + u32 *qfprom_cdata, *qfprom_csel; + + qfprom_cdata = (u32 *)qfprom_read(tmdev->dev, "calib"); + if (IS_ERR(qfprom_cdata)) + return PTR_ERR(qfprom_cdata); + + qfprom_csel = (u32 *)qfprom_read(tmdev->dev, "calib_sel"); + if (IS_ERR(qfprom_csel)) + return PTR_ERR(qfprom_csel); + + mode = (qfprom_csel[0] & CAL_SEL_MASK) >> CAL_SEL_SHIFT; + dev_dbg(tmdev->dev, "calibration mode is %d\n", mode); + + switch (mode) { + case TWO_PT_CALIB: + base1 = (qfprom_cdata[1] & BASE1_MASK) >> BASE1_SHIFT; + p2[0] = (qfprom_cdata[0] & S0_P2_MASK) >> S0_P2_SHIFT; + p2[1] = (qfprom_cdata[0] & S1_P2_MASK) >> S1_P2_SHIFT; + p2[2] = (qfprom_cdata[1] & S2_P2_MASK) >> S2_P2_SHIFT; + p2[3] = (qfprom_cdata[1] & S3_P2_MASK) >> S3_P2_SHIFT; + p2[4] = (qfprom_cdata[1] & S4_P2_MASK) >> S4_P2_SHIFT; + for (i = 0; i < tmdev->num_sensors; i++) + p2[i] = ((base1 + p2[i]) << 3); + /* Fall through */ + case ONE_PT_CALIB2: + base0 = (qfprom_cdata[0] & BASE0_MASK); + p1[0] = (qfprom_cdata[0] & S0_P1_MASK) >> S0_P1_SHIFT; + p1[1] = (qfprom_cdata[0] & S1_P1_MASK) >> S1_P1_SHIFT; + p1[2] = (qfprom_cdata[0] & S2_P1_MASK) >> S2_P1_SHIFT; + p1[3] = (qfprom_cdata[1] & S3_P1_MASK) >> S3_P1_SHIFT; + p1[4] = (qfprom_cdata[1] & S4_P1_MASK) >> S4_P1_SHIFT; + for (i = 0; i < tmdev->num_sensors; i++) + p1[i] = (((base0) + p1[i]) << 3); + break; + default: + for (i = 0; i < tmdev->num_sensors; i++) { + p1[i] = 500; + p2[i] = 780; + } + break; + } + + compute_intercept_slope(tmdev, p1, p2, mode); + + return 0; +} + +static const struct tsens_ops ops_8916 
= { + .init = init_common, + .calibrate = calibrate_8916, + .get_temp = get_temp_common, +}; + +const struct tsens_data data_8916 = { + .num_sensors = 5, + .ops = &ops_8916, + .hw_ids = (unsigned int []){0, 1, 2, 4, 5 }, +}; diff --git a/drivers/thermal/qcom/tsens-8960.c b/drivers/thermal/qcom/tsens-8960.c new file mode 100644 index 000000000000..0451277d3a8f --- /dev/null +++ b/drivers/thermal/qcom/tsens-8960.c @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/platform_device.h> +#include <linux/delay.h> +#include <linux/bitops.h> +#include <linux/regmap.h> +#include <linux/thermal.h> +#include "tsens.h" + +#define CAL_MDEGC 30000 + +#define CONFIG_ADDR 0x3640 +#define CONFIG_ADDR_8660 0x3620 +/* CONFIG_ADDR bitmasks */ +#define CONFIG 0x9b +#define CONFIG_MASK 0xf +#define CONFIG_8660 1 +#define CONFIG_SHIFT_8660 28 +#define CONFIG_MASK_8660 (3 << CONFIG_SHIFT_8660) + +#define STATUS_CNTL_ADDR_8064 0x3660 +#define CNTL_ADDR 0x3620 +/* CNTL_ADDR bitmasks */ +#define EN BIT(0) +#define SW_RST BIT(1) +#define SENSOR0_EN BIT(3) +#define SLP_CLK_ENA BIT(26) +#define SLP_CLK_ENA_8660 BIT(24) +#define MEASURE_PERIOD 1 +#define SENSOR0_SHIFT 3 + +/* INT_STATUS_ADDR bitmasks */ +#define MIN_STATUS_MASK BIT(0) +#define LOWER_STATUS_CLR BIT(1) +#define UPPER_STATUS_CLR BIT(2) +#define MAX_STATUS_MASK BIT(3) + +#define THRESHOLD_ADDR 0x3624 +/* THRESHOLD_ADDR bitmasks */ +#define THRESHOLD_MAX_LIMIT_SHIFT 24 +#define THRESHOLD_MIN_LIMIT_SHIFT 16 +#define THRESHOLD_UPPER_LIMIT_SHIFT 8 +#define THRESHOLD_LOWER_LIMIT_SHIFT 0 + +/* Initial temperature threshold values */ +#define LOWER_LIMIT_TH 0x50 +#define UPPER_LIMIT_TH 0xdf +#define MIN_LIMIT_TH 0x0 +#define MAX_LIMIT_TH 0xff + +#define S0_STATUS_ADDR 0x3628 +#define INT_STATUS_ADDR 0x363c +#define TRDY_MASK BIT(7) +#define TIMEOUT_US 100 + +static int suspend_8960(struct tsens_device *tmdev) +{ + int ret; + unsigned int mask; + struct regmap *map = tmdev->map; + + ret = regmap_read(map, THRESHOLD_ADDR, &tmdev->ctx.threshold); + if (ret) + return ret; + + ret = regmap_read(map, CNTL_ADDR, &tmdev->ctx.control); + if (ret) + return ret; + + if (tmdev->num_sensors > 1) + mask = SLP_CLK_ENA | EN; + else + mask = SLP_CLK_ENA_8660 | EN; + + ret = regmap_update_bits(map, CNTL_ADDR, mask, 0); + if (ret) + return ret; + + return 0; +} + +static int resume_8960(struct tsens_device *tmdev) +{ + int ret; + struct regmap *map = tmdev->map; + + ret = regmap_update_bits(map, CNTL_ADDR, SW_RST, SW_RST); + if (ret) + return ret; + + /* + * A separate CONFIG restore is not needed on 8660, as the config + * is part of the CNTL register and is restored along with it + */ + if (tmdev->num_sensors > 1) { + ret = regmap_update_bits(map, CONFIG_ADDR, CONFIG_MASK, CONFIG); + if (ret) + return ret; + } + + ret = regmap_write(map, THRESHOLD_ADDR, tmdev->ctx.threshold); + if (ret) + return ret; + + ret = regmap_write(map, CNTL_ADDR, tmdev->ctx.control); + if (ret) + return ret; + + return 0; +} + +static int enable_8960(struct tsens_device *tmdev, int id) +{ + int
ret; + u32 reg, mask; + + ret = regmap_read(tmdev->map, CNTL_ADDR, ®); + if (ret) + return ret; + + mask = BIT(id + SENSOR0_SHIFT); + ret = regmap_write(tmdev->map, CNTL_ADDR, reg | SW_RST); + if (ret) + return ret; + + if (tmdev->num_sensors > 1) + reg |= mask | SLP_CLK_ENA | EN; + else + reg |= mask | SLP_CLK_ENA_8660 | EN; + + ret = regmap_write(tmdev->map, CNTL_ADDR, reg); + if (ret) + return ret; + + return 0; +} + +static void disable_8960(struct tsens_device *tmdev) +{ + int ret; + u32 reg_cntl; + u32 mask; + + mask = GENMASK(tmdev->num_sensors - 1, 0); + mask <<= SENSOR0_SHIFT; + mask |= EN; + + ret = regmap_read(tmdev->map, CNTL_ADDR, ®_cntl); + if (ret) + return; + + reg_cntl &= ~mask; + + if (tmdev->num_sensors > 1) + reg_cntl &= ~SLP_CLK_ENA; + else + reg_cntl &= ~SLP_CLK_ENA_8660; + + regmap_write(tmdev->map, CNTL_ADDR, reg_cntl); +} + +static int init_8960(struct tsens_device *tmdev) +{ + int ret, i; + u32 reg_cntl; + + tmdev->map = dev_get_regmap(tmdev->dev, NULL); + if (!tmdev->map) + return -ENODEV; + + /* + * The status registers for each sensor are discontiguous + * because some SoCs have 5 sensors while others have more + * but the control registers stay in the same place, i.e + * directly after the first 5 status registers. + */ + for (i = 0; i < tmdev->num_sensors; i++) { + if (i >= 5) + tmdev->sensor[i].status = S0_STATUS_ADDR + 40; + tmdev->sensor[i].status += i * 4; + } + + reg_cntl = SW_RST; + ret = regmap_update_bits(tmdev->map, CNTL_ADDR, SW_RST, reg_cntl); + if (ret) + return ret; + + if (tmdev->num_sensors > 1) { + reg_cntl |= SLP_CLK_ENA | (MEASURE_PERIOD << 18); + reg_cntl &= ~SW_RST; + ret = regmap_update_bits(tmdev->map, CONFIG_ADDR, + CONFIG_MASK, CONFIG); + } else { + reg_cntl |= SLP_CLK_ENA_8660 | (MEASURE_PERIOD << 16); + reg_cntl &= ~CONFIG_MASK_8660; + reg_cntl |= CONFIG_8660 << CONFIG_SHIFT_8660; + } + + reg_cntl |= GENMASK(tmdev->num_sensors - 1, 0) << SENSOR0_SHIFT; + ret = regmap_write(tmdev->map, CNTL_ADDR, reg_cntl); + if (ret) + return ret; + + reg_cntl |= EN; + ret = regmap_write(tmdev->map, CNTL_ADDR, reg_cntl); + if (ret) + return ret; + + return 0; +} + +static int calibrate_8960(struct tsens_device *tmdev) +{ + int i; + char *data; + + ssize_t num_read = tmdev->num_sensors; + struct tsens_sensor *s = tmdev->sensor; + + data = qfprom_read(tmdev->dev, "calib"); + if (IS_ERR(data)) + data = qfprom_read(tmdev->dev, "calib_backup"); + if (IS_ERR(data)) + return PTR_ERR(data); + + for (i = 0; i < num_read; i++, s++) + s->offset = data[i]; + + return 0; +} + +/* Temperature on y axis and ADC-code on x-axis */ +static inline int code_to_mdegC(u32 adc_code, const struct tsens_sensor *s) +{ + int slope, offset; + + slope = thermal_zone_get_slope(s->tzd); + offset = CAL_MDEGC - slope * s->offset; + + return adc_code * slope + offset; +} + +static int get_temp_8960(struct tsens_device *tmdev, int id, int *temp) +{ + int ret; + u32 code, trdy; + const struct tsens_sensor *s = &tmdev->sensor[id]; + unsigned long timeout; + + timeout = jiffies + usecs_to_jiffies(TIMEOUT_US); + do { + ret = regmap_read(tmdev->map, INT_STATUS_ADDR, &trdy); + if (ret) + return ret; + if (!(trdy & TRDY_MASK)) + continue; + ret = regmap_read(tmdev->map, s->status, &code); + if (ret) + return ret; + *temp = code_to_mdegC(code, s); + return 0; + } while (time_before(jiffies, timeout)); + + return -ETIMEDOUT; +} + +static const struct tsens_ops ops_8960 = { + .init = init_8960, + .calibrate = calibrate_8960, + .get_temp = get_temp_8960, + .enable = enable_8960, + .disable = 
disable_8960, + .suspend = suspend_8960, + .resume = resume_8960, +}; + +const struct tsens_data data_8960 = { + .num_sensors = 11, + .ops = &ops_8960, +}; diff --git a/drivers/thermal/qcom/tsens-8974.c b/drivers/thermal/qcom/tsens-8974.c new file mode 100644 index 000000000000..9baf77e8cbe3 --- /dev/null +++ b/drivers/thermal/qcom/tsens-8974.c @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/platform_device.h> +#include "tsens.h" + +/* eeprom layout data for 8974 */ +#define BASE1_MASK 0xff +#define S0_P1_MASK 0x3f00 +#define S1_P1_MASK 0xfc000 +#define S2_P1_MASK 0x3f00000 +#define S3_P1_MASK 0xfc000000 +#define S4_P1_MASK 0x3f +#define S5_P1_MASK 0xfc0 +#define S6_P1_MASK 0x3f000 +#define S7_P1_MASK 0xfc0000 +#define S8_P1_MASK 0x3f000000 +#define S8_P1_MASK_BKP 0x3f +#define S9_P1_MASK 0x3f +#define S9_P1_MASK_BKP 0xfc0 +#define S10_P1_MASK 0xfc0 +#define S10_P1_MASK_BKP 0x3f000 +#define CAL_SEL_0_1 0xc0000000 +#define CAL_SEL_2 0x40000000 +#define CAL_SEL_SHIFT 30 +#define CAL_SEL_SHIFT_2 28 + +#define S0_P1_SHIFT 8 +#define S1_P1_SHIFT 14 +#define S2_P1_SHIFT 20 +#define S3_P1_SHIFT 26 +#define S5_P1_SHIFT 6 +#define S6_P1_SHIFT 12 +#define S7_P1_SHIFT 18 +#define S8_P1_SHIFT 24 +#define S9_P1_BKP_SHIFT 6 +#define S10_P1_SHIFT 6 +#define S10_P1_BKP_SHIFT 12 + +#define BASE2_SHIFT 12 +#define BASE2_BKP_SHIFT 18 +#define S0_P2_SHIFT 20 +#define S0_P2_BKP_SHIFT 26 +#define S1_P2_SHIFT 26 +#define S2_P2_BKP_SHIFT 6 +#define S3_P2_SHIFT 6 +#define S3_P2_BKP_SHIFT 12 +#define S4_P2_SHIFT 12 +#define S4_P2_BKP_SHIFT 18 +#define S5_P2_SHIFT 18 +#define S5_P2_BKP_SHIFT 24 +#define S6_P2_SHIFT 24 +#define S7_P2_BKP_SHIFT 6 +#define S8_P2_SHIFT 6 +#define S8_P2_BKP_SHIFT 12 +#define S9_P2_SHIFT 12 +#define S9_P2_BKP_SHIFT 18 +#define S10_P2_SHIFT 18 +#define S10_P2_BKP_SHIFT 24 + +#define BASE2_MASK 0xff000 +#define BASE2_BKP_MASK 0xfc0000 +#define S0_P2_MASK 0x3f00000 +#define S0_P2_BKP_MASK 0xfc000000 +#define S1_P2_MASK 0xfc000000 +#define S1_P2_BKP_MASK 0x3f +#define S2_P2_MASK 0x3f +#define S2_P2_BKP_MASK 0xfc0 +#define S3_P2_MASK 0xfc0 +#define S3_P2_BKP_MASK 0x3f000 +#define S4_P2_MASK 0x3f000 +#define S4_P2_BKP_MASK 0xfc0000 +#define S5_P2_MASK 0xfc0000 +#define S5_P2_BKP_MASK 0x3f000000 +#define S6_P2_MASK 0x3f000000 +#define S6_P2_BKP_MASK 0x3f +#define S7_P2_MASK 0x3f +#define S7_P2_BKP_MASK 0xfc0 +#define S8_P2_MASK 0xfc0 +#define S8_P2_BKP_MASK 0x3f000 +#define S9_P2_MASK 0x3f000 +#define S9_P2_BKP_MASK 0xfc0000 +#define S10_P2_MASK 0xfc0000 +#define S10_P2_BKP_MASK 0x3f000000 + +#define BKP_SEL 0x3 +#define BKP_REDUN_SEL 0xe0000000 +#define BKP_REDUN_SHIFT 29 + +#define BIT_APPEND 0x3 + +static int calibrate_8974(struct tsens_device *tmdev) +{ + int base1 = 0, base2 = 0, i; + u32 p1[11], p2[11]; + int mode = 0; + u32 *calib, *bkp; + u32 calib_redun_sel; + + calib = (u32 *)qfprom_read(tmdev->dev, "calib"); + if (IS_ERR(calib)) + return PTR_ERR(calib); + + bkp = (u32 *)qfprom_read(tmdev->dev, "calib_backup"); + if (IS_ERR(bkp)) + return PTR_ERR(bkp); + + 
calib_redun_sel = bkp[1] & BKP_REDUN_SEL; + calib_redun_sel >>= BKP_REDUN_SHIFT; + + if (calib_redun_sel == BKP_SEL) { + mode = (calib[4] & CAL_SEL_0_1) >> CAL_SEL_SHIFT; + mode |= (calib[5] & CAL_SEL_2) >> CAL_SEL_SHIFT_2; + + switch (mode) { + case TWO_PT_CALIB: + base2 = (bkp[2] & BASE2_BKP_MASK) >> BASE2_BKP_SHIFT; + p2[0] = (bkp[2] & S0_P2_BKP_MASK) >> S0_P2_BKP_SHIFT; + p2[1] = (bkp[3] & S1_P2_BKP_MASK); + p2[2] = (bkp[3] & S2_P2_BKP_MASK) >> S2_P2_BKP_SHIFT; + p2[3] = (bkp[3] & S3_P2_BKP_MASK) >> S3_P2_BKP_SHIFT; + p2[4] = (bkp[3] & S4_P2_BKP_MASK) >> S4_P2_BKP_SHIFT; + p2[5] = (calib[4] & S5_P2_BKP_MASK) >> S5_P2_BKP_SHIFT; + p2[6] = (calib[5] & S6_P2_BKP_MASK); + p2[7] = (calib[5] & S7_P2_BKP_MASK) >> S7_P2_BKP_SHIFT; + p2[8] = (calib[5] & S8_P2_BKP_MASK) >> S8_P2_BKP_SHIFT; + p2[9] = (calib[5] & S9_P2_BKP_MASK) >> S9_P2_BKP_SHIFT; + p2[10] = (calib[5] & S10_P2_BKP_MASK) >> S10_P2_BKP_SHIFT; + /* Fall through */ + case ONE_PT_CALIB: + case ONE_PT_CALIB2: + base1 = bkp[0] & BASE1_MASK; + p1[0] = (bkp[0] & S0_P1_MASK) >> S0_P1_SHIFT; + p1[1] = (bkp[0] & S1_P1_MASK) >> S1_P1_SHIFT; + p1[2] = (bkp[0] & S2_P1_MASK) >> S2_P1_SHIFT; + p1[3] = (bkp[0] & S3_P1_MASK) >> S3_P1_SHIFT; + p1[4] = (bkp[1] & S4_P1_MASK); + p1[5] = (bkp[1] & S5_P1_MASK) >> S5_P1_SHIFT; + p1[6] = (bkp[1] & S6_P1_MASK) >> S6_P1_SHIFT; + p1[7] = (bkp[1] & S7_P1_MASK) >> S7_P1_SHIFT; + p1[8] = (bkp[2] & S8_P1_MASK_BKP) >> S8_P1_SHIFT; + p1[9] = (bkp[2] & S9_P1_MASK_BKP) >> S9_P1_BKP_SHIFT; + p1[10] = (bkp[2] & S10_P1_MASK_BKP) >> S10_P1_BKP_SHIFT; + break; + } + } else { + mode = (calib[1] & CAL_SEL_0_1) >> CAL_SEL_SHIFT; + mode |= (calib[3] & CAL_SEL_2) >> CAL_SEL_SHIFT_2; + + switch (mode) { + case TWO_PT_CALIB: + base2 = (calib[2] & BASE2_MASK) >> BASE2_SHIFT; + p2[0] = (calib[2] & S0_P2_MASK) >> S0_P2_SHIFT; + p2[1] = (calib[2] & S1_P2_MASK) >> S1_P2_SHIFT; + p2[2] = (calib[3] & S2_P2_MASK); + p2[3] = (calib[3] & S3_P2_MASK) >> S3_P2_SHIFT; + p2[4] = (calib[3] & S4_P2_MASK) >> S4_P2_SHIFT; + p2[5] = (calib[3] & S5_P2_MASK) >> S5_P2_SHIFT; + p2[6] = (calib[3] & S6_P2_MASK) >> S6_P2_SHIFT; + p2[7] = (calib[4] & S7_P2_MASK); + p2[8] = (calib[4] & S8_P2_MASK) >> S8_P2_SHIFT; + p2[9] = (calib[4] & S9_P2_MASK) >> S9_P2_SHIFT; + p2[10] = (calib[4] & S10_P2_MASK) >> S10_P2_SHIFT; + /* Fall through */ + case ONE_PT_CALIB: + case ONE_PT_CALIB2: + base1 = calib[0] & BASE1_MASK; + p1[0] = (calib[0] & S0_P1_MASK) >> S0_P1_SHIFT; + p1[1] = (calib[0] & S1_P1_MASK) >> S1_P1_SHIFT; + p1[2] = (calib[0] & S2_P1_MASK) >> S2_P1_SHIFT; + p1[3] = (calib[0] & S3_P1_MASK) >> S3_P1_SHIFT; + p1[4] = (calib[1] & S4_P1_MASK); + p1[5] = (calib[1] & S5_P1_MASK) >> S5_P1_SHIFT; + p1[6] = (calib[1] & S6_P1_MASK) >> S6_P1_SHIFT; + p1[7] = (calib[1] & S7_P1_MASK) >> S7_P1_SHIFT; + p1[8] = (calib[1] & S8_P1_MASK) >> S8_P1_SHIFT; + p1[9] = (calib[2] & S9_P1_MASK); + p1[10] = (calib[2] & S10_P1_MASK) >> S10_P1_SHIFT; + break; + } + } + + switch (mode) { + case ONE_PT_CALIB: + for (i = 0; i < tmdev->num_sensors; i++) + p1[i] += (base1 << 2) | BIT_APPEND; + break; + case TWO_PT_CALIB: + for (i = 0; i < tmdev->num_sensors; i++) { + p2[i] += base2; + p2[i] <<= 2; + p2[i] |= BIT_APPEND; + } + /* Fall through */ + case ONE_PT_CALIB2: + for (i = 0; i < tmdev->num_sensors; i++) { + p1[i] += base1; + p1[i] <<= 2; + p1[i] |= BIT_APPEND; + } + break; + default: + for (i = 0; i < tmdev->num_sensors; i++) + p2[i] = 780; + p1[0] = 502; + p1[1] = 509; + p1[2] = 503; + p1[3] = 509; + p1[4] = 505; + p1[5] = 509; + p1[6] = 507; + p1[7] = 510; + p1[8] = 508; + p1[9] = 
509; + p1[10] = 508; + break; + } + + compute_intercept_slope(tmdev, p1, p2, mode); + + return 0; +} + +static const struct tsens_ops ops_8974 = { + .init = init_common, + .calibrate = calibrate_8974, + .get_temp = get_temp_common, +}; + +const struct tsens_data data_8974 = { + .num_sensors = 11, + .ops = &ops_8974, +}; diff --git a/drivers/thermal/qcom/tsens-8996.c b/drivers/thermal/qcom/tsens-8996.c new file mode 100644 index 000000000000..e1f77818d8fa --- /dev/null +++ b/drivers/thermal/qcom/tsens-8996.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include "tsens.h" + +#define STATUS_OFFSET 0x10a0 +#define LAST_TEMP_MASK 0xfff +#define STATUS_VALID_BIT BIT(21) +#define CODE_SIGN_BIT BIT(11) + +static int get_temp_8996(struct tsens_device *tmdev, int id, int *temp) +{ + struct tsens_sensor *s = &tmdev->sensor[id]; + u32 code; + unsigned int sensor_addr; + int last_temp = 0, last_temp2 = 0, last_temp3 = 0, ret; + + sensor_addr = STATUS_OFFSET + s->hw_id * 4; + ret = regmap_read(tmdev->map, sensor_addr, &code); + if (ret) + return ret; + last_temp = code & LAST_TEMP_MASK; + if (code & STATUS_VALID_BIT) + goto done; + + /* Try a second time */ + ret = regmap_read(tmdev->map, sensor_addr, &code); + if (ret) + return ret; + if (code & STATUS_VALID_BIT) { + last_temp = code & LAST_TEMP_MASK; + goto done; + } else { + last_temp2 = code & LAST_TEMP_MASK; + } + + /* Try a third/last time */ + ret = regmap_read(tmdev->map, sensor_addr, &code); + if (ret) + return ret; + if (code & STATUS_VALID_BIT) { + last_temp = code & LAST_TEMP_MASK; + goto done; + } else { + last_temp3 = code & LAST_TEMP_MASK; + } + + if (last_temp == last_temp2) + last_temp = last_temp2; + else if (last_temp2 == last_temp3) + last_temp = last_temp3; +done: + /* Code sign bit is the sign extension for a negative value */ + if (last_temp & CODE_SIGN_BIT) + last_temp |= ~CODE_SIGN_BIT; + + /* Temperatures are in deciCelsius */ + *temp = last_temp * 100; + + return 0; +} + +static const struct tsens_ops ops_8996 = { + .init = init_common, + .get_temp = get_temp_8996, +}; + +const struct tsens_data data_8996 = { + .num_sensors = 13, + .ops = &ops_8996, +}; diff --git a/drivers/thermal/qcom/tsens-common.c b/drivers/thermal/qcom/tsens-common.c new file mode 100644 index 000000000000..b1449ad67fc0 --- /dev/null +++ b/drivers/thermal/qcom/tsens-common.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + */ + +#include <linux/err.h> +#include <linux/io.h> +#include <linux/nvmem-consumer.h> +#include <linux/of_address.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include "tsens.h" + +#define S0_ST_ADDR 0x1030 +#define SN_ADDR_OFFSET 0x4 +#define SN_ST_TEMP_MASK 0x3ff +#define CAL_DEGC_PT1 30 +#define CAL_DEGC_PT2 120 +#define SLOPE_FACTOR 1000 +#define SLOPE_DEFAULT 3200 + +char *qfprom_read(struct device *dev, const char *cname) +{ + struct nvmem_cell *cell; + ssize_t data; + char *ret; + + cell = nvmem_cell_get(dev, cname); + if (IS_ERR(cell)) + return ERR_CAST(cell); + + ret = nvmem_cell_read(cell, &data); + nvmem_cell_put(cell); + + return ret; +} + +/* + * Use this function on devices where slope and offset calculations + * depend on calibration data read from qfprom. On others the slope + * and offset values are derived from tz->tzp->slope and tz->tzp->offset + * resp. + */ +void compute_intercept_slope(struct tsens_device *tmdev, u32 *p1, + u32 *p2, u32 mode) +{ + int i; + int num, den; + + for (i = 0; i < tmdev->num_sensors; i++) { + dev_dbg(tmdev->dev, + "sensor%d - data_point1:%#x data_point2:%#x\n", + i, p1[i], p2[i]); + + tmdev->sensor[i].slope = SLOPE_DEFAULT; + if (mode == TWO_PT_CALIB) { + /* + * slope (m) = adc_code2 - adc_code1 (y2 - y1)/ + * temp_120_degc - temp_30_degc (x2 - x1) + */ + num = p2[i] - p1[i]; + num *= SLOPE_FACTOR; + den = CAL_DEGC_PT2 - CAL_DEGC_PT1; + tmdev->sensor[i].slope = num / den; + } + + tmdev->sensor[i].offset = (p1[i] * SLOPE_FACTOR) - + (CAL_DEGC_PT1 * + tmdev->sensor[i].slope); + dev_dbg(tmdev->dev, "offset:%d\n", tmdev->sensor[i].offset); + } +} + +static inline int code_to_degc(u32 adc_code, const struct tsens_sensor *s) +{ + int degc, num, den; + + num = (adc_code * SLOPE_FACTOR) - s->offset; + den = s->slope; + + if (num > 0) + degc = num + (den / 2); + else if (num < 0) + degc = num - (den / 2); + else + degc = num; + + degc /= den; + + return degc; +} + +int get_temp_common(struct tsens_device *tmdev, int id, int *temp) +{ + struct tsens_sensor *s = &tmdev->sensor[id]; + u32 code; + unsigned int sensor_addr; + int last_temp = 0, ret; + + sensor_addr = S0_ST_ADDR + s->hw_id * SN_ADDR_OFFSET; + ret = regmap_read(tmdev->map, sensor_addr, &code); + if (ret) + return ret; + last_temp = code & SN_ST_TEMP_MASK; + + *temp = code_to_degc(last_temp, s) * 1000; + + return 0; +} + +static const struct regmap_config tsens_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, +}; + +int __init init_common(struct tsens_device *tmdev) +{ + void __iomem *base; + + base = of_iomap(tmdev->dev->of_node, 0); + if (!base) + return -EINVAL; + + tmdev->map = devm_regmap_init_mmio(tmdev->dev, base, &tsens_config); + if (IS_ERR(tmdev->map)) { + iounmap(base); + return PTR_ERR(tmdev->map); + } + + return 0; +} diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c new file mode 100644 index 000000000000..3f9fe6aa51cc --- /dev/null +++ b/drivers/thermal/qcom/tsens.c @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/err.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pm.h> +#include <linux/slab.h> +#include <linux/thermal.h> +#include "tsens.h" + +static int tsens_get_temp(void *data, int *temp) +{ + const struct tsens_sensor *s = data; + struct tsens_device *tmdev = s->tmdev; + + return tmdev->ops->get_temp(tmdev, s->id, temp); +} + +static int tsens_get_trend(void *p, int trip, enum thermal_trend *trend) +{ + const struct tsens_sensor *s = p; + struct tsens_device *tmdev = s->tmdev; + + if (tmdev->ops->get_trend) + return tmdev->ops->get_trend(tmdev, s->id, trend); + + return -ENOTSUPP; +} + +static int __maybe_unused tsens_suspend(struct device *dev) +{ + struct tsens_device *tmdev = dev_get_drvdata(dev); + + if (tmdev->ops && tmdev->ops->suspend) + return tmdev->ops->suspend(tmdev); + + return 0; +} + +static int __maybe_unused tsens_resume(struct device *dev) +{ + struct tsens_device *tmdev = dev_get_drvdata(dev); + + if (tmdev->ops && tmdev->ops->resume) + return tmdev->ops->resume(tmdev); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(tsens_pm_ops, tsens_suspend, tsens_resume); + +static const struct of_device_id tsens_table[] = { + { + .compatible = "qcom,msm8916-tsens", + .data = &data_8916, + }, { + .compatible = "qcom,msm8974-tsens", + .data = &data_8974, + }, { + .compatible = "qcom,msm8996-tsens", + .data = &data_8996, + }, + {} +}; +MODULE_DEVICE_TABLE(of, tsens_table); + +static const struct thermal_zone_of_device_ops tsens_of_ops = { + .get_temp = tsens_get_temp, + .get_trend = tsens_get_trend, +}; + +static int tsens_register(struct tsens_device *tmdev) +{ + int i; + struct thermal_zone_device *tzd; + u32 *hw_id, n = tmdev->num_sensors; + + hw_id = devm_kcalloc(tmdev->dev, n, sizeof(u32), GFP_KERNEL); + if (!hw_id) + return -ENOMEM; + + for (i = 0; i < tmdev->num_sensors; i++) { + tmdev->sensor[i].tmdev = tmdev; + tmdev->sensor[i].id = i; + tzd = devm_thermal_zone_of_sensor_register(tmdev->dev, i, + &tmdev->sensor[i], + &tsens_of_ops); + if (IS_ERR(tzd)) + continue; + tmdev->sensor[i].tzd = tzd; + if (tmdev->ops->enable) + tmdev->ops->enable(tmdev, i); + } + return 0; +} + +static int tsens_probe(struct platform_device *pdev) +{ + int ret, i; + struct device *dev; + struct device_node *np; + struct tsens_sensor *s; + struct tsens_device *tmdev; + const struct tsens_data *data; + const struct of_device_id *id; + + if (pdev->dev.of_node) + dev = &pdev->dev; + else + dev = pdev->dev.parent; + + np = dev->of_node; + + id = of_match_node(tsens_table, np); + if (id) + data = id->data; + else + data = &data_8960; + + if (data->num_sensors <= 0) { + dev_err(dev, "invalid number of sensors\n"); + return -EINVAL; + } + + tmdev = devm_kzalloc(dev, sizeof(*tmdev) + + data->num_sensors * sizeof(*s), GFP_KERNEL); + if (!tmdev) + return -ENOMEM; + + tmdev->dev = dev; + tmdev->num_sensors = data->num_sensors; + tmdev->ops = data->ops; + for (i = 0; i < tmdev->num_sensors; i++) { + if (data->hw_ids) + tmdev->sensor[i].hw_id = data->hw_ids[i]; + else + tmdev->sensor[i].hw_id = i; + } + + if (!tmdev->ops || !tmdev->ops->init || !tmdev->ops->get_temp) + return -EINVAL; + + ret = tmdev->ops->init(tmdev); + if (ret < 0) { + dev_err(dev, "tsens init failed\n"); + return ret; + } + + if (tmdev->ops->calibrate) { + ret = tmdev->ops->calibrate(tmdev); + if (ret < 0) { + dev_err(dev, "tsens calibration failed\n"); + return ret; + } + } + + ret = tsens_register(tmdev); + + platform_set_drvdata(pdev, tmdev); + + return ret; +} + 
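The conversion used by get_temp_common() is just the straight line that compute_intercept_slope() fits through the calibration points (30 degC, p1) and (120 degC, p2). The same arithmetic as a standalone check, with invented ADC codes:

	#include <stdio.h>

	#define SLOPE_FACTOR	1000
	#define CAL_DEGC_PT1	30
	#define CAL_DEGC_PT2	120

	/* Invert code = (slope * degc + offset) / SLOPE_FACTOR, rounding to nearest. */
	static int code_to_degc(int code, int slope, int offset)
	{
		int num = code * SLOPE_FACTOR - offset;

		if (num > 0)
			num += slope / 2;
		else if (num < 0)
			num -= slope / 2;

		return num / slope;
	}

	int main(void)
	{
		int p1 = 505, p2 = 780;	/* invented ADC codes at 30 and 120 degC */
		int slope = (p2 - p1) * SLOPE_FACTOR / (CAL_DEGC_PT2 - CAL_DEGC_PT1);
		int offset = p1 * SLOPE_FACTOR - CAL_DEGC_PT1 * slope;

		/* slope=3055, offset=413350, so code 640 reads back as 74 degC */
		printf("%d degC\n", code_to_degc(640, slope, offset));
		return 0;
	}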
+static int tsens_remove(struct platform_device *pdev) +{ + struct tsens_device *tmdev = platform_get_drvdata(pdev); + + if (tmdev->ops->disable) + tmdev->ops->disable(tmdev); + + return 0; +} + +static struct platform_driver tsens_driver = { + .probe = tsens_probe, + .remove = tsens_remove, + .driver = { + .name = "qcom-tsens", + .pm = &tsens_pm_ops, + .of_match_table = tsens_table, + }, +}; +module_platform_driver(tsens_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("QCOM Temperature Sensor driver"); +MODULE_ALIAS("platform:qcom-tsens"); diff --git a/drivers/thermal/qcom/tsens.h b/drivers/thermal/qcom/tsens.h new file mode 100644 index 000000000000..911c1978892b --- /dev/null +++ b/drivers/thermal/qcom/tsens.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef __QCOM_TSENS_H__ +#define __QCOM_TSENS_H__ + +#define ONE_PT_CALIB 0x1 +#define ONE_PT_CALIB2 0x2 +#define TWO_PT_CALIB 0x3 + +#include <linux/thermal.h> + +struct tsens_device; + +struct tsens_sensor { + struct tsens_device *tmdev; + struct thermal_zone_device *tzd; + int offset; + int id; + int hw_id; + int slope; + u32 status; +}; + +/** + * struct tsens_ops - operations as supported by the tsens device + * @init: Function to initialize the tsens device + * @calibrate: Function to calibrate the tsens device + * @get_temp: Function which returns the temp in millidegC + * @enable: Function to enable (clocks/power) tsens device + * @disable: Function to disable the tsens device + * @suspend: Function to suspend the tsens device + * @resume: Function to resume the tsens device + * @get_trend: Function to get the thermal/temp trend + */ +struct tsens_ops { + /* mandatory callbacks */ + int (*init)(struct tsens_device *); + int (*calibrate)(struct tsens_device *); + int (*get_temp)(struct tsens_device *, int, int *); + /* optional callbacks */ + int (*enable)(struct tsens_device *, int); + void (*disable)(struct tsens_device *); + int (*suspend)(struct tsens_device *); + int (*resume)(struct tsens_device *); + int (*get_trend)(struct tsens_device *, int, enum thermal_trend *); +}; + +/** + * struct tsens_data - tsens instance specific data + * @num_sensors: Max number of sensors supported by platform + * @ops: operations the tsens instance supports + * @hw_ids: Subset of sensors ids supported by platform, if not the first n + */ +struct tsens_data { + const u32 num_sensors; + const struct tsens_ops *ops; + unsigned int *hw_ids; +}; + +/* Registers to be saved/restored across a context loss */ +struct tsens_context { + int threshold; + int control; +}; + +struct tsens_device { + struct device *dev; + u32 num_sensors; + struct regmap *map; + struct regmap_field *status_field; + struct tsens_context ctx; + bool trdy; + const struct tsens_ops *ops; + struct tsens_sensor sensor[0]; +}; + +char *qfprom_read(struct device *, const char *); +void compute_intercept_slope(struct tsens_device *, u32 *, u32 *, u32); +int init_common(struct tsens_device *); +int get_temp_common(struct tsens_device *, int, int *); + +extern const 
struct tsens_data data_8916, data_8974, data_8960, data_8996; + +#endif /* __QCOM_TSENS_H__ */ diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c new file mode 100644 index 000000000000..644ba526d9ea --- /dev/null +++ b/drivers/thermal/qoriq_thermal.c @@ -0,0 +1,328 @@ +/* + * Copyright 2016 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/thermal.h> + +#include "thermal_core.h" + +#define SITES_MAX 16 + +/* + * QorIQ TMU Registers + */ +struct qoriq_tmu_site_regs { + u32 tritsr; /* Immediate Temperature Site Register */ + u32 tratsr; /* Average Temperature Site Register */ + u8 res0[0x8]; +}; + +struct qoriq_tmu_regs { + u32 tmr; /* Mode Register */ +#define TMR_DISABLE 0x0 +#define TMR_ME 0x80000000 +#define TMR_ALPF 0x0c000000 + u32 tsr; /* Status Register */ + u32 tmtmir; /* Temperature measurement interval Register */ +#define TMTMIR_DEFAULT 0x0000000f + u8 res0[0x14]; + u32 tier; /* Interrupt Enable Register */ +#define TIER_DISABLE 0x0 + u32 tidr; /* Interrupt Detect Register */ + u32 tiscr; /* Interrupt Site Capture Register */ + u32 ticscr; /* Interrupt Critical Site Capture Register */ + u8 res1[0x10]; + u32 tmhtcrh; /* High Temperature Capture Register */ + u32 tmhtcrl; /* Low Temperature Capture Register */ + u8 res2[0x8]; + u32 tmhtitr; /* High Temperature Immediate Threshold */ + u32 tmhtatr; /* High Temperature Average Threshold */ + u32 tmhtactr; /* High Temperature Average Crit Threshold */ + u8 res3[0x24]; + u32 ttcfgr; /* Temperature Configuration Register */ + u32 tscfgr; /* Sensor Configuration Register */ + u8 res4[0x78]; + struct qoriq_tmu_site_regs site[SITES_MAX]; + u8 res5[0x9f8]; + u32 ipbrr0; /* IP Block Revision Register 0 */ + u32 ipbrr1; /* IP Block Revision Register 1 */ + u8 res6[0x310]; + u32 ttr0cr; /* Temperature Range 0 Control Register */ + u32 ttr1cr; /* Temperature Range 1 Control Register */ + u32 ttr2cr; /* Temperature Range 2 Control Register */ + u32 ttr3cr; /* Temperature Range 3 Control Register */ +}; + +/* + * Thermal zone data + */ +struct qoriq_tmu_data { + struct thermal_zone_device *tz; + struct qoriq_tmu_regs __iomem *regs; + int sensor_id; + bool little_endian; +}; + +static void tmu_write(struct qoriq_tmu_data *p, u32 val, void __iomem *addr) +{ + if (p->little_endian) + iowrite32(val, addr); + else + iowrite32be(val, addr); +} + +static u32 tmu_read(struct qoriq_tmu_data *p, void __iomem *addr) +{ + if (p->little_endian) + return ioread32(addr); + else + return ioread32be(addr); +} + +static int tmu_get_temp(void *p, int *temp) +{ + u32 val; + struct qoriq_tmu_data *data = p; + + val = tmu_read(data, &data->regs->site[data->sensor_id].tritsr); + *temp = (val & 0xff) * 1000; + + return 0; +} + +static int qoriq_tmu_get_sensor_id(void) +{ + int ret, id; + struct of_phandle_args sensor_specs; + struct device_node *np, *sensor_np; + + np = of_find_node_by_name(NULL, "thermal-zones"); + if 
(!np) + return -ENODEV; + + sensor_np = of_get_next_child(np, NULL); + ret = of_parse_phandle_with_args(sensor_np, "thermal-sensors", + "#thermal-sensor-cells", + 0, &sensor_specs); + if (ret) { + of_node_put(np); + of_node_put(sensor_np); + return ret; + } + + if (sensor_specs.args_count >= 1) { + id = sensor_specs.args[0]; + WARN(sensor_specs.args_count > 1, + "%s: too many cells in sensor specifier %d\n", + sensor_specs.np->name, sensor_specs.args_count); + } else { + id = 0; + } + + of_node_put(np); + of_node_put(sensor_np); + + return id; +} + +static int qoriq_tmu_calibration(struct platform_device *pdev) +{ + int i, val, len; + u32 range[4]; + const u32 *calibration; + struct device_node *np = pdev->dev.of_node; + struct qoriq_tmu_data *data = platform_get_drvdata(pdev); + + if (of_property_read_u32_array(np, "fsl,tmu-range", range, 4)) { + dev_err(&pdev->dev, "missing calibration range.\n"); + return -ENODEV; + } + + /* Init temperature range registers */ + tmu_write(data, range[0], &data->regs->ttr0cr); + tmu_write(data, range[1], &data->regs->ttr1cr); + tmu_write(data, range[2], &data->regs->ttr2cr); + tmu_write(data, range[3], &data->regs->ttr3cr); + + calibration = of_get_property(np, "fsl,tmu-calibration", &len); + if (calibration == NULL || len % 8) { + dev_err(&pdev->dev, "invalid calibration data.\n"); + return -ENODEV; + } + + for (i = 0; i < len; i += 8, calibration += 2) { + val = of_read_number(calibration, 1); + tmu_write(data, val, &data->regs->ttcfgr); + val = of_read_number(calibration + 1, 1); + tmu_write(data, val, &data->regs->tscfgr); + } + + return 0; +} + +static void qoriq_tmu_init_device(struct qoriq_tmu_data *data) +{ + /* Disable interrupt, using polling instead */ + tmu_write(data, TIER_DISABLE, &data->regs->tier); + + /* Set update_interval */ + tmu_write(data, TMTMIR_DEFAULT, &data->regs->tmtmir); + + /* Disable monitoring */ + tmu_write(data, TMR_DISABLE, &data->regs->tmr); +} + +static struct thermal_zone_of_device_ops tmu_tz_ops = { + .get_temp = tmu_get_temp, +}; + +static int qoriq_tmu_probe(struct platform_device *pdev) +{ + int ret; + const struct thermal_trip *trip; + struct qoriq_tmu_data *data; + struct device_node *np = pdev->dev.of_node; + u32 site = 0; + + if (!np) { + dev_err(&pdev->dev, "Device OF-Node is NULL"); + return -ENODEV; + } + + data = devm_kzalloc(&pdev->dev, sizeof(struct qoriq_tmu_data), + GFP_KERNEL); + if (!data) + return -ENOMEM; + + platform_set_drvdata(pdev, data); + + data->little_endian = of_property_read_bool(np, "little-endian"); + + data->sensor_id = qoriq_tmu_get_sensor_id(); + if (data->sensor_id < 0) { + dev_err(&pdev->dev, "Failed to get sensor id\n"); + ret = -ENODEV; + goto err_iomap; + } + + data->regs = of_iomap(np, 0); + if (!data->regs) { + dev_err(&pdev->dev, "Failed to get memory region\n"); + ret = -ENODEV; + goto err_iomap; + } + + qoriq_tmu_init_device(data); /* TMU initialization */ + + ret = qoriq_tmu_calibration(pdev); /* TMU calibration */ + if (ret < 0) + goto err_tmu; + + data->tz = thermal_zone_of_sensor_register(&pdev->dev, data->sensor_id, + data, &tmu_tz_ops); + if (IS_ERR(data->tz)) { + ret = PTR_ERR(data->tz); + dev_err(&pdev->dev, + "Failed to register thermal zone device %d\n", ret); + goto err_tmu; + } + + trip = of_thermal_get_trip_points(data->tz); + + /* Enable monitoring */ + site |= 0x1 << (15 - data->sensor_id); + tmu_write(data, site | TMR_ME | TMR_ALPF, &data->regs->tmr); + + return 0; + +err_tmu: + iounmap(data->regs); + +err_iomap: + platform_set_drvdata(pdev, NULL); + + 
return ret; +} + +static int qoriq_tmu_remove(struct platform_device *pdev) +{ + struct qoriq_tmu_data *data = platform_get_drvdata(pdev); + + thermal_zone_of_sensor_unregister(&pdev->dev, data->tz); + + /* Disable monitoring */ + tmu_write(data, TMR_DISABLE, &data->regs->tmr); + + iounmap(data->regs); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int qoriq_tmu_suspend(struct device *dev) +{ + u32 tmr; + struct qoriq_tmu_data *data = dev_get_drvdata(dev); + + /* Disable monitoring */ + tmr = tmu_read(data, &data->regs->tmr); + tmr &= ~TMR_ME; + tmu_write(data, tmr, &data->regs->tmr); + + return 0; +} + +static int qoriq_tmu_resume(struct device *dev) +{ + u32 tmr; + struct qoriq_tmu_data *data = dev_get_drvdata(dev); + + /* Enable monitoring */ + tmr = tmu_read(data, &data->regs->tmr); + tmr |= TMR_ME; + tmu_write(data, tmr, &data->regs->tmr); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(qoriq_tmu_pm_ops, + qoriq_tmu_suspend, qoriq_tmu_resume); + +static const struct of_device_id qoriq_tmu_match[] = { + { .compatible = "fsl,qoriq-tmu", }, + {}, +}; +MODULE_DEVICE_TABLE(of, qoriq_tmu_match); + +static struct platform_driver qoriq_tmu = { + .driver = { + .name = "qoriq_thermal", + .pm = &qoriq_tmu_pm_ops, + .of_match_table = qoriq_tmu_match, + }, + .probe = qoriq_tmu_probe, + .remove = qoriq_tmu_remove, +}; +module_platform_driver(qoriq_tmu); + +MODULE_AUTHOR("Jia Hongtao <hongtao.jia@nxp.com>"); +MODULE_DESCRIPTION("QorIQ Thermal Monitoring Unit driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index 5f817923f374..73e5fee6cf1d 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -31,6 +31,8 @@ #include <linux/spinlock.h> #include <linux/thermal.h> +#include "thermal_hwmon.h" + #define IDLE_INTERVAL 5000 #define COMMON_STR 0x00 @@ -75,6 +77,8 @@ struct rcar_thermal_priv { #define rcar_priv_to_dev(priv) ((priv)->common->dev) #define rcar_has_irq_support(priv) ((priv)->common->base) #define rcar_id_to_shift(priv) ((priv)->id * 8) +#define rcar_of_data(dev) ((unsigned long)of_device_get_match_data(dev)) +#define rcar_use_of_thermal(dev) (rcar_of_data(dev) == USE_OF_THERMAL) #define USE_OF_THERMAL 1 static const struct of_device_id rcar_thermal_dt_ids[] = { @@ -354,7 +358,8 @@ static void rcar_thermal_work(struct work_struct *work) return; if (nctemp != cctemp) - thermal_zone_device_update(priv->zone); + thermal_zone_device_update(priv->zone, + THERMAL_EVENT_UNSPECIFIED); } static u32 rcar_thermal_had_changed(struct rcar_thermal_priv *priv, u32 status) @@ -415,7 +420,10 @@ static int rcar_thermal_remove(struct platform_device *pdev) rcar_thermal_for_each_priv(priv, common) { rcar_thermal_irq_disable(priv); - thermal_zone_device_unregister(priv->zone); + if (rcar_use_of_thermal(dev)) + thermal_remove_hwmon_sysfs(priv->zone); + else + thermal_zone_device_unregister(priv->zone); } pm_runtime_put(dev); @@ -430,7 +438,6 @@ static int rcar_thermal_probe(struct platform_device *pdev) struct rcar_thermal_priv *priv; struct device *dev = &pdev->dev; struct resource *res, *irq; - unsigned long of_data = (unsigned long)of_device_get_match_data(dev); int mres = 0; int i; int ret = -ENODEV; @@ -491,7 +498,7 @@ static int rcar_thermal_probe(struct platform_device *pdev) if (ret < 0) goto error_unregister; - if (of_data == USE_OF_THERMAL) + if (rcar_use_of_thermal(dev)) priv->zone = devm_thermal_zone_of_sensor_register( dev, i, priv, &rcar_thermal_zone_of_ops); @@ 
-508,6 +515,17 @@ static int rcar_thermal_probe(struct platform_device *pdev) goto error_unregister; } + if (rcar_use_of_thermal(dev)) { + /* + * The thermal zone core does not enable hwmon by default; + * enable it here to stay compatible with the old behaviour + */ + priv->zone->tzp->no_hwmon = false; + ret = thermal_add_hwmon_sysfs(priv->zone); + if (ret) + goto error_unregister; + } + rcar_thermal_irq_enable(priv); list_move_tail(&priv->list, &common->head); diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c index 5d491f16a866..e227a9f0acf7 100644 --- a/drivers/thermal/rockchip_thermal.c +++ b/drivers/thermal/rockchip_thermal.c @@ -96,6 +96,7 @@ struct chip_tsadc_table { * @initialize: SoC special initialize tsadc controller method * @irq_ack: clear the interrupt * @get_temp: get the temperature + * @set_alarm_temp: set the high temperature interrupt * @set_tshut_temp: set the hardware-controlled shutdown temperature * @set_tshut_mode: set the hardware-controlled shutdown mode * @table: the chip-specific conversion table @@ -119,6 +120,8 @@ struct rockchip_tsadc_chip { /* Per-sensor methods */ int (*get_temp)(struct chip_tsadc_table table, int chn, void __iomem *reg, int *temp); + void (*set_alarm_temp)(struct chip_tsadc_table table, + int chn, void __iomem *reg, int temp); void (*set_tshut_temp)(struct chip_tsadc_table table, int chn, void __iomem *reg, int temp); void (*set_tshut_mode)(int chn, void __iomem *reg, enum tshut_mode m); @@ -183,6 +186,7 @@ struct rockchip_thermal_data { #define TSADCV2_INT_EN 0x08 #define TSADCV2_INT_PD 0x0c #define TSADCV2_DATA(chn) (0x20 + (chn) * 0x04) +#define TSADCV2_COMP_INT(chn) (0x30 + (chn) * 0x04) #define TSADCV2_COMP_SHUT(chn) (0x40 + (chn) * 0x04) #define TSADCV2_HIGHT_INT_DEBOUNCE 0x60 #define TSADCV2_HIGHT_TSHUT_DEBOUNCE 0x64 @@ -207,18 +211,21 @@ struct rockchip_thermal_data { #define TSADCV2_HIGHT_INT_DEBOUNCE_COUNT 4 #define TSADCV2_HIGHT_TSHUT_DEBOUNCE_COUNT 4 -#define TSADCV2_AUTO_PERIOD_TIME 250 /* msec */ -#define TSADCV2_AUTO_PERIOD_HT_TIME 50 /* msec */ +#define TSADCV2_AUTO_PERIOD_TIME 250 /* 250ms */ +#define TSADCV2_AUTO_PERIOD_HT_TIME 50 /* 50ms */ +#define TSADCV3_AUTO_PERIOD_TIME 1875 /* 2.5ms */ +#define TSADCV3_AUTO_PERIOD_HT_TIME 1875 /* 2.5ms */ + #define TSADCV2_USER_INTER_PD_SOC 0x340 /* 13 clocks */ #define GRF_SARADC_TESTBIT 0x0e644 #define GRF_TSADC_TESTBIT_L 0x0e648 #define GRF_TSADC_TESTBIT_H 0x0e64c -#define GRF_TSADC_TSEN_PD_ON (0x30003 << 0) -#define GRF_TSADC_TSEN_PD_OFF (0x30000 << 0) #define GRF_SARADC_TESTBIT_ON (0x10001 << 2) #define GRF_TSADC_TESTBIT_H_ON (0x10001 << 2) +#define GRF_TSADC_VCM_EN_L (0x10001 << 7) +#define GRF_TSADC_VCM_EN_H (0x10001 << 7) /** * struct tsadc_table - code to temperature conversion table @@ -394,13 +401,17 @@ static u32 rk_tsadcv2_temp_to_code(struct chip_tsadc_table table, int temp) { int high, low, mid; + u32 error = 0; low = 0; high = table.length - 1; mid = (high + low) / 2; - if (temp < table.id[low].temp || temp > table.id[high].temp) - return 0; + /* Return the mask code when the temp is outside the table range */ + if (temp < table.id[low].temp || temp > table.id[high].temp) { + error = table.data_mask; + goto exit; + } while (low <= high) { if (temp == table.id[mid].temp) @@ -412,7 +423,9 @@ static u32 rk_tsadcv2_temp_to_code(struct chip_tsadc_table table, mid = (low + high) / 2; } - return 0; +exit: + pr_err("Invalid conversion, error=%d\n", error); + return error; } static int rk_tsadcv2_code_to_temp(struct chip_tsadc_table table, u32 code, @@ -543,14 +556,34 @@ 
static void rk_tsadcv3_initialize(struct regmap *grf, void __iomem *regs, /* Set interleave value to work around the IC time-sync issue */ writel_relaxed(TSADCV2_USER_INTER_PD_SOC, regs + TSADCV2_USER_CON); + + writel_relaxed(TSADCV2_AUTO_PERIOD_TIME, + regs + TSADCV2_AUTO_PERIOD); + writel_relaxed(TSADCV2_HIGHT_INT_DEBOUNCE_COUNT, + regs + TSADCV2_HIGHT_INT_DEBOUNCE); + writel_relaxed(TSADCV2_AUTO_PERIOD_HT_TIME, + regs + TSADCV2_AUTO_PERIOD_HT); + writel_relaxed(TSADCV2_HIGHT_TSHUT_DEBOUNCE_COUNT, + regs + TSADCV2_HIGHT_TSHUT_DEBOUNCE); + } else { - regmap_write(grf, GRF_TSADC_TESTBIT_L, GRF_TSADC_TSEN_PD_ON); - mdelay(10); - regmap_write(grf, GRF_TSADC_TESTBIT_L, GRF_TSADC_TSEN_PD_OFF); + /* Enable the voltage common mode feature */ + regmap_write(grf, GRF_TSADC_TESTBIT_L, GRF_TSADC_VCM_EN_L); + regmap_write(grf, GRF_TSADC_TESTBIT_H, GRF_TSADC_VCM_EN_H); + usleep_range(15, 100); /* The spec note says at least 15 us */ regmap_write(grf, GRF_SARADC_TESTBIT, GRF_SARADC_TESTBIT_ON); regmap_write(grf, GRF_TSADC_TESTBIT_H, GRF_TSADC_TESTBIT_H_ON); usleep_range(90, 200); /* The spec note says at least 90 us */ + + writel_relaxed(TSADCV3_AUTO_PERIOD_TIME, + regs + TSADCV2_AUTO_PERIOD); + writel_relaxed(TSADCV2_HIGHT_INT_DEBOUNCE_COUNT, + regs + TSADCV2_HIGHT_INT_DEBOUNCE); + writel_relaxed(TSADCV3_AUTO_PERIOD_HT_TIME, + regs + TSADCV2_AUTO_PERIOD_HT); + writel_relaxed(TSADCV2_HIGHT_TSHUT_DEBOUNCE_COUNT, + regs + TSADCV2_HIGHT_TSHUT_DEBOUNCE); } if (tshut_polarity == TSHUT_HIGH_ACTIVE) @@ -559,14 +592,6 @@ static void rk_tsadcv3_initialize(struct regmap *grf, void __iomem *regs, else writel_relaxed(0U & ~TSADCV2_AUTO_TSHUT_POLARITY_HIGH, regs + TSADCV2_AUTO_CON); - - writel_relaxed(TSADCV2_AUTO_PERIOD_TIME, regs + TSADCV2_AUTO_PERIOD); - writel_relaxed(TSADCV2_HIGHT_INT_DEBOUNCE_COUNT, - regs + TSADCV2_HIGHT_INT_DEBOUNCE); - writel_relaxed(TSADCV2_AUTO_PERIOD_HT_TIME, - regs + TSADCV2_AUTO_PERIOD_HT); - writel_relaxed(TSADCV2_HIGHT_TSHUT_DEBOUNCE_COUNT, - regs + TSADCV2_HIGHT_TSHUT_DEBOUNCE); } static void rk_tsadcv2_irq_ack(void __iomem *regs) @@ -628,12 +653,34 @@ static int rk_tsadcv2_get_temp(struct chip_tsadc_table table, return rk_tsadcv2_code_to_temp(table, val, temp); } +static void rk_tsadcv2_alarm_temp(struct chip_tsadc_table table, + int chn, void __iomem *regs, int temp) +{ + u32 alarm_value, int_en; + + /* Make sure the value is valid */ + alarm_value = rk_tsadcv2_temp_to_code(table, temp); + if (alarm_value == table.data_mask) + return; + + writel_relaxed(alarm_value & table.data_mask, + regs + TSADCV2_COMP_INT(chn)); + + int_en = readl_relaxed(regs + TSADCV2_INT_EN); + int_en |= TSADCV2_INT_SRC_EN(chn); + writel_relaxed(int_en, regs + TSADCV2_INT_EN); +} + static void rk_tsadcv2_tshut_temp(struct chip_tsadc_table table, int chn, void __iomem *regs, int temp) { u32 tshut_value, val; + /* Make sure the value is valid */ tshut_value = rk_tsadcv2_temp_to_code(table, temp); + if (tshut_value == table.data_mask) + return; + writel_relaxed(tshut_value, regs + TSADCV2_COMP_SHUT(chn)); /* TSHUT will be valid */ @@ -670,6 +717,7 @@ static const struct rockchip_tsadc_chip rk3228_tsadc_data = { .irq_ack = rk_tsadcv3_irq_ack, .control = rk_tsadcv3_control, .get_temp = rk_tsadcv2_get_temp, + .set_alarm_temp = rk_tsadcv2_alarm_temp, .set_tshut_temp = rk_tsadcv2_tshut_temp, .set_tshut_mode = rk_tsadcv2_tshut_mode, @@ -694,6 +742,7 @@ static const struct rockchip_tsadc_chip rk3288_tsadc_data = { .irq_ack = rk_tsadcv2_irq_ack, .control = rk_tsadcv2_control, .get_temp = rk_tsadcv2_get_temp, + 
.set_alarm_temp = rk_tsadcv2_alarm_temp, .set_tshut_temp = rk_tsadcv2_tshut_temp, .set_tshut_mode = rk_tsadcv2_tshut_mode, @@ -718,6 +767,7 @@ static const struct rockchip_tsadc_chip rk3366_tsadc_data = { .irq_ack = rk_tsadcv3_irq_ack, .control = rk_tsadcv3_control, .get_temp = rk_tsadcv2_get_temp, + .set_alarm_temp = rk_tsadcv2_alarm_temp, .set_tshut_temp = rk_tsadcv2_tshut_temp, .set_tshut_mode = rk_tsadcv2_tshut_mode, @@ -742,6 +792,7 @@ static const struct rockchip_tsadc_chip rk3368_tsadc_data = { .irq_ack = rk_tsadcv2_irq_ack, .control = rk_tsadcv2_control, .get_temp = rk_tsadcv2_get_temp, + .set_alarm_temp = rk_tsadcv2_alarm_temp, .set_tshut_temp = rk_tsadcv2_tshut_temp, .set_tshut_mode = rk_tsadcv2_tshut_mode, @@ -766,6 +817,7 @@ static const struct rockchip_tsadc_chip rk3399_tsadc_data = { .irq_ack = rk_tsadcv3_irq_ack, .control = rk_tsadcv3_control, .get_temp = rk_tsadcv2_get_temp, + .set_alarm_temp = rk_tsadcv2_alarm_temp, .set_tshut_temp = rk_tsadcv2_tshut_temp, .set_tshut_mode = rk_tsadcv2_tshut_mode, @@ -821,11 +873,27 @@ static irqreturn_t rockchip_thermal_alarm_irq_thread(int irq, void *dev) thermal->chip->irq_ack(thermal->regs); for (i = 0; i < thermal->chip->chn_num; i++) - thermal_zone_device_update(thermal->sensors[i].tzd); + thermal_zone_device_update(thermal->sensors[i].tzd, + THERMAL_EVENT_UNSPECIFIED); return IRQ_HANDLED; } +static int rockchip_thermal_set_trips(void *_sensor, int low, int high) +{ + struct rockchip_thermal_sensor *sensor = _sensor; + struct rockchip_thermal_data *thermal = sensor->thermal; + const struct rockchip_tsadc_chip *tsadc = thermal->chip; + + dev_dbg(&thermal->pdev->dev, "%s: sensor %d: low: %d, high %d\n", + __func__, sensor->id, low, high); + + tsadc->set_alarm_temp(tsadc->table, + sensor->id, thermal->regs, high); + + return 0; +} + static int rockchip_thermal_get_temp(void *_sensor, int *out_temp) { struct rockchip_thermal_sensor *sensor = _sensor; @@ -843,6 +911,7 @@ static int rockchip_thermal_get_temp(void *_sensor, int *out_temp) static const struct thermal_zone_of_device_ops rockchip_of_thermal_ops = { .get_temp = rockchip_thermal_get_temp, + .set_trips = rockchip_thermal_set_trips, }; static int rockchip_configure_from_dt(struct device *dev, diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index f3ce94ec73b5..ad1186dd6132 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -225,7 +225,7 @@ static void exynos_report_trigger(struct exynos_tmu_data *p) return; } - thermal_zone_device_update(tz); + thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); mutex_lock(&tz->lock); /* Find the level for which trip happened */ diff --git a/drivers/thermal/st/st_thermal_memmap.c b/drivers/thermal/st/st_thermal_memmap.c index fc0c9e198710..91d42319de27 100644 --- a/drivers/thermal/st/st_thermal_memmap.c +++ b/drivers/thermal/st/st_thermal_memmap.c @@ -42,7 +42,8 @@ static irqreturn_t st_mmap_thermal_trip_handler(int irq, void *sdata) { struct st_thermal_sensor *sensor = sdata; - thermal_zone_device_update(sensor->thermal_dev); + thermal_zone_device_update(sensor->thermal_dev, + THERMAL_EVENT_UNSPECIFIED); return IRQ_HANDLED; } diff --git a/drivers/thermal/tango_thermal.c b/drivers/thermal/tango_thermal.c index 70e0d9f406e9..201304aeafeb 100644 --- a/drivers/thermal/tango_thermal.c +++ b/drivers/thermal/tango_thermal.c @@ -64,6 +64,12 @@ static const struct thermal_zone_of_device_ops ops = { .get_temp = tango_get_temp, }; +static void 
tango_thermal_init(struct tango_thermal_priv *priv) +{ + writel(0, priv->base + TEMPSI_CFG); + writel(CMD_ON, priv->base + TEMPSI_CMD); +} + static int tango_thermal_probe(struct platform_device *pdev) { struct resource *res; @@ -79,14 +85,22 @@ static int tango_thermal_probe(struct platform_device *pdev) if (IS_ERR(priv->base)) return PTR_ERR(priv->base); + platform_set_drvdata(pdev, priv); priv->thresh_idx = IDX_MIN; - writel(0, priv->base + TEMPSI_CFG); - writel(CMD_ON, priv->base + TEMPSI_CMD); + tango_thermal_init(priv); tzdev = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, priv, &ops); return PTR_ERR_OR_ZERO(tzdev); } +static int __maybe_unused tango_thermal_resume(struct device *dev) +{ + tango_thermal_init(dev_get_drvdata(dev)); + return 0; +} + +static SIMPLE_DEV_PM_OPS(tango_thermal_pm, NULL, tango_thermal_resume); + static const struct of_device_id tango_sensor_ids[] = { { .compatible = "sigma,smp8758-thermal", @@ -99,6 +113,7 @@ static struct platform_driver tango_thermal_driver = { .driver = { .name = "tango-thermal", .of_match_table = tango_sensor_ids, + .pm = &tango_thermal_pm, }, }; diff --git a/drivers/thermal/tegra/soctherm.c b/drivers/thermal/tegra/soctherm.c index b8651726201e..7d2db23d71a3 100644 --- a/drivers/thermal/tegra/soctherm.c +++ b/drivers/thermal/tegra/soctherm.c @@ -30,6 +30,7 @@ #include <dt-bindings/thermal/tegra124-soctherm.h> +#include "../thermal_core.h" #include "soctherm.h" #define SENSOR_CONFIG0 0 @@ -67,35 +68,228 @@ #define READBACK_ADD_HALF BIT(7) #define READBACK_NEGATE BIT(0) +/* + * THERMCTL_LEVEL0_GROUP_CPU is defined in soctherm.h + * because it will be used by tegraxxx_soctherm.c + */ +#define THERMCTL_LVL0_CPU0_EN_MASK BIT(8) +#define THERMCTL_LVL0_CPU0_CPU_THROT_MASK (0x3 << 5) +#define THERMCTL_LVL0_CPU0_CPU_THROT_LIGHT 0x1 +#define THERMCTL_LVL0_CPU0_CPU_THROT_HEAVY 0x2 +#define THERMCTL_LVL0_CPU0_GPU_THROT_MASK (0x3 << 3) +#define THERMCTL_LVL0_CPU0_GPU_THROT_LIGHT 0x1 +#define THERMCTL_LVL0_CPU0_GPU_THROT_HEAVY 0x2 +#define THERMCTL_LVL0_CPU0_MEM_THROT_MASK BIT(2) +#define THERMCTL_LVL0_CPU0_STATUS_MASK 0x3 + +#define THERMCTL_LVL0_UP_STATS 0x10 +#define THERMCTL_LVL0_DN_STATS 0x14 + +#define THERMCTL_STATS_CTL 0x94 +#define STATS_CTL_CLR_DN 0x8 +#define STATS_CTL_EN_DN 0x4 +#define STATS_CTL_CLR_UP 0x2 +#define STATS_CTL_EN_UP 0x1 + +#define THROT_GLOBAL_CFG 0x400 +#define THROT_GLOBAL_ENB_MASK BIT(0) + +#define CPU_PSKIP_STATUS 0x418 +#define XPU_PSKIP_STATUS_M_MASK (0xff << 12) +#define XPU_PSKIP_STATUS_N_MASK (0xff << 4) +#define XPU_PSKIP_STATUS_SW_OVERRIDE_MASK BIT(1) +#define XPU_PSKIP_STATUS_ENABLED_MASK BIT(0) + +#define THROT_PRIORITY_LOCK 0x424 +#define THROT_PRIORITY_LOCK_PRIORITY_MASK 0xff + +#define THROT_STATUS 0x428 +#define THROT_STATUS_BREACH_MASK BIT(12) +#define THROT_STATUS_STATE_MASK (0xff << 4) +#define THROT_STATUS_ENABLED_MASK BIT(0) + +#define THROT_PSKIP_CTRL_LITE_CPU 0x430 +#define THROT_PSKIP_CTRL_ENABLE_MASK BIT(31) +#define THROT_PSKIP_CTRL_DIVIDEND_MASK (0xff << 8) +#define THROT_PSKIP_CTRL_DIVISOR_MASK 0xff +#define THROT_PSKIP_CTRL_VECT_GPU_MASK (0x7 << 16) +#define THROT_PSKIP_CTRL_VECT_CPU_MASK (0x7 << 8) +#define THROT_PSKIP_CTRL_VECT2_CPU_MASK 0x7 + +#define THROT_VECT_NONE 0x0 /* 3'b000 */ +#define THROT_VECT_LOW 0x1 /* 3'b001 */ +#define THROT_VECT_MED 0x3 /* 3'b011 */ +#define THROT_VECT_HIGH 0x7 /* 3'b111 */ + +#define THROT_PSKIP_RAMP_LITE_CPU 0x434 +#define THROT_PSKIP_RAMP_SEQ_BYPASS_MODE_MASK BIT(31) +#define THROT_PSKIP_RAMP_DURATION_MASK (0xffff << 8) +#define 
THROT_PSKIP_RAMP_STEP_MASK 0xff + +#define THROT_PRIORITY_LITE 0x444 +#define THROT_PRIORITY_LITE_PRIO_MASK 0xff + +#define THROT_DELAY_LITE 0x448 +#define THROT_DELAY_LITE_DELAY_MASK 0xff + +/* car register offsets needed for enabling HW throttling */ +#define CAR_SUPER_CCLKG_DIVIDER 0x36c +#define CDIVG_USE_THERM_CONTROLS_MASK BIT(30) + +/* ccroc register offsets needed for enabling HW throttling for Tegra132 */ +#define CCROC_SUPER_CCLKG_DIVIDER 0x024 + +#define CCROC_GLOBAL_CFG 0x148 + +#define CCROC_THROT_PSKIP_RAMP_CPU 0x150 +#define CCROC_THROT_PSKIP_RAMP_SEQ_BYPASS_MODE_MASK BIT(31) +#define CCROC_THROT_PSKIP_RAMP_DURATION_MASK (0xffff << 8) +#define CCROC_THROT_PSKIP_RAMP_STEP_MASK 0xff + +#define CCROC_THROT_PSKIP_CTRL_CPU 0x154 +#define CCROC_THROT_PSKIP_CTRL_ENB_MASK BIT(31) +#define CCROC_THROT_PSKIP_CTRL_DIVIDEND_MASK (0xff << 8) +#define CCROC_THROT_PSKIP_CTRL_DIVISOR_MASK 0xff + /* get val from register(r) mask bits(m) */ #define REG_GET_MASK(r, m) (((r) & (m)) >> (ffs(m) - 1)) /* set val(v) to mask bits(m) of register(r) */ #define REG_SET_MASK(r, m, v) (((r) & ~(m)) | \ (((v) & (m >> (ffs(m) - 1))) << (ffs(m) - 1))) +/* get dividend from the depth */ +#define THROT_DEPTH_DIVIDEND(depth) ((256 * (100 - (depth)) / 100) - 1) + +/* get THROT_PSKIP_xxx offset per LIGHT/HEAVY throt and CPU/GPU dev */ +#define THROT_OFFSET 0x30 +#define THROT_PSKIP_CTRL(throt, dev) (THROT_PSKIP_CTRL_LITE_CPU + \ + (THROT_OFFSET * throt) + (8 * dev)) +#define THROT_PSKIP_RAMP(throt, dev) (THROT_PSKIP_RAMP_LITE_CPU + \ + (THROT_OFFSET * throt) + (8 * dev)) + +/* get THROT_xxx_CTRL offset per LIGHT/HEAVY throt */ +#define THROT_PRIORITY_CTRL(throt) (THROT_PRIORITY_LITE + \ + (THROT_OFFSET * throt)) +#define THROT_DELAY_CTRL(throt) (THROT_DELAY_LITE + \ + (THROT_OFFSET * throt)) + +/* get CCROC_THROT_PSKIP_xxx offset per HIGH/MED/LOW vect*/ +#define CCROC_THROT_OFFSET 0x0c +#define CCROC_THROT_PSKIP_CTRL_CPU_REG(vect) (CCROC_THROT_PSKIP_CTRL_CPU + \ + (CCROC_THROT_OFFSET * vect)) +#define CCROC_THROT_PSKIP_RAMP_CPU_REG(vect) (CCROC_THROT_PSKIP_RAMP_CPU + \ + (CCROC_THROT_OFFSET * vect)) + +/* get THERMCTL_LEVELx offset per CPU/GPU/MEM/TSENSE rg and LEVEL0~3 lv */ +#define THERMCTL_LVL_REGS_SIZE 0x20 +#define THERMCTL_LVL_REG(rg, lv) ((rg) + ((lv) * THERMCTL_LVL_REGS_SIZE)) + static const int min_low_temp = -127000; static const int max_high_temp = 127000; +enum soctherm_throttle_id { + THROTTLE_LIGHT = 0, + THROTTLE_HEAVY, + THROTTLE_SIZE, +}; + +enum soctherm_throttle_dev_id { + THROTTLE_DEV_CPU = 0, + THROTTLE_DEV_GPU, + THROTTLE_DEV_SIZE, +}; + +static const char *const throt_names[] = { + [THROTTLE_LIGHT] = "light", + [THROTTLE_HEAVY] = "heavy", +}; + +struct tegra_soctherm; struct tegra_thermctl_zone { void __iomem *reg; struct device *dev; + struct tegra_soctherm *ts; struct thermal_zone_device *tz; const struct tegra_tsensor_group *sg; }; +struct soctherm_throt_cfg { + const char *name; + unsigned int id; + u8 priority; + u8 cpu_throt_level; + u32 cpu_throt_depth; + struct thermal_cooling_device *cdev; + bool init; +}; + struct tegra_soctherm { struct reset_control *reset; struct clk *clock_tsensor; struct clk *clock_soctherm; void __iomem *regs; - struct thermal_zone_device **thermctl_tzs; + void __iomem *clk_regs; + void __iomem *ccroc_regs; u32 *calib; + struct thermal_zone_device **thermctl_tzs; struct tegra_soctherm_soc *soc; + struct soctherm_throt_cfg throt_cfgs[THROTTLE_SIZE]; + struct dentry *debugfs_dir; }; +/** + * clk_writel() - writes a value to a CAR register + * @ts: pointer to 
a struct tegra_soctherm + * @value: the value to write + * @reg: the register offset + * + * Writes @value to @reg. No return value. + */ +static inline void clk_writel(struct tegra_soctherm *ts, u32 value, u32 reg) +{ + writel(value, (ts->clk_regs + reg)); +} + +/** + * clk_readl() - reads the specified register from the CAR IP block + * @ts: pointer to a struct tegra_soctherm + * @reg: register address to be read + * + * Return: the value of the register + */ +static inline u32 clk_readl(struct tegra_soctherm *ts, u32 reg) +{ + return readl(ts->clk_regs + reg); +} + +/** + * ccroc_writel() - writes a value to a CCROC register + * @ts: pointer to a struct tegra_soctherm + * @value: the value to write + * @reg: the register offset + * + * Writes @value to @reg. No return value. + */ +static inline void ccroc_writel(struct tegra_soctherm *ts, u32 value, u32 reg) +{ + writel(value, (ts->ccroc_regs + reg)); +} + +/** + * ccroc_readl() - reads the specified register from the CCROC IP block + * @ts: pointer to a struct tegra_soctherm + * @reg: register address to be read + * + * Return: the value of the register + */ +static inline u32 ccroc_readl(struct tegra_soctherm *ts, u32 reg) +{ + return readl(ts->ccroc_regs + reg); +} + static void enable_tsensor(struct tegra_soctherm *tegra, unsigned int i) { const struct tegra_tsensor *sensor = &tegra->soc->tsensors[i]; @@ -150,11 +344,17 @@ static int tegra_thermctl_get_temp(void *data, int *out_temp) static int thermtrip_program(struct device *dev, const struct tegra_tsensor_group *sg, int trip_temp); +static int +throttrip_program(struct device *dev, const struct tegra_tsensor_group *sg, + struct soctherm_throt_cfg *stc, int trip_temp); +static struct soctherm_throt_cfg * +find_throttle_cfg_by_name(struct tegra_soctherm *ts, const char *name); static int tegra_thermctl_set_trip_temp(void *data, int trip, int temp) { struct tegra_thermctl_zone *zone = data; struct thermal_zone_device *tz = zone->tz; + struct tegra_soctherm *ts = zone->ts; const struct tegra_tsensor_group *sg = zone->sg; struct device *dev = zone->dev; enum thermal_trip_type type; @@ -167,10 +367,29 @@ static int tegra_thermctl_set_trip_temp(void *data, int trip, int temp) if (ret) return ret; - if (type != THERMAL_TRIP_CRITICAL) - return 0; + if (type == THERMAL_TRIP_CRITICAL) { + return thermtrip_program(dev, sg, temp); + } else if (type == THERMAL_TRIP_HOT) { + int i; + + for (i = 0; i < THROTTLE_SIZE; i++) { + struct thermal_cooling_device *cdev; + struct soctherm_throt_cfg *stc; + + if (!ts->throt_cfgs[i].init) + continue; + + cdev = ts->throt_cfgs[i].cdev; + if (get_thermal_instance(tz, cdev, trip)) + stc = find_throttle_cfg_by_name(ts, cdev->type); + else + continue; + + return throttrip_program(dev, sg, stc, temp); + } + } - return thermtrip_program(dev, sg, temp); + return 0; } static const struct thermal_zone_of_device_ops tegra_of_thermal_ops = { @@ -238,14 +457,110 @@ static int thermtrip_program(struct device *dev, } /** + * throttrip_program() - Configures the hardware to throttle the + * pulse skippers if a given sensor group reaches a given temperature + * @dev: ptr to the struct device for the SOC_THERM IP block + * @sg: pointer to the sensor group to set the thermtrip temperature for + * @stc: pointer to the throttle configuration to be triggered + * @trip_temp: the temperature in millicelsius to trigger the thermal trip at + * + * Sets the thermal trip threshold and throttle event of the given sensor + * group. If this threshold is crossed, the hardware will trigger the + * throttle.
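+ * + * A worked illustration (editor's note, values from the per-SoC tables + * added later in this patch): @trip_temp = 85000 millicelsius with a + * thresh_grain of 1000 (Tegra124/132) writes 85 into the threshold + * field, while the Tegra210 grain of 500 would yield 170.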
+ * + * Note that, although @trip_temp is specified in millicelsius, the + * hardware is programmed in degrees Celsius. + * + * Return: 0 upon success, or %-EINVAL upon failure. + */ +static int throttrip_program(struct device *dev, + const struct tegra_tsensor_group *sg, + struct soctherm_throt_cfg *stc, + int trip_temp) +{ + struct tegra_soctherm *ts = dev_get_drvdata(dev); + int temp, cpu_throt, gpu_throt; + unsigned int throt; + u32 r, reg_off; + + if (!dev || !sg || !stc || !stc->init) + return -EINVAL; + + temp = enforce_temp_range(dev, trip_temp) / ts->soc->thresh_grain; + + /* Hardcode LIGHT on LEVEL1 and HEAVY on LEVEL2 */ + throt = stc->id; + reg_off = THERMCTL_LVL_REG(sg->thermctl_lvl0_offset, throt + 1); + + if (throt == THROTTLE_LIGHT) { + cpu_throt = THERMCTL_LVL0_CPU0_CPU_THROT_LIGHT; + gpu_throt = THERMCTL_LVL0_CPU0_GPU_THROT_LIGHT; + } else { + cpu_throt = THERMCTL_LVL0_CPU0_CPU_THROT_HEAVY; + gpu_throt = THERMCTL_LVL0_CPU0_GPU_THROT_HEAVY; + if (throt != THROTTLE_HEAVY) + dev_warn(dev, + "invalid throt id %d - assuming HEAVY\n", + throt); + } + + r = readl(ts->regs + reg_off); + r = REG_SET_MASK(r, sg->thermctl_lvl0_up_thresh_mask, temp); + r = REG_SET_MASK(r, sg->thermctl_lvl0_dn_thresh_mask, temp); + r = REG_SET_MASK(r, THERMCTL_LVL0_CPU0_CPU_THROT_MASK, cpu_throt); + r = REG_SET_MASK(r, THERMCTL_LVL0_CPU0_GPU_THROT_MASK, gpu_throt); + r = REG_SET_MASK(r, THERMCTL_LVL0_CPU0_EN_MASK, 1); + writel(r, ts->regs + reg_off); + + return 0; +} + +static struct soctherm_throt_cfg * +find_throttle_cfg_by_name(struct tegra_soctherm *ts, const char *name) +{ + unsigned int i; + + for (i = 0; ts->throt_cfgs[i].name; i++) + if (!strcmp(ts->throt_cfgs[i].name, name)) + return &ts->throt_cfgs[i]; + + return NULL; +} + +static int get_hot_temp(struct thermal_zone_device *tz, int *trip, int *temp) +{ + int ntrips, i, ret; + enum thermal_trip_type type; + + ntrips = of_thermal_get_ntrips(tz); + if (ntrips <= 0) + return -EINVAL; + + for (i = 0; i < ntrips; i++) { + ret = tz->ops->get_trip_type(tz, i, &type); + if (ret) + return -EINVAL; + if (type == THERMAL_TRIP_HOT) { + ret = tz->ops->get_trip_temp(tz, i, temp); + if (!ret) + *trip = i; + + return ret; + } + } + + return -EINVAL; +} + +/** * tegra_soctherm_set_hwtrips() - set HW trip point from DT data * @dev: struct device * of the SOC_THERM instance * * Configure the SOC_THERM HW trip points, setting "THERMTRIP" - * trip points , using "critical" type trip_temp from thermal - * zone. - * After they have been configured, THERMTRIP will take action - * when the configured SoC thermal sensor group reaches a + * or "THROTTLE" trip points, using "critical" or "hot" type trip_temp + * from thermal zone. + * After they have been configured, THERMTRIP or THROTTLE will take + * action when the configured SoC thermal sensor group reaches a * certain temperature. * * Return: 0 upon success, or a negative error code on failure.
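[Editor's aside, not part of the patch itself] The REG_GET_MASK()/REG_SET_MASK() helpers used throughout soctherm.c address a register field via the lowest set bit of its mask. A minimal, self-contained userspace sketch of their behaviour, assuming POSIX ffs() in place of the kernel's and borrowing the Tegra124-style LVL0 up-threshold mask as the example field:

#include <stdio.h>
#include <strings.h>	/* POSIX ffs() */

#define REG_GET_MASK(r, m)	(((r) & (m)) >> (ffs(m) - 1))
#define REG_SET_MASK(r, m, v)	(((r) & ~(m)) | \
				 (((v) & (m >> (ffs(m) - 1))) << (ffs(m) - 1)))

int main(void)
{
	unsigned int r = 0;
	unsigned int mask = 0xff << 17;	/* cf. TEGRA124_THERMCTL_LVL0_UP_THRESH_MASK */

	/* program 85 (degrees C at grain 1000) into the 8-bit field at bits 17..24 */
	r = REG_SET_MASK(r, mask, 85);
	printf("reg=0x%08x field=%u\n", r, REG_GET_MASK(r, mask));	/* field=85 */
	return 0;
}

Read-modify-write sequences such as throttrip_program() above simply chain several REG_SET_MASK() calls on the same register value before a single writel().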
@@ -254,19 +569,24 @@ static int thermtrip_program(struct device *dev, * THERMTRIP has been enabled successfully when a message similar to * this one appears on the serial console: * "thermtrip: will shut down when sensor group XXX reaches YYYYYY mC" + * THROTTLE has been enabled successfully when a message similar to + * this one appears on the serial console: + * "throttrip: will throttle when sensor group XXX reaches YYYYYY mC" */ static int tegra_soctherm_set_hwtrips(struct device *dev, const struct tegra_tsensor_group *sg, struct thermal_zone_device *tz) { - int temperature; + struct tegra_soctherm *ts = dev_get_drvdata(dev); + struct soctherm_throt_cfg *stc; + int i, trip, temperature; int ret; ret = tz->ops->get_crit_temp(tz, &temperature); if (ret) { dev_warn(dev, "thermtrip: %s: missing critical temperature\n", sg->name); - return ret; + goto set_throttle; } ret = thermtrip_program(dev, sg, temperature); @@ -280,6 +600,43 @@ static int tegra_soctherm_set_hwtrips(struct device *dev, "thermtrip: will shut down when %s reaches %d mC\n", sg->name, temperature); +set_throttle: + ret = get_hot_temp(tz, &trip, &temperature); + if (ret) { + dev_warn(dev, "throttrip: %s: missing hot temperature\n", + sg->name); + return 0; + } + + for (i = 0; i < THROTTLE_SIZE; i++) { + struct thermal_cooling_device *cdev; + + if (!ts->throt_cfgs[i].init) + continue; + + cdev = ts->throt_cfgs[i].cdev; + if (get_thermal_instance(tz, cdev, trip)) + stc = find_throttle_cfg_by_name(ts, cdev->type); + else + continue; + + ret = throttrip_program(dev, sg, stc, temperature); + if (ret) { + dev_err(dev, "throttrip: %s: error during enable\n", + sg->name); + return ret; + } + + dev_info(dev, + "throttrip: will throttle when %s reaches %d mC\n", + sg->name, temperature); + break; + } + + if (i == THROTTLE_SIZE) + dev_warn(dev, "throttrip: %s: missing throttle cdev\n", + sg->name); + return 0; } @@ -291,7 +648,7 @@ static int regs_show(struct seq_file *s, void *data) const struct tegra_tsensor *tsensors = ts->soc->tsensors; const struct tegra_tsensor_group **ttgs = ts->soc->ttgs; u32 r, state; - int i; + int i, level; seq_puts(s, "-----TSENSE (convert HW)-----\n"); @@ -365,6 +722,81 @@ static int regs_show(struct seq_file *s, void *data) state = REG_GET_MASK(r, SENSOR_TEMP2_MEM_TEMP_MASK); seq_printf(s, " MEM(%d)\n", translate_temp(state)); + for (i = 0; i < ts->soc->num_ttgs; i++) { + seq_printf(s, "%s:\n", ttgs[i]->name); + for (level = 0; level < 4; level++) { + s32 v; + u32 mask; + u16 off = ttgs[i]->thermctl_lvl0_offset; + + r = readl(ts->regs + THERMCTL_LVL_REG(off, level)); + + mask = ttgs[i]->thermctl_lvl0_up_thresh_mask; + state = REG_GET_MASK(r, mask); + v = sign_extend32(state, ts->soc->bptt - 1); + v *= ts->soc->thresh_grain; + seq_printf(s, " %d: Up/Dn(%d /", level, v); + + mask = ttgs[i]->thermctl_lvl0_dn_thresh_mask; + state = REG_GET_MASK(r, mask); + v = sign_extend32(state, ts->soc->bptt - 1); + v *= ts->soc->thresh_grain; + seq_printf(s, "%d ) ", v); + + mask = THERMCTL_LVL0_CPU0_EN_MASK; + state = REG_GET_MASK(r, mask); + seq_printf(s, "En(%d) ", state); + + mask = THERMCTL_LVL0_CPU0_CPU_THROT_MASK; + state = REG_GET_MASK(r, mask); + seq_puts(s, "CPU Throt"); + if (!state) + seq_printf(s, "(%s) ", "none"); + else if (state == THERMCTL_LVL0_CPU0_CPU_THROT_LIGHT) + seq_printf(s, "(%s) ", "L"); + else if (state == THERMCTL_LVL0_CPU0_CPU_THROT_HEAVY) + seq_printf(s, "(%s) ", "H"); + else + seq_printf(s, "(%s) ", "H+L"); + + mask = THERMCTL_LVL0_CPU0_GPU_THROT_MASK; + state = REG_GET_MASK(r, mask); +
seq_puts(s, "GPU Throt"); + if (!state) + seq_printf(s, "(%s) ", "none"); + else if (state == THERMCTL_LVL0_CPU0_GPU_THROT_LIGHT) + seq_printf(s, "(%s) ", "L"); + else if (state == THERMCTL_LVL0_CPU0_GPU_THROT_HEAVY) + seq_printf(s, "(%s) ", "H"); + else + seq_printf(s, "(%s) ", "H+L"); + + mask = THERMCTL_LVL0_CPU0_STATUS_MASK; + state = REG_GET_MASK(r, mask); + seq_printf(s, "Status(%s)\n", + state == 0 ? "LO" : + state == 1 ? "In" : + state == 2 ? "Res" : "HI"); + } + } + + r = readl(ts->regs + THERMCTL_STATS_CTL); + seq_printf(s, "STATS: Up(%s) Dn(%s)\n", + r & STATS_CTL_EN_UP ? "En" : "--", + r & STATS_CTL_EN_DN ? "En" : "--"); + + for (level = 0; level < 4; level++) { + u16 off; + + off = THERMCTL_LVL0_UP_STATS; + r = readl(ts->regs + THERMCTL_LVL_REG(off, level)); + seq_printf(s, " Level_%d Up(%d) ", level, r); + + off = THERMCTL_LVL0_DN_STATS; + r = readl(ts->regs + THERMCTL_LVL_REG(off, level)); + seq_printf(s, "Dn(%d)\n", r); + } + r = readl(ts->regs + THERMCTL_THERMTRIP_CTL); state = REG_GET_MASK(r, ttgs[0]->thermtrip_any_en_mask); seq_printf(s, "Thermtrip Any En(%d)\n", state); @@ -376,6 +808,32 @@ static int regs_show(struct seq_file *s, void *data) seq_printf(s, "Thresh(%d)\n", state); } + r = readl(ts->regs + THROT_GLOBAL_CFG); + seq_puts(s, "\n"); + seq_printf(s, "GLOBAL THROTTLE CONFIG: 0x%08x\n", r); + + seq_puts(s, "---------------------------------------------------\n"); + r = readl(ts->regs + THROT_STATUS); + state = REG_GET_MASK(r, THROT_STATUS_BREACH_MASK); + seq_printf(s, "THROT STATUS: breach(%d) ", state); + state = REG_GET_MASK(r, THROT_STATUS_STATE_MASK); + seq_printf(s, "state(%d) ", state); + state = REG_GET_MASK(r, THROT_STATUS_ENABLED_MASK); + seq_printf(s, "enabled(%d)\n", state); + + r = readl(ts->regs + CPU_PSKIP_STATUS); + if (ts->soc->use_ccroc) { + state = REG_GET_MASK(r, XPU_PSKIP_STATUS_ENABLED_MASK); + seq_printf(s, "CPU PSKIP STATUS: enabled(%d)\n", state); + } else { + state = REG_GET_MASK(r, XPU_PSKIP_STATUS_M_MASK); + seq_printf(s, "CPU PSKIP STATUS: M(%d) ", state); + state = REG_GET_MASK(r, XPU_PSKIP_STATUS_N_MASK); + seq_printf(s, "N(%d) ", state); + state = REG_GET_MASK(r, XPU_PSKIP_STATUS_ENABLED_MASK); + seq_printf(s, "enabled(%d)\n", state); + } + return 0; } @@ -449,6 +907,326 @@ static int soctherm_clk_enable(struct platform_device *pdev, bool enable) return 0; } +static int throt_get_cdev_max_state(struct thermal_cooling_device *cdev, + unsigned long *max_state) +{ + *max_state = 1; + return 0; +} + +static int throt_get_cdev_cur_state(struct thermal_cooling_device *cdev, + unsigned long *cur_state) +{ + struct tegra_soctherm *ts = cdev->devdata; + u32 r; + + r = readl(ts->regs + THROT_STATUS); + if (REG_GET_MASK(r, THROT_STATUS_STATE_MASK)) + *cur_state = 1; + else + *cur_state = 0; + + return 0; +} + +static int throt_set_cdev_state(struct thermal_cooling_device *cdev, + unsigned long cur_state) +{ + return 0; +} + +static struct thermal_cooling_device_ops throt_cooling_ops = { + .get_max_state = throt_get_cdev_max_state, + .get_cur_state = throt_get_cdev_cur_state, + .set_cur_state = throt_set_cdev_state, +}; + +/** + * soctherm_init_hw_throt_cdev() - Parse the HW throttle configurations + * and register them as cooling devices. 
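+ * @pdev: pointer to the SOC_THERM platform device + * + * The expected node layout, as an illustrative fragment only (the + * property names are the ones parsed below; the values are invented): + * + *	throttle-cfgs { + *		throttle_heavy: heavy { + *			nvidia,priority = <100>; + *			nvidia,cpu-throt-percent = <85>; + *		}; + *	};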
+ */ +static void soctherm_init_hw_throt_cdev(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct tegra_soctherm *ts = dev_get_drvdata(dev); + struct device_node *np_stc, *np_stcc; + const char *name; + u32 val; + int i, r; + + for (i = 0; i < THROTTLE_SIZE; i++) { + ts->throt_cfgs[i].name = throt_names[i]; + ts->throt_cfgs[i].id = i; + ts->throt_cfgs[i].init = false; + } + + np_stc = of_get_child_by_name(dev->of_node, "throttle-cfgs"); + if (!np_stc) { + dev_info(dev, + "throttle-cfg: no throttle-cfgs - not enabling\n"); + return; + } + + for_each_child_of_node(np_stc, np_stcc) { + struct soctherm_throt_cfg *stc; + struct thermal_cooling_device *tcd; + + name = np_stcc->name; + stc = find_throttle_cfg_by_name(ts, name); + if (!stc) { + dev_err(dev, + "throttle-cfg: could not find %s\n", name); + continue; + } + + r = of_property_read_u32(np_stcc, "nvidia,priority", &val); + if (r) { + dev_info(dev, + "throttle-cfg: %s: missing priority\n", name); + continue; + } + stc->priority = val; + + if (ts->soc->use_ccroc) { + r = of_property_read_u32(np_stcc, + "nvidia,cpu-throt-level", + &val); + if (r) { + dev_info(dev, + "throttle-cfg: %s: missing cpu-throt-level\n", + name); + continue; + } + stc->cpu_throt_level = val; + } else { + r = of_property_read_u32(np_stcc, + "nvidia,cpu-throt-percent", + &val); + if (r) { + dev_info(dev, + "throttle-cfg: %s: missing cpu-throt-percent\n", + name); + continue; + } + stc->cpu_throt_depth = val; + } + + tcd = thermal_of_cooling_device_register(np_stcc, + (char *)name, ts, + &throt_cooling_ops); + of_node_put(np_stcc); + if (IS_ERR_OR_NULL(tcd)) { + dev_err(dev, + "throttle-cfg: %s: failed to register cooling device\n", + name); + continue; + } + + stc->cdev = tcd; + stc->init = true; + } + + of_node_put(np_stc); +} + +/** + * throttlectl_cpu_level_cfg() - programs CCROC NV_THERM level config + * @ts: pointer to a struct tegra_soctherm + * @level: the LOW/MED/HIGH throttling level to configure + * + * It's necessary to set up the CPU-local CCROC NV_THERM instance with + * the M/N values desired for each level. This function does this. + * + * This function pre-programs the CCROC NV_THERM levels in terms of + * pre-configured "Low", "Medium" or "Heavy" throttle levels which are + * mapped to THROT_LEVEL_LOW, THROT_LEVEL_MED and THROT_LEVEL_HVY.
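+ * + * For reference (editor's arithmetic): THROT_DEPTH_DIVIDEND() maps the + * skip depths used below to dividends as (256 * (100 - depth) / 100) - 1, + * so 50% -> 127, 75% -> 63 and 80% -> 50.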
+ */ +static void throttlectl_cpu_level_cfg(struct tegra_soctherm *ts, int level) +{ + u8 depth, dividend; + u32 r; + + switch (level) { + case TEGRA_SOCTHERM_THROT_LEVEL_LOW: + depth = 50; + break; + case TEGRA_SOCTHERM_THROT_LEVEL_MED: + depth = 75; + break; + case TEGRA_SOCTHERM_THROT_LEVEL_HIGH: + depth = 80; + break; + case TEGRA_SOCTHERM_THROT_LEVEL_NONE: + return; + default: + return; + } + + dividend = THROT_DEPTH_DIVIDEND(depth); + + /* setup PSKIP in ccroc nv_therm registers */ + r = ccroc_readl(ts, CCROC_THROT_PSKIP_RAMP_CPU_REG(level)); + r = REG_SET_MASK(r, CCROC_THROT_PSKIP_RAMP_DURATION_MASK, 0xff); + r = REG_SET_MASK(r, CCROC_THROT_PSKIP_RAMP_STEP_MASK, 0xf); + ccroc_writel(ts, r, CCROC_THROT_PSKIP_RAMP_CPU_REG(level)); + + r = ccroc_readl(ts, CCROC_THROT_PSKIP_CTRL_CPU_REG(level)); + r = REG_SET_MASK(r, CCROC_THROT_PSKIP_CTRL_ENB_MASK, 1); + r = REG_SET_MASK(r, CCROC_THROT_PSKIP_CTRL_DIVIDEND_MASK, dividend); + r = REG_SET_MASK(r, CCROC_THROT_PSKIP_CTRL_DIVISOR_MASK, 0xff); + ccroc_writel(ts, r, CCROC_THROT_PSKIP_CTRL_CPU_REG(level)); +} + +/** + * throttlectl_cpu_level_select() - program CPU pulse skipper config + * @ts: pointer to a struct tegra_soctherm + * @throt: the throttle event id (LIGHT or HEAVY) + * + * Pulse skippers are used to throttle clock frequencies. This + * function programs the pulse skippers based on @throt and platform + * data. This function is used on SoCs which have CPU-local pulse + * skipper control, such as T13x. It programs soctherm's interface to + * Denver:CCROC NV_THERM in terms of LOW, MED and HIGH throttling + * vectors. PSKIP_BYPASS mode is set as required per HW spec. + */ +static void throttlectl_cpu_level_select(struct tegra_soctherm *ts, + enum soctherm_throttle_id throt) +{ + u32 r, throt_vect; + + /* Denver:CCROC NV_THERM interface N:3 Mapping */ + switch (ts->throt_cfgs[throt].cpu_throt_level) { + case TEGRA_SOCTHERM_THROT_LEVEL_LOW: + throt_vect = THROT_VECT_LOW; + break; + case TEGRA_SOCTHERM_THROT_LEVEL_MED: + throt_vect = THROT_VECT_MED; + break; + case TEGRA_SOCTHERM_THROT_LEVEL_HIGH: + throt_vect = THROT_VECT_HIGH; + break; + default: + throt_vect = THROT_VECT_NONE; + break; + } + + r = readl(ts->regs + THROT_PSKIP_CTRL(throt, THROTTLE_DEV_CPU)); + r = REG_SET_MASK(r, THROT_PSKIP_CTRL_ENABLE_MASK, 1); + r = REG_SET_MASK(r, THROT_PSKIP_CTRL_VECT_CPU_MASK, throt_vect); + r = REG_SET_MASK(r, THROT_PSKIP_CTRL_VECT2_CPU_MASK, throt_vect); + writel(r, ts->regs + THROT_PSKIP_CTRL(throt, THROTTLE_DEV_CPU)); + + /* bypass sequencer in soc_therm as it is programmed in ccroc */ + r = REG_SET_MASK(0, THROT_PSKIP_RAMP_SEQ_BYPASS_MODE_MASK, 1); + writel(r, ts->regs + THROT_PSKIP_RAMP(throt, THROTTLE_DEV_CPU)); +} + +/** + * throttlectl_cpu_mn() - program CPU pulse skipper configuration + * @ts: pointer to a struct tegra_soctherm + * @throt: the throttle event id (LIGHT or HEAVY) + * + * Pulse skippers are used to throttle clock frequencies. This + * function programs the pulse skippers based on @throt and platform + * data. This function is used for CPUs that have "remote" pulse + * skipper control, e.g., the CPU pulse skipper is controlled by the + * SOC_THERM IP block. (SOC_THERM is located outside the CPU + * complex.)
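+ * + * A rough reading of the M/N programming (editor's note): the divisor + * is fixed at 0xff, so the effective CPU duty cycle is + * (dividend + 1) / 256; a cpu-throt-percent of 50 (dividend 127) thus + * lets about half of the pulses through.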
+ */ +static void throttlectl_cpu_mn(struct tegra_soctherm *ts, + enum soctherm_throttle_id throt) +{ + u32 r; + int depth; + u8 dividend; + + depth = ts->throt_cfgs[throt].cpu_throt_depth; + dividend = THROT_DEPTH_DIVIDEND(depth); + + r = readl(ts->regs + THROT_PSKIP_CTRL(throt, THROTTLE_DEV_CPU)); + r = REG_SET_MASK(r, THROT_PSKIP_CTRL_ENABLE_MASK, 1); + r = REG_SET_MASK(r, THROT_PSKIP_CTRL_DIVIDEND_MASK, dividend); + r = REG_SET_MASK(r, THROT_PSKIP_CTRL_DIVISOR_MASK, 0xff); + writel(r, ts->regs + THROT_PSKIP_CTRL(throt, THROTTLE_DEV_CPU)); + + r = readl(ts->regs + THROT_PSKIP_RAMP(throt, THROTTLE_DEV_CPU)); + r = REG_SET_MASK(r, THROT_PSKIP_RAMP_DURATION_MASK, 0xff); + r = REG_SET_MASK(r, THROT_PSKIP_RAMP_STEP_MASK, 0xf); + writel(r, ts->regs + THROT_PSKIP_RAMP(throt, THROTTLE_DEV_CPU)); +} + +/** + * soctherm_throttle_program() - programs pulse skippers' configuration + * @ts: pointer to a struct tegra_soctherm + * @throt: the throttle event id (LIGHT or HEAVY) + * + * Pulse skippers are used to throttle clock frequencies. + * This function programs the pulse skippers. + */ +static void soctherm_throttle_program(struct tegra_soctherm *ts, + enum soctherm_throttle_id throt) +{ + u32 r; + struct soctherm_throt_cfg stc = ts->throt_cfgs[throt]; + + if (!stc.init) + return; + + /* Setup PSKIP parameters */ + if (ts->soc->use_ccroc) + throttlectl_cpu_level_select(ts, throt); + else + throttlectl_cpu_mn(ts, throt); + + r = REG_SET_MASK(0, THROT_PRIORITY_LITE_PRIO_MASK, stc.priority); + writel(r, ts->regs + THROT_PRIORITY_CTRL(throt)); + + r = REG_SET_MASK(0, THROT_DELAY_LITE_DELAY_MASK, 0); + writel(r, ts->regs + THROT_DELAY_CTRL(throt)); + + r = readl(ts->regs + THROT_PRIORITY_LOCK); + r = REG_GET_MASK(r, THROT_PRIORITY_LOCK_PRIORITY_MASK); + if (r >= stc.priority) + return; + r = REG_SET_MASK(0, THROT_PRIORITY_LOCK_PRIORITY_MASK, + stc.priority); + writel(r, ts->regs + THROT_PRIORITY_LOCK); +} + +static void tegra_soctherm_throttle(struct device *dev) +{ + struct tegra_soctherm *ts = dev_get_drvdata(dev); + u32 v; + int i; + + /* configure LOW, MED and HIGH levels for CCROC NV_THERM */ + if (ts->soc->use_ccroc) { + throttlectl_cpu_level_cfg(ts, TEGRA_SOCTHERM_THROT_LEVEL_LOW); + throttlectl_cpu_level_cfg(ts, TEGRA_SOCTHERM_THROT_LEVEL_MED); + throttlectl_cpu_level_cfg(ts, TEGRA_SOCTHERM_THROT_LEVEL_HIGH); + } + + /* Thermal HW throttle programming */ + for (i = 0; i < THROTTLE_SIZE; i++) + soctherm_throttle_program(ts, i); + + v = REG_SET_MASK(0, THROT_GLOBAL_ENB_MASK, 1); + if (ts->soc->use_ccroc) { + ccroc_writel(ts, v, CCROC_GLOBAL_CFG); + + v = ccroc_readl(ts, CCROC_SUPER_CCLKG_DIVIDER); + v = REG_SET_MASK(v, CDIVG_USE_THERM_CONTROLS_MASK, 1); + ccroc_writel(ts, v, CCROC_SUPER_CCLKG_DIVIDER); + } else { + writel(v, ts->regs + THROT_GLOBAL_CFG); + + v = clk_readl(ts, CAR_SUPER_CCLKG_DIVIDER); + v = REG_SET_MASK(v, CDIVG_USE_THERM_CONTROLS_MASK, 1); + clk_writel(ts, v, CAR_SUPER_CCLKG_DIVIDER); + } + + /* initialize stats collection */ + v = STATS_CTL_CLR_DN | STATS_CTL_EN_DN | + STATS_CTL_CLR_UP | STATS_CTL_EN_UP; + writel(v, ts->regs + THERMCTL_STATS_CTL); +} + static void soctherm_init(struct platform_device *pdev) { struct tegra_soctherm *tegra = platform_get_drvdata(pdev); @@ -475,6 +1253,9 @@ static void soctherm_init(struct platform_device *pdev) } writel(pdiv, tegra->regs + SENSOR_PDIV); writel(hotspot, tegra->regs + SENSOR_HOTSPOT_OFF); + + /* Configure hw throttle */ + tegra_soctherm_throttle(&pdev->dev); } static const struct of_device_id tegra_soctherm_of_match[] = { @@ -527,10 +1308,31 @@ static int
tegra_soctherm_probe(struct platform_device *pdev) tegra->soc = soc; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "soctherm-reg"); tegra->regs = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(tegra->regs)) + if (IS_ERR(tegra->regs)) { + dev_err(&pdev->dev, "can't get soctherm registers"); return PTR_ERR(tegra->regs); + } + + if (!tegra->soc->use_ccroc) { + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "car-reg"); + tegra->clk_regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(tegra->clk_regs)) { + dev_err(&pdev->dev, "can't get car clk registers"); + return PTR_ERR(tegra->clk_regs); + } + } else { + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "ccroc-reg"); + tegra->ccroc_regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(tegra->ccroc_regs)) { + dev_err(&pdev->dev, "can't get ccroc registers"); + return PTR_ERR(tegra->ccroc_regs); + } + } tegra->reset = devm_reset_control_get(&pdev->dev, "soctherm"); if (IS_ERR(tegra->reset)) { @@ -580,6 +1382,8 @@ static int tegra_soctherm_probe(struct platform_device *pdev) if (err) return err; + soctherm_init_hw_throt_cdev(pdev); + soctherm_init(pdev); for (i = 0; i < soc->num_ttgs; ++i) { @@ -593,6 +1397,7 @@ static int tegra_soctherm_probe(struct platform_device *pdev) zone->reg = tegra->regs + soc->ttgs[i]->sensor_temp_offset; zone->dev = &pdev->dev; zone->sg = soc->ttgs[i]; + zone->ts = tegra; z = devm_thermal_zone_of_sensor_register(&pdev->dev, soc->ttgs[i]->id, zone, @@ -608,7 +1413,9 @@ static int tegra_soctherm_probe(struct platform_device *pdev) tegra->thermctl_tzs[soc->ttgs[i]->id] = z; /* Configure hw trip points */ - tegra_soctherm_set_hwtrips(&pdev->dev, soc->ttgs[i], z); + err = tegra_soctherm_set_hwtrips(&pdev->dev, soc->ttgs[i], z); + if (err) + goto disable_clocks; } soctherm_debug_init(pdev); @@ -661,7 +1468,12 @@ static int __maybe_unused soctherm_resume(struct device *dev) struct thermal_zone_device *tz; tz = tegra->thermctl_tzs[soc->ttgs[i]->id]; - tegra_soctherm_set_hwtrips(dev, soc->ttgs[i], tz); + err = tegra_soctherm_set_hwtrips(dev, soc->ttgs[i], tz); + if (err) { + dev_err(&pdev->dev, + "Resume failed: set hwtrips failed\n"); + return err; + } } return 0; diff --git a/drivers/thermal/tegra/soctherm.h b/drivers/thermal/tegra/soctherm.h index 28e18ec4b4c3..e96ca73fd780 100644 --- a/drivers/thermal/tegra/soctherm.h +++ b/drivers/thermal/tegra/soctherm.h @@ -15,6 +15,11 @@ #ifndef __DRIVERS_THERMAL_TEGRA_SOCTHERM_H #define __DRIVERS_THERMAL_TEGRA_SOCTHERM_H +#define THERMCTL_LEVEL0_GROUP_CPU 0x0 +#define THERMCTL_LEVEL0_GROUP_GPU 0x4 +#define THERMCTL_LEVEL0_GROUP_MEM 0x8 +#define THERMCTL_LEVEL0_GROUP_TSENSE 0xc + #define SENSOR_CONFIG2 8 #define SENSOR_CONFIG2_THERMA_MASK (0xffff << 16) #define SENSOR_CONFIG2_THERMA_SHIFT 16 @@ -65,6 +70,9 @@ struct tegra_tsensor_group { u32 thermtrip_enable_mask; u32 thermtrip_any_en_mask; u32 thermtrip_threshold_mask; + u16 thermctl_lvl0_offset; + u32 thermctl_lvl0_up_thresh_mask; + u32 thermctl_lvl0_dn_thresh_mask; }; struct tegra_tsensor_configuration { @@ -103,6 +111,8 @@ struct tegra_soctherm_soc { const unsigned int num_ttgs; const struct tegra_soctherm_fuse *tfuse; const int thresh_grain; + const unsigned int bptt; + const bool use_ccroc; }; int tegra_calc_shared_calib(const struct tegra_soctherm_fuse *tfuse, diff --git a/drivers/thermal/tegra/tegra124-soctherm.c b/drivers/thermal/tegra/tegra124-soctherm.c index beb9d36b9c8a..36768630f78c 100644 --- 
a/drivers/thermal/tegra/tegra124-soctherm.c +++ b/drivers/thermal/tegra/tegra124-soctherm.c @@ -28,7 +28,11 @@ #define TEGRA124_THERMTRIP_CPU_THRESH_MASK (0xff << 8) #define TEGRA124_THERMTRIP_TSENSE_THRESH_MASK 0xff +#define TEGRA124_THERMCTL_LVL0_UP_THRESH_MASK (0xff << 17) +#define TEGRA124_THERMCTL_LVL0_DN_THRESH_MASK (0xff << 9) + #define TEGRA124_THRESH_GRAIN 1000 +#define TEGRA124_BPTT 8 static const struct tegra_tsensor_configuration tegra124_tsensor_config = { .tall = 16300, @@ -51,6 +55,9 @@ static const struct tegra_tsensor_group tegra124_tsensor_group_cpu = { .thermtrip_any_en_mask = TEGRA124_THERMTRIP_ANY_EN_MASK, .thermtrip_enable_mask = TEGRA124_THERMTRIP_CPU_EN_MASK, .thermtrip_threshold_mask = TEGRA124_THERMTRIP_CPU_THRESH_MASK, + .thermctl_lvl0_offset = THERMCTL_LEVEL0_GROUP_CPU, + .thermctl_lvl0_up_thresh_mask = TEGRA124_THERMCTL_LVL0_UP_THRESH_MASK, + .thermctl_lvl0_dn_thresh_mask = TEGRA124_THERMCTL_LVL0_DN_THRESH_MASK, }; static const struct tegra_tsensor_group tegra124_tsensor_group_gpu = { @@ -66,6 +73,9 @@ static const struct tegra_tsensor_group tegra124_tsensor_group_gpu = { .thermtrip_any_en_mask = TEGRA124_THERMTRIP_ANY_EN_MASK, .thermtrip_enable_mask = TEGRA124_THERMTRIP_GPU_EN_MASK, .thermtrip_threshold_mask = TEGRA124_THERMTRIP_GPUMEM_THRESH_MASK, + .thermctl_lvl0_offset = THERMCTL_LEVEL0_GROUP_GPU, + .thermctl_lvl0_up_thresh_mask = TEGRA124_THERMCTL_LVL0_UP_THRESH_MASK, + .thermctl_lvl0_dn_thresh_mask = TEGRA124_THERMCTL_LVL0_DN_THRESH_MASK, }; static const struct tegra_tsensor_group tegra124_tsensor_group_pll = { @@ -79,6 +89,9 @@ static const struct tegra_tsensor_group tegra124_tsensor_group_pll = { .thermtrip_any_en_mask = TEGRA124_THERMTRIP_ANY_EN_MASK, .thermtrip_enable_mask = TEGRA124_THERMTRIP_TSENSE_EN_MASK, .thermtrip_threshold_mask = TEGRA124_THERMTRIP_TSENSE_THRESH_MASK, + .thermctl_lvl0_offset = THERMCTL_LEVEL0_GROUP_TSENSE, + .thermctl_lvl0_up_thresh_mask = TEGRA124_THERMCTL_LVL0_UP_THRESH_MASK, + .thermctl_lvl0_dn_thresh_mask = TEGRA124_THERMCTL_LVL0_DN_THRESH_MASK, }; static const struct tegra_tsensor_group tegra124_tsensor_group_mem = { @@ -94,6 +107,9 @@ static const struct tegra_tsensor_group tegra124_tsensor_group_mem = { .thermtrip_any_en_mask = TEGRA124_THERMTRIP_ANY_EN_MASK, .thermtrip_enable_mask = TEGRA124_THERMTRIP_MEM_EN_MASK, .thermtrip_threshold_mask = TEGRA124_THERMTRIP_GPUMEM_THRESH_MASK, + .thermctl_lvl0_offset = THERMCTL_LEVEL0_GROUP_MEM, + .thermctl_lvl0_up_thresh_mask = TEGRA124_THERMCTL_LVL0_UP_THRESH_MASK, + .thermctl_lvl0_dn_thresh_mask = TEGRA124_THERMCTL_LVL0_DN_THRESH_MASK, }; static const struct tegra_tsensor_group *tegra124_tsensor_groups[] = { @@ -193,4 +209,6 @@ const struct tegra_soctherm_soc tegra124_soctherm = { .num_ttgs = ARRAY_SIZE(tegra124_tsensor_groups), .tfuse = &tegra124_soctherm_fuse, .thresh_grain = TEGRA124_THRESH_GRAIN, + .bptt = TEGRA124_BPTT, + .use_ccroc = false, }; diff --git a/drivers/thermal/tegra/tegra132-soctherm.c b/drivers/thermal/tegra/tegra132-soctherm.c index e2aa84e1b307..97fa30501eb1 100644 --- a/drivers/thermal/tegra/tegra132-soctherm.c +++ b/drivers/thermal/tegra/tegra132-soctherm.c @@ -28,7 +28,11 @@ #define TEGRA132_THERMTRIP_CPU_THRESH_MASK (0xff << 8) #define TEGRA132_THERMTRIP_TSENSE_THRESH_MASK 0xff +#define TEGRA132_THERMCTL_LVL0_UP_THRESH_MASK (0xff << 17) +#define TEGRA132_THERMCTL_LVL0_DN_THRESH_MASK (0xff << 9) + #define TEGRA132_THRESH_GRAIN 1000 +#define TEGRA132_BPTT 8 static const struct tegra_tsensor_configuration tegra132_tsensor_config = { .tall = 16300, @@ -51,6 
+55,9 @@ static const struct tegra_tsensor_group tegra132_tsensor_group_cpu = { .thermtrip_any_en_mask = TEGRA132_THERMTRIP_ANY_EN_MASK, .thermtrip_enable_mask = TEGRA132_THERMTRIP_CPU_EN_MASK, .thermtrip_threshold_mask = TEGRA132_THERMTRIP_CPU_THRESH_MASK, + .thermctl_lvl0_offset = THERMCTL_LEVEL0_GROUP_CPU, + .thermctl_lvl0_up_thresh_mask = TEGRA132_THERMCTL_LVL0_UP_THRESH_MASK, + .thermctl_lvl0_dn_thresh_mask = TEGRA132_THERMCTL_LVL0_DN_THRESH_MASK, }; static const struct tegra_tsensor_group tegra132_tsensor_group_gpu = { @@ -66,6 +73,9 @@ static const struct tegra_tsensor_group tegra132_tsensor_group_gpu = { .thermtrip_any_en_mask = TEGRA132_THERMTRIP_ANY_EN_MASK, .thermtrip_enable_mask = TEGRA132_THERMTRIP_GPU_EN_MASK, .thermtrip_threshold_mask = TEGRA132_THERMTRIP_GPUMEM_THRESH_MASK, + .thermctl_lvl0_offset = THERMCTL_LEVEL0_GROUP_GPU, + .thermctl_lvl0_up_thresh_mask = TEGRA132_THERMCTL_LVL0_UP_THRESH_MASK, + .thermctl_lvl0_dn_thresh_mask = TEGRA132_THERMCTL_LVL0_DN_THRESH_MASK, }; static const struct tegra_tsensor_group tegra132_tsensor_group_pll = { @@ -79,6 +89,9 @@ static const struct tegra_tsensor_group tegra132_tsensor_group_pll = { .thermtrip_any_en_mask = TEGRA132_THERMTRIP_ANY_EN_MASK, .thermtrip_enable_mask = TEGRA132_THERMTRIP_TSENSE_EN_MASK, .thermtrip_threshold_mask = TEGRA132_THERMTRIP_TSENSE_THRESH_MASK, + .thermctl_lvl0_offset = THERMCTL_LEVEL0_GROUP_TSENSE, + .thermctl_lvl0_up_thresh_mask = TEGRA132_THERMCTL_LVL0_UP_THRESH_MASK, + .thermctl_lvl0_dn_thresh_mask = TEGRA132_THERMCTL_LVL0_DN_THRESH_MASK, }; static const struct tegra_tsensor_group tegra132_tsensor_group_mem = { @@ -94,6 +107,9 @@ static const struct tegra_tsensor_group tegra132_tsensor_group_mem = { .thermtrip_any_en_mask = TEGRA132_THERMTRIP_ANY_EN_MASK, .thermtrip_enable_mask = TEGRA132_THERMTRIP_MEM_EN_MASK, .thermtrip_threshold_mask = TEGRA132_THERMTRIP_GPUMEM_THRESH_MASK, + .thermctl_lvl0_offset = THERMCTL_LEVEL0_GROUP_MEM, + .thermctl_lvl0_up_thresh_mask = TEGRA132_THERMCTL_LVL0_UP_THRESH_MASK, + .thermctl_lvl0_dn_thresh_mask = TEGRA132_THERMCTL_LVL0_DN_THRESH_MASK, }; static const struct tegra_tsensor_group *tegra132_tsensor_groups[] = { @@ -193,4 +209,6 @@ const struct tegra_soctherm_soc tegra132_soctherm = { .num_ttgs = ARRAY_SIZE(tegra132_tsensor_groups), .tfuse = &tegra132_soctherm_fuse, .thresh_grain = TEGRA132_THRESH_GRAIN, + .bptt = TEGRA132_BPTT, + .use_ccroc = true, }; diff --git a/drivers/thermal/tegra/tegra210-soctherm.c b/drivers/thermal/tegra/tegra210-soctherm.c index 19cc0ab66f0e..ad53169a8e95 100644 --- a/drivers/thermal/tegra/tegra210-soctherm.c +++ b/drivers/thermal/tegra/tegra210-soctherm.c @@ -29,7 +29,11 @@ #define TEGRA210_THERMTRIP_CPU_THRESH_MASK (0x1ff << 9) #define TEGRA210_THERMTRIP_TSENSE_THRESH_MASK 0x1ff +#define TEGRA210_THERMCTL_LVL0_UP_THRESH_MASK (0x1ff << 18) +#define TEGRA210_THERMCTL_LVL0_DN_THRESH_MASK (0x1ff << 9) + #define TEGRA210_THRESH_GRAIN 500 +#define TEGRA210_BPTT 9 static const struct tegra_tsensor_configuration tegra210_tsensor_config = { .tall = 16300, @@ -52,6 +56,9 @@ static const struct tegra_tsensor_group tegra210_tsensor_group_cpu = { .thermtrip_any_en_mask = TEGRA210_THERMTRIP_ANY_EN_MASK, .thermtrip_enable_mask = TEGRA210_THERMTRIP_CPU_EN_MASK, .thermtrip_threshold_mask = TEGRA210_THERMTRIP_CPU_THRESH_MASK, + .thermctl_lvl0_offset = THERMCTL_LEVEL0_GROUP_CPU, + .thermctl_lvl0_up_thresh_mask = TEGRA210_THERMCTL_LVL0_UP_THRESH_MASK, + .thermctl_lvl0_dn_thresh_mask = TEGRA210_THERMCTL_LVL0_DN_THRESH_MASK, }; static const struct 
tegra_tsensor_group tegra210_tsensor_group_gpu = { @@ -67,6 +74,9 @@ static const struct tegra_tsensor_group tegra210_tsensor_group_gpu = { .thermtrip_any_en_mask = TEGRA210_THERMTRIP_ANY_EN_MASK, .thermtrip_enable_mask = TEGRA210_THERMTRIP_GPU_EN_MASK, .thermtrip_threshold_mask = TEGRA210_THERMTRIP_GPUMEM_THRESH_MASK, + .thermctl_lvl0_offset = THERMCTL_LEVEL0_GROUP_GPU, + .thermctl_lvl0_up_thresh_mask = TEGRA210_THERMCTL_LVL0_UP_THRESH_MASK, + .thermctl_lvl0_dn_thresh_mask = TEGRA210_THERMCTL_LVL0_DN_THRESH_MASK, }; static const struct tegra_tsensor_group tegra210_tsensor_group_pll = { @@ -80,6 +90,9 @@ static const struct tegra_tsensor_group tegra210_tsensor_group_pll = { .thermtrip_any_en_mask = TEGRA210_THERMTRIP_ANY_EN_MASK, .thermtrip_enable_mask = TEGRA210_THERMTRIP_TSENSE_EN_MASK, .thermtrip_threshold_mask = TEGRA210_THERMTRIP_TSENSE_THRESH_MASK, + .thermctl_lvl0_offset = THERMCTL_LEVEL0_GROUP_TSENSE, + .thermctl_lvl0_up_thresh_mask = TEGRA210_THERMCTL_LVL0_UP_THRESH_MASK, + .thermctl_lvl0_dn_thresh_mask = TEGRA210_THERMCTL_LVL0_DN_THRESH_MASK, }; static const struct tegra_tsensor_group tegra210_tsensor_group_mem = { @@ -95,6 +108,9 @@ static const struct tegra_tsensor_group tegra210_tsensor_group_mem = { .thermtrip_any_en_mask = TEGRA210_THERMTRIP_ANY_EN_MASK, .thermtrip_enable_mask = TEGRA210_THERMTRIP_MEM_EN_MASK, .thermtrip_threshold_mask = TEGRA210_THERMTRIP_GPUMEM_THRESH_MASK, + .thermctl_lvl0_offset = THERMCTL_LEVEL0_GROUP_MEM, + .thermctl_lvl0_up_thresh_mask = TEGRA210_THERMCTL_LVL0_UP_THRESH_MASK, + .thermctl_lvl0_dn_thresh_mask = TEGRA210_THERMCTL_LVL0_DN_THRESH_MASK, }; static const struct tegra_tsensor_group *tegra210_tsensor_groups[] = { @@ -194,4 +210,6 @@ const struct tegra_soctherm_soc tegra210_soctherm = { .num_ttgs = ARRAY_SIZE(tegra210_tsensor_groups), .tfuse = &tegra210_soctherm_fuse, .thresh_grain = TEGRA210_THRESH_GRAIN, + .bptt = TEGRA210_BPTT, + .use_ccroc = false, }; diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index e2fc6161dded..226b0b4aced6 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -520,6 +520,56 @@ exit: } EXPORT_SYMBOL_GPL(thermal_zone_get_temp); +void thermal_zone_set_trips(struct thermal_zone_device *tz) +{ + int low = -INT_MAX; + int high = INT_MAX; + int trip_temp, hysteresis; + int i, ret; + + mutex_lock(&tz->lock); + + if (!tz->ops->set_trips || !tz->ops->get_trip_hyst) + goto exit; + + for (i = 0; i < tz->trips; i++) { + int trip_low; + + tz->ops->get_trip_temp(tz, i, &trip_temp); + tz->ops->get_trip_hyst(tz, i, &hysteresis); + + trip_low = trip_temp - hysteresis; + + if (trip_low < tz->temperature && trip_low > low) + low = trip_low; + + if (trip_temp > tz->temperature && trip_temp < high) + high = trip_temp; + } + + /* No need to change trip points */ + if (tz->prev_low_trip == low && tz->prev_high_trip == high) + goto exit; + + tz->prev_low_trip = low; + tz->prev_high_trip = high; + + dev_dbg(&tz->device, + "new temperature boundaries: %d < x < %d\n", low, high); + + /* + * Set a temperature window. When this window is left the driver + * must inform the thermal core via thermal_zone_device_update. 
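+ * (Editor's illustration: with trips at 70000 and 100000, a hysteresis + * of 2000 on both and a current temperature of 75000, the window + * becomes 68000..100000; at a reading of 60000 it would instead be + * -INT_MAX..70000.)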
+ */ + ret = tz->ops->set_trips(tz, low, high); + if (ret) + dev_err(&tz->device, "Failed to set trips: %d\n", ret); + +exit: + mutex_unlock(&tz->lock); +} +EXPORT_SYMBOL_GPL(thermal_zone_set_trips); + static void update_temperature(struct thermal_zone_device *tz) { int temp, ret; @@ -557,7 +607,8 @@ static void thermal_zone_device_reset(struct thermal_zone_device *tz) pos->initialized = false; } -void thermal_zone_device_update(struct thermal_zone_device *tz) +void thermal_zone_device_update(struct thermal_zone_device *tz, + enum thermal_notify_event event) { int count; @@ -569,6 +620,10 @@ void thermal_zone_device_update(struct thermal_zone_device *tz) update_temperature(tz); + thermal_zone_set_trips(tz); + + tz->notify_event = event; + for (count = 0; count < tz->trips; count++) handle_thermal_trip(tz, count); } @@ -579,7 +634,7 @@ static void thermal_zone_device_check(struct work_struct *work) struct thermal_zone_device *tz = container_of(work, struct thermal_zone_device, poll_queue.work); - thermal_zone_device_update(tz); + thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); } /* sys I/F for thermal zone */ @@ -703,7 +758,7 @@ trip_point_temp_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; - thermal_zone_device_update(tz); + thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); return count; } @@ -754,6 +809,9 @@ trip_point_hyst_store(struct device *dev, struct device_attribute *attr, */ ret = tz->ops->set_trip_hyst(tz, trip, temperature); + if (!ret) + thermal_zone_set_trips(tz); + return ret ? ret : count; } @@ -822,7 +880,7 @@ passive_store(struct device *dev, struct device_attribute *attr, tz->forced_passive = state; - thermal_zone_device_update(tz); + thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); return count; } @@ -913,7 +971,7 @@ emul_temp_store(struct device *dev, struct device_attribute *attr, } if (!ret) - thermal_zone_device_update(tz); + thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); return ret ? ret : count; } @@ -1509,7 +1567,8 @@ __thermal_cooling_device_register(struct device_node *np, mutex_lock(&thermal_list_lock); list_for_each_entry(pos, &thermal_tz_list, node) if (atomic_cmpxchg(&pos->need_update, 1, 0)) - thermal_zone_device_update(pos); + thermal_zone_device_update(pos, + THERMAL_EVENT_UNSPECIFIED); mutex_unlock(&thermal_list_lock); return cdev; @@ -1952,7 +2011,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, thermal_zone_device_reset(tz); /* Update the new thermal zone and mark it as already updated. */ if (atomic_cmpxchg(&tz->need_update, 1, 0)) - thermal_zone_device_update(tz); + thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); return tz; @@ -2069,6 +2128,36 @@ exit: } EXPORT_SYMBOL_GPL(thermal_zone_get_zone_by_name); +/** + * thermal_zone_get_slope - return the slope attribute of the thermal zone + * @tz: thermal zone device with the slope attribute + * + * Return: If the thermal zone device has a slope attribute, return it, else + * return 1. + */ +int thermal_zone_get_slope(struct thermal_zone_device *tz) +{ + if (tz && tz->tzp) + return tz->tzp->slope; + return 1; +} +EXPORT_SYMBOL_GPL(thermal_zone_get_slope); + +/** + * thermal_zone_get_offset - return the offset attribute of the thermal zone + * @tz: thermal zone device with the offset attribute + * + * Return: If the thermal zone device has an offset attribute, return it, else + * return 0.
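+ * + * (Editor's note: both helpers back the usual linear correction that + * sensor drivers apply to raw readings, temp = slope * raw + offset; + * the defaults of 1 and 0 leave a reading unchanged.)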
+ */ +int thermal_zone_get_offset(struct thermal_zone_device *tz) +{ + if (tz && tz->tzp) + return tz->tzp->offset; + return 0; +} +EXPORT_SYMBOL_GPL(thermal_zone_get_offset); + #ifdef CONFIG_NET static const struct genl_multicast_group thermal_event_mcgrps[] = { { .name = THERMAL_GENL_MCAST_GROUP_NAME, }, @@ -2209,7 +2298,8 @@ static int thermal_pm_notify(struct notifier_block *nb, atomic_set(&in_suspend, 0); list_for_each_entry(tz, &thermal_tz_list, node) { thermal_zone_device_reset(tz); - thermal_zone_device_update(tz); + thermal_zone_device_update(tz, + THERMAL_EVENT_UNSPECIFIED); } break; default: diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c index 15c0a9ac2209..0586bd0f2bab 100644 --- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c +++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c @@ -52,7 +52,7 @@ static void ti_thermal_work(struct work_struct *work) struct ti_thermal_data *data = container_of(work, struct ti_thermal_data, thermal_wq); - thermal_zone_device_update(data->ti_thermal); + thermal_zone_device_update(data->ti_thermal, THERMAL_EVENT_UNSPECIFIED); dev_dbg(&data->ti_thermal->device, "updated thermal zone %s\n", data->ti_thermal->type); @@ -205,7 +205,7 @@ static int ti_thermal_set_mode(struct thermal_zone_device *thermal, data->mode = mode; ti_bandgap_write_update_interval(bgp, data->sensor_id, data->ti_thermal->polling_delay); - thermal_zone_device_update(data->ti_thermal); + thermal_zone_device_update(data->ti_thermal, THERMAL_EVENT_UNSPECIFIED); dev_dbg(&thermal->device, "thermal polling set for duration=%d msec\n", data->ti_thermal->polling_delay); @@ -239,7 +239,7 @@ static int ti_thermal_get_trip_temp(struct thermal_zone_device *thermal, return 0; } -static int __ti_thermal_get_trend(void *p, long *trend) +static int __ti_thermal_get_trend(void *p, int trip, enum thermal_trend *trend) { struct ti_thermal_data *data = p; struct ti_bandgap *bgp; @@ -252,22 +252,6 @@ static int __ti_thermal_get_trend(void *p, long *trend) if (ret) return ret; - *trend = tr; - - return 0; -} - -/* Get the temperature trend callback functions for thermal zone */ -static int ti_thermal_get_trend(struct thermal_zone_device *thermal, - int trip, enum thermal_trend *trend) -{ - int ret; - long tr; - - ret = __ti_thermal_get_trend(thermal->devdata, &tr); - if (ret) - return ret; - if (tr > 0) *trend = THERMAL_TREND_RAISING; else if (tr < 0) @@ -278,6 +262,13 @@ static int ti_thermal_get_trend(struct thermal_zone_device *thermal, return 0; } +/* Get the temperature trend callback functions for thermal zone */ +static int ti_thermal_get_trend(struct thermal_zone_device *thermal, + int trip, enum thermal_trend *trend) +{ + return __ti_thermal_get_trend(thermal->devdata, trip, trend); +} + /* Get critical temperature callback functions for thermal zone */ static int ti_thermal_get_crit_temp(struct thermal_zone_device *thermal, int *temp) diff --git a/drivers/thermal/user_space.c b/drivers/thermal/user_space.c index 10adcddc8821..c908150c268d 100644 --- a/drivers/thermal/user_space.c +++ b/drivers/thermal/user_space.c @@ -23,19 +23,30 @@ */ #include <linux/thermal.h> - +#include <linux/slab.h> #include "thermal_core.h" /** * notify_user_space - Notifies user space about thermal events * @tz - thermal_zone_device + * @trip - Trip point index * * This function notifies the user space through UEvents. 
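+ * + * The uevent carries the zone state as environment strings, e.g. + * NAME=x86_pkg_temp TEMP=95000 TRIP=1 EVENT=0 (illustrative values; + * EVENT is the enum thermal_notify_event passed to + * thermal_zone_device_update()).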
*/ static int notify_user_space(struct thermal_zone_device *tz, int trip) { + char *thermal_prop[5]; + int i; + mutex_lock(&tz->lock); - kobject_uevent(&tz->device.kobj, KOBJ_CHANGE); + thermal_prop[0] = kasprintf(GFP_KERNEL, "NAME=%s", tz->type); + thermal_prop[1] = kasprintf(GFP_KERNEL, "TEMP=%d", tz->temperature); + thermal_prop[2] = kasprintf(GFP_KERNEL, "TRIP=%d", trip); + thermal_prop[3] = kasprintf(GFP_KERNEL, "EVENT=%d", tz->notify_event); + thermal_prop[4] = NULL; + kobject_uevent_env(&tz->device.kobj, KOBJ_CHANGE, thermal_prop); + for (i = 0; i < 4; ++i) + kfree(thermal_prop[i]); mutex_unlock(&tz->lock); return 0; } diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 97f0a2bd93ed..95f4c1bcdb4c 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -348,7 +348,8 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work) } if (notify) { pr_debug("thermal_zone_device_update\n"); - thermal_zone_device_update(phdev->tzone); + thermal_zone_device_update(phdev->tzone, + THERMAL_EVENT_UNSPECIFIED); } } diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index 88b008fb8a4e..af2f117208f1 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -284,12 +284,14 @@ config FB_PM2_FIFO_DISCONNECT config FB_ARMCLCD tristate "ARM PrimeCell PL110 support" depends on ARM || ARM64 || COMPILE_TEST - depends on FB && ARM_AMBA + depends on FB && ARM_AMBA && HAS_IOMEM select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT select FB_MODE_HELPERS if OF select VIDEOMODE_HELPERS if OF + select BACKLIGHT_LCD_SUPPORT if OF + select BACKLIGHT_CLASS_DEVICE if OF help This framebuffer device driver is for the ARM PrimeCell PL110 Colour LCD controller. 
ARM PrimeCells provide the building @@ -305,6 +307,8 @@ config PLAT_VERSATILE_CLCD def_bool ARCH_VERSATILE || ARCH_REALVIEW || ARCH_VEXPRESS || ARCH_INTEGRATOR depends on ARM depends on FB_ARMCLCD && FB=y + select REGMAP + select MFD_SYSCON config FB_ACORN bool "Acorn VIDC support" @@ -2443,7 +2447,6 @@ config FB_SIMPLE source "drivers/video/fbdev/omap/Kconfig" source "drivers/video/fbdev/omap2/Kconfig" -source "drivers/video/fbdev/exynos/Kconfig" source "drivers/video/fbdev/mmp/Kconfig" config FB_SH_MOBILE_MERAM diff --git a/drivers/video/fbdev/Makefile b/drivers/video/fbdev/Makefile index f6731867dd26..ee8c81405a7f 100644 --- a/drivers/video/fbdev/Makefile +++ b/drivers/video/fbdev/Makefile @@ -6,8 +6,6 @@ obj-y += core/ -obj-$(CONFIG_EXYNOS_VIDEO) += exynos/ - obj-$(CONFIG_FB_MACMODES) += macmodes.o obj-$(CONFIG_FB_WMT_GE_ROPS) += wmt_ge_rops.o @@ -79,6 +77,7 @@ obj-$(CONFIG_FB_ATMEL) += atmel_lcdfb.o obj-$(CONFIG_FB_PVR2) += pvr2fb.o obj-$(CONFIG_FB_VOODOO1) += sstfb.o obj-$(CONFIG_FB_ARMCLCD) += amba-clcd.o +obj-$(CONFIG_ARCH_NOMADIK) += amba-clcd-nomadik.o obj-$(CONFIG_PLAT_VERSATILE_CLCD) += amba-clcd-versatile.o obj-$(CONFIG_FB_GOLDFISH) += goldfishfb.o obj-$(CONFIG_FB_68328) += 68328fb.o diff --git a/drivers/video/fbdev/amba-clcd-nomadik.c b/drivers/video/fbdev/amba-clcd-nomadik.c new file mode 100644 index 000000000000..0c06fcaaa6e8 --- /dev/null +++ b/drivers/video/fbdev/amba-clcd-nomadik.c @@ -0,0 +1,259 @@ +#include <linux/amba/bus.h> +#include <linux/amba/clcd.h> +#include <linux/gpio/consumer.h> +#include <linux/of.h> +#include <linux/of_graph.h> +#include <linux/delay.h> +#include <linux/bitops.h> +#include <linux/mfd/syscon.h> +#include <linux/regmap.h> + +#include "amba-clcd-nomadik.h" + +static struct gpio_desc *grestb; +static struct gpio_desc *scen; +static struct gpio_desc *scl; +static struct gpio_desc *sda; + +static u8 tpg110_readwrite_reg(bool write, u8 address, u8 outval) +{ + int i; + u8 inval = 0; + + /* Assert SCEN */ + gpiod_set_value_cansleep(scen, 1); + ndelay(150); + /* Hammer out the address */ + for (i = 5; i >= 0; i--) { + if (address & BIT(i)) + gpiod_set_value_cansleep(sda, 1); + else + gpiod_set_value_cansleep(sda, 0); + ndelay(150); + /* Send an SCL pulse */ + gpiod_set_value_cansleep(scl, 1); + ndelay(160); + gpiod_set_value_cansleep(scl, 0); + ndelay(160); + } + + if (write) { + /* WRITE */ + gpiod_set_value_cansleep(sda, 0); + } else { + /* READ */ + gpiod_set_value_cansleep(sda, 1); + } + ndelay(150); + /* Send an SCL pulse */ + gpiod_set_value_cansleep(scl, 1); + ndelay(160); + gpiod_set_value_cansleep(scl, 0); + ndelay(160); + + if (!write) + /* HiZ turn-around cycle */ + gpiod_direction_input(sda); + ndelay(150); + /* Send an SCL pulse */ + gpiod_set_value_cansleep(scl, 1); + ndelay(160); + gpiod_set_value_cansleep(scl, 0); + ndelay(160); + + /* Hammer in/out the data */ + for (i = 7; i >= 0; i--) { + int value; + + if (write) { + value = !!(outval & BIT(i)); + gpiod_set_value_cansleep(sda, value); + } else { + value = gpiod_get_value(sda); + if (value) + inval |= BIT(i); + } + ndelay(150); + /* Send an SCL pulse */ + gpiod_set_value_cansleep(scl, 1); + ndelay(160); + gpiod_set_value_cansleep(scl, 0); + ndelay(160); + } + + gpiod_direction_output(sda, 0); + /* Deassert SCEN */ + gpiod_set_value_cansleep(scen, 0); + /* Satisfies SCEN pulse width */ + udelay(1); + + return inval; +} + +static u8 tpg110_read_reg(u8 address) +{ + return tpg110_readwrite_reg(false, address, 0); +} + +static void tpg110_write_reg(u8 address, u8 outval) +{ + 
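	/* a single bit-banged 3-wire write: 6 address bits, the R/W flag, then 8 data bits, MSB first */ +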
tpg110_readwrite_reg(true, address, outval); +} + +static void tpg110_startup(struct device *dev) +{ + u8 val; + + dev_info(dev, "TPG110 display enable\n"); + /* De-assert the reset signal */ + gpiod_set_value_cansleep(grestb, 0); + mdelay(1); + dev_info(dev, "de-asserted GRESTB\n"); + + /* Test display communication */ + tpg110_write_reg(0x00, 0x55); + val = tpg110_read_reg(0x00); + if (val == 0x55) + dev_info(dev, "passed communication test\n"); + val = tpg110_read_reg(0x01); + dev_info(dev, "TPG110 chip ID: %d version: %d\n", + val>>4, val&0x0f); + + /* Show display resolution */ + val = tpg110_read_reg(0x02); + val &= 7; + switch (val) { + case 0x0: + dev_info(dev, "IN 400x240 RGB -> OUT 800x480 RGB (dual scan)"); + break; + case 0x1: + dev_info(dev, "IN 480x272 RGB -> OUT 800x480 RGB (dual scan)"); + break; + case 0x4: + dev_info(dev, "480x640 RGB"); + break; + case 0x5: + dev_info(dev, "480x272 RGB"); + break; + case 0x6: + dev_info(dev, "640x480 RGB"); + break; + case 0x7: + dev_info(dev, "800x480 RGB"); + break; + default: + dev_info(dev, "ILLEGAL RESOLUTION"); + break; + } + + val = tpg110_read_reg(0x03); + dev_info(dev, "resolution is controlled by %s\n", + (val & BIT(7)) ? "software" : "hardware"); +} + +static void tpg110_enable(struct clcd_fb *fb) +{ + struct device *dev = &fb->dev->dev; + static bool startup; + u8 val; + + if (!startup) { + tpg110_startup(dev); + startup = true; + } + + /* Take chip out of standby */ + val = tpg110_read_reg(0x03); + val |= BIT(0); + tpg110_write_reg(0x03, val); +} + +static void tpg110_disable(struct clcd_fb *fb) +{ + u8 val; + + dev_info(&fb->dev->dev, "TPG110 display disable\n"); + val = tpg110_read_reg(0x03); + /* Put into standby */ + val &= ~BIT(0); + tpg110_write_reg(0x03, val); +} + +static void tpg110_init(struct device *dev, struct device_node *np, + struct clcd_board *board) +{ + dev_info(dev, "TPG110 display init\n"); + + grestb = devm_get_gpiod_from_child(dev, "grestb", &np->fwnode); + if (IS_ERR(grestb)) { + dev_err(dev, "no GRESTB GPIO\n"); + return; + } + /* This asserts the GRESTB signal, putting the display into reset */ + gpiod_direction_output(grestb, 1); + + scen = devm_get_gpiod_from_child(dev, "scen", &np->fwnode); + if (IS_ERR(scen)) { + dev_err(dev, "no SCEN GPIO\n"); + return; + } + gpiod_direction_output(scen, 0); + scl = devm_get_gpiod_from_child(dev, "scl", &np->fwnode); + if (IS_ERR(scl)) { + dev_err(dev, "no SCL GPIO\n"); + return; + } + gpiod_direction_output(scl, 0); + sda = devm_get_gpiod_from_child(dev, "sda", &np->fwnode); + if (IS_ERR(sda)) { + dev_err(dev, "no SDA GPIO\n"); + return; + } + gpiod_direction_output(sda, 0); + board->enable = tpg110_enable; + board->disable = tpg110_disable; +} + +int nomadik_clcd_init_panel(struct clcd_fb *fb, + struct device_node *endpoint) +{ + struct device_node *panel; + + panel = of_graph_get_remote_port_parent(endpoint); + if (!panel) + return -ENODEV; + + if (of_device_is_compatible(panel, "tpo,tpg110")) + tpg110_init(&fb->dev->dev, panel, fb->board); + else + dev_info(&fb->dev->dev, "unknown panel\n"); + + /* Unknown panel, fall through */ + return 0; +} +EXPORT_SYMBOL_GPL(nomadik_clcd_init_panel); + +#define PMU_CTRL_OFFSET 0x0000 +#define PMU_CTRL_LCDNDIF BIT(26) + +int nomadik_clcd_init_board(struct amba_device *adev, + struct clcd_board *board) +{ + struct regmap *pmu_regmap; + + dev_info(&adev->dev, "Nomadik CLCD board init\n"); + pmu_regmap = + syscon_regmap_lookup_by_compatible("stericsson,nomadik-pmu"); + if (IS_ERR(pmu_regmap)) { + dev_err(&adev->dev, "could 
not find PMU syscon regmap\n"); + return PTR_ERR(pmu_regmap); + } + regmap_update_bits(pmu_regmap, + PMU_CTRL_OFFSET, + PMU_CTRL_LCDNDIF, + 0); + dev_info(&adev->dev, "set PMU mux to CLCD mode\n"); + + return 0; +} +EXPORT_SYMBOL_GPL(nomadik_clcd_init_board); diff --git a/drivers/video/fbdev/amba-clcd-nomadik.h b/drivers/video/fbdev/amba-clcd-nomadik.h new file mode 100644 index 000000000000..50aa9bda69fd --- /dev/null +++ b/drivers/video/fbdev/amba-clcd-nomadik.h @@ -0,0 +1,24 @@ +#ifndef _AMBA_CLCD_NOMADIK_H +#define _AMBA_CLCD_NOMADIK_H + +#include <linux/amba/bus.h> + +#ifdef CONFIG_ARCH_NOMADIK +int nomadik_clcd_init_board(struct amba_device *adev, + struct clcd_board *board); +int nomadik_clcd_init_panel(struct clcd_fb *fb, + struct device_node *endpoint); +#else +static inline int nomadik_clcd_init_board(struct amba_device *adev, + struct clcd_board *board) +{ + return 0; +} +static inline int nomadik_clcd_init_panel(struct clcd_fb *fb, + struct device_node *endpoint) +{ + return 0; +} +#endif + +#endif /* inclusion guard */ diff --git a/drivers/video/fbdev/amba-clcd-versatile.c b/drivers/video/fbdev/amba-clcd-versatile.c index a8a22daa3f9d..19ad8645d93c 100644 --- a/drivers/video/fbdev/amba-clcd-versatile.c +++ b/drivers/video/fbdev/amba-clcd-versatile.c @@ -3,6 +3,12 @@ #include <linux/amba/bus.h> #include <linux/amba/clcd.h> #include <linux/platform_data/video-clcd-versatile.h> +#include <linux/of.h> +#include <linux/of_graph.h> +#include <linux/regmap.h> +#include <linux/mfd/syscon.h> +#include <linux/bitops.h> +#include "amba-clcd-versatile.h" static struct clcd_panel vga = { .mode = { @@ -178,3 +184,392 @@ void versatile_clcd_remove_dma(struct clcd_fb *fb) dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base, fb->fb.fix.smem_start); } + +#ifdef CONFIG_OF + +static struct regmap *versatile_syscon_map; +static struct regmap *versatile_ib2_map; + +/* + * We detect the different syscon types from the compatible strings. + */ +enum versatile_clcd { + INTEGRATOR_CLCD_CM, + VERSATILE_CLCD, + REALVIEW_CLCD_EB, + REALVIEW_CLCD_PB1176, + REALVIEW_CLCD_PB11MP, + REALVIEW_CLCD_PBA8, + REALVIEW_CLCD_PBX, +}; + +static const struct of_device_id versatile_clcd_of_match[] = { + { + .compatible = "arm,core-module-integrator", + .data = (void *)INTEGRATOR_CLCD_CM, + }, + { + .compatible = "arm,versatile-sysreg", + .data = (void *)VERSATILE_CLCD, + }, + { + .compatible = "arm,realview-eb-syscon", + .data = (void *)REALVIEW_CLCD_EB, + }, + { + .compatible = "arm,realview-pb1176-syscon", + .data = (void *)REALVIEW_CLCD_PB1176, + }, + { + .compatible = "arm,realview-pb11mp-syscon", + .data = (void *)REALVIEW_CLCD_PB11MP, + }, + { + .compatible = "arm,realview-pba8-syscon", + .data = (void *)REALVIEW_CLCD_PBA8, + }, + { + .compatible = "arm,realview-pbx-syscon", + .data = (void *)REALVIEW_CLCD_PBX, + }, + {}, +}; + +/* + * Core module CLCD control on the Integrator/CP, bits + * 8 thru 19 of the CM_CONTROL register controls a bunch + * of CLCD settings. 
+ */ +#define INTEGRATOR_HDR_CTRL_OFFSET 0x0C +#define INTEGRATOR_CLCD_LCDBIASEN BIT(8) +#define INTEGRATOR_CLCD_LCDBIASUP BIT(9) +#define INTEGRATOR_CLCD_LCDBIASDN BIT(10) +/* Bits 11, 12 and 13 control the LCD type */ +#define INTEGRATOR_CLCD_LCDMUX_MASK (BIT(11)|BIT(12)|BIT(13)) +#define INTEGRATOR_CLCD_LCDMUX_LCD24 BIT(11) +#define INTEGRATOR_CLCD_LCDMUX_VGA565 BIT(12) +#define INTEGRATOR_CLCD_LCDMUX_SHARP (BIT(11)|BIT(12)) +#define INTEGRATOR_CLCD_LCDMUX_VGA555 BIT(13) +#define INTEGRATOR_CLCD_LCDMUX_VGA24 (BIT(11)|BIT(12)|BIT(13)) +#define INTEGRATOR_CLCD_LCD0_EN BIT(14) +#define INTEGRATOR_CLCD_LCD1_EN BIT(15) +/* R/L flip on Sharp */ +#define INTEGRATOR_CLCD_LCD_STATIC1 BIT(16) +/* U/D flip on Sharp */ +#define INTEGRATOR_CLCD_LCD_STATIC2 BIT(17) +/* No connection on Sharp */ +#define INTEGRATOR_CLCD_LCD_STATIC BIT(18) +/* 0 = 24bit VGA, 1 = 18bit VGA */ +#define INTEGRATOR_CLCD_LCD_N24BITEN BIT(19) + +#define INTEGRATOR_CLCD_MASK (INTEGRATOR_CLCD_LCDBIASEN | \ + INTEGRATOR_CLCD_LCDBIASUP | \ + INTEGRATOR_CLCD_LCDBIASDN | \ + INTEGRATOR_CLCD_LCDMUX_MASK | \ + INTEGRATOR_CLCD_LCD0_EN | \ + INTEGRATOR_CLCD_LCD1_EN | \ + INTEGRATOR_CLCD_LCD_STATIC1 | \ + INTEGRATOR_CLCD_LCD_STATIC2 | \ + INTEGRATOR_CLCD_LCD_STATIC | \ + INTEGRATOR_CLCD_LCD_N24BITEN) + +static void integrator_clcd_enable(struct clcd_fb *fb) +{ + struct fb_var_screeninfo *var = &fb->fb.var; + u32 val; + + dev_info(&fb->dev->dev, "enable Integrator CLCD connectors\n"); + + /* FIXME: really needed? */ + val = INTEGRATOR_CLCD_LCD_STATIC1 | INTEGRATOR_CLCD_LCD_STATIC2 | + INTEGRATOR_CLCD_LCD0_EN | INTEGRATOR_CLCD_LCD1_EN; + if (var->bits_per_pixel <= 8 || + (var->bits_per_pixel == 16 && var->green.length == 5)) + /* Pseudocolor, RGB555, BGR555 */ + val |= INTEGRATOR_CLCD_LCDMUX_VGA555; + else if (fb->fb.var.bits_per_pixel <= 16) + /* truecolor RGB565 */ + val |= INTEGRATOR_CLCD_LCDMUX_VGA565; + else + val = 0; /* no idea for this, don't trust the docs */ + + regmap_update_bits(versatile_syscon_map, + INTEGRATOR_HDR_CTRL_OFFSET, + INTEGRATOR_CLCD_MASK, + val); +} + +/* + * This configuration register in the Versatile and RealView + * family is uniformly present but appears increasingly + * unused starting with the RealView series.
+ */ +#define SYS_CLCD 0x50 +#define SYS_CLCD_MODE_MASK (BIT(0)|BIT(1)) +#define SYS_CLCD_MODE_888 0 +#define SYS_CLCD_MODE_5551 BIT(0) +#define SYS_CLCD_MODE_565_R_LSB BIT(1) +#define SYS_CLCD_MODE_565_B_LSB (BIT(0)|BIT(1)) +#define SYS_CLCD_CONNECTOR_MASK (BIT(2)|BIT(3)|BIT(4)|BIT(5)) +#define SYS_CLCD_NLCDIOON BIT(2) +#define SYS_CLCD_VDDPOSSWITCH BIT(3) +#define SYS_CLCD_PWR3V5SWITCH BIT(4) +#define SYS_CLCD_VDDNEGSWITCH BIT(5) +#define SYS_CLCD_TSNSS BIT(6) /* touchscreen enable */ +#define SYS_CLCD_SSPEXP BIT(7) /* SSP expansion enable */ + +/* The Versatile can detect the connected panel type */ +#define SYS_CLCD_CLCDID_MASK (BIT(8)|BIT(9)|BIT(10)|BIT(11)|BIT(12)) +#define SYS_CLCD_ID_SANYO_3_8 (0x00 << 8) +#define SYS_CLCD_ID_SHARP_8_4 (0x01 << 8) +#define SYS_CLCD_ID_EPSON_2_2 (0x02 << 8) +#define SYS_CLCD_ID_SANYO_2_5 (0x07 << 8) +#define SYS_CLCD_ID_VGA (0x1f << 8) + +#define SYS_CLCD_TSNDAV BIT(13) /* data ready from TS */ + +/* IB2 control register for the Versatile daughterboard */ +#define IB2_CTRL 0x00 +#define IB2_CTRL_LCD_SD BIT(1) /* 1 = shut down LCD */ +#define IB2_CTRL_LCD_BL_ON BIT(0) +#define IB2_CTRL_LCD_MASK (BIT(0)|BIT(1)) + +static void versatile_clcd_disable(struct clcd_fb *fb) +{ + dev_info(&fb->dev->dev, "disable Versatile CLCD connectors\n"); + regmap_update_bits(versatile_syscon_map, + SYS_CLCD, + SYS_CLCD_CONNECTOR_MASK, + 0); + + /* If we're on an IB2 daughterboard, turn off display */ + if (versatile_ib2_map) { + dev_info(&fb->dev->dev, "disable IB2 display\n"); + regmap_update_bits(versatile_ib2_map, + IB2_CTRL, + IB2_CTRL_LCD_MASK, + IB2_CTRL_LCD_SD); + } +} + +static void versatile_clcd_enable(struct clcd_fb *fb) +{ + struct fb_var_screeninfo *var = &fb->fb.var; + u32 val = 0; + + dev_info(&fb->dev->dev, "enable Versatile CLCD connectors\n"); + switch (var->green.length) { + case 5: + val |= SYS_CLCD_MODE_5551; + break; + case 6: + if (var->red.offset == 0) + val |= SYS_CLCD_MODE_565_R_LSB; + else + val |= SYS_CLCD_MODE_565_B_LSB; + break; + case 8: + val |= SYS_CLCD_MODE_888; + break; + } + + /* Set up the MUX */ + regmap_update_bits(versatile_syscon_map, + SYS_CLCD, + SYS_CLCD_MODE_MASK, + val); + + /* Then enable the display */ + regmap_update_bits(versatile_syscon_map, + SYS_CLCD, + SYS_CLCD_CONNECTOR_MASK, + SYS_CLCD_NLCDIOON | SYS_CLCD_PWR3V5SWITCH); + + /* If we're on an IB2 daughterboard, turn on display */ + if (versatile_ib2_map) { + dev_info(&fb->dev->dev, "enable IB2 display\n"); + regmap_update_bits(versatile_ib2_map, + IB2_CTRL, + IB2_CTRL_LCD_MASK, + IB2_CTRL_LCD_BL_ON); + } +} + +static void versatile_clcd_decode(struct clcd_fb *fb, struct clcd_regs *regs) +{ + clcdfb_decode(fb, regs); + + /* Always clear BGR for RGB565: we do the routing externally */ + if (fb->fb.var.green.length == 6) + regs->cntl &= ~CNTL_BGR; +} + +static void realview_clcd_disable(struct clcd_fb *fb) +{ + dev_info(&fb->dev->dev, "disable RealView CLCD connectors\n"); + regmap_update_bits(versatile_syscon_map, + SYS_CLCD, + SYS_CLCD_CONNECTOR_MASK, + 0); +} + +static void realview_clcd_enable(struct clcd_fb *fb) +{ + dev_info(&fb->dev->dev, "enable RealView CLCD connectors\n"); + regmap_update_bits(versatile_syscon_map, + SYS_CLCD, + SYS_CLCD_CONNECTOR_MASK, + SYS_CLCD_NLCDIOON | SYS_CLCD_PWR3V5SWITCH); +} + +struct versatile_panel { + u32 id; + char *compatible; + bool ib2; +}; + +static const struct versatile_panel versatile_panels[] = { + { + .id = SYS_CLCD_ID_VGA, + .compatible = "VGA", + }, + { + .id = SYS_CLCD_ID_SANYO_3_8, + .compatible =
"sanyo,tm38qv67a02a", + }, + { + .id = SYS_CLCD_ID_SHARP_8_4, + .compatible = "sharp,lq084v1dg21", + }, + { + .id = SYS_CLCD_ID_EPSON_2_2, + .compatible = "epson,l2f50113t00", + }, + { + .id = SYS_CLCD_ID_SANYO_2_5, + .compatible = "sanyo,alr252rgt", + .ib2 = true, + }, +}; + +static void versatile_panel_probe(struct device *dev, + struct device_node *endpoint) +{ + struct versatile_panel const *vpanel = NULL; + struct device_node *panel = NULL; + u32 val; + int ret; + int i; + + /* + * The Versatile CLCD has a panel auto-detection mechanism. + * We use this and look for the compatible panel in the + * device tree. + */ + ret = regmap_read(versatile_syscon_map, SYS_CLCD, &val); + if (ret) { + dev_err(dev, "cannot read CLCD syscon register\n"); + return; + } + val &= SYS_CLCD_CLCDID_MASK; + + /* First find corresponding panel information */ + for (i = 0; i < ARRAY_SIZE(versatile_panels); i++) { + vpanel = &versatile_panels[i]; + + if (val == vpanel->id) { + dev_info(dev, "autodetected panel \"%s\"\n", + vpanel->compatible); + break; + } + } + if (i == ARRAY_SIZE(versatile_panels)) { + dev_err(dev, "could not auto-detect panel\n"); + return; + } + + panel = of_graph_get_remote_port_parent(endpoint); + if (!panel) { + dev_err(dev, "could not locate panel in DT\n"); + return; + } + if (!of_device_is_compatible(panel, vpanel->compatible)) + dev_err(dev, "panel in DT is not compatible with the " "auto-detected panel, continuing anyway\n"); + + /* + * If we have a Sanyo 2.5" panel we can assume + * that we're running on an IB2 and proceed to look for the + * IB2 syscon regmap. + */ + if (!vpanel->ib2) + return; + + versatile_ib2_map = syscon_regmap_lookup_by_compatible( "arm,versatile-ib2-syscon"); + if (IS_ERR(versatile_ib2_map)) { + dev_err(dev, "could not locate IB2 control register\n"); + versatile_ib2_map = NULL; + return; + } +} + +int versatile_clcd_init_panel(struct clcd_fb *fb, + struct device_node *endpoint) +{ + const struct of_device_id *clcd_id; + enum versatile_clcd versatile_clcd_type; + struct device_node *np; + struct regmap *map; + struct device *dev = &fb->dev->dev; + + np = of_find_matching_node_and_match(NULL, versatile_clcd_of_match, + &clcd_id); + if (!np) { + dev_err(dev, "no Versatile syscon node\n"); + return -ENODEV; + } + versatile_clcd_type = (enum versatile_clcd)clcd_id->data; + + map = syscon_node_to_regmap(np); + if (IS_ERR(map)) { + dev_err(dev, "no Versatile syscon regmap\n"); + return PTR_ERR(map); + } + + switch (versatile_clcd_type) { + case INTEGRATOR_CLCD_CM: + versatile_syscon_map = map; + fb->board->enable = integrator_clcd_enable; + /* Override the caps, we have only these */ + fb->board->caps = CLCD_CAP_5551 | CLCD_CAP_RGB565 | + CLCD_CAP_888; + dev_info(dev, "set up callbacks for Integrator PL110\n"); + break; + case VERSATILE_CLCD: + versatile_syscon_map = map; + fb->board->enable = versatile_clcd_enable; + fb->board->disable = versatile_clcd_disable; + fb->board->decode = versatile_clcd_decode; + versatile_panel_probe(dev, endpoint); + dev_info(dev, "set up callbacks for Versatile\n"); + break; + case REALVIEW_CLCD_EB: + case REALVIEW_CLCD_PB1176: + case REALVIEW_CLCD_PB11MP: + case REALVIEW_CLCD_PBA8: + case REALVIEW_CLCD_PBX: + versatile_syscon_map = map; + fb->board->enable = realview_clcd_enable; + fb->board->disable = realview_clcd_disable; + dev_info(dev, "set up callbacks for RealView PL111\n"); + break; + default: + dev_info(dev, "unknown Versatile system controller\n"); + break; + } + + return 0; +} +EXPORT_SYMBOL_GPL(versatile_clcd_init_panel); +#endif
diff --git a/drivers/video/fbdev/amba-clcd-versatile.h b/drivers/video/fbdev/amba-clcd-versatile.h new file mode 100644 index 000000000000..1b14359c2cf6 --- /dev/null +++ b/drivers/video/fbdev/amba-clcd-versatile.h @@ -0,0 +1,17 @@ +/* + * Special local versatile callbacks + */ +#include <linux/of.h> +#include <linux/amba/bus.h> +#include <linux/platform_data/video-clcd-versatile.h> + +#if defined(CONFIG_PLAT_VERSATILE_CLCD) && defined(CONFIG_OF) +int versatile_clcd_init_panel(struct clcd_fb *fb, + struct device_node *endpoint); +#else +static inline int versatile_clcd_init_panel(struct clcd_fb *fb, + struct device_node *endpoint) +{ + return 0; +} +#endif diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c index 9b158869cb89..ec2671d98abc 100644 --- a/drivers/video/fbdev/amba-clcd.c +++ b/drivers/video/fbdev/amba-clcd.c @@ -30,10 +30,14 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_graph.h> +#include <linux/backlight.h> #include <video/display_timing.h> #include <video/of_display_timing.h> #include <video/videomode.h> +#include "amba-clcd-nomadik.h" +#include "amba-clcd-versatile.h" + #define to_clcd(info) container_of(info, struct clcd_fb, fb) /* This is limited to 16 characters when displayed by X startup */ @@ -71,6 +75,11 @@ static void clcdfb_disable(struct clcd_fb *fb) if (fb->board->disable) fb->board->disable(fb); + if (fb->panel->backlight) { + fb->panel->backlight->props.power = FB_BLANK_POWERDOWN; + backlight_update_status(fb->panel->backlight); + } + val = readl(fb->regs + fb->off_cntl); if (val & CNTL_LCDPWR) { val &= ~CNTL_LCDPWR; @@ -117,6 +126,14 @@ static void clcdfb_enable(struct clcd_fb *fb, u32 cntl) writel(cntl, fb->regs + fb->off_cntl); /* + * Turn on backlight + */ + if (fb->panel->backlight) { + fb->panel->backlight->props.power = FB_BLANK_UNBLANK; + backlight_update_status(fb->panel->backlight); + } + + /* * finally, enable the interface. 
*/ if (fb->board->enable) @@ -211,6 +228,15 @@ clcdfb_set_bitfields(struct clcd_fb *fb, struct fb_var_screeninfo *var) var->blue.length = 4; } break; + case 24: + if (fb->vendor->packed_24_bit_pixels) { + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + } else { + ret = -EINVAL; + } + break; case 32: /* If we can't do 888, reject */ caps &= CLCD_CAP_888; @@ -297,6 +323,12 @@ static int clcdfb_set_par(struct fb_info *info) clcdfb_disable(fb); + /* Some variants must be clocked here */ + if (fb->vendor->clock_timregs && !fb->clk_enabled) { + fb->clk_enabled = true; + clk_enable(fb->clk); + } + writel(regs.tim0, fb->regs + CLCD_TIM0); writel(regs.tim1, fb->regs + CLCD_TIM1); writel(regs.tim2, fb->regs + CLCD_TIM2); @@ -551,7 +583,7 @@ static int clcdfb_register(struct clcd_fb *fb) #ifdef CONFIG_OF static int clcdfb_of_get_dpi_panel_mode(struct device_node *node, - struct fb_videomode *mode) + struct clcd_panel *clcd_panel) { int err; struct display_timing timing; @@ -563,10 +595,31 @@ static int clcdfb_of_get_dpi_panel_mode(struct device_node *node, videomode_from_timing(&timing, &video); - err = fb_videomode_from_videomode(&video, mode); + err = fb_videomode_from_videomode(&video, &clcd_panel->mode); if (err) return err; + /* Set up some inversion flags */ + if (timing.flags & DISPLAY_FLAGS_PIXDATA_NEGEDGE) + clcd_panel->tim2 |= TIM2_IPC; + else if (!(timing.flags & DISPLAY_FLAGS_PIXDATA_POSEDGE)) + /* + * To preserve backwards compatibility, the IPC (inverted + * pixel clock) flag needs to be set on any display that + * doesn't explicitly specify that the pixel clock is + * active on the negative or positive edge. + */ + clcd_panel->tim2 |= TIM2_IPC; + + if (timing.flags & DISPLAY_FLAGS_HSYNC_LOW) + clcd_panel->tim2 |= TIM2_IHS; + + if (timing.flags & DISPLAY_FLAGS_VSYNC_LOW) + clcd_panel->tim2 |= TIM2_IVS; + + if (timing.flags & DISPLAY_FLAGS_DE_LOW) + clcd_panel->tim2 |= TIM2_IOE; + return 0; } @@ -576,11 +629,34 @@ static int clcdfb_snprintf_mode(char *buf, int size, struct fb_videomode *mode) mode->refresh); } +static int clcdfb_of_get_backlight(struct device_node *endpoint, + struct clcd_panel *clcd_panel) +{ + struct device_node *panel; + struct device_node *backlight; + + panel = of_graph_get_remote_port_parent(endpoint); + if (!panel) + return -ENODEV; + + /* Look up the optional backlight phandle */ + backlight = of_parse_phandle(panel, "backlight", 0); + if (backlight) { + clcd_panel->backlight = of_find_backlight_by_node(backlight); + of_node_put(backlight); + + if (!clcd_panel->backlight) + return -EPROBE_DEFER; + } + return 0; +} + static int clcdfb_of_get_mode(struct device *dev, struct device_node *endpoint, - struct fb_videomode *mode) + struct clcd_panel *clcd_panel) { int err; struct device_node *panel; + struct fb_videomode *mode; char *name; int len; @@ -590,11 +666,12 @@ static int clcdfb_of_get_mode(struct device *dev, struct device_node *endpoint, /* Only directly connected DPI panels supported for now */ if (of_device_is_compatible(panel, "panel-dpi")) - err = clcdfb_of_get_dpi_panel_mode(panel, mode); + err = clcdfb_of_get_dpi_panel_mode(panel, clcd_panel); else err = -ENOENT; if (err) return err; + mode = &clcd_panel->mode; len = clcdfb_snprintf_mode(NULL, 0, mode); name = devm_kzalloc(dev, len + 1, GFP_KERNEL); @@ -616,6 +693,7 @@ static int clcdfb_of_init_tft_panel(struct clcd_fb *fb, u32 r0, u32 g0, u32 b0) } panels[] = { { 0x110, 1, 7, 13, CLCD_CAP_5551 }, { 0x110, 0, 8, 16, CLCD_CAP_888 }, + { 0x110, 16, 8, 0, CLCD_CAP_888 }, { 0x111, 4, 
14, 20, CLCD_CAP_444 }, { 0x111, 3, 11, 19, CLCD_CAP_444 | CLCD_CAP_5551 }, { 0x111, 3, 10, 19, CLCD_CAP_444 | CLCD_CAP_5551 | @@ -625,8 +703,8 @@ static int clcdfb_of_init_tft_panel(struct clcd_fb *fb, u32 r0, u32 g0, u32 b0) }; int i; - /* Bypass pixel clock divider, data output on the falling edge */ - fb->panel->tim2 = TIM2_BCD | TIM2_IPC; + /* Bypass pixel clock divider */ + fb->panel->tim2 |= TIM2_BCD; /* TFT display, vert. comp. interrupt at the start of the back porch */ fb->panel->cntl |= CNTL_LCDTFT | CNTL_LCDVCOMP(1); @@ -643,6 +721,49 @@ static int clcdfb_of_init_tft_panel(struct clcd_fb *fb, u32 r0, u32 g0, u32 b0) fb->panel->caps = panels[i].caps; } + /* + * If we actually physically connected the R lines to B and + * vice versa + */ + if (r0 != 0 && b0 == 0) + fb->panel->bgr_connection = true; + + if (fb->panel->caps && fb->vendor->st_bitmux_control) { + /* + * Set up the special bits for the Nomadik control register + * (other platforms tend to do this through an external + * register). + */ + + /* Offset of the highest used color */ + int maxoff = max3(r0, g0, b0); + /* Most significant bit out, highest used bit */ + int msb = 0; + + if (fb->panel->caps & CLCD_CAP_888) { + msb = maxoff + 8 - 1; + } else if (fb->panel->caps & CLCD_CAP_565) { + msb = maxoff + 5 - 1; + fb->panel->cntl |= CNTL_ST_1XBPP_565; + } else if (fb->panel->caps & CLCD_CAP_5551) { + msb = maxoff + 5 - 1; + fb->panel->cntl |= CNTL_ST_1XBPP_5551; + } else if (fb->panel->caps & CLCD_CAP_444) { + msb = maxoff + 4 - 1; + fb->panel->cntl |= CNTL_ST_1XBPP_444; + } + + /* Send out as many bits as we need */ + if (msb > 17) + fb->panel->cntl |= CNTL_ST_CDWID_24; + else if (msb > 15) + fb->panel->cntl |= CNTL_ST_CDWID_18; + else if (msb > 11) + fb->panel->cntl |= CNTL_ST_CDWID_16; + else + fb->panel->cntl |= CNTL_ST_CDWID_12; + } + return fb->panel->caps ? 0 : -EINVAL; } @@ -658,11 +779,24 @@ static int clcdfb_of_init_display(struct clcd_fb *fb) if (!fb->panel) return -ENOMEM; + /* + * Fetch the panel endpoint. 
+ */ endpoint = of_graph_get_next_endpoint(fb->dev->dev.of_node, NULL); if (!endpoint) return -ENODEV; - err = clcdfb_of_get_mode(&fb->dev->dev, endpoint, &fb->panel->mode); + if (fb->vendor->init_panel) { + err = fb->vendor->init_panel(fb, endpoint); + if (err) + return err; + } + + err = clcdfb_of_get_backlight(endpoint, fb->panel); + if (err) + return err; + + err = clcdfb_of_get_mode(&fb->dev->dev, endpoint, fb->panel); if (err) return err; @@ -693,11 +827,11 @@ static int clcdfb_of_init_display(struct clcd_fb *fb) if (of_property_read_u32_array(endpoint, "arm,pl11x,tft-r0g0b0-pads", - tft_r0b0g0, ARRAY_SIZE(tft_r0b0g0)) == 0) - return clcdfb_of_init_tft_panel(fb, tft_r0b0g0[0], - tft_r0b0g0[1], tft_r0b0g0[2]); + tft_r0b0g0, ARRAY_SIZE(tft_r0b0g0)) != 0) + return -ENOENT; - return -ENOENT; + return clcdfb_of_init_tft_panel(fb, tft_r0b0g0[0], + tft_r0b0g0[1], tft_r0b0g0[2]); } static int clcdfb_of_vram_setup(struct clcd_fb *fb) @@ -818,6 +952,7 @@ static struct clcd_board *clcdfb_of_get_board(struct amba_device *dev) static int clcdfb_probe(struct amba_device *dev, const struct amba_id *id) { struct clcd_board *board = dev_get_platdata(&dev->dev); + struct clcd_vendor_data *vendor = id->data; struct clcd_fb *fb; int ret; @@ -827,6 +962,12 @@ static int clcdfb_probe(struct amba_device *dev, const struct amba_id *id) if (!board) return -EINVAL; + if (vendor->init_board) { + ret = vendor->init_board(dev, board); + if (ret) + return ret; + } + ret = dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32)); if (ret) goto out; @@ -845,17 +986,18 @@ static int clcdfb_probe(struct amba_device *dev, const struct amba_id *id) } fb->dev = dev; + fb->vendor = vendor; fb->board = board; - dev_info(&fb->dev->dev, "PL%03x rev%u at 0x%08llx\n", - amba_part(dev), amba_rev(dev), + dev_info(&fb->dev->dev, "PL%03x designer %02x rev%u at 0x%08llx\n", + amba_part(dev), amba_manf(dev), amba_rev(dev), (unsigned long long)dev->res.start); ret = fb->board->setup(fb); if (ret) goto free_fb; - ret = clcdfb_register(fb); + ret = clcdfb_register(fb); if (ret == 0) { amba_set_drvdata(dev, fb); goto out; @@ -891,10 +1033,30 @@ static int clcdfb_remove(struct amba_device *dev) return 0; } +static struct clcd_vendor_data vendor_arm = { + /* Sets up the versatile board displays */ + .init_panel = versatile_clcd_init_panel, +}; + +static struct clcd_vendor_data vendor_nomadik = { + .clock_timregs = true, + .packed_24_bit_pixels = true, + .st_bitmux_control = true, + .init_board = nomadik_clcd_init_board, + .init_panel = nomadik_clcd_init_panel, +}; + static struct amba_id clcdfb_id_table[] = { { .id = 0x00041110, .mask = 0x000ffffe, + .data = &vendor_arm, + }, + /* ST Electronics Nomadik variant */ + { + .id = 0x00180110, + .mask = 0x00fffffe, + .data = &vendor_nomadik, }, { 0, 0 }, }; diff --git a/drivers/video/fbdev/arcfb.c b/drivers/video/fbdev/arcfb.c index 1b0b233b8b39..1928cb2b5386 100644 --- a/drivers/video/fbdev/arcfb.c +++ b/drivers/video/fbdev/arcfb.c @@ -79,7 +79,7 @@ struct arcfb_par { spinlock_t lock; }; -static struct fb_fix_screeninfo arcfb_fix = { +static const struct fb_fix_screeninfo arcfb_fix = { .id = "arcfb", .type = FB_TYPE_PACKED_PIXELS, .visual = FB_VISUAL_MONO01, @@ -89,7 +89,7 @@ static struct fb_fix_screeninfo arcfb_fix = { .accel = FB_ACCEL_NONE, }; -static struct fb_var_screeninfo arcfb_var = { +static const struct fb_var_screeninfo arcfb_var = { .xres = 128, .yres = 64, .xres_virtual = 128, diff --git a/drivers/video/fbdev/asiliantfb.c b/drivers/video/fbdev/asiliantfb.c index 
7e8ddf00ccc2..91eea4583382 100644 --- a/drivers/video/fbdev/asiliantfb.c +++ b/drivers/video/fbdev/asiliantfb.c @@ -474,7 +474,7 @@ static void chips_hw_init(struct fb_info *p) write_fr(chips_init_fr[i].addr, chips_init_fr[i].data); } -static struct fb_fix_screeninfo asiliantfb_fix = { +static const struct fb_fix_screeninfo asiliantfb_fix = { .id = "Asiliant 69000", .type = FB_TYPE_PACKED_PIXELS, .visual = FB_VISUAL_PSEUDOCOLOR, @@ -483,7 +483,7 @@ static struct fb_fix_screeninfo asiliantfb_fix = { .smem_len = 0x200000, /* 2MB */ }; -static struct fb_var_screeninfo asiliantfb_var = { +static const struct fb_var_screeninfo asiliantfb_var = { .xres = 640, .yres = 480, .xres_virtual = 640, diff --git a/drivers/video/fbdev/aty/aty128fb.c b/drivers/video/fbdev/aty/aty128fb.c index 0a4626886b00..fa07242a78d2 100644 --- a/drivers/video/fbdev/aty/aty128fb.c +++ b/drivers/video/fbdev/aty/aty128fb.c @@ -93,7 +93,7 @@ #ifndef CONFIG_PPC_PMAC /* default mode */ -static struct fb_var_screeninfo default_var = { +static const struct fb_var_screeninfo default_var = { /* 640x480, 60 Hz, Non-Interlaced (25.175 MHz dotclock) */ 640, 480, 640, 480, 0, 0, 8, 0, {0, 8, 0}, {0, 8, 0}, {0, 8, 0}, {0, 0, 0}, @@ -104,7 +104,7 @@ static struct fb_var_screeninfo default_var = { #else /* CONFIG_PPC_PMAC */ /* default to 1024x768 at 75Hz on PPC - this will work * on the iMac, the usual 640x480 @ 60Hz doesn't. */ -static struct fb_var_screeninfo default_var = { +static const struct fb_var_screeninfo default_var = { /* 1024x768, 75 Hz, Non-Interlaced (78.75 MHz dotclock) */ 1024, 768, 1024, 768, 0, 0, 8, 0, {0, 8, 0}, {0, 8, 0}, {0, 8, 0}, {0, 0, 0}, @@ -375,7 +375,7 @@ static const struct aty128_meminfo ddr_sgram = { .name = "64-bit DDR SGRAM", }; -static struct fb_fix_screeninfo aty128fb_fix = { +static const struct fb_fix_screeninfo aty128fb_fix = { .id = "ATY Rage128", .type = FB_TYPE_PACKED_PIXELS, .visual = FB_VISUAL_PSEUDOCOLOR, diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c index f34ed47fcaf8..11026e726b68 100644 --- a/drivers/video/fbdev/aty/atyfb_base.c +++ b/drivers/video/fbdev/aty/atyfb_base.c @@ -212,7 +212,7 @@ struct pci_mmap_map { unsigned long prot_mask; }; -static struct fb_fix_screeninfo atyfb_fix = { +static const struct fb_fix_screeninfo atyfb_fix = { .id = "ATY Mach64", .type = FB_TYPE_PACKED_PIXELS, .visual = FB_VISUAL_PSEUDOCOLOR, diff --git a/drivers/video/fbdev/aty/radeon_monitor.c b/drivers/video/fbdev/aty/radeon_monitor.c index f1ce229de78d..278b421ab3fe 100644 --- a/drivers/video/fbdev/aty/radeon_monitor.c +++ b/drivers/video/fbdev/aty/radeon_monitor.c @@ -4,7 +4,7 @@ #include "../edid.h" -static struct fb_var_screeninfo radeonfb_default_var = { +static const struct fb_var_screeninfo radeonfb_default_var = { .xres = 640, .yres = 480, .xres_virtual = 640, diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c index f9507b1894df..6c2b2ca4a909 100644 --- a/drivers/video/fbdev/au1200fb.c +++ b/drivers/video/fbdev/au1200fb.c @@ -43,6 +43,7 @@ #include <linux/ctype.h> #include <linux/dma-mapping.h> #include <linux/slab.h> +#include <linux/uaccess.h> #include <asm/mach-au1x00/au1000.h> #include <asm/mach-au1x00/au1200fb.h> /* platform_data */ diff --git a/drivers/video/fbdev/bfin_adv7393fb.c b/drivers/video/fbdev/bfin_adv7393fb.c index e2d7d039ce3b..542ffaddc6ab 100644 --- a/drivers/video/fbdev/bfin_adv7393fb.c +++ b/drivers/video/fbdev/bfin_adv7393fb.c @@ -375,7 +375,6 @@ static int bfin_adv7393_fb_probe(struct i2c_client *client, { 
int ret = 0; struct proc_dir_entry *entry; - int num_modes = ARRAY_SIZE(known_modes); struct adv7393fb_device *fbdev = NULL; @@ -384,7 +383,7 @@ static int bfin_adv7393_fb_probe(struct i2c_client *client, return -EINVAL; } - if (mode > num_modes) { + if (mode >= ARRAY_SIZE(known_modes)) { dev_err(&client->dev, "mode %d: not supported", mode); return -EFAULT; } @@ -797,7 +796,7 @@ static struct i2c_driver bfin_adv7393_fb_driver = { static int __init bfin_adv7393_fb_driver_init(void) { -#if defined(CONFIG_I2C_BLACKFIN_TWI) || defined(CONFIG_I2C_BLACKFIN_TWI_MODULE) +#if IS_ENABLED(CONFIG_I2C_BLACKFIN_TWI) request_module("i2c-bfin-twi"); #else request_module("i2c-gpio"); diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c index 924bad45c176..37a37c4d04cb 100644 --- a/drivers/video/fbdev/efifb.c +++ b/drivers/video/fbdev/efifb.c @@ -50,9 +50,9 @@ static int efifb_setcolreg(unsigned regno, unsigned red, unsigned green, return 1; if (regno < 16) { - red >>= 8; - green >>= 8; - blue >>= 8; + red >>= 16 - info->var.red.length; + green >>= 16 - info->var.green.length; + blue >>= 16 - info->var.blue.length; ((u32 *)(info->pseudo_palette))[regno] = (red << info->var.red.offset) | (green << info->var.green.offset) | diff --git a/drivers/video/fbdev/exynos/Kconfig b/drivers/video/fbdev/exynos/Kconfig deleted file mode 100644 index d916bef94f25..000000000000 --- a/drivers/video/fbdev/exynos/Kconfig +++ /dev/null @@ -1,32 +0,0 @@ -# -# Exynos Video configuration -# - -menuconfig EXYNOS_VIDEO - tristate "Exynos Video driver support" - depends on ARCH_S5PV210 || ARCH_EXYNOS - help - This enables support for EXYNOS Video device. - -if EXYNOS_VIDEO - -# -# MIPI DSI driver -# - -config EXYNOS_MIPI_DSI - tristate "EXYNOS MIPI DSI driver support." - select GENERIC_PHY - help - This enables support for MIPI-DSI device. - -config EXYNOS_LCD_S6E8AX0 - tristate "S6E8AX0 MIPI AMOLED LCD Driver" - depends on EXYNOS_MIPI_DSI && BACKLIGHT_CLASS_DEVICE - depends on (LCD_CLASS_DEVICE = y) - default n - help - If you have an S6E8AX0 MIPI AMOLED LCD Panel, say Y to enable its - LCD control driver. - -endif # EXYNOS_VIDEO diff --git a/drivers/video/fbdev/exynos/Makefile b/drivers/video/fbdev/exynos/Makefile deleted file mode 100644 index 02d8dc522fea..000000000000 --- a/drivers/video/fbdev/exynos/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -# -# Makefile for the exynos video drivers. -# - -obj-$(CONFIG_EXYNOS_MIPI_DSI) += exynos-mipi-dsi-mod.o - -exynos-mipi-dsi-mod-objs += exynos_mipi_dsi.o exynos_mipi_dsi_common.o \ - exynos_mipi_dsi_lowlevel.o -obj-$(CONFIG_EXYNOS_LCD_S6E8AX0) += s6e8ax0.o diff --git a/drivers/video/fbdev/exynos/exynos_mipi_dsi.c b/drivers/video/fbdev/exynos/exynos_mipi_dsi.c deleted file mode 100644 index 92e4af3caaf8..000000000000 --- a/drivers/video/fbdev/exynos/exynos_mipi_dsi.c +++ /dev/null @@ -1,574 +0,0 @@ -/* linux/drivers/video/exynos/exynos_mipi_dsi.c - * - * Samsung SoC MIPI-DSIM driver. - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd - * - * InKi Dae, <inki.dae@samsung.com> - * Donghwa Lee, <dh09.lee@samsung.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
-*/ - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/clk.h> -#include <linux/mutex.h> -#include <linux/wait.h> -#include <linux/fs.h> -#include <linux/mm.h> -#include <linux/fb.h> -#include <linux/ctype.h> -#include <linux/platform_device.h> -#include <linux/io.h> -#include <linux/irq.h> -#include <linux/memory.h> -#include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/kthread.h> -#include <linux/notifier.h> -#include <linux/phy/phy.h> -#include <linux/regulator/consumer.h> -#include <linux/pm_runtime.h> -#include <linux/err.h> - -#include <video/exynos_mipi_dsim.h> - -#include "exynos_mipi_dsi_common.h" -#include "exynos_mipi_dsi_lowlevel.h" - -struct mipi_dsim_ddi { - int bus_id; - struct list_head list; - struct mipi_dsim_lcd_device *dsim_lcd_dev; - struct mipi_dsim_lcd_driver *dsim_lcd_drv; -}; - -static LIST_HEAD(dsim_ddi_list); - -static DEFINE_MUTEX(mipi_dsim_lock); - -static struct mipi_dsim_platform_data *to_dsim_plat(struct platform_device - *pdev) -{ - return pdev->dev.platform_data; -} - -static struct regulator_bulk_data supplies[] = { - { .supply = "vdd11", }, - { .supply = "vdd18", }, -}; - -static int exynos_mipi_regulator_enable(struct mipi_dsim_device *dsim) -{ - int ret; - - mutex_lock(&dsim->lock); - ret = regulator_bulk_enable(ARRAY_SIZE(supplies), supplies); - mutex_unlock(&dsim->lock); - - return ret; -} - -static int exynos_mipi_regulator_disable(struct mipi_dsim_device *dsim) -{ - int ret; - - mutex_lock(&dsim->lock); - ret = regulator_bulk_disable(ARRAY_SIZE(supplies), supplies); - mutex_unlock(&dsim->lock); - - return ret; -} - -/* update all register settings to MIPI DSI controller. */ -static void exynos_mipi_update_cfg(struct mipi_dsim_device *dsim) -{ - /* - * data from Display controller(FIMD) is not transferred in video mode - * but in case of command mode, all settings is not updated to - * registers. - */ - exynos_mipi_dsi_stand_by(dsim, 0); - - exynos_mipi_dsi_init_dsim(dsim); - exynos_mipi_dsi_init_link(dsim); - - exynos_mipi_dsi_set_hs_enable(dsim); - - /* set display timing. */ - exynos_mipi_dsi_set_display_mode(dsim, dsim->dsim_config); - - exynos_mipi_dsi_init_interrupt(dsim); - - /* - * data from Display controller(FIMD) is transferred in video mode - * but in case of command mode, all settings are updated to registers. - */ - exynos_mipi_dsi_stand_by(dsim, 1); -} - -static int exynos_mipi_dsi_early_blank_mode(struct mipi_dsim_device *dsim, - int power) -{ - struct mipi_dsim_lcd_driver *client_drv = dsim->dsim_lcd_drv; - struct mipi_dsim_lcd_device *client_dev = dsim->dsim_lcd_dev; - - switch (power) { - case FB_BLANK_POWERDOWN: - if (dsim->suspended) - return 0; - - if (client_drv && client_drv->suspend) - client_drv->suspend(client_dev); - - clk_disable(dsim->clock); - - exynos_mipi_regulator_disable(dsim); - - dsim->suspended = true; - - break; - default: - break; - } - - return 0; -} - -static int exynos_mipi_dsi_blank_mode(struct mipi_dsim_device *dsim, int power) -{ - struct mipi_dsim_lcd_driver *client_drv = dsim->dsim_lcd_drv; - struct mipi_dsim_lcd_device *client_dev = dsim->dsim_lcd_dev; - - switch (power) { - case FB_BLANK_UNBLANK: - if (!dsim->suspended) - return 0; - - /* lcd panel power on. */ - if (client_drv && client_drv->power_on) - client_drv->power_on(client_dev, 1); - - exynos_mipi_regulator_enable(dsim); - - /* enable MIPI-DSI PHY. 
*/ - phy_power_on(dsim->phy); - - clk_enable(dsim->clock); - - exynos_mipi_update_cfg(dsim); - - /* set lcd panel sequence commands. */ - if (client_drv && client_drv->set_sequence) - client_drv->set_sequence(client_dev); - - dsim->suspended = false; - - break; - case FB_BLANK_NORMAL: - /* TODO. */ - break; - default: - break; - } - - return 0; -} - -int exynos_mipi_dsi_register_lcd_device(struct mipi_dsim_lcd_device *lcd_dev) -{ - struct mipi_dsim_ddi *dsim_ddi; - - if (!lcd_dev->name) { - pr_err("dsim_lcd_device name is NULL.\n"); - return -EFAULT; - } - - dsim_ddi = kzalloc(sizeof(struct mipi_dsim_ddi), GFP_KERNEL); - if (!dsim_ddi) { - pr_err("failed to allocate dsim_ddi object.\n"); - return -ENOMEM; - } - - dsim_ddi->dsim_lcd_dev = lcd_dev; - - mutex_lock(&mipi_dsim_lock); - list_add_tail(&dsim_ddi->list, &dsim_ddi_list); - mutex_unlock(&mipi_dsim_lock); - - return 0; -} - -static struct mipi_dsim_ddi *exynos_mipi_dsi_find_lcd_device( - struct mipi_dsim_lcd_driver *lcd_drv) -{ - struct mipi_dsim_ddi *dsim_ddi, *next; - struct mipi_dsim_lcd_device *lcd_dev; - - mutex_lock(&mipi_dsim_lock); - - list_for_each_entry_safe(dsim_ddi, next, &dsim_ddi_list, list) { - if (!dsim_ddi) - goto out; - - lcd_dev = dsim_ddi->dsim_lcd_dev; - if (!lcd_dev) - continue; - - if ((strcmp(lcd_drv->name, lcd_dev->name)) == 0) { - /** - * bus_id would be used to identify - * connected bus. - */ - dsim_ddi->bus_id = lcd_dev->bus_id; - mutex_unlock(&mipi_dsim_lock); - - return dsim_ddi; - } - - list_del(&dsim_ddi->list); - kfree(dsim_ddi); - } - -out: - mutex_unlock(&mipi_dsim_lock); - - return NULL; -} - -int exynos_mipi_dsi_register_lcd_driver(struct mipi_dsim_lcd_driver *lcd_drv) -{ - struct mipi_dsim_ddi *dsim_ddi; - - if (!lcd_drv->name) { - pr_err("dsim_lcd_driver name is NULL.\n"); - return -EFAULT; - } - - dsim_ddi = exynos_mipi_dsi_find_lcd_device(lcd_drv); - if (!dsim_ddi) { - pr_err("mipi_dsim_ddi object not found.\n"); - return -EFAULT; - } - - dsim_ddi->dsim_lcd_drv = lcd_drv; - - pr_info("registered panel driver(%s) to mipi-dsi driver.\n", - lcd_drv->name); - - return 0; - -} -EXPORT_SYMBOL_GPL(exynos_mipi_dsi_register_lcd_driver); - -static struct mipi_dsim_ddi *exynos_mipi_dsi_bind_lcd_ddi( - struct mipi_dsim_device *dsim, - const char *name) -{ - struct mipi_dsim_ddi *dsim_ddi, *next; - struct mipi_dsim_lcd_driver *lcd_drv; - struct mipi_dsim_lcd_device *lcd_dev; - int ret; - - mutex_lock(&dsim->lock); - - list_for_each_entry_safe(dsim_ddi, next, &dsim_ddi_list, list) { - lcd_drv = dsim_ddi->dsim_lcd_drv; - lcd_dev = dsim_ddi->dsim_lcd_dev; - if (!lcd_drv || !lcd_dev || - (dsim->id != dsim_ddi->bus_id)) - continue; - - dev_dbg(dsim->dev, "lcd_drv->id = %d, lcd_dev->id = %d\n", - lcd_drv->id, lcd_dev->id); - dev_dbg(dsim->dev, "lcd_dev->bus_id = %d, dsim->id = %d\n", - lcd_dev->bus_id, dsim->id); - - if ((strcmp(lcd_drv->name, name) == 0)) { - lcd_dev->master = dsim; - - lcd_dev->dev.parent = dsim->dev; - dev_set_name(&lcd_dev->dev, "%s", lcd_drv->name); - - ret = device_register(&lcd_dev->dev); - if (ret < 0) { - dev_err(dsim->dev, - "can't register %s, status %d\n", - dev_name(&lcd_dev->dev), ret); - mutex_unlock(&dsim->lock); - - return NULL; - } - - dsim->dsim_lcd_dev = lcd_dev; - dsim->dsim_lcd_drv = lcd_drv; - - mutex_unlock(&dsim->lock); - - return dsim_ddi; - } - } - - mutex_unlock(&dsim->lock); - - return NULL; -} - -/* define MIPI-DSI Master operations. 
*/ -static struct mipi_dsim_master_ops master_ops = { - .cmd_read = exynos_mipi_dsi_rd_data, - .cmd_write = exynos_mipi_dsi_wr_data, - .get_dsim_frame_done = exynos_mipi_dsi_get_frame_done_status, - .clear_dsim_frame_done = exynos_mipi_dsi_clear_frame_done, - .set_early_blank_mode = exynos_mipi_dsi_early_blank_mode, - .set_blank_mode = exynos_mipi_dsi_blank_mode, -}; - -static int exynos_mipi_dsi_probe(struct platform_device *pdev) -{ - struct resource *res; - struct mipi_dsim_device *dsim; - struct mipi_dsim_config *dsim_config; - struct mipi_dsim_platform_data *dsim_pd; - struct mipi_dsim_ddi *dsim_ddi; - int ret = -EINVAL; - - dsim = devm_kzalloc(&pdev->dev, sizeof(struct mipi_dsim_device), - GFP_KERNEL); - if (!dsim) { - dev_err(&pdev->dev, "failed to allocate dsim object.\n"); - return -ENOMEM; - } - - dsim->pd = to_dsim_plat(pdev); - dsim->dev = &pdev->dev; - dsim->id = pdev->id; - - /* get mipi_dsim_platform_data. */ - dsim_pd = (struct mipi_dsim_platform_data *)dsim->pd; - if (dsim_pd == NULL) { - dev_err(&pdev->dev, "failed to get platform data for dsim.\n"); - return -EINVAL; - } - /* get mipi_dsim_config. */ - dsim_config = dsim_pd->dsim_config; - if (dsim_config == NULL) { - dev_err(&pdev->dev, "failed to get dsim config data.\n"); - return -EINVAL; - } - - dsim->dsim_config = dsim_config; - dsim->master_ops = &master_ops; - - mutex_init(&dsim->lock); - - ret = devm_regulator_bulk_get(&pdev->dev, ARRAY_SIZE(supplies), - supplies); - if (ret) { - dev_err(&pdev->dev, "Failed to get regulators: %d\n", ret); - return ret; - } - - dsim->phy = devm_phy_get(&pdev->dev, "dsim"); - if (IS_ERR(dsim->phy)) - return PTR_ERR(dsim->phy); - - dsim->clock = devm_clk_get(&pdev->dev, "dsim0"); - if (IS_ERR(dsim->clock)) { - dev_err(&pdev->dev, "failed to get dsim clock source\n"); - return -ENODEV; - } - - clk_enable(dsim->clock); - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - - dsim->reg_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(dsim->reg_base)) { - ret = PTR_ERR(dsim->reg_base); - goto error; - } - - mutex_init(&dsim->lock); - - /* bind lcd ddi matched with panel name. */ - dsim_ddi = exynos_mipi_dsi_bind_lcd_ddi(dsim, dsim_pd->lcd_panel_name); - if (!dsim_ddi) { - dev_err(&pdev->dev, "mipi_dsim_ddi object not found.\n"); - ret = -EINVAL; - goto error; - } - - ret = platform_get_irq(pdev, 0); - if (ret < 0) { - dev_err(&pdev->dev, "failed to request dsim irq resource\n"); - goto error; - } - dsim->irq = ret; - - init_completion(&dsim_wr_comp); - init_completion(&dsim_rd_comp); - platform_set_drvdata(pdev, dsim); - - ret = devm_request_irq(&pdev->dev, dsim->irq, - exynos_mipi_dsi_interrupt_handler, - IRQF_SHARED, dev_name(&pdev->dev), dsim); - if (ret != 0) { - dev_err(&pdev->dev, "failed to request dsim irq\n"); - ret = -EINVAL; - goto error; - } - - /* enable interrupts */ - exynos_mipi_dsi_init_interrupt(dsim); - - /* initialize mipi-dsi client(lcd panel). */ - if (dsim_ddi->dsim_lcd_drv && dsim_ddi->dsim_lcd_drv->probe) - dsim_ddi->dsim_lcd_drv->probe(dsim_ddi->dsim_lcd_dev); - - /* in case mipi-dsi has been enabled by bootloader */ - if (dsim_pd->enabled) { - exynos_mipi_regulator_enable(dsim); - goto done; - } - - /* lcd panel power on. */ - if (dsim_ddi->dsim_lcd_drv && dsim_ddi->dsim_lcd_drv->power_on) - dsim_ddi->dsim_lcd_drv->power_on(dsim_ddi->dsim_lcd_dev, 1); - - exynos_mipi_regulator_enable(dsim); - - /* enable MIPI-DSI PHY. */ - phy_power_on(dsim->phy); - - exynos_mipi_update_cfg(dsim); - - /* set lcd panel sequence commands. 
*/ - if (dsim_ddi->dsim_lcd_drv && dsim_ddi->dsim_lcd_drv->set_sequence) - dsim_ddi->dsim_lcd_drv->set_sequence(dsim_ddi->dsim_lcd_dev); - - dsim->suspended = false; - -done: - platform_set_drvdata(pdev, dsim); - - dev_dbg(&pdev->dev, "%s() completed successfully (%s mode)\n", __func__, - dsim_config->e_interface == DSIM_COMMAND ? "CPU" : "RGB"); - - return 0; - -error: - clk_disable(dsim->clock); - return ret; -} - -static int exynos_mipi_dsi_remove(struct platform_device *pdev) -{ - struct mipi_dsim_device *dsim = platform_get_drvdata(pdev); - struct mipi_dsim_ddi *dsim_ddi, *next; - struct mipi_dsim_lcd_driver *dsim_lcd_drv; - - clk_disable(dsim->clock); - - list_for_each_entry_safe(dsim_ddi, next, &dsim_ddi_list, list) { - if (dsim_ddi) { - if (dsim->id != dsim_ddi->bus_id) - continue; - - dsim_lcd_drv = dsim_ddi->dsim_lcd_drv; - - if (dsim_lcd_drv->remove) - dsim_lcd_drv->remove(dsim_ddi->dsim_lcd_dev); - - kfree(dsim_ddi); - } - } - - return 0; -} - -#ifdef CONFIG_PM_SLEEP -static int exynos_mipi_dsi_suspend(struct device *dev) -{ - struct platform_device *pdev = to_platform_device(dev); - struct mipi_dsim_device *dsim = platform_get_drvdata(pdev); - struct mipi_dsim_lcd_driver *client_drv = dsim->dsim_lcd_drv; - struct mipi_dsim_lcd_device *client_dev = dsim->dsim_lcd_dev; - - disable_irq(dsim->irq); - - if (dsim->suspended) - return 0; - - if (client_drv && client_drv->suspend) - client_drv->suspend(client_dev); - - /* disable MIPI-DSI PHY. */ - phy_power_off(dsim->phy); - - clk_disable(dsim->clock); - - exynos_mipi_regulator_disable(dsim); - - dsim->suspended = true; - - return 0; -} - -static int exynos_mipi_dsi_resume(struct device *dev) -{ - struct platform_device *pdev = to_platform_device(dev); - struct mipi_dsim_device *dsim = platform_get_drvdata(pdev); - struct mipi_dsim_lcd_driver *client_drv = dsim->dsim_lcd_drv; - struct mipi_dsim_lcd_device *client_dev = dsim->dsim_lcd_dev; - - enable_irq(dsim->irq); - - if (!dsim->suspended) - return 0; - - /* lcd panel power on. */ - if (client_drv && client_drv->power_on) - client_drv->power_on(client_dev, 1); - - exynos_mipi_regulator_enable(dsim); - - /* enable MIPI-DSI PHY. */ - phy_power_on(dsim->phy); - - clk_enable(dsim->clock); - - exynos_mipi_update_cfg(dsim); - - /* set lcd panel sequence commands. */ - if (client_drv && client_drv->set_sequence) - client_drv->set_sequence(client_dev); - - dsim->suspended = false; - - return 0; -} -#endif - -static const struct dev_pm_ops exynos_mipi_dsi_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(exynos_mipi_dsi_suspend, exynos_mipi_dsi_resume) -}; - -static struct platform_driver exynos_mipi_dsi_driver = { - .probe = exynos_mipi_dsi_probe, - .remove = exynos_mipi_dsi_remove, - .driver = { - .name = "exynos-mipi-dsim", - .pm = &exynos_mipi_dsi_pm_ops, - }, -}; - -module_platform_driver(exynos_mipi_dsi_driver); - -MODULE_AUTHOR("InKi Dae <inki.dae@samsung.com>"); -MODULE_DESCRIPTION("Samsung SoC MIPI-DSI driver"); -MODULE_LICENSE("GPL"); diff --git a/drivers/video/fbdev/exynos/exynos_mipi_dsi_common.c b/drivers/video/fbdev/exynos/exynos_mipi_dsi_common.c deleted file mode 100644 index 2358a2fbbbcd..000000000000 --- a/drivers/video/fbdev/exynos/exynos_mipi_dsi_common.c +++ /dev/null @@ -1,880 +0,0 @@ -/* linux/drivers/video/exynos/exynos_mipi_dsi_common.c - * - * Samsung SoC MIPI-DSI common driver. 
- * - * Copyright (c) 2012 Samsung Electronics Co., Ltd - * - * InKi Dae, <inki.dae@samsung.com> - * Donghwa Lee, <dh09.lee@samsung.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. -*/ - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/mutex.h> -#include <linux/wait.h> -#include <linux/fs.h> -#include <linux/mm.h> -#include <linux/fb.h> -#include <linux/ctype.h> -#include <linux/platform_device.h> -#include <linux/io.h> -#include <linux/memory.h> -#include <linux/delay.h> -#include <linux/irqreturn.h> -#include <linux/kthread.h> - -#include <video/mipi_display.h> -#include <video/exynos_mipi_dsim.h> - -#include "exynos_mipi_dsi_regs.h" -#include "exynos_mipi_dsi_lowlevel.h" -#include "exynos_mipi_dsi_common.h" - -#define MIPI_FIFO_TIMEOUT msecs_to_jiffies(250) -#define MIPI_RX_FIFO_READ_DONE 0x30800002 -#define MIPI_MAX_RX_FIFO 20 -#define MHZ (1000 * 1000) -#define FIN_HZ (24 * MHZ) - -#define DFIN_PLL_MIN_HZ (6 * MHZ) -#define DFIN_PLL_MAX_HZ (12 * MHZ) - -#define DFVCO_MIN_HZ (500 * MHZ) -#define DFVCO_MAX_HZ (1000 * MHZ) - -#define TRY_GET_FIFO_TIMEOUT (5000 * 2) -#define TRY_FIFO_CLEAR (10) - -/* MIPI-DSIM status types. */ -enum { - DSIM_STATE_INIT, /* should be initialized. */ - DSIM_STATE_STOP, /* CPU and LCDC are LP mode. */ - DSIM_STATE_HSCLKEN, /* HS clock was enabled. */ - DSIM_STATE_ULPS -}; - -/* define DSI lane types. */ -enum { - DSIM_LANE_CLOCK = (1 << 0), - DSIM_LANE_DATA0 = (1 << 1), - DSIM_LANE_DATA1 = (1 << 2), - DSIM_LANE_DATA2 = (1 << 3), - DSIM_LANE_DATA3 = (1 << 4) -}; - -static unsigned int dpll_table[15] = { - 100, 120, 170, 220, 270, - 320, 390, 450, 510, 560, - 640, 690, 770, 870, 950 -}; - -irqreturn_t exynos_mipi_dsi_interrupt_handler(int irq, void *dev_id) -{ - struct mipi_dsim_device *dsim = dev_id; - unsigned int intsrc, intmsk; - - intsrc = exynos_mipi_dsi_read_interrupt(dsim); - intmsk = exynos_mipi_dsi_read_interrupt_mask(dsim); - intmsk = ~intmsk & intsrc; - - if (intsrc & INTMSK_RX_DONE) { - complete(&dsim_rd_comp); - dev_dbg(dsim->dev, "MIPI INTMSK_RX_DONE\n"); - } - if (intsrc & INTMSK_FIFO_EMPTY) { - complete(&dsim_wr_comp); - dev_dbg(dsim->dev, "MIPI INTMSK_FIFO_EMPTY\n"); - } - - exynos_mipi_dsi_clear_interrupt(dsim, intmsk); - - return IRQ_HANDLED; -} - -/* - * write long packet to mipi dsi slave - * @dsim: mipi dsim device structure. - * @data0: packet data to send. - * @data1: size of packet data - */ -static void exynos_mipi_dsi_long_data_wr(struct mipi_dsim_device *dsim, - const unsigned char *data0, unsigned int data_size) -{ - unsigned int data_cnt = 0, payload = 0; - - /* in case that data count is more then 4 */ - for (data_cnt = 0; data_cnt < data_size; data_cnt += 4) { - /* - * after sending 4bytes per one time, - * send remainder data less then 4. 
- */ - if ((data_size - data_cnt) < 4) { - if ((data_size - data_cnt) == 3) { - payload = data0[data_cnt] | - data0[data_cnt + 1] << 8 | - data0[data_cnt + 2] << 16; - dev_dbg(dsim->dev, "count = 3 payload = %x, %x %x %x\n", - payload, data0[data_cnt], - data0[data_cnt + 1], - data0[data_cnt + 2]); - } else if ((data_size - data_cnt) == 2) { - payload = data0[data_cnt] | - data0[data_cnt + 1] << 8; - dev_dbg(dsim->dev, - "count = 2 payload = %x, %x %x\n", payload, - data0[data_cnt], - data0[data_cnt + 1]); - } else if ((data_size - data_cnt) == 1) { - payload = data0[data_cnt]; - } - - exynos_mipi_dsi_wr_tx_data(dsim, payload); - /* send 4bytes per one time. */ - } else { - payload = data0[data_cnt] | - data0[data_cnt + 1] << 8 | - data0[data_cnt + 2] << 16 | - data0[data_cnt + 3] << 24; - - dev_dbg(dsim->dev, - "count = 4 payload = %x, %x %x %x %x\n", - payload, *(u8 *)(data0 + data_cnt), - data0[data_cnt + 1], - data0[data_cnt + 2], - data0[data_cnt + 3]); - - exynos_mipi_dsi_wr_tx_data(dsim, payload); - } - } -} - -int exynos_mipi_dsi_wr_data(struct mipi_dsim_device *dsim, unsigned int data_id, - const unsigned char *data0, unsigned int data_size) -{ - unsigned int check_rx_ack = 0; - - if (dsim->state == DSIM_STATE_ULPS) { - dev_err(dsim->dev, "state is ULPS.\n"); - - return -EINVAL; - } - - /* FIXME!!! why does it need this delay? */ - msleep(20); - - mutex_lock(&dsim->lock); - - switch (data_id) { - /* short packet types of packet types for command. */ - case MIPI_DSI_GENERIC_SHORT_WRITE_0_PARAM: - case MIPI_DSI_GENERIC_SHORT_WRITE_1_PARAM: - case MIPI_DSI_GENERIC_SHORT_WRITE_2_PARAM: - case MIPI_DSI_DCS_SHORT_WRITE: - case MIPI_DSI_DCS_SHORT_WRITE_PARAM: - case MIPI_DSI_SET_MAXIMUM_RETURN_PACKET_SIZE: - exynos_mipi_dsi_wr_tx_header(dsim, data_id, data0[0], data0[1]); - if (check_rx_ack) { - /* process response func should be implemented */ - mutex_unlock(&dsim->lock); - return 0; - } else { - mutex_unlock(&dsim->lock); - return -EINVAL; - } - - /* general command */ - case MIPI_DSI_COLOR_MODE_OFF: - case MIPI_DSI_COLOR_MODE_ON: - case MIPI_DSI_SHUTDOWN_PERIPHERAL: - case MIPI_DSI_TURN_ON_PERIPHERAL: - exynos_mipi_dsi_wr_tx_header(dsim, data_id, data0[0], data0[1]); - if (check_rx_ack) { - /* process response func should be implemented. */ - mutex_unlock(&dsim->lock); - return 0; - } else { - mutex_unlock(&dsim->lock); - return -EINVAL; - } - - /* packet types for video data */ - case MIPI_DSI_V_SYNC_START: - case MIPI_DSI_V_SYNC_END: - case MIPI_DSI_H_SYNC_START: - case MIPI_DSI_H_SYNC_END: - case MIPI_DSI_END_OF_TRANSMISSION: - mutex_unlock(&dsim->lock); - return 0; - - /* long packet type and null packet */ - case MIPI_DSI_NULL_PACKET: - case MIPI_DSI_BLANKING_PACKET: - mutex_unlock(&dsim->lock); - return 0; - case MIPI_DSI_GENERIC_LONG_WRITE: - case MIPI_DSI_DCS_LONG_WRITE: - { - unsigned int size, payload = 0; - reinit_completion(&dsim_wr_comp); - - size = data_size * 4; - - /* if data count is less then 4, then send 3bytes data. 
*/ - if (data_size < 4) { - payload = data0[0] | - data0[1] << 8 | - data0[2] << 16; - - exynos_mipi_dsi_wr_tx_data(dsim, payload); - - dev_dbg(dsim->dev, "count = %d payload = %x,%x %x %x\n", - data_size, payload, data0[0], - data0[1], data0[2]); - - /* in case that data count is more then 4 */ - } else - exynos_mipi_dsi_long_data_wr(dsim, data0, data_size); - - /* put data into header fifo */ - exynos_mipi_dsi_wr_tx_header(dsim, data_id, data_size & 0xff, - (data_size & 0xff00) >> 8); - - if (!wait_for_completion_interruptible_timeout(&dsim_wr_comp, - MIPI_FIFO_TIMEOUT)) { - dev_warn(dsim->dev, "command write timeout.\n"); - mutex_unlock(&dsim->lock); - return -EAGAIN; - } - - if (check_rx_ack) { - /* process response func should be implemented. */ - mutex_unlock(&dsim->lock); - return 0; - } else { - mutex_unlock(&dsim->lock); - return -EINVAL; - } - } - - /* packet typo for video data */ - case MIPI_DSI_PACKED_PIXEL_STREAM_16: - case MIPI_DSI_PACKED_PIXEL_STREAM_18: - case MIPI_DSI_PIXEL_STREAM_3BYTE_18: - case MIPI_DSI_PACKED_PIXEL_STREAM_24: - if (check_rx_ack) { - /* process response func should be implemented. */ - mutex_unlock(&dsim->lock); - return 0; - } else { - mutex_unlock(&dsim->lock); - return -EINVAL; - } - default: - dev_warn(dsim->dev, - "data id %x is not supported current DSI spec.\n", - data_id); - - mutex_unlock(&dsim->lock); - return -EINVAL; - } -} - -static unsigned int exynos_mipi_dsi_long_data_rd(struct mipi_dsim_device *dsim, - unsigned int req_size, unsigned int rx_data, u8 *rx_buf) -{ - unsigned int rcv_pkt, i, j; - u16 rxsize; - - /* for long packet */ - rxsize = (u16)((rx_data & 0x00ffff00) >> 8); - dev_dbg(dsim->dev, "mipi dsi rx size : %d\n", rxsize); - if (rxsize != req_size) { - dev_dbg(dsim->dev, - "received size mismatch received: %d, requested: %d\n", - rxsize, req_size); - goto err; - } - - for (i = 0; i < (rxsize >> 2); i++) { - rcv_pkt = exynos_mipi_dsi_rd_rx_fifo(dsim); - dev_dbg(dsim->dev, "received pkt : %08x\n", rcv_pkt); - for (j = 0; j < 4; j++) { - rx_buf[(i * 4) + j] = - (u8)(rcv_pkt >> (j * 8)) & 0xff; - dev_dbg(dsim->dev, "received value : %02x\n", - (rcv_pkt >> (j * 8)) & 0xff); - } - } - if (rxsize % 4) { - rcv_pkt = exynos_mipi_dsi_rd_rx_fifo(dsim); - dev_dbg(dsim->dev, "received pkt : %08x\n", rcv_pkt); - for (j = 0; j < (rxsize % 4); j++) { - rx_buf[(i * 4) + j] = - (u8)(rcv_pkt >> (j * 8)) & 0xff; - dev_dbg(dsim->dev, "received value : %02x\n", - (rcv_pkt >> (j * 8)) & 0xff); - } - } - - return rxsize; - -err: - return -EINVAL; -} - -static unsigned int exynos_mipi_dsi_response_size(unsigned int req_size) -{ - switch (req_size) { - case 1: - return MIPI_DSI_RX_GENERIC_SHORT_READ_RESPONSE_1BYTE; - case 2: - return MIPI_DSI_RX_GENERIC_SHORT_READ_RESPONSE_2BYTE; - default: - return MIPI_DSI_RX_GENERIC_LONG_READ_RESPONSE; - } -} - -int exynos_mipi_dsi_rd_data(struct mipi_dsim_device *dsim, unsigned int data_id, - unsigned int data0, unsigned int req_size, u8 *rx_buf) -{ - unsigned int rx_data, rcv_pkt, i; - u8 response = 0; - u16 rxsize; - - if (dsim->state == DSIM_STATE_ULPS) { - dev_err(dsim->dev, "state is ULPS.\n"); - - return -EINVAL; - } - - /* FIXME!!! 
*/ - msleep(20); - - mutex_lock(&dsim->lock); - reinit_completion(&dsim_rd_comp); - exynos_mipi_dsi_rd_tx_header(dsim, - MIPI_DSI_SET_MAXIMUM_RETURN_PACKET_SIZE, req_size); - - response = exynos_mipi_dsi_response_size(req_size); - - switch (data_id) { - case MIPI_DSI_GENERIC_READ_REQUEST_0_PARAM: - case MIPI_DSI_GENERIC_READ_REQUEST_1_PARAM: - case MIPI_DSI_GENERIC_READ_REQUEST_2_PARAM: - case MIPI_DSI_DCS_READ: - exynos_mipi_dsi_rd_tx_header(dsim, - data_id, data0); - /* process response func should be implemented. */ - break; - default: - dev_warn(dsim->dev, - "data id %x is not supported current DSI spec.\n", - data_id); - - mutex_unlock(&dsim->lock); - return -EINVAL; - } - - if (!wait_for_completion_interruptible_timeout(&dsim_rd_comp, - MIPI_FIFO_TIMEOUT)) { - pr_err("RX done interrupt timeout\n"); - mutex_unlock(&dsim->lock); - return 0; - } - - msleep(20); - - rx_data = exynos_mipi_dsi_rd_rx_fifo(dsim); - - if ((u8)(rx_data & 0xff) != response) { - printk(KERN_ERR - "mipi dsi wrong response rx_data : %x, response:%x\n", - rx_data, response); - goto clear_rx_fifo; - } - - if (req_size <= 2) { - /* for short packet */ - for (i = 0; i < req_size; i++) - rx_buf[i] = (rx_data >> (8 + (i * 8))) & 0xff; - rxsize = req_size; - } else { - /* for long packet */ - rxsize = exynos_mipi_dsi_long_data_rd(dsim, req_size, rx_data, - rx_buf); - if (rxsize != req_size) - goto clear_rx_fifo; - } - - rcv_pkt = exynos_mipi_dsi_rd_rx_fifo(dsim); - - msleep(20); - - if (rcv_pkt != MIPI_RX_FIFO_READ_DONE) { - dev_info(dsim->dev, - "Can't found RX FIFO READ DONE FLAG : %x\n", rcv_pkt); - goto clear_rx_fifo; - } - - mutex_unlock(&dsim->lock); - - return rxsize; - -clear_rx_fifo: - i = 0; - while (1) { - rcv_pkt = exynos_mipi_dsi_rd_rx_fifo(dsim); - if ((rcv_pkt == MIPI_RX_FIFO_READ_DONE) - || (i > MIPI_MAX_RX_FIFO)) - break; - dev_dbg(dsim->dev, - "mipi dsi clear rx fifo : %08x\n", rcv_pkt); - i++; - } - dev_info(dsim->dev, - "mipi dsi rx done count : %d, rcv_pkt : %08x\n", i, rcv_pkt); - - mutex_unlock(&dsim->lock); - - return 0; -} - -static int exynos_mipi_dsi_pll_on(struct mipi_dsim_device *dsim, - unsigned int enable) -{ - int sw_timeout; - - if (enable) { - sw_timeout = 1000; - - exynos_mipi_dsi_enable_pll(dsim, 1); - while (1) { - sw_timeout--; - if (exynos_mipi_dsi_is_pll_stable(dsim)) - return 0; - if (sw_timeout == 0) - return -EINVAL; - } - } else - exynos_mipi_dsi_enable_pll(dsim, 0); - - return 0; -} - -static unsigned long exynos_mipi_dsi_change_pll(struct mipi_dsim_device *dsim, - unsigned int pre_divider, unsigned int main_divider, - unsigned int scaler) -{ - unsigned long dfin_pll, dfvco, dpll_out; - unsigned int i, freq_band = 0xf; - - dfin_pll = (FIN_HZ / pre_divider); - - /****************************************************** - * Serial Clock(=ByteClk X 8) FreqBand[3:0] * - ****************************************************** - * ~ 99.99 MHz 0000 - * 100 ~ 119.99 MHz 0001 - * 120 ~ 159.99 MHz 0010 - * 160 ~ 199.99 MHz 0011 - * 200 ~ 239.99 MHz 0100 - * 140 ~ 319.99 MHz 0101 - * 320 ~ 389.99 MHz 0110 - * 390 ~ 449.99 MHz 0111 - * 450 ~ 509.99 MHz 1000 - * 510 ~ 559.99 MHz 1001 - * 560 ~ 639.99 MHz 1010 - * 640 ~ 689.99 MHz 1011 - * 690 ~ 769.99 MHz 1100 - * 770 ~ 869.99 MHz 1101 - * 870 ~ 949.99 MHz 1110 - * 950 ~ 1000 MHz 1111 - ******************************************************/ - if (dfin_pll < DFIN_PLL_MIN_HZ || dfin_pll > DFIN_PLL_MAX_HZ) { - dev_warn(dsim->dev, "fin_pll range should be 6MHz ~ 12MHz\n"); - exynos_mipi_dsi_enable_afc(dsim, 0, 0); - } else { - if (dfin_pll < 7 * 
MHZ) - exynos_mipi_dsi_enable_afc(dsim, 1, 0x1); - else if (dfin_pll < 8 * MHZ) - exynos_mipi_dsi_enable_afc(dsim, 1, 0x0); - else if (dfin_pll < 9 * MHZ) - exynos_mipi_dsi_enable_afc(dsim, 1, 0x3); - else if (dfin_pll < 10 * MHZ) - exynos_mipi_dsi_enable_afc(dsim, 1, 0x2); - else if (dfin_pll < 11 * MHZ) - exynos_mipi_dsi_enable_afc(dsim, 1, 0x5); - else - exynos_mipi_dsi_enable_afc(dsim, 1, 0x4); - } - - dfvco = dfin_pll * main_divider; - dev_dbg(dsim->dev, "dfvco = %lu, dfin_pll = %lu, main_divider = %d\n", - dfvco, dfin_pll, main_divider); - if (dfvco < DFVCO_MIN_HZ || dfvco > DFVCO_MAX_HZ) - dev_warn(dsim->dev, "fvco range should be 500MHz ~ 1000MHz\n"); - - dpll_out = dfvco / (1 << scaler); - dev_dbg(dsim->dev, "dpll_out = %lu, dfvco = %lu, scaler = %d\n", - dpll_out, dfvco, scaler); - - for (i = 0; i < ARRAY_SIZE(dpll_table); i++) { - if (dpll_out < dpll_table[i] * MHZ) { - freq_band = i; - break; - } - } - - dev_dbg(dsim->dev, "freq_band = %d\n", freq_band); - - exynos_mipi_dsi_pll_freq(dsim, pre_divider, main_divider, scaler); - - exynos_mipi_dsi_hs_zero_ctrl(dsim, 0); - exynos_mipi_dsi_prep_ctrl(dsim, 0); - - /* Freq Band */ - exynos_mipi_dsi_pll_freq_band(dsim, freq_band); - - /* Stable time */ - exynos_mipi_dsi_pll_stable_time(dsim, dsim->dsim_config->pll_stable_time); - - /* Enable PLL */ - dev_dbg(dsim->dev, "FOUT of mipi dphy pll is %luMHz\n", - (dpll_out / MHZ)); - - return dpll_out; -} - -static int exynos_mipi_dsi_set_clock(struct mipi_dsim_device *dsim, - unsigned int byte_clk_sel, unsigned int enable) -{ - unsigned int esc_div; - unsigned long esc_clk_error_rate; - unsigned long hs_clk = 0, byte_clk = 0, escape_clk = 0; - - if (enable) { - dsim->e_clk_src = byte_clk_sel; - - /* Escape mode clock and byte clock source */ - exynos_mipi_dsi_set_byte_clock_src(dsim, byte_clk_sel); - - /* DPHY, DSIM Link : D-PHY clock out */ - if (byte_clk_sel == DSIM_PLL_OUT_DIV8) { - hs_clk = exynos_mipi_dsi_change_pll(dsim, - dsim->dsim_config->p, dsim->dsim_config->m, - dsim->dsim_config->s); - if (hs_clk == 0) { - dev_err(dsim->dev, - "failed to get hs clock.\n"); - return -EINVAL; - } - - byte_clk = hs_clk / 8; - exynos_mipi_dsi_enable_pll_bypass(dsim, 0); - exynos_mipi_dsi_pll_on(dsim, 1); - /* DPHY : D-PHY clock out, DSIM link : external clock out */ - } else if (byte_clk_sel == DSIM_EXT_CLK_DIV8) { - dev_warn(dsim->dev, "this project is not support\n"); - dev_warn(dsim->dev, - "external clock source for MIPI DSIM.\n"); - } else if (byte_clk_sel == DSIM_EXT_CLK_BYPASS) { - dev_warn(dsim->dev, "this project is not support\n"); - dev_warn(dsim->dev, - "external clock source for MIPI DSIM\n"); - } - - /* escape clock divider */ - esc_div = byte_clk / (dsim->dsim_config->esc_clk); - dev_dbg(dsim->dev, - "esc_div = %d, byte_clk = %lu, esc_clk = %lu\n", - esc_div, byte_clk, dsim->dsim_config->esc_clk); - if ((byte_clk / esc_div) >= (20 * MHZ) || - (byte_clk / esc_div) > - dsim->dsim_config->esc_clk) - esc_div += 1; - - escape_clk = byte_clk / esc_div; - dev_dbg(dsim->dev, - "escape_clk = %lu, byte_clk = %lu, esc_div = %d\n", - escape_clk, byte_clk, esc_div); - - /* enable escape clock. 
*/ - exynos_mipi_dsi_enable_byte_clock(dsim, 1); - - /* enable byte clk and escape clock */ - exynos_mipi_dsi_set_esc_clk_prs(dsim, 1, esc_div); - /* escape clock on lane */ - exynos_mipi_dsi_enable_esc_clk_on_lane(dsim, - (DSIM_LANE_CLOCK | dsim->data_lane), 1); - - dev_dbg(dsim->dev, "byte clock is %luMHz\n", - (byte_clk / MHZ)); - dev_dbg(dsim->dev, "escape clock that user's need is %lu\n", - (dsim->dsim_config->esc_clk / MHZ)); - dev_dbg(dsim->dev, "escape clock divider is %x\n", esc_div); - dev_dbg(dsim->dev, "escape clock is %luMHz\n", - ((byte_clk / esc_div) / MHZ)); - - if ((byte_clk / esc_div) > escape_clk) { - esc_clk_error_rate = escape_clk / - (byte_clk / esc_div); - dev_warn(dsim->dev, "error rate is %lu over.\n", - (esc_clk_error_rate / 100)); - } else if ((byte_clk / esc_div) < (escape_clk)) { - esc_clk_error_rate = (byte_clk / esc_div) / - escape_clk; - dev_warn(dsim->dev, "error rate is %lu under.\n", - (esc_clk_error_rate / 100)); - } - } else { - exynos_mipi_dsi_enable_esc_clk_on_lane(dsim, - (DSIM_LANE_CLOCK | dsim->data_lane), 0); - exynos_mipi_dsi_set_esc_clk_prs(dsim, 0, 0); - - /* disable escape clock. */ - exynos_mipi_dsi_enable_byte_clock(dsim, 0); - - if (byte_clk_sel == DSIM_PLL_OUT_DIV8) - exynos_mipi_dsi_pll_on(dsim, 0); - } - - return 0; -} - -int exynos_mipi_dsi_init_dsim(struct mipi_dsim_device *dsim) -{ - dsim->state = DSIM_STATE_INIT; - - switch (dsim->dsim_config->e_no_data_lane) { - case DSIM_DATA_LANE_1: - dsim->data_lane = DSIM_LANE_DATA0; - break; - case DSIM_DATA_LANE_2: - dsim->data_lane = DSIM_LANE_DATA0 | DSIM_LANE_DATA1; - break; - case DSIM_DATA_LANE_3: - dsim->data_lane = DSIM_LANE_DATA0 | DSIM_LANE_DATA1 | - DSIM_LANE_DATA2; - break; - case DSIM_DATA_LANE_4: - dsim->data_lane = DSIM_LANE_DATA0 | DSIM_LANE_DATA1 | - DSIM_LANE_DATA2 | DSIM_LANE_DATA3; - break; - default: - dev_info(dsim->dev, "data lane is invalid.\n"); - return -EINVAL; - } - - exynos_mipi_dsi_sw_reset(dsim); - exynos_mipi_dsi_func_reset(dsim); - - exynos_mipi_dsi_dp_dn_swap(dsim, 0); - - return 0; -} - -void exynos_mipi_dsi_init_interrupt(struct mipi_dsim_device *dsim) -{ - unsigned int src = 0; - - src = (INTSRC_SFR_FIFO_EMPTY | INTSRC_RX_DATA_DONE); - exynos_mipi_dsi_set_interrupt(dsim, src, 1); - - src = 0; - src = ~(INTMSK_RX_DONE | INTMSK_FIFO_EMPTY); - exynos_mipi_dsi_set_interrupt_mask(dsim, src, 1); -} - -int exynos_mipi_dsi_enable_frame_done_int(struct mipi_dsim_device *dsim, - unsigned int enable) -{ - /* enable only frame done interrupt */ - exynos_mipi_dsi_set_interrupt_mask(dsim, INTMSK_FRAME_DONE, enable); - - return 0; -} - -void exynos_mipi_dsi_stand_by(struct mipi_dsim_device *dsim, - unsigned int enable) -{ - - /* consider Main display and Sub display. */ - - exynos_mipi_dsi_set_main_stand_by(dsim, enable); -} - -int exynos_mipi_dsi_set_display_mode(struct mipi_dsim_device *dsim, - struct mipi_dsim_config *dsim_config) -{ - struct mipi_dsim_platform_data *dsim_pd; - struct fb_videomode *timing; - - dsim_pd = (struct mipi_dsim_platform_data *)dsim->pd; - timing = (struct fb_videomode *)dsim_pd->lcd_panel_info; - - /* in case of VIDEO MODE (RGB INTERFACE), it sets polarities. 
*/ - if (dsim_config->e_interface == (u32) DSIM_VIDEO) { - if (dsim_config->auto_vertical_cnt == 0) { - exynos_mipi_dsi_set_main_disp_vporch(dsim, - dsim_config->cmd_allow, - timing->lower_margin, - timing->upper_margin); - exynos_mipi_dsi_set_main_disp_hporch(dsim, - timing->right_margin, - timing->left_margin); - exynos_mipi_dsi_set_main_disp_sync_area(dsim, - timing->vsync_len, - timing->hsync_len); - } - } - - exynos_mipi_dsi_set_main_disp_resol(dsim, timing->xres, - timing->yres); - - exynos_mipi_dsi_display_config(dsim, dsim_config); - - dev_info(dsim->dev, "lcd panel ==> width = %d, height = %d\n", - timing->xres, timing->yres); - - return 0; -} - -int exynos_mipi_dsi_init_link(struct mipi_dsim_device *dsim) -{ - unsigned int time_out = 100; - - switch (dsim->state) { - case DSIM_STATE_INIT: - exynos_mipi_dsi_init_fifo_pointer(dsim, 0x1f); - - /* dsi configuration */ - exynos_mipi_dsi_init_config(dsim); - exynos_mipi_dsi_enable_lane(dsim, DSIM_LANE_CLOCK, 1); - exynos_mipi_dsi_enable_lane(dsim, dsim->data_lane, 1); - - /* set clock configuration */ - exynos_mipi_dsi_set_clock(dsim, dsim->dsim_config->e_byte_clk, 1); - - /* check clock and data lane state are stop state */ - while (!(exynos_mipi_dsi_is_lane_state(dsim))) { - time_out--; - if (time_out == 0) { - dev_err(dsim->dev, - "DSI Master is not stop state.\n"); - dev_err(dsim->dev, - "Check initialization process\n"); - - return -EINVAL; - } - } - if (time_out != 0) { - dev_info(dsim->dev, - "DSI Master driver has been completed.\n"); - dev_info(dsim->dev, "DSI Master state is stop state\n"); - } - - dsim->state = DSIM_STATE_STOP; - - /* BTA sequence counters */ - exynos_mipi_dsi_set_stop_state_counter(dsim, - dsim->dsim_config->stop_holding_cnt); - exynos_mipi_dsi_set_bta_timeout(dsim, - dsim->dsim_config->bta_timeout); - exynos_mipi_dsi_set_lpdr_timeout(dsim, - dsim->dsim_config->rx_timeout); - - return 0; - default: - dev_info(dsim->dev, "DSI Master is already init.\n"); - return 0; - } - - return 0; -} - -int exynos_mipi_dsi_set_hs_enable(struct mipi_dsim_device *dsim) -{ - if (dsim->state != DSIM_STATE_STOP) { - dev_warn(dsim->dev, "DSIM is not in stop state.\n"); - return 0; - } - - if (dsim->e_clk_src == DSIM_EXT_CLK_BYPASS) { - dev_warn(dsim->dev, "clock source is external bypass.\n"); - return 0; - } - - dsim->state = DSIM_STATE_HSCLKEN; - - /* set LCDC and CPU transfer mode to HS. 
*/ - exynos_mipi_dsi_set_lcdc_transfer_mode(dsim, 0); - exynos_mipi_dsi_set_cpu_transfer_mode(dsim, 0); - exynos_mipi_dsi_enable_hs_clock(dsim, 1); - - return 0; -} - -int exynos_mipi_dsi_set_data_transfer_mode(struct mipi_dsim_device *dsim, - unsigned int mode) -{ - if (mode) { - if (dsim->state != DSIM_STATE_HSCLKEN) { - dev_err(dsim->dev, "HS Clock lane is not enabled.\n"); - return -EINVAL; - } - - exynos_mipi_dsi_set_lcdc_transfer_mode(dsim, 0); - } else { - if (dsim->state == DSIM_STATE_INIT || dsim->state == - DSIM_STATE_ULPS) { - dev_err(dsim->dev, - "DSI Master is not STOP or HSDT state.\n"); - return -EINVAL; - } - - exynos_mipi_dsi_set_cpu_transfer_mode(dsim, 0); - } - - return 0; -} - -int exynos_mipi_dsi_get_frame_done_status(struct mipi_dsim_device *dsim) -{ - return _exynos_mipi_dsi_get_frame_done_status(dsim); -} - -int exynos_mipi_dsi_clear_frame_done(struct mipi_dsim_device *dsim) -{ - _exynos_mipi_dsi_clear_frame_done(dsim); - - return 0; -} - -int exynos_mipi_dsi_fifo_clear(struct mipi_dsim_device *dsim, - unsigned int val) -{ - int try = TRY_FIFO_CLEAR; - - exynos_mipi_dsi_sw_reset_release(dsim); - exynos_mipi_dsi_func_reset(dsim); - - do { - if (exynos_mipi_dsi_get_sw_reset_release(dsim)) { - exynos_mipi_dsi_init_interrupt(dsim); - dev_dbg(dsim->dev, "reset release done.\n"); - return 0; - } - } while (--try); - - dev_err(dsim->dev, "failed to clear dsim fifo.\n"); - return -EAGAIN; -} - -MODULE_AUTHOR("InKi Dae <inki.dae@samsung.com>"); -MODULE_DESCRIPTION("Samsung SoC MIPI-DSI common driver"); -MODULE_LICENSE("GPL"); diff --git a/drivers/video/fbdev/exynos/exynos_mipi_dsi_common.h b/drivers/video/fbdev/exynos/exynos_mipi_dsi_common.h deleted file mode 100644 index 412552274df3..000000000000 --- a/drivers/video/fbdev/exynos/exynos_mipi_dsi_common.h +++ /dev/null @@ -1,46 +0,0 @@ -/* linux/drivers/video/exynos_mipi_dsi_common.h - * - * Header file for Samsung SoC MIPI-DSI common driver. - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd - * - * InKi Dae <inki.dae@samsung.com> - * Donghwa Lee <dh09.lee@samsung.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
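[Editorial aside, not part of the patch.] The exynos_mipi_dsi_fifo_clear() helper above uses the driver's bounded-retry idiom: re-check a status flag a fixed number of times, then give up with -EAGAIN. A minimal, self-contained C sketch of the same pattern; hw_flag, flag_is_set() and poll_flag() are invented stand-ins for the register readback, not names from the patch:

#include <errno.h>

static int hw_flag;			/* stand-in for a hardware status bit */

static int flag_is_set(void)
{
	return hw_flag;			/* real code would poll a register */
}

static int poll_flag(void)
{
	int try = 10;			/* same budget as TRY_FIFO_CLEAR */

	do {
		if (flag_is_set())
			return 0;	/* hardware acknowledged */
	} while (--try);

	return -EAGAIN;			/* let the caller retry or fail */
}

int main(void)
{
	hw_flag = 1;			/* pretend the reset already completed */
	return poll_flag();		/* 0 on success */
}

As in the removed helper, the do/while shape guarantees at least one readback before the retry budget is consulted.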
-*/
-
-#ifndef _EXYNOS_MIPI_DSI_COMMON_H
-#define _EXYNOS_MIPI_DSI_COMMON_H
-
-static DECLARE_COMPLETION(dsim_rd_comp);
-static DECLARE_COMPLETION(dsim_wr_comp);
-
-int exynos_mipi_dsi_wr_data(struct mipi_dsim_device *dsim, unsigned int data_id,
-	const unsigned char *data0, unsigned int data_size);
-int exynos_mipi_dsi_rd_data(struct mipi_dsim_device *dsim, unsigned int data_id,
-	unsigned int data0, unsigned int req_size, u8 *rx_buf);
-irqreturn_t exynos_mipi_dsi_interrupt_handler(int irq, void *dev_id);
-void exynos_mipi_dsi_init_interrupt(struct mipi_dsim_device *dsim);
-int exynos_mipi_dsi_init_dsim(struct mipi_dsim_device *dsim);
-void exynos_mipi_dsi_stand_by(struct mipi_dsim_device *dsim,
-	unsigned int enable);
-int exynos_mipi_dsi_set_display_mode(struct mipi_dsim_device *dsim,
-	struct mipi_dsim_config *dsim_info);
-int exynos_mipi_dsi_init_link(struct mipi_dsim_device *dsim);
-int exynos_mipi_dsi_set_hs_enable(struct mipi_dsim_device *dsim);
-int exynos_mipi_dsi_set_data_transfer_mode(struct mipi_dsim_device *dsim,
-	unsigned int mode);
-int exynos_mipi_dsi_enable_frame_done_int(struct mipi_dsim_device *dsim,
-	unsigned int enable);
-int exynos_mipi_dsi_get_frame_done_status(struct mipi_dsim_device *dsim);
-int exynos_mipi_dsi_clear_frame_done(struct mipi_dsim_device *dsim);
-
-extern struct fb_info *registered_fb[FB_MAX] __read_mostly;
-
-int exynos_mipi_dsi_fifo_clear(struct mipi_dsim_device *dsim,
-	unsigned int val);
-
-#endif /* _EXYNOS_MIPI_DSI_COMMON_H */
diff --git a/drivers/video/fbdev/exynos/exynos_mipi_dsi_lowlevel.c b/drivers/video/fbdev/exynos/exynos_mipi_dsi_lowlevel.c
deleted file mode 100644
index c148d06540c1..000000000000
--- a/drivers/video/fbdev/exynos/exynos_mipi_dsi_lowlevel.c
+++ /dev/null
@@ -1,618 +0,0 @@
-/* linux/drivers/video/exynos/exynos_mipi_dsi_lowlevel.c
- *
- * Samsung SoC MIPI-DSI lowlevel driver.
- *
- * Copyright (c) 2012 Samsung Electronics Co., Ltd
- *
- * InKi Dae, <inki.dae@samsung.com>
- * Donghwa Lee, <dh09.lee@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
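[Editorial aside, not part of the patch.] The removed common.h is unusual in declaring dsim_rd_comp and dsim_wr_comp with static DECLARE_COMPLETION() inside a header, which gives every includer its own private copy; this only worked because a single .c file included it. Below is a kernel-style sketch (compilable in-tree only; xfer_done, xfer_irq and xfer_wait are hypothetical names) of the arm-then-wait pattern those completions support:

#include <linux/completion.h>
#include <linux/errno.h>

static DECLARE_COMPLETION(xfer_done);	/* plays the role of dsim_rd_comp */

/* interrupt path: wake the waiter once the RX-done interrupt fires */
static void xfer_irq(void)
{
	complete(&xfer_done);
}

/* request path: re-arm, start the transfer, then block with a timeout */
static int xfer_wait(unsigned long timeout_jiffies)
{
	long ret;

	reinit_completion(&xfer_done);
	/* ... write the TX header to start the transfer here ... */
	ret = wait_for_completion_interruptible_timeout(&xfer_done,
							timeout_jiffies);
	if (ret == 0)
		return -ETIMEDOUT;	/* no interrupt arrived in time */
	if (ret < 0)
		return ret;		/* interrupted by a signal */
	return 0;
}

Note that the removed read path tested only !wait_for_completion_interruptible_timeout(...), which catches the timeout (return 0) but lets a signal (negative return) fall through as if the transfer had completed.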
-*/ - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/mutex.h> -#include <linux/wait.h> -#include <linux/delay.h> -#include <linux/fs.h> -#include <linux/mm.h> -#include <linux/ctype.h> -#include <linux/platform_device.h> -#include <linux/io.h> - -#include <video/exynos_mipi_dsim.h> - -#include "exynos_mipi_dsi_regs.h" -#include "exynos_mipi_dsi_lowlevel.h" - -void exynos_mipi_dsi_func_reset(struct mipi_dsim_device *dsim) -{ - unsigned int reg; - - reg = readl(dsim->reg_base + EXYNOS_DSIM_SWRST); - - reg |= DSIM_FUNCRST; - - writel(reg, dsim->reg_base + EXYNOS_DSIM_SWRST); -} - -void exynos_mipi_dsi_sw_reset(struct mipi_dsim_device *dsim) -{ - unsigned int reg; - - reg = readl(dsim->reg_base + EXYNOS_DSIM_SWRST); - - reg |= DSIM_SWRST; - - writel(reg, dsim->reg_base + EXYNOS_DSIM_SWRST); -} - -void exynos_mipi_dsi_sw_reset_release(struct mipi_dsim_device *dsim) -{ - unsigned int reg; - - reg = readl(dsim->reg_base + EXYNOS_DSIM_INTSRC); - - reg |= INTSRC_SW_RST_RELEASE; - - writel(reg, dsim->reg_base + EXYNOS_DSIM_INTSRC); -} - -int exynos_mipi_dsi_get_sw_reset_release(struct mipi_dsim_device *dsim) -{ - return (readl(dsim->reg_base + EXYNOS_DSIM_INTSRC)) & - INTSRC_SW_RST_RELEASE; -} - -unsigned int exynos_mipi_dsi_read_interrupt_mask(struct mipi_dsim_device *dsim) -{ - unsigned int reg; - - reg = readl(dsim->reg_base + EXYNOS_DSIM_INTMSK); - - return reg; -} - -void exynos_mipi_dsi_set_interrupt_mask(struct mipi_dsim_device *dsim, - unsigned int mode, unsigned int mask) -{ - unsigned int reg = 0; - - if (mask) - reg |= mode; - else - reg &= ~mode; - - writel(reg, dsim->reg_base + EXYNOS_DSIM_INTMSK); -} - -void exynos_mipi_dsi_init_fifo_pointer(struct mipi_dsim_device *dsim, - unsigned int cfg) -{ - unsigned int reg; - - reg = readl(dsim->reg_base + EXYNOS_DSIM_FIFOCTRL); - - writel(reg & ~(cfg), dsim->reg_base + EXYNOS_DSIM_FIFOCTRL); - mdelay(10); - reg |= cfg; - - writel(reg, dsim->reg_base + EXYNOS_DSIM_FIFOCTRL); -} - -/* - * this function set PLL P, M and S value in D-PHY - */ -void exynos_mipi_dsi_set_phy_tunning(struct mipi_dsim_device *dsim, - unsigned int value) -{ - writel(DSIM_AFC_CTL(value), dsim->reg_base + EXYNOS_DSIM_PHYACCHR); -} - -void exynos_mipi_dsi_set_main_stand_by(struct mipi_dsim_device *dsim, - unsigned int enable) -{ - unsigned int reg; - - reg = readl(dsim->reg_base + EXYNOS_DSIM_MDRESOL); - - reg &= ~DSIM_MAIN_STAND_BY; - - if (enable) - reg |= DSIM_MAIN_STAND_BY; - - writel(reg, dsim->reg_base + EXYNOS_DSIM_MDRESOL); -} - -void exynos_mipi_dsi_set_main_disp_resol(struct mipi_dsim_device *dsim, - unsigned int width_resol, unsigned int height_resol) -{ - unsigned int reg; - - /* standby should be set after configuration so set to not ready*/ - reg = (readl(dsim->reg_base + EXYNOS_DSIM_MDRESOL)) & - ~(DSIM_MAIN_STAND_BY); - writel(reg, dsim->reg_base + EXYNOS_DSIM_MDRESOL); - - reg &= ~((0x7ff << 16) | (0x7ff << 0)); - reg |= DSIM_MAIN_VRESOL(height_resol) | DSIM_MAIN_HRESOL(width_resol); - - reg |= DSIM_MAIN_STAND_BY; - writel(reg, dsim->reg_base + EXYNOS_DSIM_MDRESOL); -} - -void exynos_mipi_dsi_set_main_disp_vporch(struct mipi_dsim_device *dsim, - unsigned int cmd_allow, unsigned int vfront, unsigned int vback) -{ - unsigned int reg; - - reg = (readl(dsim->reg_base + EXYNOS_DSIM_MVPORCH)) & - ~((DSIM_CMD_ALLOW_MASK) | (DSIM_STABLE_VFP_MASK) | - (DSIM_MAIN_VBP_MASK)); - - reg |= (DSIM_CMD_ALLOW_SHIFT(cmd_allow & 0xf) | - DSIM_STABLE_VFP_SHIFT(vfront & 0x7ff) | - DSIM_MAIN_VBP_SHIFT(vback & 0x7ff)); - - 
writel(reg, dsim->reg_base + EXYNOS_DSIM_MVPORCH); -} - -void exynos_mipi_dsi_set_main_disp_hporch(struct mipi_dsim_device *dsim, - unsigned int front, unsigned int back) -{ - unsigned int reg; - - reg = (readl(dsim->reg_base + EXYNOS_DSIM_MHPORCH)) & - ~((DSIM_MAIN_HFP_MASK) | (DSIM_MAIN_HBP_MASK)); - - reg |= DSIM_MAIN_HFP_SHIFT(front) | DSIM_MAIN_HBP_SHIFT(back); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_MHPORCH); -} - -void exynos_mipi_dsi_set_main_disp_sync_area(struct mipi_dsim_device *dsim, - unsigned int vert, unsigned int hori) -{ - unsigned int reg; - - reg = (readl(dsim->reg_base + EXYNOS_DSIM_MSYNC)) & - ~((DSIM_MAIN_VSA_MASK) | (DSIM_MAIN_HSA_MASK)); - - reg |= (DSIM_MAIN_VSA_SHIFT(vert & 0x3ff) | - DSIM_MAIN_HSA_SHIFT(hori)); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_MSYNC); -} - -void exynos_mipi_dsi_set_sub_disp_resol(struct mipi_dsim_device *dsim, - unsigned int vert, unsigned int hori) -{ - unsigned int reg; - - reg = (readl(dsim->reg_base + EXYNOS_DSIM_SDRESOL)) & - ~(DSIM_SUB_STANDY_MASK); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_SDRESOL); - - reg &= ~(DSIM_SUB_VRESOL_MASK) | ~(DSIM_SUB_HRESOL_MASK); - reg |= (DSIM_SUB_VRESOL_SHIFT(vert & 0x7ff) | - DSIM_SUB_HRESOL_SHIFT(hori & 0x7ff)); - writel(reg, dsim->reg_base + EXYNOS_DSIM_SDRESOL); - - reg |= DSIM_SUB_STANDY_SHIFT(1); - writel(reg, dsim->reg_base + EXYNOS_DSIM_SDRESOL); -} - -void exynos_mipi_dsi_init_config(struct mipi_dsim_device *dsim) -{ - struct mipi_dsim_config *dsim_config = dsim->dsim_config; - - unsigned int cfg = (readl(dsim->reg_base + EXYNOS_DSIM_CONFIG)) & - ~((1 << 28) | (0x1f << 20) | (0x3 << 5)); - - cfg = ((DSIM_AUTO_FLUSH(dsim_config->auto_flush)) | - (DSIM_EOT_DISABLE(dsim_config->eot_disable)) | - (DSIM_AUTO_MODE_SHIFT(dsim_config->auto_vertical_cnt)) | - (DSIM_HSE_MODE_SHIFT(dsim_config->hse)) | - (DSIM_HFP_MODE_SHIFT(dsim_config->hfp)) | - (DSIM_HBP_MODE_SHIFT(dsim_config->hbp)) | - (DSIM_HSA_MODE_SHIFT(dsim_config->hsa)) | - (DSIM_NUM_OF_DATALANE_SHIFT(dsim_config->e_no_data_lane))); - - writel(cfg, dsim->reg_base + EXYNOS_DSIM_CONFIG); -} - -void exynos_mipi_dsi_display_config(struct mipi_dsim_device *dsim, - struct mipi_dsim_config *dsim_config) -{ - u32 reg = (readl(dsim->reg_base + EXYNOS_DSIM_CONFIG)) & - ~((0x3 << 26) | (1 << 25) | (0x3 << 18) | (0x7 << 12) | - (0x3 << 16) | (0x7 << 8)); - - if (dsim_config->e_interface == DSIM_VIDEO) - reg |= (1 << 25); - else if (dsim_config->e_interface == DSIM_COMMAND) - reg &= ~(1 << 25); - else { - dev_err(dsim->dev, "unknown lcd type.\n"); - return; - } - - /* main lcd */ - reg |= ((u8) (dsim_config->e_burst_mode) & 0x3) << 26 | - ((u8) (dsim_config->e_virtual_ch) & 0x3) << 18 | - ((u8) (dsim_config->e_pixel_format) & 0x7) << 12; - - writel(reg, dsim->reg_base + EXYNOS_DSIM_CONFIG); -} - -void exynos_mipi_dsi_enable_lane(struct mipi_dsim_device *dsim, unsigned int lane, - unsigned int enable) -{ - unsigned int reg; - - reg = readl(dsim->reg_base + EXYNOS_DSIM_CONFIG); - - if (enable) - reg |= DSIM_LANE_ENx(lane); - else - reg &= ~DSIM_LANE_ENx(lane); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_CONFIG); -} - - -void exynos_mipi_dsi_set_data_lane_number(struct mipi_dsim_device *dsim, - unsigned int count) -{ - unsigned int cfg; - - /* get the data lane number. 
*/ - cfg = DSIM_NUM_OF_DATALANE_SHIFT(count); - - writel(cfg, dsim->reg_base + EXYNOS_DSIM_CONFIG); -} - -void exynos_mipi_dsi_enable_afc(struct mipi_dsim_device *dsim, unsigned int enable, - unsigned int afc_code) -{ - unsigned int reg = readl(dsim->reg_base + EXYNOS_DSIM_PHYACCHR); - - if (enable) { - reg |= (1 << 14); - reg &= ~(0x7 << 5); - reg |= (afc_code & 0x7) << 5; - } else - reg &= ~(1 << 14); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_PHYACCHR); -} - -void exynos_mipi_dsi_enable_pll_bypass(struct mipi_dsim_device *dsim, - unsigned int enable) -{ - unsigned int reg = (readl(dsim->reg_base + EXYNOS_DSIM_CLKCTRL)) & - ~(DSIM_PLL_BYPASS_SHIFT(0x1)); - - reg |= DSIM_PLL_BYPASS_SHIFT(enable); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_CLKCTRL); -} - -void exynos_mipi_dsi_set_pll_pms(struct mipi_dsim_device *dsim, unsigned int p, - unsigned int m, unsigned int s) -{ - unsigned int reg = readl(dsim->reg_base + EXYNOS_DSIM_PLLCTRL); - - reg |= ((p & 0x3f) << 13) | ((m & 0x1ff) << 4) | ((s & 0x7) << 1); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_PLLCTRL); -} - -void exynos_mipi_dsi_pll_freq_band(struct mipi_dsim_device *dsim, - unsigned int freq_band) -{ - unsigned int reg = (readl(dsim->reg_base + EXYNOS_DSIM_PLLCTRL)) & - ~(DSIM_FREQ_BAND_SHIFT(0x1f)); - - reg |= DSIM_FREQ_BAND_SHIFT(freq_band & 0x1f); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_PLLCTRL); -} - -void exynos_mipi_dsi_pll_freq(struct mipi_dsim_device *dsim, - unsigned int pre_divider, unsigned int main_divider, - unsigned int scaler) -{ - unsigned int reg = (readl(dsim->reg_base + EXYNOS_DSIM_PLLCTRL)) & - ~(0x7ffff << 1); - - reg |= (pre_divider & 0x3f) << 13 | (main_divider & 0x1ff) << 4 | - (scaler & 0x7) << 1; - - writel(reg, dsim->reg_base + EXYNOS_DSIM_PLLCTRL); -} - -void exynos_mipi_dsi_pll_stable_time(struct mipi_dsim_device *dsim, - unsigned int lock_time) -{ - writel(lock_time, dsim->reg_base + EXYNOS_DSIM_PLLTMR); -} - -void exynos_mipi_dsi_enable_pll(struct mipi_dsim_device *dsim, unsigned int enable) -{ - unsigned int reg = (readl(dsim->reg_base + EXYNOS_DSIM_PLLCTRL)) & - ~(DSIM_PLL_EN_SHIFT(0x1)); - - reg |= DSIM_PLL_EN_SHIFT(enable & 0x1); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_PLLCTRL); -} - -void exynos_mipi_dsi_set_byte_clock_src(struct mipi_dsim_device *dsim, - unsigned int src) -{ - unsigned int reg = (readl(dsim->reg_base + EXYNOS_DSIM_CLKCTRL)) & - ~(DSIM_BYTE_CLK_SRC_SHIFT(0x3)); - - reg |= (DSIM_BYTE_CLK_SRC_SHIFT(src)); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_CLKCTRL); -} - -void exynos_mipi_dsi_enable_byte_clock(struct mipi_dsim_device *dsim, - unsigned int enable) -{ - unsigned int reg = (readl(dsim->reg_base + EXYNOS_DSIM_CLKCTRL)) & - ~(DSIM_BYTE_CLKEN_SHIFT(0x1)); - - reg |= DSIM_BYTE_CLKEN_SHIFT(enable); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_CLKCTRL); -} - -void exynos_mipi_dsi_set_esc_clk_prs(struct mipi_dsim_device *dsim, - unsigned int enable, unsigned int prs_val) -{ - unsigned int reg = (readl(dsim->reg_base + EXYNOS_DSIM_CLKCTRL)) & - ~(DSIM_ESC_CLKEN_SHIFT(0x1) | 0xffff); - - reg |= DSIM_ESC_CLKEN_SHIFT(enable); - if (enable) - reg |= prs_val; - - writel(reg, dsim->reg_base + EXYNOS_DSIM_CLKCTRL); -} - -void exynos_mipi_dsi_enable_esc_clk_on_lane(struct mipi_dsim_device *dsim, - unsigned int lane_sel, unsigned int enable) -{ - unsigned int reg = readl(dsim->reg_base + EXYNOS_DSIM_CLKCTRL); - - if (enable) - reg |= DSIM_LANE_ESC_CLKEN(lane_sel); - else - - reg &= ~DSIM_LANE_ESC_CLKEN(lane_sel); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_CLKCTRL); -} - -void 
exynos_mipi_dsi_force_dphy_stop_state(struct mipi_dsim_device *dsim, - unsigned int enable) -{ - unsigned int reg = (readl(dsim->reg_base + EXYNOS_DSIM_ESCMODE)) & - ~(DSIM_FORCE_STOP_STATE_SHIFT(0x1)); - - reg |= (DSIM_FORCE_STOP_STATE_SHIFT(enable & 0x1)); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_ESCMODE); -} - -unsigned int exynos_mipi_dsi_is_lane_state(struct mipi_dsim_device *dsim) -{ - unsigned int reg = readl(dsim->reg_base + EXYNOS_DSIM_STATUS); - - /** - * check clock and data lane states. - * if MIPI-DSI controller was enabled at bootloader then - * TX_READY_HS_CLK is enabled otherwise STOP_STATE_CLK. - * so it should be checked for two case. - */ - if ((reg & DSIM_STOP_STATE_DAT(0xf)) && - ((reg & DSIM_STOP_STATE_CLK) || - (reg & DSIM_TX_READY_HS_CLK))) - return 1; - - return 0; -} - -void exynos_mipi_dsi_set_stop_state_counter(struct mipi_dsim_device *dsim, - unsigned int cnt_val) -{ - unsigned int reg = (readl(dsim->reg_base + EXYNOS_DSIM_ESCMODE)) & - ~(DSIM_STOP_STATE_CNT_SHIFT(0x7ff)); - - reg |= (DSIM_STOP_STATE_CNT_SHIFT(cnt_val & 0x7ff)); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_ESCMODE); -} - -void exynos_mipi_dsi_set_bta_timeout(struct mipi_dsim_device *dsim, - unsigned int timeout) -{ - unsigned int reg = (readl(dsim->reg_base + EXYNOS_DSIM_TIMEOUT)) & - ~(DSIM_BTA_TOUT_SHIFT(0xff)); - - reg |= (DSIM_BTA_TOUT_SHIFT(timeout)); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_TIMEOUT); -} - -void exynos_mipi_dsi_set_lpdr_timeout(struct mipi_dsim_device *dsim, - unsigned int timeout) -{ - unsigned int reg = (readl(dsim->reg_base + EXYNOS_DSIM_TIMEOUT)) & - ~(DSIM_LPDR_TOUT_SHIFT(0xffff)); - - reg |= (DSIM_LPDR_TOUT_SHIFT(timeout)); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_TIMEOUT); -} - -void exynos_mipi_dsi_set_cpu_transfer_mode(struct mipi_dsim_device *dsim, - unsigned int lp) -{ - unsigned int reg = readl(dsim->reg_base + EXYNOS_DSIM_ESCMODE); - - reg &= ~DSIM_CMD_LPDT_LP; - - if (lp) - reg |= DSIM_CMD_LPDT_LP; - - writel(reg, dsim->reg_base + EXYNOS_DSIM_ESCMODE); -} - -void exynos_mipi_dsi_set_lcdc_transfer_mode(struct mipi_dsim_device *dsim, - unsigned int lp) -{ - unsigned int reg = readl(dsim->reg_base + EXYNOS_DSIM_ESCMODE); - - reg &= ~DSIM_TX_LPDT_LP; - - if (lp) - reg |= DSIM_TX_LPDT_LP; - - writel(reg, dsim->reg_base + EXYNOS_DSIM_ESCMODE); -} - -void exynos_mipi_dsi_enable_hs_clock(struct mipi_dsim_device *dsim, - unsigned int enable) -{ - unsigned int reg = (readl(dsim->reg_base + EXYNOS_DSIM_CLKCTRL)) & - ~(DSIM_TX_REQUEST_HSCLK_SHIFT(0x1)); - - reg |= DSIM_TX_REQUEST_HSCLK_SHIFT(enable); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_CLKCTRL); -} - -void exynos_mipi_dsi_dp_dn_swap(struct mipi_dsim_device *dsim, - unsigned int swap_en) -{ - unsigned int reg = readl(dsim->reg_base + EXYNOS_DSIM_PHYACCHR1); - - reg &= ~(0x3 << 0); - reg |= (swap_en & 0x3) << 0; - - writel(reg, dsim->reg_base + EXYNOS_DSIM_PHYACCHR1); -} - -void exynos_mipi_dsi_hs_zero_ctrl(struct mipi_dsim_device *dsim, - unsigned int hs_zero) -{ - unsigned int reg = (readl(dsim->reg_base + EXYNOS_DSIM_PLLCTRL)) & - ~(0xf << 28); - - reg |= ((hs_zero & 0xf) << 28); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_PLLCTRL); -} - -void exynos_mipi_dsi_prep_ctrl(struct mipi_dsim_device *dsim, unsigned int prep) -{ - unsigned int reg = (readl(dsim->reg_base + EXYNOS_DSIM_PLLCTRL)) & - ~(0x7 << 20); - - reg |= ((prep & 0x7) << 20); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_PLLCTRL); -} - -unsigned int exynos_mipi_dsi_read_interrupt(struct mipi_dsim_device *dsim) -{ - return 
readl(dsim->reg_base + EXYNOS_DSIM_INTSRC); -} - -void exynos_mipi_dsi_clear_interrupt(struct mipi_dsim_device *dsim, - unsigned int src) -{ - unsigned int reg = readl(dsim->reg_base + EXYNOS_DSIM_INTSRC); - - reg |= src; - - writel(reg, dsim->reg_base + EXYNOS_DSIM_INTSRC); -} - -void exynos_mipi_dsi_set_interrupt(struct mipi_dsim_device *dsim, - unsigned int src, unsigned int enable) -{ - unsigned int reg = 0; - - if (enable) - reg |= src; - else - reg &= ~src; - - writel(reg, dsim->reg_base + EXYNOS_DSIM_INTSRC); -} - -unsigned int exynos_mipi_dsi_is_pll_stable(struct mipi_dsim_device *dsim) -{ - unsigned int reg; - - reg = readl(dsim->reg_base + EXYNOS_DSIM_STATUS); - - return reg & (1 << 31) ? 1 : 0; -} - -unsigned int exynos_mipi_dsi_get_fifo_state(struct mipi_dsim_device *dsim) -{ - return readl(dsim->reg_base + EXYNOS_DSIM_FIFOCTRL) & ~(0x1f); -} - -void exynos_mipi_dsi_wr_tx_header(struct mipi_dsim_device *dsim, - unsigned int di, unsigned int data0, unsigned int data1) -{ - unsigned int reg = (data1 << 16) | (data0 << 8) | ((di & 0x3f) << 0); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_PKTHDR); -} - -void exynos_mipi_dsi_rd_tx_header(struct mipi_dsim_device *dsim, - unsigned int di, unsigned int data0) -{ - unsigned int reg = (data0 << 8) | (di << 0); - - writel(reg, dsim->reg_base + EXYNOS_DSIM_PKTHDR); -} - -unsigned int exynos_mipi_dsi_rd_rx_fifo(struct mipi_dsim_device *dsim) -{ - return readl(dsim->reg_base + EXYNOS_DSIM_RXFIFO); -} - -unsigned int _exynos_mipi_dsi_get_frame_done_status(struct mipi_dsim_device *dsim) -{ - unsigned int reg = readl(dsim->reg_base + EXYNOS_DSIM_INTSRC); - - return (reg & INTSRC_FRAME_DONE) ? 1 : 0; -} - -void _exynos_mipi_dsi_clear_frame_done(struct mipi_dsim_device *dsim) -{ - unsigned int reg = readl(dsim->reg_base + EXYNOS_DSIM_INTSRC); - - writel(reg | INTSRC_FRAME_DONE, dsim->reg_base + - EXYNOS_DSIM_INTSRC); -} - -void exynos_mipi_dsi_wr_tx_data(struct mipi_dsim_device *dsim, - unsigned int tx_data) -{ - writel(tx_data, dsim->reg_base + EXYNOS_DSIM_PAYLOAD); -} diff --git a/drivers/video/fbdev/exynos/exynos_mipi_dsi_lowlevel.h b/drivers/video/fbdev/exynos/exynos_mipi_dsi_lowlevel.h deleted file mode 100644 index 85460701c7ea..000000000000 --- a/drivers/video/fbdev/exynos/exynos_mipi_dsi_lowlevel.h +++ /dev/null @@ -1,112 +0,0 @@ -/* linux/drivers/video/exynos/exynos_mipi_dsi_lowlevel.h - * - * Header file for Samsung SoC MIPI-DSI lowlevel driver. - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd - * - * InKi Dae <inki.dae@samsung.com> - * Donghwa Lee <dh09.lee@samsung.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
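[Editorial aside, not part of the patch.] Nearly every accessor in the lowlevel file above follows one read-modify-write shape: readl() the register, clear a field, OR in the new value, writel() it back. A kernel-style fragment expressing that idiom as a single generic helper (rmw_field is a hypothetical name; the removed driver deliberately open-coded the pattern per register):

#include <linux/io.h>
#include <linux/types.h>

static void rmw_field(void __iomem *reg, u32 mask, u32 val)
{
	u32 tmp = readl(reg);

	tmp &= ~mask;		/* clear the old field */
	tmp |= val & mask;	/* install the new one, clipped to the field */
	writel(tmp, reg);
}

For example, the body of exynos_mipi_dsi_enable_pll_bypass() above is roughly rmw_field(base + EXYNOS_DSIM_CLKCTRL, DSIM_PLL_BYPASS_SHIFT(0x1), DSIM_PLL_BYPASS_SHIFT(enable)), assuming enable is 0 or 1.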
-*/ - -#ifndef _EXYNOS_MIPI_DSI_LOWLEVEL_H -#define _EXYNOS_MIPI_DSI_LOWLEVEL_H - -void exynos_mipi_dsi_func_reset(struct mipi_dsim_device *dsim); -void exynos_mipi_dsi_sw_reset(struct mipi_dsim_device *dsim); -void exynos_mipi_dsi_sw_reset_release(struct mipi_dsim_device *dsim); -int exynos_mipi_dsi_get_sw_reset_release(struct mipi_dsim_device *dsim); -void exynos_mipi_dsi_set_interrupt_mask(struct mipi_dsim_device *dsim, - unsigned int mode, unsigned int mask); -void exynos_mipi_dsi_set_data_lane_number(struct mipi_dsim_device *dsim, - unsigned int count); -void exynos_mipi_dsi_init_fifo_pointer(struct mipi_dsim_device *dsim, - unsigned int cfg); -void exynos_mipi_dsi_set_phy_tunning(struct mipi_dsim_device *dsim, - unsigned int value); -void exynos_mipi_dsi_set_phy_tunning(struct mipi_dsim_device *dsim, - unsigned int value); -void exynos_mipi_dsi_set_main_stand_by(struct mipi_dsim_device *dsim, - unsigned int enable); -void exynos_mipi_dsi_set_main_disp_resol(struct mipi_dsim_device *dsim, - unsigned int width_resol, unsigned int height_resol); -void exynos_mipi_dsi_set_main_disp_vporch(struct mipi_dsim_device *dsim, - unsigned int cmd_allow, unsigned int vfront, unsigned int vback); -void exynos_mipi_dsi_set_main_disp_hporch(struct mipi_dsim_device *dsim, - unsigned int front, unsigned int back); -void exynos_mipi_dsi_set_main_disp_sync_area(struct mipi_dsim_device *dsim, - unsigned int vert, unsigned int hori); -void exynos_mipi_dsi_set_sub_disp_resol(struct mipi_dsim_device *dsim, - unsigned int vert, unsigned int hori); -void exynos_mipi_dsi_init_config(struct mipi_dsim_device *dsim); -void exynos_mipi_dsi_display_config(struct mipi_dsim_device *dsim, - struct mipi_dsim_config *dsim_config); -void exynos_mipi_dsi_set_data_lane_number(struct mipi_dsim_device *dsim, - unsigned int count); -void exynos_mipi_dsi_enable_lane(struct mipi_dsim_device *dsim, unsigned int lane, - unsigned int enable); -void exynos_mipi_dsi_enable_afc(struct mipi_dsim_device *dsim, unsigned int enable, - unsigned int afc_code); -void exynos_mipi_dsi_enable_pll_bypass(struct mipi_dsim_device *dsim, - unsigned int enable); -void exynos_mipi_dsi_set_pll_pms(struct mipi_dsim_device *dsim, unsigned int p, - unsigned int m, unsigned int s); -void exynos_mipi_dsi_pll_freq_band(struct mipi_dsim_device *dsim, - unsigned int freq_band); -void exynos_mipi_dsi_pll_freq(struct mipi_dsim_device *dsim, - unsigned int pre_divider, unsigned int main_divider, - unsigned int scaler); -void exynos_mipi_dsi_pll_stable_time(struct mipi_dsim_device *dsim, - unsigned int lock_time); -void exynos_mipi_dsi_enable_pll(struct mipi_dsim_device *dsim, - unsigned int enable); -void exynos_mipi_dsi_set_byte_clock_src(struct mipi_dsim_device *dsim, - unsigned int src); -void exynos_mipi_dsi_enable_byte_clock(struct mipi_dsim_device *dsim, - unsigned int enable); -void exynos_mipi_dsi_set_esc_clk_prs(struct mipi_dsim_device *dsim, - unsigned int enable, unsigned int prs_val); -void exynos_mipi_dsi_enable_esc_clk_on_lane(struct mipi_dsim_device *dsim, - unsigned int lane_sel, unsigned int enable); -void exynos_mipi_dsi_force_dphy_stop_state(struct mipi_dsim_device *dsim, - unsigned int enable); -unsigned int exynos_mipi_dsi_is_lane_state(struct mipi_dsim_device *dsim); -void exynos_mipi_dsi_set_stop_state_counter(struct mipi_dsim_device *dsim, - unsigned int cnt_val); -void exynos_mipi_dsi_set_bta_timeout(struct mipi_dsim_device *dsim, - unsigned int timeout); -void exynos_mipi_dsi_set_lpdr_timeout(struct mipi_dsim_device *dsim, - unsigned 
int timeout); -void exynos_mipi_dsi_set_lcdc_transfer_mode(struct mipi_dsim_device *dsim, - unsigned int lp); -void exynos_mipi_dsi_set_cpu_transfer_mode(struct mipi_dsim_device *dsim, - unsigned int lp); -void exynos_mipi_dsi_enable_hs_clock(struct mipi_dsim_device *dsim, - unsigned int enable); -void exynos_mipi_dsi_dp_dn_swap(struct mipi_dsim_device *dsim, - unsigned int swap_en); -void exynos_mipi_dsi_hs_zero_ctrl(struct mipi_dsim_device *dsim, - unsigned int hs_zero); -void exynos_mipi_dsi_prep_ctrl(struct mipi_dsim_device *dsim, unsigned int prep); -unsigned int exynos_mipi_dsi_read_interrupt(struct mipi_dsim_device *dsim); -unsigned int exynos_mipi_dsi_read_interrupt_mask(struct mipi_dsim_device *dsim); -void exynos_mipi_dsi_clear_interrupt(struct mipi_dsim_device *dsim, - unsigned int src); -void exynos_mipi_dsi_set_interrupt(struct mipi_dsim_device *dsim, - unsigned int src, unsigned int enable); -unsigned int exynos_mipi_dsi_is_pll_stable(struct mipi_dsim_device *dsim); -unsigned int exynos_mipi_dsi_get_fifo_state(struct mipi_dsim_device *dsim); -unsigned int _exynos_mipi_dsi_get_frame_done_status(struct mipi_dsim_device *dsim); -void _exynos_mipi_dsi_clear_frame_done(struct mipi_dsim_device *dsim); -void exynos_mipi_dsi_wr_tx_header(struct mipi_dsim_device *dsim, unsigned int di, - unsigned int data0, unsigned int data1); -void exynos_mipi_dsi_wr_tx_data(struct mipi_dsim_device *dsim, - unsigned int tx_data); -void exynos_mipi_dsi_rd_tx_header(struct mipi_dsim_device *dsim, - unsigned int data0, unsigned int data1); -unsigned int exynos_mipi_dsi_rd_rx_fifo(struct mipi_dsim_device *dsim); - -#endif /* _EXYNOS_MIPI_DSI_LOWLEVEL_H */ diff --git a/drivers/video/fbdev/exynos/exynos_mipi_dsi_regs.h b/drivers/video/fbdev/exynos/exynos_mipi_dsi_regs.h deleted file mode 100644 index 4227106d3fd0..000000000000 --- a/drivers/video/fbdev/exynos/exynos_mipi_dsi_regs.h +++ /dev/null @@ -1,149 +0,0 @@ -/* linux/driver/video/exynos/exynos_mipi_dsi_regs.h - * - * Register definition file for Samsung MIPI-DSIM driver - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd - * - * InKi Dae <inki.dae@samsung.com> - * Donghwa Lee <dh09.lee@samsung.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
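[Editorial aside, not part of the patch.] The register header that follows pairs each field's _SHIFT(x) packing macro with a matching _MASK constant. A small, runnable C example of how such a pair is used together; FIELD_SHIFT and FIELD_MASK are invented stand-ins for, e.g., DSIM_STABLE_VFP_SHIFT and DSIM_STABLE_VFP_MASK:

#include <stdint.h>
#include <stdio.h>

/* same shape as the DSIM_*_SHIFT / DSIM_*_MASK pairs below */
#define FIELD_SHIFT(x)	((x) << 16)
#define FIELD_MASK	(0x7ff << 16)

int main(void)
{
	uint32_t reg = 0xdeadbeef;

	reg &= ~FIELD_MASK;		/* drop the old field */
	reg |= FIELD_SHIFT(42 & 0x7ff);	/* pack the new one */
	printf("0x%08x\n", (unsigned)reg); /* bits 26:16 now hold 42 */
	return 0;
}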
-*/ - -#ifndef _EXYNOS_MIPI_DSI_REGS_H -#define _EXYNOS_MIPI_DSI_REGS_H - -#define EXYNOS_DSIM_STATUS 0x0 /* Status register */ -#define EXYNOS_DSIM_SWRST 0x4 /* Software reset register */ -#define EXYNOS_DSIM_CLKCTRL 0x8 /* Clock control register */ -#define EXYNOS_DSIM_TIMEOUT 0xc /* Time out register */ -#define EXYNOS_DSIM_CONFIG 0x10 /* Configuration register */ -#define EXYNOS_DSIM_ESCMODE 0x14 /* Escape mode register */ - -/* Main display image resolution register */ -#define EXYNOS_DSIM_MDRESOL 0x18 -#define EXYNOS_DSIM_MVPORCH 0x1c /* Main display Vporch register */ -#define EXYNOS_DSIM_MHPORCH 0x20 /* Main display Hporch register */ -#define EXYNOS_DSIM_MSYNC 0x24 /* Main display sync area register */ - -/* Sub display image resolution register */ -#define EXYNOS_DSIM_SDRESOL 0x28 -#define EXYNOS_DSIM_INTSRC 0x2c /* Interrupt source register */ -#define EXYNOS_DSIM_INTMSK 0x30 /* Interrupt mask register */ -#define EXYNOS_DSIM_PKTHDR 0x34 /* Packet Header FIFO register */ -#define EXYNOS_DSIM_PAYLOAD 0x38 /* Payload FIFO register */ -#define EXYNOS_DSIM_RXFIFO 0x3c /* Read FIFO register */ -#define EXYNOS_DSIM_FIFOTHLD 0x40 /* FIFO threshold level register */ -#define EXYNOS_DSIM_FIFOCTRL 0x44 /* FIFO status and control register */ - -/* FIFO memory AC characteristic register */ -#define EXYNOS_DSIM_PLLCTRL 0x4c /* PLL control register */ -#define EXYNOS_DSIM_PLLTMR 0x50 /* PLL timer register */ -#define EXYNOS_DSIM_PHYACCHR 0x54 /* D-PHY AC characteristic register */ -#define EXYNOS_DSIM_PHYACCHR1 0x58 /* D-PHY AC characteristic register1 */ - -/* DSIM_STATUS */ -#define DSIM_STOP_STATE_DAT(x) (((x) & 0xf) << 0) -#define DSIM_STOP_STATE_CLK (1 << 8) -#define DSIM_TX_READY_HS_CLK (1 << 10) - -/* DSIM_SWRST */ -#define DSIM_FUNCRST (1 << 16) -#define DSIM_SWRST (1 << 0) - -/* EXYNOS_DSIM_TIMEOUT */ -#define DSIM_LPDR_TOUT_SHIFT(x) ((x) << 0) -#define DSIM_BTA_TOUT_SHIFT(x) ((x) << 16) - -/* EXYNOS_DSIM_CLKCTRL */ -#define DSIM_LANE_ESC_CLKEN(x) (((x) & 0x1f) << 19) -#define DSIM_BYTE_CLKEN_SHIFT(x) ((x) << 24) -#define DSIM_BYTE_CLK_SRC_SHIFT(x) ((x) << 25) -#define DSIM_PLL_BYPASS_SHIFT(x) ((x) << 27) -#define DSIM_ESC_CLKEN_SHIFT(x) ((x) << 28) -#define DSIM_TX_REQUEST_HSCLK_SHIFT(x) ((x) << 31) - -/* EXYNOS_DSIM_CONFIG */ -#define DSIM_LANE_ENx(x) (((x) & 0x1f) << 0) -#define DSIM_NUM_OF_DATALANE_SHIFT(x) ((x) << 5) -#define DSIM_HSA_MODE_SHIFT(x) ((x) << 20) -#define DSIM_HBP_MODE_SHIFT(x) ((x) << 21) -#define DSIM_HFP_MODE_SHIFT(x) ((x) << 22) -#define DSIM_HSE_MODE_SHIFT(x) ((x) << 23) -#define DSIM_AUTO_MODE_SHIFT(x) ((x) << 24) -#define DSIM_EOT_DISABLE(x) ((x) << 28) -#define DSIM_AUTO_FLUSH(x) ((x) << 29) - -#define DSIM_NUM_OF_DATA_LANE(x) ((x) << DSIM_NUM_OF_DATALANE_SHIFT) - -/* EXYNOS_DSIM_ESCMODE */ -#define DSIM_TX_LPDT_LP (1 << 6) -#define DSIM_CMD_LPDT_LP (1 << 7) -#define DSIM_FORCE_STOP_STATE_SHIFT(x) ((x) << 20) -#define DSIM_STOP_STATE_CNT_SHIFT(x) ((x) << 21) - -/* EXYNOS_DSIM_MDRESOL */ -#define DSIM_MAIN_STAND_BY (1 << 31) -#define DSIM_MAIN_VRESOL(x) (((x) & 0x7ff) << 16) -#define DSIM_MAIN_HRESOL(x) (((x) & 0X7ff) << 0) - -/* EXYNOS_DSIM_MVPORCH */ -#define DSIM_CMD_ALLOW_SHIFT(x) ((x) << 28) -#define DSIM_STABLE_VFP_SHIFT(x) ((x) << 16) -#define DSIM_MAIN_VBP_SHIFT(x) ((x) << 0) -#define DSIM_CMD_ALLOW_MASK (0xf << 28) -#define DSIM_STABLE_VFP_MASK (0x7ff << 16) -#define DSIM_MAIN_VBP_MASK (0x7ff << 0) - -/* EXYNOS_DSIM_MHPORCH */ -#define DSIM_MAIN_HFP_SHIFT(x) ((x) << 16) -#define DSIM_MAIN_HBP_SHIFT(x) ((x) << 0) -#define DSIM_MAIN_HFP_MASK ((0xffff) 
<< 16) -#define DSIM_MAIN_HBP_MASK ((0xffff) << 0) - -/* EXYNOS_DSIM_MSYNC */ -#define DSIM_MAIN_VSA_SHIFT(x) ((x) << 22) -#define DSIM_MAIN_HSA_SHIFT(x) ((x) << 0) -#define DSIM_MAIN_VSA_MASK ((0x3ff) << 22) -#define DSIM_MAIN_HSA_MASK ((0xffff) << 0) - -/* EXYNOS_DSIM_SDRESOL */ -#define DSIM_SUB_STANDY_SHIFT(x) ((x) << 31) -#define DSIM_SUB_VRESOL_SHIFT(x) ((x) << 16) -#define DSIM_SUB_HRESOL_SHIFT(x) ((x) << 0) -#define DSIM_SUB_STANDY_MASK ((0x1) << 31) -#define DSIM_SUB_VRESOL_MASK ((0x7ff) << 16) -#define DSIM_SUB_HRESOL_MASK ((0x7ff) << 0) - -/* EXYNOS_DSIM_INTSRC */ -#define INTSRC_PLL_STABLE (1 << 31) -#define INTSRC_SW_RST_RELEASE (1 << 30) -#define INTSRC_SFR_FIFO_EMPTY (1 << 29) -#define INTSRC_FRAME_DONE (1 << 24) -#define INTSRC_RX_DATA_DONE (1 << 18) - -/* EXYNOS_DSIM_INTMSK */ -#define INTMSK_FIFO_EMPTY (1 << 29) -#define INTMSK_BTA (1 << 25) -#define INTMSK_FRAME_DONE (1 << 24) -#define INTMSK_RX_TIMEOUT (1 << 21) -#define INTMSK_BTA_TIMEOUT (1 << 20) -#define INTMSK_RX_DONE (1 << 18) -#define INTMSK_RX_TE (1 << 17) -#define INTMSK_RX_ACK (1 << 16) -#define INTMSK_RX_ECC_ERR (1 << 15) -#define INTMSK_RX_CRC_ERR (1 << 14) - -/* EXYNOS_DSIM_FIFOCTRL */ -#define SFR_HEADER_EMPTY (1 << 22) - -/* EXYNOS_DSIM_PHYACCHR */ -#define DSIM_AFC_CTL(x) (((x) & 0x7) << 5) - -/* EXYNOS_DSIM_PLLCTRL */ -#define DSIM_PLL_EN_SHIFT(x) ((x) << 23) -#define DSIM_FREQ_BAND_SHIFT(x) ((x) << 24) - -#endif /* _EXYNOS_MIPI_DSI_REGS_H */ diff --git a/drivers/video/fbdev/exynos/s6e8ax0.c b/drivers/video/fbdev/exynos/s6e8ax0.c deleted file mode 100644 index de2f3e793786..000000000000 --- a/drivers/video/fbdev/exynos/s6e8ax0.c +++ /dev/null @@ -1,887 +0,0 @@ -/* linux/drivers/video/exynos/s6e8ax0.c - * - * MIPI-DSI based s6e8ax0 AMOLED lcd 4.65 inch panel driver. - * - * Inki Dae, <inki.dae@samsung.com> - * Donghwa Lee, <dh09.lee@samsung.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
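[Editorial aside, not part of the patch.] In the panel driver that follows, the backlight brightness (0..MAX_BRIGHTNESS) is used directly as an index into s6e8ax0_22_gamma_table[], an array of pointers to fixed per-level command strings. A runnable sketch of that table-of-tables lookup, with invented names and shortened data:

#include <stddef.h>

static const unsigned char level_a[] = { 0xfa, 0x01 };	/* abbreviated */
static const unsigned char level_b[] = { 0xfa, 0x01 };	/* abbreviated */

static const unsigned char *gamma_table[] = { level_a, level_b };

static const unsigned char *pick_gamma(unsigned int brightness)
{
	size_t max = sizeof(gamma_table) / sizeof(gamma_table[0]) - 1;

	if (brightness > max)
		brightness = max;	/* clamp defensively; the driver
					 * instead rejects out-of-range
					 * values in set_brightness() */
	return gamma_table[brightness];
}

int main(void)
{
	return pick_gamma(7)[0] == 0xfa ? 0 : 1;
}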
-*/ - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/mutex.h> -#include <linux/wait.h> -#include <linux/ctype.h> -#include <linux/io.h> -#include <linux/delay.h> -#include <linux/irq.h> -#include <linux/interrupt.h> -#include <linux/lcd.h> -#include <linux/fb.h> -#include <linux/backlight.h> -#include <linux/regulator/consumer.h> - -#include <video/mipi_display.h> -#include <video/exynos_mipi_dsim.h> - -#define LDI_MTP_LENGTH 24 -#define DSIM_PM_STABLE_TIME 10 -#define MIN_BRIGHTNESS 0 -#define MAX_BRIGHTNESS 24 -#define GAMMA_TABLE_COUNT 26 - -#define POWER_IS_ON(pwr) ((pwr) == FB_BLANK_UNBLANK) -#define POWER_IS_OFF(pwr) ((pwr) == FB_BLANK_POWERDOWN) -#define POWER_IS_NRM(pwr) ((pwr) == FB_BLANK_NORMAL) - -#define lcd_to_master(a) (a->dsim_dev->master) -#define lcd_to_master_ops(a) ((lcd_to_master(a))->master_ops) - -enum { - DSIM_NONE_STATE = 0, - DSIM_RESUME_COMPLETE = 1, - DSIM_FRAME_DONE = 2, -}; - -struct s6e8ax0 { - struct device *dev; - unsigned int power; - unsigned int id; - unsigned int gamma; - unsigned int acl_enable; - unsigned int cur_acl; - - struct lcd_device *ld; - struct backlight_device *bd; - - struct mipi_dsim_lcd_device *dsim_dev; - struct lcd_platform_data *ddi_pd; - struct mutex lock; - bool enabled; -}; - - -static struct regulator_bulk_data supplies[] = { - { .supply = "vdd3", }, - { .supply = "vci", }, -}; - -static void s6e8ax0_regulator_enable(struct s6e8ax0 *lcd) -{ - int ret = 0; - struct lcd_platform_data *pd = NULL; - - pd = lcd->ddi_pd; - mutex_lock(&lcd->lock); - if (!lcd->enabled) { - ret = regulator_bulk_enable(ARRAY_SIZE(supplies), supplies); - if (ret) - goto out; - - lcd->enabled = true; - } - msleep(pd->power_on_delay); -out: - mutex_unlock(&lcd->lock); -} - -static void s6e8ax0_regulator_disable(struct s6e8ax0 *lcd) -{ - int ret = 0; - - mutex_lock(&lcd->lock); - if (lcd->enabled) { - ret = regulator_bulk_disable(ARRAY_SIZE(supplies), supplies); - if (ret) - goto out; - - lcd->enabled = false; - } -out: - mutex_unlock(&lcd->lock); -} - -static const unsigned char s6e8ax0_22_gamma_30[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xf5, 0x00, 0xff, 0xad, 0xaf, - 0xbA, 0xc3, 0xd8, 0xc5, 0x9f, 0xc6, 0x9e, 0xc1, 0xdc, 0xc0, - 0x00, 0x61, 0x00, 0x5a, 0x00, 0x74, -}; - -static const unsigned char s6e8ax0_22_gamma_50[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xe8, 0x1f, 0xf7, 0xad, 0xc0, - 0xb5, 0xc4, 0xdc, 0xc4, 0x9e, 0xc6, 0x9c, 0xbb, 0xd8, 0xbb, - 0x00, 0x70, 0x00, 0x68, 0x00, 0x86, -}; - -static const unsigned char s6e8ax0_22_gamma_60[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xde, 0x1f, 0xef, 0xad, 0xc4, - 0xb3, 0xc3, 0xdd, 0xc4, 0x9e, 0xc6, 0x9c, 0xbc, 0xd6, 0xba, - 0x00, 0x75, 0x00, 0x6e, 0x00, 0x8d, -}; - -static const unsigned char s6e8ax0_22_gamma_70[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xd8, 0x1f, 0xe7, 0xaf, 0xc8, - 0xb4, 0xc4, 0xdd, 0xc3, 0x9d, 0xc6, 0x9c, 0xbb, 0xd6, 0xb9, - 0x00, 0x7a, 0x00, 0x72, 0x00, 0x93, -}; - -static const unsigned char s6e8ax0_22_gamma_80[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xc9, 0x1f, 0xde, 0xae, 0xc9, - 0xb1, 0xc3, 0xdd, 0xc2, 0x9d, 0xc5, 0x9b, 0xbc, 0xd6, 0xbb, - 0x00, 0x7f, 0x00, 0x77, 0x00, 0x99, -}; - -static const unsigned char s6e8ax0_22_gamma_90[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xc7, 0x1f, 0xd9, 0xb0, 0xcc, - 0xb2, 0xc3, 0xdc, 0xc1, 0x9c, 0xc6, 0x9c, 0xbc, 0xd4, 0xb9, - 0x00, 0x83, 0x00, 0x7b, 0x00, 0x9e, -}; - -static const unsigned char s6e8ax0_22_gamma_100[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xbd, 0x80, 0xcd, 0xba, 0xce, - 0xb3, 0xc4, 0xde, 0xc3, 0x9c, 0xc4, 
0x9, 0xb8, 0xd3, 0xb6, - 0x00, 0x88, 0x00, 0x80, 0x00, 0xa5, -}; - -static const unsigned char s6e8ax0_22_gamma_120[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xb9, 0x95, 0xc8, 0xb1, 0xcf, - 0xb2, 0xc6, 0xdf, 0xc5, 0x9b, 0xc3, 0x99, 0xb6, 0xd2, 0xb6, - 0x00, 0x8f, 0x00, 0x86, 0x00, 0xac, -}; - -static const unsigned char s6e8ax0_22_gamma_130[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xb7, 0xa0, 0xc7, 0xb1, 0xd0, - 0xb2, 0xc4, 0xdd, 0xc3, 0x9a, 0xc3, 0x98, 0xb6, 0xd0, 0xb4, - 0x00, 0x92, 0x00, 0x8a, 0x00, 0xb1, -}; - -static const unsigned char s6e8ax0_22_gamma_140[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xb7, 0xa0, 0xc5, 0xb2, 0xd0, - 0xb3, 0xc3, 0xde, 0xc3, 0x9b, 0xc2, 0x98, 0xb6, 0xd0, 0xb4, - 0x00, 0x95, 0x00, 0x8d, 0x00, 0xb5, -}; - -static const unsigned char s6e8ax0_22_gamma_150[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xb3, 0xa0, 0xc2, 0xb2, 0xd0, - 0xb2, 0xc1, 0xdd, 0xc2, 0x9b, 0xc2, 0x98, 0xb4, 0xcf, 0xb1, - 0x00, 0x99, 0x00, 0x90, 0x00, 0xba, -}; - -static const unsigned char s6e8ax0_22_gamma_160[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xaf, 0xa5, 0xbf, 0xb0, 0xd0, - 0xb1, 0xc3, 0xde, 0xc2, 0x99, 0xc1, 0x97, 0xb4, 0xce, 0xb1, - 0x00, 0x9c, 0x00, 0x93, 0x00, 0xbe, -}; - -static const unsigned char s6e8ax0_22_gamma_170[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xaf, 0xb5, 0xbf, 0xb1, 0xd1, - 0xb1, 0xc3, 0xde, 0xc3, 0x99, 0xc0, 0x96, 0xb4, 0xce, 0xb1, - 0x00, 0x9f, 0x00, 0x96, 0x00, 0xc2, -}; - -static const unsigned char s6e8ax0_22_gamma_180[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xaf, 0xb7, 0xbe, 0xb3, 0xd2, - 0xb3, 0xc3, 0xde, 0xc2, 0x97, 0xbf, 0x95, 0xb4, 0xcd, 0xb1, - 0x00, 0xa2, 0x00, 0x99, 0x00, 0xc5, -}; - -static const unsigned char s6e8ax0_22_gamma_190[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xaf, 0xb9, 0xbe, 0xb2, 0xd2, - 0xb2, 0xc3, 0xdd, 0xc3, 0x98, 0xbf, 0x95, 0xb2, 0xcc, 0xaf, - 0x00, 0xa5, 0x00, 0x9c, 0x00, 0xc9, -}; - -static const unsigned char s6e8ax0_22_gamma_200[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xaf, 0xb9, 0xbc, 0xb2, 0xd2, - 0xb1, 0xc4, 0xdd, 0xc3, 0x97, 0xbe, 0x95, 0xb1, 0xcb, 0xae, - 0x00, 0xa8, 0x00, 0x9f, 0x00, 0xcd, -}; - -static const unsigned char s6e8ax0_22_gamma_210[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xb1, 0xc1, 0xbd, 0xb1, 0xd1, - 0xb1, 0xc2, 0xde, 0xc2, 0x97, 0xbe, 0x94, 0xB0, 0xc9, 0xad, - 0x00, 0xae, 0x00, 0xa4, 0x00, 0xd4, -}; - -static const unsigned char s6e8ax0_22_gamma_220[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xb1, 0xc7, 0xbd, 0xb1, 0xd1, - 0xb1, 0xc2, 0xdd, 0xc2, 0x97, 0xbd, 0x94, 0xb0, 0xc9, 0xad, - 0x00, 0xad, 0x00, 0xa2, 0x00, 0xd3, -}; - -static const unsigned char s6e8ax0_22_gamma_230[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xb1, 0xc3, 0xbd, 0xb2, 0xd1, - 0xb1, 0xc3, 0xdd, 0xc1, 0x96, 0xbd, 0x94, 0xb0, 0xc9, 0xad, - 0x00, 0xb0, 0x00, 0xa7, 0x00, 0xd7, -}; - -static const unsigned char s6e8ax0_22_gamma_240[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xb1, 0xcb, 0xbd, 0xb1, 0xd2, - 0xb1, 0xc3, 0xdD, 0xc2, 0x95, 0xbd, 0x93, 0xaf, 0xc8, 0xab, - 0x00, 0xb3, 0x00, 0xa9, 0x00, 0xdb, -}; - -static const unsigned char s6e8ax0_22_gamma_250[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xb3, 0xcc, 0xbe, 0xb0, 0xd2, - 0xb0, 0xc3, 0xdD, 0xc2, 0x94, 0xbc, 0x92, 0xae, 0xc8, 0xab, - 0x00, 0xb6, 0x00, 0xab, 0x00, 0xde, -}; - -static const unsigned char s6e8ax0_22_gamma_260[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xb3, 0xd0, 0xbe, 0xaf, 0xd1, - 0xaf, 0xc2, 0xdd, 0xc1, 0x96, 0xbc, 0x93, 0xaf, 0xc8, 0xac, - 0x00, 0xb7, 0x00, 0xad, 0x00, 0xe0, -}; - -static const unsigned char s6e8ax0_22_gamma_270[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xb2, 0xcF, 0xbd, 0xb0, 0xd2, - 0xaf, 0xc2, 0xdc, 0xc1, 
0x95, 0xbd, 0x93, 0xae, 0xc6, 0xaa, - 0x00, 0xba, 0x00, 0xb0, 0x00, 0xe4, -}; - -static const unsigned char s6e8ax0_22_gamma_280[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xb2, 0xd0, 0xbd, 0xaf, 0xd0, - 0xad, 0xc4, 0xdd, 0xc3, 0x95, 0xbd, 0x93, 0xac, 0xc5, 0xa9, - 0x00, 0xbd, 0x00, 0xb2, 0x00, 0xe7, -}; - -static const unsigned char s6e8ax0_22_gamma_300[] = { - 0xfa, 0x01, 0x60, 0x10, 0x60, 0xb5, 0xd3, 0xbd, 0xb1, 0xd2, - 0xb0, 0xc0, 0xdc, 0xc0, 0x94, 0xba, 0x91, 0xac, 0xc5, 0xa9, - 0x00, 0xc2, 0x00, 0xb7, 0x00, 0xed, -}; - -static const unsigned char *s6e8ax0_22_gamma_table[] = { - s6e8ax0_22_gamma_30, - s6e8ax0_22_gamma_50, - s6e8ax0_22_gamma_60, - s6e8ax0_22_gamma_70, - s6e8ax0_22_gamma_80, - s6e8ax0_22_gamma_90, - s6e8ax0_22_gamma_100, - s6e8ax0_22_gamma_120, - s6e8ax0_22_gamma_130, - s6e8ax0_22_gamma_140, - s6e8ax0_22_gamma_150, - s6e8ax0_22_gamma_160, - s6e8ax0_22_gamma_170, - s6e8ax0_22_gamma_180, - s6e8ax0_22_gamma_190, - s6e8ax0_22_gamma_200, - s6e8ax0_22_gamma_210, - s6e8ax0_22_gamma_220, - s6e8ax0_22_gamma_230, - s6e8ax0_22_gamma_240, - s6e8ax0_22_gamma_250, - s6e8ax0_22_gamma_260, - s6e8ax0_22_gamma_270, - s6e8ax0_22_gamma_280, - s6e8ax0_22_gamma_300, -}; - -static void s6e8ax0_panel_cond(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - - static const unsigned char data_to_send[] = { - 0xf8, 0x3d, 0x35, 0x00, 0x00, 0x00, 0x93, 0x00, 0x3c, 0x7d, - 0x08, 0x27, 0x7d, 0x3f, 0x00, 0x00, 0x00, 0x20, 0x04, 0x08, - 0x6e, 0x00, 0x00, 0x00, 0x02, 0x08, 0x08, 0x23, 0x23, 0xc0, - 0xc8, 0x08, 0x48, 0xc1, 0x00, 0xc1, 0xff, 0xff, 0xc8 - }; - static const unsigned char data_to_send_panel_reverse[] = { - 0xf8, 0x19, 0x35, 0x00, 0x00, 0x00, 0x93, 0x00, 0x3c, 0x7d, - 0x08, 0x27, 0x7d, 0x3f, 0x00, 0x00, 0x00, 0x20, 0x04, 0x08, - 0x6e, 0x00, 0x00, 0x00, 0x02, 0x08, 0x08, 0x23, 0x23, 0xc0, - 0xc1, 0x01, 0x41, 0xc1, 0x00, 0xc1, 0xf6, 0xf6, 0xc1 - }; - - if (lcd->dsim_dev->panel_reverse) - ops->cmd_write(lcd_to_master(lcd), MIPI_DSI_DCS_LONG_WRITE, - data_to_send_panel_reverse, - ARRAY_SIZE(data_to_send_panel_reverse)); - else - ops->cmd_write(lcd_to_master(lcd), MIPI_DSI_DCS_LONG_WRITE, - data_to_send, ARRAY_SIZE(data_to_send)); -} - -static void s6e8ax0_display_cond(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - static const unsigned char data_to_send[] = { - 0xf2, 0x80, 0x03, 0x0d - }; - - ops->cmd_write(lcd_to_master(lcd), MIPI_DSI_DCS_LONG_WRITE, - data_to_send, ARRAY_SIZE(data_to_send)); -} - -/* Gamma 2.2 Setting (200cd, 7500K, 10MPCD) */ -static void s6e8ax0_gamma_cond(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - unsigned int gamma = lcd->bd->props.brightness; - - ops->cmd_write(lcd_to_master(lcd), MIPI_DSI_DCS_LONG_WRITE, - s6e8ax0_22_gamma_table[gamma], - GAMMA_TABLE_COUNT); -} - -static void s6e8ax0_gamma_update(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - static const unsigned char data_to_send[] = { - 0xf7, 0x03 - }; - - ops->cmd_write(lcd_to_master(lcd), - MIPI_DSI_DCS_SHORT_WRITE_PARAM, data_to_send, - ARRAY_SIZE(data_to_send)); -} - -static void s6e8ax0_etc_cond1(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - static const unsigned char data_to_send[] = { - 0xd1, 0xfe, 0x80, 0x00, 0x01, 0x0b, 0x00, 0x00, 0x40, - 0x0d, 0x00, 0x00 - }; - - ops->cmd_write(lcd_to_master(lcd), MIPI_DSI_DCS_LONG_WRITE, - data_to_send, ARRAY_SIZE(data_to_send)); -} - -static void s6e8ax0_etc_cond2(struct s6e8ax0 
*lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - static const unsigned char data_to_send[] = { - 0xb6, 0x0c, 0x02, 0x03, 0x32, 0xff, 0x44, 0x44, 0xc0, - 0x00 - }; - - ops->cmd_write(lcd_to_master(lcd), MIPI_DSI_DCS_LONG_WRITE, - data_to_send, ARRAY_SIZE(data_to_send)); -} - -static void s6e8ax0_etc_cond3(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - static const unsigned char data_to_send[] = { - 0xe1, 0x10, 0x1c, 0x17, 0x08, 0x1d - }; - - ops->cmd_write(lcd_to_master(lcd), MIPI_DSI_DCS_LONG_WRITE, - data_to_send, ARRAY_SIZE(data_to_send)); -} - -static void s6e8ax0_etc_cond4(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - static const unsigned char data_to_send[] = { - 0xe2, 0xed, 0x07, 0xc3, 0x13, 0x0d, 0x03 - }; - - ops->cmd_write(lcd_to_master(lcd), MIPI_DSI_DCS_LONG_WRITE, - data_to_send, ARRAY_SIZE(data_to_send)); -} - -static void s6e8ax0_etc_cond5(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - static const unsigned char data_to_send[] = { - 0xf4, 0xcf, 0x0a, 0x12, 0x10, 0x19, 0x33, 0x02 - }; - - ops->cmd_write(lcd_to_master(lcd), MIPI_DSI_DCS_LONG_WRITE, - data_to_send, ARRAY_SIZE(data_to_send)); -} -static void s6e8ax0_etc_cond6(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - static const unsigned char data_to_send[] = { - 0xe3, 0x40 - }; - - ops->cmd_write(lcd_to_master(lcd), - MIPI_DSI_DCS_SHORT_WRITE_PARAM, - data_to_send, ARRAY_SIZE(data_to_send)); -} - -static void s6e8ax0_etc_cond7(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - static const unsigned char data_to_send[] = { - 0xe4, 0x00, 0x00, 0x14, 0x80, 0x00, 0x00, 0x00 - }; - - ops->cmd_write(lcd_to_master(lcd), MIPI_DSI_DCS_LONG_WRITE, - data_to_send, ARRAY_SIZE(data_to_send)); -} - -static void s6e8ax0_elvss_set(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - static const unsigned char data_to_send[] = { - 0xb1, 0x04, 0x00 - }; - - ops->cmd_write(lcd_to_master(lcd), MIPI_DSI_DCS_LONG_WRITE, - data_to_send, ARRAY_SIZE(data_to_send)); -} - -static void s6e8ax0_elvss_nvm_set(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - static const unsigned char data_to_send[] = { - 0xd9, 0x5c, 0x20, 0x0c, 0x0f, 0x41, 0x00, 0x10, 0x11, - 0x12, 0xd1, 0x00, 0x00, 0x00, 0x00, 0x80, 0xcb, 0xed, - 0x64, 0xaf - }; - - ops->cmd_write(lcd_to_master(lcd), MIPI_DSI_DCS_LONG_WRITE, - data_to_send, ARRAY_SIZE(data_to_send)); -} - -static void s6e8ax0_sleep_in(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - static const unsigned char data_to_send[] = { - 0x10, 0x00 - }; - - ops->cmd_write(lcd_to_master(lcd), - MIPI_DSI_DCS_SHORT_WRITE, - data_to_send, ARRAY_SIZE(data_to_send)); -} - -static void s6e8ax0_sleep_out(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - static const unsigned char data_to_send[] = { - 0x11, 0x00 - }; - - ops->cmd_write(lcd_to_master(lcd), - MIPI_DSI_DCS_SHORT_WRITE, - data_to_send, ARRAY_SIZE(data_to_send)); -} - -static void s6e8ax0_display_on(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - static const unsigned char data_to_send[] = { - 0x29, 0x00 - }; - - ops->cmd_write(lcd_to_master(lcd), - MIPI_DSI_DCS_SHORT_WRITE, - data_to_send, ARRAY_SIZE(data_to_send)); -} - -static void s6e8ax0_display_off(struct s6e8ax0 
*lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - static const unsigned char data_to_send[] = { - 0x28, 0x00 - }; - - ops->cmd_write(lcd_to_master(lcd), - MIPI_DSI_DCS_SHORT_WRITE, - data_to_send, ARRAY_SIZE(data_to_send)); -} - -static void s6e8ax0_apply_level2_key(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - static const unsigned char data_to_send[] = { - 0xf0, 0x5a, 0x5a - }; - - ops->cmd_write(lcd_to_master(lcd), MIPI_DSI_DCS_LONG_WRITE, - data_to_send, ARRAY_SIZE(data_to_send)); -} - -static void s6e8ax0_acl_on(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - static const unsigned char data_to_send[] = { - 0xc0, 0x01 - }; - - ops->cmd_write(lcd_to_master(lcd), - MIPI_DSI_DCS_SHORT_WRITE, - data_to_send, ARRAY_SIZE(data_to_send)); -} - -static void s6e8ax0_acl_off(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - static const unsigned char data_to_send[] = { - 0xc0, 0x00 - }; - - ops->cmd_write(lcd_to_master(lcd), - MIPI_DSI_DCS_SHORT_WRITE, - data_to_send, ARRAY_SIZE(data_to_send)); -} - -/* Full white 50% reducing setting */ -static void s6e8ax0_acl_ctrl_set(struct s6e8ax0 *lcd) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - /* Full white 50% reducing setting */ - static const unsigned char cutoff_50[] = { - 0xc1, 0x47, 0x53, 0x13, 0x53, 0x00, 0x00, 0x02, 0xcf, - 0x00, 0x00, 0x04, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x08, 0x0f, 0x16, 0x1d, 0x24, 0x2a, 0x31, 0x38, - 0x3f, 0x46 - }; - /* Full white 45% reducing setting */ - static const unsigned char cutoff_45[] = { - 0xc1, 0x47, 0x53, 0x13, 0x53, 0x00, 0x00, 0x02, 0xcf, - 0x00, 0x00, 0x04, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x07, 0x0d, 0x13, 0x19, 0x1f, 0x25, 0x2b, 0x31, - 0x37, 0x3d - }; - /* Full white 40% reducing setting */ - static const unsigned char cutoff_40[] = { - 0xc1, 0x47, 0x53, 0x13, 0x53, 0x00, 0x00, 0x02, 0xcf, - 0x00, 0x00, 0x04, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x06, 0x0c, 0x11, 0x16, 0x1c, 0x21, 0x26, 0x2b, - 0x31, 0x36 - }; - - if (lcd->acl_enable) { - if (lcd->cur_acl == 0) { - if (lcd->gamma == 0 || lcd->gamma == 1) { - s6e8ax0_acl_off(lcd); - dev_dbg(&lcd->ld->dev, - "cur_acl=%d\n", lcd->cur_acl); - } else - s6e8ax0_acl_on(lcd); - } - switch (lcd->gamma) { - case 0: /* 30cd */ - s6e8ax0_acl_off(lcd); - lcd->cur_acl = 0; - break; - case 1 ... 3: /* 50cd ~ 90cd */ - ops->cmd_write(lcd_to_master(lcd), - MIPI_DSI_DCS_LONG_WRITE, - cutoff_40, - ARRAY_SIZE(cutoff_40)); - lcd->cur_acl = 40; - break; - case 4 ... 7: /* 120cd ~ 210cd */ - ops->cmd_write(lcd_to_master(lcd), - MIPI_DSI_DCS_LONG_WRITE, - cutoff_45, - ARRAY_SIZE(cutoff_45)); - lcd->cur_acl = 45; - break; - case 8 ... 
10: /* 220cd ~ 300cd */ - ops->cmd_write(lcd_to_master(lcd), - MIPI_DSI_DCS_LONG_WRITE, - cutoff_50, - ARRAY_SIZE(cutoff_50)); - lcd->cur_acl = 50; - break; - default: - break; - } - } else { - s6e8ax0_acl_off(lcd); - lcd->cur_acl = 0; - dev_dbg(&lcd->ld->dev, "cur_acl = %d\n", lcd->cur_acl); - } -} - -static void s6e8ax0_read_id(struct s6e8ax0 *lcd, u8 *mtp_id) -{ - unsigned int ret; - unsigned int addr = 0xd1; /* MTP ID */ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - - ret = ops->cmd_read(lcd_to_master(lcd), - MIPI_DSI_GENERIC_READ_REQUEST_1_PARAM, - addr, 3, mtp_id); -} - -static int s6e8ax0_panel_init(struct s6e8ax0 *lcd) -{ - s6e8ax0_apply_level2_key(lcd); - s6e8ax0_sleep_out(lcd); - msleep(1); - s6e8ax0_panel_cond(lcd); - s6e8ax0_display_cond(lcd); - s6e8ax0_gamma_cond(lcd); - s6e8ax0_gamma_update(lcd); - - s6e8ax0_etc_cond1(lcd); - s6e8ax0_etc_cond2(lcd); - s6e8ax0_etc_cond3(lcd); - s6e8ax0_etc_cond4(lcd); - s6e8ax0_etc_cond5(lcd); - s6e8ax0_etc_cond6(lcd); - s6e8ax0_etc_cond7(lcd); - - s6e8ax0_elvss_nvm_set(lcd); - s6e8ax0_elvss_set(lcd); - - s6e8ax0_acl_ctrl_set(lcd); - s6e8ax0_acl_on(lcd); - - /* if ID3 value is not 33h, branch private elvss mode */ - msleep(lcd->ddi_pd->power_on_delay); - - return 0; -} - -static int s6e8ax0_update_gamma_ctrl(struct s6e8ax0 *lcd, int brightness) -{ - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - - ops->cmd_write(lcd_to_master(lcd), MIPI_DSI_DCS_LONG_WRITE, - s6e8ax0_22_gamma_table[brightness], - ARRAY_SIZE(s6e8ax0_22_gamma_table)); - - /* update gamma table. */ - s6e8ax0_gamma_update(lcd); - lcd->gamma = brightness; - - return 0; -} - -static int s6e8ax0_gamma_ctrl(struct s6e8ax0 *lcd, int gamma) -{ - s6e8ax0_update_gamma_ctrl(lcd, gamma); - - return 0; -} - -static int s6e8ax0_set_power(struct lcd_device *ld, int power) -{ - struct s6e8ax0 *lcd = lcd_get_data(ld); - struct mipi_dsim_master_ops *ops = lcd_to_master_ops(lcd); - int ret = 0; - - if (power != FB_BLANK_UNBLANK && power != FB_BLANK_POWERDOWN && - power != FB_BLANK_NORMAL) { - dev_err(lcd->dev, "power value should be 0, 1 or 4.\n"); - return -EINVAL; - } - - if ((power == FB_BLANK_UNBLANK) && ops->set_blank_mode) { - /* LCD power on */ - if ((POWER_IS_ON(power) && POWER_IS_OFF(lcd->power)) - || (POWER_IS_ON(power) && POWER_IS_NRM(lcd->power))) { - ret = ops->set_blank_mode(lcd_to_master(lcd), power); - if (!ret && lcd->power != power) - lcd->power = power; - } - } else if ((power == FB_BLANK_POWERDOWN) && ops->set_early_blank_mode) { - /* LCD power off */ - if ((POWER_IS_OFF(power) && POWER_IS_ON(lcd->power)) || - (POWER_IS_ON(lcd->power) && POWER_IS_NRM(power))) { - ret = ops->set_early_blank_mode(lcd_to_master(lcd), - power); - if (!ret && lcd->power != power) - lcd->power = power; - } - } - - return ret; -} - -static int s6e8ax0_get_power(struct lcd_device *ld) -{ - struct s6e8ax0 *lcd = lcd_get_data(ld); - - return lcd->power; -} - -static int s6e8ax0_set_brightness(struct backlight_device *bd) -{ - int ret = 0, brightness = bd->props.brightness; - struct s6e8ax0 *lcd = bl_get_data(bd); - - if (brightness < MIN_BRIGHTNESS || - brightness > bd->props.max_brightness) { - dev_err(lcd->dev, "lcd brightness should be %d to %d.\n", - MIN_BRIGHTNESS, MAX_BRIGHTNESS); - return -EINVAL; - } - - ret = s6e8ax0_gamma_ctrl(lcd, brightness); - if (ret) { - dev_err(&bd->dev, "lcd brightness setting failed.\n"); - return -EIO; - } - - return ret; -} - -static struct lcd_ops s6e8ax0_lcd_ops = { - .set_power = s6e8ax0_set_power, - .get_power = s6e8ax0_get_power, -}; 
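[Editorial aside, not part of the patch.] s6e8ax0_lcd_ops above, and the backlight_ops just below, are filled-in operation tables: the lcd and backlight cores drive the panel entirely through these function pointers. A runnable, self-contained illustration of the pattern; struct panel_ops and the demo_* functions are stand-ins, not kernel API:

#include <stdio.h>

struct panel_ops {
	int (*set_power)(int power);
	int (*get_power)(void);
};

static int cur_power;

static int demo_set_power(int power)
{
	cur_power = power;	/* real driver would touch the hardware */
	return 0;
}

static int demo_get_power(void)
{
	return cur_power;
}

static const struct panel_ops ops = {
	.set_power = demo_set_power,	/* cf. s6e8ax0_set_power */
	.get_power = demo_get_power,	/* cf. s6e8ax0_get_power */
};

int main(void)
{
	ops.set_power(1);		/* the core calls through the table */
	printf("power=%d\n", ops.get_power());
	return 0;
}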
- -static const struct backlight_ops s6e8ax0_backlight_ops = { - .update_status = s6e8ax0_set_brightness, -}; - -static void s6e8ax0_power_on(struct mipi_dsim_lcd_device *dsim_dev, int power) -{ - struct s6e8ax0 *lcd = dev_get_drvdata(&dsim_dev->dev); - - msleep(lcd->ddi_pd->power_on_delay); - - /* lcd power on */ - if (power) - s6e8ax0_regulator_enable(lcd); - else - s6e8ax0_regulator_disable(lcd); - - msleep(lcd->ddi_pd->reset_delay); - - /* lcd reset */ - if (lcd->ddi_pd->reset) - lcd->ddi_pd->reset(lcd->ld); - msleep(5); -} - -static void s6e8ax0_set_sequence(struct mipi_dsim_lcd_device *dsim_dev) -{ - struct s6e8ax0 *lcd = dev_get_drvdata(&dsim_dev->dev); - - s6e8ax0_panel_init(lcd); - s6e8ax0_display_on(lcd); - - lcd->power = FB_BLANK_UNBLANK; -} - -static int s6e8ax0_probe(struct mipi_dsim_lcd_device *dsim_dev) -{ - struct s6e8ax0 *lcd; - int ret; - u8 mtp_id[3] = {0, }; - - lcd = devm_kzalloc(&dsim_dev->dev, sizeof(struct s6e8ax0), GFP_KERNEL); - if (!lcd) { - dev_err(&dsim_dev->dev, "failed to allocate s6e8ax0 structure.\n"); - return -ENOMEM; - } - - lcd->dsim_dev = dsim_dev; - lcd->ddi_pd = (struct lcd_platform_data *)dsim_dev->platform_data; - lcd->dev = &dsim_dev->dev; - - mutex_init(&lcd->lock); - - ret = devm_regulator_bulk_get(lcd->dev, ARRAY_SIZE(supplies), supplies); - if (ret) { - dev_err(lcd->dev, "Failed to get regulators: %d\n", ret); - return ret; - } - - lcd->ld = devm_lcd_device_register(lcd->dev, "s6e8ax0", lcd->dev, lcd, - &s6e8ax0_lcd_ops); - if (IS_ERR(lcd->ld)) { - dev_err(lcd->dev, "failed to register lcd ops.\n"); - return PTR_ERR(lcd->ld); - } - - lcd->bd = devm_backlight_device_register(lcd->dev, "s6e8ax0-bl", - lcd->dev, lcd, &s6e8ax0_backlight_ops, NULL); - if (IS_ERR(lcd->bd)) { - dev_err(lcd->dev, "failed to register backlight ops.\n"); - return PTR_ERR(lcd->bd); - } - - lcd->bd->props.max_brightness = MAX_BRIGHTNESS; - lcd->bd->props.brightness = MAX_BRIGHTNESS; - - s6e8ax0_read_id(lcd, mtp_id); - if (mtp_id[0] == 0x00) - dev_err(lcd->dev, "read id failed\n"); - - dev_info(lcd->dev, "Read ID : %x, %x, %x\n", - mtp_id[0], mtp_id[1], mtp_id[2]); - - if (mtp_id[2] == 0x33) - dev_info(lcd->dev, - "ID-3 is 0xff does not support dynamic elvss\n"); - else - dev_info(lcd->dev, - "ID-3 is 0x%x support dynamic elvss\n", mtp_id[2]); - - lcd->acl_enable = 1; - lcd->cur_acl = 0; - - dev_set_drvdata(&dsim_dev->dev, lcd); - - dev_dbg(lcd->dev, "probed s6e8ax0 panel driver.\n"); - - return 0; -} - -static int __maybe_unused s6e8ax0_suspend(struct mipi_dsim_lcd_device *dsim_dev) -{ - struct s6e8ax0 *lcd = dev_get_drvdata(&dsim_dev->dev); - - s6e8ax0_sleep_in(lcd); - msleep(lcd->ddi_pd->power_off_delay); - s6e8ax0_display_off(lcd); - - s6e8ax0_regulator_disable(lcd); - - return 0; -} - -static int __maybe_unused s6e8ax0_resume(struct mipi_dsim_lcd_device *dsim_dev) -{ - struct s6e8ax0 *lcd = dev_get_drvdata(&dsim_dev->dev); - - s6e8ax0_sleep_out(lcd); - msleep(lcd->ddi_pd->power_on_delay); - - s6e8ax0_regulator_enable(lcd); - s6e8ax0_set_sequence(dsim_dev); - - return 0; -} - -static struct mipi_dsim_lcd_driver s6e8ax0_dsim_ddi_driver = { - .name = "s6e8ax0", - .id = -1, - - .power_on = s6e8ax0_power_on, - .set_sequence = s6e8ax0_set_sequence, - .probe = s6e8ax0_probe, - .suspend = IS_ENABLED(CONFIG_PM) ? s6e8ax0_suspend : NULL, - .resume = IS_ENABLED(CONFIG_PM) ? 
s6e8ax0_resume : NULL, -}; - -static int s6e8ax0_init(void) -{ - exynos_mipi_dsi_register_lcd_driver(&s6e8ax0_dsim_ddi_driver); - - return 0; -} - -static void s6e8ax0_exit(void) -{ - return; -} - -module_init(s6e8ax0_init); -module_exit(s6e8ax0_exit); - -MODULE_AUTHOR("Donghwa Lee <dh09.lee@samsung.com>"); -MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>"); -MODULE_DESCRIPTION("MIPI-DSI based s6e8ax0 AMOLED LCD Panel Driver"); -MODULE_LICENSE("GPL"); diff --git a/drivers/video/fbdev/hecubafb.c b/drivers/video/fbdev/hecubafb.c index e4031ef39491..8577195cb533 100644 --- a/drivers/video/fbdev/hecubafb.c +++ b/drivers/video/fbdev/hecubafb.c @@ -47,7 +47,7 @@ #define DPY_W 600 #define DPY_H 800 -static struct fb_fix_screeninfo hecubafb_fix = { +static const struct fb_fix_screeninfo hecubafb_fix = { .id = "hecubafb", .type = FB_TYPE_PACKED_PIXELS, .visual = FB_VISUAL_MONO01, @@ -58,7 +58,7 @@ static struct fb_fix_screeninfo hecubafb_fix = { .accel = FB_ACCEL_NONE, }; -static struct fb_var_screeninfo hecubafb_var = { +static const struct fb_var_screeninfo hecubafb_var = { .xres = DPY_W, .yres = DPY_H, .xres_virtual = DPY_W, diff --git a/drivers/video/fbdev/hgafb.c b/drivers/video/fbdev/hgafb.c index 15d3ccff2965..463028543173 100644 --- a/drivers/video/fbdev/hgafb.c +++ b/drivers/video/fbdev/hgafb.c @@ -106,7 +106,7 @@ static DEFINE_SPINLOCK(hga_reg_lock); /* Framebuffer driver structures */ -static struct fb_var_screeninfo hga_default_var = { +static const struct fb_var_screeninfo hga_default_var = { .xres = 720, .yres = 348, .xres_virtual = 720, diff --git a/drivers/video/fbdev/i740fb.c b/drivers/video/fbdev/i740fb.c index cf5ccd0f2252..7bc5f6056c77 100644 --- a/drivers/video/fbdev/i740fb.c +++ b/drivers/video/fbdev/i740fb.c @@ -82,7 +82,7 @@ struct i740fb_par { #define DACSPEED24_SD 128 #define DACSPEED32 86 -static struct fb_fix_screeninfo i740fb_fix = { +static const struct fb_fix_screeninfo i740fb_fix = { .id = "i740fb", .type = FB_TYPE_PACKED_PIXELS, .visual = FB_VISUAL_TRUECOLOR, diff --git a/drivers/video/fbdev/i810/i810_main.c b/drivers/video/fbdev/i810/i810_main.c index 025b882a4826..483ab2592d0c 100644 --- a/drivers/video/fbdev/i810/i810_main.c +++ b/drivers/video/fbdev/i810/i810_main.c @@ -1691,7 +1691,7 @@ static int i810_alloc_agp_mem(struct fb_info *info) if (!(par->i810_gtt.i810_cursor_memory = agp_allocate_memory(bridge, par->cursor_heap.size >> 12, AGP_PHYSICAL_MEMORY))) { - printk("i810fb_alloc_cursormem: can't allocate" + printk("i810fb_alloc_cursormem: can't allocate " "cursor memory\n"); agp_backend_release(bridge); return -ENOMEM; diff --git a/drivers/video/fbdev/intelfb/intelfbdrv.c b/drivers/video/fbdev/intelfb/intelfbdrv.c index bf207444ba0c..ff2a5d2023e1 100644 --- a/drivers/video/fbdev/intelfb/intelfbdrv.c +++ b/drivers/video/fbdev/intelfb/intelfbdrv.c @@ -1301,11 +1301,6 @@ static int intelfb_check_var(struct fb_var_screeninfo *var, break; } - if (v.xoffset < 0) - v.xoffset = 0; - if (v.yoffset < 0) - v.yoffset = 0; - if (v.xoffset > v.xres_virtual - v.xres) v.xoffset = v.xres_virtual - v.xres; if (v.yoffset > v.yres_virtual - v.yres) diff --git a/drivers/video/fbdev/kyro/fbdev.c b/drivers/video/fbdev/kyro/fbdev.c index 5bb01533271e..f77478fb3d14 100644 --- a/drivers/video/fbdev/kyro/fbdev.c +++ b/drivers/video/fbdev/kyro/fbdev.c @@ -44,7 +44,7 @@ static struct fb_fix_screeninfo kyro_fix = { .accel = FB_ACCEL_NONE, }; -static struct fb_var_screeninfo kyro_var = { +static const struct fb_var_screeninfo kyro_var = { /* 640x480, 16bpp @ 60 Hz */ .xres = 640, .yres = 
480, diff --git a/drivers/video/fbdev/matrox/matroxfb_Ti3026.c b/drivers/video/fbdev/matrox/matroxfb_Ti3026.c index 195ad7cac1ba..68fa037d8cbc 100644 --- a/drivers/video/fbdev/matrox/matroxfb_Ti3026.c +++ b/drivers/video/fbdev/matrox/matroxfb_Ti3026.c @@ -372,7 +372,7 @@ static int Ti3026_init(struct matrox_fb_info *minfo, struct my_timming *m) DBG(__func__) - memcpy(hw->DACreg, MGADACbpp32, sizeof(hw->DACreg)); + memcpy(hw->DACreg, MGADACbpp32, sizeof(MGADACbpp32)); switch (minfo->fbcon.var.bits_per_pixel) { case 4: hw->DACreg[POS3026_XLATCHCTRL] = TVP3026_XLATCHCTRL_16_1; /* or _8_1, they are same */ hw->DACreg[POS3026_XTRUECOLORCTRL] = TVP3026_XTRUECOLORCTRL_PSEUDOCOLOR; diff --git a/drivers/video/fbdev/matrox/matroxfb_g450.c b/drivers/video/fbdev/matrox/matroxfb_g450.c index cff0546ea6fd..f108ae66fc83 100644 --- a/drivers/video/fbdev/matrox/matroxfb_g450.c +++ b/drivers/video/fbdev/matrox/matroxfb_g450.c @@ -433,7 +433,7 @@ static void cve2_init_TVdata(int norm, struct mavenregs* data, const struct outp 0x00, /* 3E written multiple times */ 0x00, /* 3F not written */ } }; - static struct mavenregs ntscregs = { { + static const struct mavenregs ntscregs = { { 0x21, 0xF0, 0x7C, 0x1F, /* 00: chroma subcarrier */ 0x00, 0x00, /* test */ diff --git a/drivers/video/fbdev/mb862xx/mb862xx-i2c.c b/drivers/video/fbdev/mb862xx/mb862xx-i2c.c index c87e17afb3e2..ba96c44f2761 100644 --- a/drivers/video/fbdev/mb862xx/mb862xx-i2c.c +++ b/drivers/video/fbdev/mb862xx/mb862xx-i2c.c @@ -157,17 +157,10 @@ static struct i2c_adapter mb862xx_i2c_adapter = { int mb862xx_i2c_init(struct mb862xxfb_par *par) { - int ret; - mb862xx_i2c_adapter.algo_data = par; par->adap = &mb862xx_i2c_adapter; - ret = i2c_add_adapter(par->adap); - if (ret < 0) { - dev_err(par->dev, "failed to add %s\n", - mb862xx_i2c_adapter.name); - } - return ret; + return i2c_add_adapter(par->adap); } void mb862xx_i2c_exit(struct mb862xxfb_par *par) diff --git a/drivers/video/fbdev/mx3fb.c b/drivers/video/fbdev/mx3fb.c index f91b1db262b0..8778e01cebac 100644 --- a/drivers/video/fbdev/mx3fb.c +++ b/drivers/video/fbdev/mx3fb.c @@ -845,7 +845,7 @@ static int __set_par(struct fb_info *fbi, bool lock) if (fbi->var.sync & FB_SYNC_SHARP_MODE) mode = IPU_PANEL_SHARP_TFT; - dev_dbg(fbi->device, "pixclock = %ul Hz\n", + dev_dbg(fbi->device, "pixclock = %u Hz\n", (u32) (PICOS2KHZ(fbi->var.pixclock) * 1000UL)); if (sdc_init_panel(mx3fb, mode, diff --git a/drivers/video/fbdev/mxsfb.c b/drivers/video/fbdev/mxsfb.c index 4e6608ceac09..7846f0e8bbbb 100644 --- a/drivers/video/fbdev/mxsfb.c +++ b/drivers/video/fbdev/mxsfb.c @@ -800,6 +800,7 @@ static int mxsfb_init_fbinfo(struct mxsfb_info *host, struct fb_videomode *vmode) { int ret; + struct device *dev = &host->pdev->dev; struct fb_info *fb_info = &host->fb_info; struct fb_var_screeninfo *var = &fb_info->var; dma_addr_t fb_phys; @@ -825,12 +826,10 @@ static int mxsfb_init_fbinfo(struct mxsfb_info *host, /* Memory allocation for framebuffer */ fb_size = SZ_2M; - fb_virt = alloc_pages_exact(fb_size, GFP_DMA); + fb_virt = dma_alloc_wc(dev, PAGE_ALIGN(fb_size), &fb_phys, GFP_KERNEL); if (!fb_virt) return -ENOMEM; - fb_phys = virt_to_phys(fb_virt); - fb_info->fix.smem_start = fb_phys; fb_info->screen_base = fb_virt; fb_info->screen_size = fb_info->fix.smem_len = fb_size; @@ -843,9 +842,11 @@ static int mxsfb_init_fbinfo(struct mxsfb_info *host, static void mxsfb_free_videomem(struct mxsfb_info *host) { + struct device *dev = &host->pdev->dev; struct fb_info *fb_info = &host->fb_info; - 
free_pages_exact(fb_info->screen_base, fb_info->fix.smem_len); + dma_free_wc(dev, fb_info->screen_size, fb_info->screen_base, + fb_info->fix.smem_start); } static const struct platform_device_id mxsfb_devtype[] = { diff --git a/drivers/video/fbdev/offb.c b/drivers/video/fbdev/offb.c index fb60a8f0cc94..906c6e75c260 100644 --- a/drivers/video/fbdev/offb.c +++ b/drivers/video/fbdev/offb.c @@ -625,6 +625,21 @@ static void __init offb_init_nodriver(struct device_node *dp, int no_real_node) if (address == OF_BAD_ADDR && addr_prop) address = (u64)addr_prop; if (address != OF_BAD_ADDR) { +#ifdef CONFIG_PCI + const __be32 *vidp, *didp; + u32 vid, did; + struct pci_dev *pdev; + + vidp = of_get_property(dp, "vendor-id", NULL); + didp = of_get_property(dp, "device-id", NULL); + if (vidp && didp) { + vid = be32_to_cpup(vidp); + did = be32_to_cpup(didp); + pdev = pci_get_device(vid, did, NULL); + if (!pdev || pci_enable_device(pdev)) + return; + } +#endif /* kludge for valkyrie */ if (strcmp(dp->name, "valkyrie") == 0) address += 0x1000; diff --git a/drivers/video/fbdev/omap/lcd_mipid.c b/drivers/video/fbdev/omap/lcd_mipid.c index 0e4cee9a8d79..c81f150589e1 100644 --- a/drivers/video/fbdev/omap/lcd_mipid.c +++ b/drivers/video/fbdev/omap/lcd_mipid.c @@ -60,7 +60,6 @@ struct mipid_device { struct mutex mutex; struct lcd_panel panel; - struct workqueue_struct *esd_wq; struct delayed_work esd_work; void (*esd_check)(struct mipid_device *m); }; @@ -390,7 +389,7 @@ static void ls041y3_esd_check(struct mipid_device *md) static void mipid_esd_start_check(struct mipid_device *md) { if (md->esd_check != NULL) - queue_delayed_work(md->esd_wq, &md->esd_work, + schedule_delayed_work(&md->esd_work, MIPID_ESD_CHECK_PERIOD); } @@ -476,11 +475,6 @@ static int mipid_init(struct lcd_panel *panel, struct mipid_device *md = to_mipid_device(panel); md->fbdev = fbdev; - md->esd_wq = create_singlethread_workqueue("mipid_esd"); - if (md->esd_wq == NULL) { - dev_err(&md->spi->dev, "can't create ESD workqueue\n"); - return -ENOMEM; - } INIT_DELAYED_WORK(&md->esd_work, mipid_esd_work); mutex_init(&md->mutex); @@ -500,7 +494,6 @@ static void mipid_cleanup(struct lcd_panel *panel) if (md->enabled) mipid_esd_stop_check(md); - destroy_workqueue(md->esd_wq); } static struct lcd_panel mipid_panel = { diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c index b58012b82b6f..8b810696a42b 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c @@ -75,8 +75,6 @@ struct panel_drv_data { bool intro_printed; - struct workqueue_struct *workqueue; - bool ulps_enabled; unsigned ulps_timeout; struct delayed_work ulps_work; @@ -232,7 +230,7 @@ static int dsicm_set_update_window(struct panel_drv_data *ddata, static void dsicm_queue_ulps_work(struct panel_drv_data *ddata) { if (ddata->ulps_timeout > 0) - queue_delayed_work(ddata->workqueue, &ddata->ulps_work, + schedule_delayed_work(&ddata->ulps_work, msecs_to_jiffies(ddata->ulps_timeout)); } @@ -1244,11 +1242,6 @@ static int dsicm_probe(struct platform_device *pdev) dev_dbg(dev, "Using GPIO TE\n"); } - ddata->workqueue = create_singlethread_workqueue("dsicm_wq"); - if (ddata->workqueue == NULL) { - dev_err(dev, "can't create workqueue\n"); - return -ENOMEM; - } INIT_DELAYED_WORK(&ddata->ulps_work, dsicm_ulps_work); dsicm_hw_reset(ddata); @@ -1262,7 +1255,7 @@ static int dsicm_probe(struct platform_device *pdev) dev, ddata, &dsicm_bl_ops, &props); if 
(IS_ERR(bldev)) { r = PTR_ERR(bldev); - goto err_bl; + goto err_reg; } ddata->bldev = bldev; @@ -1285,8 +1278,6 @@ static int dsicm_probe(struct platform_device *pdev) err_sysfs_create: if (bldev != NULL) backlight_device_unregister(bldev); -err_bl: - destroy_workqueue(ddata->workqueue); err_reg: return r; } @@ -1316,7 +1307,6 @@ static int __exit dsicm_remove(struct platform_device *pdev) omap_dss_put_device(ddata->in); dsicm_cancel_ulps_work(ddata); - destroy_workqueue(ddata->workqueue); /* reset, to be sure that the panel is in a valid state */ dsicm_hw_reset(ddata); diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dispc-compat.c b/drivers/video/fbdev/omap2/omapfb/dss/dispc-compat.c index 3691bde4ce0a..a864608c5df1 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/dispc-compat.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/dispc-compat.c @@ -644,6 +644,7 @@ int omap_dispc_wait_for_irq_interruptible_timeout(u32 irqmask, { int r; + long time_left; DECLARE_COMPLETION_ONSTACK(completion); r = omap_dispc_register_isr(dispc_irq_wait_handler, &completion, @@ -652,15 +653,15 @@ int omap_dispc_wait_for_irq_interruptible_timeout(u32 irqmask, if (r) return r; - timeout = wait_for_completion_interruptible_timeout(&completion, + time_left = wait_for_completion_interruptible_timeout(&completion, timeout); omap_dispc_unregister_isr(dispc_irq_wait_handler, &completion, irqmask); - if (timeout == 0) + if (time_left == 0) return -ETIMEDOUT; - if (timeout == -ERESTARTSYS) + if (time_left == -ERESTARTSYS) return -ERESTARTSYS; return 0; diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c index 9e4800a4e3d1..30d49f3800b3 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c @@ -1167,7 +1167,6 @@ static int dsi_regulator_init(struct platform_device *dsidev) { struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev); struct regulator *vdds_dsi; - int r; if (dsi->vdds_dsi_reg != NULL) return 0; @@ -1180,13 +1179,6 @@ static int dsi_regulator_init(struct platform_device *dsidev) return PTR_ERR(vdds_dsi); } - r = regulator_set_voltage(vdds_dsi, 1800000, 1800000); - if (r) { - devm_regulator_put(vdds_dsi); - DSSERR("can't set the DSI regulator voltage\n"); - return r; - } - dsi->vdds_dsi_reg = vdds_dsi; return 0; @@ -5348,7 +5340,7 @@ static int dsi_bind(struct device *dev, struct device *master, void *data) dsi->phy_base = devm_ioremap(&dsidev->dev, res->start, resource_size(res)); - if (!dsi->proto_base) { + if (!dsi->phy_base) { DSSERR("can't ioremap DSI PHY\n"); return -ENOMEM; } @@ -5368,7 +5360,7 @@ static int dsi_bind(struct device *dev, struct device *master, void *data) dsi->pll_base = devm_ioremap(&dsidev->dev, res->start, resource_size(res)); - if (!dsi->proto_base) { + if (!dsi->pll_base) { DSSERR("can't ioremap DSI PLL\n"); return -ENOMEM; } diff --git a/drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c b/drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c index 926a6f20dbb2..156a254705ea 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c @@ -100,7 +100,6 @@ static irqreturn_t hdmi_irq_handler(int irq, void *data) static int hdmi_init_regulator(void) { - int r; struct regulator *reg; if (hdmi.vdda_reg != NULL) @@ -114,13 +113,6 @@ static int hdmi_init_regulator(void) return PTR_ERR(reg); } - r = regulator_set_voltage(reg, 1800000, 1800000); - if (r) { - devm_regulator_put(reg); - DSSWARN("can't set the regulator voltage\n"); - return r; - } - hdmi.vdda_reg = reg; 
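The regulator hunks above and below (dsi.c, hdmi4.c and hdmi5.c) all drop the same regulator_set_voltage() call: the 1.8 V constraint belongs to the board description (device tree or machine constraints), not to the consumer driver. A minimal sketch of the consumer-side pattern that remains, assuming a hypothetical "vdda" supply name:

	#include <linux/device.h>
	#include <linux/err.h>
	#include <linux/regulator/consumer.h>

	static int demo_enable_vdda(struct device *dev)
	{
		struct regulator *vdda;
		int r;

		/* voltage limits come from the machine/DT constraints */
		vdda = devm_regulator_get(dev, "vdda");
		if (IS_ERR(vdda))
			return PTR_ERR(vdda);

		r = regulator_enable(vdda);
		if (r)
			dev_err(dev, "cannot enable vdda supply: %d\n", r);
		return r;
	}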
return 0; diff --git a/drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c b/drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c index 0ee829a165c3..4da36bcab977 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c @@ -119,7 +119,6 @@ static irqreturn_t hdmi_irq_handler(int irq, void *data) static int hdmi_init_regulator(void) { - int r; struct regulator *reg; if (hdmi.vdda_reg != NULL) @@ -131,13 +130,6 @@ static int hdmi_init_regulator(void) return PTR_ERR(reg); } - r = regulator_set_voltage(reg, 1800000, 1800000); - if (r) { - devm_regulator_put(reg); - DSSWARN("can't set the regulator voltage\n"); - return r; - } - hdmi.vdda_reg = reg; return 0; diff --git a/drivers/video/fbdev/pm2fb.c b/drivers/video/fbdev/pm2fb.c index aa8d28880912..1a4070f719c2 100644 --- a/drivers/video/fbdev/pm2fb.c +++ b/drivers/video/fbdev/pm2fb.c @@ -113,7 +113,7 @@ static struct fb_fix_screeninfo pm2fb_fix = { /* * Default video mode. In case the modedb doesn't work. */ -static struct fb_var_screeninfo pm2fb_var = { +static const struct fb_var_screeninfo pm2fb_var = { /* "640x480, 8 bpp @ 60 Hz */ .xres = 640, .yres = 480, diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c index 2c0487f4f805..ef73f14d7ba0 100644 --- a/drivers/video/fbdev/pxafb.c +++ b/drivers/video/fbdev/pxafb.c @@ -2125,7 +2125,7 @@ static int of_get_pxafb_display(struct device *dev, struct device_node *disp, timings = of_get_display_timings(disp); if (!timings) - goto out; + return -EINVAL; ret = -ENOMEM; info->modes = kmalloc_array(timings->num_timings, @@ -2186,6 +2186,7 @@ static int of_get_pxafb_mode_info(struct device *dev, ret = of_property_read_u32(np, "bus-width", &bus_width); if (ret) { dev_err(dev, "no bus-width specified: %d\n", ret); + of_node_put(np); return ret; } diff --git a/drivers/video/fbdev/s1d13xxxfb.c b/drivers/video/fbdev/s1d13xxxfb.c index 96aa46dc696c..5d6179ef0298 100644 --- a/drivers/video/fbdev/s1d13xxxfb.c +++ b/drivers/video/fbdev/s1d13xxxfb.c @@ -83,7 +83,7 @@ static const char *s1d13xxxfb_prod_names[] = { /* * here we define the default struct fb_fix_screeninfo */ -static struct fb_fix_screeninfo s1d13xxxfb_fix = { +static const struct fb_fix_screeninfo s1d13xxxfb_fix = { .id = S1D_FBID, .type = FB_TYPE_PACKED_PIXELS, .visual = FB_VISUAL_PSEUDOCOLOR, @@ -929,7 +929,7 @@ static int s1d13xxxfb_suspend(struct platform_device *dev, pm_message_t state) s1dfb->disp_save = kmalloc(info->fix.smem_len, GFP_KERNEL); if (!s1dfb->disp_save) { - printk(KERN_ERR PFX "no memory to save screen"); + printk(KERN_ERR PFX "no memory to save screen\n"); return -ENOMEM; } diff --git a/drivers/video/fbdev/s3c2410fb.c b/drivers/video/fbdev/s3c2410fb.c index 0dd86be36afb..a67e4567e656 100644 --- a/drivers/video/fbdev/s3c2410fb.c +++ b/drivers/video/fbdev/s3c2410fb.c @@ -767,7 +767,7 @@ static irqreturn_t s3c2410fb_irq(int irq, void *dev_id) return IRQ_HANDLED; } -#ifdef CONFIG_CPU_FREQ +#ifdef CONFIG_ARM_S3C24XX_CPUFREQ static int s3c2410fb_cpufreq_transition(struct notifier_block *nb, unsigned long val, void *data) diff --git a/drivers/video/fbdev/s3c2410fb.h b/drivers/video/fbdev/s3c2410fb.h index 47a17bd23011..cdd11e2f8859 100644 --- a/drivers/video/fbdev/s3c2410fb.h +++ b/drivers/video/fbdev/s3c2410fb.h @@ -32,7 +32,7 @@ struct s3c2410fb_info { unsigned long clk_rate; unsigned int palette_ready; -#ifdef CONFIG_CPU_FREQ +#ifdef CONFIG_ARM_S3C24XX_CPUFREQ struct notifier_block freq_transition; #endif diff --git a/drivers/video/fbdev/savage/savagefb_driver.c 
b/drivers/video/fbdev/savage/savagefb_driver.c index 6c77ab09b0b2..c30a91c1137c 100644 --- a/drivers/video/fbdev/savage/savagefb_driver.c +++ b/drivers/video/fbdev/savage/savagefb_driver.c @@ -1660,7 +1660,7 @@ static struct fb_ops savagefb_ops = { /* --------------------------------------------------------------------- */ -static struct fb_var_screeninfo savagefb_var800x600x8 = { +static const struct fb_var_screeninfo savagefb_var800x600x8 = { .accel_flags = FB_ACCELF_TEXT, .xres = 800, .yres = 600, diff --git a/drivers/video/fbdev/simplefb.c b/drivers/video/fbdev/simplefb.c index e9cf19977285..61f799a515dc 100644 --- a/drivers/video/fbdev/simplefb.c +++ b/drivers/video/fbdev/simplefb.c @@ -33,14 +33,14 @@ #include <linux/parser.h> #include <linux/regulator/consumer.h> -static struct fb_fix_screeninfo simplefb_fix = { +static const struct fb_fix_screeninfo simplefb_fix = { .id = "simple", .type = FB_TYPE_PACKED_PIXELS, .visual = FB_VISUAL_TRUECOLOR, .accel = FB_ACCEL_NONE, }; -static struct fb_var_screeninfo simplefb_var = { +static const struct fb_var_screeninfo simplefb_var = { .height = -1, .width = -1, .activate = FB_ACTIVATE_NOW, @@ -74,8 +74,14 @@ static int simplefb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, return 0; } +struct simplefb_par; +static void simplefb_clocks_destroy(struct simplefb_par *par); +static void simplefb_regulators_destroy(struct simplefb_par *par); + static void simplefb_destroy(struct fb_info *info) { + simplefb_regulators_destroy(info->par); + simplefb_clocks_destroy(info->par); if (info->screen_base) iounmap(info->screen_base); } @@ -487,11 +493,8 @@ error_fb_release: static int simplefb_remove(struct platform_device *pdev) { struct fb_info *info = platform_get_drvdata(pdev); - struct simplefb_par *par = info->par; unregister_framebuffer(info); - simplefb_regulators_destroy(par); - simplefb_clocks_destroy(par); framebuffer_release(info); return 0; diff --git a/drivers/video/fbdev/sm712fb.c b/drivers/video/fbdev/sm712fb.c index 86ae1d4556fc..73cb4ffff3c5 100644 --- a/drivers/video/fbdev/sm712fb.c +++ b/drivers/video/fbdev/sm712fb.c @@ -56,7 +56,7 @@ struct smtcfb_info { void __iomem *smtc_regbaseaddress; /* Memory Map IO starting address */ -static struct fb_var_screeninfo smtcfb_var = { +static const struct fb_var_screeninfo smtcfb_var = { .xres = 1024, .yres = 600, .xres_virtual = 1024, diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c index 9279e5f6696e..ec2e7e353685 100644 --- a/drivers/video/fbdev/smscufx.c +++ b/drivers/video/fbdev/smscufx.c @@ -1761,10 +1761,8 @@ error: static void ufx_usb_disconnect(struct usb_interface *interface) { struct ufx_data *dev; - struct fb_info *info; dev = usb_get_intfdata(interface); - info = dev->info; pr_debug("USB disconnect starting\n"); diff --git a/drivers/video/fbdev/ssd1307fb.c b/drivers/video/fbdev/ssd1307fb.c index a9c45c89b15e..2925d5ce8d3e 100644 --- a/drivers/video/fbdev/ssd1307fb.c +++ b/drivers/video/fbdev/ssd1307fb.c @@ -64,7 +64,7 @@ struct ssd1307fb_par { u32 contrast; u32 dclk_div; u32 dclk_frq; - struct ssd1307fb_deviceinfo *device_info; + const struct ssd1307fb_deviceinfo *device_info; struct i2c_client *client; u32 height; struct fb_info *info; @@ -84,7 +84,7 @@ struct ssd1307fb_array { u8 data[0]; }; -static struct fb_fix_screeninfo ssd1307fb_fix = { +static const struct fb_fix_screeninfo ssd1307fb_fix = { .id = "Solomon SSD1307", .type = FB_TYPE_PACKED_PIXELS, .visual = FB_VISUAL_MONO10, @@ -94,7 +94,7 @@ static struct fb_fix_screeninfo ssd1307fb_fix = { 
.accel = FB_ACCEL_NONE, }; -static struct fb_var_screeninfo ssd1307fb_var = { +static const struct fb_var_screeninfo ssd1307fb_var = { .bits_per_pixel = 1, }; @@ -559,8 +559,7 @@ static int ssd1307fb_probe(struct i2c_client *client, par->info = info; par->client = client; - par->device_info = (struct ssd1307fb_deviceinfo *)of_match_device( - ssd1307fb_of_match, &client->dev)->data; + par->device_info = of_device_get_match_data(&client->dev); par->reset = of_get_named_gpio(client->dev.of_node, "reset-gpios", 0); diff --git a/drivers/video/fbdev/tdfxfb.c b/drivers/video/fbdev/tdfxfb.c index 621fa441a6db..d5fa313806fe 100644 --- a/drivers/video/fbdev/tdfxfb.c +++ b/drivers/video/fbdev/tdfxfb.c @@ -82,7 +82,7 @@ #define VOODOO3_MAX_PIXCLOCK 300000 #define VOODOO5_MAX_PIXCLOCK 350000 -static struct fb_fix_screeninfo tdfx_fix = { +static const struct fb_fix_screeninfo tdfx_fix = { .type = FB_TYPE_PACKED_PIXELS, .visual = FB_VISUAL_PSEUDOCOLOR, .ypanstep = 1, @@ -90,7 +90,7 @@ static struct fb_fix_screeninfo tdfx_fix = { .accel = FB_ACCEL_3DFX_BANSHEE }; -static struct fb_var_screeninfo tdfx_var = { +static const struct fb_var_screeninfo tdfx_var = { /* "640x480, 8 bpp @ 60 Hz */ .xres = 640, .yres = 480, diff --git a/drivers/video/fbdev/uvesafb.c b/drivers/video/fbdev/uvesafb.c index 178ae93b7ebd..98af9e02959b 100644 --- a/drivers/video/fbdev/uvesafb.c +++ b/drivers/video/fbdev/uvesafb.c @@ -33,7 +33,7 @@ static struct cb_id uvesafb_cn_id = { static char v86d_path[PATH_MAX] = "/sbin/v86d"; static char v86d_started; /* has v86d been started by uvesafb? */ -static struct fb_fix_screeninfo uvesafb_fix = { +static const struct fb_fix_screeninfo uvesafb_fix = { .id = "VESA VGA", .type = FB_TYPE_PACKED_PIXELS, .accel = FB_ACCEL_NONE, diff --git a/drivers/video/fbdev/vfb.c b/drivers/video/fbdev/vfb.c index b9c2f81fb6b9..da653a080394 100644 --- a/drivers/video/fbdev/vfb.c +++ b/drivers/video/fbdev/vfb.c @@ -35,76 +35,23 @@ static void *videomemory; static u_long videomemorysize = VIDEOMEMSIZE; module_param(videomemorysize, ulong, 0); +MODULE_PARM_DESC(videomemorysize, "RAM available to frame buffer (in bytes)"); -/********************************************************************** - * - * Memory management - * - **********************************************************************/ -static void *rvmalloc(unsigned long size) -{ - void *mem; - unsigned long adr; - - size = PAGE_ALIGN(size); - mem = vmalloc_32(size); - if (!mem) - return NULL; - - /* - * VFB must clear memory to prevent kernel info - * leakage into userspace - * VGA-based drivers MUST NOT clear memory if - * they want to be able to take over vgacon - */ - - memset(mem, 0, size); - adr = (unsigned long) mem; - while (size > 0) { - SetPageReserved(vmalloc_to_page((void *)adr)); - adr += PAGE_SIZE; - size -= PAGE_SIZE; - } - - return mem; -} - -static void rvfree(void *mem, unsigned long size) -{ - unsigned long adr; - - if (!mem) - return; - - adr = (unsigned long) mem; - while ((long) size > 0) { - ClearPageReserved(vmalloc_to_page((void *)adr)); - adr += PAGE_SIZE; - size -= PAGE_SIZE; - } - vfree(mem); -} +static char *mode_option = NULL; +module_param(mode_option, charp, 0); +MODULE_PARM_DESC(mode_option, "Preferred video mode (e.g. 
640x480-8@60)"); -static struct fb_var_screeninfo vfb_default = { +static const struct fb_videomode vfb_default = { .xres = 640, .yres = 480, - .xres_virtual = 640, - .yres_virtual = 480, - .bits_per_pixel = 8, - .red = { 0, 8, 0 }, - .green = { 0, 8, 0 }, - .blue = { 0, 8, 0 }, - .activate = FB_ACTIVATE_TEST, - .height = -1, - .width = -1, - .pixclock = 20000, - .left_margin = 64, - .right_margin = 64, - .upper_margin = 32, - .lower_margin = 32, - .hsync_len = 64, - .vsync_len = 2, - .vmode = FB_VMODE_NONINTERLACED, + .pixclock = 20000, + .left_margin = 64, + .right_margin = 64, + .upper_margin = 32, + .lower_margin = 32, + .hsync_len = 64, + .vsync_len = 2, + .vmode = FB_VMODE_NONINTERLACED, }; static struct fb_fix_screeninfo vfb_fix = { @@ -119,6 +66,7 @@ static struct fb_fix_screeninfo vfb_fix = { static bool vfb_enable __initdata = 0; /* disabled by default */ module_param(vfb_enable, bool, 0); +MODULE_PARM_DESC(vfb_enable, "Enable Virtual FB driver"); static int vfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info); @@ -421,35 +369,7 @@ static int vfb_pan_display(struct fb_var_screeninfo *var, static int vfb_mmap(struct fb_info *info, struct vm_area_struct *vma) { - unsigned long start = vma->vm_start; - unsigned long size = vma->vm_end - vma->vm_start; - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; - unsigned long page, pos; - - if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT)) - return -EINVAL; - if (size > info->fix.smem_len) - return -EINVAL; - if (offset > info->fix.smem_len - size) - return -EINVAL; - - pos = (unsigned long)info->fix.smem_start + offset; - - while (size > 0) { - page = vmalloc_to_pfn((void *)pos); - if (remap_pfn_range(vma, start, page, PAGE_SIZE, PAGE_SHARED)) { - return -EAGAIN; - } - start += PAGE_SIZE; - pos += PAGE_SIZE; - if (size > PAGE_SIZE) - size -= PAGE_SIZE; - else - size = 0; - } - - return 0; - + return remap_vmalloc_range(vma, (void *)info->fix.smem_start, vma->vm_pgoff); } #ifndef MODULE @@ -477,6 +397,8 @@ static int __init vfb_setup(char *options) /* Test disable for backwards compatibility */ if (!strcmp(this_opt, "disable")) vfb_enable = 0; + else + mode_option = this_opt; } return 1; } @@ -489,12 +411,13 @@ static int __init vfb_setup(char *options) static int vfb_probe(struct platform_device *dev) { struct fb_info *info; + unsigned int size = PAGE_ALIGN(videomemorysize); int retval = -ENOMEM; /* * For real video cards we use ioremap. 
*/ - if (!(videomemory = rvmalloc(videomemorysize))) + if (!(videomemory = vmalloc_32_user(size))) return retval; info = framebuffer_alloc(sizeof(u32) * 256, &dev->dev); @@ -504,11 +427,13 @@ static int vfb_probe(struct platform_device *dev) info->screen_base = (char __iomem *)videomemory; info->fbops = &vfb_ops; - retval = fb_find_mode(&info->var, info, NULL, - NULL, 0, NULL, 8); + if (!fb_find_mode(&info->var, info, mode_option, + NULL, 0, &vfb_default, 8)){ + fb_err(info, "Unable to find usable video mode.\n"); + retval = -EINVAL; + goto err1; + } - if (!retval || (retval == 4)) - info->var = vfb_default; vfb_fix.smem_start = (unsigned long) videomemory; vfb_fix.smem_len = videomemorysize; info->fix = vfb_fix; @@ -533,7 +458,7 @@ err2: err1: framebuffer_release(info); err: - rvfree(videomemory, videomemorysize); + vfree(videomemory); return retval; } @@ -543,7 +468,7 @@ static int vfb_remove(struct platform_device *dev) if (info) { unregister_framebuffer(info); - rvfree(videomemory, videomemorysize); + vfree(videomemory); fb_dealloc_cmap(&info->cmap); framebuffer_release(info); } diff --git a/drivers/video/fbdev/vga16fb.c b/drivers/video/fbdev/vga16fb.c index 283d335a759f..5f0690c8fc93 100644 --- a/drivers/video/fbdev/vga16fb.c +++ b/drivers/video/fbdev/vga16fb.c @@ -85,7 +85,7 @@ static struct fb_var_screeninfo vga16fb_defined = { }; /* name should not depend on EGA/VGA */ -static struct fb_fix_screeninfo vga16fb_fix = { +static const struct fb_fix_screeninfo vga16fb_fix = { .id = "VGA16 VGA", .smem_start = VGA_FB_PHYS, .smem_len = VGA_FB_PHYS_LEN, diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 50dbaa805658..fdd3228e0678 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1844,4 +1844,53 @@ config USBPCWATCHDOG Most people will say N. +comment "Watchdog Pretimeout Governors" + +config WATCHDOG_PRETIMEOUT_GOV + bool "Enable watchdog pretimeout governors" + help + This option allows the selection of watchdog pretimeout governors. + +if WATCHDOG_PRETIMEOUT_GOV + +choice + prompt "Default Watchdog Pretimeout Governor" + default WATCHDOG_PRETIMEOUT_DEFAULT_GOV_PANIC + help + This option selects a default watchdog pretimeout governor. + The governor takes its action if a watchdog is capable + of reporting a pretimeout event. + +config WATCHDOG_PRETIMEOUT_DEFAULT_GOV_NOOP + bool "noop" + select WATCHDOG_PRETIMEOUT_GOV_NOOP + help + Use the noop watchdog pretimeout governor by default. If the + noop governor is selected, a pretimeout event only writes a + short message to the kernel log buffer; no system state is changed. + +config WATCHDOG_PRETIMEOUT_DEFAULT_GOV_PANIC + bool "panic" + select WATCHDOG_PRETIMEOUT_GOV_PANIC + help + Use the panic watchdog pretimeout governor by default: if + a watchdog pretimeout event happens, the watchdog feeder is + considered dead and a reboot is unavoidable. + +endchoice + +config WATCHDOG_PRETIMEOUT_GOV_NOOP + tristate "Noop watchdog pretimeout governor" + help + Noop watchdog pretimeout governor; only an informational + message is added to the kernel log buffer. + +config WATCHDOG_PRETIMEOUT_GOV_PANIC + tristate "Panic watchdog pretimeout governor" + help + Panic watchdog pretimeout governor; on a watchdog pretimeout + event, put the kernel into panic. + +endif # WATCHDOG_PRETIMEOUT_GOV + endif # WATCHDOG diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index cba00430151b..caa9f4aa492a 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -3,9 +3,15 @@ # # The WatchDog Timer Driver Core.
-watchdog-objs += watchdog_core.o watchdog_dev.o obj-$(CONFIG_WATCHDOG_CORE) += watchdog.o +watchdog-objs += watchdog_core.o watchdog_dev.o + +watchdog-$(CONFIG_WATCHDOG_PRETIMEOUT_GOV) += watchdog_pretimeout.o + +obj-$(CONFIG_WATCHDOG_PRETIMEOUT_GOV_NOOP) += pretimeout_noop.o +obj-$(CONFIG_WATCHDOG_PRETIMEOUT_GOV_PANIC) += pretimeout_panic.o + # Only one watchdog can succeed. We probe the ISA/PCI/USB based # watchdog-cards first, then the architecture specific watchdog # drivers and then the architecture independent "softdog" driver. diff --git a/drivers/watchdog/asm9260_wdt.c b/drivers/watchdog/asm9260_wdt.c index c9686b2fdafd..d0b59ba0f661 100644 --- a/drivers/watchdog/asm9260_wdt.c +++ b/drivers/watchdog/asm9260_wdt.c @@ -389,7 +389,6 @@ MODULE_DEVICE_TABLE(of, asm9260_wdt_of_match); static struct platform_driver asm9260_wdt_driver = { .driver = { .name = "asm9260-wdt", - .owner = THIS_MODULE, .of_match_table = asm9260_wdt_of_match, }, .probe = asm9260_wdt_probe, diff --git a/drivers/watchdog/ath79_wdt.c b/drivers/watchdog/ath79_wdt.c index 835d310081e1..e2209bf5fa8a 100644 --- a/drivers/watchdog/ath79_wdt.c +++ b/drivers/watchdog/ath79_wdt.c @@ -35,6 +35,7 @@ #include <linux/err.h> #include <linux/of.h> #include <linux/of_platform.h> +#include <linux/uaccess.h> #define DRIVER_NAME "ath79-wdt" diff --git a/drivers/watchdog/bcm7038_wdt.c b/drivers/watchdog/bcm7038_wdt.c index 4245b65d645c..e238df4d75a2 100644 --- a/drivers/watchdog/bcm7038_wdt.c +++ b/drivers/watchdog/bcm7038_wdt.c @@ -107,7 +107,7 @@ static struct watchdog_info bcm7038_wdt_info = { WDIOF_MAGICCLOSE }; -static struct watchdog_ops bcm7038_wdt_ops = { +static const struct watchdog_ops bcm7038_wdt_ops = { .owner = THIS_MODULE, .start = bcm7038_wdt_start, .stop = bcm7038_wdt_stop, diff --git a/drivers/watchdog/cadence_wdt.c b/drivers/watchdog/cadence_wdt.c index 4dda9024e229..98acef72334d 100644 --- a/drivers/watchdog/cadence_wdt.c +++ b/drivers/watchdog/cadence_wdt.c @@ -269,7 +269,7 @@ static struct watchdog_info cdns_wdt_info = { }; /* Watchdog Core Ops */ -static struct watchdog_ops cdns_wdt_ops = { +static const struct watchdog_ops cdns_wdt_ops = { .owner = THIS_MODULE, .start = cdns_wdt_start, .stop = cdns_wdt_stop, @@ -424,8 +424,10 @@ static int __maybe_unused cdns_wdt_suspend(struct device *dev) struct platform_device *pdev = to_platform_device(dev); struct cdns_wdt *wdt = platform_get_drvdata(pdev); - cdns_wdt_stop(&wdt->cdns_wdt_device); - clk_disable_unprepare(wdt->clk); + if (watchdog_active(&wdt->cdns_wdt_device)) { + cdns_wdt_stop(&wdt->cdns_wdt_device); + clk_disable_unprepare(wdt->clk); + } return 0; } @@ -442,12 +444,14 @@ static int __maybe_unused cdns_wdt_resume(struct device *dev) struct platform_device *pdev = to_platform_device(dev); struct cdns_wdt *wdt = platform_get_drvdata(pdev); - ret = clk_prepare_enable(wdt->clk); - if (ret) { - dev_err(dev, "unable to enable clock\n"); - return ret; + if (watchdog_active(&wdt->cdns_wdt_device)) { + ret = clk_prepare_enable(wdt->clk); + if (ret) { + dev_err(dev, "unable to enable clock\n"); + return ret; + } + cdns_wdt_start(&wdt->cdns_wdt_device); } - cdns_wdt_start(&wdt->cdns_wdt_device); return 0; } diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c index 2acb51cf5504..3c6a3de13a1b 100644 --- a/drivers/watchdog/dw_wdt.c +++ b/drivers/watchdog/dw_wdt.c @@ -54,6 +54,7 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " struct dw_wdt { void __iomem *regs; struct clk *clk; + unsigned long rate; struct notifier_block 
restart_handler; struct watchdog_device wdd; }; @@ -72,7 +73,7 @@ static inline int dw_wdt_top_in_seconds(struct dw_wdt *dw_wdt, unsigned top) * There are 16 possible timeout values in 0..15 where the number of * cycles is 2 ^ (16 + i) and the watchdog counts down. */ - return (1U << (16 + top)) / clk_get_rate(dw_wdt->clk); + return (1U << (16 + top)) / dw_wdt->rate; } static int dw_wdt_get_top(struct dw_wdt *dw_wdt) @@ -163,7 +164,7 @@ static unsigned int dw_wdt_get_timeleft(struct watchdog_device *wdd) struct dw_wdt *dw_wdt = to_dw_wdt(wdd); return readl(dw_wdt->regs + WDOG_CURRENT_COUNT_REG_OFFSET) / - clk_get_rate(dw_wdt->clk); + dw_wdt->rate; } static const struct watchdog_info dw_wdt_ident = { @@ -231,6 +232,12 @@ static int dw_wdt_drv_probe(struct platform_device *pdev) if (ret) return ret; + dw_wdt->rate = clk_get_rate(dw_wdt->clk); + if (dw_wdt->rate == 0) { + ret = -EINVAL; + goto out_disable_clk; + } + wdd = &dw_wdt->wdd; wdd->info = &dw_wdt_ident; wdd->ops = &dw_wdt_ops; diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index 8f89bd8a826a..70c7194e2810 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -39,7 +39,7 @@ #include <asm/nmi.h> #include <asm/frame.h> -#define HPWDT_VERSION "1.3.3" +#define HPWDT_VERSION "1.4.0" #define SECS_TO_TICKS(secs) ((secs) * 1000 / 128) #define TICKS_TO_SECS(ticks) ((ticks) * 128 / 1000) #define HPWDT_MAX_TIMER TICKS_TO_SECS(65535) @@ -814,7 +814,8 @@ static int hpwdt_init_one(struct pci_dev *dev, * not run on a legacy ASM box. * So we only support the G5 ProLiant servers and higher. */ - if (dev->subsystem_vendor != PCI_VENDOR_ID_HP) { + if (dev->subsystem_vendor != PCI_VENDOR_ID_HP && + dev->subsystem_vendor != PCI_VENDOR_ID_HP_3PAR) { dev_warn(&dev->dev, "This server does not have an iLO2+ ASIC.\n"); return -ENODEV; @@ -823,7 +824,8 @@ static int hpwdt_init_one(struct pci_dev *dev, /* * Ignore all auxilary iLO devices with the following PCI ID */ - if (dev->subsystem_device == 0x1979) + if (dev->subsystem_vendor == PCI_VENDOR_ID_HP && + dev->subsystem_device == 0x1979) return -ENODEV; if (pci_enable_device(dev)) { diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index 54cab189a763..06fcb6c8c917 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -629,7 +629,7 @@ static int iTCO_wdt_resume_noirq(struct device *dev) return 0; } -static struct dev_pm_ops iTCO_wdt_pm = { +static const struct dev_pm_ops iTCO_wdt_pm = { .suspend_noirq = iTCO_wdt_suspend_noirq, .resume_noirq = iTCO_wdt_resume_noirq, }; diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c index 62f346bb4348..4874b0f18650 100644 --- a/drivers/watchdog/imx2_wdt.c +++ b/drivers/watchdog/imx2_wdt.c @@ -24,6 +24,7 @@ #include <linux/clk.h> #include <linux/delay.h> #include <linux/init.h> +#include <linux/interrupt.h> #include <linux/io.h> #include <linux/kernel.h> #include <linux/module.h> @@ -37,18 +38,23 @@ #define IMX2_WDT_WCR 0x00 /* Control Register */ #define IMX2_WDT_WCR_WT (0xFF << 8) /* -> Watchdog Timeout Field */ -#define IMX2_WDT_WCR_WDA (1 << 5) /* -> External Reset WDOG_B */ -#define IMX2_WDT_WCR_SRS (1 << 4) /* -> Software Reset Signal */ -#define IMX2_WDT_WCR_WRE (1 << 3) /* -> WDOG Reset Enable */ -#define IMX2_WDT_WCR_WDE (1 << 2) /* -> Watchdog Enable */ -#define IMX2_WDT_WCR_WDZST (1 << 0) /* -> Watchdog timer Suspend */ +#define IMX2_WDT_WCR_WDA BIT(5) /* -> External Reset WDOG_B */ +#define IMX2_WDT_WCR_SRS BIT(4) /* -> Software Reset Signal */ +#define 
IMX2_WDT_WCR_WRE BIT(3) /* -> WDOG Reset Enable */ +#define IMX2_WDT_WCR_WDE BIT(2) /* -> Watchdog Enable */ +#define IMX2_WDT_WCR_WDZST BIT(0) /* -> Watchdog timer Suspend */ #define IMX2_WDT_WSR 0x02 /* Service Register */ #define IMX2_WDT_SEQ1 0x5555 /* -> service sequence 1 */ #define IMX2_WDT_SEQ2 0xAAAA /* -> service sequence 2 */ #define IMX2_WDT_WRSR 0x04 /* Reset Status Register */ -#define IMX2_WDT_WRSR_TOUT (1 << 1) /* -> Reset due to Timeout */ +#define IMX2_WDT_WRSR_TOUT BIT(1) /* -> Reset due to Timeout */ + +#define IMX2_WDT_WICR 0x06 /* Interrupt Control Register */ +#define IMX2_WDT_WICR_WIE BIT(15) /* -> Interrupt Enable */ +#define IMX2_WDT_WICR_WTIS BIT(14) /* -> Interrupt Status */ +#define IMX2_WDT_WICR_WICT 0xFF /* -> Interrupt Count Timeout */ #define IMX2_WDT_WMCR 0x08 /* Misc Register */ @@ -80,6 +86,12 @@ static const struct watchdog_info imx2_wdt_info = { .options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE, }; +static const struct watchdog_info imx2_wdt_pretimeout_info = { + .identity = "imx2+ watchdog", + .options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE | + WDIOF_PRETIMEOUT, +}; + static int imx2_wdt_restart(struct watchdog_device *wdog, unsigned long action, void *data) { @@ -169,6 +181,35 @@ static int imx2_wdt_set_timeout(struct watchdog_device *wdog, return 0; } +static int imx2_wdt_set_pretimeout(struct watchdog_device *wdog, + unsigned int new_pretimeout) +{ + struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog); + + if (new_pretimeout >= IMX2_WDT_MAX_TIME) + return -EINVAL; + + wdog->pretimeout = new_pretimeout; + + regmap_update_bits(wdev->regmap, IMX2_WDT_WICR, + IMX2_WDT_WICR_WIE | IMX2_WDT_WICR_WICT, + IMX2_WDT_WICR_WIE | (new_pretimeout << 1)); + return 0; +} + +static irqreturn_t imx2_wdt_isr(int irq, void *wdog_arg) +{ + struct watchdog_device *wdog = wdog_arg; + struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog); + + regmap_write_bits(wdev->regmap, IMX2_WDT_WICR, + IMX2_WDT_WICR_WTIS, IMX2_WDT_WICR_WTIS); + + watchdog_notify_pretimeout(wdog); + + return IRQ_HANDLED; +} + static int imx2_wdt_start(struct watchdog_device *wdog) { struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog); @@ -188,6 +229,7 @@ static const struct watchdog_ops imx2_wdt_ops = { .start = imx2_wdt_start, .ping = imx2_wdt_ping, .set_timeout = imx2_wdt_set_timeout, + .set_pretimeout = imx2_wdt_set_pretimeout, .restart = imx2_wdt_restart, }; @@ -236,6 +278,12 @@ static int __init imx2_wdt_probe(struct platform_device *pdev) wdog->max_hw_heartbeat_ms = IMX2_WDT_MAX_TIME * 1000; wdog->parent = &pdev->dev; + ret = platform_get_irq(pdev, 0); + if (ret > 0) + if (!devm_request_irq(&pdev->dev, ret, imx2_wdt_isr, 0, + dev_name(&pdev->dev), wdog)) + wdog->info = &imx2_wdt_pretimeout_info; + ret = clk_prepare_enable(wdev->clk); if (ret) return ret; diff --git a/drivers/watchdog/kempld_wdt.c b/drivers/watchdog/kempld_wdt.c index 5bf931ce1353..8e302d0e346c 100644 --- a/drivers/watchdog/kempld_wdt.c +++ b/drivers/watchdog/kempld_wdt.c @@ -430,7 +430,7 @@ static struct watchdog_info kempld_wdt_info = { WDIOF_PRETIMEOUT }; -static struct watchdog_ops kempld_wdt_ops = { +static const struct watchdog_ops kempld_wdt_ops = { .owner = THIS_MODULE, .start = kempld_wdt_start, .stop = kempld_wdt_stop, diff --git a/drivers/watchdog/mt7621_wdt.c b/drivers/watchdog/mt7621_wdt.c index 4a2290f900a8..d5735c12067d 100644 --- a/drivers/watchdog/mt7621_wdt.c +++ b/drivers/watchdog/mt7621_wdt.c @@ -139,7 +139,6 @@ static int mt7621_wdt_probe(struct 
platform_device *pdev) if (!IS_ERR(mt7621_wdt_reset)) reset_control_deassert(mt7621_wdt_reset); - mt7621_wdt_dev.dev = &pdev->dev; mt7621_wdt_dev.bootstatus = mt7621_wdt_bootcause(); watchdog_init_timeout(&mt7621_wdt_dev, mt7621_wdt_dev.max_timeout, diff --git a/drivers/watchdog/of_xilinx_wdt.c b/drivers/watchdog/of_xilinx_wdt.c index b2e1b4cbbdc1..fae7fe929ea3 100644 --- a/drivers/watchdog/of_xilinx_wdt.c +++ b/drivers/watchdog/of_xilinx_wdt.c @@ -10,6 +10,7 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/clk.h> #include <linux/err.h> #include <linux/module.h> #include <linux/types.h> @@ -45,6 +46,7 @@ struct xwdt_device { u32 wdt_interval; spinlock_t spinlock; struct watchdog_device xilinx_wdt_wdd; + struct clk *clk; }; static int xilinx_wdt_start(struct watchdog_device *wdd) @@ -195,16 +197,30 @@ static int xwdt_probe(struct platform_device *pdev) spin_lock_init(&xdev->spinlock); watchdog_set_drvdata(xilinx_wdt_wdd, xdev); + xdev->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(xdev->clk)) { + if (PTR_ERR(xdev->clk) == -ENOENT) + xdev->clk = NULL; + else + return PTR_ERR(xdev->clk); + } + + rc = clk_prepare_enable(xdev->clk); + if (rc) { + dev_err(&pdev->dev, "unable to enable clock\n"); + return rc; + } + rc = xwdt_selftest(xdev); if (rc == XWT_TIMER_FAILED) { dev_err(&pdev->dev, "SelfTest routine error\n"); - return rc; + goto err_clk_disable; } rc = watchdog_register_device(xilinx_wdt_wdd); if (rc) { dev_err(&pdev->dev, "Cannot register watchdog (err=%d)\n", rc); - return rc; + goto err_clk_disable; } dev_info(&pdev->dev, "Xilinx Watchdog Timer at %p with timeout %ds\n", @@ -213,6 +229,10 @@ static int xwdt_probe(struct platform_device *pdev) platform_set_drvdata(pdev, xdev); return 0; +err_clk_disable: + clk_disable_unprepare(xdev->clk); + + return rc; } static int xwdt_remove(struct platform_device *pdev) @@ -220,6 +240,7 @@ static int xwdt_remove(struct platform_device *pdev) struct xwdt_device *xdev = platform_get_drvdata(pdev); watchdog_unregister_device(&xdev->xilinx_wdt_wdd); + clk_disable_unprepare(xdev->clk); return 0; } diff --git a/drivers/watchdog/pretimeout_noop.c b/drivers/watchdog/pretimeout_noop.c new file mode 100644 index 000000000000..85f5299d197c --- /dev/null +++ b/drivers/watchdog/pretimeout_noop.c @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2015-2016 Mentor Graphics + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include <linux/module.h> +#include <linux/printk.h> +#include <linux/watchdog.h> + +#include "watchdog_pretimeout.h" + +/** + * pretimeout_noop - No operation on watchdog pretimeout event + * @wdd - watchdog_device + * + * This function prints a message about pretimeout to kernel log. 
+ */ +static void pretimeout_noop(struct watchdog_device *wdd) +{ + pr_alert("watchdog%d: pretimeout event\n", wdd->id); +} + +static struct watchdog_governor watchdog_gov_noop = { + .name = "noop", + .pretimeout = pretimeout_noop, +}; + +static int __init watchdog_gov_noop_register(void) +{ + return watchdog_register_governor(&watchdog_gov_noop); +} + +static void __exit watchdog_gov_noop_unregister(void) +{ + watchdog_unregister_governor(&watchdog_gov_noop); +} +module_init(watchdog_gov_noop_register); +module_exit(watchdog_gov_noop_unregister); + +MODULE_AUTHOR("Vladimir Zapolskiy <vladimir_zapolskiy@mentor.com>"); +MODULE_DESCRIPTION("Noop watchdog pretimeout governor"); +MODULE_LICENSE("GPL"); diff --git a/drivers/watchdog/pretimeout_panic.c b/drivers/watchdog/pretimeout_panic.c new file mode 100644 index 000000000000..0c197a1c97f4 --- /dev/null +++ b/drivers/watchdog/pretimeout_panic.c @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2015-2016 Mentor Graphics + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/watchdog.h> + +#include "watchdog_pretimeout.h" + +/** + * pretimeout_panic - Panic on watchdog pretimeout event + * @wdd - watchdog_device + * + * Panic; the watchdog has not been fed before the pretimeout event. + */ +static void pretimeout_panic(struct watchdog_device *wdd) +{ + panic("watchdog pretimeout event\n"); +} + +static struct watchdog_governor watchdog_gov_panic = { + .name = "panic", + .pretimeout = pretimeout_panic, +}; + +static int __init watchdog_gov_panic_register(void) +{ + return watchdog_register_governor(&watchdog_gov_panic); +} + +static void __exit watchdog_gov_panic_unregister(void) +{ + watchdog_unregister_governor(&watchdog_gov_panic); +} +module_init(watchdog_gov_panic_register); +module_exit(watchdog_gov_panic_unregister); + +MODULE_AUTHOR("Vladimir Zapolskiy <vladimir_zapolskiy@mentor.com>"); +MODULE_DESCRIPTION("Panic watchdog pretimeout governor"); +MODULE_LICENSE("GPL"); diff --git a/drivers/watchdog/rn5t618_wdt.c b/drivers/watchdog/rn5t618_wdt.c index d1c12278cb6a..0805ee2acd7a 100644 --- a/drivers/watchdog/rn5t618_wdt.c +++ b/drivers/watchdog/rn5t618_wdt.c @@ -136,7 +136,7 @@ static struct watchdog_info rn5t618_wdt_info = { .identity = DRIVER_NAME, }; -static struct watchdog_ops rn5t618_wdt_ops = { +static const struct watchdog_ops rn5t618_wdt_ops = { .owner = THIS_MODULE, .start = rn5t618_wdt_start, .stop = rn5t618_wdt_stop, diff --git a/drivers/watchdog/rt2880_wdt.c b/drivers/watchdog/rt2880_wdt.c index 1967919ae743..14b4fd428fff 100644 --- a/drivers/watchdog/rt2880_wdt.c +++ b/drivers/watchdog/rt2880_wdt.c @@ -158,7 +158,6 @@ static int rt288x_wdt_probe(struct platform_device *pdev) rt288x_wdt_freq = clk_get_rate(rt288x_wdt_clk) / RALINK_WDT_PRESCALE; - rt288x_wdt_dev.dev = &pdev->dev; rt288x_wdt_dev.bootstatus = rt288x_wdt_bootcause(); rt288x_wdt_dev.max_timeout = (0xfffful / rt288x_wdt_freq); rt288x_wdt_dev.parent = &pdev->dev; diff --git a/drivers/watchdog/softdog.c b/drivers/watchdog/softdog.c index b067edf246df..c7bdc986dca1 100644 --- a/drivers/watchdog/softdog.c +++ b/drivers/watchdog/softdog.c @@ -72,10 +72,27 @@ static void softdog_fire(unsigned long data) static struct timer_list softdog_ticktock = TIMER_INITIALIZER(softdog_fire, 0, 0); +static struct
watchdog_device softdog_dev; + +static void softdog_pretimeout(unsigned long data) +{ + watchdog_notify_pretimeout(&softdog_dev); +} + +static struct timer_list softdog_preticktock = + TIMER_INITIALIZER(softdog_pretimeout, 0, 0); + static int softdog_ping(struct watchdog_device *w) { if (!mod_timer(&softdog_ticktock, jiffies + (w->timeout * HZ))) __module_get(THIS_MODULE); + + if (w->pretimeout) + mod_timer(&softdog_preticktock, jiffies + + (w->timeout - w->pretimeout) * HZ); + else + del_timer(&softdog_preticktock); + return 0; } @@ -84,15 +101,18 @@ static int softdog_stop(struct watchdog_device *w) if (del_timer(&softdog_ticktock)) module_put(THIS_MODULE); + del_timer(&softdog_preticktock); + return 0; } static struct watchdog_info softdog_info = { .identity = "Software Watchdog", - .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE, + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE | + WDIOF_PRETIMEOUT, }; -static struct watchdog_ops softdog_ops = { +static const struct watchdog_ops softdog_ops = { .owner = THIS_MODULE, .start = softdog_ping, .stop = softdog_stop, diff --git a/drivers/watchdog/st_lpc_wdt.c b/drivers/watchdog/st_lpc_wdt.c index 14e9badf2bfa..e6100e447dd8 100644 --- a/drivers/watchdog/st_lpc_wdt.c +++ b/drivers/watchdog/st_lpc_wdt.c @@ -52,27 +52,6 @@ struct st_wdog { bool warm_reset; }; -static struct st_wdog_syscfg stid127_syscfg = { - .reset_type_reg = 0x004, - .reset_type_mask = BIT(2), - .enable_reg = 0x000, - .enable_mask = BIT(2), -}; - -static struct st_wdog_syscfg stih415_syscfg = { - .reset_type_reg = 0x0B8, - .reset_type_mask = BIT(6), - .enable_reg = 0x0B4, - .enable_mask = BIT(7), -}; - -static struct st_wdog_syscfg stih416_syscfg = { - .reset_type_reg = 0x88C, - .reset_type_mask = BIT(6), - .enable_reg = 0x888, - .enable_mask = BIT(7), -}; - static struct st_wdog_syscfg stih407_syscfg = { .enable_reg = 0x204, .enable_mask = BIT(19), @@ -83,18 +62,6 @@ static const struct of_device_id st_wdog_match[] = { .compatible = "st,stih407-lpc", .data = &stih407_syscfg, }, - { - .compatible = "st,stih416-lpc", - .data = &stih416_syscfg, - }, - { - .compatible = "st,stih415-lpc", - .data = &stih415_syscfg, - }, - { - .compatible = "st,stid127-lpc", - .data = &stid127_syscfg, - }, {}, }; MODULE_DEVICE_TABLE(of, st_wdog_match); diff --git a/drivers/watchdog/tegra_wdt.c b/drivers/watchdog/tegra_wdt.c index 9ec57608da82..2d53c3f9394f 100644 --- a/drivers/watchdog/tegra_wdt.c +++ b/drivers/watchdog/tegra_wdt.c @@ -178,7 +178,7 @@ static const struct watchdog_info tegra_wdt_info = { .identity = "Tegra Watchdog", }; -static struct watchdog_ops tegra_wdt_ops = { +static const struct watchdog_ops tegra_wdt_ops = { .owner = THIS_MODULE, .start = tegra_wdt_start, .stop = tegra_wdt_stop, diff --git a/drivers/watchdog/txx9wdt.c b/drivers/watchdog/txx9wdt.c index c2da880292bc..6f7a9deb27d0 100644 --- a/drivers/watchdog/txx9wdt.c +++ b/drivers/watchdog/txx9wdt.c @@ -112,7 +112,7 @@ static int __init txx9wdt_probe(struct platform_device *dev) txx9_imclk = NULL; goto exit; } - ret = clk_enable(txx9_imclk); + ret = clk_prepare_enable(txx9_imclk); if (ret) { clk_put(txx9_imclk); txx9_imclk = NULL; @@ -144,7 +144,7 @@ static int __init txx9wdt_probe(struct platform_device *dev) return 0; exit: if (txx9_imclk) { - clk_disable(txx9_imclk); + clk_disable_unprepare(txx9_imclk); clk_put(txx9_imclk); } return ret; @@ -153,7 +153,7 @@ exit: static int __exit txx9wdt_remove(struct platform_device *dev) { watchdog_unregister_device(&txx9wdt); - 
clk_disable(txx9_imclk); + clk_disable_unprepare(txx9_imclk); clk_put(txx9_imclk); return 0; } diff --git a/drivers/watchdog/w83627hf_wdt.c b/drivers/watchdog/w83627hf_wdt.c index 09e8003039dc..ef2ecaf53a14 100644 --- a/drivers/watchdog/w83627hf_wdt.c +++ b/drivers/watchdog/w83627hf_wdt.c @@ -302,7 +302,7 @@ static struct watchdog_info wdt_info = { .identity = "W83627HF Watchdog", }; -static struct watchdog_ops wdt_ops = { +static const struct watchdog_ops wdt_ops = { .owner = THIS_MODULE, .start = wdt_start, .stop = wdt_stop, diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c index 6abb83cd7681..74265b2f806c 100644 --- a/drivers/watchdog/watchdog_core.c +++ b/drivers/watchdog/watchdog_core.c @@ -349,7 +349,7 @@ int devm_watchdog_register_device(struct device *dev, struct watchdog_device **rcwdd; int ret; - rcwdd = devres_alloc(devm_watchdog_unregister_device, sizeof(*wdd), + rcwdd = devres_alloc(devm_watchdog_unregister_device, sizeof(*rcwdd), GFP_KERNEL); if (!rcwdd) return -ENOMEM; diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index 040bf8382f46..32930a073a12 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -49,6 +49,7 @@ #include <linux/uaccess.h> /* For copy_to_user/put_user/... */ #include "watchdog_core.h" +#include "watchdog_pretimeout.h" /* * struct watchdog_core_data - watchdog core internal data @@ -335,10 +336,14 @@ static int watchdog_set_timeout(struct watchdog_device *wdd, if (watchdog_timeout_invalid(wdd, timeout)) return -EINVAL; - if (wdd->ops->set_timeout) + if (wdd->ops->set_timeout) { err = wdd->ops->set_timeout(wdd, timeout); - else + } else { wdd->timeout = timeout; + /* Disable pretimeout if it doesn't fit the new timeout */ + if (wdd->pretimeout >= wdd->timeout) + wdd->pretimeout = 0; + } watchdog_update_worker(wdd); @@ -346,6 +351,31 @@ static int watchdog_set_timeout(struct watchdog_device *wdd, } /* + * watchdog_set_pretimeout: set the watchdog timer pretimeout + * @wdd: the watchdog device to set the timeout for + * @timeout: pretimeout to set in seconds + */ + +static int watchdog_set_pretimeout(struct watchdog_device *wdd, + unsigned int timeout) +{ + int err = 0; + + if (!(wdd->info->options & WDIOF_PRETIMEOUT)) + return -EOPNOTSUPP; + + if (watchdog_pretimeout_invalid(wdd, timeout)) + return -EINVAL; + + if (wdd->ops->set_pretimeout) + err = wdd->ops->set_pretimeout(wdd, timeout); + else + wdd->pretimeout = timeout; + + return err; +} + +/* * watchdog_get_timeleft: wrapper to get the time left before a reboot * @wdd: the watchdog device to get the remaining time from * @timeleft: the time that's left @@ -429,6 +459,15 @@ static ssize_t timeout_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(timeout); +static ssize_t pretimeout_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct watchdog_device *wdd = dev_get_drvdata(dev); + + return sprintf(buf, "%u\n", wdd->pretimeout); +} +static DEVICE_ATTR_RO(pretimeout); + static ssize_t identity_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -450,6 +489,36 @@ static ssize_t state_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(state); +static ssize_t pretimeout_available_governors_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return watchdog_pretimeout_available_governors_get(buf); +} +static DEVICE_ATTR_RO(pretimeout_available_governors); + +static ssize_t 
pretimeout_governor_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct watchdog_device *wdd = dev_get_drvdata(dev); + + return watchdog_pretimeout_governor_get(wdd, buf); +} + +static ssize_t pretimeout_governor_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct watchdog_device *wdd = dev_get_drvdata(dev); + int ret = watchdog_pretimeout_governor_set(wdd, buf); + + if (!ret) + ret = count; + + return ret; +} +static DEVICE_ATTR_RW(pretimeout_governor); + static umode_t wdt_is_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -459,6 +528,14 @@ static umode_t wdt_is_visible(struct kobject *kobj, struct attribute *attr, if (attr == &dev_attr_timeleft.attr && !wdd->ops->get_timeleft) mode = 0; + else if (attr == &dev_attr_pretimeout.attr && + !(wdd->info->options & WDIOF_PRETIMEOUT)) + mode = 0; + else if ((attr == &dev_attr_pretimeout_governor.attr || + attr == &dev_attr_pretimeout_available_governors.attr) && + (!(wdd->info->options & WDIOF_PRETIMEOUT) || + !IS_ENABLED(CONFIG_WATCHDOG_PRETIMEOUT_GOV))) + mode = 0; return mode; } @@ -466,10 +543,13 @@ static struct attribute *wdt_attrs[] = { &dev_attr_state.attr, &dev_attr_identity.attr, &dev_attr_timeout.attr, + &dev_attr_pretimeout.attr, &dev_attr_timeleft.attr, &dev_attr_bootstatus.attr, &dev_attr_status.attr, &dev_attr_nowayout.attr, + &dev_attr_pretimeout_governor.attr, + &dev_attr_pretimeout_available_governors.attr, NULL, }; @@ -646,6 +726,16 @@ static long watchdog_ioctl(struct file *file, unsigned int cmd, break; err = put_user(val, p); break; + case WDIOC_SETPRETIMEOUT: + if (get_user(val, p)) { + err = -EFAULT; + break; + } + err = watchdog_set_pretimeout(wdd, val); + break; + case WDIOC_GETPRETIMEOUT: + err = put_user(wdd->pretimeout, p); + break; default: err = -ENOTTY; break; @@ -937,6 +1027,12 @@ int watchdog_dev_register(struct watchdog_device *wdd) return PTR_ERR(dev); } + ret = watchdog_register_pretimeout(wdd); + if (ret) { + device_destroy(&watchdog_class, devno); + watchdog_cdev_unregister(wdd); + } + return ret; } @@ -950,6 +1046,7 @@ int watchdog_dev_register(struct watchdog_device *wdd) void watchdog_dev_unregister(struct watchdog_device *wdd) { + watchdog_unregister_pretimeout(wdd); device_destroy(&watchdog_class, wdd->wd_data->cdev.dev); watchdog_cdev_unregister(wdd); } diff --git a/drivers/watchdog/watchdog_pretimeout.c b/drivers/watchdog/watchdog_pretimeout.c new file mode 100644 index 000000000000..9db07bfb4334 --- /dev/null +++ b/drivers/watchdog/watchdog_pretimeout.c @@ -0,0 +1,220 @@ +/* + * Copyright (C) 2015-2016 Mentor Graphics + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + */ + +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/watchdog.h> + +#include "watchdog_pretimeout.h" + +/* Default watchdog pretimeout governor */ +static struct watchdog_governor *default_gov; + +/* The spinlock protects default_gov, wdd->gov and pretimeout_list */ +static DEFINE_SPINLOCK(pretimeout_lock); + +/* List of watchdog devices, which can generate a pretimeout event */ +static LIST_HEAD(pretimeout_list); + +struct watchdog_pretimeout { + struct watchdog_device *wdd; + struct list_head entry; +}; + +/* The mutex protects governor list and serializes external interfaces */ +static DEFINE_MUTEX(governor_lock); + +/* List of the registered watchdog pretimeout governors */ +static LIST_HEAD(governor_list); + +struct governor_priv { + struct watchdog_governor *gov; + struct list_head entry; +}; + +static struct governor_priv *find_governor_by_name(const char *gov_name) +{ + struct governor_priv *priv; + + list_for_each_entry(priv, &governor_list, entry) + if (sysfs_streq(gov_name, priv->gov->name)) + return priv; + + return NULL; +} + +int watchdog_pretimeout_available_governors_get(char *buf) +{ + struct governor_priv *priv; + int count = 0; + + mutex_lock(&governor_lock); + + list_for_each_entry(priv, &governor_list, entry) + count += sprintf(buf + count, "%s\n", priv->gov->name); + + mutex_unlock(&governor_lock); + + return count; +} + +int watchdog_pretimeout_governor_get(struct watchdog_device *wdd, char *buf) +{ + int count = 0; + + spin_lock_irq(&pretimeout_lock); + if (wdd->gov) + count = sprintf(buf, "%s\n", wdd->gov->name); + spin_unlock_irq(&pretimeout_lock); + + return count; +} + +int watchdog_pretimeout_governor_set(struct watchdog_device *wdd, + const char *buf) +{ + struct governor_priv *priv; + + mutex_lock(&governor_lock); + + priv = find_governor_by_name(buf); + if (!priv) { + mutex_unlock(&governor_lock); + return -EINVAL; + } + + spin_lock_irq(&pretimeout_lock); + wdd->gov = priv->gov; + spin_unlock_irq(&pretimeout_lock); + + mutex_unlock(&governor_lock); + + return 0; +} + +void watchdog_notify_pretimeout(struct watchdog_device *wdd) +{ + unsigned long flags; + + spin_lock_irqsave(&pretimeout_lock, flags); + if (!wdd->gov) { + spin_unlock_irqrestore(&pretimeout_lock, flags); + return; + } + + wdd->gov->pretimeout(wdd); + spin_unlock_irqrestore(&pretimeout_lock, flags); +} +EXPORT_SYMBOL_GPL(watchdog_notify_pretimeout); + +int watchdog_register_governor(struct watchdog_governor *gov) +{ + struct watchdog_pretimeout *p; + struct governor_priv *priv; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + mutex_lock(&governor_lock); + + if (find_governor_by_name(gov->name)) { + mutex_unlock(&governor_lock); + kfree(priv); + return -EBUSY; + } + + priv->gov = gov; + list_add(&priv->entry, &governor_list); + + if (!strncmp(gov->name, WATCHDOG_PRETIMEOUT_DEFAULT_GOV, + WATCHDOG_GOV_NAME_MAXLEN)) { + spin_lock_irq(&pretimeout_lock); + default_gov = gov; + + list_for_each_entry(p, &pretimeout_list, entry) + if (!p->wdd->gov) + p->wdd->gov = default_gov; + spin_unlock_irq(&pretimeout_lock); + } + + mutex_unlock(&governor_lock); + + return 0; +} +EXPORT_SYMBOL(watchdog_register_governor); + +void watchdog_unregister_governor(struct watchdog_governor *gov) +{ + struct watchdog_pretimeout *p; + struct governor_priv *priv, *t; + + mutex_lock(&governor_lock); + + list_for_each_entry_safe(priv, t, &governor_list, entry) { + if (priv->gov == gov) { + list_del(&priv->entry); 
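+			/* free the registration wrapper allocated in watchdog_register_governor() */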
+ kfree(priv); + break; + } + } + + spin_lock_irq(&pretimeout_lock); + list_for_each_entry(p, &pretimeout_list, entry) + if (p->wdd->gov == gov) + p->wdd->gov = default_gov; + spin_unlock_irq(&pretimeout_lock); + + mutex_unlock(&governor_lock); +} +EXPORT_SYMBOL(watchdog_unregister_governor); + +int watchdog_register_pretimeout(struct watchdog_device *wdd) +{ + struct watchdog_pretimeout *p; + + if (!(wdd->info->options & WDIOF_PRETIMEOUT)) + return 0; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + + spin_lock_irq(&pretimeout_lock); + list_add(&p->entry, &pretimeout_list); + p->wdd = wdd; + wdd->gov = default_gov; + spin_unlock_irq(&pretimeout_lock); + + return 0; +} + +void watchdog_unregister_pretimeout(struct watchdog_device *wdd) +{ + struct watchdog_pretimeout *p, *t; + + if (!(wdd->info->options & WDIOF_PRETIMEOUT)) + return; + + spin_lock_irq(&pretimeout_lock); + wdd->gov = NULL; + + list_for_each_entry_safe(p, t, &pretimeout_list, entry) { + if (p->wdd == wdd) { + list_del(&p->entry); + break; + } + } + spin_unlock_irq(&pretimeout_lock); + + kfree(p); +} diff --git a/drivers/watchdog/watchdog_pretimeout.h b/drivers/watchdog/watchdog_pretimeout.h new file mode 100644 index 000000000000..a5a32b39c56d --- /dev/null +++ b/drivers/watchdog/watchdog_pretimeout.h @@ -0,0 +1,60 @@ +#ifndef __WATCHDOG_PRETIMEOUT_H +#define __WATCHDOG_PRETIMEOUT_H + +#define WATCHDOG_GOV_NAME_MAXLEN 20 + +struct watchdog_device; + +struct watchdog_governor { + const char name[WATCHDOG_GOV_NAME_MAXLEN]; + void (*pretimeout)(struct watchdog_device *wdd); +}; + +#if IS_ENABLED(CONFIG_WATCHDOG_PRETIMEOUT_GOV) +/* Interfaces to watchdog pretimeout governors */ +int watchdog_register_governor(struct watchdog_governor *gov); +void watchdog_unregister_governor(struct watchdog_governor *gov); + +/* Interfaces to watchdog_dev.c */ +int watchdog_register_pretimeout(struct watchdog_device *wdd); +void watchdog_unregister_pretimeout(struct watchdog_device *wdd); +int watchdog_pretimeout_available_governors_get(char *buf); +int watchdog_pretimeout_governor_get(struct watchdog_device *wdd, char *buf); +int watchdog_pretimeout_governor_set(struct watchdog_device *wdd, + const char *buf); + +#if IS_ENABLED(CONFIG_WATCHDOG_PRETIMEOUT_DEFAULT_GOV_NOOP) +#define WATCHDOG_PRETIMEOUT_DEFAULT_GOV "noop" +#elif IS_ENABLED(CONFIG_WATCHDOG_PRETIMEOUT_DEFAULT_GOV_PANIC) +#define WATCHDOG_PRETIMEOUT_DEFAULT_GOV "panic" +#endif + +#else +static inline int watchdog_register_pretimeout(struct watchdog_device *wdd) +{ + return 0; +} + +static inline void watchdog_unregister_pretimeout(struct watchdog_device *wdd) +{ +} + +static inline int watchdog_pretimeout_available_governors_get(char *buf) +{ + return -EINVAL; +} + +static inline int watchdog_pretimeout_governor_get(struct watchdog_device *wdd, + char *buf) +{ + return -EINVAL; +} + +static inline int watchdog_pretimeout_governor_set(struct watchdog_device *wdd, + const char *buf) +{ + return -EINVAL; +} +#endif + +#endif diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index fa1efef3c96e..b4e0cea5a64e 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -18,7 +18,10 @@ * GNU General Public License for more details. 
*/ +#include <linux/delay.h> #include <linux/i2c.h> +#include <linux/ihex.h> +#include <linux/firmware.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> @@ -36,6 +39,8 @@ #define ZIIRAVE_STATE_OFF 0x1 #define ZIIRAVE_STATE_ON 0x2 +#define ZIIRAVE_FW_NAME "ziirave_wdt.fw" + static char *ziirave_reasons[] = {"power cycle", "hw watchdog", NULL, NULL, "host request", NULL, "illegal configuration", "illegal instruction", "illegal trap", @@ -50,12 +55,35 @@ static char *ziirave_reasons[] = {"power cycle", "hw watchdog", NULL, NULL, #define ZIIRAVE_WDT_PING 0x9 #define ZIIRAVE_WDT_RESET_DURATION 0xa +#define ZIIRAVE_FIRM_PKT_TOTAL_SIZE 20 +#define ZIIRAVE_FIRM_PKT_DATA_SIZE 16 +#define ZIIRAVE_FIRM_FLASH_MEMORY_START 0x1600 +#define ZIIRAVE_FIRM_FLASH_MEMORY_END 0x2bbf + +/* Received and ready for next Download packet. */ +#define ZIIRAVE_FIRM_DOWNLOAD_ACK 1 +/* Currently writing to flash. Retry Download status in a moment! */ +#define ZIIRAVE_FIRM_DOWNLOAD_BUSY 2 + +/* Wait for ACK timeout in ms */ +#define ZIIRAVE_FIRM_WAIT_FOR_ACK_TIMEOUT 50 + +/* Firmware commands */ +#define ZIIRAVE_CMD_DOWNLOAD_START 0x10 +#define ZIIRAVE_CMD_DOWNLOAD_END 0x11 +#define ZIIRAVE_CMD_DOWNLOAD_SET_READ_ADDR 0x12 +#define ZIIRAVE_CMD_DOWNLOAD_READ_BYTE 0x13 +#define ZIIRAVE_CMD_RESET_PROCESSOR 0x0b +#define ZIIRAVE_CMD_JUMP_TO_BOOTLOADER 0x0c +#define ZIIRAVE_CMD_DOWNLOAD_PACKET 0x0e + struct ziirave_wdt_rev { unsigned char major; unsigned char minor; }; struct ziirave_wdt_data { + struct mutex sysfs_mutex; struct watchdog_device wdd; struct ziirave_wdt_rev bootloader_rev; struct ziirave_wdt_rev firmware_rev; @@ -146,6 +174,293 @@ static unsigned int ziirave_wdt_get_timeleft(struct watchdog_device *wdd) return ret; } +static int ziirave_firm_wait_for_ack(struct watchdog_device *wdd) +{ + struct i2c_client *client = to_i2c_client(wdd->parent); + int ret; + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(ZIIRAVE_FIRM_WAIT_FOR_ACK_TIMEOUT); + do { + if (time_after(jiffies, timeout)) + return -ETIMEDOUT; + + usleep_range(5000, 10000); + + ret = i2c_smbus_read_byte(client); + if (ret < 0) { + dev_err(&client->dev, "Failed to read byte\n"); + return ret; + } + } while (ret == ZIIRAVE_FIRM_DOWNLOAD_BUSY); + + return ret == ZIIRAVE_FIRM_DOWNLOAD_ACK ? 0 : -EIO; +} + +static int ziirave_firm_set_read_addr(struct watchdog_device *wdd, u16 addr) +{ + struct i2c_client *client = to_i2c_client(wdd->parent); + u8 address[2]; + + address[0] = addr & 0xff; + address[1] = (addr >> 8) & 0xff; + + return i2c_smbus_write_block_data(client, + ZIIRAVE_CMD_DOWNLOAD_SET_READ_ADDR, + ARRAY_SIZE(address), address); +} + +static int ziirave_firm_write_block_data(struct watchdog_device *wdd, + u8 command, u8 length, const u8 *data, + bool wait_for_ack) +{ + struct i2c_client *client = to_i2c_client(wdd->parent); + int ret; + + ret = i2c_smbus_write_block_data(client, command, length, data); + if (ret) { + dev_err(&client->dev, + "Failed to send command 0x%02x: %d\n", command, ret); + return ret; + } + + if (wait_for_ack) + ret = ziirave_firm_wait_for_ack(wdd); + + return ret; +} + +static int ziirave_firm_write_byte(struct watchdog_device *wdd, u8 command, + u8 byte, bool wait_for_ack) +{ + return ziirave_firm_write_block_data(wdd, command, 1, &byte, + wait_for_ack); +} + +/* + * ziirave_firm_write_pkt() - Build and write a firmware packet + * + * A packet to send to the firmware is composed of the following bytes: + * Length | Addr0 | Addr1 | Data0 .. 
Data15 | Checksum | + * Where, + * Length: A data byte containing the length of the data. + * Addr0: Low byte of the address. + * Addr1: High byte of the address. + * Data0 .. Data15: Array of 16 bytes of data. + * Checksum: Checksum byte to verify data integrity. + */ +static int ziirave_firm_write_pkt(struct watchdog_device *wdd, + const struct ihex_binrec *rec) +{ + struct i2c_client *client = to_i2c_client(wdd->parent); + u8 i, checksum = 0, packet[ZIIRAVE_FIRM_PKT_TOTAL_SIZE]; + int ret; + u16 addr; + + memset(packet, 0, ARRAY_SIZE(packet)); + + /* Packet length */ + packet[0] = (u8)be16_to_cpu(rec->len); + /* Packet address */ + addr = (be32_to_cpu(rec->addr) & 0xffff) >> 1; + packet[1] = addr & 0xff; + packet[2] = (addr & 0xff00) >> 8; + + /* Packet data */ + if (be16_to_cpu(rec->len) > ZIIRAVE_FIRM_PKT_DATA_SIZE) + return -EMSGSIZE; + memcpy(packet + 3, rec->data, be16_to_cpu(rec->len)); + + /* Packet checksum */ + for (i = 0; i < ZIIRAVE_FIRM_PKT_TOTAL_SIZE - 1; i++) + checksum += packet[i]; + packet[ZIIRAVE_FIRM_PKT_TOTAL_SIZE - 1] = checksum; + + ret = ziirave_firm_write_block_data(wdd, ZIIRAVE_CMD_DOWNLOAD_PACKET, + ARRAY_SIZE(packet), packet, true); + if (ret) + dev_err(&client->dev, + "Failed to write firmware packet at address 0x%04x: %d\n", + addr, ret); + + return ret; +} + +static int ziirave_firm_verify(struct watchdog_device *wdd, + const struct firmware *fw) +{ + struct i2c_client *client = to_i2c_client(wdd->parent); + const struct ihex_binrec *rec; + int i, ret; + u8 data[ZIIRAVE_FIRM_PKT_DATA_SIZE]; + u16 addr; + + for (rec = (void *)fw->data; rec; rec = ihex_next_binrec(rec)) { + /* Zero length marks end of records */ + if (!be16_to_cpu(rec->len)) + break; + + addr = (be32_to_cpu(rec->addr) & 0xffff) >> 1; + if (addr < ZIIRAVE_FIRM_FLASH_MEMORY_START || + addr > ZIIRAVE_FIRM_FLASH_MEMORY_END) + continue; + + ret = ziirave_firm_set_read_addr(wdd, addr); + if (ret) { + dev_err(&client->dev, + "Failed to send SET_READ_ADDR command: %d\n", + ret); + return ret; + } + + for (i = 0; i < ARRAY_SIZE(data); i++) { + ret = i2c_smbus_read_byte_data(client, + ZIIRAVE_CMD_DOWNLOAD_READ_BYTE); + if (ret < 0) { + dev_err(&client->dev, + "Failed to READ DATA: %d\n", ret); + return ret; + } + data[i] = ret; + } + + if (memcmp(data, rec->data, be16_to_cpu(rec->len))) { + dev_err(&client->dev, + "Firmware mismatch at address 0x%04x\n", addr); + return -EINVAL; + } + } + + return 0; +} + +static int ziirave_firm_upload(struct watchdog_device *wdd, + const struct firmware *fw) +{ + struct i2c_client *client = to_i2c_client(wdd->parent); + int ret, words_till_page_break; + const struct ihex_binrec *rec; + struct ihex_binrec *rec_new; + + ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_JUMP_TO_BOOTLOADER, 1, + false); + if (ret) + return ret; + + msleep(500); + + ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_DOWNLOAD_START, 1, true); + if (ret) + return ret; + + msleep(500); + + for (rec = (void *)fw->data; rec; rec = ihex_next_binrec(rec)) { + /* Zero length marks end of records */ + if (!be16_to_cpu(rec->len)) + break; + + /* Check max data size */ + if (be16_to_cpu(rec->len) > ZIIRAVE_FIRM_PKT_DATA_SIZE) { + dev_err(&client->dev, "Firmware packet too long (%d)\n", + be16_to_cpu(rec->len)); + return -EMSGSIZE; + } + + /* Calculate words till page break */ + words_till_page_break = (64 - ((be32_to_cpu(rec->addr) >> 1) & + 0x3f)); + if ((be16_to_cpu(rec->len) >> 1) > words_till_page_break) { + /* + * Data passes a page boundary, so we need to split it + * into two blocks of data.
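+ * As an illustrative worked example (values assumed by the editor, not taken from the patch): a 16-byte record at byte address 0x2c7e has word address 0x163f, one word short of a 64-word page, so words_till_page_break = 64 - 0x3f = 1; the 8-word record is then sent as a 2-byte packet at word address 0x163f followed by a 14-byte packet at word address 0x1640. + *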
Create a packet with the first + * block of data. + */ + rec_new = kzalloc(sizeof(struct ihex_binrec) + + (words_till_page_break << 1), + GFP_KERNEL); + if (!rec_new) + return -ENOMEM; + + rec_new->len = cpu_to_be16(words_till_page_break << 1); + rec_new->addr = rec->addr; + memcpy(rec_new->data, rec->data, + be16_to_cpu(rec_new->len)); + + ret = ziirave_firm_write_pkt(wdd, rec_new); + kfree(rec_new); + if (ret) + return ret; + + /* Create a packet with the second block of data */ + rec_new = kzalloc(sizeof(struct ihex_binrec) + + be16_to_cpu(rec->len) - + (words_till_page_break << 1), + GFP_KERNEL); + if (!rec_new) + return -ENOMEM; + + /* Remaining bytes */ + rec_new->len = rec->len - + cpu_to_be16(words_till_page_break << 1); + + rec_new->addr = cpu_to_be32(be32_to_cpu(rec->addr) + + (words_till_page_break << 1)); + + memcpy(rec_new->data, + rec->data + (words_till_page_break << 1), + be16_to_cpu(rec_new->len)); + + ret = ziirave_firm_write_pkt(wdd, rec_new); + kfree(rec_new); + if (ret) + return ret; + } else { + ret = ziirave_firm_write_pkt(wdd, rec); + if (ret) + return ret; + } + } + + /* For end of download, the length field will be set to 0 */ + rec_new = kzalloc(sizeof(struct ihex_binrec) + 1, GFP_KERNEL); + if (!rec_new) + return -ENOMEM; + + ret = ziirave_firm_write_pkt(wdd, rec_new); + kfree(rec_new); + if (ret) { + dev_err(&client->dev, "Failed to send EMPTY packet: %d\n", ret); + return ret; + } + + /* This sleep seems to be required */ + msleep(20); + + /* Start firmware verification */ + ret = ziirave_firm_verify(wdd, fw); + if (ret) { + dev_err(&client->dev, + "Failed to verify firmware: %d\n", ret); + return ret; + } + + /* End download operation */ + ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_DOWNLOAD_END, 1, false); + if (ret) + return ret; + + /* Reset the processor */ + ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_RESET_PROCESSOR, 1, + false); + if (ret) + return ret; + + msleep(500); + + return 0; +} + static const struct watchdog_info ziirave_wdt_info = { .options = WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING, .identity = "Zodiac RAVE Watchdog", @@ -166,9 +481,18 @@ static ssize_t ziirave_wdt_sysfs_show_firm(struct device *dev, { struct i2c_client *client = to_i2c_client(dev->parent); struct ziirave_wdt_data *w_priv = i2c_get_clientdata(client); + int ret; + + ret = mutex_lock_interruptible(&w_priv->sysfs_mutex); + if (ret) + return ret; + + ret = sprintf(buf, "02.%02u.%02u", w_priv->firmware_rev.major, + w_priv->firmware_rev.minor); - return sprintf(buf, "02.%02u.%02u", w_priv->firmware_rev.major, - w_priv->firmware_rev.minor); + mutex_unlock(&w_priv->sysfs_mutex); + + return ret; } static DEVICE_ATTR(firmware_version, S_IRUGO, ziirave_wdt_sysfs_show_firm, @@ -180,9 +504,18 @@ static ssize_t ziirave_wdt_sysfs_show_boot(struct device *dev, { struct i2c_client *client = to_i2c_client(dev->parent); struct ziirave_wdt_data *w_priv = i2c_get_clientdata(client); + int ret; - return sprintf(buf, "01.%02u.%02u", w_priv->bootloader_rev.major, - w_priv->bootloader_rev.minor); + ret = mutex_lock_interruptible(&w_priv->sysfs_mutex); + if (ret) + return ret; + + ret = sprintf(buf, "01.%02u.%02u", w_priv->bootloader_rev.major, + w_priv->bootloader_rev.minor); + + mutex_unlock(&w_priv->sysfs_mutex); + + return ret; } static DEVICE_ATTR(bootloader_version, S_IRUGO, ziirave_wdt_sysfs_show_boot, @@ -194,17 +527,81 @@ static ssize_t ziirave_wdt_sysfs_show_reason(struct device *dev, { struct i2c_client *client = to_i2c_client(dev->parent); struct ziirave_wdt_data 
*w_priv = i2c_get_clientdata(client); + int ret; + + ret = mutex_lock_interruptible(&w_priv->sysfs_mutex); + if (ret) + return ret; + + ret = sprintf(buf, "%s", ziirave_reasons[w_priv->reset_reason]); - return sprintf(buf, "%s", ziirave_reasons[w_priv->reset_reason]); + mutex_unlock(&w_priv->sysfs_mutex); + + return ret; } static DEVICE_ATTR(reset_reason, S_IRUGO, ziirave_wdt_sysfs_show_reason, NULL); +static ssize_t ziirave_wdt_sysfs_store_firm(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev->parent); + struct ziirave_wdt_data *w_priv = i2c_get_clientdata(client); + const struct firmware *fw; + int err; + + err = request_ihex_firmware(&fw, ZIIRAVE_FW_NAME, dev); + if (err) { + dev_err(&client->dev, "Failed to request ihex firmware\n"); + return err; + } + + err = mutex_lock_interruptible(&w_priv->sysfs_mutex); + if (err) + goto release_firmware; + + err = ziirave_firm_upload(&w_priv->wdd, fw); + if (err) { + dev_err(&client->dev, "The firmware update failed: %d\n", err); + goto unlock_mutex; + } + + /* Update firmware version */ + err = ziirave_wdt_revision(client, &w_priv->firmware_rev, + ZIIRAVE_WDT_FIRM_VER_MAJOR); + if (err) { + dev_err(&client->dev, "Failed to read firmware version: %d\n", + err); + goto unlock_mutex; + } + + dev_info(&client->dev, "Firmware updated to version 02.%02u.%02u\n", + w_priv->firmware_rev.major, w_priv->firmware_rev.minor); + + /* Restore the watchdog timeout */ + err = ziirave_wdt_set_timeout(&w_priv->wdd, w_priv->wdd.timeout); + if (err) + dev_err(&client->dev, "Failed to set timeout: %d\n", err); + +unlock_mutex: + mutex_unlock(&w_priv->sysfs_mutex); + +release_firmware: + release_firmware(fw); + + return err ? err : count; +} + +static DEVICE_ATTR(update_firmware, S_IWUSR, NULL, + ziirave_wdt_sysfs_store_firm); + static struct attribute *ziirave_wdt_attrs[] = { &dev_attr_firmware_version.attr, &dev_attr_bootloader_version.attr, &dev_attr_reset_reason.attr, + &dev_attr_update_firmware.attr, NULL }; ATTRIBUTE_GROUPS(ziirave_wdt); @@ -252,6 +649,8 @@ static int ziirave_wdt_probe(struct i2c_client *client, if (!w_priv) return -ENOMEM; + mutex_init(&w_priv->sysfs_mutex); + w_priv->wdd.info = &ziirave_wdt_info; w_priv->wdd.ops = &ziirave_wdt_ops; w_priv->wdd.min_timeout = ZIIRAVE_TIMEOUT_MIN; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c1e9f29c924c..f2d7402abe02 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1209,6 +1209,8 @@ COMPATIBLE_IOCTL(WDIOC_SETOPTIONS) COMPATIBLE_IOCTL(WDIOC_KEEPALIVE) COMPATIBLE_IOCTL(WDIOC_SETTIMEOUT) COMPATIBLE_IOCTL(WDIOC_GETTIMEOUT) +COMPATIBLE_IOCTL(WDIOC_SETPRETIMEOUT) +COMPATIBLE_IOCTL(WDIOC_GETPRETIMEOUT) /* Big R */ COMPATIBLE_IOCTL(RNDGETENTCNT) COMPATIBLE_IOCTL(RNDADDTOENTCNT) diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 207ba8d627ca..a4b531be9168 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -428,10 +428,10 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, if (!nop || !nop->fh_to_dentry) return ERR_PTR(-ESTALE); result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); - if (!result) - result = ERR_PTR(-ESTALE); - if (IS_ERR(result)) - return result; + if (PTR_ERR(result) == -ENOMEM) + return ERR_CAST(result); + if (IS_ERR_OR_NULL(result)) + return ERR_PTR(-ESTALE); if (d_is_dir(result)) { /* @@ -541,6 +541,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, err_result: dput(result); + if (err != -ENOMEM) + err = 
-ESTALE; return ERR_PTR(err); } EXPORT_SYMBOL_GPL(exportfs_decode_fh); diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index 5f7b053720ee..6de15709d024 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -76,7 +76,7 @@ static void nfs_dns_cache_revisit(struct cache_deferred_req *d, int toomany) dreq = container_of(d, struct nfs_cache_defer_req, deferred_req); - complete_all(&dreq->completion); + complete(&dreq->completion); nfs_cache_defer_req_put(dreq); } diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 52a28311e2a4..532d8e242d4d 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -31,8 +31,6 @@ struct nfs_callback_data { unsigned int users; struct svc_serv *serv; - struct svc_rqst *rqst; - struct task_struct *task; }; static struct nfs_callback_data nfs_callback_info[NFS4_MAX_MINOR_VERSION + 1]; @@ -89,15 +87,6 @@ nfs4_callback_svc(void *vrqstp) return 0; } -/* - * Prepare to bring up the NFSv4 callback service - */ -static struct svc_rqst * -nfs4_callback_up(struct svc_serv *serv) -{ - return svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); -} - #if defined(CONFIG_NFS_V4_1) /* * The callback service for NFSv4.1 callbacks @@ -139,29 +128,6 @@ nfs41_callback_svc(void *vrqstp) return 0; } -/* - * Bring up the NFSv4.1 callback service - */ -static struct svc_rqst * -nfs41_callback_up(struct svc_serv *serv) -{ - struct svc_rqst *rqstp; - - INIT_LIST_HEAD(&serv->sv_cb_list); - spin_lock_init(&serv->sv_cb_lock); - init_waitqueue_head(&serv->sv_cb_waitq); - rqstp = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); - dprintk("--> %s return %d\n", __func__, PTR_ERR_OR_ZERO(rqstp)); - return rqstp; -} - -static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv, - struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) -{ - *rqstpp = nfs41_callback_up(serv); - *callback_svc = nfs41_callback_svc; -} - static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, struct svc_serv *serv) { @@ -173,13 +139,6 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, xprt->bc_serv = serv; } #else -static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv, - struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) -{ - *rqstpp = ERR_PTR(-ENOTSUPP); - *callback_svc = ERR_PTR(-ENOTSUPP); -} - static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, struct svc_serv *serv) { @@ -189,45 +148,22 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, struct svc_serv *serv) { - struct svc_rqst *rqstp; - int (*callback_svc)(void *vrqstp); - struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; + int nrservs = nfs_callback_nr_threads; int ret; nfs_callback_bc_serv(minorversion, xprt, serv); - if (cb_info->task) - return 0; + if (nrservs < NFS4_MIN_NR_CALLBACK_THREADS) + nrservs = NFS4_MIN_NR_CALLBACK_THREADS; - switch (minorversion) { - case 0: - /* v4.0 callback setup */ - rqstp = nfs4_callback_up(serv); - callback_svc = nfs4_callback_svc; - break; - default: - nfs_minorversion_callback_svc_setup(serv, - &rqstp, &callback_svc); - } - - if (IS_ERR(rqstp)) - return PTR_ERR(rqstp); - - svc_sock_update_bufs(serv); + if (serv->sv_nrthreads-1 == nrservs) + return 0; - cb_info->serv = serv; - cb_info->rqst = rqstp; - cb_info->task = kthread_create(callback_svc, cb_info->rqst, - "nfsv4.%u-svc", minorversion); - if (IS_ERR(cb_info->task)) { - ret = 
PTR_ERR(cb_info->task); - svc_exit_thread(cb_info->rqst); - cb_info->rqst = NULL; - cb_info->task = NULL; + ret = serv->sv_ops->svo_setup(serv, NULL, nrservs); + if (ret) { + serv->sv_ops->svo_setup(serv, NULL, 0); return ret; } - rqstp->rq_task = cb_info->task; - wake_up_process(cb_info->task); dprintk("nfs_callback_up: service started\n"); return 0; } @@ -281,19 +217,41 @@ err_bind: return ret; } -static struct svc_serv_ops nfs_cb_sv_ops = { +static struct svc_serv_ops nfs40_cb_sv_ops = { + .svo_function = nfs4_callback_svc, .svo_enqueue_xprt = svc_xprt_do_enqueue, + .svo_setup = svc_set_num_threads, + .svo_module = THIS_MODULE, +}; +#if defined(CONFIG_NFS_V4_1) +static struct svc_serv_ops nfs41_cb_sv_ops = { + .svo_function = nfs41_callback_svc, + .svo_enqueue_xprt = svc_xprt_do_enqueue, + .svo_setup = svc_set_num_threads, + .svo_module = THIS_MODULE, +}; + +struct svc_serv_ops *nfs4_cb_sv_ops[] = { + [0] = &nfs40_cb_sv_ops, + [1] = &nfs41_cb_sv_ops, +}; +#else +struct svc_serv_ops *nfs4_cb_sv_ops[] = { + [0] = &nfs40_cb_sv_ops, + [1] = NULL, }; +#endif static struct svc_serv *nfs_callback_create_svc(int minorversion) { struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; struct svc_serv *serv; + struct svc_serv_ops *sv_ops; /* * Check whether we're already up and running. */ - if (cb_info->task) { + if (cb_info->serv) { /* * Note: increase service usage, because later in case of error * svc_destroy() will be called. @@ -302,6 +260,17 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) return cb_info->serv; } + switch (minorversion) { + case 0: + sv_ops = nfs4_cb_sv_ops[0]; + break; + default: + sv_ops = nfs4_cb_sv_ops[1]; + } + + if (sv_ops == NULL) + return ERR_PTR(-ENOTSUPP); + /* * Sanity check: if there's no task, * we should be the first user ... @@ -310,11 +279,12 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n", cb_info->users); - serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, &nfs_cb_sv_ops); + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops); if (!serv) { printk(KERN_ERR "nfs_callback_create_svc: create service failed\n"); return ERR_PTR(-ENOMEM); } + cb_info->serv = serv; /* As there is only one thread we need to over-ride the * default maximum of 80 connections */ @@ -357,6 +327,8 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) * thread exits. 
*/ err_net: + if (!cb_info->users) + cb_info->serv = NULL; svc_destroy(serv); err_create: mutex_unlock(&nfs_callback_mutex); @@ -374,18 +346,18 @@ err_start: void nfs_callback_down(int minorversion, struct net *net) { struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; + struct svc_serv *serv; mutex_lock(&nfs_callback_mutex); - nfs_callback_down_net(minorversion, cb_info->serv, net); + serv = cb_info->serv; + nfs_callback_down_net(minorversion, serv, net); cb_info->users--; - if (cb_info->users == 0 && cb_info->task != NULL) { - kthread_stop(cb_info->task); - dprintk("nfs_callback_down: service stopped\n"); - svc_exit_thread(cb_info->rqst); + if (cb_info->users == 0) { + svc_get(serv); + serv->sv_ops->svo_setup(serv, NULL, 0); + svc_destroy(serv); dprintk("nfs_callback_down: service destroyed\n"); cb_info->serv = NULL; - cb_info->rqst = NULL; - cb_info->task = NULL; } mutex_unlock(&nfs_callback_mutex); } diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 5fe1cecbf9f0..c701c308fac5 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -179,6 +179,15 @@ extern __be32 nfs4_callback_devicenotify( struct cb_devicenotifyargs *args, void *dummy, struct cb_process_state *cps); +struct cb_notify_lock_args { + struct nfs_fh cbnl_fh; + struct nfs_lowner cbnl_owner; + bool cbnl_valid; +}; + +extern __be32 nfs4_callback_notify_lock(struct cb_notify_lock_args *args, + void *dummy, + struct cb_process_state *cps); #endif /* CONFIG_NFS_V4_1 */ extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *); extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, @@ -198,6 +207,9 @@ extern void nfs_callback_down(int minorversion, struct net *net); #define NFS41_BC_MIN_CALLBACKS 1 #define NFS41_BC_MAX_CALLBACKS 1 +#define NFS4_MIN_NR_CALLBACK_THREADS 1 + extern unsigned int nfs_callback_set_tcpport; +extern unsigned short nfs_callback_nr_threads; #endif /* __LINUX_FS_NFS_CALLBACK_H */ diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index f953ef6b2f2e..e9aa235e9d10 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -628,4 +628,20 @@ out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; } + +__be32 nfs4_callback_notify_lock(struct cb_notify_lock_args *args, void *dummy, + struct cb_process_state *cps) +{ + if (!cps->clp) /* set in cb_sequence */ + return htonl(NFS4ERR_OP_NOT_IN_SESSION); + + dprintk_rcu("NFS: CB_NOTIFY_LOCK request from %s\n", + rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); + + /* Don't wake anybody if the string looked bogus */ + if (args->cbnl_valid) + __wake_up(&cps->clp->cl_lock_waitq, TASK_NORMAL, 0, args); + + return htonl(NFS4_OK); +} #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 656f68f7fe53..eb094c6011d8 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -35,6 +35,7 @@ (1 + 3) * 4) // seqid, 3 slotids #define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #define CB_OP_RECALLSLOT_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) +#define CB_OP_NOTIFY_LOCK_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #endif /* CONFIG_NFS_V4_1 */ #define NFSDBG_FACILITY NFSDBG_CALLBACK @@ -72,7 +73,7 @@ static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy) return xdr_ressize_check(rqstp, p); } -static __be32 *read_buf(struct xdr_stream *xdr, int nbytes) +static __be32 *read_buf(struct xdr_stream *xdr, size_t nbytes) { __be32 *p; @@ -534,6 +535,49 @@ static __be32 decode_recallslot_args(struct svc_rqst *rqstp, return 0; } 
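+/* + * Editor's reading aid, inferred from decode_lockowner() below rather than taken from the patch: the CB_NOTIFY_LOCK lock owner arrives on the wire as a clientid (XDR hyper, 8 bytes), an owner length (XDR u32), and the owner opaque itself; only a 20-byte owner, laid out as "lock id:" (8 bytes) | s_dev (4 bytes) | id (8 bytes, XDR hyper), is decoded further. Any other length zeroes the fields and leaves cbnl_valid false, so no waiter is woken for it. + */ +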
+static __be32 decode_lockowner(struct xdr_stream *xdr, struct cb_notify_lock_args *args) +{ + __be32 *p; + unsigned int len; + + p = read_buf(xdr, 12); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_BADXDR); + + p = xdr_decode_hyper(p, &args->cbnl_owner.clientid); + len = be32_to_cpu(*p); + + p = read_buf(xdr, len); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_BADXDR); + + /* Only try to decode if the length is right */ + if (len == 20) { + p += 2; /* skip "lock id:" */ + args->cbnl_owner.s_dev = be32_to_cpu(*p++); + xdr_decode_hyper(p, &args->cbnl_owner.id); + args->cbnl_valid = true; + } else { + args->cbnl_owner.s_dev = 0; + args->cbnl_owner.id = 0; + args->cbnl_valid = false; + } + return 0; +} + +static __be32 decode_notify_lock_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_notify_lock_args *args) +{ + __be32 status; + + status = decode_fh(xdr, &args->cbnl_fh); + if (unlikely(status != 0)) + goto out; + status = decode_lockowner(xdr, args); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + return status; +} + #endif /* CONFIG_NFS_V4_1 */ static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) @@ -746,6 +790,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) case OP_CB_RECALL_SLOT: case OP_CB_LAYOUTRECALL: case OP_CB_NOTIFY_DEVICEID: + case OP_CB_NOTIFY_LOCK: *op = &callback_ops[op_nr]; break; @@ -753,7 +798,6 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) case OP_CB_PUSH_DELEG: case OP_CB_RECALLABLE_OBJ_AVAIL: case OP_CB_WANTS_CANCELLED: - case OP_CB_NOTIFY_LOCK: return htonl(NFS4ERR_NOTSUPP); default: @@ -1006,6 +1050,11 @@ static struct callback_op callback_ops[] = { .decode_args = (callback_decode_arg_t)decode_recallslot_args, .res_maxsize = CB_OP_RECALLSLOT_RES_MAXSZ, }, + [OP_CB_NOTIFY_LOCK] = { + .process_op = (callback_process_op_t)nfs4_callback_notify_lock, + .decode_args = (callback_decode_arg_t)decode_notify_lock_args, + .res_maxsize = CB_OP_NOTIFY_LOCK_RES_MAXSZ, + }, #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 1e106780a237..7555ba889d1f 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -313,7 +313,10 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat continue; /* Match the full socket address */ if (!rpc_cmp_addr_port(sap, clap)) - continue; + /* Match all xprt_switch full socket addresses */ + if (!rpc_clnt_xprt_switch_has_addr(clp->cl_rpcclient, + sap)) + continue; atomic_inc(&clp->cl_count); return clp; @@ -785,7 +788,8 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs } fsinfo.fattr = fattr; - fsinfo.layouttype = 0; + fsinfo.nlayouttypes = 0; + memset(fsinfo.layouttype, 0, sizeof(fsinfo.layouttype)); error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); if (error < 0) goto out_error; @@ -1078,7 +1082,7 @@ void nfs_clients_init(struct net *net) idr_init(&nn->cb_ident_idr); #endif spin_lock_init(&nn->nfs_client_lock); - nn->boot_time = CURRENT_TIME; + nn->boot_time = ktime_get_real(); } #ifdef CONFIG_PROC_FS diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 322c2585bc34..dff600ae0d74 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -41,6 +41,17 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags); } +static bool +nfs4_is_valid_delegation(const struct nfs_delegation *delegation, + fmode_t flags) +{ + if (delegation != NULL && 
(delegation->type & flags) == flags && + !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) && + !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) + return true; + return false; +} + static int nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) { @@ -50,8 +61,7 @@ nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) flags &= FMODE_READ|FMODE_WRITE; rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); - if (delegation != NULL && (delegation->type & flags) == flags && - !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { + if (nfs4_is_valid_delegation(delegation, flags)) { if (mark) nfs_mark_delegation_referenced(delegation); ret = 1; @@ -185,15 +195,13 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, rcu_read_unlock(); put_rpccred(oldcred); trace_nfs4_reclaim_delegation(inode, res->delegation_type); - } else { - /* We appear to have raced with a delegation return. */ - spin_unlock(&delegation->lock); - rcu_read_unlock(); - nfs_inode_set_delegation(inode, cred, res); + return; } - } else { - rcu_read_unlock(); + /* We appear to have raced with a delegation return. */ + spin_unlock(&delegation->lock); } + rcu_read_unlock(); + nfs_inode_set_delegation(inode, cred, res); } static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) @@ -642,28 +650,49 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl rcu_read_unlock(); } -static void nfs_revoke_delegation(struct inode *inode) +static void nfs_mark_delegation_revoked(struct nfs_server *server, + struct nfs_delegation *delegation) +{ + set_bit(NFS_DELEGATION_REVOKED, &delegation->flags); + delegation->stateid.type = NFS4_INVALID_STATEID_TYPE; + nfs_mark_return_delegation(server, delegation); +} + +static bool nfs_revoke_delegation(struct inode *inode, + const nfs4_stateid *stateid) { struct nfs_delegation *delegation; + nfs4_stateid tmp; + bool ret = false; + rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); - if (delegation != NULL) { - set_bit(NFS_DELEGATION_REVOKED, &delegation->flags); - nfs_mark_return_delegation(NFS_SERVER(inode), delegation); - } + if (delegation == NULL) + goto out; + if (stateid == NULL) { + nfs4_stateid_copy(&tmp, &delegation->stateid); + stateid = &tmp; + } else if (!nfs4_stateid_match(stateid, &delegation->stateid)) + goto out; + nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); + ret = true; +out: rcu_read_unlock(); + if (ret) + nfs_inode_find_state_and_recover(inode, stateid); + return ret; } -void nfs_remove_bad_delegation(struct inode *inode) +void nfs_remove_bad_delegation(struct inode *inode, + const nfs4_stateid *stateid) { struct nfs_delegation *delegation; - nfs_revoke_delegation(inode); + if (!nfs_revoke_delegation(inode, stateid)) + return; delegation = nfs_inode_detach_delegation(inode); - if (delegation) { - nfs_inode_find_state_and_recover(inode, &delegation->stateid); + if (delegation) nfs_free_delegation(delegation); - } } EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation); @@ -786,8 +815,15 @@ static void nfs_delegation_mark_reclaim_server(struct nfs_server *server) { struct nfs_delegation *delegation; - list_for_each_entry_rcu(delegation, &server->delegations, super_list) + list_for_each_entry_rcu(delegation, &server->delegations, super_list) { + /* + * If the delegation may have been admin revoked, then we + * cannot reclaim it. 
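+ * Such delegations keep NFS_DELEGATION_TEST_EXPIRED set and are left + * for nfs_reap_expired_delegations() to validate instead.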
+ */ + if (test_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) + continue; set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); + } } /** @@ -851,6 +887,141 @@ restart: rcu_read_unlock(); } +static inline bool nfs4_server_rebooted(const struct nfs_client *clp) +{ + return (clp->cl_state & (BIT(NFS4CLNT_CHECK_LEASE) | + BIT(NFS4CLNT_LEASE_EXPIRED) | + BIT(NFS4CLNT_SESSION_RESET))) != 0; +} + +static void nfs_mark_test_expired_delegation(struct nfs_server *server, + struct nfs_delegation *delegation) +{ + if (delegation->stateid.type == NFS4_INVALID_STATEID_TYPE) + return; + clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); + set_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags); + set_bit(NFS4CLNT_DELEGATION_EXPIRED, &server->nfs_client->cl_state); +} + +static void nfs_inode_mark_test_expired_delegation(struct nfs_server *server, + struct inode *inode) +{ + struct nfs_delegation *delegation; + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation) + nfs_mark_test_expired_delegation(server, delegation); + rcu_read_unlock(); + +} + +static void nfs_delegation_mark_test_expired_server(struct nfs_server *server) +{ + struct nfs_delegation *delegation; + + list_for_each_entry_rcu(delegation, &server->delegations, super_list) + nfs_mark_test_expired_delegation(server, delegation); +} + +/** + * nfs_mark_test_expired_all_delegations - mark all delegations for testing + * @clp: nfs_client to process + * + * Iterates through all the delegations associated with this server and + * marks them as needing to be checked for validity. + */ +void nfs_mark_test_expired_all_delegations(struct nfs_client *clp) +{ + struct nfs_server *server; + + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) + nfs_delegation_mark_test_expired_server(server); + rcu_read_unlock(); +} + +/** + * nfs_reap_expired_delegations - reap expired delegations + * @clp: nfs_client to process + * + * Iterates through all the delegations associated with this server and + * checks if they may have been revoked. This function is usually + * expected to be called in cases where the server may have lost its + * lease. 
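+ * + * For instance, a delegation whose stateid fails the minor version's + * ->test_and_free_expired() check below is revoked locally and state + * recovery is started for the affected inode.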
+ */ +void nfs_reap_expired_delegations(struct nfs_client *clp) +{ + const struct nfs4_minor_version_ops *ops = clp->cl_mvops; + struct nfs_delegation *delegation; + struct nfs_server *server; + struct inode *inode; + struct rpc_cred *cred; + nfs4_stateid stateid; + +restart: + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + list_for_each_entry_rcu(delegation, &server->delegations, + super_list) { + if (test_bit(NFS_DELEGATION_RETURNING, + &delegation->flags)) + continue; + if (test_bit(NFS_DELEGATION_TEST_EXPIRED, + &delegation->flags) == 0) + continue; + if (!nfs_sb_active(server->super)) + continue; + inode = nfs_delegation_grab_inode(delegation); + if (inode == NULL) { + rcu_read_unlock(); + nfs_sb_deactive(server->super); + goto restart; + } + cred = get_rpccred_rcu(delegation->cred); + nfs4_stateid_copy(&stateid, &delegation->stateid); + clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags); + rcu_read_unlock(); + if (cred != NULL && + ops->test_and_free_expired(server, &stateid, cred) < 0) { + nfs_revoke_delegation(inode, &stateid); + nfs_inode_find_state_and_recover(inode, &stateid); + } + put_rpccred(cred); + if (nfs4_server_rebooted(clp)) { + nfs_inode_mark_test_expired_delegation(server,inode); + iput(inode); + nfs_sb_deactive(server->super); + return; + } + iput(inode); + nfs_sb_deactive(server->super); + goto restart; + } + } + rcu_read_unlock(); +} + +void nfs_inode_find_delegation_state_and_recover(struct inode *inode, + const nfs4_stateid *stateid) +{ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_delegation *delegation; + bool found = false; + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation && + nfs4_stateid_match_other(&delegation->stateid, stateid)) { + nfs_mark_test_expired_delegation(NFS_SERVER(inode), delegation); + found = true; + } + rcu_read_unlock(); + if (found) + nfs4_schedule_state_manager(clp); +} + /** * nfs_delegations_present - check for existence of delegations * @clp: client state handle @@ -893,7 +1064,7 @@ bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, flags &= FMODE_READ|FMODE_WRITE; rcu_read_lock(); delegation = rcu_dereference(nfsi->delegation); - ret = (delegation != NULL && (delegation->type & flags) == flags); + ret = nfs4_is_valid_delegation(delegation, flags); if (ret) { nfs4_stateid_copy(dst, &delegation->stateid); nfs_mark_delegation_referenced(delegation); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 64724d252a79..e9d555796873 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -32,6 +32,7 @@ enum { NFS_DELEGATION_REFERENCED, NFS_DELEGATION_RETURNING, NFS_DELEGATION_REVOKED, + NFS_DELEGATION_TEST_EXPIRED, }; int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); @@ -47,11 +48,14 @@ void nfs_expire_unused_delegation_types(struct nfs_client *clp, fmode_t flags); void nfs_expire_unreferenced_delegations(struct nfs_client *clp); int nfs_client_return_marked_delegations(struct nfs_client *clp); int nfs_delegations_present(struct nfs_client *clp); -void nfs_remove_bad_delegation(struct inode *inode); +void nfs_remove_bad_delegation(struct inode *inode, const nfs4_stateid *stateid); void nfs_delegation_mark_reclaim(struct nfs_client *clp); void nfs_delegation_reap_unclaimed(struct nfs_client *clp); +void nfs_mark_test_expired_all_delegations(struct nfs_client *clp); +void nfs_reap_expired_delegations(struct nfs_client *clp); + /* NFSv4 
delegation-related procedures */ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type); @@ -62,6 +66,8 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); int nfs4_have_delegation(struct inode *inode, fmode_t flags); int nfs4_check_delegation(struct inode *inode, fmode_t flags); bool nfs4_delegation_flush_on_close(const struct inode *inode); +void nfs_inode_find_delegation_state_and_recover(struct inode *inode, + const nfs4_stateid *stateid); #endif diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 06e0bf092ba9..5f1af4cd1a33 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -435,11 +435,11 @@ int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry) return 0; nfsi = NFS_I(inode); - if (entry->fattr->fileid == nfsi->fileid) - return 1; - if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0) - return 1; - return 0; + if (entry->fattr->fileid != nfsi->fileid) + return 0; + if (entry->fh->size && nfs_compare_fh(entry->fh, &nfsi->fh) != 0) + return 0; + return 1; } static @@ -496,6 +496,14 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) return; if (!(entry->fattr->valid & NFS_ATTR_FATTR_FSID)) return; + if (filename.len == 0) + return; + /* Validate that the name doesn't contain any illegal '\0' */ + if (strnlen(filename.name, filename.len) != filename.len) + return; + /* ...or '/' */ + if (strnchr(filename.name, filename.len, '/')) + return; if (filename.name[0] == '.') { if (filename.len == 1) return; @@ -517,6 +525,8 @@ again: &entry->fattr->fsid)) goto out; if (nfs_same_file(dentry, entry)) { + if (!entry->fh->size) + goto out; nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); status = nfs_refresh_inode(d_inode(dentry), entry->fattr); if (!status) @@ -529,6 +539,10 @@ again: goto again; } } + if (!entry->fh->size) { + d_lookup_done(dentry); + goto out; + } inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr, entry->label); alias = d_splice_alias(inode, dentry); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 72b7d13ee3c6..bd81bcf3ffcf 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -387,7 +387,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) dreq->iocb->ki_complete(dreq->iocb, res, 0); } - complete_all(&dreq->completion); + complete(&dreq->completion); nfs_direct_req_release(dreq); } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 2efbdde36c3e..9ea85ae23c32 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -520,7 +520,9 @@ const struct address_space_operations nfs_file_aops = { .invalidatepage = nfs_invalidate_page, .releasepage = nfs_release_page, .direct_IO = nfs_direct_IO, +#ifdef CONFIG_MIGRATION .migratepage = nfs_migrate_page, +#endif .launder_page = nfs_launder_page, .is_dirty_writeback = nfs_check_dirty_writeback, .error_remove_page = generic_error_remove_page, @@ -685,11 +687,6 @@ out_noconflict: goto out; } -static int do_vfs_lock(struct file *file, struct file_lock *fl) -{ - return locks_lock_file_wait(file, fl); -} - static int do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { @@ -722,7 +719,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) if (!is_local) status = NFS_PROTO(inode)->lock(filp, cmd, fl); else - status = do_vfs_lock(filp, fl); + status = locks_lock_file_wait(filp, fl); return status; } @@ -747,7 +744,7 @@ do_setlk(struct file *filp, int cmd, 
struct file_lock *fl, int is_local) if (!is_local) status = NFS_PROTO(inode)->lock(filp, cmd, fl); else - status = do_vfs_lock(filp, fl); + status = locks_lock_file_wait(filp, fl); if (status < 0) goto out; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 51b51369704c..98ace127bf86 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1080,7 +1080,7 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, case -NFS4ERR_BAD_STATEID: if (state == NULL) break; - nfs_remove_bad_delegation(state->inode); + nfs_remove_bad_delegation(state->inode, NULL); case -NFS4ERR_OPENMODE: if (state == NULL) break; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index a6acce663219..80bcc0befb07 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -534,12 +534,9 @@ void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo) } #endif - #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, struct page *, struct page *, enum migrate_mode); -#else -#define nfs_migrate_page NULL #endif static inline int @@ -562,7 +559,6 @@ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); /* nfs4proc.c */ -extern void __nfs4_read_done_cb(struct nfs_pgio_header *); extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct nfs_client_initdata *); extern int nfs40_walk_client_list(struct nfs_client *clp, @@ -571,6 +567,9 @@ extern int nfs40_walk_client_list(struct nfs_client *clp, extern int nfs41_walk_client_list(struct nfs_client *clp, struct nfs_client **result, struct rpc_cred *cred); +extern int nfs4_test_session_trunk(struct rpc_clnt *, + struct rpc_xprt *, + void *); static inline struct inode *nfs_igrab_and_active(struct inode *inode) { diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index f0e06e4acbef..fbce0d885d4c 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -29,7 +29,7 @@ struct nfs_net { int cb_users[NFS4_MAX_MINOR_VERSION + 1]; #endif spinlock_t nfs_client_lock; - struct timespec boot_time; + ktime_t boot_time; #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_nfsfs; #endif diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 64b43b4ad9dd..608501971fe0 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -443,6 +443,7 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server, task = rpc_run_task(&task_setup); if (IS_ERR(task)) return PTR_ERR(task); + rpc_put_task(task); return 0; } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9bf64eacba5b..9b3a82abab07 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -39,6 +39,7 @@ enum nfs4_client_state { NFS4CLNT_BIND_CONN_TO_SESSION, NFS4CLNT_MOVED, NFS4CLNT_LEASE_MOVED, + NFS4CLNT_DELEGATION_EXPIRED, }; #define NFS4_RENEW_TIMEOUT 0x01 @@ -57,8 +58,11 @@ struct nfs4_minor_version_ops { struct nfs_fsinfo *); void (*free_lock_state)(struct nfs_server *, struct nfs4_lock_state *); + int (*test_and_free_expired)(struct nfs_server *, + nfs4_stateid *, struct rpc_cred *); struct nfs_seqid * (*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); + int (*session_trunk)(struct rpc_clnt *, struct rpc_xprt *, void *); const struct rpc_call_ops *call_sync_ops; const struct nfs4_state_recovery_ops *reboot_recovery_ops; const struct nfs4_state_recovery_ops *nograce_recovery_ops; @@ -156,6 +160,7 @@ enum { NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */ NFS_STATE_POSIX_LOCKS, /* Posix locks are 
supported */ NFS_STATE_RECOVERY_FAILED, /* OPEN stateid state recovery failed */ + NFS_STATE_MAY_NOTIFY_LOCK, /* server may CB_NOTIFY_LOCK */ }; struct nfs4_state { @@ -203,6 +208,11 @@ struct nfs4_state_recovery_ops { struct rpc_cred *); }; +struct nfs4_add_xprt_data { + struct nfs_client *clp; + struct rpc_cred *cred; +}; + struct nfs4_state_maintenance_ops { int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *, unsigned); struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *); @@ -278,6 +288,8 @@ extern int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo); extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync); +extern int nfs4_detect_session_trunking(struct nfs_client *clp, + struct nfs41_exchange_id_res *res, struct rpc_xprt *xprt); static inline bool is_ds_only_client(struct nfs_client *clp) @@ -439,7 +451,7 @@ extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); extern int nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); extern int nfs4_schedule_migration_recovery(const struct nfs_server *); extern void nfs4_schedule_lease_moved_recovery(struct nfs_client *); -extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); +extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags, bool); extern void nfs41_handle_server_scope(struct nfs_client *, struct nfs41_server_scope **); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); @@ -471,6 +483,7 @@ extern struct nfs_subversion nfs_v4; struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); extern bool nfs4_disable_idmapping; extern unsigned short max_session_slots; +extern unsigned short max_session_cb_slots; extern unsigned short send_implementation_id; extern bool recover_lost_locks; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index cd3b7cfdde16..074ac7131459 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -199,6 +199,9 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) clp->cl_minorversion = cl_init->minorversion; clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; clp->cl_mig_gen = 1; +#if IS_ENABLED(CONFIG_NFS_V4_1) + init_waitqueue_head(&clp->cl_lock_waitq); +#endif return clp; error: @@ -562,15 +565,15 @@ out: /* * Returns true if the client IDs match */ -static bool nfs4_match_clientids(struct nfs_client *a, struct nfs_client *b) +static bool nfs4_match_clientids(u64 a, u64 b) { - if (a->cl_clientid != b->cl_clientid) { + if (a != b) { dprintk("NFS: --> %s client ID %llx does not match %llx\n", - __func__, a->cl_clientid, b->cl_clientid); + __func__, a, b); return false; } dprintk("NFS: --> %s client ID %llx matches %llx\n", - __func__, a->cl_clientid, b->cl_clientid); + __func__, a, b); return true; } @@ -578,17 +581,15 @@ static bool nfs4_match_clientids(struct nfs_client *a, struct nfs_client *b) * Returns true if the server major ids match */ static bool -nfs4_check_clientid_trunking(struct nfs_client *a, struct nfs_client *b) +nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1, + struct nfs41_server_owner *o2) { - struct nfs41_server_owner *o1 = a->cl_serverowner; - struct nfs41_server_owner *o2 = b->cl_serverowner; - if (o1->major_id_sz != o2->major_id_sz) goto out_major_mismatch; if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0) goto out_major_mismatch; - dprintk("NFS: --> %s server owners match\n", __func__); + 
dprintk("NFS: --> %s server owner major IDs match\n", __func__); return true; out_major_mismatch: @@ -597,6 +598,100 @@ out_major_mismatch: return false; } +/* + * Returns true if server minor ids match + */ +static bool +nfs4_check_serverowner_minor_id(struct nfs41_server_owner *o1, + struct nfs41_server_owner *o2) +{ + /* Check eir_server_owner so_minor_id */ + if (o1->minor_id != o2->minor_id) + goto out_minor_mismatch; + + dprintk("NFS: --> %s server owner minor IDs match\n", __func__); + return true; + +out_minor_mismatch: + dprintk("NFS: --> %s server owner minor IDs do not match\n", __func__); + return false; +} + +/* + * Returns true if the server scopes match + */ +static bool +nfs4_check_server_scope(struct nfs41_server_scope *s1, + struct nfs41_server_scope *s2) +{ + if (s1->server_scope_sz != s2->server_scope_sz) + goto out_scope_mismatch; + if (memcmp(s1->server_scope, s2->server_scope, + s1->server_scope_sz) != 0) + goto out_scope_mismatch; + + dprintk("NFS: --> %s server scopes match\n", __func__); + return true; + +out_scope_mismatch: + dprintk("NFS: --> %s server scopes do not match\n", + __func__); + return false; +} + +/** + * nfs4_detect_session_trunking - Checks for session trunking. + * + * Called after a successful EXCHANGE_ID on a multi-addr connection. + * Upon success, add the transport. + * + * @clp: original mount nfs_client + * @res: result structure from an exchange_id using the original mount + * nfs_client with a new multi_addr transport + * + * Returns zero on success, otherwise -EINVAL + * + * Note: since the exchange_id for the new multi_addr transport uses the + * same nfs_client from the original mount, the cl_owner_id is reused, + * so eir_clientowner is the same. + */ +int nfs4_detect_session_trunking(struct nfs_client *clp, + struct nfs41_exchange_id_res *res, + struct rpc_xprt *xprt) +{ + /* Check eir_clientid */ + if (!nfs4_match_clientids(clp->cl_clientid, res->clientid)) + goto out_err; + + /* Check eir_server_owner so_major_id */ + if (!nfs4_check_serverowner_major_id(clp->cl_serverowner, + res->server_owner)) + goto out_err; + + /* Check eir_server_owner so_minor_id */ + if (!nfs4_check_serverowner_minor_id(clp->cl_serverowner, + res->server_owner)) + goto out_err; + + /* Check eir_server_scope */ + if (!nfs4_check_server_scope(clp->cl_serverscope, res->server_scope)) + goto out_err; + + /* Session trunking passed, add the xprt */ + rpc_clnt_xprt_switch_add_xprt(clp->cl_rpcclient, xprt); + + pr_info("NFS: %s: Session trunking succeeded for %s\n", + clp->cl_hostname, + xprt->address_strings[RPC_DISPLAY_ADDR]); + + return 0; +out_err: + pr_info("NFS: %s: Session trunking failed for %s\n", clp->cl_hostname, + xprt->address_strings[RPC_DISPLAY_ADDR]); + + return -EINVAL; +} + /** * nfs41_walk_client_list - Find nfs_client that matches a client/server owner * @@ -650,7 +745,7 @@ int nfs41_walk_client_list(struct nfs_client *new, if (pos->cl_cons_state != NFS_CS_READY) continue; - if (!nfs4_match_clientids(pos, new)) + if (!nfs4_match_clientids(pos->cl_clientid, new->cl_clientid)) continue; /* @@ -658,7 +753,8 @@ int nfs41_walk_client_list(struct nfs_client *new, * client id trunking. In either case, we want to fall back * to using the existing nfs_client. 
*/ - if (!nfs4_check_clientid_trunking(pos, new)) + if (!nfs4_check_serverowner_major_id(pos->cl_serverowner, + new->cl_serverowner)) continue; /* Unlike NFSv4.0, we know that NFSv4.1 always uses the diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0e327528a3ce..ad917bd72b38 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -99,8 +99,8 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, #ifdef CONFIG_NFS_V4_1 static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, struct rpc_cred *); -static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *, - struct rpc_cred *); +static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *, + struct rpc_cred *, bool); #endif #ifdef CONFIG_NFS_V4_SECURITY_LABEL @@ -328,6 +328,33 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent kunmap_atomic(start); } +static void nfs4_test_and_free_stateid(struct nfs_server *server, + nfs4_stateid *stateid, + struct rpc_cred *cred) +{ + const struct nfs4_minor_version_ops *ops = server->nfs_client->cl_mvops; + + ops->test_and_free_expired(server, stateid, cred); +} + +static void __nfs4_free_revoked_stateid(struct nfs_server *server, + nfs4_stateid *stateid, + struct rpc_cred *cred) +{ + stateid->type = NFS4_REVOKED_STATEID_TYPE; + nfs4_test_and_free_stateid(server, stateid, cred); +} + +static void nfs4_free_revoked_stateid(struct nfs_server *server, + const nfs4_stateid *stateid, + struct rpc_cred *cred) +{ + nfs4_stateid tmp; + + nfs4_stateid_copy(&tmp, stateid); + __nfs4_free_revoked_stateid(server, &tmp, cred); +} + static long nfs4_update_delay(long *timeout) { long ret; @@ -370,13 +397,23 @@ static int nfs4_do_handle_exception(struct nfs_server *server, exception->delay = 0; exception->recovering = 0; exception->retry = 0; + + if (stateid == NULL && state != NULL) + stateid = &state->stateid; + switch(errorcode) { case 0: return 0; - case -NFS4ERR_OPENMODE: case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: + if (inode != NULL && stateid != NULL) { + nfs_inode_find_state_and_recover(inode, + stateid); + goto wait_on_recovery; + } + case -NFS4ERR_OPENMODE: if (inode) { int err; @@ -395,12 +432,6 @@ static int nfs4_do_handle_exception(struct nfs_server *server, if (ret < 0) break; goto wait_on_recovery; - case -NFS4ERR_EXPIRED: - if (state != NULL) { - ret = nfs4_schedule_stateid_recovery(server, state); - if (ret < 0) - break; - } case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: nfs4_schedule_lease_recovery(clp); @@ -616,6 +647,7 @@ int nfs40_setup_sequence(struct nfs4_slot_table *tbl, } spin_unlock(&tbl->slot_tbl_lock); + slot->privileged = args->sa_privileged ? 
1 : 0; args->sa_slot = slot; res->sr_slot = slot; @@ -723,12 +755,20 @@ static int nfs41_sequence_process(struct rpc_task *task, /* Check the SEQUENCE operation status */ switch (res->sr_status) { case 0: + /* If previous op on slot was interrupted and we reused + * the seq# and got a reply from the cache, then retry + */ + if (task->tk_status == -EREMOTEIO && interrupted) { + ++slot->seq_nr; + goto retry_nowait; + } /* Update the slot's sequence and clientid lease timer */ slot->seq_done = 1; clp = session->clp; do_renew_lease(clp, res->sr_timestamp); /* Check sequence flags */ - nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); + nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags, + !!slot->privileged); nfs41_update_target_slotid(slot->table, slot, res); break; case 1: @@ -875,6 +915,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, } spin_unlock(&tbl->slot_tbl_lock); + slot->privileged = args->sa_privileged ? 1 : 0; args->sa_slot = slot; dprintk("<-- %s slotid=%u seqid=%u\n", __func__, @@ -1353,6 +1394,19 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode) nfs4_state_set_mode_locked(state, state->state | fmode); } +#ifdef CONFIG_NFS_V4_1 +static bool nfs_open_stateid_recover_openmode(struct nfs4_state *state) +{ + if (state->n_rdonly && !test_bit(NFS_O_RDONLY_STATE, &state->flags)) + return true; + if (state->n_wronly && !test_bit(NFS_O_WRONLY_STATE, &state->flags)) + return true; + if (state->n_rdwr && !test_bit(NFS_O_RDWR_STATE, &state->flags)) + return true; + return false; +} +#endif /* CONFIG_NFS_V4_1 */ + static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) { struct nfs_client *clp = state->owner->so_server->nfs_client; @@ -1369,11 +1423,12 @@ static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) } static bool nfs_need_update_open_stateid(struct nfs4_state *state, - nfs4_stateid *stateid) + const nfs4_stateid *stateid, nfs4_stateid *freeme) { if (test_and_set_bit(NFS_OPEN_STATE, &state->flags) == 0) return true; if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) { + nfs4_stateid_copy(freeme, &state->open_stateid); nfs_test_and_clear_all_open_stateid(state); return true; } @@ -1437,7 +1492,9 @@ static void nfs_clear_open_stateid(struct nfs4_state *state, nfs4_schedule_state_manager(state->owner->so_server->nfs_client); } -static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) +static void nfs_set_open_stateid_locked(struct nfs4_state *state, + const nfs4_stateid *stateid, fmode_t fmode, + nfs4_stateid *freeme) { switch (fmode) { case FMODE_READ: @@ -1449,14 +1506,18 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid * case FMODE_READ|FMODE_WRITE: set_bit(NFS_O_RDWR_STATE, &state->flags); } - if (!nfs_need_update_open_stateid(state, stateid)) + if (!nfs_need_update_open_stateid(state, stateid, freeme)) return; if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) nfs4_stateid_copy(&state->stateid, stateid); nfs4_stateid_copy(&state->open_stateid, stateid); } -static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode) +static void __update_open_stateid(struct nfs4_state *state, + const nfs4_stateid *open_stateid, + const nfs4_stateid *deleg_stateid, + fmode_t fmode, + nfs4_stateid *freeme) { /* * Protect the call to nfs4_state_set_mode_locked and @@ -1469,16 +1530,22 @@ static void __update_open_stateid(struct 
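nfs_open_stateid_recover_openmode() above answers one question: does some open mode still have users (the n_rdonly/n_wronly/n_rdwr counts) whose corresponding NFS_O_*_STATE bit was cleared by revocation? If so, the open stateid cannot simply be declared healthy; a fresh OPEN is needed to re-establish that mode. A sketch of the predicate in plain C (field names borrowed from the patch, flag values invented):

#include <stdbool.h>
#include <stdio.h>

#define O_RDONLY_STATE (1u << 0)
#define O_WRONLY_STATE (1u << 1)
#define O_RDWR_STATE   (1u << 2)

struct open_state {
    unsigned int n_rdonly, n_wronly, n_rdwr;
    unsigned int flags;
};

static bool recover_openmode(const struct open_state *s)
{
    if (s->n_rdonly && !(s->flags & O_RDONLY_STATE))
        return true;
    if (s->n_wronly && !(s->flags & O_WRONLY_STATE))
        return true;
    if (s->n_rdwr && !(s->flags & O_RDWR_STATE))
        return true;
    return false;
}

int main(void)
{
    /* one read-only user, but the RDONLY bit was cleared by revocation */
    struct open_state s = { .n_rdonly = 1, .flags = 0 };

    printf("needs OPEN recovery: %d\n", recover_openmode(&s)); /* 1 */
    return 0;
}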
nfs4_state *state, nfs4_stateid *open_s set_bit(NFS_DELEGATED_STATE, &state->flags); } if (open_stateid != NULL) - nfs_set_open_stateid_locked(state, open_stateid, fmode); + nfs_set_open_stateid_locked(state, open_stateid, fmode, freeme); write_sequnlock(&state->seqlock); update_open_stateflags(state, fmode); spin_unlock(&state->owner->so_lock); } -static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *delegation, fmode_t fmode) +static int update_open_stateid(struct nfs4_state *state, + const nfs4_stateid *open_stateid, + const nfs4_stateid *delegation, + fmode_t fmode) { + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs_client *clp = server->nfs_client; struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_delegation *deleg_cur; + nfs4_stateid freeme = {0}; int ret = 0; fmode &= (FMODE_READ|FMODE_WRITE); @@ -1500,7 +1567,8 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat goto no_delegation_unlock; nfs_mark_delegation_referenced(deleg_cur); - __update_open_stateid(state, open_stateid, &deleg_cur->stateid, fmode); + __update_open_stateid(state, open_stateid, &deleg_cur->stateid, + fmode, &freeme); ret = 1; no_delegation_unlock: spin_unlock(&deleg_cur->lock); @@ -1508,11 +1576,14 @@ no_delegation: rcu_read_unlock(); if (!ret && open_stateid != NULL) { - __update_open_stateid(state, open_stateid, NULL, fmode); + __update_open_stateid(state, open_stateid, NULL, fmode, &freeme); ret = 1; } if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) - nfs4_schedule_state_manager(state->owner->so_server->nfs_client); + nfs4_schedule_state_manager(clp); + if (freeme.type != 0) + nfs4_test_and_free_stateid(server, &freeme, + state->owner->so_cred); return ret; } @@ -1889,7 +1960,6 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: set_bit(NFS_DELEGATED_STATE, &state->flags); - case -NFS4ERR_EXPIRED: /* Don't recall a delegation if it was lost */ nfs4_schedule_lease_recovery(server->nfs_client); return -EAGAIN; @@ -1901,6 +1971,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct return -EAGAIN; case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OPENMODE: nfs_inode_find_state_and_recover(state->inode, @@ -2382,9 +2453,10 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta return ret; } -static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state) +static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state, + const nfs4_stateid *stateid) { - nfs_remove_bad_delegation(state->inode); + nfs_remove_bad_delegation(state->inode, stateid); write_seqlock(&state->seqlock); nfs4_stateid_copy(&state->stateid, &state->open_stateid); write_sequnlock(&state->seqlock); @@ -2394,7 +2466,7 @@ static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state) static void nfs40_clear_delegation_stateid(struct nfs4_state *state) { if (rcu_access_pointer(NFS_I(state->inode)->delegation) != NULL) - nfs_finish_clear_delegation_stateid(state); + nfs_finish_clear_delegation_stateid(state, NULL); } static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) @@ -2404,7 +2476,45 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st return nfs4_open_expired(sp, state); } +static int 
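The freeme parameter threaded through nfs_set_open_stateid_locked() and __update_open_stateid() above exists so a superseded open stateid can be copied out while the seqlock and so_lock are held, and only handed to FREE_STATEID after both are dropped; work that can sleep (allocation, RPC setup) cannot run under a spinlock. A userspace model of the capture-under-lock, act-after-unlock pattern (pthreads, toy names):

#include <pthread.h>
#include <stdio.h>
#include <string.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static char current_id[16] = "stateid-A";

/* Must not be called with 'lock' held (think: it sleeps). */
static void free_on_server(const char *id)
{
    printf("FREE_STATEID(%s)\n", id);
}

static void update_id(const char *new_id)
{
    char freeme[16] = "";

    pthread_mutex_lock(&lock);
    if (strcmp(current_id, new_id) != 0) {
        /* capture the superseded id while protected */
        strcpy(freeme, current_id);
        strcpy(current_id, new_id);
    }
    pthread_mutex_unlock(&lock);

    /* only now is it safe to do the slow, sleeping work */
    if (freeme[0] != '\0')
        free_on_server(freeme);
}

int main(void)
{
    update_id("stateid-B");  /* prints FREE_STATEID(stateid-A) */
    update_id("stateid-B");  /* no change, nothing freed */
    return 0;
}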
nfs40_test_and_free_expired_stateid(struct nfs_server *server, + nfs4_stateid *stateid, + struct rpc_cred *cred) +{ + return -NFS4ERR_BAD_STATEID; +} + #if defined(CONFIG_NFS_V4_1) +static int nfs41_test_and_free_expired_stateid(struct nfs_server *server, + nfs4_stateid *stateid, + struct rpc_cred *cred) +{ + int status; + + switch (stateid->type) { + default: + break; + case NFS4_INVALID_STATEID_TYPE: + case NFS4_SPECIAL_STATEID_TYPE: + return -NFS4ERR_BAD_STATEID; + case NFS4_REVOKED_STATEID_TYPE: + goto out_free; + } + + status = nfs41_test_stateid(server, stateid, cred); + switch (status) { + case -NFS4ERR_EXPIRED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + break; + default: + return status; + } +out_free: + /* Ack the revoked state to the server */ + nfs41_free_stateid(server, stateid, cred, true); + return -NFS4ERR_EXPIRED; +} + static void nfs41_check_delegation_stateid(struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); @@ -2422,23 +2532,68 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) } nfs4_stateid_copy(&stateid, &delegation->stateid); + if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { + rcu_read_unlock(); + nfs_finish_clear_delegation_stateid(state, &stateid); + return; + } + + if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) { + rcu_read_unlock(); + return; + } + cred = get_rpccred(delegation->cred); rcu_read_unlock(); - status = nfs41_test_stateid(server, &stateid, cred); + status = nfs41_test_and_free_expired_stateid(server, &stateid, cred); trace_nfs4_test_delegation_stateid(state, NULL, status); - - if (status != NFS_OK) { - /* Free the stateid unless the server explicitly - * informs us the stateid is unrecognized. */ - if (status != -NFS4ERR_BAD_STATEID) - nfs41_free_stateid(server, &stateid, cred); - nfs_finish_clear_delegation_stateid(state); - } + if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID) + nfs_finish_clear_delegation_stateid(state, &stateid); put_rpccred(cred); } /** + * nfs41_check_expired_locks - possibly free a lock stateid + * + * @state: NFSv4 state for an inode + * + * Returns NFS_OK if recovery for this stateid is now finished. + * Otherwise a negative NFS4ERR value is returned. 
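nfs41_test_and_free_expired_stateid() above implements a small decision table: invalid and special stateids are rejected locally without a round trip, stateids already known revoked go straight to FREE_STATEID, and everything else is probed with TEST_STATEID first, with EXPIRED/ADMIN_REVOKED/DELEG_REVOKED answers also funneled into the free path as an acknowledgment to the server. A condensed C model (error values and the enum are invented for the sketch, and the revoked cases are abbreviated):

#include <stdio.h>

enum sid_type { SID_OTHER, SID_INVALID, SID_SPECIAL, SID_REVOKED };

#define E_BAD_STATEID   (-10025)
#define E_EXPIRED       (-10011)
#define E_ADMIN_REVOKED (-10047)

static int test_stateid_rpc(void) { return E_ADMIN_REVOKED; } /* canned */
static void free_stateid_rpc(void) { printf("FREE_STATEID sent\n"); }

static int test_and_free(enum sid_type type)
{
    int status;

    switch (type) {
    case SID_INVALID:
    case SID_SPECIAL:
        return E_BAD_STATEID;   /* nothing to ask the server */
    case SID_REVOKED:
        goto out_free;          /* already known revoked */
    default:
        break;
    }

    status = test_stateid_rpc();
    switch (status) {
    case E_EXPIRED:
    case E_ADMIN_REVOKED:
        break;                  /* fall through to the free */
    default:
        return status;
    }
out_free:
    free_stateid_rpc();         /* ack the revocation */
    return E_EXPIRED;
}

int main(void)
{
    printf("result: %d\n", test_and_free(SID_OTHER));
    return 0;
}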
+ */ +static int nfs41_check_expired_locks(struct nfs4_state *state) +{ + int status, ret = NFS_OK; + struct nfs4_lock_state *lsp; + struct nfs_server *server = NFS_SERVER(state->inode); + + if (!test_bit(LK_STATE_IN_USE, &state->flags)) + goto out; + list_for_each_entry(lsp, &state->lock_states, ls_locks) { + if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { + struct rpc_cred *cred = lsp->ls_state->owner->so_cred; + + status = nfs41_test_and_free_expired_stateid(server, + &lsp->ls_stateid, + cred); + trace_nfs4_test_lock_stateid(state, lsp, status); + if (status == -NFS4ERR_EXPIRED || + status == -NFS4ERR_BAD_STATEID) { + clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); + lsp->ls_stateid.type = NFS4_INVALID_STATEID_TYPE; + if (!recover_lost_locks) + set_bit(NFS_LOCK_LOST, &lsp->ls_flags); + } else if (status != NFS_OK) { + ret = status; + break; + } + } + }; +out: + return ret; +} + +/** * nfs41_check_open_stateid - possibly free an open stateid * * @state: NFSv4 state for an inode @@ -2453,26 +2608,28 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) struct rpc_cred *cred = state->owner->so_cred; int status; - /* If a state reset has been done, test_stateid is unneeded */ - if ((test_bit(NFS_O_RDONLY_STATE, &state->flags) == 0) && - (test_bit(NFS_O_WRONLY_STATE, &state->flags) == 0) && - (test_bit(NFS_O_RDWR_STATE, &state->flags) == 0)) + if (test_bit(NFS_OPEN_STATE, &state->flags) == 0) { + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) { + if (nfs4_have_delegation(state->inode, state->state)) + return NFS_OK; + return -NFS4ERR_OPENMODE; + } return -NFS4ERR_BAD_STATEID; - - status = nfs41_test_stateid(server, stateid, cred); + } + status = nfs41_test_and_free_expired_stateid(server, stateid, cred); trace_nfs4_test_open_stateid(state, NULL, status); - if (status != NFS_OK) { - /* Free the stateid unless the server explicitly - * informs us the stateid is unrecognized. 
*/ - if (status != -NFS4ERR_BAD_STATEID) - nfs41_free_stateid(server, stateid, cred); - + if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID) { clear_bit(NFS_O_RDONLY_STATE, &state->flags); clear_bit(NFS_O_WRONLY_STATE, &state->flags); clear_bit(NFS_O_RDWR_STATE, &state->flags); clear_bit(NFS_OPEN_STATE, &state->flags); + stateid->type = NFS4_INVALID_STATEID_TYPE; } - return status; + if (status != NFS_OK) + return status; + if (nfs_open_stateid_recover_openmode(state)) + return -NFS4ERR_OPENMODE; + return NFS_OK; } static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) @@ -2480,6 +2637,9 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st int status; nfs41_check_delegation_stateid(state); + status = nfs41_check_expired_locks(state); + if (status != NFS_OK) + return status; status = nfs41_check_open_stateid(state); if (status != NFS_OK) status = nfs4_open_expired(sp, state); @@ -2537,6 +2697,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, goto out; if (server->caps & NFS_CAP_POSIX_LOCK) set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); + if (opendata->o_res.rflags & NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK) + set_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags); dentry = opendata->dentry; if (d_really_is_negative(dentry)) { @@ -2899,9 +3061,12 @@ static void nfs4_close_done(struct rpc_task *task, void *data) break; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + nfs4_free_revoked_stateid(server, + &calldata->arg.stateid, + task->tk_msg.rpc_cred); case -NFS4ERR_OLD_STATEID: case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_EXPIRED: if (!nfs4_stateid_match(&calldata->arg.stateid, &state->open_stateid)) { rpc_restart_call_prepare(task); @@ -4312,7 +4477,7 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s if (error == 0) { /* block layout checks this! */ server->pnfs_blksize = fsinfo->blksize; - set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype); + set_pnfs_layoutdriver(server, fhandle, fsinfo); } return error; @@ -4399,24 +4564,25 @@ static bool nfs4_error_stateid_expired(int err) return false; } -void __nfs4_read_done_cb(struct nfs_pgio_header *hdr) -{ - nfs_invalidate_atime(hdr->inode); -} - static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { struct nfs_server *server = NFS_SERVER(hdr->inode); trace_nfs4_read(hdr, task->tk_status); - if (nfs4_async_handle_error(task, server, - hdr->args.context->state, - NULL) == -EAGAIN) { - rpc_restart_call_prepare(task); - return -EAGAIN; + if (task->tk_status < 0) { + struct nfs4_exception exception = { + .inode = hdr->inode, + .state = hdr->args.context->state, + .stateid = &hdr->args.stateid, + }; + task->tk_status = nfs4_async_handle_exception(task, + server, task->tk_status, &exception); + if (exception.retry) { + rpc_restart_call_prepare(task); + return -EAGAIN; + } } - __nfs4_read_done_cb(hdr); if (task->tk_status > 0) renew_lease(server, hdr->timestamp); return 0; @@ -4445,6 +4611,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) return -EAGAIN; if (nfs4_read_stateid_changed(task, &hdr->args)) return -EAGAIN; + if (task->tk_status > 0) + nfs_invalidate_atime(hdr->inode); return hdr->pgio_done_cb ? 
hdr->pgio_done_cb(task, hdr) : nfs4_read_done_cb(task, hdr); } @@ -4482,11 +4650,19 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct inode *inode = hdr->inode; trace_nfs4_write(hdr, task->tk_status); - if (nfs4_async_handle_error(task, NFS_SERVER(inode), - hdr->args.context->state, - NULL) == -EAGAIN) { - rpc_restart_call_prepare(task); - return -EAGAIN; + if (task->tk_status < 0) { + struct nfs4_exception exception = { + .inode = hdr->inode, + .state = hdr->args.context->state, + .stateid = &hdr->args.stateid, + }; + task->tk_status = nfs4_async_handle_exception(task, + NFS_SERVER(inode), task->tk_status, + &exception); + if (exception.retry) { + rpc_restart_call_prepare(task); + return -EAGAIN; + } } if (task->tk_status >= 0) { renew_lease(NFS_SERVER(inode), hdr->timestamp); @@ -5123,12 +5299,14 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) { /* An impossible timestamp guarantees this value * will never match a generated boot time. */ - verf[0] = 0; - verf[1] = cpu_to_be32(NSEC_PER_SEC + 1); + verf[0] = cpu_to_be32(U32_MAX); + verf[1] = cpu_to_be32(U32_MAX); } else { struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); - verf[0] = cpu_to_be32(nn->boot_time.tv_sec); - verf[1] = cpu_to_be32(nn->boot_time.tv_nsec); + u64 ns = ktime_to_ns(nn->boot_time); + + verf[0] = cpu_to_be32(ns >> 32); + verf[1] = cpu_to_be32(ns); } memcpy(bootverf->data, verf, sizeof(bootverf->data)); } @@ -5393,10 +5571,13 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) renew_lease(data->res.server, data->timestamp); case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_EXPIRED: + nfs4_free_revoked_stateid(data->res.server, + data->args.stateid, + task->tk_msg.rpc_cred); case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: task->tk_status = 0; if (data->roc) pnfs_roc_set_barrier(data->inode, data->roc_barrier); @@ -5528,22 +5709,6 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 return err; } -#define NFS4_LOCK_MINTIMEOUT (1 * HZ) -#define NFS4_LOCK_MAXTIMEOUT (30 * HZ) - -/* - * sleep, with exponential backoff, and retry the LOCK operation. 
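The nfs4_init_boot_verifier() hunk above follows the netns boot_time switch from struct timespec to ktime_t: the 64-bit nanosecond count is split across the verifier's two 32-bit big-endian words, and the purge-state sentinel becomes all-ones, which (per the patch's own comment) a generated boot time can never produce. A small userspace sketch of the packing:

#include <arpa/inet.h>
#include <inttypes.h>
#include <stdio.h>
#include <string.h>

/* Split a 64-bit nanosecond count into two big-endian 32-bit words,
 * as the boot verifier now does with ktime_to_ns(boot_time). */
static void make_verifier(uint64_t ns, unsigned char out[8])
{
    uint32_t words[2];

    words[0] = htonl((uint32_t)(ns >> 32)); /* high half */
    words[1] = htonl((uint32_t)ns);         /* low half  */
    memcpy(out, words, sizeof(words));
}

int main(void)
{
    unsigned char verf[8];
    uint64_t ns = UINT64_C(1477000000123456789);
    int i;

    make_verifier(ns, verf);
    for (i = 0; i < 8; i++)
        printf("%02x", verf[i]);
    printf("\n");
    return 0;
}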
- */ -static unsigned long -nfs4_set_lock_task_retry(unsigned long timeout) -{ - freezable_schedule_timeout_killable_unsafe(timeout); - timeout <<= 1; - if (timeout > NFS4_LOCK_MAXTIMEOUT) - return NFS4_LOCK_MAXTIMEOUT; - return timeout; -} - static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) { struct inode *inode = state->inode; @@ -5600,11 +5765,6 @@ static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock * return err; } -static int do_vfs_lock(struct inode *inode, struct file_lock *fl) -{ - return locks_lock_inode_wait(inode, fl); -} - struct nfs4_unlockdata { struct nfs_locku_args arg; struct nfs_locku_res res; @@ -5657,14 +5817,18 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) switch (task->tk_status) { case 0: renew_lease(calldata->server, calldata->timestamp); - do_vfs_lock(calldata->lsp->ls_state->inode, &calldata->fl); + locks_lock_inode_wait(calldata->lsp->ls_state->inode, &calldata->fl); if (nfs4_update_lock_stateid(calldata->lsp, &calldata->res.stateid)) break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_EXPIRED: + nfs4_free_revoked_stateid(calldata->server, + &calldata->arg.stateid, + task->tk_msg.rpc_cred); case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: if (!nfs4_stateid_match(&calldata->arg.stateid, &calldata->lsp->ls_stateid)) rpc_restart_call_prepare(task); @@ -5765,7 +5929,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * mutex_lock(&sp->so_delegreturn_mutex); /* Exclude nfs4_reclaim_open_stateid() - note nesting! */ down_read(&nfsi->rwsem); - if (do_vfs_lock(inode, request) == -ENOENT) { + if (locks_lock_inode_wait(inode, request) == -ENOENT) { up_read(&nfsi->rwsem); mutex_unlock(&sp->so_delegreturn_mutex); goto out; @@ -5906,7 +6070,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) data->timestamp); if (data->arg.new_lock) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); - if (do_vfs_lock(lsp->ls_state->inode, &data->fl) < 0) { + if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) { rpc_restart_call_prepare(task); break; } @@ -5965,6 +6129,7 @@ static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_ { switch (error) { case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; if (new_lock_owner != 0 || @@ -5973,7 +6138,6 @@ static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_ break; case -NFS4ERR_STALE_STATEID: lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; - case -NFS4ERR_EXPIRED: nfs4_schedule_lease_recovery(server->nfs_client); }; } @@ -6083,52 +6247,19 @@ out: } #if defined(CONFIG_NFS_V4_1) -/** - * nfs41_check_expired_locks - possibly free a lock stateid - * - * @state: NFSv4 state for an inode - * - * Returns NFS_OK if recovery for this stateid is now finished. - * Otherwise a negative NFS4ERR value is returned. 
- */ -static int nfs41_check_expired_locks(struct nfs4_state *state) -{ - int status, ret = -NFS4ERR_BAD_STATEID; - struct nfs4_lock_state *lsp; - struct nfs_server *server = NFS_SERVER(state->inode); - - list_for_each_entry(lsp, &state->lock_states, ls_locks) { - if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { - struct rpc_cred *cred = lsp->ls_state->owner->so_cred; - - status = nfs41_test_stateid(server, - &lsp->ls_stateid, - cred); - trace_nfs4_test_lock_stateid(state, lsp, status); - if (status != NFS_OK) { - /* Free the stateid unless the server - * informs us the stateid is unrecognized. */ - if (status != -NFS4ERR_BAD_STATEID) - nfs41_free_stateid(server, - &lsp->ls_stateid, - cred); - clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); - ret = status; - } - } - }; - - return ret; -} - static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) { - int status = NFS_OK; + struct nfs4_lock_state *lsp; + int status; - if (test_bit(LK_STATE_IN_USE, &state->flags)) - status = nfs41_check_expired_locks(state); - if (status != NFS_OK) - status = nfs4_lock_expired(state, request); + status = nfs4_set_lock_state(state, request); + if (status != 0) + return status; + lsp = request->fl_u.nfs4_fl.owner; + if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) || + test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) + return 0; + status = nfs4_lock_expired(state, request); return status; } #endif @@ -6138,17 +6269,10 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs4_state_owner *sp = state->owner; unsigned char fl_flags = request->fl_flags; - int status = -ENOLCK; + int status; - if ((fl_flags & FL_POSIX) && - !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags)) - goto out; - /* Is this a delegated open? */ - status = nfs4_set_lock_state(state, request); - if (status != 0) - goto out; request->fl_flags |= FL_ACCESS; - status = do_vfs_lock(state->inode, request); + status = locks_lock_inode_wait(state->inode, request); if (status < 0) goto out; mutex_lock(&sp->so_delegreturn_mutex); @@ -6157,7 +6281,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock /* Yes: cache locks! */ /* ...but avoid races with delegation recall... 
*/ request->fl_flags = fl_flags & ~FL_SLEEP; - status = do_vfs_lock(state->inode, request); + status = locks_lock_inode_wait(state->inode, request); up_read(&nfsi->rwsem); mutex_unlock(&sp->so_delegreturn_mutex); goto out; @@ -6188,12 +6312,124 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock * return err; } +#define NFS4_LOCK_MINTIMEOUT (1 * HZ) +#define NFS4_LOCK_MAXTIMEOUT (30 * HZ) + +static int +nfs4_retry_setlk_simple(struct nfs4_state *state, int cmd, + struct file_lock *request) +{ + int status = -ERESTARTSYS; + unsigned long timeout = NFS4_LOCK_MINTIMEOUT; + + while(!signalled()) { + status = nfs4_proc_setlk(state, cmd, request); + if ((status != -EAGAIN) || IS_SETLK(cmd)) + break; + freezable_schedule_timeout_interruptible(timeout); + timeout *= 2; + timeout = min_t(unsigned long, NFS4_LOCK_MAXTIMEOUT, timeout); + status = -ERESTARTSYS; + } + return status; +} + +#ifdef CONFIG_NFS_V4_1 +struct nfs4_lock_waiter { + struct task_struct *task; + struct inode *inode; + struct nfs_lowner *owner; + bool notified; +}; + +static int +nfs4_wake_lock_waiter(wait_queue_t *wait, unsigned int mode, int flags, void *key) +{ + int ret; + struct cb_notify_lock_args *cbnl = key; + struct nfs4_lock_waiter *waiter = wait->private; + struct nfs_lowner *lowner = &cbnl->cbnl_owner, + *wowner = waiter->owner; + + /* Only wake if the callback was for the same owner */ + if (lowner->clientid != wowner->clientid || + lowner->id != wowner->id || + lowner->s_dev != wowner->s_dev) + return 0; + + /* Make sure it's for the right inode */ + if (nfs_compare_fh(NFS_FH(waiter->inode), &cbnl->cbnl_fh)) + return 0; + + waiter->notified = true; + + /* override "private" so we can use default_wake_function */ + wait->private = waiter->task; + ret = autoremove_wake_function(wait, mode, flags, key); + wait->private = waiter; + return ret; +} + +static int +nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + int status = -ERESTARTSYS; + unsigned long flags; + struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner; + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs_client *clp = server->nfs_client; + wait_queue_head_t *q = &clp->cl_lock_waitq; + struct nfs_lowner owner = { .clientid = clp->cl_clientid, + .id = lsp->ls_seqid.owner_id, + .s_dev = server->s_dev }; + struct nfs4_lock_waiter waiter = { .task = current, + .inode = state->inode, + .owner = &owner, + .notified = false }; + wait_queue_t wait; + + /* Don't bother with waitqueue if we don't expect a callback */ + if (!test_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags)) + return nfs4_retry_setlk_simple(state, cmd, request); + + init_wait(&wait); + wait.private = &waiter; + wait.func = nfs4_wake_lock_waiter; + add_wait_queue(q, &wait); + + while(!signalled()) { + status = nfs4_proc_setlk(state, cmd, request); + if ((status != -EAGAIN) || IS_SETLK(cmd)) + break; + + status = -ERESTARTSYS; + spin_lock_irqsave(&q->lock, flags); + if (waiter.notified) { + spin_unlock_irqrestore(&q->lock, flags); + continue; + } + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock_irqrestore(&q->lock, flags); + + freezable_schedule_timeout_interruptible(NFS4_LOCK_MAXTIMEOUT); + } + + finish_wait(q, &wait); + return status; +} +#else /* !CONFIG_NFS_V4_1 */ +static inline int +nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + return nfs4_retry_setlk_simple(state, cmd, request); +} +#endif + static int nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) { 
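The nfs4_retry_setlk() machinery above has two modes: without CB_NOTIFY_LOCK support it simply sleeps and retries (nfs4_retry_setlk_simple), while with it the task parks on cl_lock_waitq behind nfs4_wake_lock_waiter(), a filtered wake function that only wakes the task when the callback's lock owner (clientid, owner id, s_dev) and file handle both match the waiter's; anything else leaves the waiter sleeping out its NFS4_LOCK_MAXTIMEOUT. A plain C model of just the match predicate (structs simplified to the compared fields):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct lowner {
    uint64_t clientid;
    uint64_t id;
    uint32_t s_dev;
};

struct notify {
    struct lowner owner;
    char fh[16];    /* stand-in for the NFSv4 file handle */
};

/* Wake only if the callback targets this waiter's owner and inode. */
static bool should_wake(const struct notify *cb,
                        const struct lowner *waiter_owner,
                        const char *waiter_fh)
{
    if (cb->owner.clientid != waiter_owner->clientid ||
        cb->owner.id != waiter_owner->id ||
        cb->owner.s_dev != waiter_owner->s_dev)
        return false;
    return memcmp(cb->fh, waiter_fh, sizeof(cb->fh)) == 0;
}

int main(void)
{
    struct lowner me = { .clientid = 42, .id = 7, .s_dev = 1 };
    struct notify cb = { .owner = me };

    memcpy(cb.fh, "filehandle-0001", 16);
    printf("wake: %d\n", should_wake(&cb, &me, "filehandle-0001"));
    cb.owner.id = 8;    /* notification for a different lock owner */
    printf("wake: %d\n", should_wake(&cb, &me, "filehandle-0001"));
    return 0;
}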
struct nfs_open_context *ctx; struct nfs4_state *state; - unsigned long timeout = NFS4_LOCK_MINTIMEOUT; int status; /* verify open state */ @@ -6220,6 +6456,11 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) if (state == NULL) return -ENOLCK; + + if ((request->fl_flags & FL_POSIX) && + !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags)) + return -ENOLCK; + /* * Don't rely on the VFS having checked the file open mode, * since it won't do this for flock() locks. @@ -6234,16 +6475,11 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) return -EBADF; } - do { - status = nfs4_proc_setlk(state, cmd, request); - if ((status != -EAGAIN) || IS_SETLK(cmd)) - break; - timeout = nfs4_set_lock_task_retry(timeout); - status = -ERESTARTSYS; - if (signalled()) - break; - } while(status < 0); - return status; + status = nfs4_set_lock_state(state, request); + if (status != 0) + return status; + + return nfs4_retry_setlk(state, cmd, request); } int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid) @@ -7104,75 +7340,161 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp, return 0; } +struct nfs41_exchange_id_data { + struct nfs41_exchange_id_res res; + struct nfs41_exchange_id_args args; + struct rpc_xprt *xprt; + int rpc_status; +}; + +static void nfs4_exchange_id_done(struct rpc_task *task, void *data) +{ + struct nfs41_exchange_id_data *cdata = + (struct nfs41_exchange_id_data *)data; + struct nfs_client *clp = cdata->args.client; + int status = task->tk_status; + + trace_nfs4_exchange_id(clp, status); + + if (status == 0) + status = nfs4_check_cl_exchange_flags(cdata->res.flags); + + if (cdata->xprt && status == 0) { + status = nfs4_detect_session_trunking(clp, &cdata->res, + cdata->xprt); + goto out; + } + + if (status == 0) + status = nfs4_sp4_select_mode(clp, &cdata->res.state_protect); + + if (status == 0) { + clp->cl_clientid = cdata->res.clientid; + clp->cl_exchange_flags = cdata->res.flags; + /* Client ID is not confirmed */ + if (!(cdata->res.flags & EXCHGID4_FLAG_CONFIRMED_R)) { + clear_bit(NFS4_SESSION_ESTABLISHED, + &clp->cl_session->session_state); + clp->cl_seqid = cdata->res.seqid; + } + + kfree(clp->cl_serverowner); + clp->cl_serverowner = cdata->res.server_owner; + cdata->res.server_owner = NULL; + + /* use the most recent implementation id */ + kfree(clp->cl_implid); + clp->cl_implid = cdata->res.impl_id; + cdata->res.impl_id = NULL; + + if (clp->cl_serverscope != NULL && + !nfs41_same_server_scope(clp->cl_serverscope, + cdata->res.server_scope)) { + dprintk("%s: server_scope mismatch detected\n", + __func__); + set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); + kfree(clp->cl_serverscope); + clp->cl_serverscope = NULL; + } + + if (clp->cl_serverscope == NULL) { + clp->cl_serverscope = cdata->res.server_scope; + cdata->res.server_scope = NULL; + } + /* Save the EXCHANGE_ID verifier session trunk tests */ + memcpy(clp->cl_confirm.data, cdata->args.verifier->data, + sizeof(clp->cl_confirm.data)); + } +out: + cdata->rpc_status = status; + return; +} + +static void nfs4_exchange_id_release(void *data) +{ + struct nfs41_exchange_id_data *cdata = + (struct nfs41_exchange_id_data *)data; + + nfs_put_client(cdata->args.client); + if (cdata->xprt) { + xprt_put(cdata->xprt); + rpc_clnt_xprt_switch_put(cdata->args.client->cl_rpcclient); + } + kfree(cdata->res.impl_id); + kfree(cdata->res.server_scope); + kfree(cdata->res.server_owner); + kfree(cdata); +} + +static const struct rpc_call_ops 
nfs4_exchange_id_call_ops = { + .rpc_call_done = nfs4_exchange_id_done, + .rpc_release = nfs4_exchange_id_release, +}; + /* * _nfs4_proc_exchange_id() * * Wrapper for EXCHANGE_ID operation. */ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, - u32 sp4_how) + u32 sp4_how, struct rpc_xprt *xprt) { nfs4_verifier verifier; - struct nfs41_exchange_id_args args = { - .verifier = &verifier, - .client = clp, -#ifdef CONFIG_NFS_V4_1_MIGRATION - .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | - EXCHGID4_FLAG_BIND_PRINC_STATEID | - EXCHGID4_FLAG_SUPP_MOVED_MIGR, -#else - .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | - EXCHGID4_FLAG_BIND_PRINC_STATEID, -#endif - }; - struct nfs41_exchange_id_res res = { - 0 - }; - int status; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_EXCHANGE_ID], - .rpc_argp = &args, - .rpc_resp = &res, .rpc_cred = cred, }; + struct rpc_task_setup task_setup_data = { + .rpc_client = clp->cl_rpcclient, + .callback_ops = &nfs4_exchange_id_call_ops, + .rpc_message = &msg, + .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, + }; + struct nfs41_exchange_id_data *calldata; + struct rpc_task *task; + int status = -EIO; + + if (!atomic_inc_not_zero(&clp->cl_count)) + goto out; + + status = -ENOMEM; + calldata = kzalloc(sizeof(*calldata), GFP_NOFS); + if (!calldata) + goto out; - nfs4_init_boot_verifier(clp, &verifier); + if (!xprt) + nfs4_init_boot_verifier(clp, &verifier); status = nfs4_init_uniform_client_string(clp); if (status) - goto out; + goto out_calldata; dprintk("NFS call exchange_id auth=%s, '%s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, clp->cl_owner_id); - res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), - GFP_NOFS); - if (unlikely(res.server_owner == NULL)) { - status = -ENOMEM; - goto out; - } + calldata->res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), + GFP_NOFS); + status = -ENOMEM; + if (unlikely(calldata->res.server_owner == NULL)) + goto out_calldata; - res.server_scope = kzalloc(sizeof(struct nfs41_server_scope), + calldata->res.server_scope = kzalloc(sizeof(struct nfs41_server_scope), GFP_NOFS); - if (unlikely(res.server_scope == NULL)) { - status = -ENOMEM; + if (unlikely(calldata->res.server_scope == NULL)) goto out_server_owner; - } - res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_NOFS); - if (unlikely(res.impl_id == NULL)) { - status = -ENOMEM; + calldata->res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_NOFS); + if (unlikely(calldata->res.impl_id == NULL)) goto out_server_scope; - } switch (sp4_how) { case SP4_NONE: - args.state_protect.how = SP4_NONE; + calldata->args.state_protect.how = SP4_NONE; break; case SP4_MACH_CRED: - args.state_protect = nfs4_sp4_mach_cred_request; + calldata->args.state_protect = nfs4_sp4_mach_cred_request; break; default: @@ -7181,56 +7503,42 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, status = -EINVAL; goto out_impl_id; } + if (xprt) { + calldata->xprt = xprt; + task_setup_data.rpc_xprt = xprt; + task_setup_data.flags = + RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC; + calldata->args.verifier = &clp->cl_confirm; + } else { + calldata->args.verifier = &verifier; + } + calldata->args.client = clp; +#ifdef CONFIG_NFS_V4_1_MIGRATION + calldata->args.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | + EXCHGID4_FLAG_BIND_PRINC_STATEID | + EXCHGID4_FLAG_SUPP_MOVED_MIGR, +#else + calldata->args.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | + EXCHGID4_FLAG_BIND_PRINC_STATEID, +#endif + msg.rpc_argp = &calldata->args; + 
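The EXCHANGE_ID conversion above replaces a synchronous rpc_call_sync() with rpc_run_task() plus an rpc_call_ops table, so the same call can either be awaited (the ordinary mount path waits via rpc_wait_for_completion_task) or fired soft and async on a specific candidate transport for the trunking probe. The done/release split is the usual completion-versus-teardown pairing: done interprets the reply, release frees resources exactly once regardless of outcome. A toy C version of such an ops table (all names invented):

#include <stdio.h>
#include <stdlib.h>

struct call;

struct call_ops {
    void (*done)(struct call *c, int status); /* interpret the reply */
    void (*release)(struct call *c);          /* teardown, runs last */
};

struct call {
    const struct call_ops *ops;
    int id;
};

static void my_done(struct call *c, int status)
{
    printf("call %d done, status=%d\n", c->id, status);
}

static void my_release(struct call *c)
{
    printf("call %d released\n", c->id);
    free(c);
}

static const struct call_ops my_ops = {
    .done = my_done,
    .release = my_release,
};

/* Stands in for the RPC engine invoking the ops as the task completes. */
static void run_call(struct call *c, int simulated_status)
{
    c->ops->done(c, simulated_status);
    c->ops->release(c);
}

int main(void)
{
    struct call *c = malloc(sizeof(*c));

    if (!c)
        return 1;
    c->ops = &my_ops;
    c->id = 1;
    run_call(c, 0);
    return 0;
}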
msg.rpc_resp = &calldata->res; + task_setup_data.callback_data = calldata; - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); - trace_nfs4_exchange_id(clp, status); - if (status == 0) - status = nfs4_check_cl_exchange_flags(res.flags); - - if (status == 0) - status = nfs4_sp4_select_mode(clp, &res.state_protect); - - if (status == 0) { - clp->cl_clientid = res.clientid; - clp->cl_exchange_flags = res.flags; - /* Client ID is not confirmed */ - if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R)) { - clear_bit(NFS4_SESSION_ESTABLISHED, - &clp->cl_session->session_state); - clp->cl_seqid = res.seqid; - } - - kfree(clp->cl_serverowner); - clp->cl_serverowner = res.server_owner; - res.server_owner = NULL; - - /* use the most recent implementation id */ - kfree(clp->cl_implid); - clp->cl_implid = res.impl_id; - res.impl_id = NULL; - - if (clp->cl_serverscope != NULL && - !nfs41_same_server_scope(clp->cl_serverscope, - res.server_scope)) { - dprintk("%s: server_scope mismatch detected\n", - __func__); - set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); - kfree(clp->cl_serverscope); - clp->cl_serverscope = NULL; - } - - if (clp->cl_serverscope == NULL) { - clp->cl_serverscope = res.server_scope; - res.server_scope = NULL; - } + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) { + status = PTR_ERR(task); + goto out_impl_id; } -out_impl_id: - kfree(res.impl_id); -out_server_scope: - kfree(res.server_scope); -out_server_owner: - kfree(res.server_owner); + if (!xprt) { + status = rpc_wait_for_completion_task(task); + if (!status) + status = calldata->rpc_status; + } else /* session trunking test */ + status = calldata->rpc_status; + + rpc_put_task(task); out: if (clp->cl_implid != NULL) dprintk("NFS reply exchange_id: Server Implementation ID: " @@ -7240,6 +7548,16 @@ out: clp->cl_implid->date.nseconds); dprintk("NFS reply exchange_id: %d\n", status); return status; + +out_impl_id: + kfree(calldata->res.impl_id); +out_server_scope: + kfree(calldata->res.server_scope); +out_server_owner: + kfree(calldata->res.server_owner); +out_calldata: + kfree(calldata); + goto out; } /* @@ -7262,14 +7580,45 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) /* try SP4_MACH_CRED if krb5i/p */ if (authflavor == RPC_AUTH_GSS_KRB5I || authflavor == RPC_AUTH_GSS_KRB5P) { - status = _nfs4_proc_exchange_id(clp, cred, SP4_MACH_CRED); + status = _nfs4_proc_exchange_id(clp, cred, SP4_MACH_CRED, NULL); if (!status) return 0; } /* try SP4_NONE */ - return _nfs4_proc_exchange_id(clp, cred, SP4_NONE); + return _nfs4_proc_exchange_id(clp, cred, SP4_NONE, NULL); +} + +/** + * nfs4_test_session_trunk + * + * This is an add_xprt_test() test function called from + * rpc_clnt_setup_test_and_add_xprt. + * + * The rpc_xprt_switch is referrenced by rpc_clnt_setup_test_and_add_xprt + * and is dereferrenced in nfs4_exchange_id_release + * + * Upon success, add the new transport to the rpc_clnt + * + * @clnt: struct rpc_clnt to get new transport + * @xprt: the rpc_xprt to test + * @data: call data for _nfs4_proc_exchange_id. + */ +int nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt, + void *data) +{ + struct nfs4_add_xprt_data *adata = (struct nfs4_add_xprt_data *)data; + u32 sp4_how; + + dprintk("--> %s try %s\n", __func__, + xprt->address_strings[RPC_DISPLAY_ADDR]); + + sp4_how = (adata->clp->cl_sp4_flags == 0 ? SP4_NONE : SP4_MACH_CRED); + + /* Test connection for session trunking. 
Async exchange_id call */ + return _nfs4_proc_exchange_id(adata->clp, adata->cred, sp4_how, xprt); } +EXPORT_SYMBOL_GPL(nfs4_test_session_trunk); static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, struct rpc_cred *cred) @@ -7463,7 +7812,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args, args->bc_attrs.max_resp_sz = max_bc_payload; args->bc_attrs.max_resp_sz_cached = 0; args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS; - args->bc_attrs.max_reqs = NFS41_BC_MAX_CALLBACKS; + args->bc_attrs.max_reqs = min_t(unsigned short, max_session_cb_slots, 1); dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u " "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n", @@ -7510,10 +7859,9 @@ static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args return -EINVAL; if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached) return -EINVAL; - /* These would render the backchannel useless: */ - if (rcvd->max_ops != sent->max_ops) + if (rcvd->max_ops > sent->max_ops) return -EINVAL; - if (rcvd->max_reqs != sent->max_reqs) + if (rcvd->max_reqs > sent->max_reqs) return -EINVAL; out: return 0; @@ -7982,6 +8330,8 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, case -NFS4ERR_RECALLCONFLICT: status = -ERECALLCONFLICT; break; + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: exception->timeout = 0; @@ -7993,6 +8343,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, &lgp->args.ctx->state->stateid)) { spin_unlock(&inode->i_lock); exception->state = lgp->args.ctx->state; + exception->stateid = &lgp->args.stateid; break; } @@ -8591,6 +8942,24 @@ static int _nfs41_test_stateid(struct nfs_server *server, return -res.status; } +static void nfs4_handle_delay_or_session_error(struct nfs_server *server, + int err, struct nfs4_exception *exception) +{ + exception->retry = 0; + switch(err) { + case -NFS4ERR_DELAY: + case -NFS4ERR_RETRY_UNCACHED_REP: + nfs4_handle_exception(server, err, exception); + break; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_DEADSESSION: + nfs4_do_handle_exception(server, err, exception); + } +} + /** * nfs41_test_stateid - perform a TEST_STATEID operation * @@ -8610,9 +8979,7 @@ static int nfs41_test_stateid(struct nfs_server *server, int err; do { err = _nfs41_test_stateid(server, stateid, cred); - if (err != -NFS4ERR_DELAY) - break; - nfs4_handle_exception(server, err, &exception); + nfs4_handle_delay_or_session_error(server, err, &exception); } while (exception.retry); return err; } @@ -8657,7 +9024,7 @@ static const struct rpc_call_ops nfs41_free_stateid_ops = { }; static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, - nfs4_stateid *stateid, + const nfs4_stateid *stateid, struct rpc_cred *cred, bool privileged) { @@ -8687,7 +9054,7 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, msg.rpc_argp = &data->args; msg.rpc_resp = &data->res; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); if (privileged) nfs4_set_sequence_privileged(&data->args.seq_args); @@ -8700,38 +9067,31 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, * @server: server / transport on which to perform the operation * @stateid: state ID to release * @cred: credential + * @is_recovery: set to true if this call needs to be privileged * - * Returns 
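The backchannel negotiation above is deliberately loosened: nfs4_verify_back_channel_attrs() now rejects only servers that inflate max_ops or max_reqs beyond what was requested, and accepts anything the server trimmed down, which is what lets max_session_cb_slots act as a request rather than a hard requirement. One caveat worth noting: min_t(unsigned short, max_session_cb_slots, 1) in nfs4_init_channel_attrs() pins the request to at most one slot regardless of the module parameter; if a floor of one was intended, max_t would express that. A sketch of the relaxed validation (plain C, attrs reduced to the two fields):

#include <stdio.h>

struct bc_attrs {
    unsigned int max_ops;
    unsigned int max_reqs;
};

/* Accept anything the server trimmed down; reject only inflation. */
static int verify_bc_attrs(const struct bc_attrs *sent,
                           const struct bc_attrs *rcvd)
{
    if (rcvd->max_ops > sent->max_ops)
        return -1;
    if (rcvd->max_reqs > sent->max_reqs)
        return -1;
    return 0;
}

int main(void)
{
    struct bc_attrs sent = { .max_ops = 2, .max_reqs = 16 };
    struct bc_attrs trimmed = { .max_ops = 1, .max_reqs = 1 };
    struct bc_attrs inflated = { .max_ops = 4, .max_reqs = 16 };

    printf("trimmed ok: %d\n", verify_bc_attrs(&sent, &trimmed));   /*  0 */
    printf("inflated ok: %d\n", verify_bc_attrs(&sent, &inflated)); /* -1 */
    return 0;
}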
NFS_OK if the server freed "stateid". Otherwise a - * negative NFS4ERR value is returned. + * Note: this function is always asynchronous. */ static int nfs41_free_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - struct rpc_cred *cred) + const nfs4_stateid *stateid, + struct rpc_cred *cred, + bool is_recovery) { struct rpc_task *task; - int ret; - task = _nfs41_free_stateid(server, stateid, cred, true); + task = _nfs41_free_stateid(server, stateid, cred, is_recovery); if (IS_ERR(task)) return PTR_ERR(task); - ret = rpc_wait_for_completion_task(task); - if (!ret) - ret = task->tk_status; rpc_put_task(task); - return ret; + return 0; } static void nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) { - struct rpc_task *task; struct rpc_cred *cred = lsp->ls_state->owner->so_cred; - task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false); + nfs41_free_stateid(server, &lsp->ls_stateid, cred, false); nfs4_free_lock_state(server, lsp); - if (IS_ERR(task)) - return; - rpc_put_task(task); } static bool nfs41_match_stateid(const nfs4_stateid *s1, @@ -8835,6 +9195,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .match_stateid = nfs4_match_stateid, .find_root_sec = nfs4_find_root_sec, .free_lock_state = nfs4_release_lockowner, + .test_and_free_expired = nfs40_test_and_free_expired_stateid, .alloc_seqid = nfs_alloc_seqid, .call_sync_ops = &nfs40_call_sync_ops, .reboot_recovery_ops = &nfs40_reboot_recovery_ops, @@ -8862,7 +9223,9 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, .free_lock_state = nfs41_free_lock_state, + .test_and_free_expired = nfs41_test_and_free_expired_stateid, .alloc_seqid = nfs_alloc_no_seqid, + .session_trunk = nfs4_test_session_trunk, .call_sync_ops = &nfs41_call_sync_ops, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, @@ -8891,7 +9254,9 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { .find_root_sec = nfs41_find_root_sec, .free_lock_state = nfs41_free_lock_state, .call_sync_ops = &nfs41_call_sync_ops, + .test_and_free_expired = nfs41_test_and_free_expired_stateid, .alloc_seqid = nfs_alloc_no_seqid, + .session_trunk = nfs4_test_session_trunk, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, .state_renewal_ops = &nfs41_state_renewal_ops, diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index f703b755351b..dae385500005 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -9,6 +9,7 @@ /* maximum number of slots to use */ #define NFS4_DEF_SLOT_TABLE_SIZE (64U) +#define NFS4_DEF_CB_SLOT_TABLE_SIZE (1U) #define NFS4_MAX_SLOT_TABLE (1024U) #define NFS4_NO_SLOT ((u32)-1) @@ -22,6 +23,7 @@ struct nfs4_slot { u32 slot_nr; u32 seq_nr; unsigned int interrupted : 1, + privileged : 1, seq_done : 1; }; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index cada00aa5096..5f4281ec5f72 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -991,6 +991,8 @@ int nfs4_select_rw_stateid(struct nfs4_state *state, { int ret; + if (!nfs4_valid_open_stateid(state)) + return -EIO; if (cred != NULL) *cred = NULL; ret = nfs4_copy_lock_stateid(dst, state, lockowner); @@ -1303,6 +1305,8 @@ void nfs4_schedule_path_down_recovery(struct nfs_client *clp) static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) { + if (!nfs4_valid_open_stateid(state)) + 
return 0; set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); /* Don't recover state that expired before the reboot */ if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) { @@ -1316,6 +1320,8 @@ static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_st int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) { + if (!nfs4_valid_open_stateid(state)) + return 0; set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); set_bit(NFS_OWNER_RECLAIM_NOGRACE, &state->owner->so_flags); @@ -1327,9 +1333,8 @@ int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_ { struct nfs_client *clp = server->nfs_client; - if (!nfs4_valid_open_stateid(state)) + if (!nfs4_state_mark_reclaim_nograce(clp, state)) return -EBADF; - nfs4_state_mark_reclaim_nograce(clp, state); dprintk("%s: scheduling stateid recovery for server %s\n", __func__, clp->cl_hostname); nfs4_schedule_state_manager(clp); @@ -1337,6 +1342,35 @@ int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_ } EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery); +static struct nfs4_lock_state * +nfs_state_find_lock_state_by_stateid(struct nfs4_state *state, + const nfs4_stateid *stateid) +{ + struct nfs4_lock_state *pos; + + list_for_each_entry(pos, &state->lock_states, ls_locks) { + if (!test_bit(NFS_LOCK_INITIALIZED, &pos->ls_flags)) + continue; + if (nfs4_stateid_match_other(&pos->ls_stateid, stateid)) + return pos; + } + return NULL; +} + +static bool nfs_state_lock_state_matches_stateid(struct nfs4_state *state, + const nfs4_stateid *stateid) +{ + bool found = false; + + if (test_bit(LK_STATE_IN_USE, &state->flags)) { + spin_lock(&state->state_lock); + if (nfs_state_find_lock_state_by_stateid(state, stateid)) + found = true; + spin_unlock(&state->state_lock); + } + return found; +} + void nfs_inode_find_state_and_recover(struct inode *inode, const nfs4_stateid *stateid) { @@ -1351,14 +1385,18 @@ void nfs_inode_find_state_and_recover(struct inode *inode, state = ctx->state; if (state == NULL) continue; - if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) + if (nfs4_stateid_match_other(&state->stateid, stateid) && + nfs4_state_mark_reclaim_nograce(clp, state)) { + found = true; continue; - if (!nfs4_stateid_match(&state->stateid, stateid)) - continue; - nfs4_state_mark_reclaim_nograce(clp, state); - found = true; + } + if (nfs_state_lock_state_matches_stateid(state, stateid) && + nfs4_state_mark_reclaim_nograce(clp, state)) + found = true; } spin_unlock(&inode->i_lock); + + nfs_inode_find_delegation_state_and_recover(inode, stateid); if (found) nfs4_schedule_state_manager(clp); } @@ -1498,6 +1536,9 @@ restart: __func__, status); case -ENOENT: case -ENOMEM: + case -EACCES: + case -EROFS: + case -EIO: case -ESTALE: /* Open state on this file cannot be recovered */ nfs4_state_mark_recovery_failed(state, status); @@ -1656,15 +1697,9 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) put_rpccred(cred); } -static void nfs_delegation_clear_all(struct nfs_client *clp) -{ - nfs_delegation_mark_reclaim(clp); - nfs_delegation_reap_unclaimed(clp); -} - static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp) { - nfs_delegation_clear_all(clp); + nfs_mark_test_expired_all_delegations(clp); nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); } @@ -2195,7 +2230,7 @@ static void nfs41_handle_all_state_revoked(struct nfs_client *clp) static void 
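The recovery-matching changes above switch from nfs4_stateid_match() to nfs4_stateid_match_other(): a revocation notice identifies a stateid by its "other" field alone, so the comparison must ignore the seqid word, which legitimately drifts as OPEN and LOCK operations bump it. A tiny C illustration of the two comparisons (layout per RFC 5661, stateid4: a 4-byte seqid followed by 12 opaque bytes):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct stateid {
    uint32_t seqid;
    unsigned char other[12];
};

static bool match_full(const struct stateid *a, const struct stateid *b)
{
    return a->seqid == b->seqid &&
           memcmp(a->other, b->other, sizeof(a->other)) == 0;
}

static bool match_other(const struct stateid *a, const struct stateid *b)
{
    return memcmp(a->other, b->other, sizeof(a->other)) == 0;
}

int main(void)
{
    struct stateid s1 = { .seqid = 1 };
    struct stateid s2 = { .seqid = 5 }; /* same state, bumped seqid */

    memcpy(s1.other, "ABCDEFGHIJKL", 12);
    memcpy(s2.other, "ABCDEFGHIJKL", 12);
    printf("full: %d, other-only: %d\n",
           match_full(&s1, &s2), match_other(&s1, &s2));
    return 0;
}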
nfs41_handle_some_state_revoked(struct nfs_client *clp) { - nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); + nfs4_state_start_reclaim_nograce(clp); nfs4_schedule_state_manager(clp); dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname); @@ -2227,13 +2262,22 @@ static void nfs41_handle_cb_path_down(struct nfs_client *clp) nfs4_schedule_state_manager(clp); } -void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) +void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags, + bool recovery) { if (!flags) return; dprintk("%s: \"%s\" (client ID %llx) flags=0x%08x\n", __func__, clp->cl_hostname, clp->cl_clientid, flags); + /* + * If we're called from the state manager thread, then assume we're + * already handling the RECLAIM_NEEDED and/or STATE_REVOKED. + * Those flags are expected to remain set until we're done + * recovering (see RFC5661, section 18.46.3). + */ + if (recovery) + goto out_recovery; if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) nfs41_handle_server_reboot(clp); @@ -2246,6 +2290,7 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) nfs4_schedule_lease_moved_recovery(clp); if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) nfs41_handle_recallable_state_revoked(clp); +out_recovery: if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT) nfs41_handle_backchannel_fault(clp); else if (flags & (SEQ4_STATUS_CB_PATH_DOWN | @@ -2410,6 +2455,13 @@ static void nfs4_state_manager(struct nfs_client *clp) nfs4_state_end_reclaim_reboot(clp); } + /* Detect expired delegations... */ + if (test_and_clear_bit(NFS4CLNT_DELEGATION_EXPIRED, &clp->cl_state)) { + section = "detect expired delegations"; + nfs_reap_expired_delegations(clp); + continue; + } + /* Now recover expired state... */ if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { section = "reclaim nograce"; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7bd3a5c09d31..fc89e5ed07ee 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1850,7 +1850,7 @@ static void encode_create_session(struct xdr_stream *xdr, *p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */ /* authsys_parms rfc1831 */ - *p++ = cpu_to_be32(nn->boot_time.tv_nsec); /* stamp */ + *p++ = cpu_to_be32(ktime_to_ns(nn->boot_time)); /* stamp */ p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); *p++ = cpu_to_be32(0); /* UID */ *p++ = cpu_to_be32(0); /* GID */ @@ -4725,34 +4725,37 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, } /* - * Decode potentially multiple layout types. Currently we only support - * one layout driver per file system. + * Decode potentially multiple layout types. 
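The recovery flag added to nfs41_handle_sequence_flag_errors() above encodes the rule from the patch's comment: when the SEQUENCE reply came in on a privileged (state-manager) slot, the RECLAIM/REVOKED status bits are expected to remain set until recovery completes (RFC 5661, section 18.46.3), so acting on them again would only re-trigger the recovery already in progress; the backchannel and callback-path bits are still honored either way. A condensed model of that dispatch:

#include <stdio.h>

#define SEQ_RESTART_RECLAIM   (1u << 0)
#define SEQ_STATE_REVOKED     (1u << 1)
#define SEQ_BACKCHANNEL_FAULT (1u << 2)
#define SEQ_CB_PATH_DOWN      (1u << 3)

static void handle_flags(unsigned int flags, int in_recovery)
{
    if (!flags)
        return;
    if (in_recovery)
        goto backchannel;  /* state bits are already being handled */

    if (flags & SEQ_RESTART_RECLAIM)
        printf("schedule reboot recovery\n");
    if (flags & SEQ_STATE_REVOKED)
        printf("schedule nograce recovery\n");

backchannel:
    if (flags & SEQ_BACKCHANNEL_FAULT)
        printf("handle backchannel fault\n");
    else if (flags & SEQ_CB_PATH_DOWN)
        printf("handle callback path down\n");
}

int main(void)
{
    unsigned int flags = SEQ_STATE_REVOKED | SEQ_CB_PATH_DOWN;

    handle_flags(flags, 0); /* both bits acted on */
    handle_flags(flags, 1); /* only the path-down bit */
    return 0;
}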
*/ -static int decode_first_pnfs_layout_type(struct xdr_stream *xdr, - uint32_t *layouttype) +static int decode_pnfs_layout_types(struct xdr_stream *xdr, + struct nfs_fsinfo *fsinfo) { __be32 *p; - int num; + uint32_t i; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out_overflow; - num = be32_to_cpup(p); + fsinfo->nlayouttypes = be32_to_cpup(p); /* pNFS is not supported by the underlying file system */ - if (num == 0) { - *layouttype = 0; + if (fsinfo->nlayouttypes == 0) return 0; - } - if (num > 1) - printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout " - "drivers per filesystem not supported\n", __func__); /* Decode and set first layout type, move xdr->p past unused types */ - p = xdr_inline_decode(xdr, num * 4); + p = xdr_inline_decode(xdr, fsinfo->nlayouttypes * 4); if (unlikely(!p)) goto out_overflow; - *layouttype = be32_to_cpup(p); + + /* If we get too many, then just cap it at the max */ + if (fsinfo->nlayouttypes > NFS_MAX_LAYOUT_TYPES) { + printk(KERN_INFO "NFS: %s: Warning: Too many (%u) pNFS layout types\n", + __func__, fsinfo->nlayouttypes); + fsinfo->nlayouttypes = NFS_MAX_LAYOUT_TYPES; + } + + for(i = 0; i < fsinfo->nlayouttypes; ++i) + fsinfo->layouttype[i] = be32_to_cpup(p++); return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -4764,7 +4767,7 @@ out_overflow: * Note we must ensure that layouttype is set in any non-error case. */ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap, - uint32_t *layouttype) + struct nfs_fsinfo *fsinfo) { int status = 0; @@ -4772,10 +4775,9 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap, if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U))) return -EIO; if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) { - status = decode_first_pnfs_layout_type(xdr, layouttype); + status = decode_pnfs_layout_types(xdr, fsinfo); bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES; - } else - *layouttype = 0; + } return status; } @@ -4856,7 +4858,7 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta); if (status != 0) goto xdr_error; - status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype); + status = decode_attr_pnfstype(xdr, bitmap, fsinfo); if (status != 0) goto xdr_error; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2c93a85eda51..56b2d96f9103 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -30,6 +30,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_page.h> #include <linux/module.h> +#include <linux/sort.h> #include "internal.h" #include "pnfs.h" #include "iostat.h" @@ -99,35 +100,79 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss) } /* + * When the server sends a list of layout types, we choose one in the order + * given in the list below. + * + * FIXME: should this list be configurable in some fashion? module param? + * mount option? something else? + */ +static const u32 ld_prefs[] = { + LAYOUT_SCSI, + LAYOUT_BLOCK_VOLUME, + LAYOUT_OSD2_OBJECTS, + LAYOUT_FLEX_FILES, + LAYOUT_NFSV4_1_FILES, + 0 +}; + +static int +ld_cmp(const void *e1, const void *e2) +{ + u32 ld1 = *((u32 *)e1); + u32 ld2 = *((u32 *)e2); + int i; + + for (i = 0; ld_prefs[i] != 0; i++) { + if (ld1 == ld_prefs[i]) + return -1; + + if (ld2 == ld_prefs[i]) + return 1; + } + return 0; +} + +/* * Try to set the server's pnfs module to the pnfs layout type specified by id. * Currently only one pNFS layout driver per filesystem is supported. * - * @id layout type. Zero (illegal layout type) indicates pNFS not in use. 
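The ld_prefs/ld_cmp pair above turns layout-driver selection into a sort problem: the server's layout-type list is reordered by client preference and the first type whose module can be found or loaded wins. Note that ld_cmp is not a total order over arbitrary values (two types absent from ld_prefs compare equal), which is acceptable for picking a preferred prefix. The same idea with libc qsort (userspace sketch; type IDs per the layouttype4 registry, preference table abbreviated):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Earlier entry = more preferred; 0 terminates the table. */
static const uint32_t ld_prefs[] = {
    5, /* SCSI */
    3, /* BLOCK_VOLUME */
    2, /* OSD2_OBJECTS */
    4, /* FLEX_FILES */
    1, /* NFSV4_1_FILES */
    0
};

static int ld_cmp(const void *e1, const void *e2)
{
    uint32_t ld1 = *(const uint32_t *)e1;
    uint32_t ld2 = *(const uint32_t *)e2;
    int i;

    if (ld1 == ld2)  /* guard keeps qsort's contract for duplicates */
        return 0;
    for (i = 0; ld_prefs[i] != 0; i++) {
        if (ld1 == ld_prefs[i])
            return -1;
        if (ld2 == ld_prefs[i])
            return 1;
    }
    return 0;
}

int main(void)
{
    uint32_t types[] = { 1, 4, 5 }; /* as sent by the server */
    size_t i, n = sizeof(types) / sizeof(types[0]);

    qsort(types, n, sizeof(types[0]), ld_cmp);
    for (i = 0; i < n; i++)
        printf("%u ", types[i]); /* 5 4 1: SCSI preferred */
    printf("\n");
    return 0;
}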
+ * @ids array of layout types supported by MDS. */ void set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, - u32 id) + struct nfs_fsinfo *fsinfo) { struct pnfs_layoutdriver_type *ld_type = NULL; + u32 id; + int i; - if (id == 0) - goto out_no_driver; if (!(server->nfs_client->cl_exchange_flags & (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { - printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n", - __func__, id, server->nfs_client->cl_exchange_flags); + printk(KERN_ERR "NFS: %s: cl_exchange_flags 0x%x\n", + __func__, server->nfs_client->cl_exchange_flags); goto out_no_driver; } - ld_type = find_pnfs_driver(id); - if (!ld_type) { - request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id); + + sort(fsinfo->layouttype, fsinfo->nlayouttypes, + sizeof(*fsinfo->layouttype), ld_cmp, NULL); + + for (i = 0; i < fsinfo->nlayouttypes; i++) { + id = fsinfo->layouttype[i]; ld_type = find_pnfs_driver(id); if (!ld_type) { - dprintk("%s: No pNFS module found for %u.\n", - __func__, id); - goto out_no_driver; + request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, + id); + ld_type = find_pnfs_driver(id); } + if (ld_type) + break; + } + + if (!ld_type) { + dprintk("%s: No pNFS module found!\n", __func__); + goto out_no_driver; } + server->pnfs_curr_ld = ld_type; if (ld_type->set_layoutdriver && ld_type->set_layoutdriver(server, mntfh)) { @@ -2185,10 +2230,8 @@ static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr) */ void pnfs_ld_read_done(struct nfs_pgio_header *hdr) { - if (likely(!hdr->pnfs_error)) { - __nfs4_read_done_cb(hdr); + if (likely(!hdr->pnfs_error)) hdr->mds_ops->rpc_call_done(&hdr->task, hdr); - } trace_nfs4_pnfs_read(hdr, hdr->pnfs_error); if (unlikely(hdr->pnfs_error)) pnfs_ld_handle_read_error(hdr); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 31d99b2927b0..5c295512c967 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -236,7 +236,7 @@ void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_put_lseg(struct pnfs_layout_segment *lseg); void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg); -void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); +void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *); void unset_pnfs_layoutdriver(struct nfs_server *); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); @@ -657,7 +657,8 @@ pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) } static inline void set_pnfs_layoutdriver(struct nfs_server *s, - const struct nfs_fh *mntfh, u32 id) + const struct nfs_fh *mntfh, + struct nfs_fsinfo *fsinfo) { } diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index f3468b57a32a..53b4705abcc7 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -690,13 +690,50 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, dprintk("%s: DS %s: trying address %s\n", __func__, ds->ds_remotestr, da->da_remotestr); - clp = nfs4_set_ds_client(mds_srv, - (struct sockaddr *)&da->da_addr, - da->da_addrlen, IPPROTO_TCP, - timeo, retrans, minor_version, - au_flavor); - if (!IS_ERR(clp)) - break; + if (!IS_ERR(clp) && clp->cl_mvops->session_trunk) { + struct xprt_create xprt_args = { + .ident = XPRT_TRANSPORT_TCP, + .net = clp->cl_net, + .dstaddr = (struct sockaddr *)&da->da_addr, + .addrlen = da->da_addrlen, + .servername = clp->cl_hostname, + }; + struct nfs4_add_xprt_data xprtdata = { + .clp = clp, + .cred = 
nfs4_get_clid_cred(clp),
+			};
+			struct rpc_add_xprt_test rpcdata = {
+				.add_xprt_test = clp->cl_mvops->session_trunk,
+				.data = &xprtdata,
+			};
+
+			/*
+			 * Test this address for session trunking and
+			 * add as an alias
+			 */
+			rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
+					  rpc_clnt_setup_test_and_add_xprt,
+					  &rpcdata);
+			if (xprtdata.cred)
+				put_rpccred(xprtdata.cred);
+		} else {
+			clp = nfs4_set_ds_client(mds_srv,
+						(struct sockaddr *)&da->da_addr,
+						da->da_addrlen, IPPROTO_TCP,
+						timeo, retrans, minor_version,
+						au_flavor);
+			if (IS_ERR(clp))
+				continue;
+
+			status = nfs4_init_ds_session(clp,
+					mds_srv->nfs_client->cl_lease_time);
+			if (status) {
+				nfs_put_client(clp);
+				clp = ERR_PTR(-EIO);
+				continue;
+			}
+
+		}
 	}
 
 	if (IS_ERR(clp)) {
@@ -704,18 +741,11 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
 		goto out;
 	}
 
-	status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time);
-	if (status)
-		goto out_put;
-
 	smp_wmb();
 	ds->ds_clp = clp;
 	dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
 out:
 	return status;
-out_put:
-	nfs_put_client(clp);
-	goto out;
 }
 
 /*
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index d39601381adf..001796bcd6c8 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2848,19 +2848,23 @@ out_invalid_transport_udp:
  * NFS client for backwards compatibility
  */
 unsigned int nfs_callback_set_tcpport;
+unsigned short nfs_callback_nr_threads;
 /* Default cache timeout is 10 minutes */
 unsigned int nfs_idmap_cache_timeout = 600;
 /* Turn off NFSv4 uid/gid mapping when using AUTH_SYS */
 bool nfs4_disable_idmapping = true;
 unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE;
+unsigned short max_session_cb_slots = NFS4_DEF_CB_SLOT_TABLE_SIZE;
 unsigned short send_implementation_id = 1;
 char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = "";
 bool recover_lost_locks = false;
+EXPORT_SYMBOL_GPL(nfs_callback_nr_threads);
 EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport);
 EXPORT_SYMBOL_GPL(nfs_idmap_cache_timeout);
 EXPORT_SYMBOL_GPL(nfs4_disable_idmapping);
 EXPORT_SYMBOL_GPL(max_session_slots);
+EXPORT_SYMBOL_GPL(max_session_cb_slots);
 EXPORT_SYMBOL_GPL(send_implementation_id);
 EXPORT_SYMBOL_GPL(nfs4_client_id_uniquifier);
 EXPORT_SYMBOL_GPL(recover_lost_locks);
@@ -2887,6 +2891,9 @@ static const struct kernel_param_ops param_ops_portnr = {
 #define param_check_portnr(name, p) __param_check(name, p, unsigned int);
 
 module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644);
+module_param_named(callback_nr_threads, nfs_callback_nr_threads, ushort, 0644);
+MODULE_PARM_DESC(callback_nr_threads, "Number of threads that will be "
+		"assigned to the NFSv4 callback channels.");
 module_param(nfs_idmap_cache_timeout, int, 0644);
 module_param(nfs4_disable_idmapping, bool, 0644);
 module_param_string(nfs4_unique_id, nfs4_client_id_uniquifier,
@@ -2896,6 +2903,9 @@ MODULE_PARM_DESC(nfs4_disable_idmapping,
 module_param(max_session_slots, ushort, 0644);
 MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 "
 		"requests the client will negotiate");
+module_param(max_session_cb_slots, ushort, 0644);
+MODULE_PARM_DESC(max_session_cb_slots, "Maximum number of parallel NFSv4.1 "
+		"callbacks the client will process for a given server");
 module_param(send_implementation_id, ushort, 0644);
 MODULE_PARM_DESC(send_implementation_id,
 		"Send implementation ID with NFSv4.1 exchange_id");
diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
index df880e9fa71f..b67287383010 100644
--- a/fs/nfsd/flexfilelayout.c
+++ 
b/fs/nfsd/flexfilelayout.c @@ -126,6 +126,7 @@ nfsd4_ff_proc_getdeviceinfo(struct super_block *sb, struct svc_rqst *rqstp, const struct nfsd4_layout_ops ff_layout_ops = { .notify_types = NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE, + .disable_recalls = true, .proc_getdeviceinfo = nfsd4_ff_proc_getdeviceinfo, .encode_getdeviceinfo = nfsd4_ff_encode_getdeviceinfo, .proc_layoutget = nfsd4_ff_proc_layoutget, diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 5fbf3bbd00d0..b10d557f9c9e 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -84,6 +84,7 @@ struct nfsd_net { struct list_head client_lru; struct list_head close_lru; struct list_head del_recall_lru; + struct list_head blocked_locks_lru; struct delayed_work laundromat_work; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 04c68d900324..211dc2aed8e1 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -448,7 +448,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr, { int status; - if (cb->cb_minorversion == 0) + if (cb->cb_clp->cl_minorversion == 0) return 0; status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_seq_status); @@ -485,7 +485,7 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, const struct nfs4_delegation *dp = cb_to_delegation(cb); struct nfs4_cb_compound_hdr hdr = { .ident = cb->cb_clp->cl_cb_ident, - .minorversion = cb->cb_minorversion, + .minorversion = cb->cb_clp->cl_minorversion, }; encode_cb_compound4args(xdr, &hdr); @@ -594,7 +594,7 @@ static void nfs4_xdr_enc_cb_layout(struct rpc_rqst *req, container_of(cb, struct nfs4_layout_stateid, ls_recall); struct nfs4_cb_compound_hdr hdr = { .ident = 0, - .minorversion = cb->cb_minorversion, + .minorversion = cb->cb_clp->cl_minorversion, }; encode_cb_compound4args(xdr, &hdr); @@ -623,6 +623,62 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp, } #endif /* CONFIG_NFSD_PNFS */ +static void encode_stateowner(struct xdr_stream *xdr, struct nfs4_stateowner *so) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 8 + 4 + so->so_owner.len); + p = xdr_encode_opaque_fixed(p, &so->so_client->cl_clientid, 8); + xdr_encode_opaque(p, so->so_owner.data, so->so_owner.len); +} + +static void nfs4_xdr_enc_cb_notify_lock(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfsd4_callback *cb) +{ + const struct nfsd4_blocked_lock *nbl = + container_of(cb, struct nfsd4_blocked_lock, nbl_cb); + struct nfs4_lockowner *lo = (struct nfs4_lockowner *)nbl->nbl_lock.fl_owner; + struct nfs4_cb_compound_hdr hdr = { + .ident = 0, + .minorversion = cb->cb_clp->cl_minorversion, + }; + + __be32 *p; + + BUG_ON(hdr.minorversion == 0); + + encode_cb_compound4args(xdr, &hdr); + encode_cb_sequence4args(xdr, cb, &hdr); + + p = xdr_reserve_space(xdr, 4); + *p = cpu_to_be32(OP_CB_NOTIFY_LOCK); + encode_nfs_fh4(xdr, &nbl->nbl_fh); + encode_stateowner(xdr, &lo->lo_owner); + hdr.nops++; + + encode_cb_nops(&hdr); +} + +static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfsd4_callback *cb) +{ + struct nfs4_cb_compound_hdr hdr; + int status; + + status = decode_cb_compound4res(xdr, &hdr); + if (unlikely(status)) + return status; + + if (cb) { + status = decode_cb_sequence4res(xdr, cb); + if (unlikely(status || cb->cb_seq_status)) + return status; + } + return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status); +} + /* * RPC procedure tables */ @@ -643,6 +699,7 @@ static struct rpc_procinfo nfs4_cb_procedures[] = { #ifdef CONFIG_NFSD_PNFS PROC(CB_LAYOUT, COMPOUND, 
cb_layout, cb_layout),
 #endif
+	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
 };
 
 static struct rpc_version nfs_cb_version4 = {
@@ -862,7 +919,6 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
 	struct nfs4_client *clp = cb->cb_clp;
 	u32 minorversion = clp->cl_minorversion;
 
-	cb->cb_minorversion = minorversion;
 	/*
 	 * cb_seq_status is only set in decode_cb_sequence4res,
 	 * and so will remain 1 if an rpc level failure occurs.
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 2be9602b0221..42aace4fc4c8 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -174,7 +174,8 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
 	list_del_init(&ls->ls_perfile);
 	spin_unlock(&fp->fi_lock);
 
-	vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls);
+	if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
+		vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls);
 	fput(ls->ls_file);
 
 	if (ls->ls_recalled)
@@ -189,6 +190,9 @@ nfsd4_layout_setlease(struct nfs4_layout_stateid *ls)
 	struct file_lock *fl;
 	int status;
 
+	if (nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
+		return 0;
+
 	fl = locks_alloc_lock();
 	if (!fl)
 		return -ENOMEM;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 1fb222752b2b..abb09b580389 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1010,47 +1010,97 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 }
 
 static __be32
-nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		struct nfsd4_clone *clone)
+nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		  stateid_t *src_stateid, struct file **src,
+		  stateid_t *dst_stateid, struct file **dst)
 {
-	struct file *src, *dst;
 	__be32 status;
 
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
-					    &clone->cl_src_stateid, RD_STATE,
-					    &src, NULL);
+					    src_stateid, RD_STATE, src, NULL);
 	if (status) {
 		dprintk("NFSD: %s: couldn't process src stateid!\n",
 			__func__);
 		goto out;
 	}
 
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
-					    &clone->cl_dst_stateid, WR_STATE,
-					    &dst, NULL);
+					    dst_stateid, WR_STATE, dst, NULL);
 	if (status) {
 		dprintk("NFSD: %s: couldn't process dst stateid!\n",
 			__func__);
 		goto out_put_src;
 	}
 
 	/* fix up for NFS-specific error code */
-	if (!S_ISREG(file_inode(src)->i_mode) ||
-	    !S_ISREG(file_inode(dst)->i_mode)) {
+	if (!S_ISREG(file_inode(*src)->i_mode) ||
+	    !S_ISREG(file_inode(*dst)->i_mode)) {
 		status = nfserr_wrong_type;
 		goto out_put_dst;
 	}
 
+out:
+	return status;
+out_put_dst:
+	fput(*dst);
+out_put_src:
+	fput(*src);
+	goto out;
+}
+
+static __be32
+nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		struct nfsd4_clone *clone)
+{
+	struct file *src, *dst;
+	__be32 status;
+
+	status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
+				   &clone->cl_dst_stateid, &dst);
+	if (status)
+		goto out;
+
 	status = nfsd4_clone_file_range(src, clone->cl_src_pos,
 			dst, clone->cl_dst_pos, clone->cl_count);
 
-out_put_dst:
 	fput(dst);
-out_put_src:
 	fput(src);
 out:
 	return status;
 }
 
 static __be32
+nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		struct nfsd4_copy *copy)
+{
+	struct file *src, *dst;
+	__be32 status;
+	ssize_t bytes;
+
+	status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid, &src,
+				   &copy->cp_dst_stateid, &dst);
+	if (status)
+		goto out;
+
+	bytes = nfsd_copy_file_range(src, copy->cp_src_pos,
+			dst, copy->cp_dst_pos, copy->cp_count);
+
+	if (bytes < 0)
+		status = nfserrno(bytes);
+	else {
+		
copy->cp_res.wr_bytes_written = bytes;
+		copy->cp_res.wr_stable_how = NFS_UNSTABLE;
+		copy->cp_consecutive = 1;
+		copy->cp_synchronous = 1;
+		gen_boot_verifier(&copy->cp_res.wr_verifier, SVC_NET(rqstp));
+		status = nfs_ok;
+	}
+
+	fput(src);
+	fput(dst);
+out:
+	return status;
+}
+
+static __be32
 nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		struct nfsd4_fallocate *fallocate, int flags)
 {
@@ -1966,6 +2016,18 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd
 		op_encode_channel_attrs_maxsz) * sizeof(__be32);
 }
 
+static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+	return (op_encode_hdr_size +
+		1 /* wr_callback */ +
+		op_encode_stateid_maxsz /* wr_callback */ +
+		2 /* wr_count */ +
+		1 /* wr_committed */ +
+		op_encode_verifier_maxsz +
+		1 /* cr_consecutive */ +
+		1 /* cr_synchronous */) * sizeof(__be32);
+}
+
 #ifdef CONFIG_NFSD_PNFS
 /*
  * At this stage we don't really know what layout driver will handle the request,
@@ -2328,6 +2390,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_name = "OP_CLONE",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
 	},
+	[OP_COPY] = {
+		.op_func = (nfsd4op_func)nfsd4_copy,
+		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+		.op_name = "OP_COPY",
+		.op_rsize_bop = (nfsd4op_rsize)nfsd4_copy_rsize,
+	},
 	[OP_SEEK] = {
 		.op_func = (nfsd4op_func)nfsd4_seek,
 		.op_name = "OP_SEEK",
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 39bfaba9c99c..9752beb78659 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -99,6 +99,7 @@ static struct kmem_cache *odstate_slab;
 
 static void free_session(struct nfsd4_session *);
 
 static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
+static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
 
 static bool is_session_dead(struct nfsd4_session *ses)
 {
@@ -210,6 +211,85 @@ static void nfsd4_put_session(struct nfsd4_session *ses)
 	spin_unlock(&nn->client_lock);
 }
 
+static struct nfsd4_blocked_lock *
+find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
+			struct nfsd_net *nn)
+{
+	struct nfsd4_blocked_lock *cur, *found = NULL;
+
+	spin_lock(&nn->client_lock);
+	list_for_each_entry(cur, &lo->lo_blocked, nbl_list) {
+		if (fh_match(fh, &cur->nbl_fh)) {
+			list_del_init(&cur->nbl_list);
+			list_del_init(&cur->nbl_lru);
+			found = cur;
+			break;
+		}
+	}
+	spin_unlock(&nn->client_lock);
+	if (found)
+		posix_unblock_lock(&found->nbl_lock);
+	return found;
+}
+
+static struct nfsd4_blocked_lock *
+find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
+			struct nfsd_net *nn)
+{
+	struct nfsd4_blocked_lock *nbl;
+
+	nbl = find_blocked_lock(lo, fh, nn);
+	if (!nbl) {
+		nbl = kmalloc(sizeof(*nbl), GFP_KERNEL);
+		if (nbl) {
+			fh_copy_shallow(&nbl->nbl_fh, fh);
+			locks_init_lock(&nbl->nbl_lock);
+			nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
+					&nfsd4_cb_notify_lock_ops,
+					NFSPROC4_CLNT_CB_NOTIFY_LOCK);
+		}
+	}
+	return nbl;
+}
+
+static void
+free_blocked_lock(struct nfsd4_blocked_lock *nbl)
+{
+	locks_release_private(&nbl->nbl_lock);
+	kfree(nbl);
+}
+
+static int
+nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
+{
+	/*
+	 * Since this is just an optimization, we don't try very hard if it
+	 * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and
+	 * just quit trying on anything else.
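+	 *
+	 * Sketch of the resulting policy (by the nfsd4_callback_ops
+	 * convention, returning 0 from ->done asks the callback machinery
+	 * to requeue the call, 1 means we are finished with it):
+	 *
+	 *	-NFS4ERR_DELAY	-> rpc_delay(task, 1 * HZ); return 0;
+	 *	anything else	-> return 1;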
+ */ + switch (task->tk_status) { + case -NFS4ERR_DELAY: + rpc_delay(task, 1 * HZ); + return 0; + default: + return 1; + } +} + +static void +nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb) +{ + struct nfsd4_blocked_lock *nbl = container_of(cb, + struct nfsd4_blocked_lock, nbl_cb); + + free_blocked_lock(nbl); +} + +static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = { + .done = nfsd4_cb_notify_lock_done, + .release = nfsd4_cb_notify_lock_release, +}; + static inline struct nfs4_stateowner * nfs4_get_stateowner(struct nfs4_stateowner *sop) { @@ -3224,9 +3304,10 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, goto out; /* cases below refer to rfc 3530 section 14.2.34: */ if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) { - if (conf && !unconf) /* case 2: probable retransmit */ + if (conf && same_verf(&confirm, &conf->cl_confirm)) { + /* case 2: probable retransmit */ status = nfs_ok; - else /* case 4: client hasn't noticed we rebooted yet? */ + } else /* case 4: client hasn't noticed we rebooted yet? */ status = nfserr_stale_clientid; goto out; } @@ -4410,9 +4491,11 @@ out: * To finish the open response, we just need to set the rflags. */ open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; - if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) && - !nfsd4_has_session(&resp->cstate)) + if (nfsd4_has_session(&resp->cstate)) + open->op_rflags |= NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK; + else if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED)) open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; + if (dp) nfs4_put_stid(&dp->dl_stid); if (stp) @@ -4501,6 +4584,7 @@ nfs4_laundromat(struct nfsd_net *nn) struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct nfs4_ol_stateid *stp; + struct nfsd4_blocked_lock *nbl; struct list_head *pos, *next, reaplist; time_t cutoff = get_seconds() - nn->nfsd4_lease; time_t t, new_timeo = nn->nfsd4_lease; @@ -4569,6 +4653,41 @@ nfs4_laundromat(struct nfsd_net *nn) } spin_unlock(&nn->client_lock); + /* + * It's possible for a client to try and acquire an already held lock + * that is being held for a long time, and then lose interest in it. + * So, we clean out any un-revisited request after a lease period + * under the assumption that the client is no longer interested. + * + * RFC5661, sec. 9.6 states that the client must not rely on getting + * notifications and must continue to poll for locks, even when the + * server supports them. Thus this shouldn't lead to clients blocking + * indefinitely once the lock does become free. 
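+	 *
+	 * Rough sketch of the scan below (cutoff is one lease period in
+	 * the past, and the LRU list is in insertion order, oldest first):
+	 *
+	 *	if (time_after(nbl->nbl_time, cutoff))
+	 *		break;		// everything after this is newer
+	 *	list_move(&nbl->nbl_lru, &reaplist);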
+	 */
+	BUG_ON(!list_empty(&reaplist));
+	spin_lock(&nn->client_lock);
+	while (!list_empty(&nn->blocked_locks_lru)) {
+		nbl = list_first_entry(&nn->blocked_locks_lru,
+					struct nfsd4_blocked_lock, nbl_lru);
+		if (time_after((unsigned long)nbl->nbl_time,
+			       (unsigned long)cutoff)) {
+			t = nbl->nbl_time - cutoff;
+			new_timeo = min(new_timeo, t);
+			break;
+		}
+		list_move(&nbl->nbl_lru, &reaplist);
+		list_del_init(&nbl->nbl_list);
+	}
+	spin_unlock(&nn->client_lock);
+
+	while (!list_empty(&reaplist)) {
+		nbl = list_first_entry(&reaplist,
+					struct nfsd4_blocked_lock, nbl_lru);
+		list_del_init(&nbl->nbl_lru);
+		posix_unblock_lock(&nbl->nbl_lock);
+		free_blocked_lock(nbl);
+	}
+
 	new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
 	return new_timeo;
 }
@@ -5309,7 +5428,31 @@ nfsd4_fl_put_owner(fl_owner_t owner)
 		nfs4_put_stateowner(&lo->lo_owner);
 }
 
+static void
+nfsd4_lm_notify(struct file_lock *fl)
+{
+	struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner;
+	struct net *net = lo->lo_owner.so_client->net;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	struct nfsd4_blocked_lock *nbl = container_of(fl,
+				struct nfsd4_blocked_lock, nbl_lock);
+	bool queue = false;
+
+	/* An empty list means that something else is going to be using it */
+	spin_lock(&nn->client_lock);
+	if (!list_empty(&nbl->nbl_list)) {
+		list_del_init(&nbl->nbl_list);
+		list_del_init(&nbl->nbl_lru);
+		queue = true;
+	}
+	spin_unlock(&nn->client_lock);
+
+	if (queue)
+		nfsd4_run_cb(&nbl->nbl_cb);
+}
+
 static const struct lock_manager_operations nfsd_posix_mng_ops  = {
+	.lm_notify = nfsd4_lm_notify,
 	.lm_get_owner = nfsd4_fl_get_owner,
 	.lm_put_owner = nfsd4_fl_put_owner,
 };
@@ -5407,6 +5550,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
 	lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp);
 	if (!lo)
 		return NULL;
+	INIT_LIST_HEAD(&lo->lo_blocked);
 	INIT_LIST_HEAD(&lo->lo_owner.so_stateids);
 	lo->lo_owner.so_is_open_owner = 0;
 	lo->lo_owner.so_seqid = lock->lk_new_lock_seqid;
@@ -5588,12 +5732,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	struct nfs4_ol_stateid *open_stp = NULL;
 	struct nfs4_file *fp;
 	struct file *filp = NULL;
+	struct nfsd4_blocked_lock *nbl = NULL;
 	struct file_lock *file_lock = NULL;
 	struct file_lock *conflock = NULL;
 	__be32 status = 0;
 	int lkflg;
 	int err;
 	bool new = false;
+	unsigned char fl_type;
+	unsigned int fl_flags = FL_POSIX;
 	struct net *net = SVC_NET(rqstp);
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
@@ -5658,46 +5805,55 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (!locks_in_grace(net) && lock->lk_reclaim)
 		goto out;
 
-	file_lock = locks_alloc_lock();
-	if (!file_lock) {
-		dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
-		status = nfserr_jukebox;
-		goto out;
-	}
-
 	fp = lock_stp->st_stid.sc_file;
 	switch (lock->lk_type) {
-		case NFS4_READ_LT:
 		case NFS4_READW_LT:
+			if (nfsd4_has_session(cstate))
+				fl_flags |= FL_SLEEP;
+			/* Fallthrough */
+		case NFS4_READ_LT:
 			spin_lock(&fp->fi_lock);
 			filp = find_readable_file_locked(fp);
 			if (filp)
 				get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
 			spin_unlock(&fp->fi_lock);
-			file_lock->fl_type = F_RDLCK;
+			fl_type = F_RDLCK;
 			break;
-		case NFS4_WRITE_LT:
 		case NFS4_WRITEW_LT:
+			if (nfsd4_has_session(cstate))
+				fl_flags |= FL_SLEEP;
+			/* Fallthrough */
+		case NFS4_WRITE_LT:
 			spin_lock(&fp->fi_lock);
 			filp = find_writeable_file_locked(fp);
 			if (filp)
 				get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
 			spin_unlock(&fp->fi_lock);
-			file_lock->fl_type = F_WRLCK;
+			fl_type = F_WRLCK;
 			break;
 		default:
 			status = nfserr_inval;
 		goto out;
 	}
+
 	if (!filp) {
 		status = nfserr_openmode;
 		goto out;
 	}
 
+	nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
+	if (!nbl) {
+		dprintk("NFSD: %s: unable to allocate block!\n", __func__);
+		status = nfserr_jukebox;
+		goto out;
+	}
+
+	file_lock = &nbl->nbl_lock;
+	file_lock->fl_type = fl_type;
 	file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
 	file_lock->fl_pid = current->tgid;
 	file_lock->fl_file = filp;
-	file_lock->fl_flags = FL_POSIX;
+	file_lock->fl_flags = fl_flags;
 	file_lock->fl_lmops = &nfsd_posix_mng_ops;
 	file_lock->fl_start = lock->lk_offset;
 	file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length);
@@ -5710,18 +5866,29 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		goto out;
 	}
 
+	if (fl_flags & FL_SLEEP) {
+		nbl->nbl_time = get_seconds();
+		spin_lock(&nn->client_lock);
+		list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
+		list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
+		spin_unlock(&nn->client_lock);
+	}
+
 	err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
-	switch (-err) {
+	switch (err) {
 	case 0: /* success! */
 		nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
 		status = 0;
 		break;
-	case (EAGAIN):		/* conflock holds conflicting lock */
+	case FILE_LOCK_DEFERRED:
+		nbl = NULL;
+		/* Fallthrough */
+	case -EAGAIN:		/* conflock holds conflicting lock */
 		status = nfserr_denied;
 		dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
 		nfs4_set_lock_denied(conflock, &lock->lk_denied);
 		break;
-	case (EDEADLK):
+	case -EDEADLK:
 		status = nfserr_deadlock;
 		break;
 	default:
@@ -5730,6 +5897,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		break;
 	}
 out:
+	if (nbl) {
+		/* dequeue it if we queued it before */
+		if (fl_flags & FL_SLEEP) {
+			spin_lock(&nn->client_lock);
+			list_del_init(&nbl->nbl_list);
+			list_del_init(&nbl->nbl_lru);
+			spin_unlock(&nn->client_lock);
+		}
+		free_blocked_lock(nbl);
+	}
 	if (filp)
 		fput(filp);
 	if (lock_stp) {
@@ -5753,8 +5930,6 @@ out:
 	if (open_stp)
 		nfs4_put_stid(&open_stp->st_stid);
 	nfsd4_bump_seqid(cstate, status);
-	if (file_lock)
-		locks_free_lock(file_lock);
 	if (conflock)
 		locks_free_lock(conflock);
 	return status;
@@ -6768,6 +6943,7 @@ static int nfs4_state_create_net(struct net *net)
 	INIT_LIST_HEAD(&nn->client_lru);
 	INIT_LIST_HEAD(&nn->close_lru);
 	INIT_LIST_HEAD(&nn->del_recall_lru);
+	INIT_LIST_HEAD(&nn->blocked_locks_lru);
 	spin_lock_init(&nn->client_lock);
 
 	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
@@ -6865,6 +7041,7 @@ nfs4_state_shutdown_net(struct net *net)
 	struct nfs4_delegation *dp = NULL;
 	struct list_head *pos, *next, reaplist;
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	struct nfsd4_blocked_lock *nbl;
 
 	cancel_delayed_work_sync(&nn->laundromat_work);
 	locks_end_grace(&nn->nfsd4_manager);
@@ -6885,6 +7062,24 @@ nfs4_state_shutdown_net(struct net *net)
 		nfs4_put_stid(&dp->dl_stid);
 	}
 
+	BUG_ON(!list_empty(&reaplist));
+	spin_lock(&nn->client_lock);
+	while (!list_empty(&nn->blocked_locks_lru)) {
+		nbl = list_first_entry(&nn->blocked_locks_lru,
+					struct nfsd4_blocked_lock, nbl_lru);
+		list_move(&nbl->nbl_lru, &reaplist);
+		list_del_init(&nbl->nbl_list);
+	}
+	spin_unlock(&nn->client_lock);
+
+	while (!list_empty(&reaplist)) {
+		nbl = list_first_entry(&reaplist,
+					struct nfsd4_blocked_lock, nbl_lru);
+		list_del_init(&nbl->nbl_lru);
+		
posix_unblock_lock(&nbl->nbl_lock);
+		free_blocked_lock(nbl);
+	}
+
 	nfsd4_client_tracking_exit(net);
 	nfs4_state_destroy_net(net);
 }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0aa0236a1429..c2d2895a1ec1 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1694,6 +1694,30 @@ nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
 }
 
 static __be32
+nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
+{
+	DECODE_HEAD;
+	unsigned int tmp;
+
+	status = nfsd4_decode_stateid(argp, &copy->cp_src_stateid);
+	if (status)
+		return status;
+	status = nfsd4_decode_stateid(argp, &copy->cp_dst_stateid);
+	if (status)
+		return status;
+
+	READ_BUF(8 + 8 + 8 + 4 + 4 + 4);
+	p = xdr_decode_hyper(p, &copy->cp_src_pos);
+	p = xdr_decode_hyper(p, &copy->cp_dst_pos);
+	p = xdr_decode_hyper(p, &copy->cp_count);
+	copy->cp_consecutive = be32_to_cpup(p++);
+	copy->cp_synchronous = be32_to_cpup(p++);
+	tmp = be32_to_cpup(p); /* Source server list not supported */
+
+	DECODE_TAIL;
+}
+
+static __be32
 nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
 {
 	DECODE_HEAD;
@@ -1793,7 +1817,7 @@ static nfsd4_dec nfsd4_dec_ops[] = {
 
 	/* new operations for NFSv4.2 */
 	[OP_ALLOCATE]		= (nfsd4_dec)nfsd4_decode_fallocate,
-	[OP_COPY]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_COPY]		= (nfsd4_dec)nfsd4_decode_copy,
 	[OP_COPY_NOTIFY]	= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_DEALLOCATE]		= (nfsd4_dec)nfsd4_decode_fallocate,
 	[OP_IO_ADVISE]		= (nfsd4_dec)nfsd4_decode_notsupp,
@@ -4062,7 +4086,7 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
 	u32 starting_len = xdr->buf->len, needed_len;
 	__be32 *p;
 
-	dprintk("%s: err %d\n", __func__, nfserr);
+	dprintk("%s: err %d\n", __func__, be32_to_cpu(nfserr));
 	if (nfserr)
 		goto out;
 
@@ -4202,6 +4226,41 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
 #endif /* CONFIG_NFSD_PNFS */
 
 static __be32
+nfsd42_encode_write_res(struct nfsd4_compoundres *resp, struct nfsd42_write_res *write)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(&resp->xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE);
+	if (!p)
+		return nfserr_resource;
+
+	*p++ = cpu_to_be32(0);
+	p = xdr_encode_hyper(p, write->wr_bytes_written);
+	*p++ = cpu_to_be32(write->wr_stable_how);
+	p = xdr_encode_opaque_fixed(p, write->wr_verifier.data,
+				    NFS4_VERIFIER_SIZE);
+	return nfs_ok;
+}
+
+static __be32
+nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
+		  struct nfsd4_copy *copy)
+{
+	__be32 *p;
+
+	if (!nfserr) {
+		nfserr = nfsd42_encode_write_res(resp, &copy->cp_res);
+		if (nfserr)
+			return nfserr;
+
+		p = xdr_reserve_space(&resp->xdr, 4 + 4);
+		*p++ = cpu_to_be32(copy->cp_consecutive);
+		*p++ = cpu_to_be32(copy->cp_synchronous);
+	}
+	return nfserr;
+}
+
+static __be32
 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
 		  struct nfsd4_seek *seek)
 {
@@ -4300,7 +4359,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
 
 	/* NFSv4.2 operations */
 	[OP_ALLOCATE]		= (nfsd4_enc)nfsd4_encode_noop,
-	[OP_COPY]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_COPY]		= (nfsd4_enc)nfsd4_encode_copy,
 	[OP_COPY_NOTIFY]	= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_DEALLOCATE]		= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_IO_ADVISE]		= (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 65ad0165a94f..36b2af931e06 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1216,6 +1216,8 @@ static __net_init int nfsd_init_net(struct net *net)
 		goto out_idmap_error;
 	nn->nfsd4_lease = 90;	/* default lease time */
 	nn->nfsd4_grace = 90;
+	nn->clverifier_counter = 
prandom_u32(); + nn->clientid_counter = prandom_u32(); return 0; out_idmap_error: diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 08188743db53..010aff5c5a79 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -789,6 +789,7 @@ nfserrno (int errno) { nfserr_toosmall, -ETOOSMALL }, { nfserr_serverfault, -ESERVERFAULT }, { nfserr_serverfault, -ENFILE }, + { nfserr_io, -EUCLEAN }, }; int i; @@ -796,7 +797,7 @@ nfserrno (int errno) if (nfs_errtbl[i].syserr == errno) return nfs_errtbl[i].nfserr; } - WARN(1, "nfsd: non-standard errno: %d\n", errno); + WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno); return nfserr_io; } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 45007acaf364..a2b65fc56dd6 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -366,14 +366,21 @@ static struct notifier_block nfsd_inet6addr_notifier = { }; #endif +/* Only used under nfsd_mutex, so this atomic may be overkill: */ +static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0); + static void nfsd_last_thread(struct svc_serv *serv, struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - unregister_inetaddr_notifier(&nfsd_inetaddr_notifier); + /* check if the notifier still has clients */ + if (atomic_dec_return(&nfsd_notifier_refcount) == 0) { + unregister_inetaddr_notifier(&nfsd_inetaddr_notifier); #if IS_ENABLED(CONFIG_IPV6) - unregister_inet6addr_notifier(&nfsd_inet6addr_notifier); + unregister_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif + } + /* * write_ports can create the server without actually starting * any threads--if we get shut down before any threads are @@ -488,10 +495,13 @@ int nfsd_create_serv(struct net *net) } set_max_drc(); - register_inetaddr_notifier(&nfsd_inetaddr_notifier); + /* check if the notifier is already set */ + if (atomic_inc_return(&nfsd_notifier_refcount) == 1) { + register_inetaddr_notifier(&nfsd_inetaddr_notifier); #if IS_ENABLED(CONFIG_IPV6) - register_inet6addr_notifier(&nfsd_inet6addr_notifier); + register_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif + } do_gettimeofday(&nn->nfssvc_boot); /* record boot time */ return 0; } diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h index 0c2a716e8741..d27a5aa60022 100644 --- a/fs/nfsd/pnfs.h +++ b/fs/nfsd/pnfs.h @@ -19,6 +19,7 @@ struct nfsd4_deviceid_map { struct nfsd4_layout_ops { u32 notify_types; + bool disable_recalls; __be32 (*proc_getdeviceinfo)(struct super_block *sb, struct svc_rqst *rqstp, diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index b95adf9a1595..c9399366f9df 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -63,7 +63,6 @@ typedef struct { struct nfsd4_callback { struct nfs4_client *cb_clp; - u32 cb_minorversion; struct rpc_message cb_msg; const struct nfsd4_callback_ops *cb_ops; struct work_struct cb_work; @@ -441,11 +440,11 @@ struct nfs4_openowner { /* * Represents a generic "lockowner". Similar to an openowner. References to it * are held by the lock stateids that are created on its behalf. This object is - * a superset of the nfs4_stateowner struct (or would be if it needed any extra - * fields). + * a superset of the nfs4_stateowner struct. 
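+ *
+ * The lo_blocked list below holds this owner's pending blocked lock
+ * requests (struct nfsd4_blocked_lock, linked via nbl_list); a sketch
+ * of a traversal, which must be done under nn->client_lock:
+ *
+ *	list_for_each_entry(nbl, &lo->lo_blocked, nbl_list)
+ *		...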
 */
 struct nfs4_lockowner {
-	struct nfs4_stateowner	lo_owner; /* must be first element */
+	struct nfs4_stateowner	lo_owner;	/* must be first element */
+	struct list_head	lo_blocked;	/* blocked file_locks */
 };
 
 static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so)
@@ -572,6 +571,7 @@ enum nfsd4_cb_op {
 	NFSPROC4_CLNT_CB_RECALL,
 	NFSPROC4_CLNT_CB_LAYOUT,
 	NFSPROC4_CLNT_CB_SEQUENCE,
+	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
 };
 
 /* Returns true iff a is later than b: */
@@ -580,6 +580,20 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
 	return (s32)(a->si_generation - b->si_generation) > 0;
 }
 
+/*
+ * When a client tries to get a lock on a file, we set one of these objects
+ * on the blocking lock. When the lock becomes free, we can then issue a
+ * CB_NOTIFY_LOCK callback to the client.
+ */
+struct nfsd4_blocked_lock {
+	struct list_head	nbl_list;
+	struct list_head	nbl_lru;
+	unsigned long		nbl_time;
+	struct file_lock	nbl_lock;
+	struct knfsd_fh		nbl_fh;
+	struct nfsd4_callback	nbl_cb;
+};
+
 struct nfsd4_compound_state;
 struct nfsd_net;
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ff476e654b8f..8ca642fe9b21 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -513,6 +513,22 @@ __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
 						 count));
 }
 
+ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
+			     u64 dst_pos, u64 count)
+{
+	/*
+	 * Limit copy to 4MB to prevent indefinitely blocking an nfsd
+	 * thread and client rpc slot. The choice of 4MB is somewhat
+	 * arbitrary. We might instead base this on r/wsize, or make it
+	 * tunable, or use a time instead of a byte limit, or implement
+	 * asynchronous copy. In theory a client could also recognize a
+	 * limit like this and pipeline multiple COPY requests.
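+	 *
+	 * For example (sketch): a 1GB COPY request is truncated here via
+	 *
+	 *	count = min_t(u64, count, 1 << 22);	// 4MB
+	 *
+	 * so the client sees a short wr_bytes_written and is expected to
+	 * continue with further COPY requests for the remainder.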
+ */ + count = min_t(u64, count, 1 << 22); + return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); +} + __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, loff_t len, int flags) diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 3cbb1b33777b..0bf9e7bf5800 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -96,6 +96,8 @@ __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, struct svc_fh *res); __be32 nfsd_link(struct svc_rqst *, struct svc_fh *, char *, int, struct svc_fh *); +ssize_t nfsd_copy_file_range(struct file *, u64, + struct file *, u64, u64); __be32 nfsd_rename(struct svc_rqst *, struct svc_fh *, char *, int, struct svc_fh *, char *, int); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index beea0c5edc51..8fda4abdf3b1 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -503,6 +503,28 @@ struct nfsd4_clone { u64 cl_count; }; +struct nfsd42_write_res { + u64 wr_bytes_written; + u32 wr_stable_how; + nfs4_verifier wr_verifier; +}; + +struct nfsd4_copy { + /* request */ + stateid_t cp_src_stateid; + stateid_t cp_dst_stateid; + u64 cp_src_pos; + u64 cp_dst_pos; + u64 cp_count; + + /* both */ + bool cp_consecutive; + bool cp_synchronous; + + /* response */ + struct nfsd42_write_res cp_res; +}; + struct nfsd4_seek { /* request */ stateid_t seek_stateid; @@ -568,6 +590,7 @@ struct nfsd4_op { struct nfsd4_fallocate allocate; struct nfsd4_fallocate deallocate; struct nfsd4_clone clone; + struct nfsd4_copy copy; struct nfsd4_seek seek; } u; struct nfs4_replay * replay; diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h index c47f6fdb111a..49b719dfef95 100644 --- a/fs/nfsd/xdr4cb.h +++ b/fs/nfsd/xdr4cb.h @@ -28,3 +28,12 @@ #define NFS4_dec_cb_layout_sz (cb_compound_dec_hdr_sz + \ cb_sequence_dec_sz + \ op_dec_sz) + +#define NFS4_enc_cb_notify_lock_sz (cb_compound_enc_hdr_sz + \ + cb_sequence_enc_sz + \ + 2 + 1 + \ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ + enc_nfs4_fh_sz) +#define NFS4_dec_cb_notify_lock_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + \ + op_dec_sz) diff --git a/fs/open.c b/fs/open.c index a7719cfb7257..d3ed8171e8e0 100644 --- a/fs/open.c +++ b/fs/open.c @@ -267,6 +267,11 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) (mode & ~FALLOC_FL_INSERT_RANGE)) return -EINVAL; + /* Unshare range should only be used with allocate mode. 
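+	 * For example (illustrative):
+	 *
+	 *	UNSHARE_RANGE			-> allowed
+	 *	UNSHARE_RANGE | KEEP_SIZE	-> allowed
+	 *	UNSHARE_RANGE | PUNCH_HOLE	-> -EINVAL
+	 *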
*/ + if ((mode & FALLOC_FL_UNSHARE_RANGE) && + (mode & ~(FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_KEEP_SIZE))) + return -EINVAL; + if (!(file->f_mode & FMODE_WRITE)) return -EBADF; diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 584e87e11cb6..26ef1958b65b 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -55,6 +55,8 @@ xfs-y += $(addprefix libxfs/, \ xfs_ag_resv.o \ xfs_rmap.o \ xfs_rmap_btree.o \ + xfs_refcount.o \ + xfs_refcount_btree.o \ xfs_sb.o \ xfs_symlink_remote.o \ xfs_trans_resv.o \ @@ -88,6 +90,7 @@ xfs-y += xfs_aops.o \ xfs_message.o \ xfs_mount.o \ xfs_mru_cache.o \ + xfs_reflink.o \ xfs_stats.o \ xfs_super.o \ xfs_symlink.o \ @@ -100,16 +103,20 @@ xfs-y += xfs_aops.o \ # low-level transaction/log code xfs-y += xfs_log.o \ xfs_log_cil.o \ + xfs_bmap_item.o \ xfs_buf_item.o \ xfs_extfree_item.o \ xfs_icreate_item.o \ xfs_inode_item.o \ + xfs_refcount_item.o \ xfs_rmap_item.o \ xfs_log_recover.o \ xfs_trans_ail.o \ + xfs_trans_bmap.o \ xfs_trans_buf.o \ xfs_trans_extfree.o \ xfs_trans_inode.o \ + xfs_trans_refcount.o \ xfs_trans_rmap.o \ # optional features diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index e3ae0f2b4294..e5ebc3770460 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -38,6 +38,7 @@ #include "xfs_trans_space.h" #include "xfs_rmap_btree.h" #include "xfs_btree.h" +#include "xfs_refcount_btree.h" /* * Per-AG Block Reservations @@ -108,7 +109,9 @@ xfs_ag_resv_critical( trace_xfs_ag_resv_critical(pag, type, avail); /* Critically low if less than 10% or max btree height remains. */ - return avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS; + return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS, + pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL, + XFS_RANDOM_AG_RESV_CRITICAL); } /* @@ -228,6 +231,11 @@ xfs_ag_resv_init( if (pag->pag_meta_resv.ar_asked == 0) { ask = used = 0; + error = xfs_refcountbt_calc_reserves(pag->pag_mount, + pag->pag_agno, &ask, &used); + if (error) + goto out; + error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, ask, used); if (error) @@ -238,6 +246,11 @@ xfs_ag_resv_init( if (pag->pag_agfl_resv.ar_asked == 0) { ask = used = 0; + error = xfs_rmapbt_calc_reserves(pag->pag_mount, pag->pag_agno, + &ask, &used); + if (error) + goto out; + error = __xfs_ag_resv_init(pag, XFS_AG_RESV_AGFL, ask, used); if (error) goto out; diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index ca75dc90ebe0..effb64cf714f 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -52,10 +52,23 @@ STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); +unsigned int +xfs_refc_block( + struct xfs_mount *mp) +{ + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + return XFS_RMAP_BLOCK(mp) + 1; + if (xfs_sb_version_hasfinobt(&mp->m_sb)) + return XFS_FIBT_BLOCK(mp) + 1; + return XFS_IBT_BLOCK(mp) + 1; +} + xfs_extlen_t xfs_prealloc_blocks( struct xfs_mount *mp) { + if (xfs_sb_version_hasreflink(&mp->m_sb)) + return xfs_refc_block(mp) + 1; if (xfs_sb_version_hasrmapbt(&mp->m_sb)) return XFS_RMAP_BLOCK(mp) + 1; if (xfs_sb_version_hasfinobt(&mp->m_sb)) @@ -115,6 +128,8 @@ xfs_alloc_ag_max_usable( blocks++; /* finobt root block */ if (xfs_sb_version_hasrmapbt(&mp->m_sb)) blocks++; /* rmap root block */ + if (xfs_sb_version_hasreflink(&mp->m_sb)) + blocks++; /* refcount root block */ return mp->m_sb.sb_agblocks - blocks; } @@ -2321,6 +2336,9 @@ xfs_alloc_log_agf( 
 		offsetof(xfs_agf_t, agf_btreeblks),
 		offsetof(xfs_agf_t, agf_uuid),
 		offsetof(xfs_agf_t, agf_rmap_blocks),
+		offsetof(xfs_agf_t, agf_refcount_blocks),
+		offsetof(xfs_agf_t, agf_refcount_root),
+		offsetof(xfs_agf_t, agf_refcount_level),
 		/* needed so that we don't log the whole rest of the structure: */
 		offsetof(xfs_agf_t, agf_spare64),
 		sizeof(xfs_agf_t)
@@ -2458,6 +2476,10 @@ xfs_agf_verify(
 	    be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
 		return false;
 
+	if (xfs_sb_version_hasreflink(&mp->m_sb) &&
+	    be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)
+		return false;
+
 	return true;
 }
 
@@ -2578,6 +2600,7 @@ xfs_alloc_read_agf(
 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
 		pag->pagf_levels[XFS_BTNUM_RMAPi] =
 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
+		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
 		spin_lock_init(&pag->pagb_lock);
 		pag->pagb_count = 0;
 		pag->pagb_tree = RB_ROOT;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 9d7f61d36645..c27344cf38e1 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -48,6 +48,7 @@
 #include "xfs_filestream.h"
 #include "xfs_rmap.h"
 #include "xfs_ag_resv.h"
+#include "xfs_refcount.h"
 
 kmem_zone_t		*xfs_bmap_free_item_zone;
 
@@ -140,7 +141,8 @@ xfs_bmbt_lookup_ge(
 */
 static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
 {
-	return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+	return whichfork != XFS_COW_FORK &&
+		XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
 		XFS_IFORK_NEXTENTS(ip, whichfork) >
 			XFS_IFORK_MAXEXT(ip, whichfork);
 }
 
@@ -150,7 +152,8 @@ static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
 */
 static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
 {
-	return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
+	return whichfork != XFS_COW_FORK &&
+		XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
 		XFS_IFORK_NEXTENTS(ip, whichfork) <=
 			XFS_IFORK_MAXEXT(ip, whichfork);
 }
 
@@ -640,6 +643,7 @@ xfs_bmap_btree_to_extents(
 	mp = ip->i_mount;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
+	ASSERT(whichfork != XFS_COW_FORK);
 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
 	rblock = ifp->if_broot;
@@ -706,6 +710,7 @@ xfs_bmap_extents_to_btree(
 	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
 
 	mp = ip->i_mount;
+	ASSERT(whichfork != XFS_COW_FORK);
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
 
@@ -748,6 +753,7 @@ xfs_bmap_extents_to_btree(
 		args.type = XFS_ALLOCTYPE_START_BNO;
 		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
 	} else if (dfops->dop_low) {
+try_another_ag:
 		args.type = XFS_ALLOCTYPE_START_BNO;
 		args.fsbno = *firstblock;
 	} else {
@@ -762,6 +768,21 @@ xfs_bmap_extents_to_btree(
 		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 		return error;
 	}
+
+	/*
+	 * During a CoW operation, the allocation and bmbt updates occur in
+	 * different transactions. The mapping code tries to put new bmbt
+	 * blocks near extents being mapped, but the only way to guarantee this
+	 * is if the alloc and the mapping happen in a single transaction that
+	 * has a block reservation. That isn't the case here, so if we run out
+	 * of space we'll try again with another AG.
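+	 *
+	 * Sketch of the retry: a failed near-block attempt comes back as
+	 *
+	 *	args.fsbno == NULLFSBLOCK && args.type == XFS_ALLOCTYPE_NEAR_BNO
+	 *
+	 * in which case we set dfops->dop_low and jump back to
+	 * try_another_ag, which retries with XFS_ALLOCTYPE_START_BNO.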
+ */ + if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && + args.fsbno == NULLFSBLOCK && + args.type == XFS_ALLOCTYPE_NEAR_BNO) { + dfops->dop_low = true; + goto try_another_ag; + } /* * Allocation can't fail, the space was reserved. */ @@ -837,6 +858,7 @@ xfs_bmap_local_to_extents_empty( { struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + ASSERT(whichfork != XFS_COW_FORK); ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); ASSERT(ifp->if_bytes == 0); ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0); @@ -896,6 +918,7 @@ xfs_bmap_local_to_extents( * file currently fits in an inode. */ if (*firstblock == NULLFSBLOCK) { +try_another_ag: args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino); args.type = XFS_ALLOCTYPE_START_BNO; } else { @@ -908,6 +931,19 @@ xfs_bmap_local_to_extents( if (error) goto done; + /* + * During a CoW operation, the allocation and bmbt updates occur in + * different transactions. The mapping code tries to put new bmbt + * blocks near extents being mapped, but the only way to guarantee this + * is if the alloc and the mapping happen in a single transaction that + * has a block reservation. That isn't the case here, so if we run out + * of space we'll try again with another AG. + */ + if (xfs_sb_version_hasreflink(&ip->i_mount->m_sb) && + args.fsbno == NULLFSBLOCK && + args.type == XFS_ALLOCTYPE_NEAR_BNO) { + goto try_another_ag; + } /* Can't fail, the space was reserved. */ ASSERT(args.fsbno != NULLFSBLOCK); ASSERT(args.len == 1); @@ -1670,7 +1706,8 @@ xfs_bmap_one_block( */ STATIC int /* error */ xfs_bmap_add_extent_delay_real( - struct xfs_bmalloca *bma) + struct xfs_bmalloca *bma, + int whichfork) { struct xfs_bmbt_irec *new = &bma->got; int diff; /* temp value */ @@ -1688,11 +1725,14 @@ xfs_bmap_add_extent_delay_real( xfs_filblks_t temp=0; /* value for da_new calculations */ xfs_filblks_t temp2=0;/* value for da_new calculations */ int tmp_rval; /* partial logging flags */ - int whichfork = XFS_DATA_FORK; struct xfs_mount *mp; + xfs_extnum_t *nextents; mp = bma->ip->i_mount; ifp = XFS_IFORK_PTR(bma->ip, whichfork); + ASSERT(whichfork != XFS_ATTR_FORK); + nextents = (whichfork == XFS_COW_FORK ? &bma->ip->i_cnextents : + &bma->ip->i_d.di_nextents); ASSERT(bma->idx >= 0); ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); @@ -1706,6 +1746,9 @@ xfs_bmap_add_extent_delay_real( #define RIGHT r[1] #define PREV r[2] + if (whichfork == XFS_COW_FORK) + state |= BMAP_COWFORK; + /* * Set up a bunch of variables to make the tests simpler. 
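+	 * (PREV is the delalloc extent being converted; LEFT/RIGHT are its
+	 * neighbours. The BMAP_*_FILLING/_CONTIG bits collected in 'state'
+	 * select the case below; e.g., a sketch of the full-merge case:
+	 *
+	 *	BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING |
+	 *	BMAP_LEFT_CONTIG  | BMAP_RIGHT_CONTIG
+	 *
+	 * collapses LEFT, PREV and RIGHT into a single real extent.)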
*/ @@ -1792,7 +1835,7 @@ xfs_bmap_add_extent_delay_real( trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); xfs_iext_remove(bma->ip, bma->idx + 1, 2, state); - bma->ip->i_d.di_nextents--; + (*nextents)--; if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -1894,7 +1937,7 @@ xfs_bmap_add_extent_delay_real( xfs_bmbt_set_startblock(ep, new->br_startblock); trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - bma->ip->i_d.di_nextents++; + (*nextents)++; if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -1964,7 +2007,7 @@ xfs_bmap_add_extent_delay_real( temp = PREV.br_blockcount - new->br_blockcount; xfs_bmbt_set_blockcount(ep, temp); xfs_iext_insert(bma->ip, bma->idx, 1, new, state); - bma->ip->i_d.di_nextents++; + (*nextents)++; if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2048,7 +2091,7 @@ xfs_bmap_add_extent_delay_real( trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state); - bma->ip->i_d.di_nextents++; + (*nextents)++; if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2117,7 +2160,7 @@ xfs_bmap_add_extent_delay_real( RIGHT.br_blockcount = temp2; /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state); - bma->ip->i_d.di_nextents++; + (*nextents)++; if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2215,7 +2258,8 @@ xfs_bmap_add_extent_delay_real( xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); done: - bma->logflags |= rval; + if (whichfork != XFS_COW_FORK) + bma->logflags |= rval; return error; #undef LEFT #undef RIGHT @@ -2759,6 +2803,7 @@ done: STATIC void xfs_bmap_add_extent_hole_delay( xfs_inode_t *ip, /* incore inode pointer */ + int whichfork, xfs_extnum_t *idx, /* extent number to update/insert */ xfs_bmbt_irec_t *new) /* new data to add to file extents */ { @@ -2770,8 +2815,10 @@ xfs_bmap_add_extent_hole_delay( int state; /* state bits, accessed thru macros */ xfs_filblks_t temp=0; /* temp for indirect calculations */ - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + ifp = XFS_IFORK_PTR(ip, whichfork); state = 0; + if (whichfork == XFS_COW_FORK) + state |= BMAP_COWFORK; ASSERT(isnullstartblock(new->br_startblock)); /* @@ -2789,7 +2836,7 @@ xfs_bmap_add_extent_hole_delay( * Check and set flags if the current (right) segment exists. * If it doesn't exist, we're converting the hole at end-of-file. 
 */
-	if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
+	if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
 		state |= BMAP_RIGHT_VALID;
 		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
 
@@ -2923,6 +2970,7 @@ xfs_bmap_add_extent_hole_real(
 	ASSERT(!isnullstartblock(new->br_startblock));
 	ASSERT(!bma->cur ||
 	       !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
+	ASSERT(whichfork != XFS_COW_FORK);
 
 	XFS_STATS_INC(mp, xs_add_exlist);
 
@@ -3648,7 +3696,9 @@ xfs_bmap_btalloc(
 	else if (mp->m_dalign)
 		stripe_align = mp->m_dalign;
 
-	if (xfs_alloc_is_userdata(ap->datatype))
+	if (ap->flags & XFS_BMAPI_COWFORK)
+		align = xfs_get_cowextsz_hint(ap->ip);
+	else if (xfs_alloc_is_userdata(ap->datatype))
 		align = xfs_get_extsz_hint(ap->ip);
 	if (unlikely(align)) {
 		error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
@@ -3856,7 +3906,8 @@ xfs_bmap_btalloc(
 	ASSERT(nullfb || fb_agno == args.agno ||
 	       (ap->dfops->dop_low && fb_agno < args.agno));
 	ap->length = args.len;
-	ap->ip->i_d.di_nblocks += args.len;
+	if (!(ap->flags & XFS_BMAPI_COWFORK))
+		ap->ip->i_d.di_nblocks += args.len;
 	xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
 	if (ap->wasdel)
 		ap->ip->i_delayed_blks -= args.len;
@@ -3876,6 +3927,63 @@ xfs_bmap_btalloc(
 }
 
 /*
+ * For a remap operation, just "allocate" an extent at the address that the
+ * caller passed in, and ensure that the AGFL is the right size. The caller
+ * will then map the "allocated" extent into the file somewhere.
+ */
+STATIC int
+xfs_bmap_remap_alloc(
+	struct xfs_bmalloca	*ap)
+{
+	struct xfs_trans	*tp = ap->tp;
+	struct xfs_mount	*mp = tp->t_mountp;
+	xfs_agblock_t		bno;
+	struct xfs_alloc_arg	args;
+	int			error;
+
+	/*
+	 * Validate that the block number is legal - this enables us to detect
+	 * and handle a silent filesystem corruption rather than crashing.
+	 */
+	memset(&args, 0, sizeof(struct xfs_alloc_arg));
+	args.tp = ap->tp;
+	args.mp = ap->tp->t_mountp;
+	bno = *ap->firstblock;
+	args.agno = XFS_FSB_TO_AGNO(mp, bno);
+	args.agbno = XFS_FSB_TO_AGBNO(mp, bno);
+	if (args.agno >= mp->m_sb.sb_agcount ||
+	    args.agbno >= mp->m_sb.sb_agblocks)
+		return -EFSCORRUPTED;
+
+	/* "Allocate" the extent from the range we passed in. */
+	trace_xfs_bmap_remap_alloc(ap->ip, *ap->firstblock, ap->length);
+	ap->blkno = bno;
+	ap->ip->i_d.di_nblocks += ap->length;
+	xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
+
+	/* Fix the freelist, like a real allocator does. */
+	args.datatype = ap->datatype;
+	args.pag = xfs_perag_get(args.mp, args.agno);
+	ASSERT(args.pag);
+
+	/*
+	 * The freelist fixing code will decline the allocation if
+	 * the size and shape of the free space doesn't allow for
+	 * allocating the extent and updating all the metadata that
+	 * happens during an allocation. We're remapping, not
+	 * allocating, so skip that check by pretending to be freeing.
+	 */
+	error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
+	if (error)
+		goto error0;
+error0:
+	xfs_perag_put(args.pag);
+	if (error)
+		trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_);
+	return error;
+}
+
+/*
 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
 * It figures out where to ask the underlying allocator to put the new extent.
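+ *
+ * Dispatch sketch:
+ *
+ *	XFS_BMAPI_REMAP			-> xfs_bmap_remap_alloc()
+ *	realtime inode + user data	-> xfs_bmap_rtalloc()
+ *	everything else			-> xfs_bmap_btalloc()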
*/ @@ -3883,6 +3991,8 @@ STATIC int xfs_bmap_alloc( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ { + if (ap->flags & XFS_BMAPI_REMAP) + return xfs_bmap_remap_alloc(ap); if (XFS_IS_REALTIME_INODE(ap->ip) && xfs_alloc_is_userdata(ap->datatype)) return xfs_bmap_rtalloc(ap); @@ -4012,12 +4122,11 @@ xfs_bmapi_read( int error; int eof; int n = 0; - int whichfork = (flags & XFS_BMAPI_ATTRFORK) ? - XFS_ATTR_FORK : XFS_DATA_FORK; + int whichfork = xfs_bmapi_whichfork(flags); ASSERT(*nmap >= 1); ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE| - XFS_BMAPI_IGSTATE))); + XFS_BMAPI_IGSTATE|XFS_BMAPI_COWFORK))); ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)); if (unlikely(XFS_TEST_ERROR( @@ -4035,6 +4144,16 @@ xfs_bmapi_read( ifp = XFS_IFORK_PTR(ip, whichfork); + /* No CoW fork? Return a hole. */ + if (whichfork == XFS_COW_FORK && !ifp) { + mval->br_startoff = bno; + mval->br_startblock = HOLESTARTBLOCK; + mval->br_blockcount = len; + mval->br_state = XFS_EXT_NORM; + *nmap = 1; + return 0; + } + if (!(ifp->if_flags & XFS_IFEXTENTS)) { error = xfs_iread_extents(NULL, ip, whichfork); if (error) @@ -4084,6 +4203,7 @@ xfs_bmapi_read( int xfs_bmapi_reserve_delalloc( struct xfs_inode *ip, + int whichfork, xfs_fileoff_t aoff, xfs_filblks_t len, struct xfs_bmbt_irec *got, @@ -4092,7 +4212,7 @@ xfs_bmapi_reserve_delalloc( int eof) { struct xfs_mount *mp = ip->i_mount; - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); xfs_extlen_t alen; xfs_extlen_t indlen; char rt = XFS_IS_REALTIME_INODE(ip); @@ -4104,7 +4224,10 @@ xfs_bmapi_reserve_delalloc( alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); /* Figure out the extent size, adjust alen */ - extsz = xfs_get_extsz_hint(ip); + if (whichfork == XFS_COW_FORK) + extsz = xfs_get_cowextsz_hint(ip); + else + extsz = xfs_get_extsz_hint(ip); if (extsz) { error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof, 1, 0, &aoff, &alen); @@ -4151,7 +4274,7 @@ xfs_bmapi_reserve_delalloc( got->br_startblock = nullstartblock(indlen); got->br_blockcount = alen; got->br_state = XFS_EXT_NORM; - xfs_bmap_add_extent_hole_delay(ip, lastx, got); + xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); /* * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay @@ -4182,8 +4305,7 @@ xfs_bmapi_allocate( struct xfs_bmalloca *bma) { struct xfs_mount *mp = bma->ip->i_mount; - int whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ? - XFS_ATTR_FORK : XFS_DATA_FORK; + int whichfork = xfs_bmapi_whichfork(bma->flags); struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); int tmp_logflags = 0; int error; @@ -4278,7 +4400,7 @@ xfs_bmapi_allocate( bma->got.br_state = XFS_EXT_UNWRITTEN; if (bma->wasdel) - error = xfs_bmap_add_extent_delay_real(bma); + error = xfs_bmap_add_extent_delay_real(bma, whichfork); else error = xfs_bmap_add_extent_hole_real(bma, whichfork); @@ -4308,8 +4430,7 @@ xfs_bmapi_convert_unwritten( xfs_filblks_t len, int flags) { - int whichfork = (flags & XFS_BMAPI_ATTRFORK) ? - XFS_ATTR_FORK : XFS_DATA_FORK; + int whichfork = xfs_bmapi_whichfork(flags); struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); int tmp_logflags = 0; int error; @@ -4325,6 +4446,8 @@ xfs_bmapi_convert_unwritten( (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) return 0; + ASSERT(whichfork != XFS_COW_FORK); + /* * Modify (by adding) the state flag, if writing. */ @@ -4431,8 +4554,7 @@ xfs_bmapi_write( orig_mval = mval; orig_nmap = *nmap; #endif - whichfork = (flags & XFS_BMAPI_ATTRFORK) ? 
- XFS_ATTR_FORK : XFS_DATA_FORK; + whichfork = xfs_bmapi_whichfork(flags); ASSERT(*nmap >= 1); ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); @@ -4441,6 +4563,11 @@ xfs_bmapi_write( ASSERT(len > 0); ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK); + ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP)); + ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP)); + ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK); + ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK); /* zeroing is for currently only for data extents, not metadata */ ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != @@ -4502,6 +4629,14 @@ xfs_bmapi_write( wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); /* + * Make sure we only reflink into a hole. + */ + if (flags & XFS_BMAPI_REMAP) + ASSERT(inhole); + if (flags & XFS_BMAPI_COWFORK) + ASSERT(!inhole); + + /* * First, deal with the hole before the allocated space * that we found, if any. */ @@ -4531,6 +4666,17 @@ xfs_bmapi_write( goto error0; if (bma.blkno == NULLFSBLOCK) break; + + /* + * If this is a CoW allocation, record the data in + * the refcount btree for orphan recovery. + */ + if (whichfork == XFS_COW_FORK) { + error = xfs_refcount_alloc_cow_extent(mp, dfops, + bma.blkno, bma.length); + if (error) + goto error0; + } } /* Deal with the allocated space we found. */ @@ -4696,7 +4842,8 @@ xfs_bmap_del_extent( xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *del, /* data to remove from extents */ int *logflagsp, /* inode logging flags */ - int whichfork) /* data or attr fork */ + int whichfork, /* data or attr fork */ + int bflags) /* bmapi flags */ { xfs_filblks_t da_new; /* new delay-alloc indirect blocks */ xfs_filblks_t da_old; /* old delay-alloc indirect blocks */ @@ -4725,6 +4872,8 @@ xfs_bmap_del_extent( if (whichfork == XFS_ATTR_FORK) state |= BMAP_ATTRFORK; + else if (whichfork == XFS_COW_FORK) + state |= BMAP_COWFORK; ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / @@ -4805,6 +4954,7 @@ xfs_bmap_del_extent( /* * Matches the whole extent. Delete the entry. */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_iext_remove(ip, *idx, 1, whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); --*idx; @@ -4988,9 +5138,16 @@ xfs_bmap_del_extent( /* * If we need to, add to list of extents to delete. */ - if (do_fx) - xfs_bmap_add_free(mp, dfops, del->br_startblock, - del->br_blockcount, NULL); + if (do_fx && !(bflags & XFS_BMAPI_REMAP)) { + if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { + error = xfs_refcount_decrease_extent(mp, dfops, del); + if (error) + goto done; + } else + xfs_bmap_add_free(mp, dfops, del->br_startblock, + del->br_blockcount, NULL); + } + /* * Adjust inode # blocks in the file. */ @@ -4999,7 +5156,7 @@ xfs_bmap_del_extent( /* * Adjust quota data. */ - if (qfield) + if (qfield && !(bflags & XFS_BMAPI_REMAP)) xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); /* @@ -5014,6 +5171,175 @@ done: return error; } +/* Remove an extent from the CoW fork. Similar to xfs_bmap_del_extent. 
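+ *
+ * The usual four overlap cases apply ('D' marks the range being
+ * deleted from the existing CoW extent):
+ *
+ *	case 3:  |DDDDDDDD|   covers the whole extent: remove the entry
+ *	case 2:  |DDDD----|   start of the extent: bump br_startoff
+ *	case 1:  |----DDDD|   end of the extent: shrink br_blockcount
+ *	case 0:  |--DDDD--|   middle: split into two extents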
*/ +int +xfs_bunmapi_cow( + struct xfs_inode *ip, + struct xfs_bmbt_irec *del) +{ + xfs_filblks_t da_new; + xfs_filblks_t da_old; + xfs_fsblock_t del_endblock = 0; + xfs_fileoff_t del_endoff; + int delay; + struct xfs_bmbt_rec_host *ep; + int error; + struct xfs_bmbt_irec got; + xfs_fileoff_t got_endoff; + struct xfs_ifork *ifp; + struct xfs_mount *mp; + xfs_filblks_t nblks; + struct xfs_bmbt_irec new; + /* REFERENCED */ + uint qfield; + xfs_filblks_t temp; + xfs_filblks_t temp2; + int state = BMAP_COWFORK; + int eof; + xfs_extnum_t eidx; + + mp = ip->i_mount; + XFS_STATS_INC(mp, xs_del_exlist); + + ep = xfs_bmap_search_extents(ip, del->br_startoff, XFS_COW_FORK, &eof, + &eidx, &got, &new); + + ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); ifp = ifp; + ASSERT((eidx >= 0) && (eidx < ifp->if_bytes / + (uint)sizeof(xfs_bmbt_rec_t))); + ASSERT(del->br_blockcount > 0); + ASSERT(got.br_startoff <= del->br_startoff); + del_endoff = del->br_startoff + del->br_blockcount; + got_endoff = got.br_startoff + got.br_blockcount; + ASSERT(got_endoff >= del_endoff); + delay = isnullstartblock(got.br_startblock); + ASSERT(isnullstartblock(del->br_startblock) == delay); + qfield = 0; + error = 0; + /* + * If deleting a real allocation, must free up the disk space. + */ + if (!delay) { + nblks = del->br_blockcount; + qfield = XFS_TRANS_DQ_BCOUNT; + /* + * Set up del_endblock and cur for later. + */ + del_endblock = del->br_startblock + del->br_blockcount; + da_old = da_new = 0; + } else { + da_old = startblockval(got.br_startblock); + da_new = 0; + nblks = 0; + } + qfield = qfield; + nblks = nblks; + + /* + * Set flag value to use in switch statement. + * Left-contig is 2, right-contig is 1. + */ + switch (((got.br_startoff == del->br_startoff) << 1) | + (got_endoff == del_endoff)) { + case 3: + /* + * Matches the whole extent. Delete the entry. + */ + xfs_iext_remove(ip, eidx, 1, BMAP_COWFORK); + --eidx; + break; + + case 2: + /* + * Deleting the first part of the extent. + */ + trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_); + xfs_bmbt_set_startoff(ep, del_endoff); + temp = got.br_blockcount - del->br_blockcount; + xfs_bmbt_set_blockcount(ep, temp); + if (delay) { + temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + da_old); + xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); + trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); + da_new = temp; + break; + } + xfs_bmbt_set_startblock(ep, del_endblock); + trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); + break; + + case 1: + /* + * Deleting the last part of the extent. + */ + temp = got.br_blockcount - del->br_blockcount; + trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, temp); + if (delay) { + temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + da_old); + xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); + trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); + da_new = temp; + break; + } + trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); + break; + + case 0: + /* + * Deleting the middle of the extent. 
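xfs_bunmapi_cow() keys its switch on a two-bit value built from whether the deletion is flush with the start and/or the end of the extent that hosts it. A runnable model of that encoding:

    #include <assert.h>

    /* 3 = whole extent, 2 = trim front, 1 = trim back, 0 = punch middle */
    static int del_case(long got_off, long got_len, long del_off, long del_len)
    {
        long got_end = got_off + got_len;
        long del_end = del_off + del_len;

        return ((got_off == del_off) << 1) | (got_end == del_end);
    }

    int main(void)
    {
        assert(del_case(100, 50, 100, 50) == 3);    /* delete it all */
        assert(del_case(100, 50, 100, 10) == 2);    /* trim the front */
        assert(del_case(100, 50, 140, 10) == 1);    /* trim the back */
        assert(del_case(100, 50, 120, 10) == 0);    /* punch the middle */
        return 0;
    }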
+ */ + temp = del->br_startoff - got.br_startoff; + trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, temp); + new.br_startoff = del_endoff; + temp2 = got_endoff - del_endoff; + new.br_blockcount = temp2; + new.br_state = got.br_state; + if (!delay) { + new.br_startblock = del_endblock; + } else { + temp = xfs_bmap_worst_indlen(ip, temp); + xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); + temp2 = xfs_bmap_worst_indlen(ip, temp2); + new.br_startblock = nullstartblock((int)temp2); + da_new = temp + temp2; + while (da_new > da_old) { + if (temp) { + temp--; + da_new--; + xfs_bmbt_set_startblock(ep, + nullstartblock((int)temp)); + } + if (da_new == da_old) + break; + if (temp2) { + temp2--; + da_new--; + new.br_startblock = + nullstartblock((int)temp2); + } + } + } + trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); + xfs_iext_insert(ip, eidx + 1, 1, &new, state); + ++eidx; + break; + } + + /* + * Account for change in delayed indirect blocks. + * Nothing to do for disk quota accounting here. + */ + ASSERT(da_old >= da_new); + if (da_old > da_new) + xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false); + + return error; +} + /* * Unmap (remove) blocks from a file. * If nexts is nonzero then the number of extents to remove is limited to @@ -5021,17 +5347,16 @@ done: * *done is set. */ int /* error */ -xfs_bunmapi( +__xfs_bunmapi( xfs_trans_t *tp, /* transaction pointer */ struct xfs_inode *ip, /* incore inode */ xfs_fileoff_t bno, /* starting offset to unmap */ - xfs_filblks_t len, /* length to unmap in file */ + xfs_filblks_t *rlen, /* i/o: amount remaining */ int flags, /* misc flags */ xfs_extnum_t nexts, /* number of extents max */ xfs_fsblock_t *firstblock, /* first allocated block controls a.g. for allocs */ - struct xfs_defer_ops *dfops, /* i/o: list extents to free */ - int *done) /* set if not done yet */ + struct xfs_defer_ops *dfops) /* i/o: deferred updates */ { xfs_btree_cur_t *cur; /* bmap btree cursor */ xfs_bmbt_irec_t del; /* extent being deleted */ @@ -5053,11 +5378,12 @@ xfs_bunmapi( int wasdel; /* was a delayed alloc extent */ int whichfork; /* data or attribute fork */ xfs_fsblock_t sum; + xfs_filblks_t len = *rlen; /* length to unmap in file */ trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); - whichfork = (flags & XFS_BMAPI_ATTRFORK) ? - XFS_ATTR_FORK : XFS_DATA_FORK; + whichfork = xfs_bmapi_whichfork(flags); + ASSERT(whichfork != XFS_COW_FORK); ifp = XFS_IFORK_PTR(ip, whichfork); if (unlikely( XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && @@ -5079,7 +5405,7 @@ xfs_bunmapi( return error; nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); if (nextents == 0) { - *done = 1; + *rlen = 0; return 0; } XFS_STATS_INC(mp, xs_blk_unmap); @@ -5324,7 +5650,7 @@ xfs_bunmapi( cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL; error = xfs_bmap_del_extent(ip, tp, &lastx, dfops, cur, &del, - &tmp_logflags, whichfork); + &tmp_logflags, whichfork, flags); logflags |= tmp_logflags; if (error) goto error0; @@ -5350,7 +5676,10 @@ nodelete: extno++; } } - *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0; + if (bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0) + *rlen = 0; + else + *rlen = bno - start + 1; /* * Convert to a btree if necessary. @@ -5406,6 +5735,27 @@ error0: return error; } +/* Unmap a range of a file. 
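For the case-0 (middle) split of a delayed allocation, each new half gets a fresh worst-case indirect-block estimate, and the sum may exceed the reservation the original extent carried (da_old); the while loop walks the excess back down, taking a block from each side in turn. A simplified model of the trimming, with plain counters standing in for the nullstartblock() encoding:

    /* Trim the two halves' indirect-block estimates to fit da_old. */
    static void trim_indlen(unsigned long da_old,
                            unsigned long *left, unsigned long *right)
    {
        unsigned long da_new = *left + *right;

        while (da_new > da_old) {
            if (*left) {
                (*left)--;
                da_new--;
            }
            if (da_new == da_old)
                break;
            if (*right) {
                (*right)--;
                da_new--;
            }
        }
    }

For instance, da_old = 5 with estimates 4 and 3 settles at 3 and 2.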
*/ +int +xfs_bunmapi( + xfs_trans_t *tp, + struct xfs_inode *ip, + xfs_fileoff_t bno, + xfs_filblks_t len, + int flags, + xfs_extnum_t nexts, + xfs_fsblock_t *firstblock, + struct xfs_defer_ops *dfops, + int *done) +{ + int error; + + error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts, firstblock, + dfops); + *done = (len == 0); + return error; +} + /* * Determine whether an extent shift can be accomplished by a merge with the * extent that precedes the target hole of the shift. @@ -5985,3 +6335,146 @@ out: xfs_trans_cancel(tp); return error; } + +/* Deferred mapping is only for real extents in the data fork. */ +static bool +xfs_bmap_is_update_needed( + struct xfs_bmbt_irec *bmap) +{ + return bmap->br_startblock != HOLESTARTBLOCK && + bmap->br_startblock != DELAYSTARTBLOCK; +} + +/* Record a bmap intent. */ +static int +__xfs_bmap_add( + struct xfs_mount *mp, + struct xfs_defer_ops *dfops, + enum xfs_bmap_intent_type type, + struct xfs_inode *ip, + int whichfork, + struct xfs_bmbt_irec *bmap) +{ + int error; + struct xfs_bmap_intent *bi; + + trace_xfs_bmap_defer(mp, + XFS_FSB_TO_AGNO(mp, bmap->br_startblock), + type, + XFS_FSB_TO_AGBNO(mp, bmap->br_startblock), + ip->i_ino, whichfork, + bmap->br_startoff, + bmap->br_blockcount, + bmap->br_state); + + bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_SLEEP | KM_NOFS); + INIT_LIST_HEAD(&bi->bi_list); + bi->bi_type = type; + bi->bi_owner = ip; + bi->bi_whichfork = whichfork; + bi->bi_bmap = *bmap; + + error = xfs_defer_join(dfops, bi->bi_owner); + if (error) { + kmem_free(bi); + return error; + } + + xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list); + return 0; +} + +/* Map an extent into a file. */ +int +xfs_bmap_map_extent( + struct xfs_mount *mp, + struct xfs_defer_ops *dfops, + struct xfs_inode *ip, + struct xfs_bmbt_irec *PREV) +{ + if (!xfs_bmap_is_update_needed(PREV)) + return 0; + + return __xfs_bmap_add(mp, dfops, XFS_BMAP_MAP, ip, + XFS_DATA_FORK, PREV); +} + +/* Unmap an extent out of a file. */ +int +xfs_bmap_unmap_extent( + struct xfs_mount *mp, + struct xfs_defer_ops *dfops, + struct xfs_inode *ip, + struct xfs_bmbt_irec *PREV) +{ + if (!xfs_bmap_is_update_needed(PREV)) + return 0; + + return __xfs_bmap_add(mp, dfops, XFS_BMAP_UNMAP, ip, + XFS_DATA_FORK, PREV); +} + +/* + * Process one of the deferred bmap operations. We pass back the + * btree cursor to maintain our lock on the bmapbt between calls. 
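__xfs_bunmapi() replaces the old done flag with a remaining-length in/out parameter, so a caller that must roll its transaction part-way through can simply call again with whatever is left. A minimal model of the contract (the stub consumes a fixed chunk per call purely for illustration):

    /* Stands in for one __xfs_bunmapi() pass; consumes part of the range. */
    static int unmap_step(unsigned long long *rlen)
    {
        unsigned long long chunk = *rlen < 16 ? *rlen : 16;

        *rlen -= chunk;
        return 0;
    }

    static int unmap_all(unsigned long long len)
    {
        int error = 0;

        while (len > 0 && !error)
            error = unmap_step(&len);   /* roll the transaction between passes */
        return error;                   /* the old *done is now just (len == 0) */
    }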
+ */ +int +xfs_bmap_finish_one( + struct xfs_trans *tp, + struct xfs_defer_ops *dfops, + struct xfs_inode *ip, + enum xfs_bmap_intent_type type, + int whichfork, + xfs_fileoff_t startoff, + xfs_fsblock_t startblock, + xfs_filblks_t blockcount, + xfs_exntst_t state) +{ + struct xfs_bmbt_irec bmap; + int nimaps = 1; + xfs_fsblock_t firstfsb; + int flags = XFS_BMAPI_REMAP; + int done; + int error = 0; + + bmap.br_startblock = startblock; + bmap.br_startoff = startoff; + bmap.br_blockcount = blockcount; + bmap.br_state = state; + + trace_xfs_bmap_deferred(tp->t_mountp, + XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type, + XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), + ip->i_ino, whichfork, startoff, blockcount, state); + + if (whichfork != XFS_DATA_FORK && whichfork != XFS_ATTR_FORK) + return -EFSCORRUPTED; + if (whichfork == XFS_ATTR_FORK) + flags |= XFS_BMAPI_ATTRFORK; + + if (XFS_TEST_ERROR(false, tp->t_mountp, + XFS_ERRTAG_BMAP_FINISH_ONE, + XFS_RANDOM_BMAP_FINISH_ONE)) + return -EIO; + + switch (type) { + case XFS_BMAP_MAP: + firstfsb = bmap.br_startblock; + error = xfs_bmapi_write(tp, ip, bmap.br_startoff, + bmap.br_blockcount, flags, &firstfsb, + bmap.br_blockcount, &bmap, &nimaps, + dfops); + break; + case XFS_BMAP_UNMAP: + error = xfs_bunmapi(tp, ip, bmap.br_startoff, + bmap.br_blockcount, flags, 1, &firstfsb, + dfops, &done); + ASSERT(done); + break; + default: + ASSERT(0); + error = -EFSCORRUPTED; + } + + return error; +} diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 8395f6e8cf7d..f97db7132564 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -97,6 +97,19 @@ struct xfs_extent_free_item */ #define XFS_BMAPI_ZERO 0x080 +/* + * Map the inode offset to the block given in ap->firstblock. Primarily + * used for reflink. The range must be in a hole, and this flag cannot be + * turned on with PREALLOC or CONVERT, and cannot be used on the attr fork. + * + * For bunmapi, this flag unmaps the range without adjusting quota, reducing + * refcount, or freeing the blocks. + */ +#define XFS_BMAPI_REMAP 0x100 + +/* Map something in the CoW fork. */ +#define XFS_BMAPI_COWFORK 0x200 + #define XFS_BMAPI_FLAGS \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \ @@ -105,12 +118,24 @@ struct xfs_extent_free_item { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ { XFS_BMAPI_CONTIG, "CONTIG" }, \ { XFS_BMAPI_CONVERT, "CONVERT" }, \ - { XFS_BMAPI_ZERO, "ZERO" } + { XFS_BMAPI_ZERO, "ZERO" }, \ + { XFS_BMAPI_REMAP, "REMAP" }, \ + { XFS_BMAPI_COWFORK, "COWFORK" } static inline int xfs_bmapi_aflag(int w) { - return (w == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : 0); + return (w == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : + (w == XFS_COW_FORK ? 
XFS_BMAPI_COWFORK : 0)); +} + +static inline int xfs_bmapi_whichfork(int bmapi_flags) +{ + if (bmapi_flags & XFS_BMAPI_COWFORK) + return XFS_COW_FORK; + else if (bmapi_flags & XFS_BMAPI_ATTRFORK) + return XFS_ATTR_FORK; + return XFS_DATA_FORK; } /* @@ -131,13 +156,15 @@ static inline int xfs_bmapi_aflag(int w) #define BMAP_LEFT_VALID (1 << 6) #define BMAP_RIGHT_VALID (1 << 7) #define BMAP_ATTRFORK (1 << 8) +#define BMAP_COWFORK (1 << 9) #define XFS_BMAP_EXT_FLAGS \ { BMAP_LEFT_CONTIG, "LC" }, \ { BMAP_RIGHT_CONTIG, "RC" }, \ { BMAP_LEFT_FILLING, "LF" }, \ { BMAP_RIGHT_FILLING, "RF" }, \ - { BMAP_ATTRFORK, "ATTR" } + { BMAP_ATTRFORK, "ATTR" }, \ + { BMAP_COWFORK, "COW" } /* @@ -186,10 +213,15 @@ int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fsblock_t *firstblock, xfs_extlen_t total, struct xfs_bmbt_irec *mval, int *nmap, struct xfs_defer_ops *dfops); +int __xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, + xfs_fileoff_t bno, xfs_filblks_t *rlen, int flags, + xfs_extnum_t nexts, xfs_fsblock_t *firstblock, + struct xfs_defer_ops *dfops); int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, int flags, xfs_extnum_t nexts, xfs_fsblock_t *firstblock, struct xfs_defer_ops *dfops, int *done); +int xfs_bunmapi_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *del); int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, xfs_extnum_t num); uint xfs_default_attroffset(struct xfs_inode *ip); @@ -203,8 +235,31 @@ struct xfs_bmbt_rec_host * xfs_bmap_search_extents(struct xfs_inode *ip, xfs_fileoff_t bno, int fork, int *eofp, xfs_extnum_t *lastxp, struct xfs_bmbt_irec *gotp, struct xfs_bmbt_irec *prevp); -int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, xfs_fileoff_t aoff, - xfs_filblks_t len, struct xfs_bmbt_irec *got, - struct xfs_bmbt_irec *prev, xfs_extnum_t *lastx, int eof); +int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork, + xfs_fileoff_t aoff, xfs_filblks_t len, + struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *prev, + xfs_extnum_t *lastx, int eof); + +enum xfs_bmap_intent_type { + XFS_BMAP_MAP = 1, + XFS_BMAP_UNMAP, +}; + +struct xfs_bmap_intent { + struct list_head bi_list; + enum xfs_bmap_intent_type bi_type; + struct xfs_inode *bi_owner; + int bi_whichfork; + struct xfs_bmbt_irec bi_bmap; +}; + +int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_defer_ops *dfops, + struct xfs_inode *ip, enum xfs_bmap_intent_type type, + int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock, + xfs_filblks_t blockcount, xfs_exntst_t state); +int xfs_bmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, + struct xfs_inode *ip, struct xfs_bmbt_irec *imap); +int xfs_bmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, + struct xfs_inode *ip, struct xfs_bmbt_irec *imap); #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index cd85274e810c..8007d2ba9aef 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -453,6 +453,7 @@ xfs_bmbt_alloc_block( if (args.fsbno == NULLFSBLOCK) { args.fsbno = be64_to_cpu(start->l); +try_another_ag: args.type = XFS_ALLOCTYPE_START_BNO; /* * Make sure there is sufficient room left in the AG to @@ -482,6 +483,22 @@ xfs_bmbt_alloc_block( if (error) goto error0; + /* + * During a CoW operation, the allocation and bmbt updates occur in + * different transactions. 
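xfs_bmapi_whichfork() gives COWFORK priority over ATTRFORK and falls back to the data fork, which is what let the open-coded ternaries earlier in the patch be deleted. The same precedence as a self-contained check (the bit values below are placeholders, not the kernel's):

    #include <assert.h>

    enum fork { DATA_FORK, ATTR_FORK, COW_FORK };

    #define BMAPI_ATTRFORK (1 << 0)     /* placeholder bit values */
    #define BMAPI_COWFORK  (1 << 1)

    static enum fork whichfork(int bmapi_flags)
    {
        if (bmapi_flags & BMAPI_COWFORK)    /* checked first, as in the diff */
            return COW_FORK;
        if (bmapi_flags & BMAPI_ATTRFORK)
            return ATTR_FORK;
        return DATA_FORK;
    }

    int main(void)
    {
        assert(whichfork(0) == DATA_FORK);
        assert(whichfork(BMAPI_ATTRFORK) == ATTR_FORK);
        /* COWFORK wins even if both bits are set */
        assert(whichfork(BMAPI_COWFORK | BMAPI_ATTRFORK) == COW_FORK);
        return 0;
    }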
The mapping code tries to put new bmbt + * blocks near extents being mapped, but the only way to guarantee this + * is if the alloc and the mapping happen in a single transaction that + * has a block reservation. That isn't the case here, so if we run out + * of space we'll try again with another AG. + */ + if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && + args.fsbno == NULLFSBLOCK && + args.type == XFS_ALLOCTYPE_NEAR_BNO) { + cur->bc_private.b.dfops->dop_low = true; + args.fsbno = cur->bc_private.b.firstblock; + goto try_another_ag; + } + if (args.fsbno == NULLFSBLOCK && args.minleft) { /* * Could not find an AG with enough free space to satisfy @@ -777,6 +794,7 @@ xfs_bmbt_init_cursor( { struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); struct xfs_btree_cur *cur; + ASSERT(whichfork != XFS_COW_FORK); cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index aa1752f918b8..5c8e6f2ce44f 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -45,9 +45,10 @@ kmem_zone_t *xfs_btree_cur_zone; */ static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, - XFS_FIBT_MAGIC }, + XFS_FIBT_MAGIC, 0 }, { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC, - XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC } + XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC, + XFS_REFC_CRC_MAGIC } }; #define xfs_btree_magic(cur) \ xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] @@ -1216,6 +1217,9 @@ xfs_btree_set_refs( case XFS_BTNUM_RMAP: xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF); break; + case XFS_BTNUM_REFC: + xfs_buf_set_ref(bp, XFS_REFC_BTREE_REF); + break; default: ASSERT(0); } diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 3f8556a5c2ad..c2b01d1c79ee 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -49,6 +49,7 @@ union xfs_btree_key { struct xfs_inobt_key inobt; struct xfs_rmap_key rmap; struct xfs_rmap_key __rmap_bigkey[2]; + struct xfs_refcount_key refc; }; union xfs_btree_rec { @@ -57,6 +58,7 @@ union xfs_btree_rec { struct xfs_alloc_rec alloc; struct xfs_inobt_rec inobt; struct xfs_rmap_rec rmap; + struct xfs_refcount_rec refc; }; /* @@ -72,6 +74,7 @@ union xfs_btree_rec { #define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) #define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi) #define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi) +#define XFS_BTNUM_REFC ((xfs_btnum_t)XFS_BTNUM_REFCi) /* * For logging record fields. @@ -105,6 +108,7 @@ do { \ case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \ case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \ case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_INC(__mp, rmap, stat); break; \ + case XFS_BTNUM_REFC: __XFS_BTREE_STATS_INC(__mp, refcbt, stat); break; \ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ } \ } while (0) @@ -127,6 +131,8 @@ do { \ __XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \ case XFS_BTNUM_RMAP: \ __XFS_BTREE_STATS_ADD(__mp, rmap, stat, val); break; \ + case XFS_BTNUM_REFC: \ + __XFS_BTREE_STATS_ADD(__mp, refcbt, stat, val); break; \ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ } \ } while (0) @@ -217,6 +223,15 @@ union xfs_btree_irec { struct xfs_bmbt_irec b; struct xfs_inobt_rec_incore i; struct xfs_rmap_irec r; + struct xfs_refcount_irec rc; +}; + +/* Per-AG btree private information. 
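The try_another_ag path is a single retry: if a reflink filesystem fails a near-block bmbt allocation, it raises dop_low on the deferred ops and restarts from firstblock instead of shutting down. A sketch of that control flow with the allocator stubbed out (names and the stub behavior are mine):

    /* Stub allocator; pretends the preferred AG is full on the first pass. */
    static int try_alloc(int pass, unsigned long *fsbno)
    {
        if (pass == 0)
            return 0;
        *fsbno = 42;
        return 1;
    }

    /* Returns a block number, or 0 on failure (0 serves as a sentinel here). */
    static unsigned long alloc_bmbt_block(int has_reflink, int *dop_low)
    {
        unsigned long fsbno;
        int pass = 0;

    try_another_ag:
        if (try_alloc(pass, &fsbno))
            return fsbno;
        /*
         * Only reflink filesystems retry: only there can the allocation
         * and the bmbt update land in different transactions.
         */
        if (has_reflink && pass == 0) {
            *dop_low = 1;           /* accept any AG with free space */
            pass = 1;
            goto try_another_ag;
        }
        return 0;
    }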
*/ +union xfs_btree_cur_private { + struct { + unsigned long nr_ops; /* # record updates */ + int shape_changes; /* # of extent splits */ + } refc; }; /* @@ -243,6 +258,7 @@ typedef struct xfs_btree_cur struct xfs_buf *agbp; /* agf/agi buffer pointer */ struct xfs_defer_ops *dfops; /* deferred updates */ xfs_agnumber_t agno; /* ag number */ + union xfs_btree_cur_private priv; } a; struct { /* needed for BMAP */ struct xfs_inode *ip; /* pointer to our inode */ diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index e96533d178cf..f6e93ef0bffe 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -51,6 +51,8 @@ struct xfs_defer_pending { * find all the space it needs. */ enum xfs_defer_ops_type { + XFS_DEFER_OPS_TYPE_BMAP, + XFS_DEFER_OPS_TYPE_REFCOUNT, XFS_DEFER_OPS_TYPE_RMAP, XFS_DEFER_OPS_TYPE_FREE, XFS_DEFER_OPS_TYPE_MAX, diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 270fb5cf4fa1..f6547fc5e016 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -456,9 +456,11 @@ xfs_sb_has_compat_feature( #define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */ #define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */ +#define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */ #define XFS_SB_FEAT_RO_COMPAT_ALL \ (XFS_SB_FEAT_RO_COMPAT_FINOBT | \ - XFS_SB_FEAT_RO_COMPAT_RMAPBT) + XFS_SB_FEAT_RO_COMPAT_RMAPBT | \ + XFS_SB_FEAT_RO_COMPAT_REFLINK) #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL static inline bool xfs_sb_has_ro_compat_feature( @@ -546,6 +548,12 @@ static inline bool xfs_sb_version_hasrmapbt(struct xfs_sb *sbp) (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT); } +static inline bool xfs_sb_version_hasreflink(struct xfs_sb *sbp) +{ + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && + (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK); +} + /* * end of superblock version macros */ @@ -641,14 +649,17 @@ typedef struct xfs_agf { uuid_t agf_uuid; /* uuid of filesystem */ __be32 agf_rmap_blocks; /* rmapbt blocks used */ - __be32 agf_padding; /* padding */ + __be32 agf_refcount_blocks; /* refcountbt blocks used */ + + __be32 agf_refcount_root; /* refcount tree root block */ + __be32 agf_refcount_level; /* refcount btree levels */ /* * reserve some contiguous space for future logged fields before we add * the unlogged fields. This makes the range logging via flags and * structure offsets much simpler. */ - __be64 agf_spare64[15]; + __be64 agf_spare64[14]; /* unlogged fields, written during buffer writeback. 
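xfs_sb_version_hasreflink() follows the established read-only-compat pattern: a v5 superblock plus the new RO_COMPAT bit, so an old kernel may still mount the filesystem read-only but must refuse to write it. A standalone model (the version number and bit value are taken from the diff; the struct is pruned to the two fields the check reads):

    #include <stdbool.h>
    #include <stdint.h>

    #define SB_VERSION_5        5
    #define RO_COMPAT_REFLINK   (1 << 2)    /* from the diff */

    struct sb_model {
        int      version_num;
        uint32_t features_ro_compat;
    };

    static bool has_reflink(const struct sb_model *sbp)
    {
        return sbp->version_num == SB_VERSION_5 &&
               (sbp->features_ro_compat & RO_COMPAT_REFLINK);
    }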
*/ __be64 agf_lsn; /* last write sequence */ @@ -674,8 +685,11 @@ typedef struct xfs_agf { #define XFS_AGF_BTREEBLKS 0x00000800 #define XFS_AGF_UUID 0x00001000 #define XFS_AGF_RMAP_BLOCKS 0x00002000 -#define XFS_AGF_SPARE64 0x00004000 -#define XFS_AGF_NUM_BITS 15 +#define XFS_AGF_REFCOUNT_BLOCKS 0x00004000 +#define XFS_AGF_REFCOUNT_ROOT 0x00008000 +#define XFS_AGF_REFCOUNT_LEVEL 0x00010000 +#define XFS_AGF_SPARE64 0x00020000 +#define XFS_AGF_NUM_BITS 18 #define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) #define XFS_AGF_FLAGS \ @@ -693,6 +707,9 @@ typedef struct xfs_agf { { XFS_AGF_BTREEBLKS, "BTREEBLKS" }, \ { XFS_AGF_UUID, "UUID" }, \ { XFS_AGF_RMAP_BLOCKS, "RMAP_BLOCKS" }, \ + { XFS_AGF_REFCOUNT_BLOCKS, "REFCOUNT_BLOCKS" }, \ + { XFS_AGF_REFCOUNT_ROOT, "REFCOUNT_ROOT" }, \ + { XFS_AGF_REFCOUNT_LEVEL, "REFCOUNT_LEVEL" }, \ { XFS_AGF_SPARE64, "SPARE64" } /* disk block (xfs_daddr_t) in the AG */ @@ -885,7 +902,8 @@ typedef struct xfs_dinode { __be64 di_changecount; /* number of attribute changes */ __be64 di_lsn; /* flush sequence */ __be64 di_flags2; /* more random flags */ - __u8 di_pad2[16]; /* more padding for future expansion */ + __be32 di_cowextsize; /* basic cow extent size for file */ + __u8 di_pad2[12]; /* more padding for future expansion */ /* fields only written to during inode creation */ xfs_timestamp_t di_crtime; /* time created */ @@ -1041,9 +1059,14 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) * 16 bits of the XFS_XFLAG_s range. */ #define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */ +#define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */ +#define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */ #define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT) +#define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT) +#define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT) -#define XFS_DIFLAG2_ANY (XFS_DIFLAG2_DAX) +#define XFS_DIFLAG2_ANY \ + (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE) /* * Inode number format: @@ -1353,7 +1376,9 @@ struct xfs_owner_info { #define XFS_RMAP_OWN_AG (-5ULL) /* AG freespace btree blocks */ #define XFS_RMAP_OWN_INOBT (-6ULL) /* Inode btree blocks */ #define XFS_RMAP_OWN_INODES (-7ULL) /* Inode chunk */ -#define XFS_RMAP_OWN_MIN (-8ULL) /* guard */ +#define XFS_RMAP_OWN_REFC (-8ULL) /* refcount tree */ +#define XFS_RMAP_OWN_COW (-9ULL) /* cow allocations */ +#define XFS_RMAP_OWN_MIN (-10ULL) /* guard */ #define XFS_RMAP_NON_INODE_OWNER(owner) (!!((owner) & (1ULL << 63))) @@ -1434,6 +1459,62 @@ typedef __be32 xfs_rmap_ptr_t; XFS_IBT_BLOCK(mp) + 1) /* + * Reference Count Btree format definitions + * + */ +#define XFS_REFC_CRC_MAGIC 0x52334643 /* 'R3FC' */ + +unsigned int xfs_refc_block(struct xfs_mount *mp); + +/* + * Data record/key structure + * + * Each record associates a range of physical blocks (starting at + * rc_startblock and ending rc_blockcount blocks later) with a reference + * count (rc_refcount). Extents that are being used to stage a copy on + * write (CoW) operation are recorded in the refcount btree with a + * refcount of 1. All other records must have a refcount > 1 and must + * track an extent mapped only by file data forks. + * + * Extents with a single owner (attributes, metadata, non-shared file + * data) are not tracked here. Free space is also not tracked here. + * This is consistent with pre-reflink XFS. 
+ */ + +/* + * Extents that are being used to stage a copy on write are stored + * in the refcount btree with a refcount of 1 and the upper bit set + * on the startblock. This speeds up mount time deletion of stale + * staging extents because they're all at the right side of the tree. + */ +#define XFS_REFC_COW_START ((xfs_agblock_t)(1U << 31)) +#define REFCNTBT_COWFLAG_BITLEN 1 +#define REFCNTBT_AGBLOCK_BITLEN 31 + +struct xfs_refcount_rec { + __be32 rc_startblock; /* starting block number */ + __be32 rc_blockcount; /* count of blocks */ + __be32 rc_refcount; /* number of inodes linked here */ +}; + +struct xfs_refcount_key { + __be32 rc_startblock; /* starting block number */ +}; + +struct xfs_refcount_irec { + xfs_agblock_t rc_startblock; /* starting block number */ + xfs_extlen_t rc_blockcount; /* count of free blocks */ + xfs_nlink_t rc_refcount; /* number of inodes linked here */ +}; + +#define MAXREFCOUNT ((xfs_nlink_t)~0U) +#define MAXREFCEXTLEN ((xfs_extlen_t)~0U) + +/* btree pointer type */ +typedef __be32 xfs_refcount_ptr_t; + + +/* * BMAP Btree format definitions * * This includes both the root block definition that sits inside an inode fork diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 79455058b752..b72dc821d78b 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -81,14 +81,16 @@ struct getbmapx { #define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ #define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */ #define BMV_IF_NO_HOLES 0x10 /* Do not return holes */ +#define BMV_IF_COWFORK 0x20 /* return CoW fork rather than data */ #define BMV_IF_VALID \ (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC| \ - BMV_IF_DELALLOC|BMV_IF_NO_HOLES) + BMV_IF_DELALLOC|BMV_IF_NO_HOLES|BMV_IF_COWFORK) /* bmv_oflags values - returned for each non-header segment */ #define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ #define BMV_OF_DELALLOC 0x2 /* segment = delayed allocation */ #define BMV_OF_LAST 0x4 /* segment is the last in the file */ +#define BMV_OF_SHARED 0x8 /* segment shared with another file */ /* * Structure for XFS_IOC_FSSETDM. @@ -206,7 +208,8 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */ #define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */ #define XFS_FSOP_GEOM_FLAGS_SPINODES 0x40000 /* sparse inode chunks */ -#define XFS_FSOP_GEOM_FLAGS_RMAPBT 0x80000 /* Reverse mapping btree */ +#define XFS_FSOP_GEOM_FLAGS_RMAPBT 0x80000 /* reverse mapping btree */ +#define XFS_FSOP_GEOM_FLAGS_REFLINK 0x100000 /* files can share blocks */ /* * Minimum and maximum sizes need for growth checks. 
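XFS_REFC_COW_START steals bit 31 of rc_startblock, so staging extents sort after every shared extent and can be swept quickly at mount. A runnable sketch of the encode/decode helpers this implies (the helper names are mine, not the kernel's):

    #include <assert.h>
    #include <stdint.h>

    #define REFC_COW_START  ((uint32_t)(1U << 31))     /* as in the diff */

    static uint32_t cow_encode(uint32_t agbno)   { return agbno | REFC_COW_START; }
    static uint32_t cow_decode(uint32_t start)   { return start & ~REFC_COW_START; }
    static int is_cow_staging(uint32_t start)    { return !!(start & REFC_COW_START); }

    int main(void)
    {
        uint32_t rec = cow_encode(1024);

        assert(is_cow_staging(rec));
        assert(cow_decode(rec) == 1024);
        assert(!is_cow_staging(1024));  /* shared extents keep bit 31 clear */
        return 0;
    }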
@@ -275,7 +278,8 @@ typedef struct xfs_bstat { #define bs_projid bs_projid_lo /* (previously just bs_projid) */ __u16 bs_forkoff; /* inode fork offset in bytes */ __u16 bs_projid_hi; /* higher part of project id */ - unsigned char bs_pad[10]; /* pad space, unused */ + unsigned char bs_pad[6]; /* pad space, unused */ + __u32 bs_cowextsize; /* cow extent size */ __u32 bs_dmevmask; /* DMIG event mask */ __u16 bs_dmstate; /* DMIG state info */ __u16 bs_aextents; /* attribute number of extents */ diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 4b9769e23c83..8de9a3a29589 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -256,6 +256,7 @@ xfs_inode_from_disk( to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec); to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec); to->di_flags2 = be64_to_cpu(from->di_flags2); + to->di_cowextsize = be32_to_cpu(from->di_cowextsize); } } @@ -305,7 +306,7 @@ xfs_inode_to_disk( to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); to->di_flags2 = cpu_to_be64(from->di_flags2); - + to->di_cowextsize = cpu_to_be32(from->di_cowextsize); to->di_ino = cpu_to_be64(ip->i_ino); to->di_lsn = cpu_to_be64(lsn); memset(to->di_pad2, 0, sizeof(to->di_pad2)); @@ -357,6 +358,7 @@ xfs_log_dinode_to_disk( to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); to->di_flags2 = cpu_to_be64(from->di_flags2); + to->di_cowextsize = cpu_to_be32(from->di_cowextsize); to->di_ino = cpu_to_be64(from->di_ino); to->di_lsn = cpu_to_be64(from->di_lsn); memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); @@ -373,6 +375,9 @@ xfs_dinode_verify( struct xfs_inode *ip, struct xfs_dinode *dip) { + uint16_t flags; + uint64_t flags2; + if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) return false; @@ -389,6 +394,23 @@ xfs_dinode_verify( return false; if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) return false; + + flags = be16_to_cpu(dip->di_flags); + flags2 = be64_to_cpu(dip->di_flags2); + + /* don't allow reflink/cowextsize if we don't have reflink */ + if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) && + !xfs_sb_version_hasreflink(&mp->m_sb)) + return false; + + /* don't let reflink and realtime mix */ + if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) + return false; + + /* don't let reflink and dax mix */ + if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX)) + return false; + return true; } diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index 7c4dd321b215..62d9d4681c8c 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -47,6 +47,7 @@ struct xfs_icdinode { __uint16_t di_flags; /* random flags, XFS_DIFLAG_... 
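The three new xfs_dinode_verify() rules are worth restating on their own: the reflink and cowextsize flags demand the feature bit, and reflink excludes both realtime and DAX. As a standalone predicate (the DIFLAG2 bit numbers are from the diff; the REALTIME bit position is an assumption for illustration):

    #include <stdbool.h>
    #include <stdint.h>

    #define DIFLAG_REALTIME     (1 << 0)        /* assumed; not in this hunk */
    #define DIFLAG2_DAX         (1ULL << 0)     /* bit numbers from the diff */
    #define DIFLAG2_REFLINK     (1ULL << 1)
    #define DIFLAG2_COWEXTSIZE  (1ULL << 2)

    static bool dinode_flags_ok(uint16_t flags, uint64_t flags2, bool has_reflink)
    {
        if ((flags2 & (DIFLAG2_REFLINK | DIFLAG2_COWEXTSIZE)) && !has_reflink)
            return false;   /* reflink/cowextsize need the feature bit */
        if ((flags2 & DIFLAG2_REFLINK) && (flags & DIFLAG_REALTIME))
            return false;   /* no reflink on realtime files */
        if ((flags2 & DIFLAG2_REFLINK) && (flags2 & DIFLAG2_DAX))
            return false;   /* no reflink on DAX files */
        return true;
    }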
*/ __uint64_t di_flags2; /* more random flags */ + __uint32_t di_cowextsize; /* basic cow extent size for file */ xfs_ictimestamp_t di_crtime; /* time created */ }; diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index bbcc8c7a44b3..5dd56d3dbb3a 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -121,6 +121,26 @@ xfs_iformat_fork( return -EFSCORRUPTED; } + if (unlikely(xfs_is_reflink_inode(ip) && + (VFS_I(ip)->i_mode & S_IFMT) != S_IFREG)) { + xfs_warn(ip->i_mount, + "corrupt dinode %llu, wrong file type for reflink.", + ip->i_ino); + XFS_CORRUPTION_ERROR("xfs_iformat(reflink)", + XFS_ERRLEVEL_LOW, ip->i_mount, dip); + return -EFSCORRUPTED; + } + + if (unlikely(xfs_is_reflink_inode(ip) && + (ip->i_d.di_flags & XFS_DIFLAG_REALTIME))) { + xfs_warn(ip->i_mount, + "corrupt dinode %llu, has reflink+realtime flag set.", + ip->i_ino); + XFS_CORRUPTION_ERROR("xfs_iformat(reflink)", + XFS_ERRLEVEL_LOW, ip->i_mount, dip); + return -EFSCORRUPTED; + } + switch (VFS_I(ip)->i_mode & S_IFMT) { case S_IFIFO: case S_IFCHR: @@ -186,9 +206,14 @@ xfs_iformat_fork( XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); return -EFSCORRUPTED; } - if (error) { + if (error) return error; + + if (xfs_is_reflink_inode(ip)) { + ASSERT(ip->i_cowfp == NULL); + xfs_ifork_init_cow(ip); } + if (!XFS_DFORK_Q(dip)) return 0; @@ -208,7 +233,8 @@ xfs_iformat_fork( XFS_CORRUPTION_ERROR("xfs_iformat(8)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return -EFSCORRUPTED; + error = -EFSCORRUPTED; + break; } error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); @@ -226,6 +252,9 @@ xfs_iformat_fork( if (error) { kmem_zone_free(xfs_ifork_zone, ip->i_afp); ip->i_afp = NULL; + if (ip->i_cowfp) + kmem_zone_free(xfs_ifork_zone, ip->i_cowfp); + ip->i_cowfp = NULL; xfs_idestroy_fork(ip, XFS_DATA_FORK); } return error; @@ -740,6 +769,9 @@ xfs_idestroy_fork( if (whichfork == XFS_ATTR_FORK) { kmem_zone_free(xfs_ifork_zone, ip->i_afp); ip->i_afp = NULL; + } else if (whichfork == XFS_COW_FORK) { + kmem_zone_free(xfs_ifork_zone, ip->i_cowfp); + ip->i_cowfp = NULL; } } @@ -927,6 +959,19 @@ xfs_iext_get_ext( } } +/* Convert bmap state flags to an inode fork. */ +struct xfs_ifork * +xfs_iext_state_to_fork( + struct xfs_inode *ip, + int state) +{ + if (state & BMAP_COWFORK) + return ip->i_cowfp; + else if (state & BMAP_ATTRFORK) + return ip->i_afp; + return &ip->i_df; +} + /* * Insert new item(s) into the extent records for incore inode * fork 'ifp'. 'count' new items are inserted at index 'idx'. @@ -939,7 +984,7 @@ xfs_iext_insert( xfs_bmbt_irec_t *new, /* items to insert */ int state) /* type of extent conversion */ { - xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df; + xfs_ifork_t *ifp = xfs_iext_state_to_fork(ip, state); xfs_extnum_t i; /* extent record index */ trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_); @@ -1189,7 +1234,7 @@ xfs_iext_remove( int ext_diff, /* number of extents to remove */ int state) /* type of extent conversion */ { - xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df; + xfs_ifork_t *ifp = xfs_iext_state_to_fork(ip, state); xfs_extnum_t nextents; /* number of extents in file */ int new_size; /* size of extents after removal */ @@ -1934,3 +1979,20 @@ xfs_iext_irec_update_extoffs( ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; } } + +/* + * Initialize an inode's copy-on-write fork. 
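xfs_iext_state_to_fork() centralizes the state-bit decoding that xfs_iext_insert() and xfs_iext_remove() previously open-coded for two forks, and xfs_ifork_init_cow() (whose body follows) is written to be idempotent so callers need not check first. A pared-down model of both patterns (malloc stands in for the kmem zone allocator; the struct is reduced to what the model needs):

    #include <stdlib.h>

    struct fork_model { int nextents; };

    struct inode_model {
        struct fork_model  df;      /* data fork always exists */
        struct fork_model *afp;     /* attr fork, allocated on demand */
        struct fork_model *cowfp;   /* CoW fork, allocated on demand */
    };

    #define STATE_ATTRFORK  (1 << 8)    /* bit numbers from the diff */
    #define STATE_COWFORK   (1 << 9)

    static struct fork_model *state_to_fork(struct inode_model *ip, int state)
    {
        if (state & STATE_COWFORK)
            return ip->cowfp;
        if (state & STATE_ATTRFORK)
            return ip->afp;
        return &ip->df;
    }

    /* Safe to call repeatedly; only the first call allocates. */
    static void init_cow_fork(struct inode_model *ip)
    {
        if (ip->cowfp)
            return;
        ip->cowfp = calloc(1, sizeof(*ip->cowfp));
    }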
+ */ +void +xfs_ifork_init_cow( + struct xfs_inode *ip) +{ + if (ip->i_cowfp) + return; + + ip->i_cowfp = kmem_zone_zalloc(xfs_ifork_zone, + KM_SLEEP | KM_NOFS); + ip->i_cowfp->if_flags = XFS_IFEXTENTS; + ip->i_cformat = XFS_DINODE_FMT_EXTENTS; + ip->i_cnextents = 0; +} diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index f95e072ae646..c9476f50e32d 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -92,7 +92,9 @@ typedef struct xfs_ifork { #define XFS_IFORK_PTR(ip,w) \ ((w) == XFS_DATA_FORK ? \ &(ip)->i_df : \ - (ip)->i_afp) + ((w) == XFS_ATTR_FORK ? \ + (ip)->i_afp : \ + (ip)->i_cowfp)) #define XFS_IFORK_DSIZE(ip) \ (XFS_IFORK_Q(ip) ? \ XFS_IFORK_BOFF(ip) : \ @@ -105,26 +107,38 @@ typedef struct xfs_ifork { #define XFS_IFORK_SIZE(ip,w) \ ((w) == XFS_DATA_FORK ? \ XFS_IFORK_DSIZE(ip) : \ - XFS_IFORK_ASIZE(ip)) + ((w) == XFS_ATTR_FORK ? \ + XFS_IFORK_ASIZE(ip) : \ + 0)) #define XFS_IFORK_FORMAT(ip,w) \ ((w) == XFS_DATA_FORK ? \ (ip)->i_d.di_format : \ - (ip)->i_d.di_aformat) + ((w) == XFS_ATTR_FORK ? \ + (ip)->i_d.di_aformat : \ + (ip)->i_cformat)) #define XFS_IFORK_FMT_SET(ip,w,n) \ ((w) == XFS_DATA_FORK ? \ ((ip)->i_d.di_format = (n)) : \ - ((ip)->i_d.di_aformat = (n))) + ((w) == XFS_ATTR_FORK ? \ + ((ip)->i_d.di_aformat = (n)) : \ + ((ip)->i_cformat = (n)))) #define XFS_IFORK_NEXTENTS(ip,w) \ ((w) == XFS_DATA_FORK ? \ (ip)->i_d.di_nextents : \ - (ip)->i_d.di_anextents) + ((w) == XFS_ATTR_FORK ? \ + (ip)->i_d.di_anextents : \ + (ip)->i_cnextents)) #define XFS_IFORK_NEXT_SET(ip,w,n) \ ((w) == XFS_DATA_FORK ? \ ((ip)->i_d.di_nextents = (n)) : \ - ((ip)->i_d.di_anextents = (n))) + ((w) == XFS_ATTR_FORK ? \ + ((ip)->i_d.di_anextents = (n)) : \ + ((ip)->i_cnextents = (n)))) #define XFS_IFORK_MAXEXT(ip, w) \ (XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t)) +struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state); + int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *); void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, struct xfs_inode_log_item *, int); @@ -169,4 +183,6 @@ void xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int); extern struct kmem_zone *xfs_ifork_zone; +extern void xfs_ifork_init_cow(struct xfs_inode *ip); + #endif /* __XFS_INODE_FORK_H__ */ diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index fc5eef85d61e..083cdd6d6c28 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -112,7 +112,11 @@ static inline uint xlog_get_cycle(char *ptr) #define XLOG_REG_TYPE_ICREATE 20 #define XLOG_REG_TYPE_RUI_FORMAT 21 #define XLOG_REG_TYPE_RUD_FORMAT 22 -#define XLOG_REG_TYPE_MAX 22 +#define XLOG_REG_TYPE_CUI_FORMAT 23 +#define XLOG_REG_TYPE_CUD_FORMAT 24 +#define XLOG_REG_TYPE_BUI_FORMAT 25 +#define XLOG_REG_TYPE_BUD_FORMAT 26 +#define XLOG_REG_TYPE_MAX 26 /* * Flags to log operation header @@ -231,6 +235,10 @@ typedef struct xfs_trans_header { #define XFS_LI_ICREATE 0x123f #define XFS_LI_RUI 0x1240 /* rmap update intent */ #define XFS_LI_RUD 0x1241 +#define XFS_LI_CUI 0x1242 /* refcount update intent */ +#define XFS_LI_CUD 0x1243 +#define XFS_LI_BUI 0x1244 /* bmbt update intent */ +#define XFS_LI_BUD 0x1245 #define XFS_LI_TYPE_DESC \ { XFS_LI_EFI, "XFS_LI_EFI" }, \ @@ -242,7 +250,11 @@ typedef struct xfs_trans_header { { XFS_LI_QUOTAOFF, "XFS_LI_QUOTAOFF" }, \ { XFS_LI_ICREATE, "XFS_LI_ICREATE" }, \ { XFS_LI_RUI, "XFS_LI_RUI" }, \ - { XFS_LI_RUD, "XFS_LI_RUD" } + { XFS_LI_RUD, "XFS_LI_RUD" }, \ + { XFS_LI_CUI, "XFS_LI_CUI" }, 
\ + { XFS_LI_CUD, "XFS_LI_CUD" }, \ + { XFS_LI_BUI, "XFS_LI_BUI" }, \ + { XFS_LI_BUD, "XFS_LI_BUD" } /* * Inode Log Item Format definitions. @@ -411,7 +423,8 @@ struct xfs_log_dinode { __uint64_t di_changecount; /* number of attribute changes */ xfs_lsn_t di_lsn; /* flush sequence */ __uint64_t di_flags2; /* more random flags */ - __uint8_t di_pad2[16]; /* more padding for future expansion */ + __uint32_t di_cowextsize; /* basic cow extent size for file */ + __uint8_t di_pad2[12]; /* more padding for future expansion */ /* fields only written to during inode creation */ xfs_ictimestamp_t di_crtime; /* time created */ @@ -622,8 +635,11 @@ struct xfs_map_extent { /* rmap me_flags: upper bits are flags, lower byte is type code */ #define XFS_RMAP_EXTENT_MAP 1 +#define XFS_RMAP_EXTENT_MAP_SHARED 2 #define XFS_RMAP_EXTENT_UNMAP 3 +#define XFS_RMAP_EXTENT_UNMAP_SHARED 4 #define XFS_RMAP_EXTENT_CONVERT 5 +#define XFS_RMAP_EXTENT_CONVERT_SHARED 6 #define XFS_RMAP_EXTENT_ALLOC 7 #define XFS_RMAP_EXTENT_FREE 8 #define XFS_RMAP_EXTENT_TYPE_MASK 0xFF @@ -671,6 +687,102 @@ struct xfs_rud_log_format { }; /* + * CUI/CUD (refcount update) log format definitions + */ +struct xfs_phys_extent { + __uint64_t pe_startblock; + __uint32_t pe_len; + __uint32_t pe_flags; +}; + +/* refcount pe_flags: upper bits are flags, lower byte is type code */ +/* Type codes are taken directly from enum xfs_refcount_intent_type. */ +#define XFS_REFCOUNT_EXTENT_TYPE_MASK 0xFF + +#define XFS_REFCOUNT_EXTENT_FLAGS (XFS_REFCOUNT_EXTENT_TYPE_MASK) + +/* + * This is the structure used to lay out a cui log item in the + * log. The cui_extents field is a variable size array whose + * size is given by cui_nextents. + */ +struct xfs_cui_log_format { + __uint16_t cui_type; /* cui log item type */ + __uint16_t cui_size; /* size of this item */ + __uint32_t cui_nextents; /* # extents to free */ + __uint64_t cui_id; /* cui identifier */ + struct xfs_phys_extent cui_extents[]; /* array of extents */ +}; + +static inline size_t +xfs_cui_log_format_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_cui_log_format) + + nr * sizeof(struct xfs_phys_extent); +} + +/* + * This is the structure used to lay out a cud log item in the + * log. The cud_extents array is a variable size array whose + * size is given by cud_nextents; + */ +struct xfs_cud_log_format { + __uint16_t cud_type; /* cud log item type */ + __uint16_t cud_size; /* size of this item */ + __uint32_t __pad; + __uint64_t cud_cui_id; /* id of corresponding cui */ +}; + +/* + * BUI/BUD (inode block mapping) log format definitions + */ + +/* bmbt me_flags: upper bits are flags, lower byte is type code */ +/* Type codes are taken directly from enum xfs_bmap_intent_type. */ +#define XFS_BMAP_EXTENT_TYPE_MASK 0xFF + +#define XFS_BMAP_EXTENT_ATTR_FORK (1U << 31) +#define XFS_BMAP_EXTENT_UNWRITTEN (1U << 30) + +#define XFS_BMAP_EXTENT_FLAGS (XFS_BMAP_EXTENT_TYPE_MASK | \ + XFS_BMAP_EXTENT_ATTR_FORK | \ + XFS_BMAP_EXTENT_UNWRITTEN) + +/* + * This is the structure used to lay out an bui log item in the + * log. The bui_extents field is a variable size array whose + * size is given by bui_nextents. 
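All four intent items use the fixed-header-plus-flexible-array layout, so the *_sizeof() helpers reduce to simple arithmetic: with the fields above, both the CUI header and each xfs_phys_extent occupy 16 bytes, giving 16 + 16*n for n extents. A quick standalone check (layouts transcribed from the diff):

    #include <stdio.h>
    #include <stdint.h>

    struct phys_extent {
        uint64_t pe_startblock;
        uint32_t pe_len;
        uint32_t pe_flags;
    };

    struct cui_log_format {
        uint16_t cui_type;
        uint16_t cui_size;
        uint32_t cui_nextents;
        uint64_t cui_id;
        struct phys_extent cui_extents[];   /* flexible array member */
    };

    static size_t cui_sizeof(unsigned int nr)
    {
        return sizeof(struct cui_log_format) + nr * sizeof(struct phys_extent);
    }

    int main(void)
    {
        printf("%zu %zu\n", cui_sizeof(0), cui_sizeof(4));  /* prints: 16 80 */
        return 0;
    }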
+ */ +struct xfs_bui_log_format { + __uint16_t bui_type; /* bui log item type */ + __uint16_t bui_size; /* size of this item */ + __uint32_t bui_nextents; /* # extents to free */ + __uint64_t bui_id; /* bui identifier */ + struct xfs_map_extent bui_extents[]; /* array of extents to bmap */ +}; + +static inline size_t +xfs_bui_log_format_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_bui_log_format) + + nr * sizeof(struct xfs_map_extent); +} + +/* + * This is the structure used to lay out an bud log item in the + * log. The bud_extents array is a variable size array whose + * size is given by bud_nextents; + */ +struct xfs_bud_log_format { + __uint16_t bud_type; /* bud log item type */ + __uint16_t bud_size; /* size of this item */ + __uint32_t __pad; + __uint64_t bud_bui_id; /* id of corresponding bui */ +}; + +/* * Dquot Log format definitions. * * The first two fields must be the type and size fitting into diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c new file mode 100644 index 000000000000..b177ef33cd4c --- /dev/null +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -0,0 +1,1698 @@ +/* + * Copyright (C) 2016 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bmap.h" +#include "xfs_refcount_btree.h" +#include "xfs_alloc.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_cksum.h" +#include "xfs_trans.h" +#include "xfs_bit.h" +#include "xfs_refcount.h" +#include "xfs_rmap.h" + +/* Allowable refcount adjustment amounts. */ +enum xfs_refc_adjust_op { + XFS_REFCOUNT_ADJUST_INCREASE = 1, + XFS_REFCOUNT_ADJUST_DECREASE = -1, + XFS_REFCOUNT_ADJUST_COW_ALLOC = 0, + XFS_REFCOUNT_ADJUST_COW_FREE = -1, +}; + +STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur, + xfs_agblock_t agbno, xfs_extlen_t aglen, + struct xfs_defer_ops *dfops); +STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur, + xfs_agblock_t agbno, xfs_extlen_t aglen, + struct xfs_defer_ops *dfops); + +/* + * Look up the first record less than or equal to [bno, len] in the btree + * given by cur. + */ +int +xfs_refcount_lookup_le( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + int *stat) +{ + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno, + XFS_LOOKUP_LE); + cur->bc_rec.rc.rc_startblock = bno; + cur->bc_rec.rc.rc_blockcount = 0; + return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); +} + +/* + * Look up the first record greater than or equal to [bno, len] in the btree + * given by cur. 
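Both refcount lookup helpers simply prime the cursor's in-core record and pass a direction hint to xfs_btree_lookup(): LE lands on the last record starting at or before bno, GE on the first starting at or after it. The same semantics over a plain sorted array, as a model rather than an actual btree walk:

    /* Records sorted by start; n > 0. Returns an index, or -1 if none. */
    static int lookup_le(const unsigned *starts, int n, unsigned bno)
    {
        int i, last = -1;

        for (i = 0; i < n && starts[i] <= bno; i++)
            last = i;
        return last;
    }

    static int lookup_ge(const unsigned *starts, int n, unsigned bno)
    {
        int i;

        for (i = 0; i < n; i++)
            if (starts[i] >= bno)
                return i;
        return -1;
    }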
+ */ +int +xfs_refcount_lookup_ge( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + int *stat) +{ + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno, + XFS_LOOKUP_GE); + cur->bc_rec.rc.rc_startblock = bno; + cur->bc_rec.rc.rc_blockcount = 0; + return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); +} + +/* Convert on-disk record to in-core format. */ +static inline void +xfs_refcount_btrec_to_irec( + union xfs_btree_rec *rec, + struct xfs_refcount_irec *irec) +{ + irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock); + irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount); + irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount); +} + +/* + * Get the data from the pointed-to record. + */ +int +xfs_refcount_get_rec( + struct xfs_btree_cur *cur, + struct xfs_refcount_irec *irec, + int *stat) +{ + union xfs_btree_rec *rec; + int error; + + error = xfs_btree_get_rec(cur, &rec, stat); + if (!error && *stat == 1) { + xfs_refcount_btrec_to_irec(rec, irec); + trace_xfs_refcount_get(cur->bc_mp, cur->bc_private.a.agno, + irec); + } + return error; +} + +/* + * Update the record referred to by cur to the value given + * by [bno, len, refcount]. + * This either works (return 0) or gets an EFSCORRUPTED error. + */ +STATIC int +xfs_refcount_update( + struct xfs_btree_cur *cur, + struct xfs_refcount_irec *irec) +{ + union xfs_btree_rec rec; + int error; + + trace_xfs_refcount_update(cur->bc_mp, cur->bc_private.a.agno, irec); + rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock); + rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount); + rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount); + error = xfs_btree_update(cur, &rec); + if (error) + trace_xfs_refcount_update_error(cur->bc_mp, + cur->bc_private.a.agno, error, _RET_IP_); + return error; +} + +/* + * Insert the record referred to by cur to the value given + * by [bno, len, refcount]. + * This either works (return 0) or gets an EFSCORRUPTED error. + */ +STATIC int +xfs_refcount_insert( + struct xfs_btree_cur *cur, + struct xfs_refcount_irec *irec, + int *i) +{ + int error; + + trace_xfs_refcount_insert(cur->bc_mp, cur->bc_private.a.agno, irec); + cur->bc_rec.rc.rc_startblock = irec->rc_startblock; + cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount; + cur->bc_rec.rc.rc_refcount = irec->rc_refcount; + error = xfs_btree_insert(cur, i); + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error); +out_error: + if (error) + trace_xfs_refcount_insert_error(cur->bc_mp, + cur->bc_private.a.agno, error, _RET_IP_); + return error; +} + +/* + * Remove the record referred to by cur, then set the pointer to the spot + * where the record could be re-inserted, in case we want to increment or + * decrement the cursor. + * This either works (return 0) or gets an EFSCORRUPTED error. 
+ */ +STATIC int +xfs_refcount_delete( + struct xfs_btree_cur *cur, + int *i) +{ + struct xfs_refcount_irec irec; + int found_rec; + int error; + + error = xfs_refcount_get_rec(cur, &irec, &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + trace_xfs_refcount_delete(cur->bc_mp, cur->bc_private.a.agno, &irec); + error = xfs_btree_delete(cur, i); + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error); + if (error) + goto out_error; + error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec); +out_error: + if (error) + trace_xfs_refcount_delete_error(cur->bc_mp, + cur->bc_private.a.agno, error, _RET_IP_); + return error; +} + +/* + * Adjusting the Reference Count + * + * As stated elsewhere, the reference count btree (refcbt) stores + * >1 reference counts for extents of physical blocks. In this + * operation, we're either raising or lowering the reference count of + * some subrange stored in the tree: + * + * <------ adjustment range ------> + * ----+ +---+-----+ +--+--------+--------- + * 2 | | 3 | 4 | |17| 55 | 10 + * ----+ +---+-----+ +--+--------+--------- + * X axis is physical blocks number; + * reference counts are the numbers inside the rectangles + * + * The first thing we need to do is to ensure that there are no + * refcount extents crossing either boundary of the range to be + * adjusted. For any extent that does cross a boundary, split it into + * two extents so that we can increment the refcount of one of the + * pieces later: + * + * <------ adjustment range ------> + * ----+ +---+-----+ +--+--------+----+---- + * 2 | | 3 | 2 | |17| 55 | 10 | 10 + * ----+ +---+-----+ +--+--------+----+---- + * + * For this next step, let's assume that all the physical blocks in + * the adjustment range are mapped to a file and are therefore in use + * at least once. Therefore, we can infer that any gap in the + * refcount tree within the adjustment range represents a physical + * extent with refcount == 1: + * + * <------ adjustment range ------> + * ----+---+---+-----+-+--+--------+----+---- + * 2 |"1"| 3 | 2 |1|17| 55 | 10 | 10 + * ----+---+---+-----+-+--+--------+----+---- + * ^ + * + * For each extent that falls within the interval range, figure out + * which extent is to the left or the right of that extent. Now we + * have a left, current, and right extent. If the new reference count + * of the center extent enables us to merge left, center, and right + * into one record covering all three, do so. If the center extent is + * at the left end of the range, abuts the left extent, and its new + * reference count matches the left extent's record, then merge them. + * If the center extent is at the right end of the range, abuts the + * right extent, and the reference counts match, merge those. In the + * example, we can left merge (assuming an increment operation): + * + * <------ adjustment range ------> + * --------+---+-----+-+--+--------+----+---- + * 2 | 3 | 2 |1|17| 55 | 10 | 10 + * --------+---+-----+-+--+--------+----+---- + * ^ + * + * For all other extents within the range, adjust the reference count + * or delete it if the refcount falls below 2. 
If we were + * incrementing, the end result looks like this: + * + * <------ adjustment range ------> + * --------+---+-----+-+--+--------+----+---- + * 2 | 4 | 3 |2|18| 56 | 11 | 10 + * --------+---+-----+-+--+--------+----+---- + * + * The result of a decrement operation looks as such: + * + * <------ adjustment range ------> + * ----+ +---+ +--+--------+----+---- + * 2 | | 2 | |16| 54 | 9 | 10 + * ----+ +---+ +--+--------+----+---- + * DDDD 111111DD + * + * The blocks marked "D" are freed; the blocks marked "1" are only + * referenced once and therefore the record is removed from the + * refcount btree. + */ + +/* Next block after this extent. */ +static inline xfs_agblock_t +xfs_refc_next( + struct xfs_refcount_irec *rc) +{ + return rc->rc_startblock + rc->rc_blockcount; +} + +/* + * Split a refcount extent that crosses agbno. + */ +STATIC int +xfs_refcount_split_extent( + struct xfs_btree_cur *cur, + xfs_agblock_t agbno, + bool *shape_changed) +{ + struct xfs_refcount_irec rcext, tmp; + int found_rec; + int error; + + *shape_changed = false; + error = xfs_refcount_lookup_le(cur, agbno, &found_rec); + if (error) + goto out_error; + if (!found_rec) + return 0; + + error = xfs_refcount_get_rec(cur, &rcext, &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno) + return 0; + + *shape_changed = true; + trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_private.a.agno, + &rcext, agbno); + + /* Establish the right extent. */ + tmp = rcext; + tmp.rc_startblock = agbno; + tmp.rc_blockcount -= (agbno - rcext.rc_startblock); + error = xfs_refcount_update(cur, &tmp); + if (error) + goto out_error; + + /* Insert the left extent. */ + tmp = rcext; + tmp.rc_blockcount = agbno - rcext.rc_startblock; + error = xfs_refcount_insert(cur, &tmp, &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + return error; + +out_error: + trace_xfs_refcount_split_extent_error(cur->bc_mp, + cur->bc_private.a.agno, error, _RET_IP_); + return error; +} + +/* + * Merge the left, center, and right extents. + */ +STATIC int +xfs_refcount_merge_center_extents( + struct xfs_btree_cur *cur, + struct xfs_refcount_irec *left, + struct xfs_refcount_irec *center, + struct xfs_refcount_irec *right, + unsigned long long extlen, + xfs_agblock_t *agbno, + xfs_extlen_t *aglen) +{ + int error; + int found_rec; + + trace_xfs_refcount_merge_center_extents(cur->bc_mp, + cur->bc_private.a.agno, left, center, right); + + /* + * Make sure the center and right extents are not in the btree. + * If the center extent was synthesized, the first delete call + * removes the right extent and we skip the second deletion. + * If center and right were in the btree, then the first delete + * call removes the center and the second one removes the right + * extent. + */ + error = xfs_refcount_lookup_ge(cur, center->rc_startblock, + &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + + error = xfs_refcount_delete(cur, &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + + if (center->rc_refcount > 1) { + error = xfs_refcount_delete(cur, &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, + out_error); + } + + /* Enlarge the left extent. 
*/ + error = xfs_refcount_lookup_le(cur, left->rc_startblock, + &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + + left->rc_blockcount = extlen; + error = xfs_refcount_update(cur, left); + if (error) + goto out_error; + + *aglen = 0; + return error; + +out_error: + trace_xfs_refcount_merge_center_extents_error(cur->bc_mp, + cur->bc_private.a.agno, error, _RET_IP_); + return error; +} + +/* + * Merge with the left extent. + */ +STATIC int +xfs_refcount_merge_left_extent( + struct xfs_btree_cur *cur, + struct xfs_refcount_irec *left, + struct xfs_refcount_irec *cleft, + xfs_agblock_t *agbno, + xfs_extlen_t *aglen) +{ + int error; + int found_rec; + + trace_xfs_refcount_merge_left_extent(cur->bc_mp, + cur->bc_private.a.agno, left, cleft); + + /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ + if (cleft->rc_refcount > 1) { + error = xfs_refcount_lookup_le(cur, cleft->rc_startblock, + &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, + out_error); + + error = xfs_refcount_delete(cur, &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, + out_error); + } + + /* Enlarge the left extent. */ + error = xfs_refcount_lookup_le(cur, left->rc_startblock, + &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + + left->rc_blockcount += cleft->rc_blockcount; + error = xfs_refcount_update(cur, left); + if (error) + goto out_error; + + *agbno += cleft->rc_blockcount; + *aglen -= cleft->rc_blockcount; + return error; + +out_error: + trace_xfs_refcount_merge_left_extent_error(cur->bc_mp, + cur->bc_private.a.agno, error, _RET_IP_); + return error; +} + +/* + * Merge with the right extent. + */ +STATIC int +xfs_refcount_merge_right_extent( + struct xfs_btree_cur *cur, + struct xfs_refcount_irec *right, + struct xfs_refcount_irec *cright, + xfs_agblock_t *agbno, + xfs_extlen_t *aglen) +{ + int error; + int found_rec; + + trace_xfs_refcount_merge_right_extent(cur->bc_mp, + cur->bc_private.a.agno, cright, right); + + /* + * If the extent ending at agbno+aglen (cright) wasn't synthesized, + * remove it. + */ + if (cright->rc_refcount > 1) { + error = xfs_refcount_lookup_le(cur, cright->rc_startblock, + &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, + out_error); + + error = xfs_refcount_delete(cur, &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, + out_error); + } + + /* Enlarge the right extent. */ + error = xfs_refcount_lookup_le(cur, right->rc_startblock, + &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + + right->rc_startblock -= cright->rc_blockcount; + right->rc_blockcount += cright->rc_blockcount; + error = xfs_refcount_update(cur, right); + if (error) + goto out_error; + + *aglen -= cright->rc_blockcount; + return error; + +out_error: + trace_xfs_refcount_merge_right_extent_error(cur->bc_mp, + cur->bc_private.a.agno, error, _RET_IP_); + return error; +} + +#define XFS_FIND_RCEXT_SHARED 1 +#define XFS_FIND_RCEXT_COW 2 +/* + * Find the left extent and the one after it (cleft). This function assumes + * that we've already split any extent crossing agbno. 
+ */ +STATIC int +xfs_refcount_find_left_extents( + struct xfs_btree_cur *cur, + struct xfs_refcount_irec *left, + struct xfs_refcount_irec *cleft, + xfs_agblock_t agbno, + xfs_extlen_t aglen, + int flags) +{ + struct xfs_refcount_irec tmp; + int error; + int found_rec; + + left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK; + error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec); + if (error) + goto out_error; + if (!found_rec) + return 0; + + error = xfs_refcount_get_rec(cur, &tmp, &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + + if (xfs_refc_next(&tmp) != agbno) + return 0; + if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) + return 0; + if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) + return 0; + /* We have a left extent; retrieve (or invent) the next right one */ + *left = tmp; + + error = xfs_btree_increment(cur, 0, &found_rec); + if (error) + goto out_error; + if (found_rec) { + error = xfs_refcount_get_rec(cur, &tmp, &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, + out_error); + + /* if tmp starts at the end of our range, just use that */ + if (tmp.rc_startblock == agbno) + *cleft = tmp; + else { + /* + * There's a gap in the refcntbt at the start of the + * range we're interested in (refcount == 1) so + * synthesize the implied extent and pass it back. + * We assume here that the agbno/aglen range was + * passed in from a data fork extent mapping and + * therefore is allocated to exactly one owner. + */ + cleft->rc_startblock = agbno; + cleft->rc_blockcount = min(aglen, + tmp.rc_startblock - agbno); + cleft->rc_refcount = 1; + } + } else { + /* + * No extents, so pretend that there's one covering the whole + * range. + */ + cleft->rc_startblock = agbno; + cleft->rc_blockcount = aglen; + cleft->rc_refcount = 1; + } + trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_private.a.agno, + left, cleft, agbno); + return error; + +out_error: + trace_xfs_refcount_find_left_extent_error(cur->bc_mp, + cur->bc_private.a.agno, error, _RET_IP_); + return error; +} + +/* + * Find the right extent and the one before it (cright). This function + * assumes that we've already split any extents crossing agbno + aglen. 
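The interesting branch in xfs_refcount_find_left_extents() is the synthesized one: inside an adjustment range every block is mapped at least once, so a gap before the next record stands for an implied refcount == 1 extent. A model of just that synthesis (the branch that copies a real record found at agbno is omitted):

    struct rcext_model { unsigned start, len, refcount; };

    /*
     * next_start is the start of the record after the gap; have_next is 0
     * when the tree has no further records.
     */
    static struct rcext_model
    synthesize_cleft(unsigned agbno, unsigned aglen, int have_next,
                     unsigned next_start)
    {
        struct rcext_model c = { .start = agbno, .refcount = 1 };

        if (have_next && next_start > agbno) {
            unsigned gap = next_start - agbno;

            c.len = aglen < gap ? aglen : gap;  /* min(aglen, gap) */
        } else {
            c.len = aglen;  /* no record: pretend one covers the range */
        }
        return c;
    }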
+ */ +STATIC int +xfs_refcount_find_right_extents( + struct xfs_btree_cur *cur, + struct xfs_refcount_irec *right, + struct xfs_refcount_irec *cright, + xfs_agblock_t agbno, + xfs_extlen_t aglen, + int flags) +{ + struct xfs_refcount_irec tmp; + int error; + int found_rec; + + right->rc_startblock = cright->rc_startblock = NULLAGBLOCK; + error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec); + if (error) + goto out_error; + if (!found_rec) + return 0; + + error = xfs_refcount_get_rec(cur, &tmp, &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + + if (tmp.rc_startblock != agbno + aglen) + return 0; + if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) + return 0; + if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) + return 0; + /* We have a right extent; retrieve (or invent) the next left one */ + *right = tmp; + + error = xfs_btree_decrement(cur, 0, &found_rec); + if (error) + goto out_error; + if (found_rec) { + error = xfs_refcount_get_rec(cur, &tmp, &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, + out_error); + + /* if tmp ends at the end of our range, just use that */ + if (xfs_refc_next(&tmp) == agbno + aglen) + *cright = tmp; + else { + /* + * There's a gap in the refcntbt at the end of the + * range we're interested in (refcount == 1) so + * create the implied extent and pass it back. + * We assume here that the agbno/aglen range was + * passed in from a data fork extent mapping and + * therefore is allocated to exactly one owner. + */ + cright->rc_startblock = max(agbno, xfs_refc_next(&tmp)); + cright->rc_blockcount = right->rc_startblock - + cright->rc_startblock; + cright->rc_refcount = 1; + } + } else { + /* + * No extents, so pretend that there's one covering the whole + * range. + */ + cright->rc_startblock = agbno; + cright->rc_blockcount = aglen; + cright->rc_refcount = 1; + } + trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_private.a.agno, + cright, right, agbno + aglen); + return error; + +out_error: + trace_xfs_refcount_find_right_extent_error(cur->bc_mp, + cur->bc_private.a.agno, error, _RET_IP_); + return error; +} + +/* Is this extent valid? */ +static inline bool +xfs_refc_valid( + struct xfs_refcount_irec *rc) +{ + return rc->rc_startblock != NULLAGBLOCK; +} + +/* + * Try to merge with any extents on the boundaries of the adjustment range. + */ +STATIC int +xfs_refcount_merge_extents( + struct xfs_btree_cur *cur, + xfs_agblock_t *agbno, + xfs_extlen_t *aglen, + enum xfs_refc_adjust_op adjust, + int flags, + bool *shape_changed) +{ + struct xfs_refcount_irec left = {0}, cleft = {0}; + struct xfs_refcount_irec cright = {0}, right = {0}; + int error; + unsigned long long ulen; + bool cequal; + + *shape_changed = false; + /* + * Find the extent just below agbno [left], just above agbno [cleft], + * just below (agbno + aglen) [cright], and just above (agbno + aglen) + * [right]. + */ + error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno, + *aglen, flags); + if (error) + return error; + error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno, + *aglen, flags); + if (error) + return error; + + /* No left or right extent to merge; exit. */ + if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right)) + return 0; + + cequal = (cleft.rc_startblock == cright.rc_startblock) && + (cleft.rc_blockcount == cright.rc_blockcount); + + /* Try to merge left, cleft, and right. cleft must == cright. 
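+	 * The combined length is computed as an unsigned long long so that
+	 * the MAXREFCEXTLEN comparison below cannot be defeated by 32-bit
+	 * overflow of the summed block counts.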
*/ + ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount + + right.rc_blockcount; + if (xfs_refc_valid(&left) && xfs_refc_valid(&right) && + xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal && + left.rc_refcount == cleft.rc_refcount + adjust && + right.rc_refcount == cleft.rc_refcount + adjust && + ulen < MAXREFCEXTLEN) { + *shape_changed = true; + return xfs_refcount_merge_center_extents(cur, &left, &cleft, + &right, ulen, agbno, aglen); + } + + /* Try to merge left and cleft. */ + ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount; + if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) && + left.rc_refcount == cleft.rc_refcount + adjust && + ulen < MAXREFCEXTLEN) { + *shape_changed = true; + error = xfs_refcount_merge_left_extent(cur, &left, &cleft, + agbno, aglen); + if (error) + return error; + + /* + * If we just merged left + cleft and cleft == cright, + * we no longer have a cright to merge with right. We're done. + */ + if (cequal) + return 0; + } + + /* Try to merge cright and right. */ + ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount; + if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) && + right.rc_refcount == cright.rc_refcount + adjust && + ulen < MAXREFCEXTLEN) { + *shape_changed = true; + return xfs_refcount_merge_right_extent(cur, &right, &cright, + agbno, aglen); + } + + return error; +} + +/* + * While we're adjusting the refcounts records of an extent, we have + * to keep an eye on the number of extents we're dirtying -- run too + * many in a single transaction and we'll exceed the transaction's + * reservation and crash the fs. Each record adds 12 bytes to the + * log (plus any key updates) so we'll conservatively assume 24 bytes + * per record. We must also leave space for btree splits on both ends + * of the range and space for the CUD and a new CUI. + * + * XXX: This is a pretty hand-wavy estimate. The penalty for guessing + * true incorrectly is a shutdown FS; the penalty for guessing false + * incorrectly is more transaction rolls than might be necessary. + * Be conservative here. + */ +static bool +xfs_refcount_still_have_space( + struct xfs_btree_cur *cur) +{ + unsigned long overhead; + + overhead = cur->bc_private.a.priv.refc.shape_changes * + xfs_allocfree_log_count(cur->bc_mp, 1); + overhead *= cur->bc_mp->m_sb.sb_blocksize; + + /* + * Only allow 2 refcount extent updates per transaction if the + * refcount continue update "error" has been injected. + */ + if (cur->bc_private.a.priv.refc.nr_ops > 2 && + XFS_TEST_ERROR(false, cur->bc_mp, + XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE, + XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE)) + return false; + + if (cur->bc_private.a.priv.refc.nr_ops == 0) + return true; + else if (overhead > cur->bc_tp->t_log_res) + return false; + return cur->bc_tp->t_log_res - overhead > + cur->bc_private.a.priv.refc.nr_ops * 32; +} + +/* + * Adjust the refcounts of middle extents. At this point we should have + * split extents that crossed the adjustment range; merged with adjacent + * extents; and updated agbno/aglen to reflect the merges. Therefore, + * all we have to do is update the extents inside [agbno, agbno + aglen]. + */ +STATIC int +xfs_refcount_adjust_extents( + struct xfs_btree_cur *cur, + xfs_agblock_t *agbno, + xfs_extlen_t *aglen, + enum xfs_refc_adjust_op adj, + struct xfs_defer_ops *dfops, + struct xfs_owner_info *oinfo) +{ + struct xfs_refcount_irec ext, tmp; + int error; + int found_rec, found_tmp; + xfs_fsblock_t fsbno; + + /* Merging did all the work already. 
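+	 * If the merge step consumed the entire range, *aglen is now zero
+	 * and there are no middle extents left to adjust.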
*/ + if (*aglen == 0) + return 0; + + error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec); + if (error) + goto out_error; + + while (*aglen > 0 && xfs_refcount_still_have_space(cur)) { + error = xfs_refcount_get_rec(cur, &ext, &found_rec); + if (error) + goto out_error; + if (!found_rec) { + ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; + ext.rc_blockcount = 0; + ext.rc_refcount = 0; + } + + /* + * Deal with a hole in the refcount tree; if a file maps to + * these blocks and there's no refcountbt record, pretend that + * there is one with refcount == 1. + */ + if (ext.rc_startblock != *agbno) { + tmp.rc_startblock = *agbno; + tmp.rc_blockcount = min(*aglen, + ext.rc_startblock - *agbno); + tmp.rc_refcount = 1 + adj; + trace_xfs_refcount_modify_extent(cur->bc_mp, + cur->bc_private.a.agno, &tmp); + + /* + * Either cover the hole (increment) or + * delete the range (decrement). + */ + if (tmp.rc_refcount) { + error = xfs_refcount_insert(cur, &tmp, + &found_tmp); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, + found_tmp == 1, out_error); + cur->bc_private.a.priv.refc.nr_ops++; + } else { + fsbno = XFS_AGB_TO_FSB(cur->bc_mp, + cur->bc_private.a.agno, + tmp.rc_startblock); + xfs_bmap_add_free(cur->bc_mp, dfops, fsbno, + tmp.rc_blockcount, oinfo); + } + + (*agbno) += tmp.rc_blockcount; + (*aglen) -= tmp.rc_blockcount; + + error = xfs_refcount_lookup_ge(cur, *agbno, + &found_rec); + if (error) + goto out_error; + } + + /* Stop if there's nothing left to modify */ + if (*aglen == 0 || !xfs_refcount_still_have_space(cur)) + break; + + /* + * Adjust the reference count and either update the tree + * (incr) or free the blocks (decr). + */ + if (ext.rc_refcount == MAXREFCOUNT) + goto skip; + ext.rc_refcount += adj; + trace_xfs_refcount_modify_extent(cur->bc_mp, + cur->bc_private.a.agno, &ext); + if (ext.rc_refcount > 1) { + error = xfs_refcount_update(cur, &ext); + if (error) + goto out_error; + cur->bc_private.a.priv.refc.nr_ops++; + } else if (ext.rc_refcount == 1) { + error = xfs_refcount_delete(cur, &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, + found_rec == 1, out_error); + cur->bc_private.a.priv.refc.nr_ops++; + goto advloop; + } else { + fsbno = XFS_AGB_TO_FSB(cur->bc_mp, + cur->bc_private.a.agno, + ext.rc_startblock); + xfs_bmap_add_free(cur->bc_mp, dfops, fsbno, + ext.rc_blockcount, oinfo); + } + +skip: + error = xfs_btree_increment(cur, 0, &found_rec); + if (error) + goto out_error; + +advloop: + (*agbno) += ext.rc_blockcount; + (*aglen) -= ext.rc_blockcount; + } + + return error; +out_error: + trace_xfs_refcount_modify_extent_error(cur->bc_mp, + cur->bc_private.a.agno, error, _RET_IP_); + return error; +} + +/* Adjust the reference count of a range of AG blocks. */ +STATIC int +xfs_refcount_adjust( + struct xfs_btree_cur *cur, + xfs_agblock_t agbno, + xfs_extlen_t aglen, + xfs_agblock_t *new_agbno, + xfs_extlen_t *new_aglen, + enum xfs_refc_adjust_op adj, + struct xfs_defer_ops *dfops, + struct xfs_owner_info *oinfo) +{ + bool shape_changed; + int shape_changes = 0; + int error; + + *new_agbno = agbno; + *new_aglen = aglen; + if (adj == XFS_REFCOUNT_ADJUST_INCREASE) + trace_xfs_refcount_increase(cur->bc_mp, cur->bc_private.a.agno, + agbno, aglen); + else + trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_private.a.agno, + agbno, aglen); + + /* + * Ensure that no rcextents cross the boundary of the adjustment range. 
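+	 * After splitting at both agbno and agbno + aglen, every record in
+	 * the tree lies either wholly inside or wholly outside the
+	 * adjustment range.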
+ */ + error = xfs_refcount_split_extent(cur, agbno, &shape_changed); + if (error) + goto out_error; + if (shape_changed) + shape_changes++; + + error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); + if (error) + goto out_error; + if (shape_changed) + shape_changes++; + + /* + * Try to merge with the left or right extents of the range. + */ + error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj, + XFS_FIND_RCEXT_SHARED, &shape_changed); + if (error) + goto out_error; + if (shape_changed) + shape_changes++; + if (shape_changes) + cur->bc_private.a.priv.refc.shape_changes++; + + /* Now that we've taken care of the ends, adjust the middle extents */ + error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, + adj, dfops, oinfo); + if (error) + goto out_error; + + return 0; + +out_error: + trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_private.a.agno, + error, _RET_IP_); + return error; +} + +/* Clean up after calling xfs_refcount_finish_one. */ +void +xfs_refcount_finish_one_cleanup( + struct xfs_trans *tp, + struct xfs_btree_cur *rcur, + int error) +{ + struct xfs_buf *agbp; + + if (rcur == NULL) + return; + agbp = rcur->bc_private.a.agbp; + xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + if (error) + xfs_trans_brelse(tp, agbp); +} + +/* + * Process one of the deferred refcount operations. We pass back the + * btree cursor to maintain our lock on the btree between calls. + * This saves time and eliminates a buffer deadlock between the + * superblock and the AGF because we'll always grab them in the same + * order. + */ +int +xfs_refcount_finish_one( + struct xfs_trans *tp, + struct xfs_defer_ops *dfops, + enum xfs_refcount_intent_type type, + xfs_fsblock_t startblock, + xfs_extlen_t blockcount, + xfs_fsblock_t *new_fsb, + xfs_extlen_t *new_len, + struct xfs_btree_cur **pcur) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_btree_cur *rcur; + struct xfs_buf *agbp = NULL; + int error = 0; + xfs_agnumber_t agno; + xfs_agblock_t bno; + xfs_agblock_t new_agbno; + unsigned long nr_ops = 0; + int shape_changes = 0; + + agno = XFS_FSB_TO_AGNO(mp, startblock); + ASSERT(agno != NULLAGNUMBER); + bno = XFS_FSB_TO_AGBNO(mp, startblock); + + trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock), + type, XFS_FSB_TO_AGBNO(mp, startblock), + blockcount); + + if (XFS_TEST_ERROR(false, mp, + XFS_ERRTAG_REFCOUNT_FINISH_ONE, + XFS_RANDOM_REFCOUNT_FINISH_ONE)) + return -EIO; + + /* + * If we haven't gotten a cursor or the cursor AG doesn't match + * the startblock, get one now. 
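+	 * Carry nr_ops and shape_changes over from the old cursor so that
+	 * xfs_refcount_still_have_space() keeps accounting against the same
+	 * transaction reservation after the AG switch.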
+ */ + rcur = *pcur; + if (rcur != NULL && rcur->bc_private.a.agno != agno) { + nr_ops = rcur->bc_private.a.priv.refc.nr_ops; + shape_changes = rcur->bc_private.a.priv.refc.shape_changes; + xfs_refcount_finish_one_cleanup(tp, rcur, 0); + rcur = NULL; + *pcur = NULL; + } + if (rcur == NULL) { + error = xfs_alloc_read_agf(tp->t_mountp, tp, agno, + XFS_ALLOC_FLAG_FREEING, &agbp); + if (error) + return error; + if (!agbp) + return -EFSCORRUPTED; + + rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, dfops); + if (!rcur) { + error = -ENOMEM; + goto out_cur; + } + rcur->bc_private.a.priv.refc.nr_ops = nr_ops; + rcur->bc_private.a.priv.refc.shape_changes = shape_changes; + } + *pcur = rcur; + + switch (type) { + case XFS_REFCOUNT_INCREASE: + error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, + new_len, XFS_REFCOUNT_ADJUST_INCREASE, dfops, NULL); + *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno); + break; + case XFS_REFCOUNT_DECREASE: + error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, + new_len, XFS_REFCOUNT_ADJUST_DECREASE, dfops, NULL); + *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno); + break; + case XFS_REFCOUNT_ALLOC_COW: + *new_fsb = startblock + blockcount; + *new_len = 0; + error = __xfs_refcount_cow_alloc(rcur, bno, blockcount, dfops); + break; + case XFS_REFCOUNT_FREE_COW: + *new_fsb = startblock + blockcount; + *new_len = 0; + error = __xfs_refcount_cow_free(rcur, bno, blockcount, dfops); + break; + default: + ASSERT(0); + error = -EFSCORRUPTED; + } + if (!error && *new_len > 0) + trace_xfs_refcount_finish_one_leftover(mp, agno, type, + bno, blockcount, new_agbno, *new_len); + return error; + +out_cur: + xfs_trans_brelse(tp, agbp); + + return error; +} + +/* + * Record a refcount intent for later processing. + */ +static int +__xfs_refcount_add( + struct xfs_mount *mp, + struct xfs_defer_ops *dfops, + enum xfs_refcount_intent_type type, + xfs_fsblock_t startblock, + xfs_extlen_t blockcount) +{ + struct xfs_refcount_intent *ri; + + trace_xfs_refcount_defer(mp, XFS_FSB_TO_AGNO(mp, startblock), + type, XFS_FSB_TO_AGBNO(mp, startblock), + blockcount); + + ri = kmem_alloc(sizeof(struct xfs_refcount_intent), + KM_SLEEP | KM_NOFS); + INIT_LIST_HEAD(&ri->ri_list); + ri->ri_type = type; + ri->ri_startblock = startblock; + ri->ri_blockcount = blockcount; + + xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list); + return 0; +} + +/* + * Increase the reference count of the blocks backing a file's extent. + */ +int +xfs_refcount_increase_extent( + struct xfs_mount *mp, + struct xfs_defer_ops *dfops, + struct xfs_bmbt_irec *PREV) +{ + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return 0; + + return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_INCREASE, + PREV->br_startblock, PREV->br_blockcount); +} + +/* + * Decrease the reference count of the blocks backing a file's extent. + */ +int +xfs_refcount_decrease_extent( + struct xfs_mount *mp, + struct xfs_defer_ops *dfops, + struct xfs_bmbt_irec *PREV) +{ + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return 0; + + return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_DECREASE, + PREV->br_startblock, PREV->br_blockcount); +} + +/* + * Given an AG extent, find the lowest-numbered run of shared blocks + * within that range and return the range in fbno/flen. If + * find_end_of_shared is set, return the longest contiguous extent of + * shared blocks; if not, just return the first extent we find. If no + * shared blocks are found, fbno and flen will be set to NULLAGBLOCK + * and 0, respectively. 
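+ *
+ * For example, given a single record (startblock 10, blockcount 5,
+ * refcount 2) and a query for agbno 12, aglen 10, we return fbno == 12
+ * and flen == 3.  With find_end_of_shared set, flen is also extended
+ * across any records immediately adjacent to block 15.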
+ */ +int +xfs_refcount_find_shared( + struct xfs_btree_cur *cur, + xfs_agblock_t agbno, + xfs_extlen_t aglen, + xfs_agblock_t *fbno, + xfs_extlen_t *flen, + bool find_end_of_shared) +{ + struct xfs_refcount_irec tmp; + int i; + int have; + int error; + + trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_private.a.agno, + agbno, aglen); + + /* By default, skip the whole range */ + *fbno = NULLAGBLOCK; + *flen = 0; + + /* Try to find a refcount extent that crosses the start */ + error = xfs_refcount_lookup_le(cur, agbno, &have); + if (error) + goto out_error; + if (!have) { + /* No left extent, look at the next one */ + error = xfs_btree_increment(cur, 0, &have); + if (error) + goto out_error; + if (!have) + goto done; + } + error = xfs_refcount_get_rec(cur, &tmp, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error); + + /* If the extent ends before the start, look at the next one */ + if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) { + error = xfs_btree_increment(cur, 0, &have); + if (error) + goto out_error; + if (!have) + goto done; + error = xfs_refcount_get_rec(cur, &tmp, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error); + } + + /* If the extent starts after the range we want, bail out */ + if (tmp.rc_startblock >= agbno + aglen) + goto done; + + /* We found the start of a shared extent! */ + if (tmp.rc_startblock < agbno) { + tmp.rc_blockcount -= (agbno - tmp.rc_startblock); + tmp.rc_startblock = agbno; + } + + *fbno = tmp.rc_startblock; + *flen = min(tmp.rc_blockcount, agbno + aglen - *fbno); + if (!find_end_of_shared) + goto done; + + /* Otherwise, find the end of this shared extent */ + while (*fbno + *flen < agbno + aglen) { + error = xfs_btree_increment(cur, 0, &have); + if (error) + goto out_error; + if (!have) + break; + error = xfs_refcount_get_rec(cur, &tmp, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error); + if (tmp.rc_startblock >= agbno + aglen || + tmp.rc_startblock != *fbno + *flen) + break; + *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno); + } + +done: + trace_xfs_refcount_find_shared_result(cur->bc_mp, + cur->bc_private.a.agno, *fbno, *flen); + +out_error: + if (error) + trace_xfs_refcount_find_shared_error(cur->bc_mp, + cur->bc_private.a.agno, error, _RET_IP_); + return error; +} + +/* + * Recovering CoW Blocks After a Crash + * + * Due to the way that the copy on write mechanism works, there's a window of + * opportunity in which we can lose track of allocated blocks during a crash. + * Because CoW uses delayed allocation in the in-core CoW fork, writeback + * causes blocks to be allocated and stored in the CoW fork. The blocks are + * no longer in the free space btree but are not otherwise recorded anywhere + * until the write completes and the blocks are mapped into the file. A crash + * in between allocation and remapping results in the replacement blocks being + * lost. This situation is exacerbated by the CoW extent size hint because + * allocations can hang around for long time. + * + * However, there is a place where we can record these allocations before they + * become mappings -- the reference count btree. The btree does not record + * extents with refcount == 1, so we can record allocations with a refcount of + * 1. Blocks being used for CoW writeout cannot be shared, so there should be + * no conflict with shared block records. 
These mappings should be created + * when we allocate blocks to the CoW fork and deleted when they're removed + * from the CoW fork. + * + * Minor nit: records for in-progress CoW allocations and records for shared + * extents must never be merged, to preserve the property that (except for CoW + * allocations) there are no refcount btree entries with refcount == 1. The + * only time this could potentially happen is when unsharing a block that's + * adjacent to CoW allocations, so we must be careful to avoid this. + * + * At mount time we recover lost CoW allocations by searching the refcount + * btree for these refcount == 1 mappings. These represent CoW allocations + * that were in progress at the time the filesystem went down, so we can free + * them to get the space back. + * + * This mechanism is superior to creating EFIs for unmapped CoW extents for + * several reasons -- first, EFIs pin the tail of the log and would have to be + * periodically relogged to avoid filling up the log. Second, CoW completions + * will have to file an EFD and create new EFIs for whatever remains in the + * CoW fork; this partially takes care of (1) but extent-size reservations + * will have to periodically relog even if there's no writeout in progress. + * This can happen if the CoW extent size hint is set, which you really want. + * Third, EFIs cannot currently be automatically relogged into newer + * transactions to advance the log tail. Fourth, stuffing the log full of + * EFIs places an upper bound on the number of CoW allocations that can be + * held filesystem-wide at any given time. Recording them in the refcount + * btree doesn't require us to maintain any state in memory and doesn't pin + * the log. + */ +/* + * Adjust the refcounts of CoW allocations. These allocations are "magic" + * in that they're not referenced anywhere else in the filesystem, so we + * stash them in the refcount btree with a refcount of 1 until either file + * remapping (or CoW cancellation) happens. + */ +STATIC int +xfs_refcount_adjust_cow_extents( + struct xfs_btree_cur *cur, + xfs_agblock_t agbno, + xfs_extlen_t aglen, + enum xfs_refc_adjust_op adj, + struct xfs_defer_ops *dfops, + struct xfs_owner_info *oinfo) +{ + struct xfs_refcount_irec ext, tmp; + int error; + int found_rec, found_tmp; + + if (aglen == 0) + return 0; + + /* Find any overlapping refcount records */ + error = xfs_refcount_lookup_ge(cur, agbno, &found_rec); + if (error) + goto out_error; + error = xfs_refcount_get_rec(cur, &ext, &found_rec); + if (error) + goto out_error; + if (!found_rec) { + ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks + + XFS_REFC_COW_START; + ext.rc_blockcount = 0; + ext.rc_refcount = 0; + } + + switch (adj) { + case XFS_REFCOUNT_ADJUST_COW_ALLOC: + /* Adding a CoW reservation, there should be nothing here. */ + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, + ext.rc_startblock >= agbno + aglen, out_error); + + tmp.rc_startblock = agbno; + tmp.rc_blockcount = aglen; + tmp.rc_refcount = 1; + trace_xfs_refcount_modify_extent(cur->bc_mp, + cur->bc_private.a.agno, &tmp); + + error = xfs_refcount_insert(cur, &tmp, + &found_tmp); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, + found_tmp == 1, out_error); + break; + case XFS_REFCOUNT_ADJUST_COW_FREE: + /* Removing a CoW reservation, there should be one extent. 
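+		 * Staging extents are inserted whole and never merged with
+		 * shared-extent records, so the record must exactly cover
+		 * the range being freed.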
*/ + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, + ext.rc_startblock == agbno, out_error); + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, + ext.rc_blockcount == aglen, out_error); + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, + ext.rc_refcount == 1, out_error); + + ext.rc_refcount = 0; + trace_xfs_refcount_modify_extent(cur->bc_mp, + cur->bc_private.a.agno, &ext); + error = xfs_refcount_delete(cur, &found_rec); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, + found_rec == 1, out_error); + break; + default: + ASSERT(0); + } + + return error; +out_error: + trace_xfs_refcount_modify_extent_error(cur->bc_mp, + cur->bc_private.a.agno, error, _RET_IP_); + return error; +} + +/* + * Add or remove refcount btree entries for CoW reservations. + */ +STATIC int +xfs_refcount_adjust_cow( + struct xfs_btree_cur *cur, + xfs_agblock_t agbno, + xfs_extlen_t aglen, + enum xfs_refc_adjust_op adj, + struct xfs_defer_ops *dfops) +{ + bool shape_changed; + int error; + + agbno += XFS_REFC_COW_START; + + /* + * Ensure that no rcextents cross the boundary of the adjustment range. + */ + error = xfs_refcount_split_extent(cur, agbno, &shape_changed); + if (error) + goto out_error; + + error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); + if (error) + goto out_error; + + /* + * Try to merge with the left or right extents of the range. + */ + error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj, + XFS_FIND_RCEXT_COW, &shape_changed); + if (error) + goto out_error; + + /* Now that we've taken care of the ends, adjust the middle extents */ + error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj, + dfops, NULL); + if (error) + goto out_error; + + return 0; + +out_error: + trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_private.a.agno, + error, _RET_IP_); + return error; +} + +/* + * Record a CoW allocation in the refcount btree. + */ +STATIC int +__xfs_refcount_cow_alloc( + struct xfs_btree_cur *rcur, + xfs_agblock_t agbno, + xfs_extlen_t aglen, + struct xfs_defer_ops *dfops) +{ + int error; + + trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno, + agbno, aglen); + + /* Add refcount btree reservation */ + error = xfs_refcount_adjust_cow(rcur, agbno, aglen, + XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops); + if (error) + return error; + + /* Add rmap entry */ + if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) { + error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops, + rcur->bc_private.a.agno, + agbno, aglen, XFS_RMAP_OWN_COW); + if (error) + return error; + } + + return error; +} + +/* + * Remove a CoW allocation from the refcount btree. + */ +STATIC int +__xfs_refcount_cow_free( + struct xfs_btree_cur *rcur, + xfs_agblock_t agbno, + xfs_extlen_t aglen, + struct xfs_defer_ops *dfops) +{ + int error; + + trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno, + agbno, aglen); + + /* Remove refcount btree reservation */ + error = xfs_refcount_adjust_cow(rcur, agbno, aglen, + XFS_REFCOUNT_ADJUST_COW_FREE, dfops); + if (error) + return error; + + /* Remove rmap entry */ + if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) { + error = xfs_rmap_free_extent(rcur->bc_mp, dfops, + rcur->bc_private.a.agno, + agbno, aglen, XFS_RMAP_OWN_COW); + if (error) + return error; + } + + return error; +} + +/* Record a CoW staging extent in the refcount btree. 
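+ * This only queues a deferred XFS_REFCOUNT_ALLOC_COW intent; the btree
+ * update itself happens later, in xfs_refcount_finish_one.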
*/ +int +xfs_refcount_alloc_cow_extent( + struct xfs_mount *mp, + struct xfs_defer_ops *dfops, + xfs_fsblock_t fsb, + xfs_extlen_t len) +{ + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return 0; + + return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, + fsb, len); +} + +/* Forget a CoW staging event in the refcount btree. */ +int +xfs_refcount_free_cow_extent( + struct xfs_mount *mp, + struct xfs_defer_ops *dfops, + xfs_fsblock_t fsb, + xfs_extlen_t len) +{ + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return 0; + + return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW, + fsb, len); +} + +struct xfs_refcount_recovery { + struct list_head rr_list; + struct xfs_refcount_irec rr_rrec; +}; + +/* Stuff an extent on the recovery list. */ +STATIC int +xfs_refcount_recover_extent( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + void *priv) +{ + struct list_head *debris = priv; + struct xfs_refcount_recovery *rr; + + if (be32_to_cpu(rec->refc.rc_refcount) != 1) + return -EFSCORRUPTED; + + rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), KM_SLEEP); + xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); + list_add_tail(&rr->rr_list, debris); + + return 0; +} + +/* Find and remove leftover CoW reservations. */ +int +xfs_refcount_recover_cow_leftovers( + struct xfs_mount *mp, + xfs_agnumber_t agno) +{ + struct xfs_trans *tp; + struct xfs_btree_cur *cur; + struct xfs_buf *agbp; + struct xfs_refcount_recovery *rr, *n; + struct list_head debris; + union xfs_btree_irec low; + union xfs_btree_irec high; + struct xfs_defer_ops dfops; + xfs_fsblock_t fsb; + xfs_agblock_t agbno; + int error; + + if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) + return -EOPNOTSUPP; + + error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + if (error) + return error; + cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL); + + /* Find all the leftover CoW staging extents. */ + INIT_LIST_HEAD(&debris); + memset(&low, 0, sizeof(low)); + memset(&high, 0, sizeof(high)); + low.rc.rc_startblock = XFS_REFC_COW_START; + high.rc.rc_startblock = -1U; + error = xfs_btree_query_range(cur, &low, &high, + xfs_refcount_recover_extent, &debris); + if (error) + goto out_cursor; + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + xfs_buf_relse(agbp); + + /* Now iterate the list to free the leftovers */ + list_for_each_entry(rr, &debris, rr_list) { + /* Set up transaction. */ + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); + if (error) + goto out_free; + + trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec); + + /* Free the orphan record */ + xfs_defer_init(&dfops, &fsb); + agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START; + fsb = XFS_AGB_TO_FSB(mp, agno, agbno); + error = xfs_refcount_free_cow_extent(mp, &dfops, fsb, + rr->rr_rrec.rc_blockcount); + if (error) + goto out_defer; + + /* Free the block. 
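+		 * Both the intent queued above and this deferred free are
+		 * processed by xfs_defer_finish() below.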
*/ + xfs_bmap_add_free(mp, &dfops, fsb, + rr->rr_rrec.rc_blockcount, NULL); + + error = xfs_defer_finish(&tp, &dfops, NULL); + if (error) + goto out_defer; + + error = xfs_trans_commit(tp); + if (error) + goto out_free; + } + +out_free: + /* Free the leftover list */ + list_for_each_entry_safe(rr, n, &debris, rr_list) { + list_del(&rr->rr_list); + kmem_free(rr); + } + return error; + +out_cursor: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + xfs_buf_relse(agbp); + goto out_free; + +out_defer: + xfs_defer_cancel(&dfops); + xfs_trans_cancel(tp); + goto out_free; +} diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h new file mode 100644 index 000000000000..098dc668ab2c --- /dev/null +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2016 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#ifndef __XFS_REFCOUNT_H__ +#define __XFS_REFCOUNT_H__ + +extern int xfs_refcount_lookup_le(struct xfs_btree_cur *cur, + xfs_agblock_t bno, int *stat); +extern int xfs_refcount_lookup_ge(struct xfs_btree_cur *cur, + xfs_agblock_t bno, int *stat); +extern int xfs_refcount_get_rec(struct xfs_btree_cur *cur, + struct xfs_refcount_irec *irec, int *stat); + +enum xfs_refcount_intent_type { + XFS_REFCOUNT_INCREASE = 1, + XFS_REFCOUNT_DECREASE, + XFS_REFCOUNT_ALLOC_COW, + XFS_REFCOUNT_FREE_COW, +}; + +struct xfs_refcount_intent { + struct list_head ri_list; + enum xfs_refcount_intent_type ri_type; + xfs_fsblock_t ri_startblock; + xfs_extlen_t ri_blockcount; +}; + +extern int xfs_refcount_increase_extent(struct xfs_mount *mp, + struct xfs_defer_ops *dfops, struct xfs_bmbt_irec *irec); +extern int xfs_refcount_decrease_extent(struct xfs_mount *mp, + struct xfs_defer_ops *dfops, struct xfs_bmbt_irec *irec); + +extern void xfs_refcount_finish_one_cleanup(struct xfs_trans *tp, + struct xfs_btree_cur *rcur, int error); +extern int xfs_refcount_finish_one(struct xfs_trans *tp, + struct xfs_defer_ops *dfops, enum xfs_refcount_intent_type type, + xfs_fsblock_t startblock, xfs_extlen_t blockcount, + xfs_fsblock_t *new_fsb, xfs_extlen_t *new_len, + struct xfs_btree_cur **pcur); + +extern int xfs_refcount_find_shared(struct xfs_btree_cur *cur, + xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno, + xfs_extlen_t *flen, bool find_end_of_shared); + +extern int xfs_refcount_alloc_cow_extent(struct xfs_mount *mp, + struct xfs_defer_ops *dfops, xfs_fsblock_t fsb, + xfs_extlen_t len); +extern int xfs_refcount_free_cow_extent(struct xfs_mount *mp, + struct xfs_defer_ops *dfops, xfs_fsblock_t fsb, + xfs_extlen_t len); +extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp, + xfs_agnumber_t agno); + +#endif /* __XFS_REFCOUNT_H__ */ diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c 
b/fs/xfs/libxfs/xfs_refcount_btree.c new file mode 100644 index 000000000000..453bb2757ec2 --- /dev/null +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -0,0 +1,451 @@ +/* + * Copyright (C) 2016 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_mount.h" +#include "xfs_btree.h" +#include "xfs_bmap.h" +#include "xfs_refcount_btree.h" +#include "xfs_alloc.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_cksum.h" +#include "xfs_trans.h" +#include "xfs_bit.h" +#include "xfs_rmap.h" + +static struct xfs_btree_cur * +xfs_refcountbt_dup_cursor( + struct xfs_btree_cur *cur) +{ + return xfs_refcountbt_init_cursor(cur->bc_mp, cur->bc_tp, + cur->bc_private.a.agbp, cur->bc_private.a.agno, + cur->bc_private.a.dfops); +} + +STATIC void +xfs_refcountbt_set_root( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + int inc) +{ + struct xfs_buf *agbp = cur->bc_private.a.agbp; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); + struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno); + + ASSERT(ptr->s != 0); + + agf->agf_refcount_root = ptr->s; + be32_add_cpu(&agf->agf_refcount_level, inc); + pag->pagf_refcount_level += inc; + xfs_perag_put(pag); + + xfs_alloc_log_agf(cur->bc_tp, agbp, + XFS_AGF_REFCOUNT_ROOT | XFS_AGF_REFCOUNT_LEVEL); +} + +STATIC int +xfs_refcountbt_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) +{ + struct xfs_buf *agbp = cur->bc_private.a.agbp; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_alloc_arg args; /* block allocation args */ + int error; /* error return value */ + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + + memset(&args, 0, sizeof(args)); + args.tp = cur->bc_tp; + args.mp = cur->bc_mp; + args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno, + xfs_refc_block(args.mp)); + args.firstblock = args.fsbno; + xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_REFC); + args.minlen = args.maxlen = args.prod = 1; + args.resv = XFS_AG_RESV_METADATA; + + error = xfs_alloc_vextent(&args); + if (error) + goto out_error; + trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno, + args.agbno, 1); + if (args.fsbno == NULLFSBLOCK) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + ASSERT(args.agno == cur->bc_private.a.agno); + ASSERT(args.len == 1); + + new->s = cpu_to_be32(args.agbno); + be32_add_cpu(&agf->agf_refcount_blocks, 1); + xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + 
*stat = 1; + return 0; + +out_error: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + +STATIC int +xfs_refcountbt_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + struct xfs_mount *mp = cur->bc_mp; + struct xfs_buf *agbp = cur->bc_private.a.agbp; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); + struct xfs_owner_info oinfo; + int error; + + trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_private.a.agno, + XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC); + be32_add_cpu(&agf->agf_refcount_blocks, -1); + xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); + error = xfs_free_extent(cur->bc_tp, fsbno, 1, &oinfo, + XFS_AG_RESV_METADATA); + if (error) + return error; + + return error; +} + +STATIC int +xfs_refcountbt_get_minrecs( + struct xfs_btree_cur *cur, + int level) +{ + return cur->bc_mp->m_refc_mnr[level != 0]; +} + +STATIC int +xfs_refcountbt_get_maxrecs( + struct xfs_btree_cur *cur, + int level) +{ + return cur->bc_mp->m_refc_mxr[level != 0]; +} + +STATIC void +xfs_refcountbt_init_key_from_rec( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + key->refc.rc_startblock = rec->refc.rc_startblock; +} + +STATIC void +xfs_refcountbt_init_high_key_from_rec( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + __u32 x; + + x = be32_to_cpu(rec->refc.rc_startblock); + x += be32_to_cpu(rec->refc.rc_blockcount) - 1; + key->refc.rc_startblock = cpu_to_be32(x); +} + +STATIC void +xfs_refcountbt_init_rec_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec) +{ + rec->refc.rc_startblock = cpu_to_be32(cur->bc_rec.rc.rc_startblock); + rec->refc.rc_blockcount = cpu_to_be32(cur->bc_rec.rc.rc_blockcount); + rec->refc.rc_refcount = cpu_to_be32(cur->bc_rec.rc.rc_refcount); +} + +STATIC void +xfs_refcountbt_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + + ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno)); + ASSERT(agf->agf_refcount_root != 0); + + ptr->s = agf->agf_refcount_root; +} + +STATIC __int64_t +xfs_refcountbt_key_diff( + struct xfs_btree_cur *cur, + union xfs_btree_key *key) +{ + struct xfs_refcount_irec *rec = &cur->bc_rec.rc; + struct xfs_refcount_key *kp = &key->refc; + + return (__int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock; +} + +STATIC __int64_t +xfs_refcountbt_diff_two_keys( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + return (__int64_t)be32_to_cpu(k1->refc.rc_startblock) - + be32_to_cpu(k2->refc.rc_startblock); +} + +STATIC bool +xfs_refcountbt_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + struct xfs_perag *pag = bp->b_pag; + unsigned int level; + + if (block->bb_magic != cpu_to_be32(XFS_REFC_CRC_MAGIC)) + return false; + + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return false; + if (!xfs_btree_sblock_v5hdr_verify(bp)) + return false; + + level = be16_to_cpu(block->bb_level); + if (pag && pag->pagf_init) { + if (level >= pag->pagf_refcount_level) + return false; + } else if (level >= mp->m_refc_maxlevels) + return false; + + return xfs_btree_sblock_verify(bp, mp->m_refc_mxr[level != 0]); +} + +STATIC void +xfs_refcountbt_read_verify( + struct xfs_buf *bp) +{ + if (!xfs_btree_sblock_verify_crc(bp)) + xfs_buf_ioerror(bp, -EFSBADCRC); + else if 
(!xfs_refcountbt_verify(bp)) + xfs_buf_ioerror(bp, -EFSCORRUPTED); + + if (bp->b_error) { + trace_xfs_btree_corrupt(bp, _RET_IP_); + xfs_verifier_error(bp); + } +} + +STATIC void +xfs_refcountbt_write_verify( + struct xfs_buf *bp) +{ + if (!xfs_refcountbt_verify(bp)) { + trace_xfs_btree_corrupt(bp, _RET_IP_); + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); + return; + } + xfs_btree_sblock_calc_crc(bp); + +} + +const struct xfs_buf_ops xfs_refcountbt_buf_ops = { + .name = "xfs_refcountbt", + .verify_read = xfs_refcountbt_read_verify, + .verify_write = xfs_refcountbt_write_verify, +}; + +#if defined(DEBUG) || defined(XFS_WARN) +STATIC int +xfs_refcountbt_keys_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + return be32_to_cpu(k1->refc.rc_startblock) < + be32_to_cpu(k2->refc.rc_startblock); +} + +STATIC int +xfs_refcountbt_recs_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_rec *r1, + union xfs_btree_rec *r2) +{ + return be32_to_cpu(r1->refc.rc_startblock) + + be32_to_cpu(r1->refc.rc_blockcount) <= + be32_to_cpu(r2->refc.rc_startblock); +} +#endif + +static const struct xfs_btree_ops xfs_refcountbt_ops = { + .rec_len = sizeof(struct xfs_refcount_rec), + .key_len = sizeof(struct xfs_refcount_key), + + .dup_cursor = xfs_refcountbt_dup_cursor, + .set_root = xfs_refcountbt_set_root, + .alloc_block = xfs_refcountbt_alloc_block, + .free_block = xfs_refcountbt_free_block, + .get_minrecs = xfs_refcountbt_get_minrecs, + .get_maxrecs = xfs_refcountbt_get_maxrecs, + .init_key_from_rec = xfs_refcountbt_init_key_from_rec, + .init_high_key_from_rec = xfs_refcountbt_init_high_key_from_rec, + .init_rec_from_cur = xfs_refcountbt_init_rec_from_cur, + .init_ptr_from_cur = xfs_refcountbt_init_ptr_from_cur, + .key_diff = xfs_refcountbt_key_diff, + .buf_ops = &xfs_refcountbt_buf_ops, + .diff_two_keys = xfs_refcountbt_diff_two_keys, +#if defined(DEBUG) || defined(XFS_WARN) + .keys_inorder = xfs_refcountbt_keys_inorder, + .recs_inorder = xfs_refcountbt_recs_inorder, +#endif +}; + +/* + * Allocate a new refcount btree cursor. + */ +struct xfs_btree_cur * +xfs_refcountbt_init_cursor( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agnumber_t agno, + struct xfs_defer_ops *dfops) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_btree_cur *cur; + + ASSERT(agno != NULLAGNUMBER); + ASSERT(agno < mp->m_sb.sb_agcount); + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); + + cur->bc_tp = tp; + cur->bc_mp = mp; + cur->bc_btnum = XFS_BTNUM_REFC; + cur->bc_blocklog = mp->m_sb.sb_blocklog; + cur->bc_ops = &xfs_refcountbt_ops; + + cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level); + + cur->bc_private.a.agbp = agbp; + cur->bc_private.a.agno = agno; + cur->bc_private.a.dfops = dfops; + cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; + + cur->bc_private.a.priv.refc.nr_ops = 0; + cur->bc_private.a.priv.refc.shape_changes = 0; + + return cur; +} + +/* + * Calculate the number of records in a refcount btree block. + */ +int +xfs_refcountbt_maxrecs( + struct xfs_mount *mp, + int blocklen, + bool leaf) +{ + blocklen -= XFS_REFCOUNT_BLOCK_LEN; + + if (leaf) + return blocklen / sizeof(struct xfs_refcount_rec); + return blocklen / (sizeof(struct xfs_refcount_key) + + sizeof(xfs_refcount_ptr_t)); +} + +/* Compute the maximum height of a refcount btree. 
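+ * This assumes the worst case of sb_agblocks records packed into
+ * minimally-full (m_refc_mnr) blocks at every level.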
*/ +void +xfs_refcountbt_compute_maxlevels( + struct xfs_mount *mp) +{ + mp->m_refc_maxlevels = xfs_btree_compute_maxlevels(mp, + mp->m_refc_mnr, mp->m_sb.sb_agblocks); +} + +/* Calculate the refcount btree size for some records. */ +xfs_extlen_t +xfs_refcountbt_calc_size( + struct xfs_mount *mp, + unsigned long long len) +{ + return xfs_btree_calc_size(mp, mp->m_refc_mnr, len); +} + +/* + * Calculate the maximum refcount btree size. + */ +xfs_extlen_t +xfs_refcountbt_max_size( + struct xfs_mount *mp) +{ + /* Bail out if we're uninitialized, which can happen in mkfs. */ + if (mp->m_refc_mxr[0] == 0) + return 0; + + return xfs_refcountbt_calc_size(mp, mp->m_sb.sb_agblocks); +} + +/* + * Figure out how many blocks to reserve and how many are used by this btree. + */ +int +xfs_refcountbt_calc_reserves( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_extlen_t *ask, + xfs_extlen_t *used) +{ + struct xfs_buf *agbp; + struct xfs_agf *agf; + xfs_extlen_t tree_len; + int error; + + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return 0; + + *ask += xfs_refcountbt_max_size(mp); + + error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + if (error) + return error; + + agf = XFS_BUF_TO_AGF(agbp); + tree_len = be32_to_cpu(agf->agf_refcount_blocks); + xfs_buf_relse(agbp); + + *used += tree_len; + + return error; +} diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h new file mode 100644 index 000000000000..3be7768bd51a --- /dev/null +++ b/fs/xfs/libxfs/xfs_refcount_btree.h @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2016 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#ifndef __XFS_REFCOUNT_BTREE_H__ +#define __XFS_REFCOUNT_BTREE_H__ + +/* + * Reference Count Btree on-disk structures + */ + +struct xfs_buf; +struct xfs_btree_cur; +struct xfs_mount; + +/* + * Btree block header size + */ +#define XFS_REFCOUNT_BLOCK_LEN XFS_BTREE_SBLOCK_CRC_LEN + +/* + * Record, key, and pointer address macros for btree blocks. 
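+ *
+ * Leaf blocks carry an array of records directly after the block header;
+ * node blocks carry maxrecs keys followed by maxrecs pointers, which is
+ * why XFS_REFCOUNT_PTR_ADDR offsets past the entire key array.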
+ * + * (note that some of these may appear unused, but they are used in userspace) + */ +#define XFS_REFCOUNT_REC_ADDR(block, index) \ + ((struct xfs_refcount_rec *) \ + ((char *)(block) + \ + XFS_REFCOUNT_BLOCK_LEN + \ + (((index) - 1) * sizeof(struct xfs_refcount_rec)))) + +#define XFS_REFCOUNT_KEY_ADDR(block, index) \ + ((struct xfs_refcount_key *) \ + ((char *)(block) + \ + XFS_REFCOUNT_BLOCK_LEN + \ + ((index) - 1) * sizeof(struct xfs_refcount_key))) + +#define XFS_REFCOUNT_PTR_ADDR(block, index, maxrecs) \ + ((xfs_refcount_ptr_t *) \ + ((char *)(block) + \ + XFS_REFCOUNT_BLOCK_LEN + \ + (maxrecs) * sizeof(struct xfs_refcount_key) + \ + ((index) - 1) * sizeof(xfs_refcount_ptr_t))) + +extern struct xfs_btree_cur *xfs_refcountbt_init_cursor(struct xfs_mount *mp, + struct xfs_trans *tp, struct xfs_buf *agbp, xfs_agnumber_t agno, + struct xfs_defer_ops *dfops); +extern int xfs_refcountbt_maxrecs(struct xfs_mount *mp, int blocklen, + bool leaf); +extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp); + +extern xfs_extlen_t xfs_refcountbt_calc_size(struct xfs_mount *mp, + unsigned long long len); +extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp); + +extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp, + xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); + +#endif /* __XFS_REFCOUNT_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 73d05407d663..3a8cc7139912 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -148,6 +148,37 @@ done: return error; } +STATIC int +xfs_rmap_delete( + struct xfs_btree_cur *rcur, + xfs_agblock_t agbno, + xfs_extlen_t len, + uint64_t owner, + uint64_t offset, + unsigned int flags) +{ + int i; + int error; + + trace_xfs_rmap_delete(rcur->bc_mp, rcur->bc_private.a.agno, agbno, + len, owner, offset, flags); + + error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done); + + error = xfs_btree_delete(rcur, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done); +done: + if (error) + trace_xfs_rmap_delete_error(rcur->bc_mp, + rcur->bc_private.a.agno, error, _RET_IP_); + return error; +} + static int xfs_rmap_btrec_to_irec( union xfs_btree_rec *rec, @@ -180,6 +211,160 @@ xfs_rmap_get_rec( return xfs_rmap_btrec_to_irec(rec, irec); } +struct xfs_find_left_neighbor_info { + struct xfs_rmap_irec high; + struct xfs_rmap_irec *irec; + int *stat; +}; + +/* For each rmap given, figure out if it matches the key we want. */ +STATIC int +xfs_rmap_find_left_neighbor_helper( + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *rec, + void *priv) +{ + struct xfs_find_left_neighbor_info *info = priv; + + trace_xfs_rmap_find_left_neighbor_candidate(cur->bc_mp, + cur->bc_private.a.agno, rec->rm_startblock, + rec->rm_blockcount, rec->rm_owner, rec->rm_offset, + rec->rm_flags); + + if (rec->rm_owner != info->high.rm_owner) + return XFS_BTREE_QUERY_RANGE_CONTINUE; + if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) && + !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) && + rec->rm_offset + rec->rm_blockcount - 1 != info->high.rm_offset) + return XFS_BTREE_QUERY_RANGE_CONTINUE; + + *info->irec = *rec; + *info->stat = 1; + return XFS_BTREE_QUERY_RANGE_ABORT; +} + +/* + * Find the record to the left of the given extent, being careful only to + * return a match with the same owner and adjacent physical and logical + * block ranges. 
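+ *
+ * "Adjacent" here means that the candidate record overlaps block bno - 1
+ * and, for inode owners, that its logical range ends exactly at
+ * offset - 1.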
+ */ +int +xfs_rmap_find_left_neighbor( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + uint64_t owner, + uint64_t offset, + unsigned int flags, + struct xfs_rmap_irec *irec, + int *stat) +{ + struct xfs_find_left_neighbor_info info; + int error; + + *stat = 0; + if (bno == 0) + return 0; + info.high.rm_startblock = bno - 1; + info.high.rm_owner = owner; + if (!XFS_RMAP_NON_INODE_OWNER(owner) && + !(flags & XFS_RMAP_BMBT_BLOCK)) { + if (offset == 0) + return 0; + info.high.rm_offset = offset - 1; + } else + info.high.rm_offset = 0; + info.high.rm_flags = flags; + info.high.rm_blockcount = 0; + info.irec = irec; + info.stat = stat; + + trace_xfs_rmap_find_left_neighbor_query(cur->bc_mp, + cur->bc_private.a.agno, bno, 0, owner, offset, flags); + + error = xfs_rmap_query_range(cur, &info.high, &info.high, + xfs_rmap_find_left_neighbor_helper, &info); + if (error == XFS_BTREE_QUERY_RANGE_ABORT) + error = 0; + if (*stat) + trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp, + cur->bc_private.a.agno, irec->rm_startblock, + irec->rm_blockcount, irec->rm_owner, + irec->rm_offset, irec->rm_flags); + return error; +} + +/* For each rmap given, figure out if it matches the key we want. */ +STATIC int +xfs_rmap_lookup_le_range_helper( + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *rec, + void *priv) +{ + struct xfs_find_left_neighbor_info *info = priv; + + trace_xfs_rmap_lookup_le_range_candidate(cur->bc_mp, + cur->bc_private.a.agno, rec->rm_startblock, + rec->rm_blockcount, rec->rm_owner, rec->rm_offset, + rec->rm_flags); + + if (rec->rm_owner != info->high.rm_owner) + return XFS_BTREE_QUERY_RANGE_CONTINUE; + if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) && + !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) && + (rec->rm_offset > info->high.rm_offset || + rec->rm_offset + rec->rm_blockcount <= info->high.rm_offset)) + return XFS_BTREE_QUERY_RANGE_CONTINUE; + + *info->irec = *rec; + *info->stat = 1; + return XFS_BTREE_QUERY_RANGE_ABORT; +} + +/* + * Find the record to the left of the given extent, being careful only to + * return a match with the same owner and overlapping physical and logical + * block ranges. This is the overlapping-interval version of + * xfs_rmap_lookup_le. + */ +int +xfs_rmap_lookup_le_range( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + uint64_t owner, + uint64_t offset, + unsigned int flags, + struct xfs_rmap_irec *irec, + int *stat) +{ + struct xfs_find_left_neighbor_info info; + int error; + + info.high.rm_startblock = bno; + info.high.rm_owner = owner; + if (!XFS_RMAP_NON_INODE_OWNER(owner) && !(flags & XFS_RMAP_BMBT_BLOCK)) + info.high.rm_offset = offset; + else + info.high.rm_offset = 0; + info.high.rm_flags = flags; + info.high.rm_blockcount = 0; + *stat = 0; + info.irec = irec; + info.stat = stat; + + trace_xfs_rmap_lookup_le_range(cur->bc_mp, + cur->bc_private.a.agno, bno, 0, owner, offset, flags); + error = xfs_rmap_query_range(cur, &info.high, &info.high, + xfs_rmap_lookup_le_range_helper, &info); + if (error == XFS_BTREE_QUERY_RANGE_ABORT) + error = 0; + if (*stat) + trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, + cur->bc_private.a.agno, irec->rm_startblock, + irec->rm_blockcount, irec->rm_owner, + irec->rm_offset, irec->rm_flags); + return error; +} + /* * Find the extent in the rmap btree and remove it. * @@ -1093,11 +1278,704 @@ done: return error; } +/* + * Convert an unwritten extent to a real extent or vice versa. If there is no + * possibility of overlapping extents, delegate to the simpler convert + * function. 
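+ *
+ * Because rmap records for shared extents can overlap, existing records
+ * cannot have their key fields rewritten in place; any change to a
+ * record's startblock or offset is done as a delete + re-insert pair.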
+ */
+STATIC int
+xfs_rmap_convert_shared(
+	struct xfs_btree_cur	*cur,
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	bool			unwritten,
+	struct xfs_owner_info	*oinfo)
+{
+	struct xfs_mount	*mp = cur->bc_mp;
+	struct xfs_rmap_irec	r[4];	/* neighbor extent entries */
+	/* left is 0, right is 1, prev is 2 */
+	/* new is 3 */
+	uint64_t		owner;
+	uint64_t		offset;
+	uint64_t		new_endoff;
+	unsigned int		oldext;
+	unsigned int		newext;
+	unsigned int		flags = 0;
+	int			i;
+	int			state = 0;
+	int			error;
+
+	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
+	ASSERT(!(XFS_RMAP_NON_INODE_OWNER(owner) ||
+			(flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))));
+	oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0;
+	new_endoff = offset + len;
+	trace_xfs_rmap_convert(mp, cur->bc_private.a.agno, bno, len,
+			unwritten, oinfo);
+
+	/*
+	 * For the initial lookup, look for an exact match or the left-adjacent
+	 * record for our insertion point. This will also give us the record for
+	 * start block contiguity tests.
+	 */
+	error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, flags,
+			&PREV, &i);
+	if (error)
+		goto done;
+	XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+
+	ASSERT(PREV.rm_offset <= offset);
+	ASSERT(PREV.rm_offset + PREV.rm_blockcount >= new_endoff);
+	ASSERT((PREV.rm_flags & XFS_RMAP_UNWRITTEN) == oldext);
+	newext = ~oldext & XFS_RMAP_UNWRITTEN;
+
+	/*
+	 * Set flags determining what part of the previous oldext allocation
+	 * extent is being replaced by a newext allocation.
+	 */
+	if (PREV.rm_offset == offset)
+		state |= RMAP_LEFT_FILLING;
+	if (PREV.rm_offset + PREV.rm_blockcount == new_endoff)
+		state |= RMAP_RIGHT_FILLING;
+
+	/* Is there a left record that abuts our range? */
+	error = xfs_rmap_find_left_neighbor(cur, bno, owner, offset, newext,
+			&LEFT, &i);
+	if (error)
+		goto done;
+	if (i) {
+		state |= RMAP_LEFT_VALID;
+		XFS_WANT_CORRUPTED_GOTO(mp,
+				LEFT.rm_startblock + LEFT.rm_blockcount <= bno,
+				done);
+		if (xfs_rmap_is_mergeable(&LEFT, owner, newext))
+			state |= RMAP_LEFT_CONTIG;
+	}
+
+	/* Is there a right record that abuts our range? */
+	error = xfs_rmap_lookup_eq(cur, bno + len, len, owner, offset + len,
+			newext, &i);
+	if (error)
+		goto done;
+	if (i) {
+		state |= RMAP_RIGHT_VALID;
+		error = xfs_rmap_get_rec(cur, &RIGHT, &i);
+		if (error)
+			goto done;
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= RIGHT.rm_startblock,
+				done);
+		trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
+				cur->bc_private.a.agno, RIGHT.rm_startblock,
+				RIGHT.rm_blockcount, RIGHT.rm_owner,
+				RIGHT.rm_offset, RIGHT.rm_flags);
+		if (xfs_rmap_is_mergeable(&RIGHT, owner, newext))
+			state |= RMAP_RIGHT_CONTIG;
+	}
+
+	/* check that left + prev + right is not too long */
+	if ((state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
+			RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) ==
+			(RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
+			 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG) &&
+	    (unsigned long)LEFT.rm_blockcount + len +
+	    RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX)
+		state &= ~RMAP_RIGHT_CONTIG;
+
+	trace_xfs_rmap_convert_state(mp, cur->bc_private.a.agno, state,
+			_RET_IP_);
+	/*
+	 * Switch out based on the FILLING and CONTIG state bits.
+	 */
+	switch (state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
+			 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) {
+	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
+	     RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
+		/*
+		 * Setting all of a previous oldext extent to newext.
+		 * The left and right neighbors are both contiguous with new.
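+		 * Three records collapse into one: RIGHT and PREV are
+		 * deleted, then LEFT (whose key does not change) is
+		 * extended to cover all three ranges.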
+ */ + error = xfs_rmap_delete(cur, RIGHT.rm_startblock, + RIGHT.rm_blockcount, RIGHT.rm_owner, + RIGHT.rm_offset, RIGHT.rm_flags); + if (error) + goto done; + error = xfs_rmap_delete(cur, PREV.rm_startblock, + PREV.rm_blockcount, PREV.rm_owner, + PREV.rm_offset, PREV.rm_flags); + if (error) + goto done; + NEW = LEFT; + error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock, + NEW.rm_blockcount, NEW.rm_owner, + NEW.rm_offset, NEW.rm_flags, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + NEW.rm_blockcount += PREV.rm_blockcount + RIGHT.rm_blockcount; + error = xfs_rmap_update(cur, &NEW); + if (error) + goto done; + break; + + case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG: + /* + * Setting all of a previous oldext extent to newext. + * The left neighbor is contiguous, the right is not. + */ + error = xfs_rmap_delete(cur, PREV.rm_startblock, + PREV.rm_blockcount, PREV.rm_owner, + PREV.rm_offset, PREV.rm_flags); + if (error) + goto done; + NEW = LEFT; + error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock, + NEW.rm_blockcount, NEW.rm_owner, + NEW.rm_offset, NEW.rm_flags, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + NEW.rm_blockcount += PREV.rm_blockcount; + error = xfs_rmap_update(cur, &NEW); + if (error) + goto done; + break; + + case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG: + /* + * Setting all of a previous oldext extent to newext. + * The right neighbor is contiguous, the left is not. + */ + error = xfs_rmap_delete(cur, RIGHT.rm_startblock, + RIGHT.rm_blockcount, RIGHT.rm_owner, + RIGHT.rm_offset, RIGHT.rm_flags); + if (error) + goto done; + NEW = PREV; + error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock, + NEW.rm_blockcount, NEW.rm_owner, + NEW.rm_offset, NEW.rm_flags, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + NEW.rm_blockcount += RIGHT.rm_blockcount; + NEW.rm_flags = RIGHT.rm_flags; + error = xfs_rmap_update(cur, &NEW); + if (error) + goto done; + break; + + case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING: + /* + * Setting all of a previous oldext extent to newext. + * Neither the left nor right neighbors are contiguous with + * the new one. + */ + NEW = PREV; + error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock, + NEW.rm_blockcount, NEW.rm_owner, + NEW.rm_offset, NEW.rm_flags, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + NEW.rm_flags = newext; + error = xfs_rmap_update(cur, &NEW); + if (error) + goto done; + break; + + case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG: + /* + * Setting the first part of a previous oldext extent to newext. + * The left neighbor is contiguous. + */ + NEW = PREV; + error = xfs_rmap_delete(cur, NEW.rm_startblock, + NEW.rm_blockcount, NEW.rm_owner, + NEW.rm_offset, NEW.rm_flags); + if (error) + goto done; + NEW.rm_offset += len; + NEW.rm_startblock += len; + NEW.rm_blockcount -= len; + error = xfs_rmap_insert(cur, NEW.rm_startblock, + NEW.rm_blockcount, NEW.rm_owner, + NEW.rm_offset, NEW.rm_flags); + if (error) + goto done; + NEW = LEFT; + error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock, + NEW.rm_blockcount, NEW.rm_owner, + NEW.rm_offset, NEW.rm_flags, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + NEW.rm_blockcount += len; + error = xfs_rmap_update(cur, &NEW); + if (error) + goto done; + break; + + case RMAP_LEFT_FILLING: + /* + * Setting the first part of a previous oldext extent to newext. + * The left neighbor is not contiguous. 
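+		 * PREV's key changes (startblock and offset both move up by
+		 * len), so shrink it via delete + re-insert, then insert a
+		 * fresh record for the converted range.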
+ */ + NEW = PREV; + error = xfs_rmap_delete(cur, NEW.rm_startblock, + NEW.rm_blockcount, NEW.rm_owner, + NEW.rm_offset, NEW.rm_flags); + if (error) + goto done; + NEW.rm_offset += len; + NEW.rm_startblock += len; + NEW.rm_blockcount -= len; + error = xfs_rmap_insert(cur, NEW.rm_startblock, + NEW.rm_blockcount, NEW.rm_owner, + NEW.rm_offset, NEW.rm_flags); + if (error) + goto done; + error = xfs_rmap_insert(cur, bno, len, owner, offset, newext); + if (error) + goto done; + break; + + case RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG: + /* + * Setting the last part of a previous oldext extent to newext. + * The right neighbor is contiguous with the new allocation. + */ + NEW = PREV; + error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock, + NEW.rm_blockcount, NEW.rm_owner, + NEW.rm_offset, NEW.rm_flags, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + NEW.rm_blockcount = offset - NEW.rm_offset; + error = xfs_rmap_update(cur, &NEW); + if (error) + goto done; + NEW = RIGHT; + error = xfs_rmap_delete(cur, NEW.rm_startblock, + NEW.rm_blockcount, NEW.rm_owner, + NEW.rm_offset, NEW.rm_flags); + if (error) + goto done; + NEW.rm_offset = offset; + NEW.rm_startblock = bno; + NEW.rm_blockcount += len; + error = xfs_rmap_insert(cur, NEW.rm_startblock, + NEW.rm_blockcount, NEW.rm_owner, + NEW.rm_offset, NEW.rm_flags); + if (error) + goto done; + break; + + case RMAP_RIGHT_FILLING: + /* + * Setting the last part of a previous oldext extent to newext. + * The right neighbor is not contiguous. + */ + NEW = PREV; + error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock, + NEW.rm_blockcount, NEW.rm_owner, + NEW.rm_offset, NEW.rm_flags, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + NEW.rm_blockcount -= len; + error = xfs_rmap_update(cur, &NEW); + if (error) + goto done; + error = xfs_rmap_insert(cur, bno, len, owner, offset, newext); + if (error) + goto done; + break; + + case 0: + /* + * Setting the middle part of a previous oldext extent to + * newext. Contiguity is impossible here. + * One extent becomes three extents. + */ + /* new right extent - oldext */ + NEW.rm_startblock = bno + len; + NEW.rm_owner = owner; + NEW.rm_offset = new_endoff; + NEW.rm_blockcount = PREV.rm_offset + PREV.rm_blockcount - + new_endoff; + NEW.rm_flags = PREV.rm_flags; + error = xfs_rmap_insert(cur, NEW.rm_startblock, + NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, + NEW.rm_flags); + if (error) + goto done; + /* new left extent - oldext */ + NEW = PREV; + error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock, + NEW.rm_blockcount, NEW.rm_owner, + NEW.rm_offset, NEW.rm_flags, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + NEW.rm_blockcount = offset - NEW.rm_offset; + error = xfs_rmap_update(cur, &NEW); + if (error) + goto done; + /* new middle extent - newext */ + NEW.rm_startblock = bno; + NEW.rm_blockcount = len; + NEW.rm_owner = owner; + NEW.rm_offset = offset; + NEW.rm_flags = newext; + error = xfs_rmap_insert(cur, NEW.rm_startblock, + NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, + NEW.rm_flags); + if (error) + goto done; + break; + + case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG: + case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG: + case RMAP_LEFT_FILLING | RMAP_RIGHT_CONTIG: + case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG: + case RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG: + case RMAP_LEFT_CONTIG: + case RMAP_RIGHT_CONTIG: + /* + * These cases are all impossible. 
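+ * (A CONTIG bit can only be set together with the matching FILLING
+ * bit: if the conversion does not reach an edge of PREV, the block
+ * next to that edge still belongs to PREV itself, so no separate
+ * mergeable neighbor can exist there.)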
+ */ + ASSERT(0); + } + + trace_xfs_rmap_convert_done(mp, cur->bc_private.a.agno, bno, len, + unwritten, oinfo); +done: + if (error) + trace_xfs_rmap_convert_error(cur->bc_mp, + cur->bc_private.a.agno, error, _RET_IP_); + return error; +} + #undef NEW #undef LEFT #undef RIGHT #undef PREV +/* + * Find an extent in the rmap btree and unmap it. For rmap extent types that + * can overlap (data fork rmaps on reflink filesystems) we must be careful + * that the prev/next records in the btree might belong to another owner. + * Therefore we must use delete+insert to alter any of the key fields. + * + * For every other situation there can only be one owner for a given extent, + * so we can call the regular _free function. + */ +STATIC int +xfs_rmap_unmap_shared( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + bool unwritten, + struct xfs_owner_info *oinfo) +{ + struct xfs_mount *mp = cur->bc_mp; + struct xfs_rmap_irec ltrec; + uint64_t ltoff; + int error = 0; + int i; + uint64_t owner; + uint64_t offset; + unsigned int flags; + + xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); + if (unwritten) + flags |= XFS_RMAP_UNWRITTEN; + trace_xfs_rmap_unmap(mp, cur->bc_private.a.agno, bno, len, + unwritten, oinfo); + + /* + * We should always have a left record because there's a static record + * for the AG headers at rm_startblock == 0 created by mkfs/growfs that + * will not ever be removed from the tree. + */ + error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, flags, + <rec, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + ltoff = ltrec.rm_offset; + + /* Make sure the extent we found covers the entire freeing range. */ + XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno && + ltrec.rm_startblock + ltrec.rm_blockcount >= + bno + len, out_error); + + /* Make sure the owner matches what we expect to find in the tree. */ + XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner, out_error); + + /* Make sure the unwritten flag matches. */ + XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == + (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error); + + /* Check the offset. */ + XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_offset <= offset, out_error); + XFS_WANT_CORRUPTED_GOTO(mp, offset <= ltoff + ltrec.rm_blockcount, + out_error); + + if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { + /* Exact match, simply remove the record from rmap tree. */ + error = xfs_rmap_delete(cur, ltrec.rm_startblock, + ltrec.rm_blockcount, ltrec.rm_owner, + ltrec.rm_offset, ltrec.rm_flags); + if (error) + goto out_error; + } else if (ltrec.rm_startblock == bno) { + /* + * Overlap left hand side of extent: move the start, trim the + * length and update the current record. + * + * ltbno ltlen + * Orig: |oooooooooooooooooooo| + * Freeing: |fffffffff| + * Result: |rrrrrrrrrr| + * bno len + */ + + /* Delete prev rmap. */ + error = xfs_rmap_delete(cur, ltrec.rm_startblock, + ltrec.rm_blockcount, ltrec.rm_owner, + ltrec.rm_offset, ltrec.rm_flags); + if (error) + goto out_error; + + /* Add an rmap at the new offset. */ + ltrec.rm_startblock += len; + ltrec.rm_blockcount -= len; + ltrec.rm_offset += len; + error = xfs_rmap_insert(cur, ltrec.rm_startblock, + ltrec.rm_blockcount, ltrec.rm_owner, + ltrec.rm_offset, ltrec.rm_flags); + if (error) + goto out_error; + } else if (ltrec.rm_startblock + ltrec.rm_blockcount == bno + len) { + /* + * Overlap right hand side of extent: trim the length and + * update the current record. 
+ * + * ltbno ltlen + * Orig: |oooooooooooooooooooo| + * Freeing: |fffffffff| + * Result: |rrrrrrrrrr| + * bno len + */ + error = xfs_rmap_lookup_eq(cur, ltrec.rm_startblock, + ltrec.rm_blockcount, ltrec.rm_owner, + ltrec.rm_offset, ltrec.rm_flags, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + ltrec.rm_blockcount -= len; + error = xfs_rmap_update(cur, <rec); + if (error) + goto out_error; + } else { + /* + * Overlap middle of extent: trim the length of the existing + * record to the length of the new left-extent size, increment + * the insertion position so we can insert a new record + * containing the remaining right-extent space. + * + * ltbno ltlen + * Orig: |oooooooooooooooooooo| + * Freeing: |fffffffff| + * Result: |rrrrr| |rrrr| + * bno len + */ + xfs_extlen_t orig_len = ltrec.rm_blockcount; + + /* Shrink the left side of the rmap */ + error = xfs_rmap_lookup_eq(cur, ltrec.rm_startblock, + ltrec.rm_blockcount, ltrec.rm_owner, + ltrec.rm_offset, ltrec.rm_flags, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + ltrec.rm_blockcount = bno - ltrec.rm_startblock; + error = xfs_rmap_update(cur, <rec); + if (error) + goto out_error; + + /* Add an rmap at the new offset */ + error = xfs_rmap_insert(cur, bno + len, + orig_len - len - ltrec.rm_blockcount, + ltrec.rm_owner, offset + len, + ltrec.rm_flags); + if (error) + goto out_error; + } + + trace_xfs_rmap_unmap_done(mp, cur->bc_private.a.agno, bno, len, + unwritten, oinfo); +out_error: + if (error) + trace_xfs_rmap_unmap_error(cur->bc_mp, + cur->bc_private.a.agno, error, _RET_IP_); + return error; +} + +/* + * Find an extent in the rmap btree and map it. For rmap extent types that + * can overlap (data fork rmaps on reflink filesystems) we must be careful + * that the prev/next records in the btree might belong to another owner. + * Therefore we must use delete+insert to alter any of the key fields. + * + * For every other situation there can only be one owner for a given extent, + * so we can call the regular _alloc function. + */ +STATIC int +xfs_rmap_map_shared( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + bool unwritten, + struct xfs_owner_info *oinfo) +{ + struct xfs_mount *mp = cur->bc_mp; + struct xfs_rmap_irec ltrec; + struct xfs_rmap_irec gtrec; + int have_gt; + int have_lt; + int error = 0; + int i; + uint64_t owner; + uint64_t offset; + unsigned int flags = 0; + + xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); + if (unwritten) + flags |= XFS_RMAP_UNWRITTEN; + trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len, + unwritten, oinfo); + + /* Is there a left record that abuts our range? */ + error = xfs_rmap_find_left_neighbor(cur, bno, owner, offset, flags, + <rec, &have_lt); + if (error) + goto out_error; + if (have_lt && + !xfs_rmap_is_mergeable(<rec, owner, flags)) + have_lt = 0; + + /* Is there a right record that abuts our range? 
*/ + error = xfs_rmap_lookup_eq(cur, bno + len, len, owner, offset + len, + flags, &have_gt); + if (error) + goto out_error; + if (have_gt) { + error = xfs_rmap_get_rec(cur, >rec, &have_gt); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(mp, have_gt == 1, out_error); + trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, + cur->bc_private.a.agno, gtrec.rm_startblock, + gtrec.rm_blockcount, gtrec.rm_owner, + gtrec.rm_offset, gtrec.rm_flags); + + if (!xfs_rmap_is_mergeable(>rec, owner, flags)) + have_gt = 0; + } + + if (have_lt && + ltrec.rm_startblock + ltrec.rm_blockcount == bno && + ltrec.rm_offset + ltrec.rm_blockcount == offset) { + /* + * Left edge contiguous, merge into left record. + * + * ltbno ltlen + * orig: |ooooooooo| + * adding: |aaaaaaaaa| + * result: |rrrrrrrrrrrrrrrrrrr| + * bno len + */ + ltrec.rm_blockcount += len; + if (have_gt && + bno + len == gtrec.rm_startblock && + offset + len == gtrec.rm_offset) { + /* + * Right edge also contiguous, delete right record + * and merge into left record. + * + * ltbno ltlen gtbno gtlen + * orig: |ooooooooo| |ooooooooo| + * adding: |aaaaaaaaa| + * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr| + */ + ltrec.rm_blockcount += gtrec.rm_blockcount; + error = xfs_rmap_delete(cur, gtrec.rm_startblock, + gtrec.rm_blockcount, gtrec.rm_owner, + gtrec.rm_offset, gtrec.rm_flags); + if (error) + goto out_error; + } + + /* Point the cursor back to the left record and update. */ + error = xfs_rmap_lookup_eq(cur, ltrec.rm_startblock, + ltrec.rm_blockcount, ltrec.rm_owner, + ltrec.rm_offset, ltrec.rm_flags, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + + error = xfs_rmap_update(cur, <rec); + if (error) + goto out_error; + } else if (have_gt && + bno + len == gtrec.rm_startblock && + offset + len == gtrec.rm_offset) { + /* + * Right edge contiguous, merge into right record. + * + * gtbno gtlen + * Orig: |ooooooooo| + * adding: |aaaaaaaaa| + * Result: |rrrrrrrrrrrrrrrrrrr| + * bno len + */ + /* Delete the old record. */ + error = xfs_rmap_delete(cur, gtrec.rm_startblock, + gtrec.rm_blockcount, gtrec.rm_owner, + gtrec.rm_offset, gtrec.rm_flags); + if (error) + goto out_error; + + /* Move the start and re-add it. */ + gtrec.rm_startblock = bno; + gtrec.rm_blockcount += len; + gtrec.rm_offset = offset; + error = xfs_rmap_insert(cur, gtrec.rm_startblock, + gtrec.rm_blockcount, gtrec.rm_owner, + gtrec.rm_offset, gtrec.rm_flags); + if (error) + goto out_error; + } else { + /* + * No contiguous edge with identical owner, insert + * new record at current cursor position. 
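+ * (The trivial case: a single insert of (bno, len, owner, offset,
+ * flags) is all that is needed.)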
+ */ + error = xfs_rmap_insert(cur, bno, len, owner, offset, flags); + if (error) + goto out_error; + } + + trace_xfs_rmap_map_done(mp, cur->bc_private.a.agno, bno, len, + unwritten, oinfo); +out_error: + if (error) + trace_xfs_rmap_map_error(cur->bc_mp, + cur->bc_private.a.agno, error, _RET_IP_); + return error; +} + struct xfs_rmap_query_range_info { xfs_rmap_query_range_fn fn; void *priv; @@ -1237,15 +2115,27 @@ xfs_rmap_finish_one( case XFS_RMAP_MAP: error = xfs_rmap_map(rcur, bno, blockcount, unwritten, &oinfo); break; + case XFS_RMAP_MAP_SHARED: + error = xfs_rmap_map_shared(rcur, bno, blockcount, unwritten, + &oinfo); + break; case XFS_RMAP_FREE: case XFS_RMAP_UNMAP: error = xfs_rmap_unmap(rcur, bno, blockcount, unwritten, &oinfo); break; + case XFS_RMAP_UNMAP_SHARED: + error = xfs_rmap_unmap_shared(rcur, bno, blockcount, unwritten, + &oinfo); + break; case XFS_RMAP_CONVERT: error = xfs_rmap_convert(rcur, bno, blockcount, !unwritten, &oinfo); break; + case XFS_RMAP_CONVERT_SHARED: + error = xfs_rmap_convert_shared(rcur, bno, blockcount, + !unwritten, &oinfo); + break; default: ASSERT(0); error = -EFSCORRUPTED; @@ -1263,9 +2153,10 @@ out_cur: */ static bool xfs_rmap_update_is_needed( - struct xfs_mount *mp) + struct xfs_mount *mp, + int whichfork) { - return xfs_sb_version_hasrmapbt(&mp->m_sb); + return xfs_sb_version_hasrmapbt(&mp->m_sb) && whichfork != XFS_COW_FORK; } /* @@ -1311,10 +2202,11 @@ xfs_rmap_map_extent( int whichfork, struct xfs_bmbt_irec *PREV) { - if (!xfs_rmap_update_is_needed(mp)) + if (!xfs_rmap_update_is_needed(mp, whichfork)) return 0; - return __xfs_rmap_add(mp, dfops, XFS_RMAP_MAP, ip->i_ino, + return __xfs_rmap_add(mp, dfops, xfs_is_reflink_inode(ip) ? + XFS_RMAP_MAP_SHARED : XFS_RMAP_MAP, ip->i_ino, whichfork, PREV); } @@ -1327,10 +2219,11 @@ xfs_rmap_unmap_extent( int whichfork, struct xfs_bmbt_irec *PREV) { - if (!xfs_rmap_update_is_needed(mp)) + if (!xfs_rmap_update_is_needed(mp, whichfork)) return 0; - return __xfs_rmap_add(mp, dfops, XFS_RMAP_UNMAP, ip->i_ino, + return __xfs_rmap_add(mp, dfops, xfs_is_reflink_inode(ip) ? + XFS_RMAP_UNMAP_SHARED : XFS_RMAP_UNMAP, ip->i_ino, whichfork, PREV); } @@ -1343,10 +2236,11 @@ xfs_rmap_convert_extent( int whichfork, struct xfs_bmbt_irec *PREV) { - if (!xfs_rmap_update_is_needed(mp)) + if (!xfs_rmap_update_is_needed(mp, whichfork)) return 0; - return __xfs_rmap_add(mp, dfops, XFS_RMAP_CONVERT, ip->i_ino, + return __xfs_rmap_add(mp, dfops, xfs_is_reflink_inode(ip) ? 
+ XFS_RMAP_CONVERT_SHARED : XFS_RMAP_CONVERT, ip->i_ino,
 whichfork, PREV);
 }
 
@@ -1362,7 +2256,7 @@ xfs_rmap_alloc_extent(
 {
 struct xfs_bmbt_irec bmap;
 
- if (!xfs_rmap_update_is_needed(mp))
+ if (!xfs_rmap_update_is_needed(mp, XFS_DATA_FORK))
 return 0;
 
 bmap.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno);
@@ -1386,7 +2280,7 @@ xfs_rmap_free_extent(
 {
 struct xfs_bmbt_irec bmap;
 
- if (!xfs_rmap_update_is_needed(mp))
+ if (!xfs_rmap_update_is_needed(mp, XFS_DATA_FORK))
 return 0;
 
 bmap.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno);
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 71cf99a4acba..789930599339 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -206,4 +206,11 @@ int xfs_rmap_finish_one(struct xfs_trans *tp, enum xfs_rmap_intent_type type,
 xfs_fsblock_t startblock, xfs_filblks_t blockcount,
 xfs_exntst_t state, struct xfs_btree_cur **pcur);
 
+int xfs_rmap_find_left_neighbor(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+ uint64_t owner, uint64_t offset, unsigned int flags,
+ struct xfs_rmap_irec *irec, int *stat);
+int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+ uint64_t owner, uint64_t offset, unsigned int flags,
+ struct xfs_rmap_irec *irec, int *stat);
+
 #endif /* __XFS_RMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 17b8eeb34ac8..83e672ff7577 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -35,6 +35,7 @@
 #include "xfs_cksum.h"
 #include "xfs_error.h"
 #include "xfs_extent_busy.h"
+#include "xfs_ag_resv.h"
 
 /*
 * Reverse map btree.
@@ -512,6 +513,83 @@ void
 xfs_rmapbt_compute_maxlevels(
 struct xfs_mount *mp)
 {
- mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(mp,
- mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
+ /*
+ * On a non-reflink filesystem, the maximum number of rmap
+ * records is the number of blocks in the AG, hence the max
+ * rmapbt height is log_$maxrecs($agblocks). However, with
+ * reflink each AG block can have up to 2^32 (per the refcount
+ * record format) owners, which means that theoretically we
+ * could face up to 2^64 rmap records.
+ *
+ * That effectively means that the max rmapbt height must be
+ * XFS_BTREE_MAXLEVELS. "Fortunately" we'll run out of AG
+ * blocks to feed the rmapbt long before the rmapbt reaches
+ * maximum height. The reflink code uses ag_resv_critical to
+ * disallow reflinking when less than 10% of the per-AG metadata
+ * block reservation is free, since the fallback is a regular
+ * file copy.
+ */
+ if (xfs_sb_version_hasreflink(&mp->m_sb))
+ mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS;
+ else
+ mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(mp,
+ mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
+}
+
+/* Calculate the rmap btree size for some records. */
+xfs_extlen_t
+xfs_rmapbt_calc_size(
+ struct xfs_mount *mp,
+ unsigned long long len)
+{
+ return xfs_btree_calc_size(mp, mp->m_rmap_mnr, len);
+}
+
+/*
+ * Calculate the maximum rmap btree size.
+ */
+xfs_extlen_t
+xfs_rmapbt_max_size(
+ struct xfs_mount *mp)
+{
+ /* Bail out if we're uninitialized, which can happen in mkfs. */
+ if (mp->m_rmap_mxr[0] == 0)
+ return 0;
+
+ return xfs_rmapbt_calc_size(mp, mp->m_sb.sb_agblocks);
+}
+
+/*
+ * Figure out how many blocks to reserve and how many are used by this btree.
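+ * The ask is the larger of 1% of the AG and the worst-case rmapbt
+ * size for sb_agblocks records; as a rough example, the 1% term
+ * alone comes to 10485 blocks for a 2^20-block AG.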
+ */ +int +xfs_rmapbt_calc_reserves( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_extlen_t *ask, + xfs_extlen_t *used) +{ + struct xfs_buf *agbp; + struct xfs_agf *agf; + xfs_extlen_t pool_len; + xfs_extlen_t tree_len; + int error; + + if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + return 0; + + /* Reserve 1% of the AG or enough for 1 block per record. */ + pool_len = max(mp->m_sb.sb_agblocks / 100, xfs_rmapbt_max_size(mp)); + *ask += pool_len; + + error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + if (error) + return error; + + agf = XFS_BUF_TO_AGF(agbp); + tree_len = be32_to_cpu(agf->agf_rmap_blocks); + xfs_buf_relse(agbp); + + *used += tree_len; + + return error; } diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h index e73a55357dab..2a9ac472fb15 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.h +++ b/fs/xfs/libxfs/xfs_rmap_btree.h @@ -58,4 +58,11 @@ struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp, int xfs_rmapbt_maxrecs(struct xfs_mount *mp, int blocklen, int leaf); extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp); +extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp, + unsigned long long len); +extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp); + +extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp, + xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); + #endif /* __XFS_RMAP_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 4aecc5fefe96..a70aec910626 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -38,6 +38,8 @@ #include "xfs_ialloc_btree.h" #include "xfs_log.h" #include "xfs_rmap_btree.h" +#include "xfs_bmap.h" +#include "xfs_refcount_btree.h" /* * Physical superblock buffer manipulations. Shared with libxfs in userspace. @@ -737,6 +739,13 @@ xfs_sb_mount_common( mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2; mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2; + mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize, + true); + mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize, + false); + mp->m_refc_mnr[0] = mp->m_refc_mxr[0] / 2; + mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2; + mp->m_bsize = XFS_FSB_TO_BB(mp, 1); mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK, sbp->sb_inopblock); diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 0c5b30bd884c..c6f4eb46fe26 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -39,6 +39,7 @@ extern const struct xfs_buf_ops xfs_agf_buf_ops; extern const struct xfs_buf_ops xfs_agfl_buf_ops; extern const struct xfs_buf_ops xfs_allocbt_buf_ops; extern const struct xfs_buf_ops xfs_rmapbt_buf_ops; +extern const struct xfs_buf_ops xfs_refcountbt_buf_ops; extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops; extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops; extern const struct xfs_buf_ops xfs_bmbt_buf_ops; @@ -122,6 +123,7 @@ int xfs_log_calc_minimum_size(struct xfs_mount *); #define XFS_INO_REF 2 #define XFS_ATTR_BTREE_REF 1 #define XFS_DQUOT_REF 1 +#define XFS_REFC_BTREE_REF 1 /* * Flags for xfs_trans_ichgtime(). diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 301ef2f4dbd6..b456cca1bfb2 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -67,13 +67,14 @@ xfs_calc_buf_res( * Per-extent log reservation for the btree changes involved in freeing or * allocating an extent. In classic XFS there were two trees that will be * modified (bnobt + cntbt). 
With rmap enabled, there are three trees - * (rmapbt). The number of blocks reserved is based on the formula: + * (rmapbt). With reflink, there are four trees (refcountbt). The number of + * blocks reserved is based on the formula: * * num trees * ((2 blocks/level * max depth) - 1) * * Keep in mind that max depth is calculated separately for each type of tree. */ -static uint +uint xfs_allocfree_log_count( struct xfs_mount *mp, uint num_ops) @@ -83,6 +84,8 @@ xfs_allocfree_log_count( blocks = num_ops * 2 * (2 * mp->m_ag_maxlevels - 1); if (xfs_sb_version_hasrmapbt(&mp->m_sb)) blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1); + if (xfs_sb_version_hasreflink(&mp->m_sb)) + blocks += num_ops * (2 * mp->m_refc_maxlevels - 1); return blocks; } @@ -809,11 +812,18 @@ xfs_trans_resv_calc( * require a permanent reservation on space. */ resp->tr_write.tr_logres = xfs_calc_write_reservation(mp); - resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT; + if (xfs_sb_version_hasreflink(&mp->m_sb)) + resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK; + else + resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT; resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES; resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp); - resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT; + if (xfs_sb_version_hasreflink(&mp->m_sb)) + resp->tr_itruncate.tr_logcount = + XFS_ITRUNCATE_LOG_COUNT_REFLINK; + else + resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT; resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES; resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp); @@ -870,7 +880,10 @@ xfs_trans_resv_calc( resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES; resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp); - resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT; + if (xfs_sb_version_hasreflink(&mp->m_sb)) + resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK; + else + resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT; resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES; /* diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h index 0eb46ed6d404..b7e5357d060a 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.h +++ b/fs/xfs/libxfs/xfs_trans_resv.h @@ -87,6 +87,7 @@ struct xfs_trans_resv { #define XFS_DEFAULT_LOG_COUNT 1 #define XFS_DEFAULT_PERM_LOG_COUNT 2 #define XFS_ITRUNCATE_LOG_COUNT 2 +#define XFS_ITRUNCATE_LOG_COUNT_REFLINK 8 #define XFS_INACTIVE_LOG_COUNT 2 #define XFS_CREATE_LOG_COUNT 2 #define XFS_CREATE_TMPFILE_LOG_COUNT 2 @@ -96,11 +97,13 @@ struct xfs_trans_resv { #define XFS_LINK_LOG_COUNT 2 #define XFS_RENAME_LOG_COUNT 2 #define XFS_WRITE_LOG_COUNT 2 +#define XFS_WRITE_LOG_COUNT_REFLINK 8 #define XFS_ADDAFORK_LOG_COUNT 2 #define XFS_ATTRINVAL_LOG_COUNT 1 #define XFS_ATTRSET_LOG_COUNT 3 #define XFS_ATTRRM_LOG_COUNT 3 void xfs_trans_resv_calc(struct xfs_mount *mp, struct xfs_trans_resv *resp); +uint xfs_allocfree_log_count(struct xfs_mount *mp, uint num_ops); #endif /* __XFS_TRANS_RESV_H__ */ diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index 41e0428d8175..7917f6e44286 100644 --- a/fs/xfs/libxfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -21,6 +21,8 @@ /* * Components of space reservations. 
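+ * For instance, XFS_SWAP_RMAP_SPACE_RES below charges one bmbt
+ * expansion per XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) blocks swapped
+ * plus one full-height rmapbt split per
+ * XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) blocks; a worst-case estimate
+ * rather than a tight bound.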
 */
+#define XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) \
+ (((mp)->m_rmap_mxr[0]) - ((mp)->m_rmap_mnr[0]))
 #define XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) \
 (((mp)->m_alloc_mxr[0]) - ((mp)->m_alloc_mnr[0]))
 #define XFS_EXTENTADD_SPACE_RES(mp,w) (XFS_BM_MAXLEVELS(mp,w) - 1)
@@ -28,6 +30,13 @@
 (((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \
 XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \
 XFS_EXTENTADD_SPACE_RES(mp,w))
+#define XFS_SWAP_RMAP_SPACE_RES(mp,b,w)\
+ (((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \
+ XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \
+ XFS_EXTENTADD_SPACE_RES(mp,w) + \
+ ((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \
+ XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) * \
+ (mp)->m_rmap_maxlevels)
 #define XFS_DAENTER_1B(mp,w) \
 ((w) == XFS_DATA_FORK ? (mp)->m_dir_geo->fsbcount : 1)
 #define XFS_DAENTER_DBS(mp,w) \
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 3d503647f26b..8d74870468c2 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -90,6 +90,7 @@ typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */
 */
 #define XFS_DATA_FORK 0
 #define XFS_ATTR_FORK 1
+#define XFS_COW_FORK 2
 
 /*
 * Min numbers of data/attr fork btree root pointers.
@@ -109,7 +110,7 @@ typedef enum {
 typedef enum {
 XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_RMAPi, XFS_BTNUM_BMAPi,
- XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_MAX
+ XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_REFCi, XFS_BTNUM_MAX
 } xfs_btnum_t;
 
 struct xfs_name {
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4a28fa91e3b1..3e57a56cf829 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -31,6 +31,7 @@
 #include "xfs_bmap.h"
 #include "xfs_bmap_util.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_reflink.h"
 #include <linux/gfp.h>
 #include <linux/mpage.h>
 #include <linux/pagevec.h>
@@ -39,6 +40,7 @@
 /* flags for direct write completions */
 #define XFS_DIO_FLAG_UNWRITTEN (1 << 0)
 #define XFS_DIO_FLAG_APPEND (1 << 1)
+#define XFS_DIO_FLAG_COW (1 << 2)
 
 /*
 * structure owned by writepages passed to individual writepage calls
@@ -287,6 +289,25 @@ xfs_end_io(
 error = -EIO;
 
 /*
+ * For a CoW extent, we need to move the mapping from the CoW fork
+ * to the data fork. If instead an error happened, just dump the
+ * new blocks.
+ */
+ if (ioend->io_type == XFS_IO_COW) {
+ if (error)
+ goto done;
+ if (ioend->io_bio->bi_error) {
+ error = xfs_reflink_cancel_cow_range(ip,
+ ioend->io_offset, ioend->io_size);
+ goto done;
+ }
+ error = xfs_reflink_end_cow(ip, ioend->io_offset,
+ ioend->io_size);
+ if (error)
+ goto done;
+ }
+
+ /*
 * For unwritten extents we need to issue transactions to convert a
 * range to normal written extents after the data I/O has finished.
* Detecting and handling completion IO errors is done individually @@ -301,7 +322,8 @@ xfs_end_io( } else if (ioend->io_append_trans) { error = xfs_setfilesize_ioend(ioend, error); } else { - ASSERT(!xfs_ioend_is_append(ioend)); + ASSERT(!xfs_ioend_is_append(ioend) || + ioend->io_type == XFS_IO_COW); } done: @@ -315,7 +337,7 @@ xfs_end_bio( struct xfs_ioend *ioend = bio->bi_private; struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; - if (ioend->io_type == XFS_IO_UNWRITTEN) + if (ioend->io_type == XFS_IO_UNWRITTEN || ioend->io_type == XFS_IO_COW) queue_work(mp->m_unwritten_workqueue, &ioend->io_work); else if (ioend->io_append_trans) queue_work(mp->m_data_workqueue, &ioend->io_work); @@ -341,6 +363,7 @@ xfs_map_blocks( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; + ASSERT(type != XFS_IO_COW); if (type == XFS_IO_UNWRITTEN) bmapi_flags |= XFS_BMAPI_IGSTATE; @@ -355,6 +378,13 @@ xfs_map_blocks( offset_fsb = XFS_B_TO_FSBT(mp, offset); error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, imap, &nimaps, bmapi_flags); + /* + * Truncate an overwrite extent if there's a pending CoW + * reservation before the end of this extent. This forces us + * to come back to writepage to take care of the CoW. + */ + if (nimaps && type == XFS_IO_OVERWRITE) + xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb, imap); xfs_iunlock(ip, XFS_ILOCK_SHARED); if (error) @@ -362,7 +392,8 @@ xfs_map_blocks( if (type == XFS_IO_DELALLOC && (!nimaps || isnullstartblock(imap->br_startblock))) { - error = xfs_iomap_write_allocate(ip, offset, imap); + error = xfs_iomap_write_allocate(ip, XFS_DATA_FORK, offset, + imap); if (!error) trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); return error; @@ -737,6 +768,56 @@ out_invalidate: return; } +static int +xfs_map_cow( + struct xfs_writepage_ctx *wpc, + struct inode *inode, + loff_t offset, + unsigned int *new_type) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_bmbt_irec imap; + bool is_cow = false, need_alloc = false; + int error; + + /* + * If we already have a valid COW mapping keep using it. + */ + if (wpc->io_type == XFS_IO_COW) { + wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset); + if (wpc->imap_valid) { + *new_type = XFS_IO_COW; + return 0; + } + } + + /* + * Else we need to check if there is a COW mapping at this offset. + */ + xfs_ilock(ip, XFS_ILOCK_SHARED); + is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc); + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + if (!is_cow) + return 0; + + /* + * And if the COW mapping has a delayed extent here we need to + * allocate real space for it now. 
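+ * (Up to this point the CoW fork holds only a delalloc reservation;
+ * writeback is where it becomes real blocks, hence the XFS_COW_FORK
+ * write_allocate call below.)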
+ */ + if (need_alloc) { + error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset, + &imap); + if (error) + return error; + } + + wpc->io_type = *new_type = XFS_IO_COW; + wpc->imap_valid = true; + wpc->imap = imap; + return 0; +} + /* * We implement an immediate ioend submission policy here to avoid needing to * chain multiple ioends and hence nest mempool allocations which can violate @@ -769,6 +850,7 @@ xfs_writepage_map( int error = 0; int count = 0; int uptodate = 1; + unsigned int new_type; bh = head = page_buffers(page); offset = page_offset(page); @@ -789,22 +871,13 @@ xfs_writepage_map( continue; } - if (buffer_unwritten(bh)) { - if (wpc->io_type != XFS_IO_UNWRITTEN) { - wpc->io_type = XFS_IO_UNWRITTEN; - wpc->imap_valid = false; - } - } else if (buffer_delay(bh)) { - if (wpc->io_type != XFS_IO_DELALLOC) { - wpc->io_type = XFS_IO_DELALLOC; - wpc->imap_valid = false; - } - } else if (buffer_uptodate(bh)) { - if (wpc->io_type != XFS_IO_OVERWRITE) { - wpc->io_type = XFS_IO_OVERWRITE; - wpc->imap_valid = false; - } - } else { + if (buffer_unwritten(bh)) + new_type = XFS_IO_UNWRITTEN; + else if (buffer_delay(bh)) + new_type = XFS_IO_DELALLOC; + else if (buffer_uptodate(bh)) + new_type = XFS_IO_OVERWRITE; + else { if (PageUptodate(page)) ASSERT(buffer_mapped(bh)); /* @@ -817,6 +890,17 @@ xfs_writepage_map( continue; } + if (xfs_is_reflink_inode(XFS_I(inode))) { + error = xfs_map_cow(wpc, inode, offset, &new_type); + if (error) + goto out; + } + + if (wpc->io_type != new_type) { + wpc->io_type = new_type; + wpc->imap_valid = false; + } + if (wpc->imap_valid) wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset); @@ -1107,18 +1191,24 @@ xfs_map_direct( struct inode *inode, struct buffer_head *bh_result, struct xfs_bmbt_irec *imap, - xfs_off_t offset) + xfs_off_t offset, + bool is_cow) { uintptr_t *flags = (uintptr_t *)&bh_result->b_private; xfs_off_t size = bh_result->b_size; trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size, - ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, imap); + ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : is_cow ? XFS_IO_COW : + XFS_IO_OVERWRITE, imap); if (ISUNWRITTEN(imap)) { *flags |= XFS_DIO_FLAG_UNWRITTEN; set_buffer_defer_completion(bh_result); - } else if (offset + size > i_size_read(inode) || offset + size < 0) { + } else if (is_cow) { + *flags |= XFS_DIO_FLAG_COW; + set_buffer_defer_completion(bh_result); + } + if (offset + size > i_size_read(inode) || offset + size < 0) { *flags |= XFS_DIO_FLAG_APPEND; set_buffer_defer_completion(bh_result); } @@ -1164,6 +1254,44 @@ xfs_map_trim_size( bh_result->b_size = mapping_size; } +/* Bounce unaligned directio writes to the page cache. */ +static int +xfs_bounce_unaligned_dio_write( + struct xfs_inode *ip, + xfs_fileoff_t offset_fsb, + struct xfs_bmbt_irec *imap) +{ + struct xfs_bmbt_irec irec; + xfs_fileoff_t delta; + bool shared; + bool x; + int error; + + irec = *imap; + if (offset_fsb > irec.br_startoff) { + delta = offset_fsb - irec.br_startoff; + irec.br_blockcount -= delta; + irec.br_startblock += delta; + irec.br_startoff = offset_fsb; + } + error = xfs_reflink_trim_around_shared(ip, &irec, &shared, &x); + if (error) + return error; + + /* + * We're here because we're trying to do a directio write to a + * region that isn't aligned to a filesystem block. If any part + * of the extent is shared, fall back to buffered mode to handle + * the RMW. This is done by returning -EREMCHG ("remote addr + * changed"), which is caught further up the call stack. 
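+ * (Writing part of a shared block in place would read-modify-write
+ * data that other inodes still map; going through the page cache
+ * lets the buffered path break the sharing first.)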
+ */
+ if (shared) {
+ trace_xfs_reflink_bounce_dio_write(ip, imap);
+ return -EREMCHG;
+ }
+ return 0;
+}
+
 STATIC int
 __xfs_get_blocks(
 struct inode *inode,
@@ -1183,6 +1311,8 @@ __xfs_get_blocks(
 xfs_off_t offset;
 ssize_t size;
 int new = 0;
+ bool is_cow = false;
+ bool need_alloc = false;
 
 BUG_ON(create && !direct);
 
@@ -1208,8 +1338,26 @@ __xfs_get_blocks(
 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
 offset_fsb = XFS_B_TO_FSBT(mp, offset);
 
- error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
- &imap, &nimaps, XFS_BMAPI_ENTIRE);
+ if (create && direct && xfs_is_reflink_inode(ip))
+ is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap,
+ &need_alloc);
+ if (!is_cow) {
+ error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
+ &imap, &nimaps, XFS_BMAPI_ENTIRE);
+ /*
+ * Truncate an overwrite extent if there's a pending CoW
+ * reservation before the end of this extent. This
+ * forces us to come back to get_blocks to take care of
+ * the CoW.
+ */
+ if (create && direct && nimaps &&
+ imap.br_startblock != HOLESTARTBLOCK &&
+ imap.br_startblock != DELAYSTARTBLOCK &&
+ !ISUNWRITTEN(&imap))
+ xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb,
+ &imap);
+ }
+ ASSERT(!need_alloc);
 if (error)
 goto out_unlock;
 
@@ -1261,6 +1409,13 @@ __xfs_get_blocks(
 if (imap.br_startblock != HOLESTARTBLOCK &&
 imap.br_startblock != DELAYSTARTBLOCK &&
 (create || !ISUNWRITTEN(&imap))) {
+ if (create && direct && !is_cow) {
+ error = xfs_bounce_unaligned_dio_write(ip, offset_fsb,
+ &imap);
+ if (error)
+ return error;
+ }
+
 xfs_map_buffer(inode, bh_result, &imap, offset);
 if (ISUNWRITTEN(&imap))
 set_buffer_unwritten(bh_result);
@@ -1269,7 +1424,8 @@ __xfs_get_blocks(
 if (dax_fault)
 ASSERT(!ISUNWRITTEN(&imap));
 else
- xfs_map_direct(inode, bh_result, &imap, offset);
+ xfs_map_direct(inode, bh_result, &imap, offset,
+ is_cow);
 }
 }
 
@@ -1391,11 +1547,14 @@ xfs_end_io_direct_write(
 i_size_write(inode, offset + size);
 spin_unlock(&ip->i_flags_lock);
 
+ if (flags & XFS_DIO_FLAG_COW)
+ error = xfs_reflink_end_cow(ip, offset, size);
 if (flags & XFS_DIO_FLAG_UNWRITTEN) {
 trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
 
 error = xfs_iomap_write_unwritten(ip, offset, size);
- } else if (flags & XFS_DIO_FLAG_APPEND) {
+ }
+ if (flags & XFS_DIO_FLAG_APPEND) {
 trace_xfs_end_io_direct_write_append(ip, offset, size);
 
 error = xfs_setfilesize(ip, offset, size);
@@ -1425,6 +1584,17 @@ xfs_vm_bmap(
 trace_xfs_vm_bmap(XFS_I(inode));
 xfs_ilock(ip, XFS_IOLOCK_SHARED);
+
+ /*
+ * The swap code (ab-)uses ->bmap to get a block mapping and then
+ * bypasses the file system for actual I/O. We really can't allow
+ * that on reflink inodes, so we have to skip out here. And yes,
+ * 0 is the magic code for a bmap error.
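+ * (A swapfile activated through ->bmap would issue raw block I/O
+ * that bypasses CoW, scribbling over blocks that may still be
+ * shared with other inodes.)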
+ */ + if (xfs_is_reflink_inode(ip)) { + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + return 0; + } filemap_write_and_wait(mapping); xfs_iunlock(ip, XFS_IOLOCK_SHARED); return generic_block_bmap(mapping, block, xfs_get_blocks); diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index 1950e3bca2ac..b3c6634f9518 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -28,13 +28,15 @@ enum { XFS_IO_DELALLOC, /* covers delalloc region */ XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */ XFS_IO_OVERWRITE, /* covers already allocated extent */ + XFS_IO_COW, /* covers copy-on-write extent */ }; #define XFS_IO_TYPES \ { XFS_IO_INVALID, "invalid" }, \ { XFS_IO_DELALLOC, "delalloc" }, \ { XFS_IO_UNWRITTEN, "unwritten" }, \ - { XFS_IO_OVERWRITE, "overwrite" } + { XFS_IO_OVERWRITE, "overwrite" }, \ + { XFS_IO_COW, "CoW" } /* * Structure for buffered I/O completions. diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c new file mode 100644 index 000000000000..9bf57c76623b --- /dev/null +++ b/fs/xfs/xfs_bmap_item.c @@ -0,0 +1,508 @@ +/* + * Copyright (C) 2016 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_buf_item.h" +#include "xfs_bmap_item.h" +#include "xfs_log.h" +#include "xfs_bmap.h" +#include "xfs_icache.h" +#include "xfs_trace.h" + + +kmem_zone_t *xfs_bui_zone; +kmem_zone_t *xfs_bud_zone; + +static inline struct xfs_bui_log_item *BUI_ITEM(struct xfs_log_item *lip) +{ + return container_of(lip, struct xfs_bui_log_item, bui_item); +} + +void +xfs_bui_item_free( + struct xfs_bui_log_item *buip) +{ + kmem_zone_free(xfs_bui_zone, buip); +} + +STATIC void +xfs_bui_item_size( + struct xfs_log_item *lip, + int *nvecs, + int *nbytes) +{ + struct xfs_bui_log_item *buip = BUI_ITEM(lip); + + *nvecs += 1; + *nbytes += xfs_bui_log_format_sizeof(buip->bui_format.bui_nextents); +} + +/* + * This is called to fill in the vector of log iovecs for the + * given bui log item. We use only 1 iovec, and we point that + * at the bui_log_format structure embedded in the bui item. + * It is at this point that we assert that all of the extent + * slots in the bui item have been filled. 
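+ * (For a BUI that is a single slot, since XFS_BUI_MAX_FAST_EXTENTS
+ * is 1: each intent carries exactly one mapping operation.)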
+ */
+STATIC void
+xfs_bui_item_format(
+ struct xfs_log_item *lip,
+ struct xfs_log_vec *lv)
+{
+ struct xfs_bui_log_item *buip = BUI_ITEM(lip);
+ struct xfs_log_iovec *vecp = NULL;
+
+ ASSERT(atomic_read(&buip->bui_next_extent) ==
+ buip->bui_format.bui_nextents);
+
+ buip->bui_format.bui_type = XFS_LI_BUI;
+ buip->bui_format.bui_size = 1;
+
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_BUI_FORMAT, &buip->bui_format,
+ xfs_bui_log_format_sizeof(buip->bui_format.bui_nextents));
+}
+
+/*
+ * Pinning has no meaning for a bui item, so just return.
+ */
+STATIC void
+xfs_bui_item_pin(
+ struct xfs_log_item *lip)
+{
+}
+
+/*
+ * The unpin operation is the last place a BUI is manipulated in the log. It is
+ * either inserted in the AIL or aborted in the event of a log I/O error. In
+ * either case, the BUI transaction has been successfully committed to make it
+ * this far. Therefore, we expect whoever committed the BUI to either construct
+ * and commit the BUD or drop the BUD's reference in the event of error. Simply
+ * drop the log's BUI reference now that the log is done with it.
+ */
+STATIC void
+xfs_bui_item_unpin(
+ struct xfs_log_item *lip,
+ int remove)
+{
+ struct xfs_bui_log_item *buip = BUI_ITEM(lip);
+
+ xfs_bui_release(buip);
+}
+
+/*
+ * BUI items have no locking or pushing. However, since BUIs are pulled from
+ * the AIL when their corresponding BUDs are committed to disk, their situation
+ * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
+ * will eventually flush the log. This should help in getting the BUI out of
+ * the AIL.
+ */
+STATIC uint
+xfs_bui_item_push(
+ struct xfs_log_item *lip,
+ struct list_head *buffer_list)
+{
+ return XFS_ITEM_PINNED;
+}
+
+/*
+ * The BUI has been either committed or aborted if the transaction has been
+ * cancelled. If the transaction was cancelled, a BUD isn't going to be
+ * constructed and thus we free the BUI here directly.
+ */
+STATIC void
+xfs_bui_item_unlock(
+ struct xfs_log_item *lip)
+{
+ if (lip->li_flags & XFS_LI_ABORTED)
+ xfs_bui_item_free(BUI_ITEM(lip));
+}
+
+/*
+ * The BUI is logged only once and cannot be moved in the log, so simply return
+ * the lsn at which it's been logged.
+ */
+STATIC xfs_lsn_t
+xfs_bui_item_committed(
+ struct xfs_log_item *lip,
+ xfs_lsn_t lsn)
+{
+ return lsn;
+}
+
+/*
+ * The BUI dependency tracking op doesn't do squat. It can't because
+ * it doesn't know where the free extent is coming from. The dependency
+ * tracking has to be handled by the "enclosing" metadata object. For
+ * example, for inodes, the inode is locked throughout the extent freeing
+ * so the dependency should be recorded there.
+ */
+STATIC void
+xfs_bui_item_committing(
+ struct xfs_log_item *lip,
+ xfs_lsn_t lsn)
+{
+}
+
+/*
+ * This is the ops vector shared by all bui log items.
+ */
+static const struct xfs_item_ops xfs_bui_item_ops = {
+ .iop_size = xfs_bui_item_size,
+ .iop_format = xfs_bui_item_format,
+ .iop_pin = xfs_bui_item_pin,
+ .iop_unpin = xfs_bui_item_unpin,
+ .iop_unlock = xfs_bui_item_unlock,
+ .iop_committed = xfs_bui_item_committed,
+ .iop_push = xfs_bui_item_push,
+ .iop_committing = xfs_bui_item_committing,
+};
+
+/*
+ * Allocate and initialize a bui item.
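+ * (The refcount starts at 2 below: roughly, one reference for the
+ * BUD side and one for the log's unpin of the BUI.)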
+ */
+struct xfs_bui_log_item *
+xfs_bui_init(
+ struct xfs_mount *mp)
+{
+ struct xfs_bui_log_item *buip;
+
+ buip = kmem_zone_zalloc(xfs_bui_zone, KM_SLEEP);
+
+ xfs_log_item_init(mp, &buip->bui_item, XFS_LI_BUI, &xfs_bui_item_ops);
+ buip->bui_format.bui_nextents = XFS_BUI_MAX_FAST_EXTENTS;
+ buip->bui_format.bui_id = (uintptr_t)(void *)buip;
+ atomic_set(&buip->bui_next_extent, 0);
+ atomic_set(&buip->bui_refcount, 2);
+
+ return buip;
+}
+
+/*
+ * Freeing the BUI requires that we remove it from the AIL if it has already
+ * been placed there. However, the BUI may not yet have been placed in the AIL
+ * when called by xfs_bui_release() from BUD processing due to the ordering of
+ * committed vs unpin operations in bulk insert operations. Hence the reference
+ * count to ensure only the last caller frees the BUI.
+ */
+void
+xfs_bui_release(
+ struct xfs_bui_log_item *buip)
+{
+ if (atomic_dec_and_test(&buip->bui_refcount)) {
+ xfs_trans_ail_remove(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR);
+ xfs_bui_item_free(buip);
+ }
+}
+
+static inline struct xfs_bud_log_item *BUD_ITEM(struct xfs_log_item *lip)
+{
+ return container_of(lip, struct xfs_bud_log_item, bud_item);
+}
+
+STATIC void
+xfs_bud_item_size(
+ struct xfs_log_item *lip,
+ int *nvecs,
+ int *nbytes)
+{
+ *nvecs += 1;
+ *nbytes += sizeof(struct xfs_bud_log_format);
+}
+
+/*
+ * This is called to fill in the vector of log iovecs for the
+ * given bud log item. We use only 1 iovec, and we point that
+ * at the bud_log_format structure embedded in the bud item.
+ */
+STATIC void
+xfs_bud_item_format(
+ struct xfs_log_item *lip,
+ struct xfs_log_vec *lv)
+{
+ struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+ struct xfs_log_iovec *vecp = NULL;
+
+ budp->bud_format.bud_type = XFS_LI_BUD;
+ budp->bud_format.bud_size = 1;
+
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_BUD_FORMAT, &budp->bud_format,
+ sizeof(struct xfs_bud_log_format));
+}
+
+/*
+ * Pinning has no meaning for a bud item, so just return.
+ */
+STATIC void
+xfs_bud_item_pin(
+ struct xfs_log_item *lip)
+{
+}
+
+/*
+ * Since pinning has no meaning for a bud item, unpinning does
+ * not either.
+ */
+STATIC void
+xfs_bud_item_unpin(
+ struct xfs_log_item *lip,
+ int remove)
+{
+}
+
+/*
+ * There isn't much you can do to push on a bud item. It is simply stuck
+ * waiting for the log to be flushed to disk.
+ */
+STATIC uint
+xfs_bud_item_push(
+ struct xfs_log_item *lip,
+ struct list_head *buffer_list)
+{
+ return XFS_ITEM_PINNED;
+}
+
+/*
+ * The BUD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the BUI and free the
+ * BUD.
+ */
+STATIC void
+xfs_bud_item_unlock(
+ struct xfs_log_item *lip)
+{
+ struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+
+ if (lip->li_flags & XFS_LI_ABORTED) {
+ xfs_bui_release(budp->bud_buip);
+ kmem_zone_free(xfs_bud_zone, budp);
+ }
+}
+
+/*
+ * When the bud item is committed to disk, all we need to do is delete our
+ * reference to our partner bui item and then free ourselves. Since we're
+ * freeing ourselves we must return -1 to keep the transaction code from
+ * further referencing this item.
+ */
+STATIC xfs_lsn_t
+xfs_bud_item_committed(
+ struct xfs_log_item *lip,
+ xfs_lsn_t lsn)
+{
+ struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+
+ /*
+ * Drop the BUI reference regardless of whether the BUD has been
+ * aborted.
Once the BUD transaction is constructed, it is the sole
+ * responsibility of the BUD to release the BUI (even if the BUI is
+ * aborted due to log I/O error).
+ */
+ xfs_bui_release(budp->bud_buip);
+ kmem_zone_free(xfs_bud_zone, budp);
+
+ return (xfs_lsn_t)-1;
+}
+
+/*
+ * The BUD dependency tracking op doesn't do squat. It can't because
+ * it doesn't know where the free extent is coming from. The dependency
+ * tracking has to be handled by the "enclosing" metadata object. For
+ * example, for inodes, the inode is locked throughout the extent freeing
+ * so the dependency should be recorded there.
+ */
+STATIC void
+xfs_bud_item_committing(
+ struct xfs_log_item *lip,
+ xfs_lsn_t lsn)
+{
+}
+
+/*
+ * This is the ops vector shared by all bud log items.
+ */
+static const struct xfs_item_ops xfs_bud_item_ops = {
+ .iop_size = xfs_bud_item_size,
+ .iop_format = xfs_bud_item_format,
+ .iop_pin = xfs_bud_item_pin,
+ .iop_unpin = xfs_bud_item_unpin,
+ .iop_unlock = xfs_bud_item_unlock,
+ .iop_committed = xfs_bud_item_committed,
+ .iop_push = xfs_bud_item_push,
+ .iop_committing = xfs_bud_item_committing,
+};
+
+/*
+ * Allocate and initialize a bud item.
+ */
+struct xfs_bud_log_item *
+xfs_bud_init(
+ struct xfs_mount *mp,
+ struct xfs_bui_log_item *buip)
+{
+ struct xfs_bud_log_item *budp;
+
+ budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP);
+ xfs_log_item_init(mp, &budp->bud_item, XFS_LI_BUD, &xfs_bud_item_ops);
+ budp->bud_buip = buip;
+ budp->bud_format.bud_bui_id = buip->bui_format.bui_id;
+
+ return budp;
+}
+
+/*
+ * Process a bmap update intent item that was recovered from the log.
+ * We need to update some inode's bmbt.
+ */
+int
+xfs_bui_recover(
+ struct xfs_mount *mp,
+ struct xfs_bui_log_item *buip)
+{
+ int error = 0;
+ unsigned int bui_type;
+ struct xfs_map_extent *bmap;
+ xfs_fsblock_t startblock_fsb;
+ xfs_fsblock_t inode_fsb;
+ bool op_ok;
+ struct xfs_bud_log_item *budp;
+ enum xfs_bmap_intent_type type;
+ int whichfork;
+ xfs_exntst_t state;
+ struct xfs_trans *tp;
+ struct xfs_inode *ip = NULL;
+ struct xfs_defer_ops dfops;
+ xfs_fsblock_t firstfsb;
+
+ ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags));
+
+ /* Only one mapping operation per BUI... */
+ if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
+ set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
+ xfs_bui_release(buip);
+ return -EIO;
+ }
+
+ /*
+ * First check the validity of the extent described by the
+ * BUI. If anything is bad, then toss the BUI.
+ */
+ bmap = &buip->bui_format.bui_extents[0];
+ startblock_fsb = XFS_BB_TO_FSB(mp,
+ XFS_FSB_TO_DADDR(mp, bmap->me_startblock));
+ inode_fsb = XFS_BB_TO_FSB(mp, XFS_FSB_TO_DADDR(mp,
+ XFS_INO_TO_FSB(mp, bmap->me_owner)));
+ switch (bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) {
+ case XFS_BMAP_MAP:
+ case XFS_BMAP_UNMAP:
+ op_ok = true;
+ break;
+ default:
+ op_ok = false;
+ break;
+ }
+ if (!op_ok || startblock_fsb == 0 ||
+ bmap->me_len == 0 ||
+ inode_fsb == 0 ||
+ startblock_fsb >= mp->m_sb.sb_dblocks ||
+ bmap->me_len >= mp->m_sb.sb_agblocks ||
+ inode_fsb >= mp->m_sb.sb_dblocks ||
+ (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS)) {
+ /*
+ * This will pull the BUI from the AIL and
+ * free the memory associated with it.
+ */
+ set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
+ xfs_bui_release(buip);
+ return -EIO;
+ }
+
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+ if (error)
+ return error;
+ budp = xfs_trans_get_bud(tp, buip);
+
+ /* Grab the inode.
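(If it was unlinked, it is tagged XFS_IRECOVERY below, presumably so its teardown is deferred until recovery completes.)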
 */
+ error = xfs_iget(mp, tp, bmap->me_owner, 0, XFS_ILOCK_EXCL, &ip);
+ if (error)
+ goto err_inode;
+
+ if (VFS_I(ip)->i_nlink == 0)
+ xfs_iflags_set(ip, XFS_IRECOVERY);
+ xfs_defer_init(&dfops, &firstfsb);
+
+ /* Process deferred bmap item. */
+ state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
+ XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
+ whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
+ XFS_ATTR_FORK : XFS_DATA_FORK;
+ bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
+ switch (bui_type) {
+ case XFS_BMAP_MAP:
+ case XFS_BMAP_UNMAP:
+ type = bui_type;
+ break;
+ default:
+ error = -EFSCORRUPTED;
+ goto err_dfops;
+ }
+ xfs_trans_ijoin(tp, ip, 0);
+
+ error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type,
+ ip, whichfork, bmap->me_startoff,
+ bmap->me_startblock, bmap->me_len,
+ state);
+ if (error)
+ goto err_dfops;
+
+ /* Finish transaction, free inodes. */
+ error = xfs_defer_finish(&tp, &dfops, NULL);
+ if (error)
+ goto err_dfops;
+
+ set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
+ error = xfs_trans_commit(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ IRELE(ip);
+
+ return error;
+
+err_dfops:
+ xfs_defer_cancel(&dfops);
+err_inode:
+ xfs_trans_cancel(tp);
+ if (ip) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ IRELE(ip);
+ }
+ return error;
+}
diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h
new file mode 100644
index 000000000000..c867daae4a3c
--- /dev/null
+++ b/fs/xfs/xfs_bmap_item.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2016 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef __XFS_BMAP_ITEM_H__
+#define __XFS_BMAP_ITEM_H__
+
+/*
+ * There are (currently) two pairs of bmap btree redo item types: map & unmap.
+ * The common abbreviations for these are BUI (bmap update intent) and BUD
+ * (bmap update done). The redo item type is encoded in the flags field of
+ * each xfs_map_extent.
+ *
+ * *I items should be recorded in the *first* of a series of rolled
+ * transactions, and the *D items should be recorded in the same transaction
+ * that records the associated bmbt updates.
+ *
+ * Should the system crash after the commit of the first transaction but
+ * before the commit of the final transaction in a series, log recovery will
+ * use the redo information recorded by the intent items to replay the
+ * bmbt metadata updates in the non-first transaction.
+ */
+
+/* kernel only BUI/BUD definitions */
+
+struct xfs_mount;
+struct kmem_zone;
+
+/*
+ * Max number of extents in fast allocation path.
+ */
+#define XFS_BUI_MAX_FAST_EXTENTS 1
+
+/*
+ * Define BUI flag bits. Manipulated by set/clear/test_bit operators.
+ */
+#define XFS_BUI_RECOVERED 1
+
+/*
+ * This is the "bmap update intent" log item. It is used to log the fact that
+ * some file block mappings need to change.
It is used in conjunction with the + * "bmap update done" log item described below. + * + * These log items follow the same rules as struct xfs_efi_log_item; see the + * comments about that structure (in xfs_extfree_item.h) for more details. + */ +struct xfs_bui_log_item { + struct xfs_log_item bui_item; + atomic_t bui_refcount; + atomic_t bui_next_extent; + unsigned long bui_flags; /* misc flags */ + struct xfs_bui_log_format bui_format; +}; + +static inline size_t +xfs_bui_log_item_sizeof( + unsigned int nr) +{ + return offsetof(struct xfs_bui_log_item, bui_format) + + xfs_bui_log_format_sizeof(nr); +} + +/* + * This is the "bmap update done" log item. It is used to log the fact that + * some bmbt updates mentioned in an earlier bui item have been performed. + */ +struct xfs_bud_log_item { + struct xfs_log_item bud_item; + struct xfs_bui_log_item *bud_buip; + struct xfs_bud_log_format bud_format; +}; + +extern struct kmem_zone *xfs_bui_zone; +extern struct kmem_zone *xfs_bud_zone; + +struct xfs_bui_log_item *xfs_bui_init(struct xfs_mount *); +struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *, + struct xfs_bui_log_item *); +void xfs_bui_item_free(struct xfs_bui_log_item *); +void xfs_bui_release(struct xfs_bui_log_item *); +int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip); + +#endif /* __XFS_BMAP_ITEM_H__ */ diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index e827d657c314..552465e011ec 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -42,6 +42,9 @@ #include "xfs_icache.h" #include "xfs_log.h" #include "xfs_rmap_btree.h" +#include "xfs_iomap.h" +#include "xfs_reflink.h" +#include "xfs_refcount.h" /* Kernel only BMAP related definitions and functions */ @@ -389,11 +392,13 @@ xfs_bmap_count_blocks( STATIC int xfs_getbmapx_fix_eof_hole( xfs_inode_t *ip, /* xfs incore inode pointer */ + int whichfork, struct getbmapx *out, /* output structure */ int prealloced, /* this is a file with * preallocated data space */ __int64_t end, /* last block requested */ - xfs_fsblock_t startblock) + xfs_fsblock_t startblock, + bool moretocome) { __int64_t fixlen; xfs_mount_t *mp; /* file system mount point */ @@ -418,8 +423,9 @@ xfs_getbmapx_fix_eof_hole( else out->bmv_block = xfs_fsb_to_db(ip, startblock); fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset); - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) && + ifp = XFS_IFORK_PTR(ip, whichfork); + if (!moretocome && + xfs_iext_bno_to_ext(ifp, fileblock, &lastx) && (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1)) out->bmv_oflags |= BMV_OF_LAST; } @@ -427,6 +433,81 @@ xfs_getbmapx_fix_eof_hole( return 1; } +/* Adjust the reported bmap around shared/unshared extent transitions. */ +STATIC int +xfs_getbmap_adjust_shared( + struct xfs_inode *ip, + int whichfork, + struct xfs_bmbt_irec *map, + struct getbmapx *out, + struct xfs_bmbt_irec *next_map) +{ + struct xfs_mount *mp = ip->i_mount; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + xfs_agblock_t ebno; + xfs_extlen_t elen; + xfs_extlen_t nlen; + int error; + + next_map->br_startblock = NULLFSBLOCK; + next_map->br_startoff = NULLFILEOFF; + next_map->br_blockcount = 0; + + /* Only written data blocks can be shared. 
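Holes, delalloc reservations and unwritten extents carry no committed data, so they are skipped here.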
*/ + if (!xfs_is_reflink_inode(ip) || whichfork != XFS_DATA_FORK || + map->br_startblock == DELAYSTARTBLOCK || + map->br_startblock == HOLESTARTBLOCK || + ISUNWRITTEN(map)) + return 0; + + agno = XFS_FSB_TO_AGNO(mp, map->br_startblock); + agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock); + error = xfs_reflink_find_shared(mp, agno, agbno, map->br_blockcount, + &ebno, &elen, true); + if (error) + return error; + + if (ebno == NULLAGBLOCK) { + /* No shared blocks at all. */ + return 0; + } else if (agbno == ebno) { + /* + * Shared extent at (agbno, elen). Shrink the reported + * extent length and prepare to move the start of map[i] + * to agbno+elen, with the aim of (re)formatting the new + * map[i] the next time through the inner loop. + */ + out->bmv_length = XFS_FSB_TO_BB(mp, elen); + out->bmv_oflags |= BMV_OF_SHARED; + if (elen != map->br_blockcount) { + *next_map = *map; + next_map->br_startblock += elen; + next_map->br_startoff += elen; + next_map->br_blockcount -= elen; + } + map->br_blockcount -= elen; + } else { + /* + * There's an unshared extent (agbno, ebno - agbno) + * followed by shared extent at (ebno, elen). Shrink + * the reported extent length to cover only the unshared + * extent and prepare to move up the start of map[i] to + * ebno, with the aim of (re)formatting the new map[i] + * the next time through the inner loop. + */ + *next_map = *map; + nlen = ebno - agbno; + out->bmv_length = XFS_FSB_TO_BB(mp, nlen); + next_map->br_startblock += nlen; + next_map->br_startoff += nlen; + next_map->br_blockcount -= nlen; + map->br_blockcount -= nlen; + } + + return 0; +} + /* * Get inode's extents as described in bmv, and format for output. * Calls formatter to fill the user's buffer until all extents @@ -459,12 +540,28 @@ xfs_getbmap( int iflags; /* interface flags */ int bmapi_flags; /* flags for xfs_bmapi */ int cur_ext = 0; + struct xfs_bmbt_irec inject_map; mp = ip->i_mount; iflags = bmv->bmv_iflags; - whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; - if (whichfork == XFS_ATTR_FORK) { +#ifndef DEBUG + /* Only allow CoW fork queries if we're debugging. 
*/ + if (iflags & BMV_IF_COWFORK) + return -EINVAL; +#endif + if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK)) + return -EINVAL; + + if (iflags & BMV_IF_ATTRFORK) + whichfork = XFS_ATTR_FORK; + else if (iflags & BMV_IF_COWFORK) + whichfork = XFS_COW_FORK; + else + whichfork = XFS_DATA_FORK; + + switch (whichfork) { + case XFS_ATTR_FORK: if (XFS_IFORK_Q(ip)) { if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE && @@ -480,7 +577,20 @@ xfs_getbmap( prealloced = 0; fixlen = 1LL << 32; - } else { + break; + case XFS_COW_FORK: + if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS) + return -EINVAL; + + if (xfs_get_cowextsz_hint(ip)) { + prealloced = 1; + fixlen = mp->m_super->s_maxbytes; + } else { + prealloced = 0; + fixlen = XFS_ISIZE(ip); + } + break; + default: if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && ip->i_d.di_format != XFS_DINODE_FMT_BTREE && ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) @@ -494,6 +604,7 @@ xfs_getbmap( prealloced = 0; fixlen = XFS_ISIZE(ip); } + break; } if (bmv->bmv_length == -1) { @@ -520,7 +631,8 @@ xfs_getbmap( return -ENOMEM; xfs_ilock(ip, XFS_IOLOCK_SHARED); - if (whichfork == XFS_DATA_FORK) { + switch (whichfork) { + case XFS_DATA_FORK: if (!(iflags & BMV_IF_DELALLOC) && (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { error = filemap_write_and_wait(VFS_I(ip)->i_mapping); @@ -538,8 +650,14 @@ xfs_getbmap( } lock = xfs_ilock_data_map_shared(ip); - } else { + break; + case XFS_COW_FORK: + lock = XFS_ILOCK_SHARED; + xfs_ilock(ip, lock); + break; + case XFS_ATTR_FORK: lock = xfs_ilock_attr_map_shared(ip); + break; } /* @@ -581,7 +699,8 @@ xfs_getbmap( goto out_free_map; ASSERT(nmap <= subnex); - for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { + for (i = 0; i < nmap && nexleft && bmv->bmv_length && + cur_ext < bmv->bmv_count; i++) { out[cur_ext].bmv_oflags = 0; if (map[i].br_state == XFS_EXT_UNWRITTEN) out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; @@ -614,9 +733,16 @@ xfs_getbmap( goto out_free_map; } - if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext], - prealloced, bmvend, - map[i].br_startblock)) + /* Is this a shared block? */ + error = xfs_getbmap_adjust_shared(ip, whichfork, + &map[i], &out[cur_ext], &inject_map); + if (error) + goto out_free_map; + + if (!xfs_getbmapx_fix_eof_hole(ip, whichfork, + &out[cur_ext], prealloced, bmvend, + map[i].br_startblock, + inject_map.br_startblock != NULLFSBLOCK)) goto out_free_map; bmv->bmv_offset = @@ -636,11 +762,16 @@ xfs_getbmap( continue; } - nexleft--; + if (inject_map.br_startblock != NULLFSBLOCK) { + map[i] = inject_map; + i--; + } else + nexleft--; bmv->bmv_entries++; cur_ext++; } - } while (nmap && nexleft && bmv->bmv_length); + } while (nmap && nexleft && bmv->bmv_length && + cur_ext < bmv->bmv_count); out_free_map: kmem_free(map); @@ -1433,8 +1564,8 @@ xfs_insert_file_space( */ static int xfs_swap_extents_check_format( - xfs_inode_t *ip, /* target inode */ - xfs_inode_t *tip) /* tmp inode */ + struct xfs_inode *ip, /* target inode */ + struct xfs_inode *tip) /* tmp inode */ { /* Should never get a local format */ @@ -1450,6 +1581,13 @@ xfs_swap_extents_check_format( return -EINVAL; /* + * If we have to use the (expensive) rmap swap method, we can + * handle any number of extents and any format. 
+ */ + if (xfs_sb_version_hasrmapbt(&ip->i_mount->m_sb)) + return 0; + + /* * if the target inode is in extent form and the temp inode is in btree * form then we will end up with the target inode in the wrong format * as we already know there are less extents in the temp inode. @@ -1518,125 +1656,161 @@ xfs_swap_extent_flush( return 0; } -int -xfs_swap_extents( - xfs_inode_t *ip, /* target inode */ - xfs_inode_t *tip, /* tmp inode */ - xfs_swapext_t *sxp) +/* + * Move extents from one file to another, when rmap is enabled. + */ +STATIC int +xfs_swap_extent_rmap( + struct xfs_trans **tpp, + struct xfs_inode *ip, + struct xfs_inode *tip) { - xfs_mount_t *mp = ip->i_mount; - xfs_trans_t *tp; - xfs_bstat_t *sbp = &sxp->sx_stat; - xfs_ifork_t *tempifp, *ifp, *tifp; - int src_log_flags, target_log_flags; - int error = 0; - int aforkblks = 0; - int taforkblks = 0; - __uint64_t tmp; - int lock_flags; - - /* XXX: we can't do this with rmap, will fix later */ - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) - return -EOPNOTSUPP; - - tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); - if (!tempifp) { - error = -ENOMEM; - goto out; - } + struct xfs_bmbt_irec irec; + struct xfs_bmbt_irec uirec; + struct xfs_bmbt_irec tirec; + xfs_fileoff_t offset_fsb; + xfs_fileoff_t end_fsb; + xfs_filblks_t count_fsb; + xfs_fsblock_t firstfsb; + struct xfs_defer_ops dfops; + int error; + xfs_filblks_t ilen; + xfs_filblks_t rlen; + int nimaps; + __uint64_t tip_flags2; /* - * Lock the inodes against other IO, page faults and truncate to - * begin with. Then we can ensure the inodes are flushed and have no - * page cache safely. Once we have done this we can take the ilocks and - * do the rest of the checks. + * If the source file has shared blocks, we must flag the donor + * file as having shared blocks so that we get the shared-block + * rmap functions when we go to fix up the rmaps. The flags + * will be switch for reals later. */ - lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; - xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); - xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL); - - /* Verify that both files have the same format */ - if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) { - error = -EINVAL; - goto out_unlock; - } + tip_flags2 = tip->i_d.di_flags2; + if (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK) + tip->i_d.di_flags2 |= XFS_DIFLAG2_REFLINK; + + offset_fsb = 0; + end_fsb = XFS_B_TO_FSB(ip->i_mount, i_size_read(VFS_I(ip))); + count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); + + while (count_fsb) { + /* Read extent from the donor file */ + nimaps = 1; + error = xfs_bmapi_read(tip, offset_fsb, count_fsb, &tirec, + &nimaps, 0); + if (error) + goto out; + ASSERT(nimaps == 1); + ASSERT(tirec.br_startblock != DELAYSTARTBLOCK); + + trace_xfs_swap_extent_rmap_remap(tip, &tirec); + ilen = tirec.br_blockcount; + + /* Unmap the old blocks in the source file. */ + while (tirec.br_blockcount) { + xfs_defer_init(&dfops, &firstfsb); + trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec); + + /* Read extent from the source file */ + nimaps = 1; + error = xfs_bmapi_read(ip, tirec.br_startoff, + tirec.br_blockcount, &irec, + &nimaps, 0); + if (error) + goto out_defer; + ASSERT(nimaps == 1); + ASSERT(tirec.br_startoff == irec.br_startoff); + trace_xfs_swap_extent_rmap_remap_piece(ip, &irec); + + /* Trim the extent. 
*/ + uirec = tirec; + uirec.br_blockcount = rlen = min_t(xfs_filblks_t, + tirec.br_blockcount, + irec.br_blockcount); + trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec); + + /* Remove the mapping from the donor file. */ + error = xfs_bmap_unmap_extent((*tpp)->t_mountp, &dfops, + tip, &uirec); + if (error) + goto out_defer; - /* Verify both files are either real-time or non-realtime */ - if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { - error = -EINVAL; - goto out_unlock; - } + /* Remove the mapping from the source file. */ + error = xfs_bmap_unmap_extent((*tpp)->t_mountp, &dfops, + ip, &irec); + if (error) + goto out_defer; - error = xfs_swap_extent_flush(ip); - if (error) - goto out_unlock; - error = xfs_swap_extent_flush(tip); - if (error) - goto out_unlock; + /* Map the donor file's blocks into the source file. */ + error = xfs_bmap_map_extent((*tpp)->t_mountp, &dfops, + ip, &uirec); + if (error) + goto out_defer; - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); - if (error) - goto out_unlock; + /* Map the source file's blocks into the donor file. */ + error = xfs_bmap_map_extent((*tpp)->t_mountp, &dfops, + tip, &irec); + if (error) + goto out_defer; - /* - * Lock and join the inodes to the tansaction so that transaction commit - * or cancel will unlock the inodes from this point onwards. - */ - xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); - lock_flags |= XFS_ILOCK_EXCL; - xfs_trans_ijoin(tp, ip, lock_flags); - xfs_trans_ijoin(tp, tip, lock_flags); + error = xfs_defer_finish(tpp, &dfops, ip); + if (error) + goto out_defer; + tirec.br_startoff += rlen; + if (tirec.br_startblock != HOLESTARTBLOCK && + tirec.br_startblock != DELAYSTARTBLOCK) + tirec.br_startblock += rlen; + tirec.br_blockcount -= rlen; + } - /* Verify all data are being swapped */ - if (sxp->sx_offset != 0 || - sxp->sx_length != ip->i_d.di_size || - sxp->sx_length != tip->i_d.di_size) { - error = -EFAULT; - goto out_trans_cancel; + /* Roll on... */ + count_fsb -= ilen; + offset_fsb += ilen; } - trace_xfs_swap_extent_before(ip, 0); - trace_xfs_swap_extent_before(tip, 1); + tip->i_d.di_flags2 = tip_flags2; + return 0; - /* check inode formats now that data is flushed */ - error = xfs_swap_extents_check_format(ip, tip); - if (error) { - xfs_notice(mp, - "%s: inode 0x%llx format is incompatible for exchanging.", - __func__, ip->i_ino); - goto out_trans_cancel; - } +out_defer: + xfs_defer_cancel(&dfops); +out: + trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_); + tip->i_d.di_flags2 = tip_flags2; + return error; +} + +/* Swap the extents of two files by swapping data forks. */ +STATIC int +xfs_swap_extent_forks( + struct xfs_trans *tp, + struct xfs_inode *ip, + struct xfs_inode *tip, + int *src_log_flags, + int *target_log_flags) +{ + struct xfs_ifork tempifp, *ifp, *tifp; + int aforkblks = 0; + int taforkblks = 0; + __uint64_t tmp; + int error; - /* - * Compare the current change & modify times with that - * passed in. If they differ, we abort this swap. - * This is the mechanism used to ensure the calling - * process that the file was not changed out from - * under it. 
- */ - if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) || - (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) || - (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) || - (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) { - error = -EBUSY; - goto out_trans_cancel; - } /* * Count the number of extended attribute blocks */ if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { - error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks); + error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, + &aforkblks); if (error) - goto out_trans_cancel; + return error; } if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, - &taforkblks); + &taforkblks); if (error) - goto out_trans_cancel; + return error; } /* @@ -1645,31 +1819,23 @@ xfs_swap_extents( * buffers, and so the validation done on read will expect the owner * field to be correctly set. Once we change the owners, we can swap the * inode forks. - * - * Note the trickiness in setting the log flags - we set the owner log - * flag on the opposite inode (i.e. the inode we are setting the new - * owner to be) because once we swap the forks and log that, log - * recovery is going to see the fork as owned by the swapped inode, - * not the pre-swapped inodes. */ - src_log_flags = XFS_ILOG_CORE; - target_log_flags = XFS_ILOG_CORE; if (ip->i_d.di_version == 3 && ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { - target_log_flags |= XFS_ILOG_DOWNER; + (*target_log_flags) |= XFS_ILOG_DOWNER; error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, tip->i_ino, NULL); if (error) - goto out_trans_cancel; + return error; } if (tip->i_d.di_version == 3 && tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { - src_log_flags |= XFS_ILOG_DOWNER; + (*src_log_flags) |= XFS_ILOG_DOWNER; error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK, ip->i_ino, NULL); if (error) - goto out_trans_cancel; + return error; } /* @@ -1677,9 +1843,9 @@ xfs_swap_extents( */ ifp = &ip->i_df; tifp = &tip->i_df; - *tempifp = *ifp; /* struct copy */ + tempifp = *ifp; /* struct copy */ *ifp = *tifp; /* struct copy */ - *tifp = *tempifp; /* struct copy */ + *tifp = tempifp; /* struct copy */ /* * Fix the on-disk inode values @@ -1719,12 +1885,12 @@ xfs_swap_extents( ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; } - src_log_flags |= XFS_ILOG_DEXT; + (*src_log_flags) |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: ASSERT(ip->i_d.di_version < 3 || - (src_log_flags & XFS_ILOG_DOWNER)); - src_log_flags |= XFS_ILOG_DBROOT; + (*src_log_flags & XFS_ILOG_DOWNER)); + (*src_log_flags) |= XFS_ILOG_DBROOT; break; } @@ -1738,15 +1904,166 @@ xfs_swap_extents( tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext; } - target_log_flags |= XFS_ILOG_DEXT; + (*target_log_flags) |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: - target_log_flags |= XFS_ILOG_DBROOT; + (*target_log_flags) |= XFS_ILOG_DBROOT; ASSERT(tip->i_d.di_version < 3 || - (target_log_flags & XFS_ILOG_DOWNER)); + (*target_log_flags & XFS_ILOG_DOWNER)); break; } + return 0; +} + +int +xfs_swap_extents( + struct xfs_inode *ip, /* target inode */ + struct xfs_inode *tip, /* tmp inode */ + struct xfs_swapext *sxp) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + struct xfs_bstat *sbp = &sxp->sx_stat; + int src_log_flags, target_log_flags; + int error = 0; + int lock_flags; + struct xfs_ifork *cowfp; + __uint64_t f; + int resblks; + + /* + * 
Lock the inodes against other IO, page faults and truncate to + * begin with. Then we can ensure the inodes are flushed and have no + * page cache safely. Once we have done this we can take the ilocks and + * do the rest of the checks. + */ + lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; + xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); + xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL); + + /* Verify that both files have the same format */ + if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) { + error = -EINVAL; + goto out_unlock; + } + + /* Verify both files are either real-time or non-realtime */ + if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { + error = -EINVAL; + goto out_unlock; + } + + error = xfs_swap_extent_flush(ip); + if (error) + goto out_unlock; + error = xfs_swap_extent_flush(tip); + if (error) + goto out_unlock; + + /* + * Extent "swapping" with rmap requires a permanent reservation and + * a block reservation because it's really just a remap operation + * performed with log redo items! + */ + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { + /* + * Conceptually this shouldn't affect the shape of either + * bmbt, but since we atomically move extents one by one, + * we reserve enough space to rebuild both trees. + */ + resblks = XFS_SWAP_RMAP_SPACE_RES(mp, + XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK), + XFS_DATA_FORK) + + XFS_SWAP_RMAP_SPACE_RES(mp, + XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK), + XFS_DATA_FORK); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, + 0, 0, &tp); + } else + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, + 0, 0, &tp); + if (error) + goto out_unlock; + + /* + * Lock and join the inodes to the tansaction so that transaction commit + * or cancel will unlock the inodes from this point onwards. + */ + xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); + lock_flags |= XFS_ILOCK_EXCL; + xfs_trans_ijoin(tp, ip, 0); + xfs_trans_ijoin(tp, tip, 0); + + + /* Verify all data are being swapped */ + if (sxp->sx_offset != 0 || + sxp->sx_length != ip->i_d.di_size || + sxp->sx_length != tip->i_d.di_size) { + error = -EFAULT; + goto out_trans_cancel; + } + + trace_xfs_swap_extent_before(ip, 0); + trace_xfs_swap_extent_before(tip, 1); + + /* check inode formats now that data is flushed */ + error = xfs_swap_extents_check_format(ip, tip); + if (error) { + xfs_notice(mp, + "%s: inode 0x%llx format is incompatible for exchanging.", + __func__, ip->i_ino); + goto out_trans_cancel; + } + + /* + * Compare the current change & modify times with that + * passed in. If they differ, we abort this swap. + * This is the mechanism used to ensure the calling + * process that the file was not changed out from + * under it. + */ + if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) || + (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) || + (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) || + (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) { + error = -EBUSY; + goto out_trans_cancel; + } + + /* + * Note the trickiness in setting the log flags - we set the owner log + * flag on the opposite inode (i.e. the inode we are setting the new + * owner to be) because once we swap the forks and log that, log + * recovery is going to see the fork as owned by the swapped inode, + * not the pre-swapped inodes. 
+ */ + src_log_flags = XFS_ILOG_CORE; + target_log_flags = XFS_ILOG_CORE; + + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + error = xfs_swap_extent_rmap(&tp, ip, tip); + else + error = xfs_swap_extent_forks(tp, ip, tip, &src_log_flags, + &target_log_flags); + if (error) + goto out_trans_cancel; + + /* Do we have to swap reflink flags? */ + if ((ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK) ^ + (tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK)) { + f = ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK; + ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; + ip->i_d.di_flags2 |= tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK; + tip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; + tip->i_d.di_flags2 |= f & XFS_DIFLAG2_REFLINK; + cowfp = ip->i_cowfp; + ip->i_cowfp = tip->i_cowfp; + tip->i_cowfp = cowfp; + xfs_inode_set_cowblocks_tag(ip); + xfs_inode_set_cowblocks_tag(tip); + } + xfs_trans_log_inode(tp, ip, src_log_flags); xfs_trans_log_inode(tp, tip, target_log_flags); @@ -1761,16 +2078,16 @@ xfs_swap_extents( trace_xfs_swap_extent_after(ip, 0); trace_xfs_swap_extent_after(tip, 1); -out: - kmem_free(tempifp); - return error; -out_unlock: xfs_iunlock(ip, lock_flags); xfs_iunlock(tip, lock_flags); - goto out; + return error; out_trans_cancel: xfs_trans_cancel(tp); - goto out; + +out_unlock: + xfs_iunlock(ip, lock_flags); + xfs_iunlock(tip, lock_flags); + return error; } diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index f44f79996978..29816981b50a 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -84,7 +84,8 @@ xfs_dir2_sf_getdents( sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); + if (dp->i_d.di_size < xfs_dir2_sf_hdr_size(sfp->i8count)) + return -EFSCORRUPTED; /* * If the block number in the offset is out of range, we're done. diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 3d224702fbc0..05f8666733a0 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -92,7 +92,11 @@ extern void xfs_verifier_error(struct xfs_buf *bp); #define XFS_ERRTAG_BMAPIFORMAT 21 #define XFS_ERRTAG_FREE_EXTENT 22 #define XFS_ERRTAG_RMAP_FINISH_ONE 23 -#define XFS_ERRTAG_MAX 24 +#define XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE 24 +#define XFS_ERRTAG_REFCOUNT_FINISH_ONE 25 +#define XFS_ERRTAG_BMAP_FINISH_ONE 26 +#define XFS_ERRTAG_AG_RESV_CRITICAL 27 +#define XFS_ERRTAG_MAX 28 /* * Random factors for above tags, 1 means always, 2 means 1/2 time, etc. @@ -121,6 +125,10 @@ extern void xfs_verifier_error(struct xfs_buf *bp); #define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT #define XFS_RANDOM_FREE_EXTENT 1 #define XFS_RANDOM_RMAP_FINISH_ONE 1 +#define XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE 1 +#define XFS_RANDOM_REFCOUNT_FINISH_ONE 1 +#define XFS_RANDOM_BMAP_FINISH_ONE 1 +#define XFS_RANDOM_AG_RESV_CRITICAL 4 #ifdef DEBUG extern int xfs_error_test_active; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 2bc58b3fd37d..a314fc7b56fa 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -38,6 +38,7 @@ #include "xfs_icache.h" #include "xfs_pnfs.h" #include "xfs_iomap.h" +#include "xfs_reflink.h" #include <linux/dcache.h> #include <linux/falloc.h> @@ -634,6 +635,13 @@ xfs_file_dio_aio_write( trace_xfs_file_direct_write(ip, count, iocb->ki_pos); + /* If this is a block-aligned directio CoW, remap immediately. 
*/ + if (xfs_is_reflink_inode(ip) && !unaligned_io) { + ret = xfs_reflink_allocate_cow_range(ip, iocb->ki_pos, count); + if (ret) + goto out; + } + data = *from; ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, xfs_get_blocks_direct, xfs_end_io_direct_write, @@ -735,6 +743,9 @@ write_retry: enospc = xfs_inode_free_quota_eofblocks(ip); if (enospc) goto write_retry; + enospc = xfs_inode_free_quota_cowblocks(ip); + if (enospc) + goto write_retry; } else if (ret == -ENOSPC && !enospc) { struct xfs_eofblocks eofb = {0}; @@ -774,10 +785,20 @@ xfs_file_write_iter( if (IS_DAX(inode)) ret = xfs_file_dax_write(iocb, from); - else if (iocb->ki_flags & IOCB_DIRECT) + else if (iocb->ki_flags & IOCB_DIRECT) { + /* + * Allow a directio write to fall back to a buffered + * write *only* in the case that we're doing a reflink + * CoW. In all other directio scenarios we do not + * allow an operation to fall back to buffered mode. + */ ret = xfs_file_dio_aio_write(iocb, from); - else + if (ret == -EREMCHG) + goto buffered; + } else { +buffered: ret = xfs_file_buffered_aio_write(iocb, from); + } if (ret > 0) { XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret); @@ -791,7 +812,7 @@ xfs_file_write_iter( #define XFS_FALLOC_FL_SUPPORTED \ (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ - FALLOC_FL_INSERT_RANGE) + FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE) STATIC long xfs_file_fallocate( @@ -881,9 +902,15 @@ xfs_file_fallocate( if (mode & FALLOC_FL_ZERO_RANGE) error = xfs_zero_file_space(ip, offset, len); - else + else { + if (mode & FALLOC_FL_UNSHARE_RANGE) { + error = xfs_reflink_unshare(ip, offset, len); + if (error) + goto out_unlock; + } error = xfs_alloc_file_space(ip, offset, len, XFS_BMAPI_PREALLOC); + } if (error) goto out_unlock; } @@ -920,6 +947,189 @@ out_unlock: return error; } +/* + * Flush all file writes out to disk. + */ +static int +xfs_file_wait_for_io( + struct inode *inode, + loff_t offset, + size_t len) +{ + loff_t rounding; + loff_t ioffset; + loff_t iendoffset; + loff_t bs; + int ret; + + bs = inode->i_sb->s_blocksize; + inode_dio_wait(inode); + + rounding = max_t(xfs_off_t, bs, PAGE_SIZE); + ioffset = round_down(offset, rounding); + iendoffset = round_up(offset + len, rounding) - 1; + ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, + iendoffset); + return ret; +} + +/* Hook up to the VFS reflink function */ +STATIC int +xfs_file_share_range( + struct file *file_in, + loff_t pos_in, + struct file *file_out, + loff_t pos_out, + u64 len, + bool is_dedupe) +{ + struct inode *inode_in; + struct inode *inode_out; + ssize_t ret; + loff_t bs; + loff_t isize; + int same_inode; + loff_t blen; + unsigned int flags = 0; + + inode_in = file_inode(file_in); + inode_out = file_inode(file_out); + bs = inode_out->i_sb->s_blocksize; + + /* Don't touch certain kinds of inodes */ + if (IS_IMMUTABLE(inode_out)) + return -EPERM; + if (IS_SWAPFILE(inode_in) || + IS_SWAPFILE(inode_out)) + return -ETXTBSY; + + /* Reflink only works within this filesystem. */ + if (inode_in->i_sb != inode_out->i_sb) + return -EXDEV; + same_inode = (inode_in->i_ino == inode_out->i_ino); + + /* Don't reflink dirs, pipes, sockets... */ + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) + return -EISDIR; + if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode)) + return -EINVAL; + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + return -EINVAL; + + /* Don't share DAX file data for now. 
*/ + if (IS_DAX(inode_in) || IS_DAX(inode_out)) + return -EINVAL; + + /* Are we going all the way to the end? */ + isize = i_size_read(inode_in); + if (isize == 0) + return 0; + if (len == 0) + len = isize - pos_in; + + /* Ensure offsets don't wrap and the input is inside i_size */ + if (pos_in + len < pos_in || pos_out + len < pos_out || + pos_in + len > isize) + return -EINVAL; + + /* Don't allow dedupe past EOF in the dest file */ + if (is_dedupe) { + loff_t disize; + + disize = i_size_read(inode_out); + if (pos_out >= disize || pos_out + len > disize) + return -EINVAL; + } + + /* If we're linking to EOF, continue to the block boundary. */ + if (pos_in + len == isize) + blen = ALIGN(isize, bs) - pos_in; + else + blen = len; + + /* Only reflink if we're aligned to block boundaries */ + if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) || + !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs)) + return -EINVAL; + + /* Don't allow overlapped reflink within the same file */ + if (same_inode && pos_out + blen > pos_in && pos_out < pos_in + blen) + return -EINVAL; + + /* Wait for the completion of any pending IOs on srcfile */ + ret = xfs_file_wait_for_io(inode_in, pos_in, len); + if (ret) + goto out; + ret = xfs_file_wait_for_io(inode_out, pos_out, len); + if (ret) + goto out; + + if (is_dedupe) + flags |= XFS_REFLINK_DEDUPE; + ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out), + pos_out, len, flags); + if (ret < 0) + goto out; + +out: + return ret; +} + +STATIC ssize_t +xfs_file_copy_range( + struct file *file_in, + loff_t pos_in, + struct file *file_out, + loff_t pos_out, + size_t len, + unsigned int flags) +{ + int error; + + error = xfs_file_share_range(file_in, pos_in, file_out, pos_out, + len, false); + if (error) + return error; + return len; +} + +STATIC int +xfs_file_clone_range( + struct file *file_in, + loff_t pos_in, + struct file *file_out, + loff_t pos_out, + u64 len) +{ + return xfs_file_share_range(file_in, pos_in, file_out, pos_out, + len, false); +} + +#define XFS_MAX_DEDUPE_LEN (16 * 1024 * 1024) +STATIC ssize_t +xfs_file_dedupe_range( + struct file *src_file, + u64 loff, + u64 len, + struct file *dst_file, + u64 dst_loff) +{ + int error; + + /* + * Limit the total length we will dedupe for each operation. + * This is intended to bound the total time spent in this + * ioctl to something sane. + */ + if (len > XFS_MAX_DEDUPE_LEN) + len = XFS_MAX_DEDUPE_LEN; + + error = xfs_file_share_range(src_file, loff, dst_file, dst_loff, + len, true); + if (error) + return error; + return len; +} STATIC int xfs_file_open( @@ -1581,6 +1791,9 @@ const struct file_operations xfs_file_operations = { .fsync = xfs_file_fsync, .get_unmapped_area = thp_get_unmapped_area, .fallocate = xfs_file_fallocate, + .copy_file_range = xfs_file_copy_range, + .clone_file_range = xfs_file_clone_range, + .dedupe_file_range = xfs_file_dedupe_range, }; const struct file_operations xfs_dir_file_operations = { diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 94ac06f3d908..93d12fa2670d 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -43,6 +43,7 @@ #include "xfs_log.h" #include "xfs_filestream.h" #include "xfs_rmap.h" +#include "xfs_ag_resv.h" /* * File system operations @@ -108,7 +109,9 @@ xfs_fs_geometry( (xfs_sb_version_hassparseinodes(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_SPINODES : 0) | (xfs_sb_version_hasrmapbt(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_RMAPBT : 0); + XFS_FSOP_GEOM_FLAGS_RMAPBT : 0) | + (xfs_sb_version_hasreflink(&mp->m_sb) ? 
+ XFS_FSOP_GEOM_FLAGS_REFLINK : 0); geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? mp->m_sb.sb_logsectsize : BBSIZE; geo->rtsectsize = mp->m_sb.sb_blocksize; @@ -259,6 +262,12 @@ xfs_growfs_data_private( agf->agf_longest = cpu_to_be32(tmpsize); if (xfs_sb_version_hascrc(&mp->m_sb)) uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid); + if (xfs_sb_version_hasreflink(&mp->m_sb)) { + agf->agf_refcount_root = cpu_to_be32( + xfs_refc_block(mp)); + agf->agf_refcount_level = cpu_to_be32(1); + agf->agf_refcount_blocks = cpu_to_be32(1); + } error = xfs_bwrite(bp); xfs_buf_relse(bp); @@ -450,6 +459,17 @@ xfs_growfs_data_private( rrec->rm_offset = 0; be16_add_cpu(&block->bb_numrecs, 1); + /* account for refc btree root */ + if (xfs_sb_version_hasreflink(&mp->m_sb)) { + rrec = XFS_RMAP_REC_ADDR(block, 5); + rrec->rm_startblock = cpu_to_be32( + xfs_refc_block(mp)); + rrec->rm_blockcount = cpu_to_be32(1); + rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC); + rrec->rm_offset = 0; + be16_add_cpu(&block->bb_numrecs, 1); + } + error = xfs_bwrite(bp); xfs_buf_relse(bp); if (error) @@ -507,6 +527,28 @@ xfs_growfs_data_private( goto error0; } + /* + * refcount btree root block + */ + if (xfs_sb_version_hasreflink(&mp->m_sb)) { + bp = xfs_growfs_get_hdr_buf(mp, + XFS_AGB_TO_DADDR(mp, agno, xfs_refc_block(mp)), + BTOBB(mp->m_sb.sb_blocksize), 0, + &xfs_refcountbt_buf_ops); + if (!bp) { + error = -ENOMEM; + goto error0; + } + + xfs_btree_init_block(mp, bp, XFS_REFC_CRC_MAGIC, + 0, 0, agno, + XFS_BTREE_CRC_BLOCKS); + + error = xfs_bwrite(bp); + xfs_buf_relse(bp); + if (error) + goto error0; + } } xfs_trans_agblocks_delta(tp, nfree); /* @@ -589,6 +631,11 @@ xfs_growfs_data_private( xfs_set_low_space_thresholds(mp); mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); + /* Reserve AG metadata blocks. */ + error = xfs_fs_reserve_ag_blocks(mp); + if (error && error != -ENOSPC) + goto out; + /* update secondary superblocks. */ for (agno = 1; agno < nagcount; agno++) { error = 0; @@ -639,6 +686,8 @@ xfs_growfs_data_private( continue; } } + + out: return saved_error ? saved_error : error; error0: @@ -948,3 +997,59 @@ xfs_do_force_shutdown( "Please umount the filesystem and rectify the problem(s)"); } } + +/* + * Reserve free space for per-AG metadata. + */ +int +xfs_fs_reserve_ag_blocks( + struct xfs_mount *mp) +{ + xfs_agnumber_t agno; + struct xfs_perag *pag; + int error = 0; + int err2; + + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + pag = xfs_perag_get(mp, agno); + err2 = xfs_ag_resv_init(pag); + xfs_perag_put(pag); + if (err2 && !error) + error = err2; + } + + if (error && error != -ENOSPC) { + xfs_warn(mp, + "Error %d reserving per-AG metadata reserve pool.", error); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + } + + return error; +} + +/* + * Free space reserved for per-AG metadata. 
+ */ +int +xfs_fs_unreserve_ag_blocks( + struct xfs_mount *mp) +{ + xfs_agnumber_t agno; + struct xfs_perag *pag; + int error = 0; + int err2; + + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + pag = xfs_perag_get(mp, agno); + err2 = xfs_ag_resv_free(pag); + xfs_perag_put(pag); + if (err2 && !error) + error = err2; + } + + if (error) + xfs_warn(mp, + "Error %d freeing per-AG metadata reserve pool.", error); + + return error; +} diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h index f32713f14f9a..f34915898fea 100644 --- a/fs/xfs/xfs_fsops.h +++ b/fs/xfs/xfs_fsops.h @@ -26,4 +26,7 @@ extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, xfs_fsop_resblks_t *outval); extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); +extern int xfs_fs_reserve_ag_blocks(struct xfs_mount *mp); +extern int xfs_fs_unreserve_ag_blocks(struct xfs_mount *mp); + #endif /* __XFS_FSOPS_H__ */ diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c index 4d41b241298f..687a4b01fc53 100644 --- a/fs/xfs/xfs_globals.c +++ b/fs/xfs/xfs_globals.c @@ -21,8 +21,8 @@ /* * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, * other XFS code uses these values. Times are measured in centisecs (i.e. - * 100ths of a second) with the exception of eofb_timer, which is measured in - * seconds. + * 100ths of a second) with the exception of eofb_timer and cowb_timer, which + * are measured in seconds. */ xfs_param_t xfs_params = { /* MIN DFLT MAX */ @@ -42,6 +42,7 @@ xfs_param_t xfs_params = { .inherit_nodfrg = { 0, 1, 1 }, .fstrm_timer = { 1, 30*100, 3600*100}, .eofb_timer = { 1, 300, 3600*24}, + .cowb_timer = { 1, 1800, 3600*24}, }; struct xfs_globals xfs_globals = { diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 65b2e3f85f52..14796b744e0a 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -33,6 +33,7 @@ #include "xfs_bmap_util.h" #include "xfs_dquot_item.h" #include "xfs_dquot.h" +#include "xfs_reflink.h" #include <linux/kthread.h> #include <linux/freezer.h> @@ -76,6 +77,9 @@ xfs_inode_alloc( ip->i_mount = mp; memset(&ip->i_imap, 0, sizeof(struct xfs_imap)); ip->i_afp = NULL; + ip->i_cowfp = NULL; + ip->i_cnextents = 0; + ip->i_cformat = XFS_DINODE_FMT_EXTENTS; memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); ip->i_flags = 0; ip->i_delayed_blks = 0; @@ -101,6 +105,8 @@ xfs_inode_free_callback( if (ip->i_afp) xfs_idestroy_fork(ip, XFS_ATTR_FORK); + if (ip->i_cowfp) + xfs_idestroy_fork(ip, XFS_COW_FORK); if (ip->i_itemp) { ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL)); @@ -787,6 +793,33 @@ xfs_eofblocks_worker( xfs_queue_eofblocks(mp); } +/* + * Background scanning to trim preallocated CoW space. This is queued + * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default). + * (We'll just piggyback on the post-EOF prealloc space workqueue.) 
+ */ +STATIC void +xfs_queue_cowblocks( + struct xfs_mount *mp) +{ + rcu_read_lock(); + if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_COWBLOCKS_TAG)) + queue_delayed_work(mp->m_eofblocks_workqueue, + &mp->m_cowblocks_work, + msecs_to_jiffies(xfs_cowb_secs * 1000)); + rcu_read_unlock(); +} + +void +xfs_cowblocks_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(to_delayed_work(work), + struct xfs_mount, m_cowblocks_work); + xfs_icache_free_cowblocks(mp, NULL); + xfs_queue_cowblocks(mp); +} + int xfs_inode_ag_iterator( struct xfs_mount *mp, @@ -1343,18 +1376,30 @@ xfs_inode_free_eofblocks( return ret; } -int -xfs_icache_free_eofblocks( +static int +__xfs_icache_free_eofblocks( struct xfs_mount *mp, - struct xfs_eofblocks *eofb) + struct xfs_eofblocks *eofb, + int (*execute)(struct xfs_inode *ip, int flags, + void *args), + int tag) { int flags = SYNC_TRYLOCK; if (eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC)) flags = SYNC_WAIT; - return xfs_inode_ag_iterator_tag(mp, xfs_inode_free_eofblocks, flags, - eofb, XFS_ICI_EOFBLOCKS_TAG); + return xfs_inode_ag_iterator_tag(mp, execute, flags, + eofb, tag); +} + +int +xfs_icache_free_eofblocks( + struct xfs_mount *mp, + struct xfs_eofblocks *eofb) +{ + return __xfs_icache_free_eofblocks(mp, eofb, xfs_inode_free_eofblocks, + XFS_ICI_EOFBLOCKS_TAG); } /* @@ -1363,9 +1408,11 @@ xfs_icache_free_eofblocks( * failure. We make a best effort by including each quota under low free space * conditions (less than 1% free space) in the scan. */ -int -xfs_inode_free_quota_eofblocks( - struct xfs_inode *ip) +static int +__xfs_inode_free_quota_eofblocks( + struct xfs_inode *ip, + int (*execute)(struct xfs_mount *mp, + struct xfs_eofblocks *eofb)) { int scan = 0; struct xfs_eofblocks eofb = {0}; @@ -1401,14 +1448,25 @@ xfs_inode_free_quota_eofblocks( } if (scan) - xfs_icache_free_eofblocks(ip->i_mount, &eofb); + execute(ip->i_mount, &eofb); return scan; } -void -xfs_inode_set_eofblocks_tag( - xfs_inode_t *ip) +int +xfs_inode_free_quota_eofblocks( + struct xfs_inode *ip) +{ + return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks); +} + +static void +__xfs_inode_set_eofblocks_tag( + xfs_inode_t *ip, + void (*execute)(struct xfs_mount *mp), + void (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, + int error, unsigned long caller_ip), + int tag) { struct xfs_mount *mp = ip->i_mount; struct xfs_perag *pag; @@ -1426,26 +1484,22 @@ xfs_inode_set_eofblocks_tag( pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); spin_lock(&pag->pag_ici_lock); - trace_xfs_inode_set_eofblocks_tag(ip); - tagged = radix_tree_tagged(&pag->pag_ici_root, - XFS_ICI_EOFBLOCKS_TAG); + tagged = radix_tree_tagged(&pag->pag_ici_root, tag); radix_tree_tag_set(&pag->pag_ici_root, - XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), - XFS_ICI_EOFBLOCKS_TAG); + XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), tag); if (!tagged) { /* propagate the eofblocks tag up into the perag radix tree */ spin_lock(&ip->i_mount->m_perag_lock); radix_tree_tag_set(&ip->i_mount->m_perag_tree, XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), - XFS_ICI_EOFBLOCKS_TAG); + tag); spin_unlock(&ip->i_mount->m_perag_lock); /* kick off background trimming */ - xfs_queue_eofblocks(ip->i_mount); + execute(ip->i_mount); - trace_xfs_perag_set_eofblocks(ip->i_mount, pag->pag_agno, - -1, _RET_IP_); + set_tp(ip->i_mount, pag->pag_agno, -1, _RET_IP_); } spin_unlock(&pag->pag_ici_lock); @@ -1453,9 +1507,22 @@ xfs_inode_set_eofblocks_tag( } void -xfs_inode_clear_eofblocks_tag( +xfs_inode_set_eofblocks_tag( 
xfs_inode_t *ip) { + trace_xfs_inode_set_eofblocks_tag(ip); + return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks, + trace_xfs_perag_set_eofblocks, + XFS_ICI_EOFBLOCKS_TAG); +} + +static void +__xfs_inode_clear_eofblocks_tag( + xfs_inode_t *ip, + void (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, + int error, unsigned long caller_ip), + int tag) +{ struct xfs_mount *mp = ip->i_mount; struct xfs_perag *pag; @@ -1465,23 +1532,141 @@ xfs_inode_clear_eofblocks_tag( pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); spin_lock(&pag->pag_ici_lock); - trace_xfs_inode_clear_eofblocks_tag(ip); radix_tree_tag_clear(&pag->pag_ici_root, - XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), - XFS_ICI_EOFBLOCKS_TAG); - if (!radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_EOFBLOCKS_TAG)) { + XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), tag); + if (!radix_tree_tagged(&pag->pag_ici_root, tag)) { /* clear the eofblocks tag from the perag radix tree */ spin_lock(&ip->i_mount->m_perag_lock); radix_tree_tag_clear(&ip->i_mount->m_perag_tree, XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), - XFS_ICI_EOFBLOCKS_TAG); + tag); spin_unlock(&ip->i_mount->m_perag_lock); - trace_xfs_perag_clear_eofblocks(ip->i_mount, pag->pag_agno, - -1, _RET_IP_); + clear_tp(ip->i_mount, pag->pag_agno, -1, _RET_IP_); } spin_unlock(&pag->pag_ici_lock); xfs_perag_put(pag); } +void +xfs_inode_clear_eofblocks_tag( + xfs_inode_t *ip) +{ + trace_xfs_inode_clear_eofblocks_tag(ip); + return __xfs_inode_clear_eofblocks_tag(ip, + trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG); +} + +/* + * Automatic CoW Reservation Freeing + * + * These functions automatically garbage collect leftover CoW reservations + * that were made on behalf of a cowextsize hint when we start to run out + * of quota or when the reservations sit around for too long. If the file + * has dirty pages or is undergoing writeback, its CoW reservations will + * be retained. + * + * The actual garbage collection piggybacks off the same code that runs + * the speculative EOF preallocation garbage collector. + */ +STATIC int +xfs_inode_free_cowblocks( + struct xfs_inode *ip, + int flags, + void *args) +{ + int ret; + struct xfs_eofblocks *eofb = args; + bool need_iolock = true; + int match; + + ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); + + if (!xfs_reflink_has_real_cow_blocks(ip)) { + trace_xfs_inode_free_cowblocks_invalid(ip); + xfs_inode_clear_cowblocks_tag(ip); + return 0; + } + + /* + * If the mapping is dirty or under writeback we cannot touch the + * CoW fork. Leave it alone if we're in the midst of a directio. + */ + if (mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || + mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || + atomic_read(&VFS_I(ip)->i_dio_count)) + return 0; + + if (eofb) { + if (eofb->eof_flags & XFS_EOF_FLAGS_UNION) + match = xfs_inode_match_id_union(ip, eofb); + else + match = xfs_inode_match_id(ip, eofb); + if (!match) + return 0; + + /* skip the inode if the file size is too small */ + if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && + XFS_ISIZE(ip) < eofb->eof_min_file_size) + return 0; + + /* + * A scan owner implies we already hold the iolock. Skip it in + * xfs_free_eofblocks() to avoid deadlock. This also eliminates + * the possibility of EAGAIN being returned. 
+ */ + if (eofb->eof_scan_owner == ip->i_ino) + need_iolock = false; + } + + /* Free the CoW blocks */ + if (need_iolock) { + xfs_ilock(ip, XFS_IOLOCK_EXCL); + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); + } + + ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); + + if (need_iolock) { + xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + } + + return ret; +} + +int +xfs_icache_free_cowblocks( + struct xfs_mount *mp, + struct xfs_eofblocks *eofb) +{ + return __xfs_icache_free_eofblocks(mp, eofb, xfs_inode_free_cowblocks, + XFS_ICI_COWBLOCKS_TAG); +} + +int +xfs_inode_free_quota_cowblocks( + struct xfs_inode *ip) +{ + return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_cowblocks); +} + +void +xfs_inode_set_cowblocks_tag( + xfs_inode_t *ip) +{ + trace_xfs_inode_set_eofblocks_tag(ip); + return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks, + trace_xfs_perag_set_eofblocks, + XFS_ICI_COWBLOCKS_TAG); +} + +void +xfs_inode_clear_cowblocks_tag( + xfs_inode_t *ip) +{ + trace_xfs_inode_clear_eofblocks_tag(ip); + return __xfs_inode_clear_eofblocks_tag(ip, + trace_xfs_perag_clear_eofblocks, XFS_ICI_COWBLOCKS_TAG); +} diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 05bac99bef75..a1e02f4708ab 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -40,6 +40,7 @@ struct xfs_eofblocks { in xfs_inode_ag_iterator */ #define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */ #define XFS_ICI_EOFBLOCKS_TAG 1 /* inode has blocks beyond EOF */ +#define XFS_ICI_COWBLOCKS_TAG 2 /* inode can have cow blocks to gc */ /* * Flags for xfs_iget() @@ -70,6 +71,12 @@ int xfs_inode_free_quota_eofblocks(struct xfs_inode *ip); void xfs_eofblocks_worker(struct work_struct *); void xfs_queue_eofblocks(struct xfs_mount *); +void xfs_inode_set_cowblocks_tag(struct xfs_inode *ip); +void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip); +int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *); +int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip); +void xfs_cowblocks_worker(struct work_struct *); + int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 624e1dfa716b..4e560e6a12c1 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -49,6 +49,7 @@ #include "xfs_trans_priv.h" #include "xfs_log.h" #include "xfs_bmap_btree.h" +#include "xfs_reflink.h" kmem_zone_t *xfs_inode_zone; @@ -77,6 +78,29 @@ xfs_get_extsz_hint( } /* + * Helper function to extract CoW extent size hint from inode. + * Between the extent size hint and the CoW extent size hint, we + * return the greater of the two. If the value is zero (automatic), + * use the default size. + */ +xfs_extlen_t +xfs_get_cowextsz_hint( + struct xfs_inode *ip) +{ + xfs_extlen_t a, b; + + a = 0; + if (ip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) + a = ip->i_d.di_cowextsize; + b = xfs_get_extsz_hint(ip); + + a = max(a, b); + if (a == 0) + return XFS_DEFAULT_COWEXTSZ_HINT; + return a; +} + +/* * These two are wrapper routines around the xfs_ilock() routine used to * centralize some grungy code. They are used in places that wish to lock the * inode solely for reading the extents. 
The reason these places can't just @@ -651,6 +675,8 @@ _xfs_dic2xflags( if (di_flags2 & XFS_DIFLAG2_ANY) { if (di_flags2 & XFS_DIFLAG2_DAX) flags |= FS_XFLAG_DAX; + if (di_flags2 & XFS_DIFLAG2_COWEXTSIZE) + flags |= FS_XFLAG_COWEXTSIZE; } if (has_attr) @@ -834,6 +860,7 @@ xfs_ialloc( if (ip->i_d.di_version == 3) { inode->i_version = 1; ip->i_d.di_flags2 = 0; + ip->i_d.di_cowextsize = 0; ip->i_d.di_crtime.t_sec = (__int32_t)tv.tv_sec; ip->i_d.di_crtime.t_nsec = (__int32_t)tv.tv_nsec; } @@ -896,6 +923,15 @@ xfs_ialloc( ip->i_d.di_flags |= di_flags; ip->i_d.di_flags2 |= di_flags2; } + if (pip && + (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY) && + pip->i_d.di_version == 3 && + ip->i_d.di_version == 3) { + if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) { + ip->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; + ip->i_d.di_cowextsize = pip->i_d.di_cowextsize; + } + } /* FALLTHROUGH */ case S_IFLNK: ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; @@ -1586,6 +1622,20 @@ xfs_itruncate_extents( goto out; } + /* Remove all pending CoW reservations. */ + error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block, + last_block); + if (error) + goto out; + + /* + * Clear the reflink flag if we truncated everything. + */ + if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) { + ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; + xfs_inode_clear_cowblocks_tag(ip); + } + /* * Always re-log the inode so that our permanent transaction can keep * on rolling it forward in the log. @@ -1850,6 +1900,7 @@ xfs_inactive( } mp = ip->i_mount; + ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY)); /* If this is a read-only mount, don't do this (would generate I/O) */ if (mp->m_flags & XFS_MOUNT_RDONLY) diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 8f30d2533b48..f14c1de2549d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -47,6 +47,7 @@ typedef struct xfs_inode { /* Extent information. */ xfs_ifork_t *i_afp; /* attribute fork pointer */ + xfs_ifork_t *i_cowfp; /* copy on write extents */ xfs_ifork_t i_df; /* data fork */ /* operations vectors */ @@ -65,6 +66,9 @@ typedef struct xfs_inode { struct xfs_icdinode i_d; /* most of ondisk inode */ + xfs_extnum_t i_cnextents; /* # of extents in cow fork */ + unsigned int i_cformat; /* format of cow fork */ + /* VFS inode */ struct inode i_vnode; /* embedded VFS inode */ } xfs_inode_t; @@ -202,6 +206,11 @@ xfs_get_initial_prid(struct xfs_inode *dp) return XFS_PROJID_DEFAULT; } +static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) +{ + return ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK; +} + /* * In-core inode flags. */ @@ -217,6 +226,12 @@ xfs_get_initial_prid(struct xfs_inode *dp) #define XFS_IPINNED (1 << __XFS_IPINNED_BIT) #define XFS_IDONTCACHE (1 << 9) /* don't cache the inode long term */ #define XFS_IEOFBLOCKS (1 << 10)/* has the preallocblocks tag set */ +/* + * If this unlinked inode is in the middle of recovery, don't let drop_inode + * truncate and free the inode. This can happen if we iget the inode during + * log recovery to replay a bmap operation on the inode. 
+ */ +#define XFS_IRECOVERY (1 << 11) /* * Per-lifetime flags need to be reset when re-using a reclaimable inode during @@ -411,6 +426,7 @@ int xfs_iflush(struct xfs_inode *, struct xfs_buf **); void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); +xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t, xfs_nlink_t, xfs_dev_t, prid_t, int, @@ -474,4 +490,7 @@ do { \ extern struct kmem_zone *xfs_inode_zone; +/* The default CoW extent size hint. */ +#define XFS_DEFAULT_COWEXTSZ_HINT 32 + #endif /* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 892c2aced207..9610e9c00952 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -368,7 +368,7 @@ xfs_inode_to_log_dinode( to->di_crtime.t_sec = from->di_crtime.t_sec; to->di_crtime.t_nsec = from->di_crtime.t_nsec; to->di_flags2 = from->di_flags2; - + to->di_cowextsize = from->di_cowextsize; to->di_ino = ip->i_ino; to->di_lsn = lsn; memset(to->di_pad2, 0, sizeof(to->di_pad2)); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 0d9021f0551e..c245bed3249b 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -903,6 +903,8 @@ xfs_ioc_fsgetxattr( xfs_ilock(ip, XFS_ILOCK_SHARED); fa.fsx_xflags = xfs_ip2xflags(ip); fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; + fa.fsx_cowextsize = ip->i_d.di_cowextsize << + ip->i_mount->m_sb.sb_blocklog; fa.fsx_projid = xfs_get_projid(ip); if (attr) { @@ -973,12 +975,13 @@ xfs_set_diflags( if (ip->i_d.di_version < 3) return; - di_flags2 = 0; + di_flags2 = (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK); if (xflags & FS_XFLAG_DAX) di_flags2 |= XFS_DIFLAG2_DAX; + if (xflags & FS_XFLAG_COWEXTSIZE) + di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; ip->i_d.di_flags2 = di_flags2; - } STATIC void @@ -1031,6 +1034,14 @@ xfs_ioctl_setattr_xflags( return -EINVAL; } + /* Clear reflink if we are actually able to set the rt flag. */ + if ((fa->fsx_xflags & FS_XFLAG_REALTIME) && xfs_is_reflink_inode(ip)) + ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; + + /* Don't allow us to set DAX mode for a reflinked file for now. */ + if ((fa->fsx_xflags & FS_XFLAG_DAX) && xfs_is_reflink_inode(ip)) + return -EINVAL; + /* * Can't modify an immutable/append-only file unless * we have appropriate permission. @@ -1219,6 +1230,56 @@ xfs_ioctl_setattr_check_extsize( return 0; } +/* + * CoW extent size hint validation rules are: + * + * 1. CoW extent size hint can only be set if reflink is enabled on the fs. + * The inode does not have to have any shared blocks, but it must be a v3. + * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files; + * for a directory, the hint is propagated to new files. + * 3. Can be changed on files & directories at any time. + * 4. CoW extsize hint of 0 turns off hints, clears inode flags. + * 5. Extent size must be a multiple of the appropriate block size. + * 6. The extent size hint must be limited to half the AG size to avoid + * alignment extending the extent beyond the limits of the AG. 
+ */ +static int +xfs_ioctl_setattr_check_cowextsize( + struct xfs_inode *ip, + struct fsxattr *fa) +{ + struct xfs_mount *mp = ip->i_mount; + + if (!(fa->fsx_xflags & FS_XFLAG_COWEXTSIZE)) + return 0; + + if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb) || + ip->i_d.di_version != 3) + return -EINVAL; + + if (!S_ISREG(VFS_I(ip)->i_mode) && !S_ISDIR(VFS_I(ip)->i_mode)) + return -EINVAL; + + if (fa->fsx_cowextsize != 0) { + xfs_extlen_t size; + xfs_fsblock_t cowextsize_fsb; + + cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize); + if (cowextsize_fsb > MAXEXTLEN) + return -EINVAL; + + size = mp->m_sb.sb_blocksize; + if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2) + return -EINVAL; + + if (fa->fsx_cowextsize % size) + return -EINVAL; + } else + fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE; + + return 0; +} + static int xfs_ioctl_setattr_check_projid( struct xfs_inode *ip, @@ -1311,6 +1372,10 @@ xfs_ioctl_setattr( if (code) goto error_trans_cancel; + code = xfs_ioctl_setattr_check_cowextsize(ip, fa); + if (code) + goto error_trans_cancel; + code = xfs_ioctl_setattr_xflags(tp, ip, fa); if (code) goto error_trans_cancel; @@ -1346,6 +1411,12 @@ xfs_ioctl_setattr( ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; else ip->i_d.di_extsize = 0; + if (ip->i_d.di_version == 3 && + (ip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) + ip->i_d.di_cowextsize = fa->fsx_cowextsize >> + mp->m_sb.sb_blocklog; + else + ip->i_d.di_cowextsize = 0; code = xfs_trans_commit(tp); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index c08253e11545..d907eb9f8ef3 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -39,6 +39,7 @@ #include "xfs_quota.h" #include "xfs_dquot_item.h" #include "xfs_dquot.h" +#include "xfs_reflink.h" #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ @@ -70,7 +71,7 @@ xfs_bmbt_to_iomap( iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip)); } -static xfs_extlen_t +xfs_extlen_t xfs_eof_alignment( struct xfs_inode *ip, xfs_extlen_t extsize) @@ -609,7 +610,7 @@ xfs_file_iomap_begin_delay( } retry: - error = xfs_bmapi_reserve_delalloc(ip, offset_fsb, + error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb, end_fsb - offset_fsb, &got, &prev, &idx, eof); switch (error) { @@ -666,6 +667,7 @@ out_unlock: int xfs_iomap_write_allocate( xfs_inode_t *ip, + int whichfork, xfs_off_t offset, xfs_bmbt_irec_t *imap) { @@ -678,8 +680,12 @@ xfs_iomap_write_allocate( xfs_trans_t *tp; int nimaps; int error = 0; + int flags = 0; int nres; + if (whichfork == XFS_COW_FORK) + flags |= XFS_BMAPI_COWFORK; + /* * Make sure that the dquots are there. */ @@ -773,7 +779,7 @@ xfs_iomap_write_allocate( * pointer that the caller gave to us. */ error = xfs_bmapi_write(tp, ip, map_start_fsb, - count_fsb, 0, &first_block, + count_fsb, flags, &first_block, nres, imap, &nimaps, &dfops); if (error) @@ -955,14 +961,22 @@ xfs_file_iomap_begin( struct xfs_mount *mp = ip->i_mount; struct xfs_bmbt_irec imap; xfs_fileoff_t offset_fsb, end_fsb; + bool shared, trimmed; int nimaps = 1, error = 0; unsigned lockmode; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if ((flags & IOMAP_WRITE) && - !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) { + if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { + error = xfs_reflink_reserve_cow_range(ip, offset, length); + if (error < 0) + return error; + } + + if ((flags & IOMAP_WRITE) && !IS_DAX(inode) && + !xfs_get_extsz_hint(ip)) { + /* Reserve delalloc blocks for regular writeback. 
*/ return xfs_file_iomap_begin_delay(inode, offset, length, flags, iomap); } @@ -976,7 +990,14 @@ xfs_file_iomap_begin( end_fsb = XFS_B_TO_FSB(mp, offset + length); error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, - &nimaps, XFS_BMAPI_ENTIRE); + &nimaps, 0); + if (error) { + xfs_iunlock(ip, lockmode); + return error; + } + + /* Trim the mapping to the nearest shared extent boundary. */ + error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed); if (error) { xfs_iunlock(ip, lockmode); return error; @@ -1015,6 +1036,8 @@ xfs_file_iomap_begin( } xfs_bmbt_to_iomap(ip, iomap, &imap); + if (shared) + iomap->flags |= IOMAP_F_SHARED; return 0; } diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 6498be485932..6d45cf01fcff 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -25,12 +25,13 @@ struct xfs_bmbt_irec; int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, struct xfs_bmbt_irec *, int); -int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, +int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t, struct xfs_bmbt_irec *); int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t); void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, struct xfs_bmbt_irec *); +xfs_extlen_t xfs_eof_alignment(struct xfs_inode *ip, xfs_extlen_t extsize); extern struct iomap_ops xfs_iomap_ops; extern struct iomap_ops xfs_xattr_iomap_ops; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index c5da95eb79b8..405a65cd9d6b 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1159,6 +1159,7 @@ xfs_diflags_to_iflags( inode->i_flags |= S_NOATIME; if (S_ISREG(inode->i_mode) && ip->i_mount->m_sb.sb_blocksize == PAGE_SIZE && + !xfs_is_reflink_inode(ip) && (ip->i_mount->m_flags & XFS_MOUNT_DAX || ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) inode->i_flags |= S_DAX; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index ce73eb34620d..66e881790c17 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -66,7 +66,7 @@ xfs_bulkstat_one_int( if (!buffer || xfs_internal_inum(mp, ino)) return -EINVAL; - buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); + buf = kmem_zalloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); if (!buf) return -ENOMEM; @@ -111,6 +111,12 @@ xfs_bulkstat_one_int( buf->bs_aextents = dic->di_anextents; buf->bs_forkoff = XFS_IFORK_BOFF(ip); + if (dic->di_version == 3) { + if (dic->di_flags2 & XFS_DIFLAG2_COWEXTSIZE) + buf->bs_cowextsize = dic->di_cowextsize << + mp->m_sb.sb_blocklog; + } + switch (dic->di_format) { case XFS_DINODE_FMT_DEV: buf->bs_rdev = ip->i_df.if_u2.if_rdev; diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index b8d64d520e12..68640fb63a54 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -116,6 +116,7 @@ typedef __u32 xfs_nlink_t; #define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val #define xfs_fstrm_centisecs xfs_params.fstrm_timer.val #define xfs_eofb_secs xfs_params.eofb_timer.val +#define xfs_cowb_secs xfs_params.cowb_timer.val #define current_cpu() (raw_smp_processor_id()) #define current_pid() (current->pid) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 846483d56949..9b3d7c76915d 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -45,6 +45,8 @@ #include "xfs_dir2.h" #include "xfs_rmap_item.h" #include "xfs_buf_item.h" +#include "xfs_refcount_item.h" +#include "xfs_bmap_item.h" #define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) @@ -1924,6 +1926,10 @@ xlog_recover_reorder_trans( case XFS_LI_EFI: case XFS_LI_RUI: case XFS_LI_RUD: 
+ case XFS_LI_CUI: + case XFS_LI_CUD: + case XFS_LI_BUI: + case XFS_LI_BUD: trace_xfs_log_recover_item_reorder_tail(log, trans, item, pass); list_move_tail(&item->ri_list, &inode_list); @@ -2242,6 +2248,7 @@ xlog_recover_get_buf_lsn( case XFS_ABTB_MAGIC: case XFS_ABTC_MAGIC: case XFS_RMAP_CRC_MAGIC: + case XFS_REFC_CRC_MAGIC: case XFS_IBT_CRC_MAGIC: case XFS_IBT_MAGIC: { struct xfs_btree_block *btb = blk; @@ -2415,6 +2422,9 @@ xlog_recover_validate_buf_type( case XFS_RMAP_CRC_MAGIC: bp->b_ops = &xfs_rmapbt_buf_ops; break; + case XFS_REFC_CRC_MAGIC: + bp->b_ops = &xfs_refcountbt_buf_ops; + break; default: warnmsg = "Bad btree block magic!"; break; @@ -3547,6 +3557,242 @@ xlog_recover_rud_pass2( } /* + * Copy a CUI format buffer from the given buf, and into the destination + * CUI format structure. The CUI/CUD items were designed not to need any + * special alignment handling. + */ +static int +xfs_cui_copy_format( + struct xfs_log_iovec *buf, + struct xfs_cui_log_format *dst_cui_fmt) +{ + struct xfs_cui_log_format *src_cui_fmt; + uint len; + + src_cui_fmt = buf->i_addr; + len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents); + + if (buf->i_len == len) { + memcpy(dst_cui_fmt, src_cui_fmt, len); + return 0; + } + return -EFSCORRUPTED; +} + +/* + * This routine is called to create an in-core extent refcount update + * item from the cui format structure which was logged on disk. + * It allocates an in-core cui, copies the extents from the format + * structure into it, and adds the cui to the AIL with the given + * LSN. + */ +STATIC int +xlog_recover_cui_pass2( + struct xlog *log, + struct xlog_recover_item *item, + xfs_lsn_t lsn) +{ + int error; + struct xfs_mount *mp = log->l_mp; + struct xfs_cui_log_item *cuip; + struct xfs_cui_log_format *cui_formatp; + + cui_formatp = item->ri_buf[0].i_addr; + + cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); + error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format); + if (error) { + xfs_cui_item_free(cuip); + return error; + } + atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents); + + spin_lock(&log->l_ailp->xa_lock); + /* + * The CUI has two references. One for the CUD and one for the CUI to + * ensure it makes it into the AIL. Insert the CUI into the AIL directly + * and drop the CUI reference. Note that xfs_trans_ail_update() drops + * the AIL lock. + */ + xfs_trans_ail_update(log->l_ailp, &cuip->cui_item, lsn); + xfs_cui_release(cuip); + return 0; +} + + +/* + * This routine is called when a CUD format structure is found in a committed + * transaction in the log. Its purpose is to cancel the corresponding CUI if it + * was still in the log. To do this it searches the AIL for the CUI with an id + * equal to that in the CUD format structure. If we find it we drop the CUD + * reference, which removes the CUI from the AIL and frees it. + */ +STATIC int +xlog_recover_cud_pass2( + struct xlog *log, + struct xlog_recover_item *item) +{ + struct xfs_cud_log_format *cud_formatp; + struct xfs_cui_log_item *cuip = NULL; + struct xfs_log_item *lip; + __uint64_t cui_id; + struct xfs_ail_cursor cur; + struct xfs_ail *ailp = log->l_ailp; + + cud_formatp = item->ri_buf[0].i_addr; + if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) + return -EFSCORRUPTED; + cui_id = cud_formatp->cud_cui_id; + + /* + * Search for the CUI with the id in the CUD format structure in the + * AIL.
+ */ + spin_lock(&ailp->xa_lock); + lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); + while (lip != NULL) { + if (lip->li_type == XFS_LI_CUI) { + cuip = (struct xfs_cui_log_item *)lip; + if (cuip->cui_format.cui_id == cui_id) { + /* + * Drop the CUD reference to the CUI. This + * removes the CUI from the AIL and frees it. + */ + spin_unlock(&ailp->xa_lock); + xfs_cui_release(cuip); + spin_lock(&ailp->xa_lock); + break; + } + } + lip = xfs_trans_ail_cursor_next(ailp, &cur); + } + + xfs_trans_ail_cursor_done(&cur); + spin_unlock(&ailp->xa_lock); + + return 0; +} + +/* + * Copy a BUI format buffer from the given buf, and into the destination + * BUI format structure. The BUI/BUD items were designed not to need any + * special alignment handling. + */ +static int +xfs_bui_copy_format( + struct xfs_log_iovec *buf, + struct xfs_bui_log_format *dst_bui_fmt) +{ + struct xfs_bui_log_format *src_bui_fmt; + uint len; + + src_bui_fmt = buf->i_addr; + len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents); + + if (buf->i_len == len) { + memcpy(dst_bui_fmt, src_bui_fmt, len); + return 0; + } + return -EFSCORRUPTED; +} + +/* + * This routine is called to create an in-core extent bmap update + * item from the bui format structure which was logged on disk. + * It allocates an in-core bui, copies the extents from the format + * structure into it, and adds the bui to the AIL with the given + * LSN. + */ +STATIC int +xlog_recover_bui_pass2( + struct xlog *log, + struct xlog_recover_item *item, + xfs_lsn_t lsn) +{ + int error; + struct xfs_mount *mp = log->l_mp; + struct xfs_bui_log_item *buip; + struct xfs_bui_log_format *bui_formatp; + + bui_formatp = item->ri_buf[0].i_addr; + + if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) + return -EFSCORRUPTED; + buip = xfs_bui_init(mp); + error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format); + if (error) { + xfs_bui_item_free(buip); + return error; + } + atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents); + + spin_lock(&log->l_ailp->xa_lock); + /* + * The BUI has two references. One for the BUD and one for the BUI to + * ensure it makes it into the AIL. Insert the BUI into the AIL directly + * and drop the BUI reference. Note that xfs_trans_ail_update() drops + * the AIL lock. + */ + xfs_trans_ail_update(log->l_ailp, &buip->bui_item, lsn); + xfs_bui_release(buip); + return 0; +} + + +/* + * This routine is called when a BUD format structure is found in a committed + * transaction in the log. Its purpose is to cancel the corresponding BUI if it + * was still in the log. To do this it searches the AIL for the BUI with an id + * equal to that in the BUD format structure. If we find it we drop the BUD + * reference, which removes the BUI from the AIL and frees it. + */ +STATIC int +xlog_recover_bud_pass2( + struct xlog *log, + struct xlog_recover_item *item) +{ + struct xfs_bud_log_format *bud_formatp; + struct xfs_bui_log_item *buip = NULL; + struct xfs_log_item *lip; + __uint64_t bui_id; + struct xfs_ail_cursor cur; + struct xfs_ail *ailp = log->l_ailp; + + bud_formatp = item->ri_buf[0].i_addr; + if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) + return -EFSCORRUPTED; + bui_id = bud_formatp->bud_bui_id; + + /* + * Search for the BUI with the id in the BUD format structure in the + * AIL.
+ */ + spin_lock(&ailp->xa_lock); + lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); + while (lip != NULL) { + if (lip->li_type == XFS_LI_BUI) { + buip = (struct xfs_bui_log_item *)lip; + if (buip->bui_format.bui_id == bui_id) { + /* + * Drop the BUD reference to the BUI. This + * removes the BUI from the AIL and frees it. + */ + spin_unlock(&ailp->xa_lock); + xfs_bui_release(buip); + spin_lock(&ailp->xa_lock); + break; + } + } + lip = xfs_trans_ail_cursor_next(ailp, &cur); + } + + xfs_trans_ail_cursor_done(&cur); + spin_unlock(&ailp->xa_lock); + + return 0; +} + +/* * This routine is called when an inode create format structure is found in a * committed transaction in the log. Its purpose is to initialise the inodes * being allocated on disk. This requires us to get inode cluster buffers that @@ -3773,6 +4019,10 @@ xlog_recover_ra_pass2( case XFS_LI_QUOTAOFF: case XFS_LI_RUI: case XFS_LI_RUD: + case XFS_LI_CUI: + case XFS_LI_CUD: + case XFS_LI_BUI: + case XFS_LI_BUD: default: break; } @@ -3798,6 +4048,10 @@ xlog_recover_commit_pass1( case XFS_LI_ICREATE: case XFS_LI_RUI: case XFS_LI_RUD: + case XFS_LI_CUI: + case XFS_LI_CUD: + case XFS_LI_BUI: + case XFS_LI_BUD: /* nothing to do in pass 1 */ return 0; default: @@ -3832,6 +4086,14 @@ xlog_recover_commit_pass2( return xlog_recover_rui_pass2(log, item, trans->r_lsn); case XFS_LI_RUD: return xlog_recover_rud_pass2(log, item); + case XFS_LI_CUI: + return xlog_recover_cui_pass2(log, item, trans->r_lsn); + case XFS_LI_CUD: + return xlog_recover_cud_pass2(log, item); + case XFS_LI_BUI: + return xlog_recover_bui_pass2(log, item, trans->r_lsn); + case XFS_LI_BUD: + return xlog_recover_bud_pass2(log, item); case XFS_LI_DQUOT: return xlog_recover_dquot_pass2(log, buffer_list, item, trans->r_lsn); @@ -4419,12 +4681,94 @@ xlog_recover_cancel_rui( spin_lock(&ailp->xa_lock); } +/* Recover the CUI if necessary. */ +STATIC int +xlog_recover_process_cui( + struct xfs_mount *mp, + struct xfs_ail *ailp, + struct xfs_log_item *lip) +{ + struct xfs_cui_log_item *cuip; + int error; + + /* + * Skip CUIs that we've already processed. + */ + cuip = container_of(lip, struct xfs_cui_log_item, cui_item); + if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags)) + return 0; + + spin_unlock(&ailp->xa_lock); + error = xfs_cui_recover(mp, cuip); + spin_lock(&ailp->xa_lock); + + return error; +} + +/* Release the CUI since we're cancelling everything. */ +STATIC void +xlog_recover_cancel_cui( + struct xfs_mount *mp, + struct xfs_ail *ailp, + struct xfs_log_item *lip) +{ + struct xfs_cui_log_item *cuip; + + cuip = container_of(lip, struct xfs_cui_log_item, cui_item); + + spin_unlock(&ailp->xa_lock); + xfs_cui_release(cuip); + spin_lock(&ailp->xa_lock); +} + +/* Recover the BUI if necessary. */ +STATIC int +xlog_recover_process_bui( + struct xfs_mount *mp, + struct xfs_ail *ailp, + struct xfs_log_item *lip) +{ + struct xfs_bui_log_item *buip; + int error; + + /* + * Skip BUIs that we've already processed. + */ + buip = container_of(lip, struct xfs_bui_log_item, bui_item); + if (test_bit(XFS_BUI_RECOVERED, &buip->bui_flags)) + return 0; + + spin_unlock(&ailp->xa_lock); + error = xfs_bui_recover(mp, buip); + spin_lock(&ailp->xa_lock); + + return error; +} + +/* Release the BUI since we're cancelling everything.
*/ +STATIC void +xlog_recover_cancel_bui( + struct xfs_mount *mp, + struct xfs_ail *ailp, + struct xfs_log_item *lip) +{ + struct xfs_bui_log_item *buip; + + buip = container_of(lip, struct xfs_bui_log_item, bui_item); + + spin_unlock(&ailp->xa_lock); + xfs_bui_release(buip); + spin_lock(&ailp->xa_lock); +} + /* Is this log item a deferred action intent? */ static inline bool xlog_item_is_intent(struct xfs_log_item *lip) { switch (lip->li_type) { case XFS_LI_EFI: case XFS_LI_RUI: + case XFS_LI_CUI: + case XFS_LI_BUI: return true; default: return false; @@ -4488,6 +4832,12 @@ xlog_recover_process_intents( case XFS_LI_RUI: error = xlog_recover_process_rui(log->l_mp, ailp, lip); break; + case XFS_LI_CUI: + error = xlog_recover_process_cui(log->l_mp, ailp, lip); + break; + case XFS_LI_BUI: + error = xlog_recover_process_bui(log->l_mp, ailp, lip); + break; } if (error) goto out; @@ -4535,6 +4885,12 @@ xlog_recover_cancel_intents( case XFS_LI_RUI: xlog_recover_cancel_rui(log->l_mp, ailp, lip); break; + case XFS_LI_CUI: + xlog_recover_cancel_cui(log->l_mp, ailp, lip); + break; + case XFS_LI_BUI: + xlog_recover_cancel_bui(log->l_mp, ailp, lip); + break; } lip = xfs_trans_ail_cursor_next(ailp, &cur); @@ -4613,6 +4969,7 @@ xlog_recover_process_one_iunlink( if (error) goto fail_iput; + xfs_iflags_clear(ip, XFS_IRECOVERY); ASSERT(VFS_I(ip)->i_nlink == 0); ASSERT(VFS_I(ip)->i_mode != 0); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 56e85a6c85c7..fc7873942bea 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -43,6 +43,8 @@ #include "xfs_icache.h" #include "xfs_sysfs.h" #include "xfs_rmap_btree.h" +#include "xfs_refcount_btree.h" +#include "xfs_reflink.h" static DEFINE_MUTEX(xfs_uuid_table_mutex); @@ -684,6 +686,7 @@ xfs_mountfs( xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); xfs_ialloc_compute_maxlevels(mp); xfs_rmapbt_compute_maxlevels(mp); + xfs_refcountbt_compute_maxlevels(mp); xfs_set_maxicount(mp); @@ -923,6 +926,15 @@ xfs_mountfs( } /* + * During the second phase of log recovery, we need iget and + * iput to behave like they do for an active filesystem. + * xfs_fs_drop_inode needs to be able to prevent the deletion + * of inodes before we're done replaying log items on those + * inodes. + */ + mp->m_super->s_flags |= MS_ACTIVE; + + /* * Finish recovering the file system. This part needed to be delayed * until after the root and real-time bitmap inodes were consistently * read in. @@ -974,10 +986,28 @@ xfs_mountfs( if (error) xfs_warn(mp, "Unable to allocate reserve blocks. Continuing without reserve pool."); + + /* Recover any CoW blocks that never got remapped. */ + error = xfs_reflink_recover_cow(mp); + if (error) { + xfs_err(mp, + "Error %d recovering leftover CoW allocations.", error); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + goto out_quota; + } + + /* Reserve AG blocks for future btree expansion. 
*/ + error = xfs_fs_reserve_ag_blocks(mp); + if (error && error != -ENOSPC) + goto out_agresv; } return 0; + out_agresv: + xfs_fs_unreserve_ag_blocks(mp); + out_quota: + xfs_qm_unmount_quotas(mp); out_rtunmount: xfs_rtunmount_inodes(mp); out_rele_rip: @@ -1019,7 +1049,9 @@ xfs_unmountfs( int error; cancel_delayed_work_sync(&mp->m_eofblocks_work); + cancel_delayed_work_sync(&mp->m_cowblocks_work); + xfs_fs_unreserve_ag_blocks(mp); xfs_qm_unmount_quotas(mp); xfs_rtunmount_inodes(mp); IRELE(mp->m_rootip); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 041d9493e798..819b80b15bfb 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -124,10 +124,13 @@ typedef struct xfs_mount { uint m_inobt_mnr[2]; /* min inobt btree records */ uint m_rmap_mxr[2]; /* max rmap btree records */ uint m_rmap_mnr[2]; /* min rmap btree records */ + uint m_refc_mxr[2]; /* max refc btree records */ + uint m_refc_mnr[2]; /* min refc btree records */ uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ uint m_in_maxlevels; /* max inobt btree levels. */ uint m_rmap_maxlevels; /* max rmap btree levels */ + uint m_refc_maxlevels; /* max refcount btree level */ xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */ uint m_alloc_set_aside; /* space we can't use */ uint m_ag_max_usable; /* max space per AG */ @@ -161,6 +164,8 @@ typedef struct xfs_mount { struct delayed_work m_reclaim_work; /* background inode reclaim */ struct delayed_work m_eofblocks_work; /* background eof blocks trimming */ + struct delayed_work m_cowblocks_work; /* background cow blocks + trimming */ bool m_update_sb; /* sb needs update in mount */ int64_t m_low_space[XFS_LOWSP_MAX]; /* low free space thresholds */ @@ -399,6 +404,9 @@ typedef struct xfs_perag { struct xfs_ag_resv pag_meta_resv; /* Blocks reserved for just AGFL-based metadata. */ struct xfs_ag_resv pag_agfl_resv; + + /* reference count */ + __uint8_t pagf_refcount_level; } xfs_perag_t; static inline struct xfs_ag_resv * diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index 69e2986a3776..0c381d71b242 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -49,6 +49,8 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr, 56); XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key, 4); XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_refcount_key, 4); + XFS_CHECK_STRUCT_SIZE(struct xfs_refcount_rec, 12); XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_key, 20); XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_rec, 24); XFS_CHECK_STRUCT_SIZE(struct xfs_timestamp, 8); @@ -56,6 +58,7 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t, 4); XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t, 8); XFS_CHECK_STRUCT_SIZE(xfs_inobt_ptr_t, 4); + XFS_CHECK_STRUCT_SIZE(xfs_refcount_ptr_t, 4); XFS_CHECK_STRUCT_SIZE(xfs_rmap_ptr_t, 4); /* dir/attr trees */ diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 0f14b2e4bf6c..93a7aafa56d6 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -114,6 +114,13 @@ xfs_fs_map_blocks( return -ENXIO; /* + * The pNFS block layout spec actually supports reflink like + * functionality, but the Linux pNFS server doesn't implement it yet. + */ + if (xfs_is_reflink_inode(ip)) + return -ENXIO; + + /* * Lock out any other I/O before we flush and invalidate the pagecache, * and then hand out a layout to the remote system. 
This is very * similar to direct I/O, except that the synchronization is much more diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c new file mode 100644 index 000000000000..fe86a668a57e --- /dev/null +++ b/fs/xfs/xfs_refcount_item.c @@ -0,0 +1,539 @@ +/* + * Copyright (C) 2016 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_buf_item.h" +#include "xfs_refcount_item.h" +#include "xfs_log.h" +#include "xfs_refcount.h" + + +kmem_zone_t *xfs_cui_zone; +kmem_zone_t *xfs_cud_zone; + +static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip) +{ + return container_of(lip, struct xfs_cui_log_item, cui_item); +} + +void +xfs_cui_item_free( + struct xfs_cui_log_item *cuip) +{ + if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS) + kmem_free(cuip); + else + kmem_zone_free(xfs_cui_zone, cuip); +} + +STATIC void +xfs_cui_item_size( + struct xfs_log_item *lip, + int *nvecs, + int *nbytes) +{ + struct xfs_cui_log_item *cuip = CUI_ITEM(lip); + + *nvecs += 1; + *nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents); +} + +/* + * This is called to fill in the vector of log iovecs for the + * given cui log item. We use only 1 iovec, and we point that + * at the cui_log_format structure embedded in the cui item. + * It is at this point that we assert that all of the extent + * slots in the cui item have been filled. + */ +STATIC void +xfs_cui_item_format( + struct xfs_log_item *lip, + struct xfs_log_vec *lv) +{ + struct xfs_cui_log_item *cuip = CUI_ITEM(lip); + struct xfs_log_iovec *vecp = NULL; + + ASSERT(atomic_read(&cuip->cui_next_extent) == + cuip->cui_format.cui_nextents); + + cuip->cui_format.cui_type = XFS_LI_CUI; + cuip->cui_format.cui_size = 1; + + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format, + xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents)); +} + +/* + * Pinning has no meaning for a cui item, so just return. + */ +STATIC void +xfs_cui_item_pin( + struct xfs_log_item *lip) +{ +} + +/* + * The unpin operation is the last place a CUI is manipulated in the log. It is + * either inserted in the AIL or aborted in the event of a log I/O error. In + * either case, the CUI transaction has been successfully committed to make it + * this far. Therefore, we expect whoever committed the CUI to either construct + * and commit the CUD or drop the CUD's reference in the event of error. Simply + * drop the log's CUI reference now that the log is done with it.
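+ *
+ * As an illustrative aside (editor's sketch, not part of this patch), the
+ * two references taken in xfs_cui_init() are dropped like so:
+ *
+ *	cuip = xfs_cui_init(mp, nextents);	refcount == 2
+ *	...
+ *	xfs_cui_release(cuip);	log side: unpin (or recovery/cancel)
+ *	xfs_cui_release(cuip);	CUD side: committed or aborted
+ *
+ * and only the final release removes the item from the AIL and frees it.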
+ */ +STATIC void +xfs_cui_item_unpin( + struct xfs_log_item *lip, + int remove) +{ + struct xfs_cui_log_item *cuip = CUI_ITEM(lip); + + xfs_cui_release(cuip); +} + +/* + * CUI items have no locking or pushing. However, since CUIs are pulled from + * the AIL when their corresponding CUDs are committed to disk, their situation + * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller + * will eventually flush the log. This should help in getting the CUI out of + * the AIL. + */ +STATIC uint +xfs_cui_item_push( + struct xfs_log_item *lip, + struct list_head *buffer_list) +{ + return XFS_ITEM_PINNED; +} + +/* + * The CUI has been either committed or aborted if the transaction has been + * cancelled. If the transaction was cancelled, a CUD isn't going to be + * constructed and thus we free the CUI here directly. + */ +STATIC void +xfs_cui_item_unlock( + struct xfs_log_item *lip) +{ + if (lip->li_flags & XFS_LI_ABORTED) + xfs_cui_item_free(CUI_ITEM(lip)); +} + +/* + * The CUI is logged only once and cannot be moved in the log, so simply return + * the lsn at which it's been logged. + */ +STATIC xfs_lsn_t +xfs_cui_item_committed( + struct xfs_log_item *lip, + xfs_lsn_t lsn) +{ + return lsn; +} + +/* + * The CUI dependency tracking op doesn't do squat. It can't because + * it doesn't know where the free extent is coming from. The dependency + * tracking has to be handled by the "enclosing" metadata object. For + * example, for inodes, the inode is locked throughout the extent freeing + * so the dependency should be recorded there. + */ +STATIC void +xfs_cui_item_committing( + struct xfs_log_item *lip, + xfs_lsn_t lsn) +{ +} + +/* + * This is the ops vector shared by all cui log items. + */ +static const struct xfs_item_ops xfs_cui_item_ops = { + .iop_size = xfs_cui_item_size, + .iop_format = xfs_cui_item_format, + .iop_pin = xfs_cui_item_pin, + .iop_unpin = xfs_cui_item_unpin, + .iop_unlock = xfs_cui_item_unlock, + .iop_committed = xfs_cui_item_committed, + .iop_push = xfs_cui_item_push, + .iop_committing = xfs_cui_item_committing, +}; + +/* + * Allocate and initialize a cui item with the given number of extents. + */ +struct xfs_cui_log_item * +xfs_cui_init( + struct xfs_mount *mp, + uint nextents) + +{ + struct xfs_cui_log_item *cuip; + + ASSERT(nextents > 0); + if (nextents > XFS_CUI_MAX_FAST_EXTENTS) + cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents), + KM_SLEEP); + else + cuip = kmem_zone_zalloc(xfs_cui_zone, KM_SLEEP); + + xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops); + cuip->cui_format.cui_nextents = nextents; + cuip->cui_format.cui_id = (uintptr_t)(void *)cuip; + atomic_set(&cuip->cui_next_extent, 0); + atomic_set(&cuip->cui_refcount, 2); + + return cuip; +} + +/* + * Freeing the CUI requires that we remove it from the AIL if it has already + * been placed there. However, the CUI may not yet have been placed in the AIL + * when called by xfs_cui_release() from CUD processing due to the ordering of + * committed vs unpin operations in bulk insert operations. Hence the reference + * count to ensure only the last caller frees the CUI.
+ */ +void +xfs_cui_release( + struct xfs_cui_log_item *cuip) +{ + if (atomic_dec_and_test(&cuip->cui_refcount)) { + xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR); + xfs_cui_item_free(cuip); + } +} + +static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip) +{ + return container_of(lip, struct xfs_cud_log_item, cud_item); +} + +STATIC void +xfs_cud_item_size( + struct xfs_log_item *lip, + int *nvecs, + int *nbytes) +{ + *nvecs += 1; + *nbytes += sizeof(struct xfs_cud_log_format); +} + +/* + * This is called to fill in the vector of log iovecs for the + * given cud log item. We use only 1 iovec, and we point that + * at the cud_log_format structure embedded in the cud item. + * It is at this point that we assert that all of the extent + * slots in the cud item have been filled. + */ +STATIC void +xfs_cud_item_format( + struct xfs_log_item *lip, + struct xfs_log_vec *lv) +{ + struct xfs_cud_log_item *cudp = CUD_ITEM(lip); + struct xfs_log_iovec *vecp = NULL; + + cudp->cud_format.cud_type = XFS_LI_CUD; + cudp->cud_format.cud_size = 1; + + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format, + sizeof(struct xfs_cud_log_format)); +} + +/* + * Pinning has no meaning for a cud item, so just return. + */ +STATIC void +xfs_cud_item_pin( + struct xfs_log_item *lip) +{ +} + +/* + * Since pinning has no meaning for a cud item, unpinning does + * not either. + */ +STATIC void +xfs_cud_item_unpin( + struct xfs_log_item *lip, + int remove) +{ +} + +/* + * There isn't much you can do to push on a cud item. It is simply stuck + * waiting for the log to be flushed to disk. + */ +STATIC uint +xfs_cud_item_push( + struct xfs_log_item *lip, + struct list_head *buffer_list) +{ + return XFS_ITEM_PINNED; +} + +/* + * The CUD is either committed or aborted if the transaction is cancelled. If + * the transaction is cancelled, drop our reference to the CUI and free the + * CUD. + */ +STATIC void +xfs_cud_item_unlock( + struct xfs_log_item *lip) +{ + struct xfs_cud_log_item *cudp = CUD_ITEM(lip); + + if (lip->li_flags & XFS_LI_ABORTED) { + xfs_cui_release(cudp->cud_cuip); + kmem_zone_free(xfs_cud_zone, cudp); + } +} + +/* + * When the cud item is committed to disk, all we need to do is delete our + * reference to our partner cui item and then free ourselves. Since we're + * freeing ourselves we must return -1 to keep the transaction code from + * further referencing this item. + */ +STATIC xfs_lsn_t +xfs_cud_item_committed( + struct xfs_log_item *lip, + xfs_lsn_t lsn) +{ + struct xfs_cud_log_item *cudp = CUD_ITEM(lip); + + /* + * Drop the CUI reference regardless of whether the CUD has been + * aborted. Once the CUD transaction is constructed, it is the sole + * responsibility of the CUD to release the CUI (even if the CUI is + * aborted due to log I/O error). + */ + xfs_cui_release(cudp->cud_cuip); + kmem_zone_free(xfs_cud_zone, cudp); + + return (xfs_lsn_t)-1; +} + +/* + * The CUD dependency tracking op doesn't do squat. It can't because + * it doesn't know where the free extent is coming from. The dependency + * tracking has to be handled by the "enclosing" metadata object. For + * example, for inodes, the inode is locked throughout the extent freeing + * so the dependency should be recorded there. + */ +STATIC void +xfs_cud_item_committing( + struct xfs_log_item *lip, + xfs_lsn_t lsn) +{ +} + +/* + * This is the ops vector shared by all cud log items.
+ */ +static const struct xfs_item_ops xfs_cud_item_ops = { + .iop_size = xfs_cud_item_size, + .iop_format = xfs_cud_item_format, + .iop_pin = xfs_cud_item_pin, + .iop_unpin = xfs_cud_item_unpin, + .iop_unlock = xfs_cud_item_unlock, + .iop_committed = xfs_cud_item_committed, + .iop_push = xfs_cud_item_push, + .iop_committing = xfs_cud_item_committing, +}; + +/* + * Allocate and initialize a cud item for the given cui item. + */ +struct xfs_cud_log_item * +xfs_cud_init( + struct xfs_mount *mp, + struct xfs_cui_log_item *cuip) + +{ + struct xfs_cud_log_item *cudp; + + cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP); + xfs_log_item_init(mp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops); + cudp->cud_cuip = cuip; + cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id; + + return cudp; +} + +/* + * Process a refcount update intent item that was recovered from the log. + * We need to update the refcountbt. + */ +int +xfs_cui_recover( + struct xfs_mount *mp, + struct xfs_cui_log_item *cuip) +{ + int i; + int error = 0; + unsigned int refc_type; + struct xfs_phys_extent *refc; + xfs_fsblock_t startblock_fsb; + bool op_ok; + struct xfs_cud_log_item *cudp; + struct xfs_trans *tp; + struct xfs_btree_cur *rcur = NULL; + enum xfs_refcount_intent_type type; + xfs_fsblock_t firstfsb; + xfs_fsblock_t new_fsb; + xfs_extlen_t new_len; + struct xfs_bmbt_irec irec; + struct xfs_defer_ops dfops; + bool requeue_only = false; + + ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags)); + + /* + * First check the validity of the extents described by the + * CUI. If any are bad, then assume that all are bad and + * just toss the CUI. + */ + for (i = 0; i < cuip->cui_format.cui_nextents; i++) { + refc = &cuip->cui_format.cui_extents[i]; + startblock_fsb = XFS_BB_TO_FSB(mp, + XFS_FSB_TO_DADDR(mp, refc->pe_startblock)); + switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) { + case XFS_REFCOUNT_INCREASE: + case XFS_REFCOUNT_DECREASE: + case XFS_REFCOUNT_ALLOC_COW: + case XFS_REFCOUNT_FREE_COW: + op_ok = true; + break; + default: + op_ok = false; + break; + } + if (!op_ok || startblock_fsb == 0 || + refc->pe_len == 0 || + startblock_fsb >= mp->m_sb.sb_dblocks || + refc->pe_len >= mp->m_sb.sb_agblocks || + (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) { + /* + * This will pull the CUI from the AIL and + * free the memory associated with it. + */ + set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); + xfs_cui_release(cuip); + return -EIO; + } + } + + /* + * Under normal operation, refcount updates are deferred, so we + * wouldn't be adding them directly to a transaction. All + * refcount updates manage reservation usage internally and + * dynamically by deferring work that won't fit in the + * transaction. Normally, any work that needs to be deferred + * gets attached to the same defer_ops that scheduled the + * refcount update. However, we're in log recovery here, so we + * create our own defer_ops and use that to finish up any + * work that doesn't fit.
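+ *
+ * As a rough sketch of the pattern below (editor's illustration,
+ * using only calls that appear in this function):
+ *
+ *	xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+ *	xfs_defer_init(&dfops, &firstfsb);
+ *	for each logged extent:
+ *		xfs_trans_log_finish_refcount_update(...); or requeue
+ *	xfs_defer_finish(&tp, &dfops, NULL);
+ *	xfs_trans_commit(tp);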
+ */ + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); + if (error) + return error; + cudp = xfs_trans_get_cud(tp, cuip); + + xfs_defer_init(&dfops, &firstfsb); + for (i = 0; i < cuip->cui_format.cui_nextents; i++) { + refc = &cuip->cui_format.cui_extents[i]; + refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK; + switch (refc_type) { + case XFS_REFCOUNT_INCREASE: + case XFS_REFCOUNT_DECREASE: + case XFS_REFCOUNT_ALLOC_COW: + case XFS_REFCOUNT_FREE_COW: + type = refc_type; + break; + default: + error = -EFSCORRUPTED; + goto abort_error; + } + if (requeue_only) { + new_fsb = refc->pe_startblock; + new_len = refc->pe_len; + } else + error = xfs_trans_log_finish_refcount_update(tp, cudp, + &dfops, type, refc->pe_startblock, refc->pe_len, + &new_fsb, &new_len, &rcur); + if (error) + goto abort_error; + + /* Requeue what we didn't finish. */ + if (new_len > 0) { + irec.br_startblock = new_fsb; + irec.br_blockcount = new_len; + switch (type) { + case XFS_REFCOUNT_INCREASE: + error = xfs_refcount_increase_extent( + tp->t_mountp, &dfops, &irec); + break; + case XFS_REFCOUNT_DECREASE: + error = xfs_refcount_decrease_extent( + tp->t_mountp, &dfops, &irec); + break; + case XFS_REFCOUNT_ALLOC_COW: + error = xfs_refcount_alloc_cow_extent( + tp->t_mountp, &dfops, + irec.br_startblock, + irec.br_blockcount); + break; + case XFS_REFCOUNT_FREE_COW: + error = xfs_refcount_free_cow_extent( + tp->t_mountp, &dfops, + irec.br_startblock, + irec.br_blockcount); + break; + default: + ASSERT(0); + } + if (error) + goto abort_error; + requeue_only = true; + } + } + + xfs_refcount_finish_one_cleanup(tp, rcur, error); + error = xfs_defer_finish(&tp, &dfops, NULL); + if (error) + goto abort_error; + set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); + error = xfs_trans_commit(tp); + return error; + +abort_error: + xfs_refcount_finish_one_cleanup(tp, rcur, error); + xfs_defer_cancel(&dfops); + xfs_trans_cancel(tp); + return error; +} diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h new file mode 100644 index 000000000000..5b74dddfa64b --- /dev/null +++ b/fs/xfs/xfs_refcount_item.h @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2016 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#ifndef __XFS_REFCOUNT_ITEM_H__ +#define __XFS_REFCOUNT_ITEM_H__ + +/* + * There are (currently) two pairs of refcount btree redo item types: + * increase and decrease. The log items for these are CUI (refcount + * update intent) and CUD (refcount update done). The redo item type + * is encoded in the flags field of each xfs_phys_extent. + * + * *I items should be recorded in the *first* of a series of rolled + * transactions, and the *D items should be recorded in the same + * transaction that records the associated refcountbt updates.
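+ *
+ * For example (an editor's illustration, not from the original text),
+ * a single deferred refcount increase is logged roughly as:
+ *
+ *	transaction 1:		CUI with one xfs_phys_extent
+ *	transaction 2 (rolled):	refcountbt buffer updates + CUD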
+ * + * Should the system crash after the commit of the first transaction + * but before the commit of the final transaction in a series, log + * recovery will use the redo information recorded by the intent items + * to replay the refcountbt metadata updates. + */ + +/* kernel only CUI/CUD definitions */ + +struct xfs_mount; +struct kmem_zone; + +/* + * Max number of extents in fast allocation path. + */ +#define XFS_CUI_MAX_FAST_EXTENTS 16 + +/* + * Define CUI flag bits. Manipulated by set/clear/test_bit operators. + */ +#define XFS_CUI_RECOVERED 1 + +/* + * This is the "refcount update intent" log item. It is used to log + * the fact that some refcount updates need to be performed. It is + * used in conjunction with the "refcount update done" log item + * described below. + * + * These log items follow the same rules as struct xfs_efi_log_item; + * see the comments about that structure (in xfs_extfree_item.h) for + * more details. + */ +struct xfs_cui_log_item { + struct xfs_log_item cui_item; + atomic_t cui_refcount; + atomic_t cui_next_extent; + unsigned long cui_flags; /* misc flags */ + struct xfs_cui_log_format cui_format; +}; + +static inline size_t +xfs_cui_log_item_sizeof( + unsigned int nr) +{ + return offsetof(struct xfs_cui_log_item, cui_format) + + xfs_cui_log_format_sizeof(nr); +} + +/* + * This is the "refcount update done" log item. It is used to log the + * fact that some refcountbt updates mentioned in an earlier cui item + * have been performed. + */ +struct xfs_cud_log_item { + struct xfs_log_item cud_item; + struct xfs_cui_log_item *cud_cuip; + struct xfs_cud_log_format cud_format; +}; + +extern struct kmem_zone *xfs_cui_zone; +extern struct kmem_zone *xfs_cud_zone; + +struct xfs_cui_log_item *xfs_cui_init(struct xfs_mount *, uint); +struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *, + struct xfs_cui_log_item *); +void xfs_cui_item_free(struct xfs_cui_log_item *); +void xfs_cui_release(struct xfs_cui_log_item *); +int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip); + +#endif /* __XFS_REFCOUNT_ITEM_H__ */ diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c new file mode 100644 index 000000000000..5965e9455d91 --- /dev/null +++ b/fs/xfs/xfs_reflink.c @@ -0,0 +1,1688 @@ +/* + * Copyright (C) 2016 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_inode_item.h" +#include "xfs_bmap.h" +#include "xfs_bmap_util.h" +#include "xfs_error.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_ioctl.h" +#include "xfs_trace.h" +#include "xfs_log.h" +#include "xfs_icache.h" +#include "xfs_pnfs.h" +#include "xfs_btree.h" +#include "xfs_refcount_btree.h" +#include "xfs_refcount.h" +#include "xfs_bmap_btree.h" +#include "xfs_trans_space.h" +#include "xfs_bit.h" +#include "xfs_alloc.h" +#include "xfs_quota_defs.h" +#include "xfs_quota.h" +#include "xfs_reflink.h" +#include "xfs_iomap.h" +#include "xfs_rmap_btree.h" +#include "xfs_sb.h" +#include "xfs_ag_resv.h" + +/* + * Copy on Write of Shared Blocks + * + * XFS must preserve "the usual" file semantics even when two files share + * the same physical blocks. This means that a write to one file must not + * alter the blocks in a different file; the way that we'll do that is + * through the use of a copy-on-write mechanism. At a high level, that + * means that when we want to write to a shared block, we allocate a new + * block, write the data to the new block, and if that succeeds we map the + * new block into the file. + * + * XFS provides a "delayed allocation" mechanism that defers the allocation + * of disk blocks to dirty-but-not-yet-mapped file blocks as long as + * possible. This reduces fragmentation by enabling the filesystem to ask + * for bigger chunks less often, which is exactly what we want for CoW. + * + * The delalloc mechanism begins when the kernel wants to make a block + * writable (write_begin or page_mkwrite). If the offset is not mapped, we + * create a delalloc mapping, which is a regular in-core extent, but without + * a real startblock. (For delalloc mappings, the startblock encodes both + * a flag that this is a delalloc mapping, and a worst-case estimate of how + * many blocks might be required to put the mapping into the BMBT.) Delalloc + * mappings are a reservation against the free space in the filesystem; + * adjacent mappings can also be combined into fewer larger mappings. + * + * When dirty pages are being written out (typically in writepage), the + * delalloc reservations are converted into real mappings by allocating + * blocks and replacing the delalloc mapping with real ones. A delalloc + * mapping can be replaced by several real ones if the free space is + * fragmented. + * + * We want to adapt the delalloc mechanism for copy-on-write, since the + * write paths are similar. The first two steps (creating the reservation + * and allocating the blocks) are exactly the same as delalloc except that + * the mappings must be stored in a separate CoW fork because we do not want + * to disturb the mapping in the data fork until we're sure that the write + * succeeded. IO completion in this case is the process of removing the old + * mapping from the data fork and moving the new mapping from the CoW fork to + * the data fork. This will be discussed shortly. + * + * For now, unaligned directio writes will be bounced back to the page cache. + * Block-aligned directio writes will use the same mechanism as buffered + * writes.
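+ *
+ * Condensed, the buffered CoW write path built on this file is (editor's
+ * sketch; the exact call sites live elsewhere in this patch series):
+ *
+ *	write_begin/page_mkwrite:	xfs_reflink_reserve_cow_range()
+ *	writeback allocation:		xfs_reflink_allocate_cow_range()
+ *	I/O completion:			xfs_reflink_end_cow()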
+ * + * CoW remapping must be done after the data block write completes, + * because we don't want to destroy the old data fork map until we're sure + * the new block has been written. Since the new mappings are kept in a + * separate fork, we can simply iterate these mappings to find the ones + * that cover the file blocks that we just CoW'd. For each extent, simply + * unmap the corresponding range in the data fork, map the new range into + * the data fork, and remove the extent from the CoW fork. + * + * Since the remapping operation can be applied to an arbitrary file + * range, we record the need for the remap step as a flag in the ioend + * instead of declaring a new IO type. This is required for direct io + * because we only have ioend for the whole dio, and we have to be able to + * remember the presence of unwritten blocks and CoW blocks with a single + * ioend structure. Better yet, the more ground we can cover with one + * ioend, the better. + */ + +/* + * Given an AG extent, find the lowest-numbered run of shared blocks + * within that range and return the range in fbno/flen. If + * find_end_of_shared is true, return the longest contiguous extent of + * shared blocks. If there are no shared extents, fbno and flen will + * be set to NULLAGBLOCK and 0, respectively. + */ +int +xfs_reflink_find_shared( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_agblock_t agbno, + xfs_extlen_t aglen, + xfs_agblock_t *fbno, + xfs_extlen_t *flen, + bool find_end_of_shared) +{ + struct xfs_buf *agbp; + struct xfs_btree_cur *cur; + int error; + + error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + if (error) + return error; + + cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL); + + error = xfs_refcount_find_shared(cur, agbno, aglen, fbno, flen, + find_end_of_shared); + + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + + xfs_buf_relse(agbp); + return error; +} + +/* + * Trim the mapping to the next block where there's a change in the + * shared/unshared status. More specifically, this means that we + * find the lowest-numbered extent of shared blocks that coincides with + * the given block mapping. If the shared extent overlaps the start of + * the mapping, trim the mapping to the end of the shared extent. If + * the shared region intersects the mapping, trim the mapping to the + * start of the shared extent. If there are no shared regions that + * overlap, just return the original extent. + */ +int +xfs_reflink_trim_around_shared( + struct xfs_inode *ip, + struct xfs_bmbt_irec *irec, + bool *shared, + bool *trimmed) +{ + xfs_agnumber_t agno; + xfs_agblock_t agbno; + xfs_extlen_t aglen; + xfs_agblock_t fbno; + xfs_extlen_t flen; + int error = 0; + + /* Holes, unwritten, and delalloc extents cannot be shared */ + if (!xfs_is_reflink_inode(ip) || + ISUNWRITTEN(irec) || + irec->br_startblock == HOLESTARTBLOCK || + irec->br_startblock == DELAYSTARTBLOCK) { + *shared = false; + return 0; + } + + trace_xfs_reflink_trim_around_shared(ip, irec); + + agno = XFS_FSB_TO_AGNO(ip->i_mount, irec->br_startblock); + agbno = XFS_FSB_TO_AGBNO(ip->i_mount, irec->br_startblock); + aglen = irec->br_blockcount; + + error = xfs_reflink_find_shared(ip->i_mount, agno, agbno, + aglen, &fbno, &flen, true); + if (error) + return error; + + *shared = *trimmed = false; + if (fbno == NULLAGBLOCK) { + /* No shared blocks at all. */ + return 0; + } else if (fbno == agbno) { + /* + * The start of this extent is shared. 
Truncate the + * mapping at the end of the shared region so that a + * subsequent iteration starts at the start of the + * unshared region. + */ + irec->br_blockcount = flen; + *shared = true; + if (flen != aglen) + *trimmed = true; + return 0; + } else { + /* + * There's a shared extent midway through this extent. + * Truncate the mapping at the start of the shared + * extent so that a subsequent iteration starts at the + * start of the shared region. + */ + irec->br_blockcount = fbno - agbno; + *trimmed = true; + return 0; + } +} + +/* Create a CoW reservation for a range of blocks within a file. */ +static int +__xfs_reflink_reserve_cow( + struct xfs_inode *ip, + xfs_fileoff_t *offset_fsb, + xfs_fileoff_t end_fsb, + bool *skipped) +{ + struct xfs_bmbt_irec got, prev, imap; + xfs_fileoff_t orig_end_fsb; + int nimaps, eof = 0, error = 0; + bool shared = false, trimmed = false; + xfs_extnum_t idx; + xfs_extlen_t align; + + /* Already reserved? Skip the refcount btree access. */ + xfs_bmap_search_extents(ip, *offset_fsb, XFS_COW_FORK, &eof, &idx, + &got, &prev); + if (!eof && got.br_startoff <= *offset_fsb) { + end_fsb = orig_end_fsb = got.br_startoff + got.br_blockcount; + trace_xfs_reflink_cow_found(ip, &got); + goto done; + } + + /* Read extent from the source file. */ + nimaps = 1; + error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, + &imap, &nimaps, 0); + if (error) + goto out_unlock; + ASSERT(nimaps == 1); + + /* Trim the mapping to the nearest shared extent boundary. */ + error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed); + if (error) + goto out_unlock; + + end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount; + + /* Not shared? Just report the (potentially capped) extent. */ + if (!shared) { + *skipped = true; + goto done; + } + + /* + * Fork all the shared blocks from our write offset until the end of + * the extent. + */ + error = xfs_qm_dqattach_locked(ip, 0); + if (error) + goto out_unlock; + + align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip)); + if (align) + end_fsb = roundup_64(end_fsb, align); + +retry: + error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, *offset_fsb, + end_fsb - *offset_fsb, &got, + &prev, &idx, eof); + switch (error) { + case 0: + break; + case -ENOSPC: + case -EDQUOT: + /* retry without any preallocation */ + trace_xfs_reflink_cow_enospc(ip, &imap); + if (end_fsb != orig_end_fsb) { + end_fsb = orig_end_fsb; + goto retry; + } + /*FALLTHRU*/ + default: + goto out_unlock; + } + + if (end_fsb != orig_end_fsb) + xfs_inode_set_cowblocks_tag(ip); + + trace_xfs_reflink_cow_alloc(ip, &got); +done: + *offset_fsb = end_fsb; +out_unlock: + return error; +} + +/* Create a CoW reservation for part of a file. */ +int +xfs_reflink_reserve_cow_range( + struct xfs_inode *ip, + xfs_off_t offset, + xfs_off_t count) +{ + struct xfs_mount *mp = ip->i_mount; + xfs_fileoff_t offset_fsb, end_fsb; + bool skipped = false; + int error; + + trace_xfs_reflink_reserve_cow_range(ip, offset, count); + + offset_fsb = XFS_B_TO_FSBT(mp, offset); + end_fsb = XFS_B_TO_FSB(mp, offset + count); + + xfs_ilock(ip, XFS_ILOCK_EXCL); + while (offset_fsb < end_fsb) { + error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb, + &skipped); + if (error) { + trace_xfs_reflink_reserve_cow_range_error(ip, error, + _RET_IP_); + break; + } + } + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + return error; +} + +/* Allocate all CoW reservations covering a range of blocks in a file. 
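+ *
+ * Each call to this helper does roughly the following (editor's sketch,
+ * using only calls that appear in the function body):
+ *
+ *	xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, XFS_TRANS_RESERVE, &tp);
+ *	__xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped);
+ *	xfs_bmapi_write(tp, ip, ..., XFS_BMAPI_COWFORK, ...);
+ *	xfs_defer_finish(&tp, &dfops, NULL);
+ *	xfs_trans_commit(tp);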
*/ +static int +__xfs_reflink_allocate_cow( + struct xfs_inode *ip, + xfs_fileoff_t *offset_fsb, + xfs_fileoff_t end_fsb) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_bmbt_irec imap; + struct xfs_defer_ops dfops; + struct xfs_trans *tp; + xfs_fsblock_t first_block; + xfs_fileoff_t next_fsb; + int nimaps = 1, error; + bool skipped = false; + + xfs_defer_init(&dfops, &first_block); + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, + XFS_TRANS_RESERVE, &tp); + if (error) + return error; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + next_fsb = *offset_fsb; + error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped); + if (error) + goto out_trans_cancel; + + if (skipped) { + *offset_fsb = next_fsb; + goto out_trans_cancel; + } + + xfs_trans_ijoin(tp, ip, 0); + error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb, + XFS_BMAPI_COWFORK, &first_block, + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), + &imap, &nimaps, &dfops); + if (error) + goto out_trans_cancel; + + /* We might not have been able to map the whole delalloc extent */ + *offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb); + + error = xfs_defer_finish(&tp, &dfops, NULL); + if (error) + goto out_trans_cancel; + + error = xfs_trans_commit(tp); + +out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; +out_trans_cancel: + xfs_defer_cancel(&dfops); + xfs_trans_cancel(tp); + goto out_unlock; +} + +/* Allocate all CoW reservations covering a part of a file. */ +int +xfs_reflink_allocate_cow_range( + struct xfs_inode *ip, + xfs_off_t offset, + xfs_off_t count) +{ + struct xfs_mount *mp = ip->i_mount; + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); + int error; + + ASSERT(xfs_is_reflink_inode(ip)); + + trace_xfs_reflink_allocate_cow_range(ip, offset, count); + + /* + * Make sure that the dquots are there. + */ + error = xfs_qm_dqattach(ip, 0); + if (error) + return error; + + while (offset_fsb < end_fsb) { + error = __xfs_reflink_allocate_cow(ip, &offset_fsb, end_fsb); + if (error) { + trace_xfs_reflink_allocate_cow_range_error(ip, error, + _RET_IP_); + break; + } + } + + return error; +} + +/* + * Find the CoW reservation (and whether or not it needs block allocation) + * for a given byte offset of a file. + */ +bool +xfs_reflink_find_cow_mapping( + struct xfs_inode *ip, + xfs_off_t offset, + struct xfs_bmbt_irec *imap, + bool *need_alloc) +{ + struct xfs_bmbt_irec irec; + struct xfs_ifork *ifp; + struct xfs_bmbt_rec_host *gotp; + xfs_fileoff_t bno; + xfs_extnum_t idx; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); + ASSERT(xfs_is_reflink_inode(ip)); + + /* Find the extent in the CoW fork. */ + ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + bno = XFS_B_TO_FSBT(ip->i_mount, offset); + gotp = xfs_iext_bno_to_ext(ifp, bno, &idx); + if (!gotp) + return false; + + xfs_bmbt_get_all(gotp, &irec); + if (bno >= irec.br_startoff + irec.br_blockcount || + bno < irec.br_startoff) + return false; + + trace_xfs_reflink_find_cow_mapping(ip, offset, 1, XFS_IO_OVERWRITE, + &irec); + + /* If it's still delalloc, we must allocate later. */ + *imap = irec; + *need_alloc = !!(isnullstartblock(irec.br_startblock)); + + return true; +} + +/* + * Trim an extent to end at the next CoW reservation past offset_fsb. 
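+ *
+ * For example (editor's illustration): if imap covers file blocks
+ * [10, 30) and the next CoW reservation begins at block 22, the code
+ * below shortens imap->br_blockcount so that imap covers only [10, 22).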
+ */ +int +xfs_reflink_trim_irec_to_next_cow( + struct xfs_inode *ip, + xfs_fileoff_t offset_fsb, + struct xfs_bmbt_irec *imap) +{ + struct xfs_bmbt_irec irec; + struct xfs_ifork *ifp; + struct xfs_bmbt_rec_host *gotp; + xfs_extnum_t idx; + + if (!xfs_is_reflink_inode(ip)) + return 0; + + /* Find the extent in the CoW fork. */ + ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + gotp = xfs_iext_bno_to_ext(ifp, offset_fsb, &idx); + if (!gotp) + return 0; + xfs_bmbt_get_all(gotp, &irec); + + /* This is the extent before; try sliding up one. */ + if (irec.br_startoff < offset_fsb) { + idx++; + if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) + return 0; + gotp = xfs_iext_get_ext(ifp, idx); + xfs_bmbt_get_all(gotp, &irec); + } + + if (irec.br_startoff >= imap->br_startoff + imap->br_blockcount) + return 0; + + imap->br_blockcount = irec.br_startoff - imap->br_startoff; + trace_xfs_reflink_trim_irec(ip, imap); + + return 0; +} + +/* + * Cancel all pending CoW reservations for some block range of an inode. + */ +int +xfs_reflink_cancel_cow_blocks( + struct xfs_inode *ip, + struct xfs_trans **tpp, + xfs_fileoff_t offset_fsb, + xfs_fileoff_t end_fsb) +{ + struct xfs_bmbt_irec irec; + xfs_filblks_t count_fsb; + xfs_fsblock_t firstfsb; + struct xfs_defer_ops dfops; + int error = 0; + int nimaps; + + if (!xfs_is_reflink_inode(ip)) + return 0; + + /* Go find the old extent in the CoW fork. */ + while (offset_fsb < end_fsb) { + nimaps = 1; + count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); + error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec, + &nimaps, XFS_BMAPI_COWFORK); + if (error) + break; + ASSERT(nimaps == 1); + + trace_xfs_reflink_cancel_cow(ip, &irec); + + if (irec.br_startblock == DELAYSTARTBLOCK) { + /* Free a delayed allocation. */ + xfs_mod_fdblocks(ip->i_mount, irec.br_blockcount, + false); + ip->i_delayed_blks -= irec.br_blockcount; + + /* Remove the mapping from the CoW fork. */ + error = xfs_bunmapi_cow(ip, &irec); + if (error) + break; + } else if (irec.br_startblock == HOLESTARTBLOCK) { + /* empty */ + } else { + xfs_trans_ijoin(*tpp, ip, 0); + xfs_defer_init(&dfops, &firstfsb); + + /* Free the CoW orphan record. */ + error = xfs_refcount_free_cow_extent(ip->i_mount, + &dfops, irec.br_startblock, + irec.br_blockcount); + if (error) + break; + + xfs_bmap_add_free(ip->i_mount, &dfops, + irec.br_startblock, irec.br_blockcount, + NULL); + + /* Update quota accounting */ + xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT, + -(long)irec.br_blockcount); + + /* Roll the transaction */ + error = xfs_defer_finish(tpp, &dfops, ip); + if (error) { + xfs_defer_cancel(&dfops); + break; + } + + /* Remove the mapping from the CoW fork. */ + error = xfs_bunmapi_cow(ip, &irec); + if (error) + break; + } + + /* Roll on... */ + offset_fsb = irec.br_startoff + irec.br_blockcount; + } + + return error; +} + +/* + * Cancel all pending CoW reservations for some byte range of an inode. 
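+ *
+ * Note (editor's illustration): the byte range is widened to whole blocks
+ * below; with 4096-byte blocks, offset == 1000 and count == 3000 yield
+ * offset_fsb == 0 (XFS_B_TO_FSBT rounds down) and end_fsb == 1
+ * (XFS_B_TO_FSB rounds up), so partially covered blocks are cancelled too.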
+ */ +int +xfs_reflink_cancel_cow_range( + struct xfs_inode *ip, + xfs_off_t offset, + xfs_off_t count) +{ + struct xfs_trans *tp; + xfs_fileoff_t offset_fsb; + xfs_fileoff_t end_fsb; + int error; + + trace_xfs_reflink_cancel_cow_range(ip, offset, count); + ASSERT(xfs_is_reflink_inode(ip)); + + offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); + if (count == NULLFILEOFF) + end_fsb = NULLFILEOFF; + else + end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); + + /* Start a rolling transaction to remove the mappings */ + error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, + 0, 0, 0, &tp); + if (error) + goto out; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + + /* Scrape out the old CoW reservations */ + error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb); + if (error) + goto out_cancel; + + error = xfs_trans_commit(tp); + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; + +out_cancel: + xfs_trans_cancel(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); +out: + trace_xfs_reflink_cancel_cow_range_error(ip, error, _RET_IP_); + return error; +} + +/* + * Remap parts of a file's data fork after a successful CoW. + */ +int +xfs_reflink_end_cow( + struct xfs_inode *ip, + xfs_off_t offset, + xfs_off_t count) +{ + struct xfs_bmbt_irec irec; + struct xfs_bmbt_irec uirec; + struct xfs_trans *tp; + xfs_fileoff_t offset_fsb; + xfs_fileoff_t end_fsb; + xfs_filblks_t count_fsb; + xfs_fsblock_t firstfsb; + struct xfs_defer_ops dfops; + int error; + unsigned int resblks; + xfs_filblks_t ilen; + xfs_filblks_t rlen; + int nimaps; + + trace_xfs_reflink_end_cow(ip, offset, count); + + offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); + end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); + count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); + + /* Start a rolling transaction to switch the mappings */ + resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK); + error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, + resblks, 0, 0, &tp); + if (error) + goto out; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + + /* Go find the old extent in the CoW fork. */ + while (offset_fsb < end_fsb) { + /* Read extent from the source file */ + nimaps = 1; + count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); + error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec, + &nimaps, XFS_BMAPI_COWFORK); + if (error) + goto out_cancel; + ASSERT(nimaps == 1); + + ASSERT(irec.br_startblock != DELAYSTARTBLOCK); + trace_xfs_reflink_cow_remap(ip, &irec); + + /* + * We can have a hole in the CoW fork if part of a directio + * write is CoW but part of it isn't. + */ + rlen = ilen = irec.br_blockcount; + if (irec.br_startblock == HOLESTARTBLOCK) + goto next_extent; + + /* Unmap the old blocks in the data fork. */ + while (rlen) { + xfs_defer_init(&dfops, &firstfsb); + error = __xfs_bunmapi(tp, ip, irec.br_startoff, + &rlen, 0, 1, &firstfsb, &dfops); + if (error) + goto out_defer; + + /* + * Trim the extent to whatever got unmapped. + * Remember, bunmapi works backwards. + */ + uirec.br_startblock = irec.br_startblock + rlen; + uirec.br_startoff = irec.br_startoff + rlen; + uirec.br_blockcount = irec.br_blockcount - rlen; + irec.br_blockcount = rlen; + trace_xfs_reflink_cow_remap_piece(ip, &uirec); + + /* Free the CoW orphan record. */ + error = xfs_refcount_free_cow_extent(tp->t_mountp, + &dfops, uirec.br_startblock, + uirec.br_blockcount); + if (error) + goto out_defer; + + /* Map the new blocks into the data fork. 
*/ + error = xfs_bmap_map_extent(tp->t_mountp, &dfops, + ip, &uirec); + if (error) + goto out_defer; + + /* Remove the mapping from the CoW fork. */ + error = xfs_bunmapi_cow(ip, &uirec); + if (error) + goto out_defer; + + error = xfs_defer_finish(&tp, &dfops, ip); + if (error) + goto out_defer; + } + +next_extent: + /* Roll on... */ + offset_fsb = irec.br_startoff + ilen; + } + + error = xfs_trans_commit(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + if (error) + goto out; + return 0; + +out_defer: + xfs_defer_cancel(&dfops); +out_cancel: + xfs_trans_cancel(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); +out: + trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_); + return error; +} + +/* + * Free leftover CoW reservations that didn't get cleaned out. + */ +int +xfs_reflink_recover_cow( + struct xfs_mount *mp) +{ + xfs_agnumber_t agno; + int error = 0; + + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return 0; + + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + error = xfs_refcount_recover_cow_leftovers(mp, agno); + if (error) + break; + } + + return error; +} + +/* + * Reflinking (Block) Ranges of Two Files Together + * + * First, ensure that the reflink flag is set on both inodes. The flag is an + * optimization to avoid unnecessary refcount btree lookups in the write path. + * + * Now we can iteratively remap the range of extents (and holes) in src to the + * corresponding ranges in dest. Let drange and srange denote the ranges of + * logical blocks in dest and src touched by the reflink operation. + * + * While the length of drange is greater than zero, + * - Read src's bmbt at the start of srange ("imap") + * - If imap doesn't exist, make imap appear to start at the end of srange + * with zero length. + * - If imap starts before srange, advance imap to start at srange. + * - If imap goes beyond srange, truncate imap to end at the end of srange. + * - Punch (imap start - srange start + imap len) blocks from dest at + * offset (drange start). + * - If imap points to a real range of pblks, + * > Increase the refcount of the imap's pblks + * > Map imap's pblks into dest at the offset + * (drange start + imap start - srange start) + * - Advance drange and srange by (imap start - srange start + imap len) + * + * Finally, if the reflink made dest longer, update both the in-core and + * on-disk file sizes. + * + * ASCII Art Demonstration: + * + * Let's say we want to reflink this source file: + * + * ----SSSSSSS-SSSSS----SSSSSS (src file) + * <--------------------> + * + * into this destination file: + * + * --DDDDDDDDDDDDDDDDDDD--DDD (dest file) + * <--------------------> + * '-' means a hole, and 'S' and 'D' are written blocks in the src and dest. + * Observe that the range has different logical offsets in either file. + * + * Consider that the first extent in the source file doesn't line up with our + * reflink range. Unmapping and remapping are separate operations, so we can + * unmap more blocks from the destination file than we remap. + * + * ----SSSSSSS-SSSSS----SSSSSS + * <-------> + * --DDDDD---------DDDDD--DDD + * <-------> + * + * Now remap the source extent into the destination file: + * + * ----SSSSSSS-SSSSS----SSSSSS + * <-------> + * --DDDDD--SSSSSSSDDDDD--DDD + * <-------> + * + * Do likewise with the second hole and extent in our range. Holes in the + * unmap range don't affect our operation. + * + * ----SSSSSSS-SSSSS----SSSSSS + * <----> + * --DDDDD--SSSSSSS-SSSSS-DDD + * <----> + * + * Finally, unmap and remap part of the third extent. 
This will increase the + * size of the destination file. + * + * ----SSSSSSS-SSSSS----SSSSSS + * <-----> + * --DDDDD--SSSSSSS-SSSSS----SSS + * <-----> + * + * Once we update the destination file's i_size, we're done. + */ + +/* + * Ensure the reflink bit is set in both inodes. + */ +STATIC int +xfs_reflink_set_inode_flag( + struct xfs_inode *src, + struct xfs_inode *dest) +{ + struct xfs_mount *mp = src->i_mount; + int error; + struct xfs_trans *tp; + + if (xfs_is_reflink_inode(src) && xfs_is_reflink_inode(dest)) + return 0; + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); + if (error) + goto out_error; + + /* Lock both files against IO */ + if (src->i_ino == dest->i_ino) + xfs_ilock(src, XFS_ILOCK_EXCL); + else + xfs_lock_two_inodes(src, dest, XFS_ILOCK_EXCL); + + if (!xfs_is_reflink_inode(src)) { + trace_xfs_reflink_set_inode_flag(src); + xfs_trans_ijoin(tp, src, XFS_ILOCK_EXCL); + src->i_d.di_flags2 |= XFS_DIFLAG2_REFLINK; + xfs_trans_log_inode(tp, src, XFS_ILOG_CORE); + xfs_ifork_init_cow(src); + } else + xfs_iunlock(src, XFS_ILOCK_EXCL); + + if (src->i_ino == dest->i_ino) + goto commit_flags; + + if (!xfs_is_reflink_inode(dest)) { + trace_xfs_reflink_set_inode_flag(dest); + xfs_trans_ijoin(tp, dest, XFS_ILOCK_EXCL); + dest->i_d.di_flags2 |= XFS_DIFLAG2_REFLINK; + xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); + xfs_ifork_init_cow(dest); + } else + xfs_iunlock(dest, XFS_ILOCK_EXCL); + +commit_flags: + error = xfs_trans_commit(tp); + if (error) + goto out_error; + return error; + +out_error: + trace_xfs_reflink_set_inode_flag_error(dest, error, _RET_IP_); + return error; +} + +/* + * Update destination inode size & cowextsize hint, if necessary. + */ +STATIC int +xfs_reflink_update_dest( + struct xfs_inode *dest, + xfs_off_t newlen, + xfs_extlen_t cowextsize) +{ + struct xfs_mount *mp = dest->i_mount; + struct xfs_trans *tp; + int error; + + if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) + return 0; + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); + if (error) + goto out_error; + + xfs_ilock(dest, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, dest, XFS_ILOCK_EXCL); + + if (newlen > i_size_read(VFS_I(dest))) { + trace_xfs_reflink_update_inode_size(dest, newlen); + i_size_write(VFS_I(dest), newlen); + dest->i_d.di_size = newlen; + } + + if (cowextsize) { + dest->i_d.di_cowextsize = cowextsize; + dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; + } + + xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); + + error = xfs_trans_commit(tp); + if (error) + goto out_error; + return error; + +out_error: + trace_xfs_reflink_update_inode_size_error(dest, error, _RET_IP_); + return error; +} + +/* + * Do we have enough reserve in this AG to handle a reflink? The refcount + * btree already reserved all the space it needs, but the rmap btree can grow + * infinitely, so we won't allow more reflinks when the AG is down to the + * btree reserves. + */ +static int +xfs_reflink_ag_has_free_space( + struct xfs_mount *mp, + xfs_agnumber_t agno) +{ + struct xfs_perag *pag; + int error = 0; + + if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + return 0; + + pag = xfs_perag_get(mp, agno); + if (xfs_ag_resv_critical(pag, XFS_AG_RESV_AGFL) || + xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA)) + error = -ENOSPC; + xfs_perag_put(pag); + return error; +} + +/* + * Unmap a range of blocks from a file, then map other blocks into the hole. + * The range to unmap is (destoff : destoff + srcioff + irec->br_blockcount). + * The extent irec is mapped into dest at irec->br_startoff. 
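+ *
+ * For example, with hypothetical numbers: destoff == 16 and an irec
+ * translated to br_startoff == 20 with br_blockcount == 8 gives
+ *
+ *	unmap_len = 20 + 8 - 16 = 12
+ *
+ * so the punch covers the four-block gap ahead of the extent as well
+ * as the eight blocks that irec will occupy once it is mapped in.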
+ */ +STATIC int +xfs_reflink_remap_extent( + struct xfs_inode *ip, + struct xfs_bmbt_irec *irec, + xfs_fileoff_t destoff, + xfs_off_t new_isize) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + xfs_fsblock_t firstfsb; + unsigned int resblks; + struct xfs_defer_ops dfops; + struct xfs_bmbt_irec uirec; + bool real_extent; + xfs_filblks_t rlen; + xfs_filblks_t unmap_len; + xfs_off_t newlen; + int error; + + unmap_len = irec->br_startoff + irec->br_blockcount - destoff; + trace_xfs_reflink_punch_range(ip, destoff, unmap_len); + + /* Only remap normal extents. */ + real_extent = (irec->br_startblock != HOLESTARTBLOCK && + irec->br_startblock != DELAYSTARTBLOCK && + !ISUNWRITTEN(irec)); + + /* No reflinking if we're low on space */ + if (real_extent) { + error = xfs_reflink_ag_has_free_space(mp, + XFS_FSB_TO_AGNO(mp, irec->br_startblock)); + if (error) + goto out; + } + + /* Start a rolling transaction to switch the mappings */ + resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); + if (error) + goto out; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + + /* If we're not just clearing space, then do we have enough quota? */ + if (real_extent) { + error = xfs_trans_reserve_quota_nblks(tp, ip, + irec->br_blockcount, 0, XFS_QMOPT_RES_REGBLKS); + if (error) + goto out_cancel; + } + + trace_xfs_reflink_remap(ip, irec->br_startoff, + irec->br_blockcount, irec->br_startblock); + + /* Unmap the old blocks in the data fork. */ + rlen = unmap_len; + while (rlen) { + xfs_defer_init(&dfops, &firstfsb); + error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1, + &firstfsb, &dfops); + if (error) + goto out_defer; + + /* + * Trim the extent to whatever got unmapped. + * Remember, bunmapi works backwards. + */ + uirec.br_startblock = irec->br_startblock + rlen; + uirec.br_startoff = irec->br_startoff + rlen; + uirec.br_blockcount = unmap_len - rlen; + unmap_len = rlen; + + /* If this isn't a real mapping, we're done. */ + if (!real_extent || uirec.br_blockcount == 0) + goto next_extent; + + trace_xfs_reflink_remap(ip, uirec.br_startoff, + uirec.br_blockcount, uirec.br_startblock); + + /* Update the refcount tree */ + error = xfs_refcount_increase_extent(mp, &dfops, &uirec); + if (error) + goto out_defer; + + /* Map the new blocks into the data fork. */ + error = xfs_bmap_map_extent(mp, &dfops, ip, &uirec); + if (error) + goto out_defer; + + /* Update quota accounting. */ + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, + uirec.br_blockcount); + + /* Update dest isize if needed. */ + newlen = XFS_FSB_TO_B(mp, + uirec.br_startoff + uirec.br_blockcount); + newlen = min_t(xfs_off_t, newlen, new_isize); + if (newlen > i_size_read(VFS_I(ip))) { + trace_xfs_reflink_update_inode_size(ip, newlen); + i_size_write(VFS_I(ip), newlen); + ip->i_d.di_size = newlen; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + } + +next_extent: + /* Process all the deferred stuff. */ + error = xfs_defer_finish(&tp, &dfops, ip); + if (error) + goto out_defer; + } + + error = xfs_trans_commit(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + if (error) + goto out; + return 0; + +out_defer: + xfs_defer_cancel(&dfops); +out_cancel: + xfs_trans_cancel(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); +out: + trace_xfs_reflink_remap_extent_error(ip, error, _RET_IP_); + return error; +} + +/* + * Iteratively remap one file's extents (and holes) to another's. 
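+ *
+ * Each pass reads one mapping from the source file and shifts it into
+ * the destination's block space before handing it to
+ * xfs_reflink_remap_extent():
+ *
+ *	range_len = imap.br_startoff + imap.br_blockcount - srcoff;
+ *	imap.br_startoff += destoff - srcoff;
+ *
+ * Measuring range_len from srcoff (rather than from the start of the
+ * extent) is what carries holes ahead of the mapping across as well.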
+ */ +STATIC int +xfs_reflink_remap_blocks( + struct xfs_inode *src, + xfs_fileoff_t srcoff, + struct xfs_inode *dest, + xfs_fileoff_t destoff, + xfs_filblks_t len, + xfs_off_t new_isize) +{ + struct xfs_bmbt_irec imap; + int nimaps; + int error = 0; + xfs_filblks_t range_len; + + /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */ + while (len) { + trace_xfs_reflink_remap_blocks_loop(src, srcoff, len, + dest, destoff); + /* Read extent from the source file */ + nimaps = 1; + xfs_ilock(src, XFS_ILOCK_EXCL); + error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0); + xfs_iunlock(src, XFS_ILOCK_EXCL); + if (error) + goto err; + ASSERT(nimaps == 1); + + trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_IO_OVERWRITE, + &imap); + + /* Translate imap into the destination file. */ + range_len = imap.br_startoff + imap.br_blockcount - srcoff; + imap.br_startoff += destoff - srcoff; + + /* Clear dest from destoff to the end of imap and map it in. */ + error = xfs_reflink_remap_extent(dest, &imap, destoff, + new_isize); + if (error) + goto err; + + if (fatal_signal_pending(current)) { + error = -EINTR; + goto err; + } + + /* Advance drange/srange */ + srcoff += range_len; + destoff += range_len; + len -= range_len; + } + + return 0; + +err: + trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_); + return error; +} + +/* + * Read a page's worth of file data into the page cache. Return the page + * locked. + */ +static struct page * +xfs_get_page( + struct inode *inode, + xfs_off_t offset) +{ + struct address_space *mapping; + struct page *page; + pgoff_t n; + + n = offset >> PAGE_SHIFT; + mapping = inode->i_mapping; + page = read_mapping_page(mapping, n, NULL); + if (IS_ERR(page)) + return page; + if (!PageUptodate(page)) { + put_page(page); + return ERR_PTR(-EIO); + } + lock_page(page); + return page; +} + +/* + * Compare extents of two files to see if they are the same. 
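+ *
+ * The comparison is a plain memcmp() done one chunk at a time, each
+ * chunk clamped so it never crosses a page boundary in either file.
+ * With hypothetical offsets on 4k pages, srcoff == 0x0ff0 and
+ * destoff == 0x1800 give src_poff == 0xff0 and dest_poff == 0x800,
+ * so that pass compares min(0x10, 0x800) == 16 bytes (assuming len is
+ * at least that big) before advancing both offsets.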
+ */ +static int +xfs_compare_extents( + struct inode *src, + xfs_off_t srcoff, + struct inode *dest, + xfs_off_t destoff, + xfs_off_t len, + bool *is_same) +{ + xfs_off_t src_poff; + xfs_off_t dest_poff; + void *src_addr; + void *dest_addr; + struct page *src_page; + struct page *dest_page; + xfs_off_t cmp_len; + bool same; + int error; + + error = -EINVAL; + same = true; + while (len) { + src_poff = srcoff & (PAGE_SIZE - 1); + dest_poff = destoff & (PAGE_SIZE - 1); + cmp_len = min(PAGE_SIZE - src_poff, + PAGE_SIZE - dest_poff); + cmp_len = min(cmp_len, len); + ASSERT(cmp_len > 0); + + trace_xfs_reflink_compare_extents(XFS_I(src), srcoff, cmp_len, + XFS_I(dest), destoff); + + src_page = xfs_get_page(src, srcoff); + if (IS_ERR(src_page)) { + error = PTR_ERR(src_page); + goto out_error; + } + dest_page = xfs_get_page(dest, destoff); + if (IS_ERR(dest_page)) { + error = PTR_ERR(dest_page); + unlock_page(src_page); + put_page(src_page); + goto out_error; + } + src_addr = kmap_atomic(src_page); + dest_addr = kmap_atomic(dest_page); + + flush_dcache_page(src_page); + flush_dcache_page(dest_page); + + if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len)) + same = false; + + kunmap_atomic(dest_addr); + kunmap_atomic(src_addr); + unlock_page(dest_page); + unlock_page(src_page); + put_page(dest_page); + put_page(src_page); + + if (!same) + break; + + srcoff += cmp_len; + destoff += cmp_len; + len -= cmp_len; + } + + *is_same = same; + return 0; + +out_error: + trace_xfs_reflink_compare_extents_error(XFS_I(dest), error, _RET_IP_); + return error; +} + +/* + * Link a range of blocks from one file to another. + */ +int +xfs_reflink_remap_range( + struct xfs_inode *src, + xfs_off_t srcoff, + struct xfs_inode *dest, + xfs_off_t destoff, + xfs_off_t len, + unsigned int flags) +{ + struct xfs_mount *mp = src->i_mount; + xfs_fileoff_t sfsbno, dfsbno; + xfs_filblks_t fsblen; + int error; + xfs_extlen_t cowextsize; + bool is_same; + + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return -EOPNOTSUPP; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + /* Don't reflink realtime inodes */ + if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest)) + return -EINVAL; + + if (flags & ~XFS_REFLINK_ALL) + return -EINVAL; + + trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff); + + /* Lock both files against IO */ + if (src->i_ino == dest->i_ino) { + xfs_ilock(src, XFS_IOLOCK_EXCL); + xfs_ilock(src, XFS_MMAPLOCK_EXCL); + } else { + xfs_lock_two_inodes(src, dest, XFS_IOLOCK_EXCL); + xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); + } + + /* + * Check that the extents are the same. + */ + if (flags & XFS_REFLINK_DEDUPE) { + is_same = false; + error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest), + destoff, len, &is_same); + if (error) + goto out_error; + if (!is_same) { + error = -EBADE; + goto out_error; + } + } + + error = xfs_reflink_set_inode_flag(src, dest); + if (error) + goto out_error; + + /* + * Invalidate the page cache so that we can clear any CoW mappings + * in the destination file. 
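+	 *
+	 * Whole pages covering the byte range are dropped; with made-up
+	 * numbers on 4k pages, destoff == 100 and len == 5000 truncate
+	 * cached bytes 100 through PAGE_ALIGN(5100) - 1 == 8191, so no
+	 * stale copy of the old destination blocks survives the remap.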
+ */ + truncate_inode_pages_range(&VFS_I(dest)->i_data, destoff, + PAGE_ALIGN(destoff + len) - 1); + + dfsbno = XFS_B_TO_FSBT(mp, destoff); + sfsbno = XFS_B_TO_FSBT(mp, srcoff); + fsblen = XFS_B_TO_FSB(mp, len); + error = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen, + destoff + len); + if (error) + goto out_error; + + /* + * Carry the cowextsize hint from src to dest if we're sharing the + * entire source file to the entire destination file, the source file + * has a cowextsize hint, and the destination file does not. + */ + cowextsize = 0; + if (srcoff == 0 && len == i_size_read(VFS_I(src)) && + (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) && + destoff == 0 && len >= i_size_read(VFS_I(dest)) && + !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) + cowextsize = src->i_d.di_cowextsize; + + error = xfs_reflink_update_dest(dest, destoff + len, cowextsize); + if (error) + goto out_error; + +out_error: + xfs_iunlock(src, XFS_MMAPLOCK_EXCL); + xfs_iunlock(src, XFS_IOLOCK_EXCL); + if (src->i_ino != dest->i_ino) { + xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); + xfs_iunlock(dest, XFS_IOLOCK_EXCL); + } + if (error) + trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_); + return error; +} + +/* + * The user wants to preemptively CoW all shared blocks in this file, + * which enables us to turn off the reflink flag. Iterate all + * extents which are not prealloc/delalloc to see which ranges are + * mentioned in the refcount tree, then read those blocks into the + * pagecache, dirty them, fsync them back out, and then we can update + * the inode flag. What happens if we run out of memory? :) + */ +STATIC int +xfs_reflink_dirty_extents( + struct xfs_inode *ip, + xfs_fileoff_t fbno, + xfs_filblks_t end, + xfs_off_t isize) +{ + struct xfs_mount *mp = ip->i_mount; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + xfs_extlen_t aglen; + xfs_agblock_t rbno; + xfs_extlen_t rlen; + xfs_off_t fpos; + xfs_off_t flen; + struct xfs_bmbt_irec map[2]; + int nmaps; + int error = 0; + + while (end - fbno > 0) { + nmaps = 1; + /* + * Look for extents in the file. Skip holes, delalloc, or + * unwritten extents; they can't be reflinked. + */ + error = xfs_bmapi_read(ip, fbno, end - fbno, map, &nmaps, 0); + if (error) + goto out; + if (nmaps == 0) + break; + if (map[0].br_startblock == HOLESTARTBLOCK || + map[0].br_startblock == DELAYSTARTBLOCK || + ISUNWRITTEN(&map[0])) + goto next; + + map[1] = map[0]; + while (map[1].br_blockcount) { + agno = XFS_FSB_TO_AGNO(mp, map[1].br_startblock); + agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock); + aglen = map[1].br_blockcount; + + error = xfs_reflink_find_shared(mp, agno, agbno, aglen, + &rbno, &rlen, true); + if (error) + goto out; + if (rbno == NULLAGBLOCK) + break; + + /* Dirty the pages */ + xfs_iunlock(ip, XFS_ILOCK_EXCL); + fpos = XFS_FSB_TO_B(mp, map[1].br_startoff + + (rbno - agbno)); + flen = XFS_FSB_TO_B(mp, rlen); + if (fpos + flen > isize) + flen = isize - fpos; + error = iomap_file_dirty(VFS_I(ip), fpos, flen, + &xfs_iomap_ops); + xfs_ilock(ip, XFS_ILOCK_EXCL); + if (error) + goto out; + + map[1].br_blockcount -= (rbno - agbno + rlen); + map[1].br_startoff += (rbno - agbno + rlen); + map[1].br_startblock += (rbno - agbno + rlen); + } + +next: + fbno = map[0].br_startoff + map[0].br_blockcount; + } +out: + return error; +} + +/* Clear the inode reflink flag if there are no shared extents. 
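+ *
+ * The scan below walks each written, non-delalloc extent in the data
+ * fork and asks the refcount btree whether any of its blocks are
+ * still shared:
+ *
+ *	xfs_reflink_find_shared(mp, agno, agbno, aglen, &rbno, &rlen,
+ *			false);
+ *
+ * The first hit (rbno != NULLAGBLOCK) means the flag must stay set,
+ * so we bail out early; only a clean pass over the whole file lets us
+ * cancel the leftover CoW reservations and clear XFS_DIFLAG2_REFLINK.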
*/ +int +xfs_reflink_clear_inode_flag( + struct xfs_inode *ip, + struct xfs_trans **tpp) +{ + struct xfs_mount *mp = ip->i_mount; + xfs_fileoff_t fbno; + xfs_filblks_t end; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + xfs_extlen_t aglen; + xfs_agblock_t rbno; + xfs_extlen_t rlen; + struct xfs_bmbt_irec map; + int nmaps; + int error = 0; + + ASSERT(xfs_is_reflink_inode(ip)); + + fbno = 0; + end = XFS_B_TO_FSB(mp, i_size_read(VFS_I(ip))); + while (end - fbno > 0) { + nmaps = 1; + /* + * Look for extents in the file. Skip holes, delalloc, or + * unwritten extents; they can't be reflinked. + */ + error = xfs_bmapi_read(ip, fbno, end - fbno, &map, &nmaps, 0); + if (error) + return error; + if (nmaps == 0) + break; + if (map.br_startblock == HOLESTARTBLOCK || + map.br_startblock == DELAYSTARTBLOCK || + ISUNWRITTEN(&map)) + goto next; + + agno = XFS_FSB_TO_AGNO(mp, map.br_startblock); + agbno = XFS_FSB_TO_AGBNO(mp, map.br_startblock); + aglen = map.br_blockcount; + + error = xfs_reflink_find_shared(mp, agno, agbno, aglen, + &rbno, &rlen, false); + if (error) + return error; + /* Is there still a shared block here? */ + if (rbno != NULLAGBLOCK) + return 0; +next: + fbno = map.br_startoff + map.br_blockcount; + } + + /* + * We didn't find any shared blocks so turn off the reflink flag. + * First, get rid of any leftover CoW mappings. + */ + error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF); + if (error) + return error; + + /* Clear the inode flag. */ + trace_xfs_reflink_unset_inode_flag(ip); + ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; + xfs_inode_clear_cowblocks_tag(ip); + xfs_trans_ijoin(*tpp, ip, 0); + xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); + + return error; +} + +/* + * Clear the inode reflink flag if there are no shared extents and the size + * hasn't changed. + */ +STATIC int +xfs_reflink_try_clear_inode_flag( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error = 0; + + /* Start a rolling transaction to remove the mappings */ + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); + if (error) + return error; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + + error = xfs_reflink_clear_inode_flag(ip, &tp); + if (error) + goto cancel; + + error = xfs_trans_commit(tp); + if (error) + goto out; + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return 0; +cancel: + xfs_trans_cancel(tp); +out: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; +} + +/* + * Pre-COW all shared blocks within a given byte range of a file and turn off + * the reflink flag if we unshare all of the file's blocks. + */ +int +xfs_reflink_unshare( + struct xfs_inode *ip, + xfs_off_t offset, + xfs_off_t len) +{ + struct xfs_mount *mp = ip->i_mount; + xfs_fileoff_t fbno; + xfs_filblks_t end; + xfs_off_t isize; + int error; + + if (!xfs_is_reflink_inode(ip)) + return 0; + + trace_xfs_reflink_unshare(ip, offset, len); + + inode_dio_wait(VFS_I(ip)); + + /* Try to CoW the selected ranges */ + xfs_ilock(ip, XFS_ILOCK_EXCL); + fbno = XFS_B_TO_FSBT(mp, offset); + isize = i_size_read(VFS_I(ip)); + end = XFS_B_TO_FSB(mp, offset + len); + error = xfs_reflink_dirty_extents(ip, fbno, end, isize); + if (error) + goto out_unlock; + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + /* Wait for the IO to finish */ + error = filemap_write_and_wait(VFS_I(ip)->i_mapping); + if (error) + goto out; + + /* Turn off the reflink flag if possible. 
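+	 *
+	 * By this point the unshare sequence has (1) re-dirtied every
+	 * page backed by a shared block via xfs_reflink_dirty_extents()
+	 * and (2) written those pages back above, allocating fresh
+	 * unshared blocks through the normal CoW write path.  Step (3)
+	 * below should therefore find no shared extents left in the
+	 * file, in which case it drops the reflink flag.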
*/ + error = xfs_reflink_try_clear_inode_flag(ip); + if (error) + goto out; + + return 0; + +out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL); +out: + trace_xfs_reflink_unshare_error(ip, error, _RET_IP_); + return error; +} + +/* + * Does this inode have any real CoW reservations? + */ +bool +xfs_reflink_has_real_cow_blocks( + struct xfs_inode *ip) +{ + struct xfs_bmbt_irec irec; + struct xfs_ifork *ifp; + struct xfs_bmbt_rec_host *gotp; + xfs_extnum_t idx; + + if (!xfs_is_reflink_inode(ip)) + return false; + + /* Go find the old extent in the CoW fork. */ + ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + gotp = xfs_iext_bno_to_ext(ifp, 0, &idx); + while (gotp) { + xfs_bmbt_get_all(gotp, &irec); + + if (!isnullstartblock(irec.br_startblock)) + return true; + + /* Roll on... */ + idx++; + if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) + break; + gotp = xfs_iext_get_ext(ifp, idx); + } + + return false; +} diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h new file mode 100644 index 000000000000..5dc3c8ac12aa --- /dev/null +++ b/fs/xfs/xfs_reflink.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2016 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ +#ifndef __XFS_REFLINK_H +#define __XFS_REFLINK_H 1 + +extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno, + xfs_extlen_t *flen, bool find_maximal); +extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, + struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed); + +extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip, + xfs_off_t offset, xfs_off_t count); +extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, + xfs_off_t offset, xfs_off_t count); +extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, + struct xfs_bmbt_irec *imap, bool *need_alloc); +extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, + xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap); + +extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip, + struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, + xfs_fileoff_t end_fsb); +extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, + xfs_off_t count); +extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, + xfs_off_t count); +extern int xfs_reflink_recover_cow(struct xfs_mount *mp); +#define XFS_REFLINK_DEDUPE 1 /* only reflink if contents match */ +#define XFS_REFLINK_ALL (XFS_REFLINK_DEDUPE) +extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff, + struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len, + unsigned int flags); +extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip, + struct xfs_trans **tpp); +extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset, + xfs_off_t len); + +extern bool xfs_reflink_has_real_cow_blocks(struct xfs_inode *ip); + +#endif /* __XFS_REFLINK_H */ diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 0432a459871c..73c827831551 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -441,8 +441,11 @@ xfs_rui_recover( XFS_FSB_TO_DADDR(mp, rmap->me_startblock)); switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) { case XFS_RMAP_EXTENT_MAP: + case XFS_RMAP_EXTENT_MAP_SHARED: case XFS_RMAP_EXTENT_UNMAP: + case XFS_RMAP_EXTENT_UNMAP_SHARED: case XFS_RMAP_EXTENT_CONVERT: + case XFS_RMAP_EXTENT_CONVERT_SHARED: case XFS_RMAP_EXTENT_ALLOC: case XFS_RMAP_EXTENT_FREE: op_ok = true; @@ -481,12 +484,21 @@ xfs_rui_recover( case XFS_RMAP_EXTENT_MAP: type = XFS_RMAP_MAP; break; + case XFS_RMAP_EXTENT_MAP_SHARED: + type = XFS_RMAP_MAP_SHARED; + break; case XFS_RMAP_EXTENT_UNMAP: type = XFS_RMAP_UNMAP; break; + case XFS_RMAP_EXTENT_UNMAP_SHARED: + type = XFS_RMAP_UNMAP_SHARED; + break; case XFS_RMAP_EXTENT_CONVERT: type = XFS_RMAP_CONVERT; break; + case XFS_RMAP_EXTENT_CONVERT_SHARED: + type = XFS_RMAP_CONVERT_SHARED; + break; case XFS_RMAP_EXTENT_ALLOC: type = XFS_RMAP_ALLOC; break; diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index 6e812fe0fd43..12d48cd8f8a4 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -62,6 +62,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) { "ibt2", XFSSTAT_END_IBT_V2 }, { "fibt2", XFSSTAT_END_FIBT_V2 }, { "rmapbt", XFSSTAT_END_RMAP_V2 }, + { "refcntbt", XFSSTAT_END_REFCOUNT }, /* we print both series of quota information together */ { "qm", XFSSTAT_END_QM }, }; diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h index 657865f51e78..79ad2e69fc33 100644 --- a/fs/xfs/xfs_stats.h +++ b/fs/xfs/xfs_stats.h @@ -213,7 +213,23 @@ struct xfsstats { __uint32_t xs_rmap_2_alloc; __uint32_t xs_rmap_2_free; __uint32_t 
xs_rmap_2_moves; -#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_RMAP_V2+6) +#define XFSSTAT_END_REFCOUNT (XFSSTAT_END_RMAP_V2 + 15) + __uint32_t xs_refcbt_2_lookup; + __uint32_t xs_refcbt_2_compare; + __uint32_t xs_refcbt_2_insrec; + __uint32_t xs_refcbt_2_delrec; + __uint32_t xs_refcbt_2_newroot; + __uint32_t xs_refcbt_2_killroot; + __uint32_t xs_refcbt_2_increment; + __uint32_t xs_refcbt_2_decrement; + __uint32_t xs_refcbt_2_lshift; + __uint32_t xs_refcbt_2_rshift; + __uint32_t xs_refcbt_2_split; + __uint32_t xs_refcbt_2_join; + __uint32_t xs_refcbt_2_alloc; + __uint32_t xs_refcbt_2_free; + __uint32_t xs_refcbt_2_moves; +#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_REFCOUNT + 6) __uint32_t xs_qm_dqreclaims; __uint32_t xs_qm_dqreclaim_misses; __uint32_t xs_qm_dquot_dups; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 2d092f9577ca..ade4691e3f74 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -47,6 +47,9 @@ #include "xfs_sysfs.h" #include "xfs_ondisk.h" #include "xfs_rmap_item.h" +#include "xfs_refcount_item.h" +#include "xfs_bmap_item.h" +#include "xfs_reflink.h" #include <linux/namei.h> #include <linux/init.h> @@ -936,6 +939,7 @@ xfs_fs_destroy_inode( struct inode *inode) { struct xfs_inode *ip = XFS_I(inode); + int error; trace_xfs_destroy_inode(ip); @@ -943,6 +947,14 @@ XFS_STATS_INC(ip->i_mount, vn_rele); XFS_STATS_INC(ip->i_mount, vn_remove); + if (xfs_is_reflink_inode(ip)) { + error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); + if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) + xfs_warn(ip->i_mount, +"Error %d while evicting CoW blocks for inode %llu.", + error, ip->i_ino); + } + xfs_inactive(ip); ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); @@ -1006,6 +1018,16 @@ xfs_fs_drop_inode( { struct xfs_inode *ip = XFS_I(inode); + /* + * If this unlinked inode is in the middle of recovery, don't + * drop the inode just yet; log recovery will take care of + * that. See the comment for this inode flag. + */ + if (ip->i_flags & XFS_IRECOVERY) { + ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED); + return 0; + } + return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE); } @@ -1296,10 +1318,31 @@ xfs_fs_remount( xfs_restore_resvblks(mp); xfs_log_work_queue(mp); xfs_queue_eofblocks(mp); + + /* Recover any CoW blocks that never got remapped. */ + error = xfs_reflink_recover_cow(mp); + if (error) { + xfs_err(mp, + "Error %d recovering leftover CoW allocations.", error); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return error; + } + + /* Create the per-AG metadata reservation pool. */ + error = xfs_fs_reserve_ag_blocks(mp); + if (error && error != -ENOSPC) + return error; } /* rw -> ro */ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { + /* Free the per-AG metadata reservation pool. */ + error = xfs_fs_unreserve_ag_blocks(mp); + if (error) { + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return error; + } + /* * Before we sync the metadata, we need to free up the reserve * block pool so that the used block count in the superblock on @@ -1490,6 +1533,7 @@ xfs_fs_fill_super( atomic_set(&mp->m_active_trans, 0); INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); + INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker); mp->m_kobj.kobject.kset = xfs_kset; mp->m_super = sb; @@ -1572,6 +1616,9 @@ "DAX unsupported by block device. 
Turning off DAX."); mp->m_flags &= ~XFS_MOUNT_DAX; } + if (xfs_sb_version_hasreflink(&mp->m_sb)) + xfs_alert(mp, + "DAX and reflink have not been tested together!"); } if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { @@ -1585,6 +1632,10 @@ xfs_fs_fill_super( "EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!"); } + if (xfs_sb_version_hasreflink(&mp->m_sb)) + xfs_alert(mp, + "EXPERIMENTAL reflink feature enabled. Use at your own risk!"); + error = xfs_mountfs(mp); if (error) goto out_filestream_unmount; @@ -1788,8 +1839,38 @@ xfs_init_zones(void) if (!xfs_rui_zone) goto out_destroy_rud_zone; + xfs_cud_zone = kmem_zone_init(sizeof(struct xfs_cud_log_item), + "xfs_cud_item"); + if (!xfs_cud_zone) + goto out_destroy_rui_zone; + + xfs_cui_zone = kmem_zone_init( + xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS), + "xfs_cui_item"); + if (!xfs_cui_zone) + goto out_destroy_cud_zone; + + xfs_bud_zone = kmem_zone_init(sizeof(struct xfs_bud_log_item), + "xfs_bud_item"); + if (!xfs_bud_zone) + goto out_destroy_cui_zone; + + xfs_bui_zone = kmem_zone_init( + xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS), + "xfs_bui_item"); + if (!xfs_bui_zone) + goto out_destroy_bud_zone; + return 0; + out_destroy_bud_zone: + kmem_zone_destroy(xfs_bud_zone); + out_destroy_cui_zone: + kmem_zone_destroy(xfs_cui_zone); + out_destroy_cud_zone: + kmem_zone_destroy(xfs_cud_zone); + out_destroy_rui_zone: + kmem_zone_destroy(xfs_rui_zone); out_destroy_rud_zone: kmem_zone_destroy(xfs_rud_zone); out_destroy_icreate_zone: @@ -1832,6 +1913,10 @@ xfs_destroy_zones(void) * destroy caches. */ rcu_barrier(); + kmem_zone_destroy(xfs_bui_zone); + kmem_zone_destroy(xfs_bud_zone); + kmem_zone_destroy(xfs_cui_zone); + kmem_zone_destroy(xfs_cud_zone); kmem_zone_destroy(xfs_rui_zone); kmem_zone_destroy(xfs_rud_zone); kmem_zone_destroy(xfs_icreate_zone); @@ -1885,6 +1970,8 @@ init_xfs_fs(void) xfs_extent_free_init_defer_op(); xfs_rmap_update_init_defer_op(); + xfs_refcount_update_init_defer_op(); + xfs_bmap_update_init_defer_op(); xfs_dir_startup(); diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index aed74d3f8da9..afe1f66aaa69 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -184,6 +184,15 @@ static struct ctl_table xfs_table[] = { .extra1 = &xfs_params.eofb_timer.min, .extra2 = &xfs_params.eofb_timer.max, }, + { + .procname = "speculative_cow_prealloc_lifetime", + .data = &xfs_params.cowb_timer.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.cowb_timer.min, + .extra2 = &xfs_params.cowb_timer.max, + }, /* please keep this the last entry */ #ifdef CONFIG_PROC_FS { diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h index ffef45375754..984a3499cfe3 100644 --- a/fs/xfs/xfs_sysctl.h +++ b/fs/xfs/xfs_sysctl.h @@ -48,6 +48,7 @@ typedef struct xfs_param { xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */ xfs_sysctl_val_t fstrm_timer; /* Filestream dir-AG assoc'n timeout. 
*/ xfs_sysctl_val_t eofb_timer; /* Interval between eofb scan wakeups */ + xfs_sysctl_val_t cowb_timer; /* Interval between cowb scan wakeups */ } xfs_param_t; /* diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 16093c7dacde..ad188d3a83f3 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -39,6 +39,7 @@ struct xfs_buf_log_format; struct xfs_inode_log_format; struct xfs_bmbt_irec; struct xfs_btree_cur; +struct xfs_refcount_irec; DECLARE_EVENT_CLASS(xfs_attr_list_class, TP_PROTO(struct xfs_attr_list_context *ctx), @@ -135,6 +136,8 @@ DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); DEFINE_PERAG_REF_EVENT(xfs_perag_set_eofblocks); DEFINE_PERAG_REF_EVENT(xfs_perag_clear_eofblocks); +DEFINE_PERAG_REF_EVENT(xfs_perag_set_cowblocks); +DEFINE_PERAG_REF_EVENT(xfs_perag_clear_cowblocks); DECLARE_EVENT_CLASS(xfs_ag_class, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno), @@ -268,10 +271,10 @@ DECLARE_EVENT_CLASS(xfs_bmap_class, __field(unsigned long, caller_ip) ), TP_fast_assign( - struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ? - ip->i_afp : &ip->i_df; + struct xfs_ifork *ifp; struct xfs_bmbt_irec r; + ifp = xfs_iext_state_to_fork(ip, state); xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r); __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; @@ -686,6 +689,9 @@ DEFINE_INODE_EVENT(xfs_dquot_dqdetach); DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag); DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag); DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid); +DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag); +DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag); +DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid); DEFINE_INODE_EVENT(xfs_filemap_fault); DEFINE_INODE_EVENT(xfs_filemap_pmd_fault); @@ -2581,10 +2587,20 @@ DEFINE_RMAPBT_EVENT(xfs_rmap_delete); DEFINE_AG_ERROR_EVENT(xfs_rmap_insert_error); DEFINE_AG_ERROR_EVENT(xfs_rmap_delete_error); DEFINE_AG_ERROR_EVENT(xfs_rmap_update_error); + +DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_candidate); +DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_query); +DEFINE_RMAPBT_EVENT(xfs_rmap_lookup_le_range_candidate); +DEFINE_RMAPBT_EVENT(xfs_rmap_lookup_le_range); DEFINE_RMAPBT_EVENT(xfs_rmap_lookup_le_range_result); DEFINE_RMAPBT_EVENT(xfs_rmap_find_right_neighbor_result); DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_result); +/* deferred bmbt updates */ +#define DEFINE_BMAP_DEFERRED_EVENT DEFINE_RMAP_DEFERRED_EVENT +DEFINE_BMAP_DEFERRED_EVENT(xfs_bmap_defer); +DEFINE_BMAP_DEFERRED_EVENT(xfs_bmap_deferred); + /* per-AG reservation */ DECLARE_EVENT_CLASS(xfs_ag_resv_class, TP_PROTO(struct xfs_perag *pag, enum xfs_ag_resv_type resv, @@ -2639,6 +2655,728 @@ DEFINE_AG_RESV_EVENT(xfs_ag_resv_needed); DEFINE_AG_ERROR_EVENT(xfs_ag_resv_free_error); DEFINE_AG_ERROR_EVENT(xfs_ag_resv_init_error); +/* refcount tracepoint classes */ + +/* reuse the discard trace class for agbno/aglen-based traces */ +#define DEFINE_AG_EXTENT_EVENT(name) DEFINE_DISCARD_EVENT(name) + +/* ag btree lookup tracepoint class */ +#define XFS_AG_BTREE_CMP_FORMAT_STR \ + { XFS_LOOKUP_EQ, "eq" }, \ + { XFS_LOOKUP_LE, "le" }, \ + { XFS_LOOKUP_GE, "ge" } +DECLARE_EVENT_CLASS(xfs_ag_btree_lookup_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t agbno, xfs_lookup_t dir), + TP_ARGS(mp, agno, agbno, dir), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_lookup_t, dir) + ), + TP_fast_assign( + __entry->dev = 
mp->m_super->s_dev; + __entry->agno = agno; + __entry->agbno = agbno; + __entry->dir = dir; + ), + TP_printk("dev %d:%d agno %u agbno %u cmp %s(%d)\n", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __print_symbolic(__entry->dir, XFS_AG_BTREE_CMP_FORMAT_STR), + __entry->dir) +) + +#define DEFINE_AG_BTREE_LOOKUP_EVENT(name) \ +DEFINE_EVENT(xfs_ag_btree_lookup_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ + xfs_agblock_t agbno, xfs_lookup_t dir), \ + TP_ARGS(mp, agno, agbno, dir)) + +/* single-rcext tracepoint class */ +DECLARE_EVENT_CLASS(xfs_refcount_extent_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + struct xfs_refcount_irec *irec), + TP_ARGS(mp, agno, irec), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, startblock) + __field(xfs_extlen_t, blockcount) + __field(xfs_nlink_t, refcount) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->startblock = irec->rc_startblock; + __entry->blockcount = irec->rc_blockcount; + __entry->refcount = irec->rc_refcount; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u\n", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->startblock, + __entry->blockcount, + __entry->refcount) +) + +#define DEFINE_REFCOUNT_EXTENT_EVENT(name) \ +DEFINE_EVENT(xfs_refcount_extent_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ + struct xfs_refcount_irec *irec), \ + TP_ARGS(mp, agno, irec)) + +/* single-rcext and an agbno tracepoint class */ +DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + struct xfs_refcount_irec *irec, xfs_agblock_t agbno), + TP_ARGS(mp, agno, irec, agbno), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, startblock) + __field(xfs_extlen_t, blockcount) + __field(xfs_nlink_t, refcount) + __field(xfs_agblock_t, agbno) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->startblock = irec->rc_startblock; + __entry->blockcount = irec->rc_blockcount; + __entry->refcount = irec->rc_refcount; + __entry->agbno = agbno; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u @ agbno %u\n", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->startblock, + __entry->blockcount, + __entry->refcount, + __entry->agbno) +) + +#define DEFINE_REFCOUNT_EXTENT_AT_EVENT(name) \ +DEFINE_EVENT(xfs_refcount_extent_at_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ + struct xfs_refcount_irec *irec, xfs_agblock_t agbno), \ + TP_ARGS(mp, agno, irec, agbno)) + +/* double-rcext tracepoint class */ +DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2), + TP_ARGS(mp, agno, i1, i2), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, i1_startblock) + __field(xfs_extlen_t, i1_blockcount) + __field(xfs_nlink_t, i1_refcount) + __field(xfs_agblock_t, i2_startblock) + __field(xfs_extlen_t, i2_blockcount) + __field(xfs_nlink_t, i2_refcount) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->i1_startblock = i1->rc_startblock; + __entry->i1_blockcount = i1->rc_blockcount; + __entry->i1_refcount = i1->rc_refcount; + __entry->i2_startblock = i2->rc_startblock; + 
__entry->i2_blockcount = i2->rc_blockcount; + __entry->i2_refcount = i2->rc_refcount; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- " + "agbno %u len %u refcount %u\n", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->i1_startblock, + __entry->i1_blockcount, + __entry->i1_refcount, + __entry->i2_startblock, + __entry->i2_blockcount, + __entry->i2_refcount) +) + +#define DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(name) \ +DEFINE_EVENT(xfs_refcount_double_extent_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ + struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2), \ + TP_ARGS(mp, agno, i1, i2)) + +/* double-rcext and an agbno tracepoint class */ +DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2, + xfs_agblock_t agbno), + TP_ARGS(mp, agno, i1, i2, agbno), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, i1_startblock) + __field(xfs_extlen_t, i1_blockcount) + __field(xfs_nlink_t, i1_refcount) + __field(xfs_agblock_t, i2_startblock) + __field(xfs_extlen_t, i2_blockcount) + __field(xfs_nlink_t, i2_refcount) + __field(xfs_agblock_t, agbno) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->i1_startblock = i1->rc_startblock; + __entry->i1_blockcount = i1->rc_blockcount; + __entry->i1_refcount = i1->rc_refcount; + __entry->i2_startblock = i2->rc_startblock; + __entry->i2_blockcount = i2->rc_blockcount; + __entry->i2_refcount = i2->rc_refcount; + __entry->agbno = agbno; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- " + "agbno %u len %u refcount %u @ agbno %u\n", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->i1_startblock, + __entry->i1_blockcount, + __entry->i1_refcount, + __entry->i2_startblock, + __entry->i2_blockcount, + __entry->i2_refcount, + __entry->agbno) +) + +#define DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(name) \ +DEFINE_EVENT(xfs_refcount_double_extent_at_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ + struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2, \ + xfs_agblock_t agbno), \ + TP_ARGS(mp, agno, i1, i2, agbno)) + +/* triple-rcext tracepoint class */ +DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2, + struct xfs_refcount_irec *i3), + TP_ARGS(mp, agno, i1, i2, i3), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, i1_startblock) + __field(xfs_extlen_t, i1_blockcount) + __field(xfs_nlink_t, i1_refcount) + __field(xfs_agblock_t, i2_startblock) + __field(xfs_extlen_t, i2_blockcount) + __field(xfs_nlink_t, i2_refcount) + __field(xfs_agblock_t, i3_startblock) + __field(xfs_extlen_t, i3_blockcount) + __field(xfs_nlink_t, i3_refcount) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->i1_startblock = i1->rc_startblock; + __entry->i1_blockcount = i1->rc_blockcount; + __entry->i1_refcount = i1->rc_refcount; + __entry->i2_startblock = i2->rc_startblock; + __entry->i2_blockcount = i2->rc_blockcount; + __entry->i2_refcount = i2->rc_refcount; + __entry->i3_startblock = i3->rc_startblock; + __entry->i3_blockcount = i3->rc_blockcount; + __entry->i3_refcount = i3->rc_refcount; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u 
refcount %u -- " + "agbno %u len %u refcount %u -- " + "agbno %u len %u refcount %u\n", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->i1_startblock, + __entry->i1_blockcount, + __entry->i1_refcount, + __entry->i2_startblock, + __entry->i2_blockcount, + __entry->i2_refcount, + __entry->i3_startblock, + __entry->i3_blockcount, + __entry->i3_refcount) +); + +#define DEFINE_REFCOUNT_TRIPLE_EXTENT_EVENT(name) \ +DEFINE_EVENT(xfs_refcount_triple_extent_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ + struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2, \ + struct xfs_refcount_irec *i3), \ + TP_ARGS(mp, agno, i1, i2, i3)) + +/* refcount btree tracepoints */ +DEFINE_BUSY_EVENT(xfs_refcountbt_alloc_block); +DEFINE_BUSY_EVENT(xfs_refcountbt_free_block); +DEFINE_AG_BTREE_LOOKUP_EVENT(xfs_refcount_lookup); +DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_get); +DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_update); +DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_insert); +DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_delete); +DEFINE_AG_ERROR_EVENT(xfs_refcount_insert_error); +DEFINE_AG_ERROR_EVENT(xfs_refcount_delete_error); +DEFINE_AG_ERROR_EVENT(xfs_refcount_update_error); + +/* refcount adjustment tracepoints */ +DEFINE_AG_EXTENT_EVENT(xfs_refcount_increase); +DEFINE_AG_EXTENT_EVENT(xfs_refcount_decrease); +DEFINE_AG_EXTENT_EVENT(xfs_refcount_cow_increase); +DEFINE_AG_EXTENT_EVENT(xfs_refcount_cow_decrease); +DEFINE_REFCOUNT_TRIPLE_EXTENT_EVENT(xfs_refcount_merge_center_extents); +DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_modify_extent); +DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_recover_extent); +DEFINE_REFCOUNT_EXTENT_AT_EVENT(xfs_refcount_split_extent); +DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(xfs_refcount_merge_left_extent); +DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(xfs_refcount_merge_right_extent); +DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(xfs_refcount_find_left_extent); +DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(xfs_refcount_find_right_extent); +DEFINE_AG_ERROR_EVENT(xfs_refcount_adjust_error); +DEFINE_AG_ERROR_EVENT(xfs_refcount_adjust_cow_error); +DEFINE_AG_ERROR_EVENT(xfs_refcount_merge_center_extents_error); +DEFINE_AG_ERROR_EVENT(xfs_refcount_modify_extent_error); +DEFINE_AG_ERROR_EVENT(xfs_refcount_split_extent_error); +DEFINE_AG_ERROR_EVENT(xfs_refcount_merge_left_extent_error); +DEFINE_AG_ERROR_EVENT(xfs_refcount_merge_right_extent_error); +DEFINE_AG_ERROR_EVENT(xfs_refcount_find_left_extent_error); +DEFINE_AG_ERROR_EVENT(xfs_refcount_find_right_extent_error); + +/* reflink helpers */ +DEFINE_AG_EXTENT_EVENT(xfs_refcount_find_shared); +DEFINE_AG_EXTENT_EVENT(xfs_refcount_find_shared_result); +DEFINE_AG_ERROR_EVENT(xfs_refcount_find_shared_error); +#define DEFINE_REFCOUNT_DEFERRED_EVENT DEFINE_PHYS_EXTENT_DEFERRED_EVENT +DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refcount_defer); +DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refcount_deferred); + +TRACE_EVENT(xfs_refcount_finish_one_leftover, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + int type, xfs_agblock_t agbno, xfs_extlen_t len, + xfs_agblock_t new_agbno, xfs_extlen_t new_len), + TP_ARGS(mp, agno, type, agbno, len, new_agbno, new_len), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(int, type) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, len) + __field(xfs_agblock_t, new_agbno) + __field(xfs_extlen_t, new_len) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->type = type; + __entry->agbno = agbno; + __entry->len = len; + 
__entry->new_agbno = new_agbno; + __entry->new_len = new_len; + ), + TP_printk("dev %d:%d type %d agno %u agbno %u len %u new_agbno %u new_len %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->type, + __entry->agno, + __entry->agbno, + __entry->len, + __entry->new_agbno, + __entry->new_len) +); + +/* simple inode-based error/%ip tracepoint class */ +DECLARE_EVENT_CLASS(xfs_inode_error_class, + TP_PROTO(struct xfs_inode *ip, int error, unsigned long caller_ip), + TP_ARGS(ip, error, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, error) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->error = error; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d ino %llx error %d caller %ps", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->error, + (char *)__entry->caller_ip) +); + +#define DEFINE_INODE_ERROR_EVENT(name) \ +DEFINE_EVENT(xfs_inode_error_class, name, \ + TP_PROTO(struct xfs_inode *ip, int error, \ + unsigned long caller_ip), \ + TP_ARGS(ip, error, caller_ip)) + +/* reflink allocator */ +TRACE_EVENT(xfs_bmap_remap_alloc, + TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t fsbno, + xfs_extlen_t len), + TP_ARGS(ip, fsbno, len), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsblock_t, fsbno) + __field(xfs_extlen_t, len) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->fsbno = fsbno; + __entry->len = len; + ), + TP_printk("dev %d:%d ino 0x%llx fsbno 0x%llx len %x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->fsbno, + __entry->len) +); +DEFINE_INODE_ERROR_EVENT(xfs_bmap_remap_alloc_error); + +/* reflink tracepoint classes */ + +/* two-file io tracepoint class */ +DECLARE_EVENT_CLASS(xfs_double_io_class, + TP_PROTO(struct xfs_inode *src, xfs_off_t soffset, xfs_off_t len, + struct xfs_inode *dest, xfs_off_t doffset), + TP_ARGS(src, soffset, len, dest, doffset), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, src_ino) + __field(loff_t, src_isize) + __field(loff_t, src_disize) + __field(loff_t, src_offset) + __field(size_t, len) + __field(xfs_ino_t, dest_ino) + __field(loff_t, dest_isize) + __field(loff_t, dest_disize) + __field(loff_t, dest_offset) + ), + TP_fast_assign( + __entry->dev = VFS_I(src)->i_sb->s_dev; + __entry->src_ino = src->i_ino; + __entry->src_isize = VFS_I(src)->i_size; + __entry->src_disize = src->i_d.di_size; + __entry->src_offset = soffset; + __entry->len = len; + __entry->dest_ino = dest->i_ino; + __entry->dest_isize = VFS_I(dest)->i_size; + __entry->dest_disize = dest->i_d.di_size; + __entry->dest_offset = doffset; + ), + TP_printk("dev %d:%d count %zd " + "ino 0x%llx isize 0x%llx disize 0x%llx offset 0x%llx -> " + "ino 0x%llx isize 0x%llx disize 0x%llx offset 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->len, + __entry->src_ino, + __entry->src_isize, + __entry->src_disize, + __entry->src_offset, + __entry->dest_ino, + __entry->dest_isize, + __entry->dest_disize, + __entry->dest_offset) +) + +#define DEFINE_DOUBLE_IO_EVENT(name) \ +DEFINE_EVENT(xfs_double_io_class, name, \ + TP_PROTO(struct xfs_inode *src, xfs_off_t soffset, xfs_off_t len, \ + struct xfs_inode *dest, xfs_off_t doffset), \ + TP_ARGS(src, soffset, len, dest, doffset)) + +/* two-file vfs io tracepoint class */ +DECLARE_EVENT_CLASS(xfs_double_vfs_io_class, + TP_PROTO(struct inode *src, u64 
soffset, u64 len, + struct inode *dest, u64 doffset), + TP_ARGS(src, soffset, len, dest, doffset), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned long, src_ino) + __field(loff_t, src_isize) + __field(loff_t, src_offset) + __field(size_t, len) + __field(unsigned long, dest_ino) + __field(loff_t, dest_isize) + __field(loff_t, dest_offset) + ), + TP_fast_assign( + __entry->dev = src->i_sb->s_dev; + __entry->src_ino = src->i_ino; + __entry->src_isize = i_size_read(src); + __entry->src_offset = soffset; + __entry->len = len; + __entry->dest_ino = dest->i_ino; + __entry->dest_isize = i_size_read(dest); + __entry->dest_offset = doffset; + ), + TP_printk("dev %d:%d count %zd " + "ino 0x%lx isize 0x%llx offset 0x%llx -> " + "ino 0x%lx isize 0x%llx offset 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->len, + __entry->src_ino, + __entry->src_isize, + __entry->src_offset, + __entry->dest_ino, + __entry->dest_isize, + __entry->dest_offset) +) + +#define DEFINE_DOUBLE_VFS_IO_EVENT(name) \ +DEFINE_EVENT(xfs_double_vfs_io_class, name, \ + TP_PROTO(struct inode *src, u64 soffset, u64 len, \ + struct inode *dest, u64 doffset), \ + TP_ARGS(src, soffset, len, dest, doffset)) + +/* CoW write tracepoint */ +DECLARE_EVENT_CLASS(xfs_copy_on_write_class, + TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t lblk, xfs_fsblock_t pblk, + xfs_extlen_t len, xfs_fsblock_t new_pblk), + TP_ARGS(ip, lblk, pblk, len, new_pblk), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fileoff_t, lblk) + __field(xfs_fsblock_t, pblk) + __field(xfs_extlen_t, len) + __field(xfs_fsblock_t, new_pblk) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->lblk = lblk; + __entry->pblk = pblk; + __entry->len = len; + __entry->new_pblk = new_pblk; + ), + TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx pblk 0x%llx " + "len 0x%x new_pblk %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->lblk, + __entry->pblk, + __entry->len, + __entry->new_pblk) +) + +#define DEFINE_COW_EVENT(name) \ +DEFINE_EVENT(xfs_copy_on_write_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t lblk, xfs_fsblock_t pblk, \ + xfs_extlen_t len, xfs_fsblock_t new_pblk), \ + TP_ARGS(ip, lblk, pblk, len, new_pblk)) + +/* inode/irec events */ +DECLARE_EVENT_CLASS(xfs_inode_irec_class, + TP_PROTO(struct xfs_inode *ip, struct xfs_bmbt_irec *irec), + TP_ARGS(ip, irec), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fileoff_t, lblk) + __field(xfs_extlen_t, len) + __field(xfs_fsblock_t, pblk) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->lblk = irec->br_startoff; + __entry->len = irec->br_blockcount; + __entry->pblk = irec->br_startblock; + ), + TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->lblk, + __entry->len, + __entry->pblk) +); +#define DEFINE_INODE_IREC_EVENT(name) \ +DEFINE_EVENT(xfs_inode_irec_class, name, \ + TP_PROTO(struct xfs_inode *ip, struct xfs_bmbt_irec *irec), \ + TP_ARGS(ip, irec)) + +/* refcount/reflink tracepoint definitions */ + +/* reflink tracepoints */ +DEFINE_INODE_EVENT(xfs_reflink_set_inode_flag); +DEFINE_INODE_EVENT(xfs_reflink_unset_inode_flag); +DEFINE_ITRUNC_EVENT(xfs_reflink_update_inode_size); +DEFINE_IOMAP_EVENT(xfs_reflink_remap_imap); +TRACE_EVENT(xfs_reflink_remap_blocks_loop, + TP_PROTO(struct xfs_inode *src, xfs_fileoff_t soffset, 
+ xfs_filblks_t len, struct xfs_inode *dest, + xfs_fileoff_t doffset), + TP_ARGS(src, soffset, len, dest, doffset), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, src_ino) + __field(xfs_fileoff_t, src_lblk) + __field(xfs_filblks_t, len) + __field(xfs_ino_t, dest_ino) + __field(xfs_fileoff_t, dest_lblk) + ), + TP_fast_assign( + __entry->dev = VFS_I(src)->i_sb->s_dev; + __entry->src_ino = src->i_ino; + __entry->src_lblk = soffset; + __entry->len = len; + __entry->dest_ino = dest->i_ino; + __entry->dest_lblk = doffset; + ), + TP_printk("dev %d:%d len 0x%llx " + "ino 0x%llx offset 0x%llx blocks -> " + "ino 0x%llx offset 0x%llx blocks", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->len, + __entry->src_ino, + __entry->src_lblk, + __entry->dest_ino, + __entry->dest_lblk) +); +TRACE_EVENT(xfs_reflink_punch_range, + TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t lblk, + xfs_extlen_t len), + TP_ARGS(ip, lblk, len), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fileoff_t, lblk) + __field(xfs_extlen_t, len) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->lblk = lblk; + __entry->len = len; + ), + TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->lblk, + __entry->len) +); +TRACE_EVENT(xfs_reflink_remap, + TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t lblk, + xfs_extlen_t len, xfs_fsblock_t new_pblk), + TP_ARGS(ip, lblk, len, new_pblk), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fileoff_t, lblk) + __field(xfs_extlen_t, len) + __field(xfs_fsblock_t, new_pblk) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->lblk = lblk; + __entry->len = len; + __entry->new_pblk = new_pblk; + ), + TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x new_pblk %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->lblk, + __entry->len, + __entry->new_pblk) +); +DEFINE_DOUBLE_IO_EVENT(xfs_reflink_remap_range); +DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_range_error); +DEFINE_INODE_ERROR_EVENT(xfs_reflink_set_inode_flag_error); +DEFINE_INODE_ERROR_EVENT(xfs_reflink_update_inode_size_error); +DEFINE_INODE_ERROR_EVENT(xfs_reflink_reflink_main_loop_error); +DEFINE_INODE_ERROR_EVENT(xfs_reflink_read_iomap_error); +DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_blocks_error); +DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_extent_error); + +/* dedupe tracepoints */ +DEFINE_DOUBLE_IO_EVENT(xfs_reflink_compare_extents); +DEFINE_INODE_ERROR_EVENT(xfs_reflink_compare_extents_error); + +/* ioctl tracepoints */ +DEFINE_DOUBLE_VFS_IO_EVENT(xfs_ioctl_reflink); +DEFINE_DOUBLE_VFS_IO_EVENT(xfs_ioctl_clone_range); +DEFINE_DOUBLE_VFS_IO_EVENT(xfs_ioctl_file_extent_same); +TRACE_EVENT(xfs_ioctl_clone, + TP_PROTO(struct inode *src, struct inode *dest), + TP_ARGS(src, dest), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned long, src_ino) + __field(loff_t, src_isize) + __field(unsigned long, dest_ino) + __field(loff_t, dest_isize) + ), + TP_fast_assign( + __entry->dev = src->i_sb->s_dev; + __entry->src_ino = src->i_ino; + __entry->src_isize = i_size_read(src); + __entry->dest_ino = dest->i_ino; + __entry->dest_isize = i_size_read(dest); + ), + TP_printk("dev %d:%d " + "ino 0x%lx isize 0x%llx -> " + "ino 0x%lx isize 0x%llx\n", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->src_ino, + __entry->src_isize, + __entry->dest_ino, + 
__entry->dest_isize) +); + +/* unshare tracepoints */ +DEFINE_SIMPLE_IO_EVENT(xfs_reflink_unshare); +DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cow_eof_block); +DEFINE_PAGE_EVENT(xfs_reflink_unshare_page); +DEFINE_INODE_ERROR_EVENT(xfs_reflink_unshare_error); +DEFINE_INODE_ERROR_EVENT(xfs_reflink_cow_eof_block_error); +DEFINE_INODE_ERROR_EVENT(xfs_reflink_dirty_page_error); + +/* copy on write */ +DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared); +DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc); +DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); +DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); + +DEFINE_RW_EVENT(xfs_reflink_reserve_cow_range); +DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range); + +DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write); +DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping); +DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec); + +DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range); +DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow); +DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap); +DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece); + +DEFINE_INODE_ERROR_EVENT(xfs_reflink_reserve_cow_range_error); +DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error); +DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error); +DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error); + +DEFINE_COW_EVENT(xfs_reflink_fork_buf); +DEFINE_COW_EVENT(xfs_reflink_finish_fork_buf); +DEFINE_INODE_ERROR_EVENT(xfs_reflink_fork_buf_error); +DEFINE_INODE_ERROR_EVENT(xfs_reflink_finish_fork_buf_error); + +DEFINE_INODE_EVENT(xfs_reflink_cancel_pending_cow); +DEFINE_INODE_IREC_EVENT(xfs_reflink_cancel_cow); +DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_pending_cow_error); + +/* rmap swapext tracepoints */ +DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap); +DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap_piece); +DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index e2bf86aad33d..61b7fbdd3ebd 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -36,6 +36,11 @@ struct xfs_busy_extent; struct xfs_rud_log_item; struct xfs_rui_log_item; struct xfs_btree_cur; +struct xfs_cui_log_item; +struct xfs_cud_log_item; +struct xfs_defer_ops; +struct xfs_bui_log_item; +struct xfs_bud_log_item; typedef struct xfs_log_item { struct list_head li_ail; /* AIL pointers */ @@ -248,4 +253,28 @@ int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp, xfs_fsblock_t startblock, xfs_filblks_t blockcount, xfs_exntst_t state, struct xfs_btree_cur **pcur); +/* refcount updates */ +enum xfs_refcount_intent_type; + +void xfs_refcount_update_init_defer_op(void); +struct xfs_cud_log_item *xfs_trans_get_cud(struct xfs_trans *tp, + struct xfs_cui_log_item *cuip); +int xfs_trans_log_finish_refcount_update(struct xfs_trans *tp, + struct xfs_cud_log_item *cudp, struct xfs_defer_ops *dfops, + enum xfs_refcount_intent_type type, xfs_fsblock_t startblock, + xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb, + xfs_extlen_t *new_len, struct xfs_btree_cur **pcur); + +/* mapping updates */ +enum xfs_bmap_intent_type; + +void xfs_bmap_update_init_defer_op(void); +struct xfs_bud_log_item *xfs_trans_get_bud(struct xfs_trans *tp, + struct xfs_bui_log_item *buip); +int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp, + struct xfs_bud_log_item *rudp, struct xfs_defer_ops *dfops, + enum xfs_bmap_intent_type type, struct xfs_inode *ip, + int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock, + xfs_filblks_t 
diff --git a/fs/xfs/xfs_trans_bmap.c b/fs/xfs/xfs_trans_bmap.c new file mode 100644 index 000000000000..6408e7d7c08c --- /dev/null +++ b/fs/xfs/xfs_trans_bmap.c @@ -0,0 +1,249 @@ +/* + * Copyright (C) 2016 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_bmap_item.h" +#include "xfs_alloc.h" +#include "xfs_bmap.h" +#include "xfs_inode.h" + +/* + * This routine is called to allocate a "bmap update done" + * log item. + */ +struct xfs_bud_log_item * +xfs_trans_get_bud( + struct xfs_trans *tp, + struct xfs_bui_log_item *buip) +{ + struct xfs_bud_log_item *budp; + + budp = xfs_bud_init(tp->t_mountp, buip); + xfs_trans_add_item(tp, &budp->bud_item); + return budp; +} + +/* + * Finish a bmap update and log it to the BUD. Note that the + * transaction is marked dirty regardless of whether the bmap update + * succeeds or fails to support the BUI/BUD lifecycle rules. + */ +int +xfs_trans_log_finish_bmap_update( + struct xfs_trans *tp, + struct xfs_bud_log_item *budp, + struct xfs_defer_ops *dop, + enum xfs_bmap_intent_type type, + struct xfs_inode *ip, + int whichfork, + xfs_fileoff_t startoff, + xfs_fsblock_t startblock, + xfs_filblks_t blockcount, + xfs_exntst_t state) +{ + int error; + + error = xfs_bmap_finish_one(tp, dop, ip, type, whichfork, startoff, + startblock, blockcount, state); + + /* + * Mark the transaction dirty, even on error. This ensures the + * transaction is aborted, which: + * + * 1.) releases the BUI and frees the BUD + * 2.) shuts down the filesystem + */ + tp->t_flags |= XFS_TRANS_DIRTY; + budp->bud_item.li_desc->lid_flags |= XFS_LID_DIRTY; + + return error; +} + +/* Sort bmap intents by inode. */ +static int +xfs_bmap_update_diff_items( + void *priv, + struct list_head *a, + struct list_head *b) +{ + struct xfs_bmap_intent *ba; + struct xfs_bmap_intent *bb; + + ba = container_of(a, struct xfs_bmap_intent, bi_list); + bb = container_of(b, struct xfs_bmap_intent, bi_list); + return ba->bi_owner->i_ino - bb->bi_owner->i_ino; +} + +/* Get a BUI. */ +STATIC void * +xfs_bmap_update_create_intent( + struct xfs_trans *tp, + unsigned int count) +{ + struct xfs_bui_log_item *buip; + + ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS); + ASSERT(tp != NULL); + + buip = xfs_bui_init(tp->t_mountp); + ASSERT(buip != NULL); + + /* + * Get a log_item_desc to point at the new item. + */ + xfs_trans_add_item(tp, &buip->bui_item); + return buip; +}
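One editorial observation on xfs_bmap_update_diff_items() above: it returns the difference of two 64-bit inode numbers truncated to int, so owners more than 2^31 inodes apart can compare with the wrong sign. The sketch below, standalone userspace C assuming nothing beyond the standard library, shows the truncation and the always-safe comparison form.

#include <stdint.h>
#include <stdio.h>

/* Three-way compare of two 64-bit inode numbers without computing a
 * difference: subtracting uint64_t values and truncating the result to
 * int (as the comparator above does) can return the wrong sign when the
 * inodes are more than 2^31 apart.  Explicit comparisons are safe. */
static int ino_cmp(uint64_t a, uint64_t b)
{
	if (a < b)
		return -1;
	if (a > b)
		return 1;
	return 0;
}

int main(void)
{
	/* 0x100000000ULL - 1 truncated to int is 0xffffffff == -1, i.e.
	 * "less than", even though a > b. */
	printf("%d\n", ino_cmp(0x100000000ULL, 1));	/* prints 1 */
	return 0;
}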
+ +/* Set the map extent flags for this mapping. */ +static void +xfs_trans_set_bmap_flags( + struct xfs_map_extent *bmap, + enum xfs_bmap_intent_type type, + int whichfork, + xfs_exntst_t state) +{ + bmap->me_flags = 0; + switch (type) { + case XFS_BMAP_MAP: + case XFS_BMAP_UNMAP: + bmap->me_flags = type; + break; + default: + ASSERT(0); + } + if (state == XFS_EXT_UNWRITTEN) + bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN; + if (whichfork == XFS_ATTR_FORK) + bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK; +} + +/* Log bmap updates in the intent item. */ +STATIC void +xfs_bmap_update_log_item( + struct xfs_trans *tp, + void *intent, + struct list_head *item) +{ + struct xfs_bui_log_item *buip = intent; + struct xfs_bmap_intent *bmap; + uint next_extent; + struct xfs_map_extent *map; + + bmap = container_of(item, struct xfs_bmap_intent, bi_list); + + tp->t_flags |= XFS_TRANS_DIRTY; + buip->bui_item.li_desc->lid_flags |= XFS_LID_DIRTY; + + /* + * atomic_inc_return gives us the value after the increment; + * we want to use it as an array index so we need to subtract 1 from + * it. + */ + next_extent = atomic_inc_return(&buip->bui_next_extent) - 1; + ASSERT(next_extent < buip->bui_format.bui_nextents); + map = &buip->bui_format.bui_extents[next_extent]; + map->me_owner = bmap->bi_owner->i_ino; + map->me_startblock = bmap->bi_bmap.br_startblock; + map->me_startoff = bmap->bi_bmap.br_startoff; + map->me_len = bmap->bi_bmap.br_blockcount; + xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork, + bmap->bi_bmap.br_state); +} + +/* Get a BUD so we can process all the deferred bmap updates. */ +STATIC void * +xfs_bmap_update_create_done( + struct xfs_trans *tp, + void *intent, + unsigned int count) +{ + return xfs_trans_get_bud(tp, intent); +} + +/* Process a deferred bmap update. */ +STATIC int +xfs_bmap_update_finish_item( + struct xfs_trans *tp, + struct xfs_defer_ops *dop, + struct list_head *item, + void *done_item, + void **state) +{ + struct xfs_bmap_intent *bmap; + int error; + + bmap = container_of(item, struct xfs_bmap_intent, bi_list); + error = xfs_trans_log_finish_bmap_update(tp, done_item, dop, + bmap->bi_type, + bmap->bi_owner, bmap->bi_whichfork, + bmap->bi_bmap.br_startoff, + bmap->bi_bmap.br_startblock, + bmap->bi_bmap.br_blockcount, + bmap->bi_bmap.br_state); + kmem_free(bmap); + return error; +} + +/* Abort all pending BUIs. */ +STATIC void +xfs_bmap_update_abort_intent( + void *intent) +{ + xfs_bui_release(intent); +} + +/* Cancel a deferred bmap update. */ +STATIC void +xfs_bmap_update_cancel_item( + struct list_head *item) +{ + struct xfs_bmap_intent *bmap; + + bmap = container_of(item, struct xfs_bmap_intent, bi_list); + kmem_free(bmap); +} + +static const struct xfs_defer_op_type xfs_bmap_update_defer_type = { + .type = XFS_DEFER_OPS_TYPE_BMAP, + .max_items = XFS_BUI_MAX_FAST_EXTENTS, + .diff_items = xfs_bmap_update_diff_items, + .create_intent = xfs_bmap_update_create_intent, + .abort_intent = xfs_bmap_update_abort_intent, + .log_item = xfs_bmap_update_log_item, + .create_done = xfs_bmap_update_create_done, + .finish_item = xfs_bmap_update_finish_item, + .cancel_item = xfs_bmap_update_cancel_item, +}; + +/* Register the deferred op type. */ +void +xfs_bmap_update_init_defer_op(void) +{ + xfs_defer_init_op_type(&xfs_bmap_update_defer_type); +}
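Both xfs_bmap_update_log_item() above and xfs_refcount_update_log_item() below reserve a private slot in the intent item's extent array with atomic_inc_return(...) - 1. A hedged userspace rendering of the same idiom with C11 atomics, where atomic_fetch_add returns the old value and the subtraction disappears:

#include <stdatomic.h>
#include <stdio.h>

#define NEXTENTS 4

static atomic_uint next_extent;	/* mirrors bui_next_extent/cui_next_extent */
static unsigned int extents[NEXTENTS];

/* Reserve a private slot in a shared array: fetch_add returns the old
 * value, so each caller gets a unique index with no further locking. */
static unsigned int reserve_slot(void)
{
	return atomic_fetch_add(&next_extent, 1);
}

int main(void)
{
	for (int i = 0; i < NEXTENTS; i++)
		extents[reserve_slot()] = i;
	for (int i = 0; i < NEXTENTS; i++)
		printf("slot %d = %u\n", i, extents[i]);
	return 0;
}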
diff --git a/fs/xfs/xfs_trans_refcount.c b/fs/xfs/xfs_trans_refcount.c new file mode 100644 index 000000000000..94c1877af834 --- /dev/null +++ b/fs/xfs/xfs_trans_refcount.c @@ -0,0 +1,264 @@ +/* + * Copyright (C) 2016 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_refcount_item.h" +#include "xfs_alloc.h" +#include "xfs_refcount.h" + +/* + * This routine is called to allocate a "refcount update done" + * log item. + */ +struct xfs_cud_log_item * +xfs_trans_get_cud( + struct xfs_trans *tp, + struct xfs_cui_log_item *cuip) +{ + struct xfs_cud_log_item *cudp; + + cudp = xfs_cud_init(tp->t_mountp, cuip); + xfs_trans_add_item(tp, &cudp->cud_item); + return cudp; +} + +/* + * Finish a refcount update and log it to the CUD. Note that the + * transaction is marked dirty regardless of whether the refcount + * update succeeds or fails to support the CUI/CUD lifecycle rules. + */ +int +xfs_trans_log_finish_refcount_update( + struct xfs_trans *tp, + struct xfs_cud_log_item *cudp, + struct xfs_defer_ops *dop, + enum xfs_refcount_intent_type type, + xfs_fsblock_t startblock, + xfs_extlen_t blockcount, + xfs_fsblock_t *new_fsb, + xfs_extlen_t *new_len, + struct xfs_btree_cur **pcur) +{ + int error; + + error = xfs_refcount_finish_one(tp, dop, type, startblock, + blockcount, new_fsb, new_len, pcur); + + /* + * Mark the transaction dirty, even on error. This ensures the + * transaction is aborted, which: + * + * 1.) releases the CUI and frees the CUD + * 2.) shuts down the filesystem + */ + tp->t_flags |= XFS_TRANS_DIRTY; + cudp->cud_item.li_desc->lid_flags |= XFS_LID_DIRTY; + + return error; +} + +/* Sort refcount intents by AG. */ +static int +xfs_refcount_update_diff_items( + void *priv, + struct list_head *a, + struct list_head *b) +{ + struct xfs_mount *mp = priv; + struct xfs_refcount_intent *ra; + struct xfs_refcount_intent *rb; + + ra = container_of(a, struct xfs_refcount_intent, ri_list); + rb = container_of(b, struct xfs_refcount_intent, ri_list); + return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) - + XFS_FSB_TO_AGNO(mp, rb->ri_startblock); +} + +/* Get a CUI. */ +STATIC void * +xfs_refcount_update_create_intent( + struct xfs_trans *tp, + unsigned int count) +{ + struct xfs_cui_log_item *cuip; + + ASSERT(tp != NULL); + ASSERT(count > 0); + + cuip = xfs_cui_init(tp->t_mountp, count); + ASSERT(cuip != NULL); + + /* + * Get a log_item_desc to point at the new item. + */ + xfs_trans_add_item(tp, &cuip->cui_item); + return cuip; +}
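The helper that follows packs the intent type straight into the low bits of pe_flags, relying on the enum values fitting under the flag bits. A small standalone sketch of that packing style, with invented values and an explicit mask check standing in for the ASSERT:

#include <assert.h>
#include <stdio.h>

/* Illustrative flag layout: low bits carry an operation type, high bits
 * carry modifiers -- the packing style xfs_trans_set_refcount_flags and
 * xfs_trans_set_bmap_flags use.  All values here are invented. */
enum op_type { OP_INCREASE = 1, OP_DECREASE = 2 };
#define OP_TYPE_MASK	0x0f
#define OP_FLAG_COW	(1 << 4)

static unsigned int pack_flags(enum op_type type, int is_cow)
{
	unsigned int flags = 0;

	assert((type & ~OP_TYPE_MASK) == 0);	/* type must fit the mask */
	flags |= type;
	if (is_cow)
		flags |= OP_FLAG_COW;
	return flags;
}

int main(void)
{
	printf("0x%x\n", pack_flags(OP_DECREASE, 1));	/* prints 0x12 */
	return 0;
}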
+ +/* Set the phys extent flags for this refcount update. */ +static void +xfs_trans_set_refcount_flags( + struct xfs_phys_extent *refc, + enum xfs_refcount_intent_type type) +{ + refc->pe_flags = 0; + switch (type) { + case XFS_REFCOUNT_INCREASE: + case XFS_REFCOUNT_DECREASE: + case XFS_REFCOUNT_ALLOC_COW: + case XFS_REFCOUNT_FREE_COW: + refc->pe_flags |= type; + break; + default: + ASSERT(0); + } +} + +/* Log refcount updates in the intent item. */ +STATIC void +xfs_refcount_update_log_item( + struct xfs_trans *tp, + void *intent, + struct list_head *item) +{ + struct xfs_cui_log_item *cuip = intent; + struct xfs_refcount_intent *refc; + uint next_extent; + struct xfs_phys_extent *ext; + + refc = container_of(item, struct xfs_refcount_intent, ri_list); + + tp->t_flags |= XFS_TRANS_DIRTY; + cuip->cui_item.li_desc->lid_flags |= XFS_LID_DIRTY; + + /* + * atomic_inc_return gives us the value after the increment; + * we want to use it as an array index so we need to subtract 1 from + * it. + */ + next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1; + ASSERT(next_extent < cuip->cui_format.cui_nextents); + ext = &cuip->cui_format.cui_extents[next_extent]; + ext->pe_startblock = refc->ri_startblock; + ext->pe_len = refc->ri_blockcount; + xfs_trans_set_refcount_flags(ext, refc->ri_type); +} + +/* Get a CUD so we can process all the deferred refcount updates. */ +STATIC void * +xfs_refcount_update_create_done( + struct xfs_trans *tp, + void *intent, + unsigned int count) +{ + return xfs_trans_get_cud(tp, intent); +} + +/* Process a deferred refcount update. */ +STATIC int +xfs_refcount_update_finish_item( + struct xfs_trans *tp, + struct xfs_defer_ops *dop, + struct list_head *item, + void *done_item, + void **state) +{ + struct xfs_refcount_intent *refc; + xfs_fsblock_t new_fsb; + xfs_extlen_t new_aglen; + int error; + + refc = container_of(item, struct xfs_refcount_intent, ri_list); + error = xfs_trans_log_finish_refcount_update(tp, done_item, dop, + refc->ri_type, + refc->ri_startblock, + refc->ri_blockcount, + &new_fsb, &new_aglen, + (struct xfs_btree_cur **)state); + /* Did we run out of reservation? Requeue what we didn't finish. */ + if (!error && new_aglen > 0) { + ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE || + refc->ri_type == XFS_REFCOUNT_DECREASE); + refc->ri_startblock = new_fsb; + refc->ri_blockcount = new_aglen; + return -EAGAIN; + } + kmem_free(refc); + return error; +} + +/* Clean up after processing deferred refcounts. */ +STATIC void +xfs_refcount_update_finish_cleanup( + struct xfs_trans *tp, + void *state, + int error) +{ + struct xfs_btree_cur *rcur = state; + + xfs_refcount_finish_one_cleanup(tp, rcur, error); +} + +/* Abort all pending CUIs. */ +STATIC void +xfs_refcount_update_abort_intent( + void *intent) +{ + xfs_cui_release(intent); +} + +/* Cancel a deferred refcount update.
*/ +STATIC void +xfs_refcount_update_cancel_item( + struct list_head *item) +{ + struct xfs_refcount_intent *refc; + + refc = container_of(item, struct xfs_refcount_intent, ri_list); + kmem_free(refc); +} + +static const struct xfs_defer_op_type xfs_refcount_update_defer_type = { + .type = XFS_DEFER_OPS_TYPE_REFCOUNT, + .max_items = XFS_CUI_MAX_FAST_EXTENTS, + .diff_items = xfs_refcount_update_diff_items, + .create_intent = xfs_refcount_update_create_intent, + .abort_intent = xfs_refcount_update_abort_intent, + .log_item = xfs_refcount_update_log_item, + .create_done = xfs_refcount_update_create_done, + .finish_item = xfs_refcount_update_finish_item, + .finish_cleanup = xfs_refcount_update_finish_cleanup, + .cancel_item = xfs_refcount_update_cancel_item, +}; + +/* Register the deferred op type. */ +void +xfs_refcount_update_init_defer_op(void) +{ + xfs_defer_init_op_type(&xfs_refcount_update_defer_type); +} diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c index 5a50ef881568..9ead064b5e90 100644 --- a/fs/xfs/xfs_trans_rmap.c +++ b/fs/xfs/xfs_trans_rmap.c @@ -48,12 +48,21 @@ xfs_trans_set_rmap_flags( case XFS_RMAP_MAP: rmap->me_flags |= XFS_RMAP_EXTENT_MAP; break; + case XFS_RMAP_MAP_SHARED: + rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED; + break; case XFS_RMAP_UNMAP: rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP; break; + case XFS_RMAP_UNMAP_SHARED: + rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED; + break; case XFS_RMAP_CONVERT: rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT; break; + case XFS_RMAP_CONVERT_SHARED: + rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED; + break; case XFS_RMAP_ALLOC: rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC; break; diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index 6df9b0749671..cc6bb319e464 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -69,10 +69,6 @@ struct exception_table_entry unsigned long insn, fixup; }; -/* Returns 0 if exception not found and fixup otherwise. 
*/ -extern unsigned long search_exception_table(unsigned long); - - /* * architectures with an MMU should override these two */ diff --git a/include/dt-bindings/thermal/tegra124-soctherm.h b/include/dt-bindings/thermal/tegra124-soctherm.h index 729ab9fc325e..2a99f1d52bb5 100644 --- a/include/dt-bindings/thermal/tegra124-soctherm.h +++ b/include/dt-bindings/thermal/tegra124-soctherm.h @@ -11,4 +11,9 @@ #define TEGRA124_SOCTHERM_SENSOR_PLLX 3 #define TEGRA124_SOCTHERM_SENSOR_NUM 4 +#define TEGRA_SOCTHERM_THROT_LEVEL_LOW 0 +#define TEGRA_SOCTHERM_THROT_LEVEL_MED 1 +#define TEGRA_SOCTHERM_THROT_LEVEL_HIGH 2 +#define TEGRA_SOCTHERM_THROT_LEVEL_NONE -1 + #endif diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h index e82e3ee2c54a..1035879b322c 100644 --- a/include/linux/amba/clcd.h +++ b/include/linux/amba/clcd.h @@ -67,6 +67,17 @@ #define CNTL_LDMAFIFOTIME (1 << 15) #define CNTL_WATERMARK (1 << 16) +/* ST Microelectronics variant bits */ +#define CNTL_ST_1XBPP_444 0x0 +#define CNTL_ST_1XBPP_5551 (1 << 17) +#define CNTL_ST_1XBPP_565 (1 << 18) +#define CNTL_ST_CDWID_12 0x0 +#define CNTL_ST_CDWID_16 (1 << 19) +#define CNTL_ST_CDWID_18 (1 << 20) +#define CNTL_ST_CDWID_24 ((1 << 19)|(1 << 20)) +#define CNTL_ST_CEAEN (1 << 21) +#define CNTL_ST_LCDBPP24_PACKED (6 << 1) + enum { /* individual formats */ CLCD_CAP_RGB444 = (1 << 0), @@ -93,6 +104,8 @@ enum { CLCD_CAP_ALL = CLCD_CAP_BGR | CLCD_CAP_RGB, }; +struct backlight_device; + struct clcd_panel { struct fb_videomode mode; signed short width; /* width in mm */ @@ -105,6 +118,13 @@ struct clcd_panel { fixedtimings:1, grayscale:1; unsigned int connector; + struct backlight_device *backlight; + /* + * If the B/R lines are switched between the CLCD + * and the panel we need to know this and not try to + * compensate with the BGR bit in the control register. + */ + bool bgr_connection; }; struct clcd_regs { @@ -170,11 +190,38 @@ struct clcd_board { struct amba_device; struct clk; +/** + * struct clcd_vendor_data - holds hardware (IP-block) vendor-specific + * variant information + * + * @clock_timregs: the CLCD needs to be clocked when accessing the + * timer registers, or the hardware will hang. + * @packed_24_bit_pixels: this variant supports 24bit packed pixel data, + * so that RGB accesses 3 bytes at a time, not just on even 32bit + * boundaries, packing the pixel data in memory. ST Microelectronics + * have this. + * @st_bitmux_control: ST Microelectronics have implemented output + * bit line multiplexing into the CLCD control register. This indicates + * that we need to use this. 
+ * @init_board: custom board init function for this variant + * @init_panel: custom panel init function for this variant + */ +struct clcd_vendor_data { + bool clock_timregs; + bool packed_24_bit_pixels; + bool st_bitmux_control; + int (*init_board)(struct amba_device *adev, + struct clcd_board *board); + int (*init_panel)(struct clcd_fb *fb, + struct device_node *panel); +}; + /* this data structure describes each frame buffer device we find */ struct clcd_fb { struct fb_info fb; struct amba_device *dev; struct clk *clk; + struct clcd_vendor_data *vendor; struct clcd_panel *panel; struct clcd_board *board; void *board_data; @@ -231,16 +278,22 @@ static inline void clcdfb_decode(struct clcd_fb *fb, struct clcd_regs *regs) if (var->grayscale) val |= CNTL_LCDBW; - if (fb->panel->caps && fb->board->caps && - var->bits_per_pixel >= 16) { + if (fb->panel->caps && fb->board->caps && var->bits_per_pixel >= 16) { /* * if board and panel supply capabilities, we can support - * changing BGR/RGB depending on supplied parameters + * changing BGR/RGB depending on supplied parameters. Here + * we switch to what the framebuffer is providing if need + * be, so if the framebuffer is BGR but the display connection + * is RGB (first case) we switch it around. Vice versa mutatis + * mutandis if the framebuffer is RGB but the display connection + * is BGR, we flip it around. */ if (var->red.offset == 0) val &= ~CNTL_BGR; else val |= CNTL_BGR; + if (fb->panel->bgr_connection) + val ^= CNTL_BGR; } switch (var->bits_per_pixel) { @@ -270,6 +323,10 @@ static inline void clcdfb_decode(struct clcd_fb *fb, struct clcd_regs *regs) else val |= CNTL_LCDBPP16_444; break; + case 24: + /* Modified variant supporting 24 bit packed pixels */ + val |= CNTL_ST_LCDBPP24_PACKED; + break; case 32: val |= CNTL_LCDBPP24; break; diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index b03c0625fa6e..5ab958cdc50b 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -157,12 +157,13 @@ struct fid { * @fh_to_dentry is given a &struct super_block (@sb) and a file handle * fragment (@fh, @fh_len). It should return a &struct dentry which refers * to the same file that the file handle fragment refers to. If it cannot, - * it should return a %NULL pointer if the file was found but no acceptable - * &dentries were available, or an %ERR_PTR error code indicating why it - * couldn't be found (e.g. %ENOENT or %ENOMEM). Any suitable dentry can be - * returned including, if necessary, a new dentry created with d_alloc_root. - * The caller can then find any other extant dentries by following the - * d_alias links. + * it should return a %NULL pointer if the file cannot be found, or an + * %ERR_PTR error code of %ENOMEM if a memory allocation failure occurred. + * Any other error code is treated like %NULL, and will cause an %ESTALE error + * for callers of exportfs_decode_fh(). + * Any suitable dentry can be returned including, if necessary, a new dentry + * created with d_alloc_root. The caller can then find any other extant + * dentries by following the d_alias links. 
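Editorial aside: the fh_to_dentry contract restated above (NULL when the file cannot be found, ERR_PTR of -ENOMEM for allocation failure, anything else surfacing to exportfs_decode_fh() callers as ESTALE) is easy to get subtly wrong. A standalone userspace sketch of that mapping, with the ERR_PTR helpers re-implemented locally purely for illustration:

#include <errno.h>
#include <stdio.h>

/* Local stand-ins for the kernel's ERR_PTR machinery (illustration only) */
#define ERR_PTR(err)	((void *)(long)(err))
#define PTR_ERR(ptr)	((long)(ptr))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-4095)

/* Map an fh_to_dentry-style result onto the caller-visible error,
 * following the rules documented above: NULL and unexpected error codes
 * both surface as -ESTALE, only -ENOMEM is passed through. */
static long decode_result(void *dentry)
{
	if (dentry == NULL)
		return -ESTALE;
	if (IS_ERR(dentry))
		return PTR_ERR(dentry) == -ENOMEM ? -ENOMEM : -ESTALE;
	return 0;	/* success */
}

int main(void)
{
	printf("%ld\n", decode_result(NULL));			/* -ESTALE */
	printf("%ld\n", decode_result(ERR_PTR(-ENOMEM)));	/* -ENOMEM */
	printf("%ld\n", decode_result(ERR_PTR(-EACCES)));	/* -ESTALE */
	return 0;
}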
* * fh_to_parent: * Same as @fh_to_dentry, except that it returns a pointer to the parent diff --git a/include/linux/falloc.h b/include/linux/falloc.h index 996111000a8c..7494dc67c66f 100644 --- a/include/linux/falloc.h +++ b/include/linux/falloc.h @@ -25,6 +25,7 @@ struct space_resv { FALLOC_FL_PUNCH_HOLE | \ FALLOC_FL_COLLAPSE_RANGE | \ FALLOC_FL_ZERO_RANGE | \ - FALLOC_FL_INSERT_RANGE) + FALLOC_FL_INSERT_RANGE | \ + FALLOC_FL_UNSHARE_RANGE) #endif /* _FALLOC_H_ */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 77c141797152..58276144ba81 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -92,12 +92,21 @@ __mlx5_mask(typ, fld)) ___t; \ }) -#define MLX5_SET64(typ, p, fld, v) do { \ +#define __MLX5_SET64(typ, p, fld, v) do { \ BUILD_BUG_ON(__mlx5_bit_sz(typ, fld) != 64); \ - BUILD_BUG_ON(__mlx5_bit_off(typ, fld) % 64); \ *((__be64 *)(p) + __mlx5_64_off(typ, fld)) = cpu_to_be64(v); \ } while (0) +#define MLX5_SET64(typ, p, fld, v) do { \ + BUILD_BUG_ON(__mlx5_bit_off(typ, fld) % 64); \ + __MLX5_SET64(typ, p, fld, v); \ +} while (0) + +#define MLX5_ARRAY_SET64(typ, p, fld, idx, v) do { \ + BUILD_BUG_ON(__mlx5_bit_off(typ, fld) % 64); \ + __MLX5_SET64(typ, p, fld[idx], v); \ +} while (0) + #define MLX5_GET64(typ, p, fld) be64_to_cpu(*((__be64 *)(p) + __mlx5_64_off(typ, fld))) #define MLX5_GET64_PR(typ, p, fld) ({ \ diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index c6564ada9beb..9094faf0699d 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -67,6 +67,7 @@ struct nfs4_stateid_struct { NFS4_DELEGATION_STATEID_TYPE, NFS4_LAYOUT_STATEID_TYPE, NFS4_PNFS_DS_STATEID_TYPE, + NFS4_REVOKED_STATEID_TYPE, } type; }; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 14a762d2734d..b34097c67848 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -103,6 +103,9 @@ struct nfs_client { #define NFS_SP4_MACH_CRED_WRITE 5 /* WRITE */ #define NFS_SP4_MACH_CRED_COMMIT 6 /* COMMIT */ #define NFS_SP4_MACH_CRED_PNFS_CLEANUP 7 /* LAYOUTRETURN */ +#if IS_ENABLED(CONFIG_NFS_V4_1) + wait_queue_head_t cl_lock_waitq; +#endif /* CONFIG_NFS_V4_1 */ #endif /* CONFIG_NFS_V4 */ /* Our own IP address, as a null-terminated string. diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7cc0deee5bde..beb1e10f446e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -125,6 +125,11 @@ struct nfs_fattr { | NFS_ATTR_FATTR_V4_SECURITY_LABEL) /* + * Maximal number of supported layout drivers. 
+ */ +#define NFS_MAX_LAYOUT_TYPES 8 + +/* * Info on the file system */ struct nfs_fsinfo { @@ -139,7 +144,8 @@ struct nfs_fsinfo { __u64 maxfilesize; struct timespec time_delta; /* server time granularity */ __u32 lease_time; /* in seconds */ - __u32 layouttype; /* supported pnfs layout driver */ + __u32 nlayouttypes; /* number of layouttypes */ + __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */ __u32 blksize; /* preferred pnfs io block size */ __u32 clone_blksize; /* granularity of a CLONE operation */ }; diff --git a/include/linux/pwm.h b/include/linux/pwm.h index f1bbae014889..2c6c5114c089 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -641,6 +641,7 @@ static inline void pwm_remove_table(struct pwm_lookup *table, size_t num) #ifdef CONFIG_PWM_SYSFS void pwmchip_sysfs_export(struct pwm_chip *chip); void pwmchip_sysfs_unexport(struct pwm_chip *chip); +void pwmchip_sysfs_unexport_children(struct pwm_chip *chip); #else static inline void pwmchip_sysfs_export(struct pwm_chip *chip) { @@ -649,6 +650,10 @@ static inline void pwmchip_sysfs_export(struct pwm_chip *chip) static inline void pwmchip_sysfs_unexport(struct pwm_chip *chip) { } + +static inline void pwmchip_sysfs_unexport_children(struct pwm_chip *chip) +{ +} #endif /* CONFIG_PWM_SYSFS */ #endif /* __LINUX_PWM_H */ diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 4ccf184e971f..b1bc62ba20a2 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -131,6 +131,7 @@ struct rpc_authops { struct rpc_auth * (*create)(struct rpc_auth_create_args *, struct rpc_clnt *); void (*destroy)(struct rpc_auth *); + int (*hash_cred)(struct auth_cred *, unsigned int); struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int); struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int, gfp_t); int (*list_pseudoflavors)(rpc_authflavor_t *, int); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 5c02b0691587..85cc819676e8 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -125,6 +125,13 @@ struct rpc_create_args { struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ }; +struct rpc_add_xprt_test { + int (*add_xprt_test)(struct rpc_clnt *, + struct rpc_xprt *, + void *calldata); + void *data; +}; + /* Values for "flags" field */ #define RPC_CLNT_CREATE_HARDRTRY (1UL << 0) #define RPC_CLNT_CREATE_AUTOBIND (1UL << 2) @@ -198,6 +205,16 @@ int rpc_clnt_add_xprt(struct rpc_clnt *, struct xprt_create *, void rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo); +int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *, + struct rpc_xprt_switch *, + struct rpc_xprt *, + void *); + const char *rpc_proc_name(const struct rpc_task *task); + +void rpc_clnt_xprt_switch_put(struct rpc_clnt *); +void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *); +bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, + const struct sockaddr *sap); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index 3b1ff38f0c37..cfda6adcf33c 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -41,10 +41,15 @@ #define _LINUX_SUNRPC_RPC_RDMA_H #include <linux/types.h> +#include <linux/bitops.h> #define RPCRDMA_VERSION 1 #define rpcrdma_version cpu_to_be32(RPCRDMA_VERSION) +enum { + RPCRDMA_V1_DEF_INLINE_SIZE = 1024, +}; + struct rpcrdma_segment { __be32 rs_handle; /* 
Registered memory handle */ __be32 rs_length; /* Length of the chunk in bytes */ @@ -129,4 +134,38 @@ enum rpcrdma_proc { #define rdma_done cpu_to_be32(RDMA_DONE) #define rdma_error cpu_to_be32(RDMA_ERROR) +/* + * Private extension to RPC-over-RDMA Version One. + * Message passed during RDMA-CM connection set-up. + * + * Add new fields at the end, and don't permute existing + * fields. + */ +struct rpcrdma_connect_private { + __be32 cp_magic; + u8 cp_version; + u8 cp_flags; + u8 cp_send_size; + u8 cp_recv_size; +} __packed; + +#define rpcrdma_cmp_magic __cpu_to_be32(0xf6ab0e18) + +enum { + RPCRDMA_CMP_VERSION = 1, + RPCRDMA_CMP_F_SND_W_INV_OK = BIT(0), +}; + +static inline u8 +rpcrdma_encode_buffer_size(unsigned int size) +{ + return (size >> 10) - 1; +} + +static inline unsigned int +rpcrdma_decode_buffer_size(u8 val) +{ + return ((unsigned int)val + 1) << 10; +} + #endif /* _LINUX_SUNRPC_RPC_RDMA_H */ diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 817af0b4385e..7ba040c797ec 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -239,8 +239,8 @@ struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *, void *); void rpc_wake_up_status(struct rpc_wait_queue *, int); void rpc_delay(struct rpc_task *, unsigned long); -void * rpc_malloc(struct rpc_task *, size_t); -void rpc_free(void *); +int rpc_malloc(struct rpc_task *); +void rpc_free(struct rpc_task *); int rpciod_up(void); void rpciod_down(void); int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *); diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index d6917b896d3a..cc3ae16eac68 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -86,6 +86,7 @@ struct svc_rdma_op_ctxt { unsigned long flags; enum dma_data_direction direction; int count; + unsigned int mapped_sges; struct ib_sge sge[RPCSVC_MAXPAGES]; struct page *pages[RPCSVC_MAXPAGES]; }; @@ -136,6 +137,7 @@ struct svcxprt_rdma { int sc_ord; /* RDMA read limit */ int sc_max_sge; int sc_max_sge_rd; /* max sge for read target */ + bool sc_snd_w_inv; /* OK to use Send With Invalidate */ atomic_t sc_sq_count; /* Number of SQ WR on queue */ unsigned int sc_sq_depth; /* Depth of SQ */ @@ -193,6 +195,14 @@ struct svcxprt_rdma { #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD +/* Track DMA maps for this transport and context */ +static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma, + struct svc_rdma_op_ctxt *ctxt) +{ + ctxt->mapped_sges++; + atomic_inc(&rdma->sc_dma_used); +} + /* svc_rdma_backchannel.c */ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp, diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 70c6b92e15a7..56c48c884a24 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -67,6 +67,18 @@ struct xdr_buf { len; /* Length of XDR encoded message */ }; +static inline void +xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) +{ + buf->head[0].iov_base = start; + buf->head[0].iov_len = len; + buf->tail[0].iov_len = 0; + buf->page_len = 0; + buf->flags = 0; + buf->len = 0; + buf->buflen = len; +} + /* * pre-xdr'ed macros. 
*/ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a16070dd03ee..a5da60b24d83 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -83,9 +83,11 @@ struct rpc_rqst { void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ struct list_head rq_list; - __u32 * rq_buffer; /* XDR encode buffer */ - size_t rq_callsize, - rq_rcvsize; + void *rq_xprtdata; /* Per-xprt private data */ + void *rq_buffer; /* Call XDR encode buffer */ + size_t rq_callsize; + void *rq_rbuffer; /* Reply XDR decode buffer */ + size_t rq_rcvsize; size_t rq_xmit_bytes_sent; /* total bytes sent */ size_t rq_reply_bytes_recvd; /* total reply bytes */ /* received */ @@ -127,8 +129,8 @@ struct rpc_xprt_ops { void (*rpcbind)(struct rpc_task *task); void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); - void * (*buf_alloc)(struct rpc_task *task, size_t size); - void (*buf_free)(void *buffer); + int (*buf_alloc)(struct rpc_task *task); + void (*buf_free)(struct rpc_task *task); int (*send_request)(struct rpc_task *task); void (*set_retrans_timeout)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h index 5a9acffa41be..507418c1c69e 100644 --- a/include/linux/sunrpc/xprtmultipath.h +++ b/include/linux/sunrpc/xprtmultipath.h @@ -66,4 +66,6 @@ extern struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi); extern struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi); extern struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi); +extern bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, + const struct sockaddr *sap); #endif diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 39267dc3486a..221b7a2e5406 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -53,8 +53,8 @@ #define RPCRDMA_MAX_SLOT_TABLE (256U) #define RPCRDMA_MIN_INLINE (1024) /* min inline thresh */ -#define RPCRDMA_DEF_INLINE (1024) /* default inline thresh */ -#define RPCRDMA_MAX_INLINE (3068) /* max inline thresh */ +#define RPCRDMA_DEF_INLINE (4096) /* default inline thresh */ +#define RPCRDMA_MAX_INLINE (65536) /* max inline thresh */ /* Memory registration strategies, by number. * This is part of a kernel / user space API. Do not remove. 
*/ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index ee517bef0db0..511182a88e76 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -92,12 +92,24 @@ enum thermal_trend { THERMAL_TREND_DROP_FULL, /* apply lowest cooling action */ }; +/* Thermal notification reason */ +enum thermal_notify_event { + THERMAL_EVENT_UNSPECIFIED, /* Unspecified event */ + THERMAL_EVENT_TEMP_SAMPLE, /* New Temperature sample */ + THERMAL_TRIP_VIOLATED, /* TRIP Point violation */ + THERMAL_TRIP_CHANGED, /* TRIP Point temperature changed */ + THERMAL_DEVICE_DOWN, /* Thermal device is down */ + THERMAL_DEVICE_UP, /* Thermal device is up after a down event */ + THERMAL_DEVICE_POWER_CAPABILITY_CHANGED, /* power capability changed */ +}; + struct thermal_zone_device_ops { int (*bind) (struct thermal_zone_device *, struct thermal_cooling_device *); int (*unbind) (struct thermal_zone_device *, struct thermal_cooling_device *); int (*get_temp) (struct thermal_zone_device *, int *); + int (*set_trips) (struct thermal_zone_device *, int, int); int (*get_mode) (struct thermal_zone_device *, enum thermal_device_mode *); int (*set_mode) (struct thermal_zone_device *, @@ -168,6 +180,10 @@ struct thermal_attr { * @last_temperature: previous temperature read * @emul_temperature: emulated temperature when using CONFIG_THERMAL_EMULATION * @passive: 1 if you've crossed a passive trip point, 0 otherwise. + * @prev_low_trip: the low current temperature if you've crossed a passive + trip point. + * @prev_high_trip: the above current temperature if you've crossed a + passive trip point. * @forced_passive: If > 0, temperature at which to switch on all ACPI * processor cooling devices. Currently only used by the * step-wise governor. @@ -182,6 +198,7 @@ struct thermal_attr { * @lock: lock to protect thermal_instances list * @node: node in thermal_tz_list (in thermal_core.c) * @poll_queue: delayed work for polling + * @notify_event: Last notification event */ struct thermal_zone_device { int id; @@ -199,6 +216,8 @@ struct thermal_zone_device { int last_temperature; int emul_temperature; int passive; + int prev_low_trip; + int prev_high_trip; unsigned int forced_passive; atomic_t need_update; struct thermal_zone_device_ops *ops; @@ -210,6 +229,7 @@ struct thermal_zone_device { struct mutex lock; struct list_head node; struct delayed_work poll_queue; + enum thermal_notify_event notify_event; }; /** @@ -333,6 +353,9 @@ struct thermal_genl_event { * * Optional: * @get_trend: a pointer to a function that reads the sensor temperature trend. + * @set_trips: a pointer to a function that sets a temperature window. When + * this window is left the driver must inform the thermal core via + * thermal_zone_device_update. * @set_emul_temp: a pointer to a function that sets sensor emulated * temperature. 
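Editorial aside on set_trips above: the core derives a [low, high] window from the trip points bracketing the current temperature and only needs the sensor to signal when that window is exited; prev_low_trip and prev_high_trip remember the bounds last programmed. A hedged, simplified userspace sketch of the window computation (hysteresis ignored, trip values invented):

#include <limits.h>
#include <stdio.h>

/* Compute the notification window around the current temperature:
 * low  = hottest trip at or below the current temperature,
 * high = coolest trip above it.  Leaving [low, high] is the moment
 * the thermal core needs to be told to re-evaluate the zone. */
static void compute_window(const int *trips, int ntrips, int temp,
			   int *low, int *high)
{
	*low = INT_MIN;
	*high = INT_MAX;
	for (int i = 0; i < ntrips; i++) {
		if (trips[i] <= temp && trips[i] > *low)
			*low = trips[i];
		if (trips[i] > temp && trips[i] < *high)
			*high = trips[i];
	}
}

int main(void)
{
	int trips[] = { 40000, 70000, 95000 };	/* millidegrees, invented */
	int low, high;

	compute_window(trips, 3, 55000, &low, &high);
	printf("window [%d, %d]\n", low, high);	/* [40000, 70000] */
	return 0;
}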
* @set_trip_temp: a pointer to a function that sets the trip temperature on @@ -340,7 +363,8 @@ struct thermal_genl_event { */ struct thermal_zone_of_device_ops { int (*get_temp)(void *, int *); - int (*get_trend)(void *, long *); + int (*get_trend)(void *, int, enum thermal_trend *); + int (*set_trips)(void *, int, int); int (*set_emul_temp)(void *, int); int (*set_trip_temp)(void *, int, int); }; @@ -425,7 +449,9 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, unsigned int); int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); -void thermal_zone_device_update(struct thermal_zone_device *); +void thermal_zone_device_update(struct thermal_zone_device *, + enum thermal_notify_event); +void thermal_zone_set_trips(struct thermal_zone_device *); struct thermal_cooling_device *thermal_cooling_device_register(char *, void *, const struct thermal_cooling_device_ops *); @@ -435,6 +461,8 @@ thermal_of_cooling_device_register(struct device_node *np, char *, void *, void thermal_cooling_device_unregister(struct thermal_cooling_device *); struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name); int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); +int thermal_zone_get_slope(struct thermal_zone_device *tz); +int thermal_zone_get_offset(struct thermal_zone_device *tz); int get_tz_trend(struct thermal_zone_device *, int); struct thermal_instance *get_thermal_instance(struct thermal_zone_device *, @@ -473,7 +501,10 @@ static inline int thermal_zone_unbind_cooling_device( struct thermal_zone_device *tz, int trip, struct thermal_cooling_device *cdev) { return -ENODEV; } -static inline void thermal_zone_device_update(struct thermal_zone_device *tz) +static inline void thermal_zone_device_update(struct thermal_zone_device *tz, + enum thermal_notify_event event) +{ } +static inline void thermal_zone_set_trips(struct thermal_zone_device *tz) { } static inline struct thermal_cooling_device * thermal_cooling_device_register(char *type, void *devdata, @@ -492,6 +523,12 @@ static inline struct thermal_zone_device *thermal_zone_get_zone_by_name( static inline int thermal_zone_get_temp( struct thermal_zone_device *tz, int *temp) { return -ENODEV; } +static inline int thermal_zone_get_slope( + struct thermal_zone_device *tz) +{ return -ENODEV; } +static inline int thermal_zone_get_offset( + struct thermal_zone_device *tz) +{ return -ENODEV; } static inline int get_tz_trend(struct thermal_zone_device *tz, int trip) { return -ENODEV; } static inline struct thermal_instance * diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 7047bc7f8106..35a4d8185b51 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -19,6 +19,7 @@ struct watchdog_ops; struct watchdog_device; struct watchdog_core_data; +struct watchdog_governor; /** struct watchdog_ops - The watchdog-devices operations * @@ -28,6 +29,7 @@ struct watchdog_core_data; * @ping: The routine that sends a keepalive ping to the watchdog device. * @status: The routine that shows the status of the watchdog device. * @set_timeout:The routine for setting the watchdog devices timeout value (in seconds). + * @set_pretimeout:The routine for setting the watchdog devices pretimeout. * @get_timeleft:The routine that gets the time left before a reset (in seconds). * @restart: The routine for restarting the machine. * @ioctl: The routines that handles extra ioctl calls. 
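For the new set_pretimeout operation documented above, the core's rule (see watchdog_pretimeout_invalid() in the next hunk) is that 0 disables the feature and any other value must fire strictly before the main timeout. A userspace rendering of that check:

#include <stdbool.h>
#include <stdio.h>

/* Sketch of the pretimeout rule the watchdog core enforces: 0 disables
 * the early warning, otherwise it must land strictly before the main
 * timeout.  Field names are simplified from struct watchdog_device. */
struct wdog {
	unsigned int timeout;		/* seconds until reset */
	unsigned int pretimeout;	/* seconds until early warning */
};

static bool pretimeout_invalid(const struct wdog *wdd, unsigned int t)
{
	return t && wdd->timeout && t >= wdd->timeout;
}

int main(void)
{
	struct wdog wdd = { .timeout = 60 };

	printf("%d\n", pretimeout_invalid(&wdd, 0));	/* 0: disabled is OK */
	printf("%d\n", pretimeout_invalid(&wdd, 10));	/* 0: 10s before 60s */
	printf("%d\n", pretimeout_invalid(&wdd, 60));	/* 1: not before timeout */
	return 0;
}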
@@ -46,6 +48,7 @@ struct watchdog_ops { int (*ping)(struct watchdog_device *); unsigned int (*status)(struct watchdog_device *); int (*set_timeout)(struct watchdog_device *, unsigned int); + int (*set_pretimeout)(struct watchdog_device *, unsigned int); unsigned int (*get_timeleft)(struct watchdog_device *); int (*restart)(struct watchdog_device *, unsigned long, void *); long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long); @@ -59,8 +62,10 @@ struct watchdog_ops { * watchdog device. * @info: Pointer to a watchdog_info structure. * @ops: Pointer to the list of watchdog operations. + * @gov: Pointer to watchdog pretimeout governor. * @bootstatus: Status of the watchdog device at boot. * @timeout: The watchdog devices timeout value (in seconds). + * @pretimeout: The watchdog devices pre_timeout value. * @min_timeout:The watchdog devices minimum timeout value (in seconds). * @max_timeout:The watchdog devices maximum timeout value (in seconds) * as configurable from user space. Only relevant if @@ -94,8 +99,10 @@ struct watchdog_device { const struct attribute_group **groups; const struct watchdog_info *info; const struct watchdog_ops *ops; + const struct watchdog_governor *gov; unsigned int bootstatus; unsigned int timeout; + unsigned int pretimeout; unsigned int min_timeout; unsigned int max_timeout; unsigned int min_hw_heartbeat_ms; @@ -163,6 +170,13 @@ static inline bool watchdog_timeout_invalid(struct watchdog_device *wdd, unsigne t > wdd->max_timeout); } +/* Use the following function to check if a pretimeout value is invalid */ +static inline bool watchdog_pretimeout_invalid(struct watchdog_device *wdd, + unsigned int t) +{ + return t && wdd->timeout && t >= wdd->timeout; +} + /* Use the following functions to manipulate watchdog driver specific data */ static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data) { @@ -174,6 +188,16 @@ static inline void *watchdog_get_drvdata(struct watchdog_device *wdd) return wdd->driver_data; } +/* Use the following functions to report watchdog pretimeout event */ +#if IS_ENABLED(CONFIG_WATCHDOG_PRETIMEOUT_GOV) +void watchdog_notify_pretimeout(struct watchdog_device *wdd); +#else +static inline void watchdog_notify_pretimeout(struct watchdog_device *wdd) +{ + pr_alert("watchdog%d: pretimeout event\n", wdd->id); +} +#endif + /* drivers/watchdog/watchdog_core.c */ void watchdog_set_restart_priority(struct watchdog_device *wdd, int priority); extern int watchdog_init_timeout(struct watchdog_device *wdd, diff --git a/include/net/bonding.h b/include/net/bonding.h index 6360c259da6d..f32f7ef8a23a 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -37,18 +37,6 @@ #ifndef __long_aligned #define __long_aligned __attribute__((aligned((sizeof(long))))) #endif -/* - * Less bad way to call ioctl from within the kernel; this needs to be - * done some other way to get the call out of interrupt context. - * Needs "ioctl" variable to be supplied by calling context. 
- */ -#define IOCTL(dev, arg, cmd) ({ \ - int res = 0; \ - mm_segment_t fs = get_fs(); \ - set_fs(get_ds()); \ - res = ioctl(dev, arg, cmd); \ - set_fs(fs); \ - res; }) #define BOND_MODE(bond) ((bond)->params.mode) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index b220dabeab45..3832099289c5 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -114,6 +114,25 @@ static inline u32 l3mdev_fib_table(const struct net_device *dev) return tb_id; } +static inline bool netif_index_is_l3_master(struct net *net, int ifindex) +{ + struct net_device *dev; + bool rc = false; + + if (ifindex == 0) + return false; + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (dev) + rc = netif_is_l3_master(dev); + + rcu_read_unlock(); + + return rc; +} + struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); static inline @@ -207,6 +226,11 @@ static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) return 0; } +static inline bool netif_index_is_l3_master(struct net *net, int ifindex) +{ + return false; +} + static inline struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h index 3e445a760f14..b075f601919b 100644 --- a/include/uapi/linux/falloc.h +++ b/include/uapi/linux/falloc.h @@ -58,4 +58,22 @@ */ #define FALLOC_FL_INSERT_RANGE 0x20 +/* + * FALLOC_FL_UNSHARE_RANGE is used to unshare shared blocks within the + * file size without overwriting any existing data. The purpose of this + * call is to preemptively reallocate any blocks that are subject to + * copy-on-write. + * + * Different filesystems may implement different limitations on the + * granularity of the operation. Most will limit operations to filesystem + * block size boundaries, but this boundary may be larger or smaller + * depending on the filesystem and/or the configuration of the filesystem + * or file. + * + * This flag can only be used with allocate-mode fallocate, which is + * to say that it cannot be used with the punch, zero, collapse, or + * insert range modes. 
+ */ +#define FALLOC_FL_UNSHARE_RANGE 0x40 + #endif /* _UAPI_FALLOC_H_ */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 2473272169f2..acb2b6152ba0 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -158,7 +158,8 @@ struct fsxattr { __u32 fsx_extsize; /* extsize field value (get/set)*/ __u32 fsx_nextents; /* nextents field value (get) */ __u32 fsx_projid; /* project identifier (get/set) */ - unsigned char fsx_pad[12]; + __u32 fsx_cowextsize; /* CoW extsize field value (get/set)*/ + unsigned char fsx_pad[8]; }; /* @@ -179,6 +180,7 @@ struct fsxattr { #define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ #define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ #define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */ +#define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */ #define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ /* the read-only stuff doesn't really belong here, but any other place is diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 2b871e0858d9..4ae62796bfde 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -39,8 +39,9 @@ #define NFS4_FH_VOL_MIGRATION 0x0004 #define NFS4_FH_VOL_RENAME 0x0008 -#define NFS4_OPEN_RESULT_CONFIRM 0x0002 -#define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004 +#define NFS4_OPEN_RESULT_CONFIRM 0x0002 +#define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004 +#define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK 0x0020 #define NFS4_SHARE_ACCESS_MASK 0x000F #define NFS4_SHARE_ACCESS_READ 0x0001 diff --git a/include/video/exynos_mipi_dsim.h b/include/video/exynos_mipi_dsim.h deleted file mode 100644 index 6a578f8a1b3e..000000000000 --- a/include/video/exynos_mipi_dsim.h +++ /dev/null @@ -1,358 +0,0 @@ -/* include/video/exynos_mipi_dsim.h - * - * Platform data header for Samsung SoC MIPI-DSIM. - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd - * - * InKi Dae <inki.dae@samsung.com> - * Donghwa Lee <dh09.lee@samsung.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. -*/ - -#ifndef _EXYNOS_MIPI_DSIM_H -#define _EXYNOS_MIPI_DSIM_H - -#include <linux/device.h> -#include <linux/fb.h> - -#define PANEL_NAME_SIZE (32) - -/* - * Enumerate display interface type. - * - * DSIM_COMMAND means cpu interface and rgb interface for DSIM_VIDEO. - * - * P.S. MIPI DSI Master has two display controller intefaces, RGB Interface - * for main display and CPU Interface(same as I80 Interface) for main - * and sub display. - */ -enum mipi_dsim_interface_type { - DSIM_COMMAND, - DSIM_VIDEO -}; - -enum mipi_dsim_virtual_ch_no { - DSIM_VIRTUAL_CH_0, - DSIM_VIRTUAL_CH_1, - DSIM_VIRTUAL_CH_2, - DSIM_VIRTUAL_CH_3 -}; - -enum mipi_dsim_burst_mode_type { - DSIM_NON_BURST_SYNC_EVENT, - DSIM_BURST_SYNC_EVENT, - DSIM_NON_BURST_SYNC_PULSE, - DSIM_BURST, - DSIM_NON_VIDEO_MODE -}; - -enum mipi_dsim_no_of_data_lane { - DSIM_DATA_LANE_1, - DSIM_DATA_LANE_2, - DSIM_DATA_LANE_3, - DSIM_DATA_LANE_4 -}; - -enum mipi_dsim_byte_clk_src { - DSIM_PLL_OUT_DIV8, - DSIM_EXT_CLK_DIV8, - DSIM_EXT_CLK_BYPASS -}; - -enum mipi_dsim_pixel_format { - DSIM_CMD_3BPP, - DSIM_CMD_8BPP, - DSIM_CMD_12BPP, - DSIM_CMD_16BPP, - DSIM_VID_16BPP_565, - DSIM_VID_18BPP_666PACKED, - DSIM_18BPP_666LOOSELYPACKED, - DSIM_24BPP_888 -}; - -/* - * struct mipi_dsim_config - interface for configuring mipi-dsi controller. 
- * - * @auto_flush: enable or disable Auto flush of MD FIFO using VSYNC pulse. - * @eot_disable: enable or disable EoT packet in HS mode. - * @auto_vertical_cnt: specifies auto vertical count mode. - * in Video mode, the vertical line transition uses line counter - * configured by VSA, VBP, and Vertical resolution. - * If this bit is set to '1', the line counter does not use VSA and VBP - * registers.(in command mode, this variable is ignored) - * @hse: set horizontal sync event mode. - * In VSYNC pulse and Vporch area, MIPI DSI master transfers only HSYNC - * start packet to MIPI DSI slave at MIPI DSI spec1.1r02. - * this bit transfers HSYNC end packet in VSYNC pulse and Vporch area - * (in mommand mode, this variable is ignored) - * @hfp: specifies HFP disable mode. - * if this variable is set, DSI master ignores HFP area in VIDEO mode. - * (in command mode, this variable is ignored) - * @hbp: specifies HBP disable mode. - * if this variable is set, DSI master ignores HBP area in VIDEO mode. - * (in command mode, this variable is ignored) - * @hsa: specifies HSA disable mode. - * if this variable is set, DSI master ignores HSA area in VIDEO mode. - * (in command mode, this variable is ignored) - * @cma_allow: specifies the number of horizontal lines, where command packet - * transmission is allowed after Stable VFP period. - * @e_interface: specifies interface to be used.(CPU or RGB interface) - * @e_virtual_ch: specifies virtual channel number that main or - * sub diaplsy uses. - * @e_pixel_format: specifies pixel stream format for main or sub display. - * @e_burst_mode: selects Burst mode in Video mode. - * in Non-burst mode, RGB data area is filled with RGB data and NULL - * packets, according to input bandwidth of RGB interface. - * In Burst mode, RGB data area is filled with RGB data only. - * @e_no_data_lane: specifies data lane count to be used by Master. - * @e_byte_clk: select byte clock source. (it must be DSIM_PLL_OUT_DIV8) - * DSIM_EXT_CLK_DIV8 and DSIM_EXT_CLK_BYPASSS are not supported. - * @pll_stable_time: specifies the PLL Timer for stability of the ganerated - * clock(System clock cycle base) - * if the timer value goes to 0x00000000, the clock stable bit of status - * and interrupt register is set. - * @esc_clk: specifies escape clock frequency for getting the escape clock - * prescaler value. - * @stop_holding_cnt: specifies the interval value between transmitting - * read packet(or write "set_tear_on" command) and BTA request. - * after transmitting read packet or write "set_tear_on" command, - * BTA requests to D-PHY automatically. this counter value specifies - * the interval between them. - * @bta_timeout: specifies the timer for BTA. - * this register specifies time out from BTA request to change - * the direction with respect to Tx escape clock. - * @rx_timeout: specifies the timer for LP Rx mode timeout. - * this register specifies time out on how long RxValid deasserts, - * after RxLpdt asserts with respect to Tx escape clock. - * - RxValid specifies Rx data valid indicator. - * - RxLpdt specifies an indicator that D-PHY is under RxLpdt mode. - * - RxValid and RxLpdt specifies signal from D-PHY. 
- */ -struct mipi_dsim_config { - unsigned char auto_flush; - unsigned char eot_disable; - - unsigned char auto_vertical_cnt; - unsigned char hse; - unsigned char hfp; - unsigned char hbp; - unsigned char hsa; - unsigned char cmd_allow; - - enum mipi_dsim_interface_type e_interface; - enum mipi_dsim_virtual_ch_no e_virtual_ch; - enum mipi_dsim_pixel_format e_pixel_format; - enum mipi_dsim_burst_mode_type e_burst_mode; - enum mipi_dsim_no_of_data_lane e_no_data_lane; - enum mipi_dsim_byte_clk_src e_byte_clk; - - /* - * =========================================== - * | P | M | S | MHz | - * ------------------------------------------- - * | 3 | 100 | 3 | 100 | - * | 3 | 100 | 2 | 200 | - * | 3 | 63 | 1 | 252 | - * | 4 | 100 | 1 | 300 | - * | 4 | 110 | 1 | 330 | - * | 12 | 350 | 1 | 350 | - * | 3 | 100 | 1 | 400 | - * | 4 | 150 | 1 | 450 | - * | 6 | 118 | 1 | 472 | - * | 3 | 120 | 1 | 480 | - * | 12 | 250 | 0 | 500 | - * | 4 | 100 | 0 | 600 | - * | 3 | 81 | 0 | 648 | - * | 3 | 88 | 0 | 704 | - * | 3 | 90 | 0 | 720 | - * | 3 | 100 | 0 | 800 | - * | 12 | 425 | 0 | 850 | - * | 4 | 150 | 0 | 900 | - * | 12 | 475 | 0 | 950 | - * | 6 | 250 | 0 | 1000 | - * ------------------------------------------- - */ - - /* - * pms could be calculated as the following. - * M * 24 / P * 2 ^ S = MHz - */ - unsigned char p; - unsigned short m; - unsigned char s; - - unsigned int pll_stable_time; - unsigned long esc_clk; - - unsigned short stop_holding_cnt; - unsigned char bta_timeout; - unsigned short rx_timeout; -}; - -/* - * struct mipi_dsim_device - global interface for mipi-dsi driver. - * - * @dev: driver model representation of the device. - * @id: unique device id. - * @clock: pointer to MIPI-DSI clock of clock framework. - * @irq: interrupt number to MIPI-DSI controller. - * @reg_base: base address to memory mapped SRF of MIPI-DSI controller. - * (virtual address) - * @lock: the mutex protecting this data structure. - * @dsim_info: infomation for configuring mipi-dsi controller. - * @master_ops: callbacks to mipi-dsi operations. - * @dsim_lcd_dev: pointer to activated ddi device. - * (it would be registered by mipi-dsi driver.) - * @dsim_lcd_drv: pointer to activated_ddi driver. - * (it would be registered by mipi-dsi driver.) - * @lcd_info: pointer to mipi_lcd_info structure. - * @state: specifies status of MIPI-DSI controller. - * the status could be RESET, INIT, STOP, HSCLKEN and ULPS. - * @data_lane: specifiec enabled data lane number. - * this variable would be set by driver according to e_no_data_lane - * automatically. - * @e_clk_src: select byte clock source. - * @pd: pointer to MIPI-DSI driver platform data. - * @phy: pointer to the MIPI-DSI PHY - */ -struct mipi_dsim_device { - struct device *dev; - int id; - struct clk *clock; - unsigned int irq; - void __iomem *reg_base; - struct mutex lock; - - struct mipi_dsim_config *dsim_config; - struct mipi_dsim_master_ops *master_ops; - struct mipi_dsim_lcd_device *dsim_lcd_dev; - struct mipi_dsim_lcd_driver *dsim_lcd_drv; - - unsigned int state; - unsigned int data_lane; - unsigned int e_clk_src; - bool suspended; - - struct mipi_dsim_platform_data *pd; - struct phy *phy; -}; - -/* - * struct mipi_dsim_platform_data - interface to platform data - * for mipi-dsi driver. - * - * @lcd_panel_name: specifies lcd panel name registered to mipi-dsi driver. - * lcd panel driver searched would be actived. - * @dsim_config: pointer of structure for configuring mipi-dsi controller. - * @enabled: indicate whether mipi controller got enabled or not. 
- * @lcd_panel_info: pointer for lcd panel specific structure. - * this structure specifies width, height, timing and polarity and so on. - */ -struct mipi_dsim_platform_data { - char lcd_panel_name[PANEL_NAME_SIZE]; - - struct mipi_dsim_config *dsim_config; - unsigned int enabled; - void *lcd_panel_info; -}; - -/* - * struct mipi_dsim_master_ops - callbacks to mipi-dsi operations. - * - * @cmd_write: transfer command to lcd panel at LP mode. - * @cmd_read: read command from rx register. - * @get_dsim_frame_done: get the status that all screen data have been - * transferred to mipi-dsi. - * @clear_dsim_frame_done: clear frame done status. - * @get_fb_frame_done: get frame done status of display controller. - * @trigger: trigger display controller. - * - this one would be used only in case of CPU mode. - * @set_early_blank_mode: set framebuffer blank mode. - * - this callback should be called prior to fb_blank() by a client driver - * only if needing. - * @set_blank_mode: set framebuffer blank mode. - * - this callback should be called after fb_blank() by a client driver - * only if needing. - */ - -struct mipi_dsim_master_ops { - int (*cmd_write)(struct mipi_dsim_device *dsim, unsigned int data_id, - const unsigned char *data0, unsigned int data1); - int (*cmd_read)(struct mipi_dsim_device *dsim, unsigned int data_id, - unsigned int data0, unsigned int req_size, u8 *rx_buf); - int (*get_dsim_frame_done)(struct mipi_dsim_device *dsim); - int (*clear_dsim_frame_done)(struct mipi_dsim_device *dsim); - - int (*get_fb_frame_done)(struct fb_info *info); - void (*trigger)(struct fb_info *info); - int (*set_early_blank_mode)(struct mipi_dsim_device *dsim, int power); - int (*set_blank_mode)(struct mipi_dsim_device *dsim, int power); -}; - -/* - * device structure for mipi-dsi based lcd panel. - * - * @name: name of the device to use with this device, or an - * alias for that name. - * @dev: driver model representation of the device. - * @id: id of device to be registered. - * @bus_id: bus id for identifing connected bus - * and this bus id should be same as id of mipi_dsim_device. - * @irq: irq number for signaling when framebuffer transfer of - * lcd panel module is completed. - * this irq would be used only for MIPI-DSI based CPU mode lcd panel. - * @master: pointer to mipi-dsi master device object. - * @platform_data: lcd panel specific platform data. - */ -struct mipi_dsim_lcd_device { - char *name; - struct device dev; - int id; - int bus_id; - int irq; - int panel_reverse; - - struct mipi_dsim_device *master; - void *platform_data; -}; - -/* - * driver structure for mipi-dsi based lcd panel. - * - * this structure should be registered by lcd panel driver. - * mipi-dsi driver seeks lcd panel registered through name field - * and calls these callback functions in appropriate time. - * - * @name: name of the driver to use with this device, or an - * alias for that name. - * @id: id of driver to be registered. - * this id would be used for finding device object registered. 
- */ -struct mipi_dsim_lcd_driver { - char *name; - int id; - - void (*power_on)(struct mipi_dsim_lcd_device *dsim_dev, int enable); - void (*set_sequence)(struct mipi_dsim_lcd_device *dsim_dev); - int (*probe)(struct mipi_dsim_lcd_device *dsim_dev); - int (*remove)(struct mipi_dsim_lcd_device *dsim_dev); - void (*shutdown)(struct mipi_dsim_lcd_device *dsim_dev); - int (*suspend)(struct mipi_dsim_lcd_device *dsim_dev); - int (*resume)(struct mipi_dsim_lcd_device *dsim_dev); -}; - -/* - * register mipi_dsim_lcd_device to mipi-dsi master. - */ -int exynos_mipi_dsi_register_lcd_device(struct mipi_dsim_lcd_device - *lcd_dev); -/** - * register mipi_dsim_lcd_driver object defined by lcd panel driver - * to mipi-dsi driver. - */ -int exynos_mipi_dsi_register_lcd_driver(struct mipi_dsim_lcd_driver - *lcd_drv); -#endif /* _EXYNOS_MIPI_DSIM_H */ diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 992ab9d99f35..e57980845549 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -1,8 +1,4 @@ -# We are fully aware of the dangers of __builtin_return_address() -FRAME_CFLAGS := $(call cc-disable-warning,frame-address) -KBUILD_CFLAGS += $(FRAME_CFLAGS) - # Do not instrument the tracer itself: ifdef CONFIG_FUNCTION_TRACER diff --git a/mm/Makefile b/mm/Makefile index 2ca1faf3fa09..295bd7a9f76b 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -21,9 +21,6 @@ KCOV_INSTRUMENT_memcontrol.o := n KCOV_INSTRUMENT_mmzone.o := n KCOV_INSTRUMENT_vmstat.o := n -# Since __builtin_frame_address does work as used, disable the warning. -CFLAGS_usercopy.o += $(call cc-disable-warning, frame-address) - mmu-y := nommu.o mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \ mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index e657258e1f2c..8bd569695e76 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -217,6 +217,7 @@ static const struct brport_attribute *brport_attrs[] = { #endif &brport_attr_proxyarp, &brport_attr_proxyarp_wifi, + &brport_attr_multicast_flood, NULL }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b06d2f46b83e..fb7348f13501 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1144,6 +1144,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi)) return 0; + memset(&vf_vlan_info, 0, sizeof(vf_vlan_info)); + vf_mac.vf = vf_vlan.vf = vf_vlan_info.vf = diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f2be689a6c85..62d4d90c1389 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2265,7 +2265,8 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, if (err) { res.fi = NULL; res.table = NULL; - if (fl4->flowi4_oif) { + if (fl4->flowi4_oif && + !netif_index_is_l3_master(net, fl4->flowi4_oif)) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 54cf7197c7ab..5a27ab4eab39 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1190,6 +1190,16 @@ out: return NULL; } +static void tcp_v6_restore_cb(struct sk_buff *skb) +{ + /* We need to move header back to the beginning if xfrm6_policy_check() + * and tcp_v6_fill_cb() are going to be called again. + * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. 
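+ *
+ * Editorial sketch, not part of the patch: this only works because
+ * IP6CB() and TCP_SKB_CB() are overlays of the same skb->cb[] storage,
+ * roughly
+ *
+ *	#define IP6CB(skb)	((struct inet6_skb_parm *)((skb)->cb))
+ *	#define TCP_SKB_CB(s)	((struct tcp_skb_cb *)&((s)->cb[0]))
+ *
+ * and because the header union does not sit at offset 0 of struct
+ * tcp_skb_cb, the saved inet6_skb_parm has to be memmove()d back to
+ * the start of cb[] where IP6CB() expects to find it.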
+ */ + memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, + sizeof(struct inet6_skb_parm)); +} + /* The socket must have it's spinlock held when we get * here, unless it is a TCP_LISTEN socket. * @@ -1319,6 +1329,7 @@ ipv6_pktoptions: np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { skb_set_owner_r(opt_skb, sk); + tcp_v6_restore_cb(opt_skb); opt_skb = xchg(&np->pktoptions, opt_skb); } else { __kfree_skb(opt_skb); @@ -1352,15 +1363,6 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, TCP_SKB_CB(skb)->sacked = 0; } -static void tcp_v6_restore_cb(struct sk_buff *skb) -{ - /* We need to move header back to the beginning if xfrm6_policy_check() - * and tcp_v6_fill_cb() are going to be called again. - */ - memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, - sizeof(struct inet6_skb_parm)); -} - static int tcp_v6_rcv(struct sk_buff *skb) { const struct tcphdr *th; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index c8c82e109c68..22087062bd10 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -343,7 +343,7 @@ static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) key->eth.cvlan.tci = 0; key->eth.cvlan.tpid = 0; - if (likely(skb_vlan_tag_present(skb))) { + if (skb_vlan_tag_present(skb)) { key->eth.vlan.tci = htons(skb->vlan_tci); key->eth.vlan.tpid = skb->vlan_proto; } else { diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 95c36147a6e1..e7da29021b38 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -176,7 +176,7 @@ static void do_setup(struct net_device *netdev) netdev->vlan_features = netdev->features; netdev->hw_enc_features = netdev->features; - netdev->features |= NETIF_F_HW_VLAN_CTAG_TX; + netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; netdev->hw_features = netdev->features & ~NETIF_F_LLTX; eth_hw_addr_random(netdev); diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 8f198437c724..7387418ac514 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -485,7 +485,8 @@ static unsigned int packet_length(const struct sk_buff *skb) { unsigned int length = skb->len - ETH_HLEN; - if (skb_vlan_tagged(skb)) + if (!skb_vlan_tag_present(skb) && + eth_type_vlan(skb->protocol)) length -= VLAN_HLEN; /* Don't subtract for multiple VLAN tags. Most (all?) drivers allow diff --git a/net/sched/act_api.c b/net/sched/act_api.c index c9102172ce3b..a512b18c0088 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -341,22 +341,25 @@ int tcf_register_action(struct tc_action_ops *act, if (!act->act || !act->dump || !act->init || !act->walk || !act->lookup) return -EINVAL; + /* We have to register pernet ops before making the action ops visible, + * otherwise tcf_action_init_1() could get a partially initialized + * netns. 
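+ *
+ * Editorial note: the unregister side of this patch applies the same
+ * rule in reverse; a condensed sketch of tcf_unregister_action() below:
+ *
+ *	list_del(&act->head);
+ *	write_unlock(&act_mod_lock);
+ *	if (!err)
+ *		unregister_pernet_subsys(ops);
+ *
+ * Unpublish first, tear down pernet state last, so no reader can look
+ * up action ops whose per-netns state is already gone.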
+ */ + ret = register_pernet_subsys(ops); + if (ret) + return ret; + write_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { write_unlock(&act_mod_lock); + unregister_pernet_subsys(ops); return -EEXIST; } } list_add_tail(&act->head, &act_base); write_unlock(&act_mod_lock); - ret = register_pernet_subsys(ops); - if (ret) { - tcf_unregister_action(act, ops); - return ret; - } - return 0; } EXPORT_SYMBOL(tcf_register_action); @@ -367,8 +370,6 @@ int tcf_unregister_action(struct tc_action_ops *act, struct tc_action_ops *a; int err = -ENOENT; - unregister_pernet_subsys(ops); - write_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (a == act) { @@ -378,6 +379,8 @@ int tcf_unregister_action(struct tc_action_ops *act, } } write_unlock(&act_mod_lock); + if (!err) + unregister_pernet_subsys(ops); return err; } EXPORT_SYMBOL(tcf_unregister_action); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 11da7da0b7c4..2ee29a3375f6 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -101,7 +101,7 @@ EXPORT_SYMBOL(unregister_tcf_proto_ops); static int tfilter_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, - unsigned long fh, int event); + unsigned long fh, int event, bool unicast); static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, @@ -112,7 +112,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, for (it_chain = chain; (tp = rtnl_dereference(*it_chain)) != NULL; it_chain = &tp->next) - tfilter_notify(net, oskb, n, tp, 0, event); + tfilter_notify(net, oskb, n, tp, 0, event, false); } /* Select new prio value from the range, managed by kernel. */ @@ -319,7 +319,8 @@ replay: RCU_INIT_POINTER(*back, next); - tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); + tfilter_notify(net, skb, n, tp, fh, + RTM_DELTFILTER, false); tcf_destroy(tp, true); err = 0; goto errout; @@ -345,14 +346,14 @@ replay: struct tcf_proto *next = rtnl_dereference(tp->next); tfilter_notify(net, skb, n, tp, fh, - RTM_DELTFILTER); + RTM_DELTFILTER, false); if (tcf_destroy(tp, false)) RCU_INIT_POINTER(*back, next); } goto errout; case RTM_GETTFILTER: err = tfilter_notify(net, skb, n, tp, fh, - RTM_NEWTFILTER); + RTM_NEWTFILTER, true); goto errout; default: err = -EINVAL; @@ -367,7 +368,7 @@ replay: RCU_INIT_POINTER(tp->next, rtnl_dereference(*back)); rcu_assign_pointer(*back, tp); } - tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); + tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false); } else { if (tp_created) tcf_destroy(tp, true); @@ -419,7 +420,7 @@ nla_put_failure: static int tfilter_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, - unsigned long fh, int event) + unsigned long fh, int event, bool unicast) { struct sk_buff *skb; u32 portid = oskb ? 
NETLINK_CB(oskb).portid : 0; @@ -433,6 +434,9 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, return -EINVAL; } + if (unicast) + return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 5c7549b5b92c..41adf362936d 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -246,7 +246,7 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, } else { strp->rx_interrupted = 1; } - strp_parser_err(strp, err, desc); + strp_parser_err(strp, len, desc); break; } else if (len > strp->sk->sk_rcvbuf) { /* Message length exceeds maximum allowed */ diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index a7e42f9a405c..2bff63a73cf8 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -551,7 +551,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, *entry, *new; unsigned int nr; - nr = hash_long(from_kuid(&init_user_ns, acred->uid), cache->hashbits); + nr = auth->au_ops->hash_cred(acred, cache->hashbits); rcu_read_lock(); hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) { diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 83dffeadf20a..f1df9837f1ac 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -78,6 +78,14 @@ static struct rpc_cred *generic_bind_cred(struct rpc_task *task, return auth->au_ops->lookup_cred(auth, acred, lookupflags); } +static int +generic_hash_cred(struct auth_cred *acred, unsigned int hashbits) +{ + return hash_64(from_kgid(&init_user_ns, acred->gid) | + ((u64)from_kuid(&init_user_ns, acred->uid) << + (sizeof(gid_t) * 8)), hashbits); +} + /* * Lookup generic creds for current process */ @@ -258,6 +266,7 @@ generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred) static const struct rpc_authops generic_auth_ops = { .owner = THIS_MODULE, .au_name = "Generic", + .hash_cred = generic_hash_cred, .lookup_cred = generic_lookup_cred, .crcreate = generic_create_cred, .key_timeout = generic_key_timeout, diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 976c7812bbd5..d8bd97a5a7c9 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1298,6 +1298,12 @@ gss_destroy_cred(struct rpc_cred *cred) gss_destroy_nullcred(cred); } +static int +gss_hash_cred(struct auth_cred *acred, unsigned int hashbits) +{ + return hash_64(from_kuid(&init_user_ns, acred->uid), hashbits); +} + /* * Lookup RPCSEC_GSS cred for the current process */ @@ -1982,6 +1988,7 @@ static const struct rpc_authops authgss_ops = { .au_name = "RPCSEC_GSS", .create = gss_create, .destroy = gss_destroy, + .hash_cred = gss_hash_cred, .lookup_cred = gss_lookup_cred, .crcreate = gss_create_cred, .list_pseudoflavors = gss_mech_list_pseudoflavors, diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index a1d768a973f5..306fc0f54596 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -46,6 +46,14 @@ unx_destroy(struct rpc_auth *auth) rpcauth_clear_credcache(auth->au_credcache); } +static int +unx_hash_cred(struct auth_cred *acred, unsigned int hashbits) +{ + return hash_64(from_kgid(&init_user_ns, acred->gid) | + ((u64)from_kuid(&init_user_ns, acred->uid) << + (sizeof(gid_t) * 8)), hashbits); +} + /* * Lookup AUTH_UNIX creds for current process */ @@ -220,6 +228,7 @@ const struct rpc_authops authunix_ops = { .au_name = "UNIX", .create = 
unx_create, .destroy = unx_destroy, + .hash_cred = unx_hash_cred, .lookup_cred = unx_lookup_cred, .crcreate = unx_create_cred, }; diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 229956bf8457..ac701c28f44f 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -76,13 +76,7 @@ static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags) page = alloc_page(gfp_flags); if (page == NULL) return -ENOMEM; - buf->head[0].iov_base = page_address(page); - buf->head[0].iov_len = PAGE_SIZE; - buf->tail[0].iov_base = NULL; - buf->tail[0].iov_len = 0; - buf->page_len = 0; - buf->len = 0; - buf->buflen = PAGE_SIZE; + xdr_buf_init(buf, page_address(page), PAGE_SIZE); return 0; } diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 4d8e11f94a35..8aabe12201f8 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -353,7 +353,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd) spin_unlock(&cache_list_lock); /* start the cleaning process */ - schedule_delayed_work(&cache_cleaner, 0); + queue_delayed_work(system_power_efficient_wq, &cache_cleaner, 0); } EXPORT_SYMBOL_GPL(sunrpc_init_cache_detail); @@ -476,7 +476,8 @@ static void do_cache_clean(struct work_struct *work) delay = 0; if (delay) - schedule_delayed_work(&cache_cleaner, delay); + queue_delayed_work(system_power_efficient_wq, + &cache_cleaner, delay); } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 66f23b376fa0..34dd7b26ee5f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -184,7 +184,6 @@ static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event, struct super_block *sb) { struct dentry *dentry; - int err = 0; switch (event) { case RPC_PIPEFS_MOUNT: @@ -201,7 +200,7 @@ static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event, printk(KERN_ERR "%s: unknown event: %ld\n", __func__, event); return -ENOTSUPP; } - return err; + return 0; } static int __rpc_pipefs_event(struct rpc_clnt *clnt, unsigned long event, @@ -988,7 +987,6 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) { if (clnt != NULL) { - rpc_task_release_client(task); if (task->tk_xprt == NULL) task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi); task->tk_client = clnt; @@ -1693,6 +1691,7 @@ call_allocate(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; struct rpc_procinfo *proc = task->tk_msg.rpc_proc; + int status; dprint_status(task); @@ -1718,11 +1717,14 @@ call_allocate(struct rpc_task *task) req->rq_rcvsize = RPC_REPHDRSIZE + slack + proc->p_replen; req->rq_rcvsize <<= 2; - req->rq_buffer = xprt->ops->buf_alloc(task, - req->rq_callsize + req->rq_rcvsize); - if (req->rq_buffer != NULL) - return; + status = xprt->ops->buf_alloc(task); xprt_inject_disconnect(xprt); + if (status == 0) + return; + if (status != -ENOMEM) { + rpc_exit(task, status); + return; + } dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); @@ -1748,18 +1750,6 @@ rpc_task_force_reencode(struct rpc_task *task) task->tk_rqstp->rq_bytes_sent = 0; } -static inline void -rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) -{ - buf->head[0].iov_base = start; - buf->head[0].iov_len = len; - buf->tail[0].iov_len = 0; - buf->page_len = 0; - buf->flags = 0; - buf->len = 0; - buf->buflen = len; -} - /* * 3. 
Encode arguments of an RPC call */ @@ -1772,12 +1762,12 @@ rpc_xdr_encode(struct rpc_task *task) dprint_status(task); - rpc_xdr_buf_init(&req->rq_snd_buf, - req->rq_buffer, - req->rq_callsize); - rpc_xdr_buf_init(&req->rq_rcv_buf, - (char *)req->rq_buffer + req->rq_callsize, - req->rq_rcvsize); + xdr_buf_init(&req->rq_snd_buf, + req->rq_buffer, + req->rq_callsize); + xdr_buf_init(&req->rq_rcv_buf, + req->rq_rbuffer, + req->rq_rcvsize); p = rpc_encode_header(task); if (p == NULL) { @@ -2616,6 +2606,70 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, EXPORT_SYMBOL_GPL(rpc_clnt_test_and_add_xprt); /** + * rpc_clnt_setup_test_and_add_xprt() + * + * This is an rpc_clnt_add_xprt setup() function which returns 1 so: + * 1) caller of the test function must dereference the rpc_xprt_switch + * and the rpc_xprt. + * 2) test function must call rpc_xprt_switch_add_xprt, usually in + * the rpc_call_done routine. + * + * Upon success (return of 1), the test function adds the new + * transport to the rpc_clnt xprt switch + * + * @clnt: struct rpc_clnt to get the new transport + * @xps: the rpc_xprt_switch to hold the new transport + * @xprt: the rpc_xprt to test + * @data: a struct rpc_add_xprt_test pointer that holds the test function + * and test function call data + */ +int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt, + struct rpc_xprt_switch *xps, + struct rpc_xprt *xprt, + void *data) +{ + struct rpc_cred *cred; + struct rpc_task *task; + struct rpc_add_xprt_test *xtest = (struct rpc_add_xprt_test *)data; + int status = -EADDRINUSE; + + xprt = xprt_get(xprt); + xprt_switch_get(xps); + + if (rpc_xprt_switch_has_addr(xps, (struct sockaddr *)&xprt->addr)) + goto out_err; + + /* Test the connection */ + cred = authnull_ops.lookup_cred(NULL, NULL, 0); + task = rpc_call_null_helper(clnt, xprt, cred, + RPC_TASK_SOFT | RPC_TASK_SOFTCONN, + NULL, NULL); + put_rpccred(cred); + if (IS_ERR(task)) { + status = PTR_ERR(task); + goto out_err; + } + status = task->tk_status; + rpc_put_task(task); + + if (status < 0) + goto out_err; + + /* rpc_xprt_switch and rpc_xprt are dereferenced by add_xprt_test() */ + xtest->add_xprt_test(clnt, xprt, xtest->data); + + /* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */ + return 1; out_err: + xprt_put(xprt); + xprt_switch_put(xps); + pr_info("RPC: rpc_clnt_test_xprt failed: %d addr %s not added\n", + status, xprt->address_strings[RPC_DISPLAY_ADDR]); + return status; +} +EXPORT_SYMBOL_GPL(rpc_clnt_setup_test_and_add_xprt); + +/** + * rpc_clnt_add_xprt - Add a new transport to a rpc_clnt + * @clnt: pointer to struct rpc_clnt + * @xprtargs: pointer to struct xprt_create @@ -2697,6 +2751,34 @@ rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo) } EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout); +void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt) +{ + xprt_switch_put(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); +} +EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_put); + +void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt) +{ + rpc_xprt_switch_add_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch), + xprt); +} +EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_add_xprt); + +bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, + const struct sockaddr *sap) +{ + struct rpc_xprt_switch *xps; + bool ret; + + xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch); + + rcu_read_lock(); + ret = rpc_xprt_switch_has_addr(xps, sap); + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_has_addr); + #if
IS_ENABLED(CONFIG_SUNRPC_DEBUG) static void rpc_show_header(void) { diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 9ae588511aaf..5db68b371db2 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -849,14 +849,17 @@ static void rpc_async_schedule(struct work_struct *work) } /** - * rpc_malloc - allocate an RPC buffer - * @task: RPC task that will use this buffer - * @size: requested byte size + * rpc_malloc - allocate RPC buffer resources + * @task: RPC task + * + * A single memory region is allocated, which is split between the + * RPC call and RPC reply that this task is being used for. When + * this RPC is retired, the memory is released by calling rpc_free. * * To prevent rpciod from hanging, this allocator never sleeps, - * returning NULL and suppressing warning if the request cannot be serviced - * immediately. - * The caller can arrange to sleep in a way that is safe for rpciod. + * returning -ENOMEM and suppressing warning if the request cannot + * be serviced immediately. The caller can arrange to sleep in a + * way that is safe for rpciod. * * Most requests are 'small' (under 2KiB) and can be serviced from a * mempool, ensuring that NFS reads and writes can always proceed, @@ -865,8 +868,10 @@ static void rpc_async_schedule(struct work_struct *work) * In order to avoid memory starvation triggering more writebacks of * NFS requests, we avoid using GFP_KERNEL. */ -void *rpc_malloc(struct rpc_task *task, size_t size) +int rpc_malloc(struct rpc_task *task) { + struct rpc_rqst *rqst = task->tk_rqstp; + size_t size = rqst->rq_callsize + rqst->rq_rcvsize; struct rpc_buffer *buf; gfp_t gfp = GFP_NOIO | __GFP_NOWARN; @@ -880,28 +885,28 @@ void *rpc_malloc(struct rpc_task *task, size_t size) buf = kmalloc(size, gfp); if (!buf) - return NULL; + return -ENOMEM; buf->len = size; dprintk("RPC: %5u allocated buffer of size %zu at %p\n", task->tk_pid, size, buf); - return &buf->data; + rqst->rq_buffer = buf->data; + rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize; + return 0; } EXPORT_SYMBOL_GPL(rpc_malloc); /** - * rpc_free - free buffer allocated via rpc_malloc - * @buffer: buffer to free + * rpc_free - free RPC buffer resources allocated via rpc_malloc + * @task: RPC task * */ -void rpc_free(void *buffer) +void rpc_free(struct rpc_task *task) { + void *buffer = task->tk_rqstp->rq_buffer; size_t size; struct rpc_buffer *buf; - if (!buffer) - return; - buf = container_of(buffer, struct rpc_buffer, data); size = buf->len; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index c5b0cb4f4056..7c8070ec93c8 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -401,6 +401,21 @@ int svc_bind(struct svc_serv *serv, struct net *net) } EXPORT_SYMBOL_GPL(svc_bind); +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +static void +__svc_init_bc(struct svc_serv *serv) +{ + INIT_LIST_HEAD(&serv->sv_cb_list); + spin_lock_init(&serv->sv_cb_lock); + init_waitqueue_head(&serv->sv_cb_waitq); +} +#else +static void +__svc_init_bc(struct svc_serv *serv) +{ +} +#endif + /* * Create an RPC service */ @@ -443,6 +458,8 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, init_timer(&serv->sv_temptimer); spin_lock_init(&serv->sv_lock); + __svc_init_bc(serv); + serv->sv_nrpools = npools; serv->sv_pools = kcalloc(serv->sv_nrpools, sizeof(struct svc_pool), diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index c4f3cc0c0775..7f1071e103ca 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -767,7 +767,7 @@ static void xdr_set_next_page(struct xdr_stream *xdr) newbase -= 
xdr->buf->page_base; if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0) - xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len); + xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2); } static bool xdr_set_next_buffer(struct xdr_stream *xdr) @@ -776,7 +776,7 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr) xdr_set_next_page(xdr); else if (xdr->iov == xdr->buf->head) { if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0) - xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len); + xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2); } return xdr->p != xdr->end; } @@ -859,12 +859,15 @@ EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer); static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes) { __be32 *p; - void *cpdest = xdr->scratch.iov_base; + char *cpdest = xdr->scratch.iov_base; size_t cplen = (char *)xdr->end - (char *)xdr->p; if (nbytes > xdr->scratch.iov_len) return NULL; - memcpy(cpdest, xdr->p, cplen); + p = __xdr_inline_decode(xdr, cplen); + if (p == NULL) + return NULL; + memcpy(cpdest, p, cplen); cpdest += cplen; nbytes -= cplen; if (!xdr_set_next_buffer(xdr)) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ea244b29138b..685e6d225414 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1295,7 +1295,7 @@ void xprt_release(struct rpc_task *task) xprt_schedule_autodisconnect(xprt); spin_unlock_bh(&xprt->transport_lock); if (req->rq_buffer) - xprt->ops->buf_free(req->rq_buffer); + xprt->ops->buf_free(task); xprt_inject_disconnect(xprt); if (req->rq_cred != NULL) put_rpccred(req->rq_cred); diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 66c9d63f4797..ae92a9e9ba52 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -15,6 +15,7 @@ #include <asm/cmpxchg.h> #include <linux/spinlock.h> #include <linux/sunrpc/xprt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/xprtmultipath.h> typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct list_head *head, @@ -49,7 +50,8 @@ void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, if (xprt == NULL) return; spin_lock(&xps->xps_lock); - if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) + if ((xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) && + !rpc_xprt_switch_has_addr(xps, (struct sockaddr *)&xprt->addr)) xprt_switch_add_xprt_locked(xps, xprt); spin_unlock(&xps->xps_lock); } @@ -232,6 +234,26 @@ struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi) return xprt_switch_find_current_entry(head, xpi->xpi_cursor); } +bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, + const struct sockaddr *sap) +{ + struct list_head *head; + struct rpc_xprt *pos; + + if (xps == NULL || sap == NULL) + return false; + + head = &xps->xps_xprt_list; + list_for_each_entry_rcu(pos, head, xprt_switch) { + if (rpc_cmp_addr_port(sap, (struct sockaddr *)&pos->addr)) { + pr_info("RPC: addr %s already in xprt switch\n", + pos->address_strings[RPC_DISPLAY_ADDR]); + return true; + } + } + return false; +} + static struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head, const struct rpc_xprt *cur) diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 87762d976b63..2c472e1b4827 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -27,7 +27,7 @@ static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, list_del(&req->rl_all); spin_unlock(&buf->rb_reqslock); - rpcrdma_destroy_req(&r_xprt->rx_ia, req); + rpcrdma_destroy_req(req); kfree(rqst); } @@ -35,10 +35,8 @@ static void 
rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) { - struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_regbuf *rb; struct rpcrdma_req *req; - struct xdr_buf *buf; size_t size; req = rpcrdma_create_req(r_xprt); @@ -46,30 +44,19 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, return PTR_ERR(req); req->rl_backchannel = true; - size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst); - rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL); + rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, + DMA_TO_DEVICE, GFP_KERNEL); if (IS_ERR(rb)) goto out_fail; req->rl_rdmabuf = rb; - size += RPCRDMA_INLINE_READ_THRESHOLD(rqst); - rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL); + size = r_xprt->rx_data.inline_rsize; + rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL); if (IS_ERR(rb)) goto out_fail; - rb->rg_owner = req; req->rl_sendbuf = rb; - /* so that rpcr_to_rdmar works when receiving a request */ - rqst->rq_buffer = (void *)req->rl_sendbuf->rg_base; - - buf = &rqst->rq_snd_buf; - buf->head[0].iov_base = rqst->rq_buffer; - buf->head[0].iov_len = 0; - buf->tail[0].iov_base = NULL; - buf->tail[0].iov_len = 0; - buf->page_len = 0; - buf->len = 0; - buf->buflen = size; - + xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, size); + rpcrdma_set_xprtdata(rqst, req); return 0; out_fail: @@ -219,7 +206,6 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpcrdma_msg *headerp; - size_t rpclen; headerp = rdmab_to_msg(req->rl_rdmabuf); headerp->rm_xid = rqst->rq_xid; @@ -231,26 +217,9 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) headerp->rm_body.rm_chunks[1] = xdr_zero; headerp->rm_body.rm_chunks[2] = xdr_zero; - rpclen = rqst->rq_svec[0].iov_len; - -#ifdef RPCRDMA_BACKCHANNEL_DEBUG - pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n", - __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf)); - pr_info("RPC: %s: RPC/RDMA: %*ph\n", - __func__, (int)RPCRDMA_HDRLEN_MIN, headerp); - pr_info("RPC: %s: RPC: %*ph\n", - __func__, (int)rpclen, rqst->rq_svec[0].iov_base); -#endif - - req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); - req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN; - req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf); - - req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf); - req->rl_send_iov[1].length = rpclen; - req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf); - - req->rl_niovs = 2; + if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, RPCRDMA_HDRLEN_MIN, + &rqst->rq_snd_buf, rpcrdma_noch)) + return -EIO; return 0; } @@ -402,7 +371,7 @@ out_overflow: out_short: pr_warn("RPC/RDMA short backward direction call\n"); - if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) + if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep)) xprt_disconnect_done(xprt); else pr_warn("RPC: %s: reposting rep %p\n", diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 21cb3b150b37..1ebb09e1ac4f 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -160,9 +160,8 @@ static int fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_create_data_internal *cdata) { - rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1, - RPCRDMA_MAX_DATA_SEGS / - RPCRDMA_MAX_FMR_SGES)); + ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / + RPCRDMA_MAX_FMR_SGES); return 0; } @@ -274,6 +273,7 @@ 
fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) */ list_for_each_entry(mw, &req->rl_registered, mw_list) list_add_tail(&mw->fmr.fm_mr->list, &unmap_list); + r_xprt->rx_stats.local_inv_needed++; rc = ib_unmap_fmr(&unmap_list); if (rc) goto out_reset; @@ -331,4 +331,5 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { .ro_init_mr = fmr_op_init_mr, .ro_release_mr = fmr_op_release_mr, .ro_displayname = "fmr", + .ro_send_w_inv_ok = 0, }; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 892b5e1d9b09..210949562786 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -67,6 +67,8 @@ * pending send queue WRs before the transport is reconnected. */ +#include <linux/sunrpc/rpc_rdma.h> + #include "xprt_rdma.h" #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) @@ -161,7 +163,7 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) return PTR_ERR(f->fr_mr); } - dprintk("RPC: %s: recovered FRMR %p\n", __func__, r); + dprintk("RPC: %s: recovered FRMR %p\n", __func__, f); f->fr_state = FRMR_IS_INVALID; return 0; } @@ -242,9 +244,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, depth; } - rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1, - RPCRDMA_MAX_DATA_SEGS / - ia->ri_max_frmr_depth)); + ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / + ia->ri_max_frmr_depth); return 0; } @@ -329,7 +330,7 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); if (wc->status != IB_WC_SUCCESS) __frwr_sendcompletion_flush(wc, frmr, "localinv"); - complete_all(&frmr->fr_linv_done); + complete(&frmr->fr_linv_done); } /* Post a REG_MR Work Request to register a memory region @@ -396,7 +397,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, goto out_mapmr_err; dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n", - __func__, mw, mw->mw_nents, mr->length); + __func__, frmr, mw->mw_nents, mr->length); key = (u8)(mr->rkey & 0x000000FF); ib_update_fast_reg_key(mr, ++key); @@ -449,6 +450,8 @@ __frwr_prepare_linv_wr(struct rpcrdma_mw *mw) struct rpcrdma_frmr *f = &mw->frmr; struct ib_send_wr *invalidate_wr; + dprintk("RPC: %s: invalidating frmr %p\n", __func__, f); + f->fr_state = FRMR_IS_INVALID; invalidate_wr = &f->fr_invwr; @@ -472,6 +475,7 @@ static void frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) { struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr; + struct rpcrdma_rep *rep = req->rl_reply; struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_mw *mw, *tmp; struct rpcrdma_frmr *f; @@ -487,6 +491,12 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) f = NULL; invalidate_wrs = pos = prev = NULL; list_for_each_entry(mw, &req->rl_registered, mw_list) { + if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) && + (mw->mw_handle == rep->rr_inv_rkey)) { + mw->frmr.fr_state = FRMR_IS_INVALID; + continue; + } + pos = __frwr_prepare_linv_wr(mw); if (!invalidate_wrs) @@ -496,6 +506,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) prev = pos; f = &mw->frmr; } + if (!f) + goto unmap; /* Strong send queue ordering guarantees that when the * last WR in the chain completes, all WRs in the chain @@ -510,6 +522,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * replaces the QP. The RPC reply handler won't call us * unless ri_id->qp is a valid pointer. 
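+ *
+ * Editorial note: with Remote Invalidation the loop above already
+ * skipped any MW the responder invalidated for us:
+ *
+ *	if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) &&
+ *	    (mw->mw_handle == rep->rr_inv_rkey)) {
+ *		mw->frmr.fr_state = FRMR_IS_INVALID;
+ *		continue;
+ *	}
+ *
+ * If every MW was handled that way, f is still NULL and control jumps
+ * straight to the unmap phase instead of posting LOCAL_INV WRs.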
*/ + r_xprt->rx_stats.local_inv_needed++; rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr); if (rc) goto reset_mrs; @@ -521,6 +534,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) */ unmap: list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) { + dprintk("RPC: %s: unmapping frmr %p\n", + __func__, &mw->frmr); list_del_init(&mw->mw_list); ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir); @@ -576,4 +591,5 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { .ro_init_mr = frwr_op_init_mr, .ro_release_mr = frwr_op_release_mr, .ro_displayname = "frwr", + .ro_send_w_inv_ok = RPCRDMA_CMP_F_SND_W_INV_OK, }; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index a47f170b20ef..d987c2d3dd6e 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -53,14 +53,6 @@ # define RPCDBG_FACILITY RPCDBG_TRANS #endif -enum rpcrdma_chunktype { - rpcrdma_noch = 0, - rpcrdma_readch, - rpcrdma_areadch, - rpcrdma_writech, - rpcrdma_replych -}; - static const char transfertypes[][12] = { "inline", /* no chunks */ "read list", /* some argument via rdma read */ @@ -118,10 +110,12 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs) return size; } -void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *ia, - struct rpcrdma_create_data_internal *cdata, - unsigned int maxsegs) +void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt) { + struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + unsigned int maxsegs = ia->ri_max_segs; + ia->ri_max_inline_write = cdata->inline_wsize - rpcrdma_max_call_header_size(maxsegs); ia->ri_max_inline_read = cdata->inline_rsize - @@ -155,42 +149,6 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt, return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read; } -static int -rpcrdma_tail_pullup(struct xdr_buf *buf) -{ - size_t tlen = buf->tail[0].iov_len; - size_t skip = tlen & 3; - - /* Do not include the tail if it is only an XDR pad */ - if (tlen < 4) - return 0; - - /* xdr_write_pages() adds a pad at the beginning of the tail - * if the content in "buf->pages" is unaligned. Force the - * tail's actual content to land at the next XDR position - * after the head instead. - */ - if (skip) { - unsigned char *src, *dst; - unsigned int count; - - src = buf->tail[0].iov_base; - dst = buf->head[0].iov_base; - dst += buf->head[0].iov_len; - - src += skip; - tlen -= skip; - - dprintk("RPC: %s: skip=%zu, memmove(%p, %p, %zu)\n", - __func__, skip, dst, src, tlen); - - for (count = tlen; count; count--) - *dst++ = *src++; - } - - return tlen; -} - /* Split "vec" on page boundaries into segments. FMR registers pages, * not a byte range. Other modes coalesce these segments into a single * MR when they can. @@ -229,7 +187,8 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n) static int rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, - enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg) + enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, + bool reminv_expected) { int len, n, p, page_base; struct page **ppages; @@ -271,6 +230,13 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, if (type == rpcrdma_readch) return n; + /* When encoding the Write list, some servers need to see an extra + * segment for odd-length Write chunks. The upper layer provides + * space in the tail iovec for this purpose. 
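+ *
+ * Editorial note: only the Write-direction call sites in this patch
+ * pass a meaningful flag here; the Read list encoder hardcodes false:
+ *
+ *	rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false);
+ *	rpcrdma_convert_iovs(&rqst->rq_rcv_buf, ..., wtype, seg,
+ *			     r_xprt->rx_ia.ri_reminv_expected);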
+ */ + if (type == rpcrdma_writech && reminv_expected) + return n; + if (xdrbuf->tail[0].iov_len) { /* the rpcrdma protocol allows us to omit any trailing * xdr pad bytes, saving the server an RDMA operation. */ @@ -327,7 +293,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, if (rtype == rpcrdma_areadch) pos = 0; seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg); + nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false); if (nsegs < 0) return ERR_PTR(nsegs); @@ -391,7 +357,8 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, seg = req->rl_segments; nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, rqst->rq_rcv_buf.head[0].iov_len, - wtype, seg); + wtype, seg, + r_xprt->rx_ia.ri_reminv_expected); if (nsegs < 0) return ERR_PTR(nsegs); @@ -456,7 +423,8 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, } seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg); + nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg, + r_xprt->rx_ia.ri_reminv_expected); if (nsegs < 0) return ERR_PTR(nsegs); @@ -491,74 +459,184 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, return iptr; } -/* - * Copy write data inline. - * This function is used for "small" requests. Data which is passed - * to RPC via iovecs (or page list) is copied directly into the - * pre-registered memory buffer for this request. For small amounts - * of data, this is efficient. The cutoff value is tunable. +/* Prepare the RPC-over-RDMA header SGE. */ -static void rpcrdma_inline_pullup(struct rpc_rqst *rqst) +static bool +rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req, + u32 len) { - int i, npages, curlen; - int copy_len; - unsigned char *srcp, *destp; - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); - int page_base; - struct page **ppages; + struct rpcrdma_regbuf *rb = req->rl_rdmabuf; + struct ib_sge *sge = &req->rl_send_sge[0]; + + if (unlikely(!rpcrdma_regbuf_is_mapped(rb))) { + if (!__rpcrdma_dma_map_regbuf(ia, rb)) + return false; + sge->addr = rdmab_addr(rb); + sge->lkey = rdmab_lkey(rb); + } + sge->length = len; + + ib_dma_sync_single_for_device(ia->ri_device, sge->addr, + sge->length, DMA_TO_DEVICE); + req->rl_send_wr.num_sge++; + return true; +} - destp = rqst->rq_svec[0].iov_base; - curlen = rqst->rq_svec[0].iov_len; - destp += curlen; +/* Prepare the Send SGEs. The head and tail iovec, and each entry + * in the page list, gets its own SGE. + */ +static bool +rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, + struct xdr_buf *xdr, enum rpcrdma_chunktype rtype) +{ + unsigned int sge_no, page_base, len, remaining; + struct rpcrdma_regbuf *rb = req->rl_sendbuf; + struct ib_device *device = ia->ri_device; + struct ib_sge *sge = req->rl_send_sge; + u32 lkey = ia->ri_pd->local_dma_lkey; + struct page *page, **ppages; + + /* The head iovec is straightforward, as it is already + * DMA-mapped. Sync the content that has changed. + */ + if (!rpcrdma_dma_map_regbuf(ia, rb)) + return false; + sge_no = 1; + sge[sge_no].addr = rdmab_addr(rb); + sge[sge_no].length = xdr->head[0].iov_len; + sge[sge_no].lkey = rdmab_lkey(rb); + ib_dma_sync_single_for_device(device, sge[sge_no].addr, + sge[sge_no].length, DMA_TO_DEVICE); + + /* If there is a Read chunk, the page list is being handled + * via explicit RDMA, and thus is skipped here. 
However, the + * tail iovec may include an XDR pad for the page list, as + * well as additional content, and may not reside in the + * same page as the head iovec. + */ + if (rtype == rpcrdma_readch) { + len = xdr->tail[0].iov_len; - dprintk("RPC: %s: destp 0x%p len %d hdrlen %d\n", - __func__, destp, rqst->rq_slen, curlen); + /* Do not include the tail if it is only an XDR pad */ + if (len < 4) + goto out; - copy_len = rqst->rq_snd_buf.page_len; + page = virt_to_page(xdr->tail[0].iov_base); + page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK; - if (rqst->rq_snd_buf.tail[0].iov_len) { - curlen = rqst->rq_snd_buf.tail[0].iov_len; - if (destp + copy_len != rqst->rq_snd_buf.tail[0].iov_base) { - memmove(destp + copy_len, - rqst->rq_snd_buf.tail[0].iov_base, curlen); - r_xprt->rx_stats.pullup_copy_count += curlen; + /* If the content in the page list is an odd length, + * xdr_write_pages() has added a pad at the beginning + * of the tail iovec. Force the tail's non-pad content + * to land at the next XDR position in the Send message. + */ + page_base += len & 3; + len -= len & 3; + goto map_tail; + } + + /* If there is a page list present, temporarily DMA map + * and prepare an SGE for each page to be sent. + */ + if (xdr->page_len) { + ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); + page_base = xdr->page_base & ~PAGE_MASK; + remaining = xdr->page_len; + while (remaining) { + sge_no++; + if (sge_no > RPCRDMA_MAX_SEND_SGES - 2) + goto out_mapping_overflow; + + len = min_t(u32, PAGE_SIZE - page_base, remaining); + sge[sge_no].addr = ib_dma_map_page(device, *ppages, + page_base, len, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(device, sge[sge_no].addr)) + goto out_mapping_err; + sge[sge_no].length = len; + sge[sge_no].lkey = lkey; + + req->rl_mapped_sges++; + ppages++; + remaining -= len; + page_base = 0; } - dprintk("RPC: %s: tail destp 0x%p len %d\n", - __func__, destp + copy_len, curlen); - rqst->rq_svec[0].iov_len += curlen; } - r_xprt->rx_stats.pullup_copy_count += copy_len; - page_base = rqst->rq_snd_buf.page_base; - ppages = rqst->rq_snd_buf.pages + (page_base >> PAGE_SHIFT); - page_base &= ~PAGE_MASK; - npages = PAGE_ALIGN(page_base+copy_len) >> PAGE_SHIFT; - for (i = 0; copy_len && i < npages; i++) { - curlen = PAGE_SIZE - page_base; - if (curlen > copy_len) - curlen = copy_len; - dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n", - __func__, i, destp, copy_len, curlen); - srcp = kmap_atomic(ppages[i]); - memcpy(destp, srcp+page_base, curlen); - kunmap_atomic(srcp); - rqst->rq_svec[0].iov_len += curlen; - destp += curlen; - copy_len -= curlen; - page_base = 0; + /* The tail iovec is not always constructed in the same + * page where the head iovec resides (see, for example, + * gss_wrap_req_priv). To neatly accommodate that case, + * DMA map it separately. 
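+ *
+ * Editorial sketch of the idiom used below, which assumes the tail
+ * iovec lives in lowmem so that virt_to_page() is legal:
+ *
+ *	page = virt_to_page(xdr->tail[0].iov_base);
+ *	page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
+ *
+ * i.e. split the kernel virtual address into the page and the offset
+ * that ib_dma_map_page() consumes.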
+ */ + if (xdr->tail[0].iov_len) { + page = virt_to_page(xdr->tail[0].iov_base); + page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK; + len = xdr->tail[0].iov_len; + +map_tail: + sge_no++; + sge[sge_no].addr = ib_dma_map_page(device, page, + page_base, len, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(device, sge[sge_no].addr)) + goto out_mapping_err; + sge[sge_no].length = len; + sge[sge_no].lkey = lkey; + req->rl_mapped_sges++; } - /* header now contains entire send message */ + +out: + req->rl_send_wr.num_sge = sge_no + 1; + return true; + +out_mapping_overflow: + pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no); + return false; + +out_mapping_err: + pr_err("rpcrdma: Send mapping error\n"); + return false; +} + +bool +rpcrdma_prepare_send_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, + u32 hdrlen, struct xdr_buf *xdr, + enum rpcrdma_chunktype rtype) +{ + req->rl_send_wr.num_sge = 0; + req->rl_mapped_sges = 0; + + if (!rpcrdma_prepare_hdr_sge(ia, req, hdrlen)) + goto out_map; + + if (rtype != rpcrdma_areadch) + if (!rpcrdma_prepare_msg_sges(ia, req, xdr, rtype)) + goto out_map; + + return true; + +out_map: + pr_err("rpcrdma: failed to DMA map a Send buffer\n"); + return false; +} + +void +rpcrdma_unmap_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req) +{ + struct ib_device *device = ia->ri_device; + struct ib_sge *sge; + int count; + + sge = &req->rl_send_sge[2]; + for (count = req->rl_mapped_sges; count--; sge++) + ib_dma_unmap_page(device, sge->addr, sge->length, + DMA_TO_DEVICE); + req->rl_mapped_sges = 0; } /* * Marshal a request: the primary job of this routine is to choose * the transfer modes. See comments below. * - * Prepares up to two IOVs per Call message: - * - * [0] -- RPC RDMA header - * [1] -- the RPC header/data - * * Returns zero on success, otherwise a negative errno. 
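+ *
+ * As of this patch the routine no longer prepares a fixed pair of
+ * iovs; rpcrdma_prepare_send_sges() builds one Send SGE for the
+ * transport header and one per discontiguous piece of the RPC
+ * message (head, page list pages, tail).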
*/ @@ -626,12 +704,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) */ if (rpcrdma_args_inline(r_xprt, rqst)) { rtype = rpcrdma_noch; - rpcrdma_inline_pullup(rqst); - rpclen = rqst->rq_svec[0].iov_len; + rpclen = rqst->rq_snd_buf.len; } else if (ddp_allowed && rqst->rq_snd_buf.flags & XDRBUF_WRITE) { rtype = rpcrdma_readch; - rpclen = rqst->rq_svec[0].iov_len; - rpclen += rpcrdma_tail_pullup(&rqst->rq_snd_buf); + rpclen = rqst->rq_snd_buf.head[0].iov_len + + rqst->rq_snd_buf.tail[0].iov_len; } else { r_xprt->rx_stats.nomsg_call_count++; headerp->rm_type = htonl(RDMA_NOMSG); @@ -673,34 +750,18 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) goto out_unmap; hdrlen = (unsigned char *)iptr - (unsigned char *)headerp; - if (hdrlen + rpclen > RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) - goto out_overflow; - dprintk("RPC: %5u %s: %s/%s: hdrlen %zd rpclen %zd\n", rqst->rq_task->tk_pid, __func__, transfertypes[rtype], transfertypes[wtype], hdrlen, rpclen); - req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); - req->rl_send_iov[0].length = hdrlen; - req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf); - - req->rl_niovs = 1; - if (rtype == rpcrdma_areadch) - return 0; - - req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf); - req->rl_send_iov[1].length = rpclen; - req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf); - - req->rl_niovs = 2; + if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen, + &rqst->rq_snd_buf, rtype)) { + iptr = ERR_PTR(-EIO); + goto out_unmap; + } return 0; -out_overflow: - pr_err("rpcrdma: send overflow: hdrlen %zd rpclen %zu %s/%s\n", - hdrlen, rpclen, transfertypes[rtype], transfertypes[wtype]); - iptr = ERR_PTR(-EIO); - out_unmap: r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); return PTR_ERR(iptr); @@ -916,8 +977,10 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep) * allowed to timeout, to discover the errors at that time. */ void -rpcrdma_reply_handler(struct rpcrdma_rep *rep) +rpcrdma_reply_handler(struct work_struct *work) { + struct rpcrdma_rep *rep = + container_of(work, struct rpcrdma_rep, rr_work); struct rpcrdma_msg *headerp; struct rpcrdma_req *req; struct rpc_rqst *rqst; @@ -1132,6 +1195,6 @@ out_duplicate: repost: r_xprt->rx_stats.bad_reply_count++; - if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) + if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep)) rpcrdma_recv_buffer_put(rep); } diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index a2a7519b0f23..2d8545c34095 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -129,7 +129,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, ret = -EIO; goto out_unmap; } - atomic_inc(&rdma->sc_dma_used); + svc_rdma_count_mappings(rdma, ctxt); memset(&send_wr, 0, sizeof(send_wr)); ctxt->cqe.done = svc_rdma_wc_send; @@ -159,33 +159,34 @@ out_unmap: /* Server-side transport endpoint wants a whole page for its send * buffer. The client RPC code constructs the RPC header in this * buffer before it invokes ->send_request. - * - * Returns NULL if there was a temporary allocation failure. 
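+ *
+ * Returns zero on success, -EINVAL if the request exceeds a page
+ * (unexpected for a backchannel request), or -ENOMEM on a transient
+ * page allocation failure.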
*/ -static void * -xprt_rdma_bc_allocate(struct rpc_task *task, size_t size) +static int +xprt_rdma_bc_allocate(struct rpc_task *task) { struct rpc_rqst *rqst = task->tk_rqstp; struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt; + size_t size = rqst->rq_callsize; struct svcxprt_rdma *rdma; struct page *page; rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); - /* Prevent an infinite loop: try to make this case work */ - if (size > PAGE_SIZE) + if (size > PAGE_SIZE) { WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n", size); + return -EINVAL; + } page = alloc_page(RPCRDMA_DEF_GFP); if (!page) - return NULL; + return -ENOMEM; - return page_address(page); + rqst->rq_buffer = page_address(page); + return 0; } static void -xprt_rdma_bc_free(void *buffer) +xprt_rdma_bc_free(struct rpc_task *task) { /* No-op: ctxt and page have already been freed. */ } diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 2c25606f2561..ad1df979b3f0 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -159,7 +159,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, ctxt->sge[pno].addr); if (ret) goto err; - atomic_inc(&xprt->sc_dma_used); + svc_rdma_count_mappings(xprt, ctxt); ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey; ctxt->sge[pno].length = len; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 54d533300620..f5a91edcd233 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -225,6 +225,48 @@ svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp, return rp_ary; } +/* RPC-over-RDMA Version One private extension: Remote Invalidation. + * Responder's choice: requester signals it can handle Send With + * Invalidate, and responder chooses one rkey to invalidate. + * + * Find a candidate rkey to invalidate when sending a reply. Picks the + * first rkey it finds in the chunks lists. + * + * Returns zero if RPC's chunk lists are empty. 
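+ *
+ * Editorial note: the caller gates this on the negotiated flag, and a
+ * zero return falls back to a plain Send; condensed from the sendto
+ * path later in this patch:
+ *
+ *	if (rdma->sc_snd_w_inv)
+ *		inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_ary, rp_ary);
+ *	...
+ *	send_wr.opcode = inv_rkey ? IB_WR_SEND_WITH_INV : IB_WR_SEND;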
+ */ +static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp, + struct rpcrdma_write_array *wr_ary, + struct rpcrdma_write_array *rp_ary) +{ + struct rpcrdma_read_chunk *rd_ary; + struct rpcrdma_segment *arg_ch; + u32 inv_rkey; + + inv_rkey = 0; + + rd_ary = svc_rdma_get_read_chunk(rdma_argp); + if (rd_ary) { + inv_rkey = be32_to_cpu(rd_ary->rc_target.rs_handle); + goto out; + } + + if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) { + arg_ch = &wr_ary->wc_array[0].wc_target; + inv_rkey = be32_to_cpu(arg_ch->rs_handle); + goto out; + } + + if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) { + arg_ch = &rp_ary->wc_array[0].wc_target; + inv_rkey = be32_to_cpu(arg_ch->rs_handle); + goto out; + } + +out: + dprintk("svcrdma: Send With Invalidate rkey=%08x\n", inv_rkey); + return inv_rkey; +} + /* Assumptions: * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE */ @@ -280,7 +322,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge[sge_no].addr)) goto err; - atomic_inc(&xprt->sc_dma_used); + svc_rdma_count_mappings(xprt, ctxt); sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey; ctxt->count++; sge_off = 0; @@ -464,7 +506,8 @@ static int send_reply(struct svcxprt_rdma *rdma, struct page *page, struct rpcrdma_msg *rdma_resp, struct svc_rdma_req_map *vec, - int byte_count) + int byte_count, + u32 inv_rkey) { struct svc_rdma_op_ctxt *ctxt; struct ib_send_wr send_wr; @@ -489,7 +532,7 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->sge[0].length, DMA_TO_DEVICE); if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) goto err; - atomic_inc(&rdma->sc_dma_used); + svc_rdma_count_mappings(rdma, ctxt); ctxt->direction = DMA_TO_DEVICE; @@ -505,7 +548,7 @@ static int send_reply(struct svcxprt_rdma *rdma, if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[sge_no].addr)) goto err; - atomic_inc(&rdma->sc_dma_used); + svc_rdma_count_mappings(rdma, ctxt); ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey; ctxt->sge[sge_no].length = sge_bytes; } @@ -523,23 +566,9 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; ctxt->count++; rqstp->rq_respages[page_no] = NULL; - /* - * If there are more pages than SGE, terminate SGE - * list so that svc_rdma_unmap_dma doesn't attempt to - * unmap garbage. - */ - if (page_no+1 >= sge_no) - ctxt->sge[page_no+1].length = 0; } rqstp->rq_next_page = rqstp->rq_respages + 1; - /* The loop above bumps sc_dma_used for each sge. The - * xdr_buf.tail gets a separate sge, but resides in the - * same page as xdr_buf.head. Don't count it twice. 
- */ - if (sge_no > ctxt->count) - atomic_dec(&rdma->sc_dma_used); - if (sge_no > rdma->sc_max_sge) { pr_err("svcrdma: Too many sges (%d)\n", sge_no); goto err; @@ -549,7 +578,11 @@ static int send_reply(struct svcxprt_rdma *rdma, send_wr.wr_cqe = &ctxt->cqe; send_wr.sg_list = ctxt->sge; send_wr.num_sge = sge_no; - send_wr.opcode = IB_WR_SEND; + if (inv_rkey) { + send_wr.opcode = IB_WR_SEND_WITH_INV; + send_wr.ex.invalidate_rkey = inv_rkey; + } else + send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; ret = svc_rdma_send(rdma, &send_wr); @@ -581,6 +614,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) int inline_bytes; struct page *res_page; struct svc_rdma_req_map *vec; + u32 inv_rkey; dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); @@ -591,6 +625,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) wr_ary = svc_rdma_get_write_array(rdma_argp); rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary); + inv_rkey = 0; + if (rdma->sc_snd_w_inv) + inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_ary, rp_ary); + /* Build an req vec for the XDR */ vec = svc_rdma_get_req_map(rdma); ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL); @@ -633,9 +671,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) goto err1; ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec, - inline_bytes); + inline_bytes, inv_rkey); if (ret < 0) - goto err1; + goto err0; svc_rdma_put_req_map(rdma, vec); dprintk("svcrdma: send_reply returns %d\n", ret); @@ -692,7 +730,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, svc_rdma_put_context(ctxt, 1); return; } - atomic_inc(&xprt->sc_dma_used); + svc_rdma_count_mappings(xprt, ctxt); /* Prepare SEND WR */ memset(&err_wr, 0, sizeof(err_wr)); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index eb2857f52b05..6864fb967038 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -198,6 +198,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) out: ctxt->count = 0; + ctxt->mapped_sges = 0; ctxt->frmr = NULL; return ctxt; @@ -221,22 +222,27 @@ out_empty: void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) { struct svcxprt_rdma *xprt = ctxt->xprt; - int i; - for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { + struct ib_device *device = xprt->sc_cm_id->device; + u32 lkey = xprt->sc_pd->local_dma_lkey; + unsigned int i, count; + + for (count = 0, i = 0; i < ctxt->mapped_sges; i++) { /* * Unmap the DMA addr in the SGE if the lkey matches * the local_dma_lkey, otherwise, ignore it since it is * an FRMR lkey and will be unmapped later when the * last WR that uses it completes. 
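+ *
+ * Editorial note: svc_rdma_count_mappings() (introduced by this patch,
+ * presumably bumping both ctxt->mapped_sges and sc_dma_used) replaces
+ * the raw atomic_inc() calls, so the loop below can tally just the
+ * lkey-matching subset and retire it in one step:
+ *
+ *	ctxt->mapped_sges = 0;
+ *	atomic_sub(count, &xprt->sc_dma_used);
+ *
+ * instead of one atomic_dec() per unmapped SGE.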
*/ - if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) { - atomic_dec(&xprt->sc_dma_used); - ib_dma_unmap_page(xprt->sc_cm_id->device, + if (ctxt->sge[i].lkey == lkey) { + count++; + ib_dma_unmap_page(device, ctxt->sge[i].addr, ctxt->sge[i].length, ctxt->direction); } } + ctxt->mapped_sges = 0; + atomic_sub(count, &xprt->sc_dma_used); } void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) @@ -600,7 +606,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags) DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) goto err_put_ctxt; - atomic_inc(&xprt->sc_dma_used); + svc_rdma_count_mappings(xprt, ctxt); ctxt->sge[sge_no].addr = pa; ctxt->sge[sge_no].length = PAGE_SIZE; ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey; @@ -642,6 +648,26 @@ int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags) return ret; } +static void +svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt, + struct rdma_conn_param *param) +{ + const struct rpcrdma_connect_private *pmsg = param->private_data; + + if (pmsg && + pmsg->cp_magic == rpcrdma_cmp_magic && + pmsg->cp_version == RPCRDMA_CMP_VERSION) { + newxprt->sc_snd_w_inv = pmsg->cp_flags & + RPCRDMA_CMP_F_SND_W_INV_OK; + + dprintk("svcrdma: client send_size %u, recv_size %u " + "remote inv %ssupported\n", + rpcrdma_decode_buffer_size(pmsg->cp_send_size), + rpcrdma_decode_buffer_size(pmsg->cp_recv_size), + newxprt->sc_snd_w_inv ? "" : "un"); + } +} + /* * This function handles the CONNECT_REQUEST event on a listening * endpoint. It is passed the cma_id for the _new_ connection. The context in @@ -653,7 +679,8 @@ int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags) * will call the recvfrom method on the listen xprt which will accept the new * connection. */ -static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird) +static void handle_connect_req(struct rdma_cm_id *new_cma_id, + struct rdma_conn_param *param) { struct svcxprt_rdma *listen_xprt = new_cma_id->context; struct svcxprt_rdma *newxprt; @@ -669,9 +696,10 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird) new_cma_id->context = newxprt; dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n", newxprt, newxprt->sc_cm_id, listen_xprt); + svc_rdma_parse_connect_private(newxprt, param); /* Save client advertised inbound read limit for use later in accept. 
*/ - newxprt->sc_ord = client_ird; + newxprt->sc_ord = param->initiator_depth; /* Set the local and remote addresses in the transport */ sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; @@ -706,8 +734,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id, dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " "event = %s (%d)\n", cma_id, cma_id->context, rdma_event_msg(event->event), event->event); - handle_connect_req(cma_id, - event->param.conn.initiator_depth); + handle_connect_req(cma_id, &event->param.conn); break; case RDMA_CM_EVENT_ESTABLISHED: @@ -941,6 +968,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct svcxprt_rdma *listen_rdma; struct svcxprt_rdma *newxprt = NULL; struct rdma_conn_param conn_param; + struct rpcrdma_connect_private pmsg; struct ib_qp_init_attr qp_attr; struct ib_device *dev; unsigned int i; @@ -1070,7 +1098,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) dev->attrs.max_fast_reg_page_list_len; newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; newxprt->sc_reader = rdma_read_chunk_frmr; - } + } else + newxprt->sc_snd_w_inv = false; /* * Determine if a DMA MR is required and if so, what privs are required @@ -1094,11 +1123,20 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) /* Swap out the handler */ newxprt->sc_cm_id->event_handler = rdma_cma_handler; + /* Construct RDMA-CM private message */ + pmsg.cp_magic = rpcrdma_cmp_magic; + pmsg.cp_version = RPCRDMA_CMP_VERSION; + pmsg.cp_flags = 0; + pmsg.cp_send_size = pmsg.cp_recv_size = + rpcrdma_encode_buffer_size(newxprt->sc_max_req_size); + /* Accept Connection */ set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); memset(&conn_param, 0, sizeof conn_param); conn_param.responder_resources = 0; conn_param.initiator_depth = newxprt->sc_ord; + conn_param.private_data = &pmsg; + conn_param.private_data_len = sizeof(pmsg); ret = rdma_accept(newxprt->sc_cm_id, &conn_param); if (ret) { dprintk("svcrdma: failed to accept new connection, ret=%d\n", diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 81f0e879f019..ed5e285fd2ea 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -97,7 +97,7 @@ static struct ctl_table xr_tunables_table[] = { .data = &xprt_rdma_max_inline_read, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_inline_size, .extra2 = &max_inline_size, }, @@ -106,7 +106,7 @@ static struct ctl_table xr_tunables_table[] = { .data = &xprt_rdma_max_inline_write, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_inline_size, .extra2 = &max_inline_size, }, @@ -477,115 +477,152 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) } } -/* - * The RDMA allocate/free functions need the task structure as a place - * to hide the struct rpcrdma_req, which is necessary for the actual send/recv - * sequence. +/* Allocate a fixed-size buffer in which to construct and send the + * RPC-over-RDMA header for this request. 
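A small but easy-to-miss fix appears in the transport.c hunk above: xr_tunables_table already carried .extra1/.extra2 bounds, but proc_dointvec never looks at them, so any integer could be written; proc_dointvec_minmax actually enforces the range. The difference, reduced to a standalone model (names here are illustrative, not the procfs implementation):

#include <errno.h>

/* proc_dointvec behavior: bounds are ignored, any value is stored. */
static int store_int(int *slot, int val)
{
	*slot = val;
	return 0;
}

/* proc_dointvec_minmax behavior: an out-of-range write fails with
 * -EINVAL and the stored value is left untouched. */
static int store_int_minmax(int *slot, int val, int lo, int hi)
{
	if (val < lo || val > hi)
		return -EINVAL;
	*slot = val;
	return 0;
}

int main(void)
{
	int inline_size = 1024;

	store_int(&inline_size, 1 << 30);	/* old handler: accepted */
	inline_size = 1024;
	/* new handler: the same bogus write is now rejected */
	return store_int_minmax(&inline_size, 1 << 30, 512, 65536) == -EINVAL
		? 0 : 1;
}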
+ */ +static bool +rpcrdma_get_rdmabuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, + gfp_t flags) +{ + size_t size = RPCRDMA_HDRBUF_SIZE; + struct rpcrdma_regbuf *rb; + + if (req->rl_rdmabuf) + return true; + + rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags); + if (IS_ERR(rb)) + return false; + + r_xprt->rx_stats.hardway_register_count += size; + req->rl_rdmabuf = rb; + return true; +} + +static bool +rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, + size_t size, gfp_t flags) +{ + struct rpcrdma_regbuf *rb; + + if (req->rl_sendbuf && rdmab_length(req->rl_sendbuf) >= size) + return true; + + rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags); + if (IS_ERR(rb)) + return false; + + rpcrdma_free_regbuf(req->rl_sendbuf); + r_xprt->rx_stats.hardway_register_count += size; + req->rl_sendbuf = rb; + return true; +} + +/* The rq_rcv_buf is used only if a Reply chunk is necessary. + * The decision to use a Reply chunk is made later in + * rpcrdma_marshal_req. This buffer is registered at that time. * - * The RPC layer allocates both send and receive buffers in the same call - * (rq_send_buf and rq_rcv_buf are both part of a single contiguous buffer). - * We may register rq_rcv_buf when using reply chunks. + * Otherwise, the associated RPC Reply arrives in a separate + * Receive buffer, arbitrarily chosen by the HCA. The buffer + * allocated here for the RPC Reply is not utilized in that + * case. See rpcrdma_inline_fixup. + * + * A regbuf is used here to remember the buffer size. */ -static void * -xprt_rdma_allocate(struct rpc_task *task, size_t size) +static bool +rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, + size_t size, gfp_t flags) { - struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_regbuf *rb; + + if (req->rl_recvbuf && rdmab_length(req->rl_recvbuf) >= size) + return true; + + rb = rpcrdma_alloc_regbuf(size, DMA_NONE, flags); + if (IS_ERR(rb)) + return false; + + rpcrdma_free_regbuf(req->rl_recvbuf); + r_xprt->rx_stats.hardway_register_count += size; + req->rl_recvbuf = rb; + return true; +} + +/** + * xprt_rdma_allocate - allocate transport resources for an RPC + * @task: RPC task + * + * Return values: + * 0: Success; rq_buffer points to RPC buffer to use + * ENOMEM: Out of memory, call again later + * EIO: A permanent error occurred, do not retry + * + * The RDMA allocate/free functions need the task structure as a place + * to hide the struct rpcrdma_req, which is necessary for the actual + * send/recv sequence. + * + * xprt_rdma_allocate provides buffers that are already mapped for + * DMA, and a local DMA lkey is provided for each. 
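All three rpcrdma_get_*buf() helpers above share one shape: keep the regbuf cached on the rpcrdma_req across RPCs, and replace it only when the current call needs more room than the cached buffer offers, counting each replacement in hardway_register_count. The pattern in isolation, with plain malloc standing in for rpcrdma_alloc_regbuf():

#include <stdbool.h>
#include <stdlib.h>

struct cached_buf {
	size_t size;
	char data[];
};

/* Reuse *slot when it is already large enough; otherwise allocate a
 * replacement and free the old one, as rpcrdma_get_sendbuf() does with
 * regbufs. Returns false on allocation failure, leaving *slot intact. */
static bool get_buf(struct cached_buf **slot, size_t size)
{
	struct cached_buf *nb;

	if (*slot && (*slot)->size >= size)
		return true;

	nb = malloc(sizeof(*nb) + size);
	if (!nb)
		return false;
	nb->size = size;

	free(*slot);
	*slot = nb;
	return true;
}

int main(void)
{
	struct cached_buf *buf = NULL;

	if (!get_buf(&buf, 512) || !get_buf(&buf, 256) || !get_buf(&buf, 4096))
		return 1;	/* second call reuses, third reallocates */
	free(buf);
	return 0;
}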
+ */ +static int +xprt_rdma_allocate(struct rpc_task *task) +{ + struct rpc_rqst *rqst = task->tk_rqstp; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_req *req; - size_t min_size; gfp_t flags; req = rpcrdma_buffer_get(&r_xprt->rx_buf); if (req == NULL) - return NULL; + return -ENOMEM; flags = RPCRDMA_DEF_GFP; if (RPC_IS_SWAPPER(task)) flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; - if (req->rl_rdmabuf == NULL) - goto out_rdmabuf; - if (req->rl_sendbuf == NULL) - goto out_sendbuf; - if (size > req->rl_sendbuf->rg_size) - goto out_sendbuf; - -out: - dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); - req->rl_connect_cookie = 0; /* our reserved value */ - req->rl_task = task; - return req->rl_sendbuf->rg_base; - -out_rdmabuf: - min_size = RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp); - rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, min_size, flags); - if (IS_ERR(rb)) + if (!rpcrdma_get_rdmabuf(r_xprt, req, flags)) goto out_fail; - req->rl_rdmabuf = rb; - -out_sendbuf: - /* XDR encoding and RPC/RDMA marshaling of this request has not - * yet occurred. Thus a lower bound is needed to prevent buffer - * overrun during marshaling. - * - * RPC/RDMA marshaling may choose to send payload bearing ops - * inline, if the result is smaller than the inline threshold. - * The value of the "size" argument accounts for header - * requirements but not for the payload in these cases. - * - * Likewise, allocate enough space to receive a reply up to the - * size of the inline threshold. - * - * It's unlikely that both the send header and the received - * reply will be large, but slush is provided here to allow - * flexibility when marshaling. - */ - min_size = RPCRDMA_INLINE_READ_THRESHOLD(task->tk_rqstp); - min_size += RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp); - if (size < min_size) - size = min_size; - - rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, size, flags); - if (IS_ERR(rb)) + if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags)) + goto out_fail; + if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) goto out_fail; - rb->rg_owner = req; - r_xprt->rx_stats.hardway_register_count += size; - rpcrdma_free_regbuf(&r_xprt->rx_ia, req->rl_sendbuf); - req->rl_sendbuf = rb; - goto out; + dprintk("RPC: %5u %s: send size = %zd, recv size = %zd, req = %p\n", + task->tk_pid, __func__, rqst->rq_callsize, + rqst->rq_rcvsize, req); + + req->rl_connect_cookie = 0; /* our reserved value */ + rpcrdma_set_xprtdata(rqst, req); + rqst->rq_buffer = req->rl_sendbuf->rg_base; + rqst->rq_rbuffer = req->rl_recvbuf->rg_base; + return 0; out_fail: rpcrdma_buffer_put(req); - return NULL; + return -ENOMEM; } -/* - * This function returns all RDMA resources to the pool. +/** + * xprt_rdma_free - release resources allocated by xprt_rdma_allocate + * @task: RPC task + * + * Caller guarantees rqst->rq_buffer is non-NULL. 
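With rq_buffer and rq_rbuffer now coming from two different regbufs, the old trick of recovering the rpcrdma_req from a buffer address no longer works, so rpcrdma_set_xprtdata() stashes the req pointer directly on the rqst instead (see the rpcr_to_rdmar() and xprt_rdma_free() hunks). For reference, the retired pattern was the classic container_of recovery, modeled here standalone (not kernel code):

#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct req;

struct regbuf {
	struct req *owner;	/* what rg_owner used to provide */
	char base[];
};

struct req {
	struct regbuf *sendbuf;
};

/* Old scheme: walk back from the buffer start to its header, then
 * follow the owner pointer. This only works while exactly one buffer
 * is handed out per request, which is no longer the case. */
static struct req *req_from_buffer(void *buffer)
{
	return container_of(buffer, struct regbuf, base)->owner;
}

int main(void)
{
	struct req r;
	struct regbuf rb = { .owner = &r };

	r.sendbuf = &rb;
	return req_from_buffer(rb.base) == &r ? 0 : 1;
}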
*/ static void -xprt_rdma_free(void *buffer) +xprt_rdma_free(struct rpc_task *task) { - struct rpcrdma_req *req; - struct rpcrdma_xprt *r_xprt; - struct rpcrdma_regbuf *rb; - - if (buffer == NULL) - return; + struct rpc_rqst *rqst = task->tk_rqstp; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + struct rpcrdma_ia *ia = &r_xprt->rx_ia; - rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]); - req = rb->rg_owner; if (req->rl_backchannel) return; - r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); - dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); - r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, - !RPC_IS_ASYNC(req->rl_task)); - + ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task)); + rpcrdma_unmap_sges(ia, req); rpcrdma_buffer_put(req); } @@ -685,10 +722,11 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) r_xprt->rx_stats.failed_marshal_count, r_xprt->rx_stats.bad_reply_count, r_xprt->rx_stats.nomsg_call_count); - seq_printf(seq, "%lu %lu %lu\n", + seq_printf(seq, "%lu %lu %lu %lu\n", r_xprt->rx_stats.mrs_recovered, r_xprt->rx_stats.mrs_orphaned, - r_xprt->rx_stats.mrs_allocated); + r_xprt->rx_stats.mrs_allocated, + r_xprt->rx_stats.local_inv_needed); } static int diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index be3178e5e2d2..ec74289af7ec 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -129,15 +129,6 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) wc->status, wc->vendor_err); } -static void -rpcrdma_receive_worker(struct work_struct *work) -{ - struct rpcrdma_rep *rep = - container_of(work, struct rpcrdma_rep, rr_work); - - rpcrdma_reply_handler(rep); -} - /* Perform basic sanity checking to avoid using garbage * to update the credit grant value. 
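The sanity check mentioned just above guards rpcrdma_update_granted_credits() against a garbage credit value arriving from the wire. The customary shape of such a guard is to clamp into [1, max] rather than trust the peer, since zero credits would deadlock the transport. A minimal sketch, assuming the cap is the buffer's maximum request count:

#include <stdint.h>

/* Clamp a peer-supplied credit grant into a safe range: zero would
 * deadlock the transport, and anything above our own request limit is
 * either garbage or more than we can use. */
static uint32_t sanitize_credits(uint32_t wire_credits, uint32_t max_requests)
{
	if (wire_credits == 0)
		return 1;
	if (wire_credits > max_requests)
		return max_requests;
	return wire_credits;
}

int main(void)
{
	return sanitize_credits(0, 32) == 1 &&
	       sanitize_credits(1u << 31, 32) == 32 ? 0 : 1;
}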
*/ @@ -161,13 +152,13 @@ rpcrdma_update_granted_credits(struct rpcrdma_rep *rep) } /** - * rpcrdma_receive_wc - Invoked by RDMA provider for each polled Receive WC + * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC * @cq: completion queue (ignored) * @wc: completed WR * */ static void -rpcrdma_receive_wc(struct ib_cq *cq, struct ib_wc *wc) +rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) { struct ib_cqe *cqe = wc->wr_cqe; struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep, @@ -185,6 +176,9 @@ rpcrdma_receive_wc(struct ib_cq *cq, struct ib_wc *wc) __func__, rep, wc->byte_len); rep->rr_len = wc->byte_len; + rep->rr_wc_flags = wc->wc_flags; + rep->rr_inv_rkey = wc->ex.invalidate_rkey; + ib_dma_sync_single_for_cpu(rep->rr_device, rdmab_addr(rep->rr_rdmabuf), rep->rr_len, DMA_FROM_DEVICE); @@ -204,6 +198,36 @@ out_fail: goto out_schedule; } +static void +rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, + struct rdma_conn_param *param) +{ + struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; + const struct rpcrdma_connect_private *pmsg = param->private_data; + unsigned int rsize, wsize; + + /* Default settings for RPC-over-RDMA Version One */ + r_xprt->rx_ia.ri_reminv_expected = false; + rsize = RPCRDMA_V1_DEF_INLINE_SIZE; + wsize = RPCRDMA_V1_DEF_INLINE_SIZE; + + if (pmsg && + pmsg->cp_magic == rpcrdma_cmp_magic && + pmsg->cp_version == RPCRDMA_CMP_VERSION) { + r_xprt->rx_ia.ri_reminv_expected = true; + rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); + wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); + } + + if (rsize < cdata->inline_rsize) + cdata->inline_rsize = rsize; + if (wsize < cdata->inline_wsize) + cdata->inline_wsize = wsize; + pr_info("rpcrdma: max send %u, max recv %u\n", + cdata->inline_wsize, cdata->inline_rsize); + rpcrdma_set_max_header_sizes(r_xprt); +} + static int rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) { @@ -244,6 +268,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) " (%d initiator)\n", __func__, attr->max_dest_rd_atomic, attr->max_rd_atomic); + rpcrdma_update_connect_private(xprt, &event->param.conn); goto connected; case RDMA_CM_EVENT_CONNECT_ERROR: connstate = -ENOTCONN; @@ -454,11 +479,12 @@ int rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) { + struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; struct ib_cq *sendcq, *recvcq; unsigned int max_qp_wr; int rc; - if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) { + if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) { dprintk("RPC: %s: insufficient sge's available\n", __func__); return -ENOMEM; @@ -487,7 +513,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.cap.max_recv_wr = cdata->max_requests; ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */ - ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS; + ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES; ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_inline_data = 0; ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; @@ -536,9 +562,14 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, /* Initialize cma parameters */ memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma)); - /* RPC/RDMA does not use private data */ - ep->rep_remote_cma.private_data = NULL; - ep->rep_remote_cma.private_data_len = 0; + /* Prepare RDMA-CM private message */ + 
pmsg->cp_magic = rpcrdma_cmp_magic; + pmsg->cp_version = RPCRDMA_CMP_VERSION; + pmsg->cp_flags |= ia->ri_ops->ro_send_w_inv_ok; + pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize); + pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize); + ep->rep_remote_cma.private_data = pmsg; + ep->rep_remote_cma.private_data_len = sizeof(*pmsg); /* Client offers RDMA Read but does not initiate */ ep->rep_remote_cma.initiator_depth = 0; @@ -849,6 +880,10 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) req->rl_cqe.done = rpcrdma_wc_send; req->rl_buffer = &r_xprt->rx_buf; INIT_LIST_HEAD(&req->rl_registered); + req->rl_send_wr.next = NULL; + req->rl_send_wr.wr_cqe = &req->rl_cqe; + req->rl_send_wr.sg_list = req->rl_send_sge; + req->rl_send_wr.opcode = IB_WR_SEND; return req; } @@ -865,17 +900,21 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) if (rep == NULL) goto out; - rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, cdata->inline_rsize, - GFP_KERNEL); + rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize, + DMA_FROM_DEVICE, GFP_KERNEL); if (IS_ERR(rep->rr_rdmabuf)) { rc = PTR_ERR(rep->rr_rdmabuf); goto out_free; } rep->rr_device = ia->ri_device; - rep->rr_cqe.done = rpcrdma_receive_wc; + rep->rr_cqe.done = rpcrdma_wc_receive; rep->rr_rxprt = r_xprt; - INIT_WORK(&rep->rr_work, rpcrdma_receive_worker); + INIT_WORK(&rep->rr_work, rpcrdma_reply_handler); + rep->rr_recv_wr.next = NULL; + rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; + rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; + rep->rr_recv_wr.num_sge = 1; return rep; out_free: @@ -966,17 +1005,18 @@ rpcrdma_buffer_get_rep_locked(struct rpcrdma_buffer *buf) } static void -rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep) +rpcrdma_destroy_rep(struct rpcrdma_rep *rep) { - rpcrdma_free_regbuf(ia, rep->rr_rdmabuf); + rpcrdma_free_regbuf(rep->rr_rdmabuf); kfree(rep); } void -rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req) +rpcrdma_destroy_req(struct rpcrdma_req *req) { - rpcrdma_free_regbuf(ia, req->rl_sendbuf); - rpcrdma_free_regbuf(ia, req->rl_rdmabuf); + rpcrdma_free_regbuf(req->rl_recvbuf); + rpcrdma_free_regbuf(req->rl_sendbuf); + rpcrdma_free_regbuf(req->rl_rdmabuf); kfree(req); } @@ -1009,15 +1049,13 @@ rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf) void rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) { - struct rpcrdma_ia *ia = rdmab_to_ia(buf); - cancel_delayed_work_sync(&buf->rb_recovery_worker); while (!list_empty(&buf->rb_recv_bufs)) { struct rpcrdma_rep *rep; rep = rpcrdma_buffer_get_rep_locked(buf); - rpcrdma_destroy_rep(ia, rep); + rpcrdma_destroy_rep(rep); } buf->rb_send_count = 0; @@ -1030,7 +1068,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) list_del(&req->rl_all); spin_unlock(&buf->rb_reqslock); - rpcrdma_destroy_req(ia, req); + rpcrdma_destroy_req(req); spin_lock(&buf->rb_reqslock); } spin_unlock(&buf->rb_reqslock); @@ -1129,7 +1167,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) struct rpcrdma_buffer *buffers = req->rl_buffer; struct rpcrdma_rep *rep = req->rl_reply; - req->rl_niovs = 0; + req->rl_send_wr.num_sge = 0; req->rl_reply = NULL; spin_lock(&buffers->rb_lock); @@ -1171,70 +1209,81 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) spin_unlock(&buffers->rb_lock); } -/* - * Wrappers for internal-use kmalloc memory registration, used by buffer code. 
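The rewritten helpers in the next hunk replace those wrappers by separating allocation from DMA mapping: rpcrdma_alloc_regbuf() now only records size and direction, the buffer is mapped on first use, and rg_device doubles as the is-it-mapped flag so unmapping is safe to call at any time. The state machine, sketched standalone with a fake device handle and its own direction enum (the kernel uses enum dma_data_direction and ib_dma_map_single()):

#include <stdbool.h>
#include <stddef.h>

struct device { int id; };	/* stand-in for ib_device */

enum buf_dir { BUF_DIR_NONE, BUF_DIR_TO_DEV, BUF_DIR_FROM_DEV };

struct lazybuf {
	struct device *dev;	/* NULL until mapped, like rg_device */
	size_t len;
	enum buf_dir dir;	/* BUF_DIR_NONE means never map */
};

/* Map on first use; a second call is a cheap no-op. Mirrors
 * rpcrdma_dma_map_regbuf() / __rpcrdma_dma_map_regbuf(). */
static bool lazybuf_map(struct lazybuf *b, struct device *dev)
{
	if (b->dev)
		return true;		/* already mapped: fast path */
	if (b->dir == BUF_DIR_NONE)
		return false;		/* like DMA_NONE: mapping forbidden */
	/* real code: ib_dma_map_single() plus a mapping-error check */
	b->dev = dev;
	return true;
}

/* Unmap is idempotent: harmless on a buffer that was never mapped. */
static void lazybuf_unmap(struct lazybuf *b)
{
	if (!b->dev)
		return;
	/* real code: ib_dma_unmap_single() */
	b->dev = NULL;
}

int main(void)
{
	struct device dev = { 1 };
	struct lazybuf b = { .dev = NULL, .len = 1024, .dir = BUF_DIR_FROM_DEV };

	if (!lazybuf_map(&b, &dev) || !lazybuf_map(&b, &dev))
		return 1;
	lazybuf_unmap(&b);
	lazybuf_unmap(&b);	/* second unmap is a no-op */
	return 0;
}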
- */ - /** - * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers - * @ia: controlling rpcrdma_ia + * rpcrdma_alloc_regbuf - allocate and DMA-map memory for SEND/RECV buffers * @size: size of buffer to be allocated, in bytes + * @direction: direction of data movement * @flags: GFP flags * - * Returns pointer to private header of an area of internally - * registered memory, or an ERR_PTR. The registered buffer follows - * the end of the private header. + * Returns an ERR_PTR, or a pointer to a regbuf, a buffer that + * can be persistently DMA-mapped for I/O. * * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for - * receiving the payload of RDMA RECV operations. regbufs are not - * used for RDMA READ/WRITE operations, thus are registered only for - * LOCAL access. + * receiving the payload of RDMA RECV operations. During Long Calls + * or Replies they may be registered externally via ro_map. */ struct rpcrdma_regbuf * -rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags) +rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction, + gfp_t flags) { struct rpcrdma_regbuf *rb; - struct ib_sge *iov; rb = kmalloc(sizeof(*rb) + size, flags); if (rb == NULL) - goto out; + return ERR_PTR(-ENOMEM); - iov = &rb->rg_iov; - iov->addr = ib_dma_map_single(ia->ri_device, - (void *)rb->rg_base, size, - DMA_BIDIRECTIONAL); - if (ib_dma_mapping_error(ia->ri_device, iov->addr)) - goto out_free; + rb->rg_device = NULL; + rb->rg_direction = direction; + rb->rg_iov.length = size; - iov->length = size; - iov->lkey = ia->ri_pd->local_dma_lkey; - rb->rg_size = size; - rb->rg_owner = NULL; return rb; +} -out_free: - kfree(rb); -out: - return ERR_PTR(-ENOMEM); +/** + * __rpcrdma_map_regbuf - DMA-map a regbuf + * @ia: controlling rpcrdma_ia + * @rb: regbuf to be mapped + */ +bool +__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) +{ + if (rb->rg_direction == DMA_NONE) + return false; + + rb->rg_iov.addr = ib_dma_map_single(ia->ri_device, + (void *)rb->rg_base, + rdmab_length(rb), + rb->rg_direction); + if (ib_dma_mapping_error(ia->ri_device, rdmab_addr(rb))) + return false; + + rb->rg_device = ia->ri_device; + rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey; + return true; +} + +static void +rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb) +{ + if (!rpcrdma_regbuf_is_mapped(rb)) + return; + + ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), + rdmab_length(rb), rb->rg_direction); + rb->rg_device = NULL; } /** * rpcrdma_free_regbuf - deregister and free registered buffer - * @ia: controlling rpcrdma_ia * @rb: regbuf to be deregistered and freed */ void -rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) +rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb) { - struct ib_sge *iov; - if (!rb) return; - iov = &rb->rg_iov; - ib_dma_unmap_single(ia->ri_device, - iov->addr, iov->length, DMA_BIDIRECTIONAL); + rpcrdma_dma_unmap_regbuf(rb); kfree(rb); } @@ -1248,39 +1297,28 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_req *req) { - struct ib_device *device = ia->ri_device; - struct ib_send_wr send_wr, *send_wr_fail; - struct rpcrdma_rep *rep = req->rl_reply; - struct ib_sge *iov = req->rl_send_iov; - int i, rc; + struct ib_send_wr *send_wr = &req->rl_send_wr; + struct ib_send_wr *send_wr_fail; + int rc; - if (rep) { - rc = rpcrdma_ep_post_recv(ia, ep, rep); + if (req->rl_reply) { + rc = rpcrdma_ep_post_recv(ia, req->rl_reply); if (rc) return rc; req->rl_reply = NULL; } - send_wr.next = NULL; - 
send_wr.wr_cqe = &req->rl_cqe; - send_wr.sg_list = iov; - send_wr.num_sge = req->rl_niovs; - send_wr.opcode = IB_WR_SEND; - - for (i = 0; i < send_wr.num_sge; i++) - ib_dma_sync_single_for_device(device, iov[i].addr, - iov[i].length, DMA_TO_DEVICE); dprintk("RPC: %s: posting %d s/g entries\n", - __func__, send_wr.num_sge); + __func__, send_wr->num_sge); if (DECR_CQCOUNT(ep) > 0) - send_wr.send_flags = 0; + send_wr->send_flags = 0; else { /* Provider must take a send completion every now and then */ INIT_CQCOUNT(ep); - send_wr.send_flags = IB_SEND_SIGNALED; + send_wr->send_flags = IB_SEND_SIGNALED; } - rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail); + rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); if (rc) goto out_postsend_err; return 0; @@ -1290,32 +1328,24 @@ out_postsend_err: return -ENOTCONN; } -/* - * (Re)post a receive buffer. - */ int rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, - struct rpcrdma_ep *ep, struct rpcrdma_rep *rep) { - struct ib_recv_wr recv_wr, *recv_wr_fail; + struct ib_recv_wr *recv_wr_fail; int rc; - recv_wr.next = NULL; - recv_wr.wr_cqe = &rep->rr_cqe; - recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; - recv_wr.num_sge = 1; - - ib_dma_sync_single_for_cpu(ia->ri_device, - rdmab_addr(rep->rr_rdmabuf), - rdmab_length(rep->rr_rdmabuf), - DMA_BIDIRECTIONAL); - - rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); + if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf)) + goto out_map; + rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail); if (rc) goto out_postrecv; return 0; +out_map: + pr_err("rpcrdma: failed to DMA map the Receive buffer\n"); + return -EIO; + out_postrecv: pr_err("rpcrdma: ib_post_recv returned %i\n", rc); return -ENOTCONN; @@ -1333,7 +1363,6 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) { struct rpcrdma_buffer *buffers = &r_xprt->rx_buf; struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct rpcrdma_ep *ep = &r_xprt->rx_ep; struct rpcrdma_rep *rep; int rc; @@ -1344,7 +1373,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) rep = rpcrdma_buffer_get_rep_locked(buffers); spin_unlock(&buffers->rb_lock); - rc = rpcrdma_ep_post_recv(ia, ep, rep); + rc = rpcrdma_ep_post_recv(ia, rep); if (rc) goto out_rc; } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index a71b0f5897d8..0d35b761c883 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -70,9 +70,11 @@ struct rpcrdma_ia { struct ib_pd *ri_pd; struct completion ri_done; int ri_async_rc; + unsigned int ri_max_segs; unsigned int ri_max_frmr_depth; unsigned int ri_max_inline_write; unsigned int ri_max_inline_read; + bool ri_reminv_expected; struct ib_qp_attr ri_qp_attr; struct ib_qp_init_attr ri_qp_init_attr; }; @@ -87,6 +89,7 @@ struct rpcrdma_ep { int rep_connected; struct ib_qp_init_attr rep_attr; wait_queue_head_t rep_connect_wait; + struct rpcrdma_connect_private rep_cm_private; struct rdma_conn_param rep_remote_cma; struct sockaddr_storage rep_remote_addr; struct delayed_work rep_connect_worker; @@ -112,9 +115,9 @@ struct rpcrdma_ep { */ struct rpcrdma_regbuf { - size_t rg_size; - struct rpcrdma_req *rg_owner; struct ib_sge rg_iov; + struct ib_device *rg_device; + enum dma_data_direction rg_direction; __be32 rg_base[0] __attribute__ ((aligned(256))); }; @@ -162,7 +165,10 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb) * The smallest inline threshold is 1024 bytes, ensuring that * at least 750 bytes are available for RPC messages. 
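The rpcrdma_ep_post() hunk above keeps the long-standing completion throttle: most Sends are posted unsignaled, but when the countdown reaches zero it is refilled and that one WR is flagged IB_SEND_SIGNALED so the provider can retire send queue entries. The countdown in isolation (field names invented for the sketch; the kernel uses the DECR_CQCOUNT/INIT_CQCOUNT macros):

#include <stdbool.h>

struct sendq {
	int cq_count;		/* posts remaining until a signaled send */
	int cq_init;		/* refill value, derived from queue depth */
};

/* Decrement per post; when the budget runs out, refill it and tell
 * the caller to request a completion for this work request. */
static bool need_signaled_send(struct sendq *q)
{
	if (--q->cq_count > 0)
		return false;
	q->cq_count = q->cq_init;
	return true;
}

int main(void)
{
	struct sendq q = { .cq_count = 3, .cq_init = 3 };
	int i, signaled = 0;

	for (i = 0; i < 9; i++)
		signaled += need_signaled_send(&q);
	return signaled == 3 ? 0 : 1;	/* every third post is signaled */
}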
*/ -#define RPCRDMA_MAX_HDR_SEGS (8) +enum { + RPCRDMA_MAX_HDR_SEGS = 8, + RPCRDMA_HDRBUF_SIZE = 256, +}; /* * struct rpcrdma_rep -- this structure encapsulates state required to recv @@ -182,10 +188,13 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb) struct rpcrdma_rep { struct ib_cqe rr_cqe; unsigned int rr_len; + int rr_wc_flags; + u32 rr_inv_rkey; struct ib_device *rr_device; struct rpcrdma_xprt *rr_rxprt; struct work_struct rr_work; struct list_head rr_list; + struct ib_recv_wr rr_recv_wr; struct rpcrdma_regbuf *rr_rdmabuf; }; @@ -276,19 +285,30 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ char *mr_offset; /* kva if no page, else offset */ }; -#define RPCRDMA_MAX_IOVS (2) +/* Reserve enough Send SGEs to send a maximum size inline request: + * - RPC-over-RDMA header + * - xdr_buf head iovec + * - RPCRDMA_MAX_INLINE bytes, possibly unaligned, in pages + * - xdr_buf tail iovec + */ +enum { + RPCRDMA_MAX_SEND_PAGES = PAGE_SIZE + RPCRDMA_MAX_INLINE - 1, + RPCRDMA_MAX_PAGE_SGES = (RPCRDMA_MAX_SEND_PAGES >> PAGE_SHIFT) + 1, + RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1, +}; struct rpcrdma_buffer; struct rpcrdma_req { struct list_head rl_free; - unsigned int rl_niovs; + unsigned int rl_mapped_sges; unsigned int rl_connect_cookie; - struct rpc_task *rl_task; struct rpcrdma_buffer *rl_buffer; - struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ - struct ib_sge rl_send_iov[RPCRDMA_MAX_IOVS]; - struct rpcrdma_regbuf *rl_rdmabuf; - struct rpcrdma_regbuf *rl_sendbuf; + struct rpcrdma_rep *rl_reply; + struct ib_send_wr rl_send_wr; + struct ib_sge rl_send_sge[RPCRDMA_MAX_SEND_SGES]; + struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */ + struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */ + struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */ struct ib_cqe rl_cqe; struct list_head rl_all; @@ -298,14 +318,16 @@ struct rpcrdma_req { struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; }; +static inline void +rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req) +{ + rqst->rq_xprtdata = req; +} + static inline struct rpcrdma_req * rpcr_to_rdmar(struct rpc_rqst *rqst) { - void *buffer = rqst->rq_buffer; - struct rpcrdma_regbuf *rb; - - rb = container_of(buffer, struct rpcrdma_regbuf, rg_base); - return rb->rg_owner; + return rqst->rq_xprtdata; } /* @@ -356,15 +378,6 @@ struct rpcrdma_create_data_internal { unsigned int padding; /* non-rdma write header padding */ }; -#define RPCRDMA_INLINE_READ_THRESHOLD(rq) \ - (rpcx_to_rdmad(rq->rq_xprt).inline_rsize) - -#define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\ - (rpcx_to_rdmad(rq->rq_xprt).inline_wsize) - -#define RPCRDMA_INLINE_PAD_VALUE(rq)\ - rpcx_to_rdmad(rq->rq_xprt).padding - /* * Statistics for RPCRDMA */ @@ -386,6 +399,7 @@ struct rpcrdma_stats { unsigned long mrs_recovered; unsigned long mrs_orphaned; unsigned long mrs_allocated; + unsigned long local_inv_needed; }; /* @@ -409,6 +423,7 @@ struct rpcrdma_memreg_ops { struct rpcrdma_mw *); void (*ro_release_mr)(struct rpcrdma_mw *); const char *ro_displayname; + const int ro_send_w_inv_ok; }; extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops; @@ -461,15 +476,14 @@ void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, struct rpcrdma_req *); -int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *, - struct rpcrdma_rep *); +int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *); /* * Buffer calls - xprtrdma/verbs.c */ struct rpcrdma_req *rpcrdma_create_req(struct 
rpcrdma_xprt *); struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *); -void rpcrdma_destroy_req(struct rpcrdma_ia *, struct rpcrdma_req *); +void rpcrdma_destroy_req(struct rpcrdma_req *); int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); @@ -482,10 +496,24 @@ void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *); -struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *, - size_t, gfp_t); -void rpcrdma_free_regbuf(struct rpcrdma_ia *, - struct rpcrdma_regbuf *); +struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, + gfp_t); +bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); +void rpcrdma_free_regbuf(struct rpcrdma_regbuf *); + +static inline bool +rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb) +{ + return rb->rg_device != NULL; +} + +static inline bool +rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) +{ + if (likely(rpcrdma_regbuf_is_mapped(rb))) + return true; + return __rpcrdma_dma_map_regbuf(ia, rb); +} int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int); @@ -507,15 +535,25 @@ rpcrdma_data_dir(bool writing) */ void rpcrdma_connect_worker(struct work_struct *); void rpcrdma_conn_func(struct rpcrdma_ep *); -void rpcrdma_reply_handler(struct rpcrdma_rep *); +void rpcrdma_reply_handler(struct work_struct *); /* * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c */ + +enum rpcrdma_chunktype { + rpcrdma_noch = 0, + rpcrdma_readch, + rpcrdma_areadch, + rpcrdma_writech, + rpcrdma_replych +}; + +bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *, + u32, struct xdr_buf *, enum rpcrdma_chunktype); +void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *); int rpcrdma_marshal_req(struct rpc_rqst *); -void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *, - struct rpcrdma_create_data_internal *, - unsigned int); +void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *); /* RPC/RDMA module init - xprtrdma/transport.c */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index bf168838a029..0137af1c0916 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -473,7 +473,16 @@ static int xs_nospace(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); /* Race breaker in case memory is freed before above code is called */ - sk->sk_write_space(sk); + if (ret == -EAGAIN) { + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags); + rcu_read_unlock(); + + sk->sk_write_space(sk); + } return ret; } @@ -2533,35 +2542,38 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) * we allocate pages instead of doing a kmalloc like rpc_malloc is because we want * to use the server side send routines.
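Back in the xprt_rdma.h hunk, the RPCRDMA_MAX_SEND_SGES enum budgets one SGE for the RPC-over-RDMA header, one for the xdr_buf head, one for the tail, and a worst-case count for the page list, since an inline payload may start at any page offset. The arithmetic, checked standalone (PAGE_SIZE and the inline maximum are illustrative values, not the kernel constants):

#include <stdio.h>

#define PAGE_SIZE	4096u		/* per-arch in the kernel; fixed here */

/* Conservative page-SGE bound: round the payload up to whole pages,
 * plus one more for a start offset that straddles a page boundary.
 * Mirrors RPCRDMA_MAX_SEND_PAGES / RPCRDMA_MAX_PAGE_SGES above. */
static unsigned int max_page_sges(unsigned int len)
{
	return (PAGE_SIZE + len - 1) / PAGE_SIZE + 1;
}

int main(void)
{
	unsigned int max_inline = 4096;	/* stand-in for RPCRDMA_MAX_INLINE */

	/* header + head iovec + page list + tail iovec */
	printf("max send SGEs: %u\n", 1 + 1 + max_page_sges(max_inline) + 1);
	return 0;
}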
*/ -static void *bc_malloc(struct rpc_task *task, size_t size) +static int bc_malloc(struct rpc_task *task) { + struct rpc_rqst *rqst = task->tk_rqstp; + size_t size = rqst->rq_callsize; struct page *page; struct rpc_buffer *buf; - WARN_ON_ONCE(size > PAGE_SIZE - sizeof(struct rpc_buffer)); - if (size > PAGE_SIZE - sizeof(struct rpc_buffer)) - return NULL; + if (size > PAGE_SIZE - sizeof(struct rpc_buffer)) { + WARN_ONCE(1, "xprtsock: large bc buffer request (size %zu)\n", + size); + return -EINVAL; + } page = alloc_page(GFP_KERNEL); if (!page) - return NULL; + return -ENOMEM; buf = page_address(page); buf->len = PAGE_SIZE; - return buf->data; + rqst->rq_buffer = buf->data; + return 0; } /* * Free the space allocated in the bc_alloc routine */ -static void bc_free(void *buffer) +static void bc_free(struct rpc_task *task) { + void *buffer = task->tk_rqstp->rq_buffer; struct rpc_buffer *buf; - if (!buffer) - return; - buf = container_of(buffer, struct rpc_buffer, data); free_page((unsigned long)buf); } diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index d80cd3f7503f..78cab9c5a445 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -407,6 +407,7 @@ static int __tipc_nl_add_udp_addr(struct sk_buff *skb, if (ntohs(addr->proto) == ETH_P_IP) { struct sockaddr_in ip4; + memset(&ip4, 0, sizeof(ip4)); ip4.sin_family = AF_INET; ip4.sin_port = addr->port; ip4.sin_addr.s_addr = addr->ipv4.s_addr; @@ -417,6 +418,7 @@ static int __tipc_nl_add_udp_addr(struct sk_buff *skb, } else if (ntohs(addr->proto) == ETH_P_IPV6) { struct sockaddr_in6 ip6; + memset(&ip6, 0, sizeof(ip6)); ip6.sin6_family = AF_INET6; ip6.sin6_port = addr->port; memcpy(&ip6.sin6_addr, &addr->ipv6, sizeof(struct in6_addr));
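The tipc/udp_media.c hunks that close this series fix a classic infoleak: a sockaddr_in built field by field on the stack leaves sin_zero and any padding bytes uninitialized, and the subsequent copy into a netlink attribute ships those stack bytes to userspace. Zeroing the whole struct first is the standard remedy, sketched here in userspace terms:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdint.h>
#include <string.h>

/* Fill a sockaddr_in without leaking stack garbage: the memset covers
 * sin_zero and padding that the field assignments never touch. */
static void fill_ip4(struct sockaddr_in *ip4, uint16_t port_be,
		     uint32_t addr_be)
{
	memset(ip4, 0, sizeof(*ip4));
	ip4->sin_family = AF_INET;
	ip4->sin_port = port_be;		/* already network order */
	ip4->sin_addr.s_addr = addr_be;
}

int main(void)
{
	struct sockaddr_in ip4;

	fill_ip4(&ip4, htons(6118), htonl(INADDR_LOOPBACK));
	return ip4.sin_family == AF_INET ? 0 : 1;
}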