diff options
author | Steve French <sfrench@us.ibm.com> | 2008-02-15 21:06:08 +0000 |
---|---|---|
committer | Steve French <sfrench@us.ibm.com> | 2008-02-15 21:06:08 +0000 |
commit | 0a3abcf75bf391fec4e32356ab5ddb8f5d2e6b41 (patch) | |
tree | b80b1d344ec24cad28b057ef803cebac9434be01 /Documentation | |
parent | 70eff55d2d979cca700aa6906494f0c474f3f7ff (diff) | |
parent | 101142c37be8e5af9b847860219217e6b958c739 (diff) | |
download | linux-0a3abcf75bf391fec4e32356ab5ddb8f5d2e6b41.tar.gz linux-0a3abcf75bf391fec4e32356ab5ddb8f5d2e6b41.tar.bz2 linux-0a3abcf75bf391fec4e32356ab5ddb8f5d2e6b41.zip |
Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'Documentation')
93 files changed, 2749 insertions, 455 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 40ac7759c3bb..30b327a116ea 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -14,6 +14,7 @@ Following translations are available on the WWW: - this file. ABI/ - info on kernel <-> userspace ABI and relative interface stability. + BUG-HUNTING - brute force method of doing binary search of patches to find bug. Changes @@ -66,6 +67,8 @@ VGA-softcursor.txt - how to change your VGA cursor from a blinking underscore. accounting/ - documentation on accounting and taskstats. +acpi/ + - info on ACPI-specific hooks in the kernel. aoe/ - description of AoE (ATA over Ethernet) along with config examples. applying-patches.txt @@ -106,6 +109,8 @@ cpu-hotplug.txt - document describing CPU hotplug support in the Linux kernel. cpu-load.txt - document describing how CPU load statistics are collected. +cpuidle/ + - info on CPU_IDLE, CPU idle state management subsystem. cpusets.txt - documents the cpusets feature; assign CPUs and Mem to a set of tasks. cputopology.txt @@ -126,18 +131,16 @@ devices.txt - plain ASCII listing of all the nodes in /dev/ with major minor #'s. digiepca.txt - info on Digi Intl. {PC,PCI,EISA}Xx and Xem series cards. -dnotify.txt - - info about directory notification in Linux. dontdiff - file containing a list of files that should never be diff'ed. driver-model/ - directory with info about Linux driver model. -drivers/ - - directory with driver documentation (currently only EDAC). dvb/ - info on Linux Digital Video Broadcast (DVB) subsystem. early-userspace/ - info about initramfs, klibc, and userspace early during boot. +edac.txt + - information on EDAC - Error Detection And Correction eisa.txt - info on EISA bus support. exception.txt @@ -226,6 +229,8 @@ kref.txt - docs on adding reference counters (krefs) to kernel objects. laptop-mode.txt - how to conserve battery power using laptop-mode. +laptops/ + - directory with laptop related info and laptop driver documentation. 
ldm.txt - a brief description of LDM (Windows Dynamic Disks). leds-class.txt @@ -334,20 +339,8 @@ rtc.txt - notes on how to use the Real Time Clock (aka CMOS clock) driver. s390/ - directory with info on using Linux on the IBM S390. -sched-arch.txt - - CPU Scheduler implementation hints for architecture specific code. -sched-coding.txt - - reference for various scheduler-related methods in the O(1) scheduler. -sched-design.txt - - goals, design and implementation of the Linux O(1) scheduler. -sched-design-CFS.txt - - goals, design and implementation of the Complete Fair Scheduler. -sched-domains.txt - - information on scheduling domains. -sched-nice-design.txt - - How and why the scheduler's nice levels are implemented. -sched-stats.txt - - information on schedstats (Linux Scheduler Statistics). +scheduler/ + - directory with info on the scheduler. scsi/ - directory with info on Linux scsi support. serial/ @@ -360,14 +353,8 @@ sgi-visws.txt - short blurb on the SGI Visual Workstations. sh/ - directory with info on porting Linux to a new architecture. -sharedsubtree.txt - - a description of shared subtrees for namespaces. smart-config.txt - description of the Smart Config makefile feature. -sony-laptop.txt - - Sony Notebook Control Driver (SNC) Readme. -sonypi.txt - - info on Linux Sony Programmable I/O Device support. sound/ - directory with info on sound card support. sparc/ @@ -398,8 +385,6 @@ sysrq.txt - info on the magic SysRq key. telephony/ - directory with info on telephony (e.g. voice over IP) support. -thinkpad-acpi.txt - - information on the (IBM and Lenovo) ThinkPad ACPI Extras driver. time_interpolators.txt - info on time interpolators. 
tipar.txt diff --git a/Documentation/ABI/testing/procfs-diskstats b/Documentation/ABI/testing/procfs-diskstats new file mode 100644 index 000000000000..99233902e09e --- /dev/null +++ b/Documentation/ABI/testing/procfs-diskstats @@ -0,0 +1,22 @@ +What: /proc/diskstats +Date: February 2008 +Contact: Jerome Marchand <jmarchan@redhat.com> +Description: + The /proc/diskstats file displays the I/O statistics + of block devices. Each line contains the following 14 + fields: + 1 - major number + 2 - minor number + 3 - device name + 4 - reads completed successfully + 5 - reads merged + 6 - sectors read + 7 - time spent reading (ms) + 8 - writes completed + 9 - writes merged + 10 - sectors written + 11 - time spent writing (ms) + 12 - I/Os currently in progress + 13 - time spent doing I/Os (ms) + 14 - weighted time spent doing I/Os (ms) + For more details refer to Documentation/iostats.txt diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block new file mode 100644 index 000000000000..4bd9ea539129 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-block @@ -0,0 +1,28 @@ +What: /sys/block/<disk>/stat +Date: February 2008 +Contact: Jerome Marchand <jmarchan@redhat.com> +Description: + The /sys/block/<disk>/stat files display the I/O + statistics of disk <disk>. They contain 11 fields: + 1 - reads completed successfully + 2 - reads merged + 3 - sectors read + 4 - time spent reading (ms) + 5 - writes completed + 6 - writes merged + 7 - sectors written + 8 - time spent writing (ms) + 9 - I/Os currently in progress + 10 - time spent doing I/Os (ms) + 11 - weighted time spent doing I/Os (ms) + For more details refer to Documentation/iostats.txt + + +What: /sys/block/<disk>/<part>/stat +Date: February 2008 +Contact: Jerome Marchand <jmarchan@redhat.com> +Description: + The /sys/block/<disk>/<part>/stat files display the + I/O statistics of partition <part>. The format is the + same as the above-written /sys/block/<disk>/stat + format. 
diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi new file mode 100644 index 000000000000..9470ed9afcc0 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-acpi @@ -0,0 +1,99 @@ +What: /sys/firmware/acpi/interrupts/ +Date: February 2008 +Contact: Len Brown <lenb@kernel.org> +Description: + All ACPI interrupts are handled via a single IRQ, + the System Control Interrupt (SCI), which appears + as "acpi" in /proc/interrupts. + + However, one of the main functions of ACPI is to make + the platform understand random hardware without + special driver support. So while the SCI handles a few + well known (fixed feature) interrupt sources, such + as the power button, it can also handle a variable + number of "General Purpose Events" (GPE). + + A GPE vectors to a specified handler in AML, which + can do anything the BIOS writer wants from + OS context. GPE 0x12, for example, would vector + to a level or edge handler called _L12 or _E12. + The handler may do its business and return. + Or the handler may send a Notify event + to a Linux device driver registered on an ACPI device, + such as a battery, or a processor. + + To figure out where all the SCI's are coming from, + /sys/firmware/acpi/interrupts contains a file listing + every possible source, and the count of how many + times it has triggered. + + $ cd /sys/firmware/acpi/interrupts + $ grep . * + error:0 + ff_gbl_lock:0 + ff_pmtimer:0 + ff_pwr_btn:0 + ff_rt_clk:0 + ff_slp_btn:0 + gpe00:0 + gpe01:0 + gpe02:0 + gpe03:0 + gpe04:0 + gpe05:0 + gpe06:0 + gpe07:0 + gpe08:0 + gpe09:174 + gpe0A:0 + gpe0B:0 + gpe0C:0 + gpe0D:0 + gpe0E:0 + gpe0F:0 + gpe10:0 + gpe11:60 + gpe12:0 + gpe13:0 + gpe14:0 + gpe15:0 + gpe16:0 + gpe17:0 + gpe18:0 + gpe19:7 + gpe1A:0 + gpe1B:0 + gpe1C:0 + gpe1D:0 + gpe1E:0 + gpe1F:0 + gpe_all:241 + sci:241 + + sci - The total number of times the ACPI SCI + has claimed an interrupt. + + gpe_all - count of SCI caused by GPEs. 
+ + gpeXX - count for individual GPE source + + ff_gbl_lock - Global Lock + + ff_pmtimer - PM Timer + + ff_pwr_btn - Power Button + + ff_rt_clk - Real Time Clock + + ff_slp_btn - Sleep Button + + error - an interrupt that can't be accounted for above. + + Root has permission to clear any of these counters. Eg. + # echo 0 > gpe11 + + All counters can be cleared by clearing the total "sci": + # echo 0 > sci + + None of these counters has an effect on the function + of the system, they are simply statistics. diff --git a/Documentation/ABI/testing/sysfs-kernel-uids b/Documentation/ABI/testing/sysfs-kernel-uids index 648d65dbc0e7..28f14695a852 100644 --- a/Documentation/ABI/testing/sysfs-kernel-uids +++ b/Documentation/ABI/testing/sysfs-kernel-uids @@ -11,4 +11,4 @@ Description: example would be, if User A has shares = 1024 and user B has shares = 2048, User B will get twice the CPU bandwidth user A will. For more details refer - Documentation/sched-design-CFS.txt + Documentation/scheduler/sched-design-CFS.txt diff --git a/Documentation/BUG-HUNTING b/Documentation/BUG-HUNTING index 6c816751b868..65022a87bf17 100644 --- a/Documentation/BUG-HUNTING +++ b/Documentation/BUG-HUNTING @@ -214,6 +214,23 @@ And recompile the kernel with CONFIG_DEBUG_INFO enabled: gdb vmlinux (gdb) p vt_ioctl (gdb) l *(0x<address of vt_ioctl> + 0xda8) +or, as one command + (gdb) l *(vt_ioctl + 0xda8) + +If you have a call trace, such as :- +>Call Trace: +> [<ffffffff8802c8e9>] :jbd:log_wait_commit+0xa3/0xf5 +> [<ffffffff810482d9>] autoremove_wake_function+0x0/0x2e +> [<ffffffff8802770b>] :jbd:journal_stop+0x1be/0x1ee +> ... +this shows the problem in the :jbd: module. You can load that module in gdb +and list the relevant code. + gdb fs/jbd/jbd.ko + (gdb) p log_wait_commit + (gdb) l *(0x<address> + 0xa3) +or + (gdb) l *(log_wait_commit + 0xa3) + Another very useful option of the Kernel Hacking section in menuconfig is Debug memory allocations. 
This will help you see whether data has been diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 6a0ad4715e9f..300e1707893f 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -8,7 +8,7 @@ DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \ kernel-hacking.xml kernel-locking.xml deviceiobook.xml \ - procfs-guide.xml writing_usb_driver.xml \ + procfs-guide.xml writing_usb_driver.xml networking.xml \ kernel-api.xml filesystems.xml lsm.xml usb.xml \ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml diff --git a/Documentation/DocBook/filesystems.tmpl b/Documentation/DocBook/filesystems.tmpl index 5eaef87e8f1b..5e87ad58c0b5 100644 --- a/Documentation/DocBook/filesystems.tmpl +++ b/Documentation/DocBook/filesystems.tmpl @@ -398,4 +398,24 @@ an example. </chapter> + <chapter id="splice"> + <title>splice API</title> + <para> + splice is a method for moving blocks of data around inside the + kernel, without continually transferring them between the kernel + and user space. + </para> +!Ffs/splice.c + </chapter> + + <chapter id="pipes"> + <title>pipes API</title> + <para> + Pipe interfaces are all for in-kernel (builtin image) use. + They are not exported for use by modules. + </para> +!Iinclude/linux/pipe_fs_i.h +!Ffs/pipe.c + </chapter> + </book> diff --git a/Documentation/DocBook/genericirq.tmpl b/Documentation/DocBook/genericirq.tmpl index 4215f69ce7e6..3a882d9a90a9 100644 --- a/Documentation/DocBook/genericirq.tmpl +++ b/Documentation/DocBook/genericirq.tmpl @@ -172,7 +172,7 @@ <listitem><para>Chiplevel hardware encapsulation</para></listitem> </orderedlist> </para> - <sect1> + <sect1 id="Interrupt_control_flow"> <title>Interrupt control flow</title> <para> Each interrupt is described by an interrupt descriptor structure @@ -190,7 +190,7 @@ referenced by the assigned chip descriptor structure. 
</para> </sect1> - <sect1> + <sect1 id="Highlevel_Driver_API"> <title>Highlevel Driver API</title> <para> The highlevel Driver API consists of following functions: @@ -210,7 +210,7 @@ See the autogenerated function documentation for details. </para> </sect1> - <sect1> + <sect1 id="Highlevel_IRQ_flow_handlers"> <title>Highlevel IRQ flow handlers</title> <para> The generic layer provides a set of pre-defined irq-flow methods: @@ -224,9 +224,9 @@ specific) are assigned to specific interrupts by the architecture either during bootup or during device initialization. </para> - <sect2> + <sect2 id="Default_flow_implementations"> <title>Default flow implementations</title> - <sect3> + <sect3 id="Helper_functions"> <title>Helper functions</title> <para> The helper functions call the chip primitives and @@ -267,9 +267,9 @@ noop(irq) </para> </sect3> </sect2> - <sect2> + <sect2 id="Default_flow_handler_implementations"> <title>Default flow handler implementations</title> - <sect3> + <sect3 id="Default_Level_IRQ_flow_handler"> <title>Default Level IRQ flow handler</title> <para> handle_level_irq provides a generic implementation @@ -284,7 +284,7 @@ desc->chip->end(); </programlisting> </para> </sect3> - <sect3> + <sect3 id="Default_Edge_IRQ_flow_handler"> <title>Default Edge IRQ flow handler</title> <para> handle_edge_irq provides a generic implementation @@ -311,7 +311,7 @@ desc->chip->end(); </programlisting> </para> </sect3> - <sect3> + <sect3 id="Default_simple_IRQ_flow_handler"> <title>Default simple IRQ flow handler</title> <para> handle_simple_irq provides a generic implementation @@ -328,7 +328,7 @@ handle_IRQ_event(desc->action); </programlisting> </para> </sect3> - <sect3> + <sect3 id="Default_per_CPU_flow_handler"> <title>Default per CPU flow handler</title> <para> handle_percpu_irq provides a generic implementation @@ -349,7 +349,7 @@ desc->chip->end(); </para> </sect3> </sect2> - <sect2> + <sect2 id="Quirks_and_optimizations"> <title>Quirks and 
optimizations</title> <para> The generic functions are intended for 'clean' architectures and chips, @@ -358,7 +358,7 @@ desc->chip->end(); overriding the highlevel irq-flow handler. </para> </sect2> - <sect2> + <sect2 id="Delayed_interrupt_disable"> <title>Delayed interrupt disable</title> <para> This per interrupt selectable feature, which was introduced by Russell @@ -380,7 +380,7 @@ desc->chip->end(); </para> </sect2> </sect1> - <sect1> + <sect1 id="Chiplevel_hardware_encapsulation"> <title>Chiplevel hardware encapsulation</title> <para> The chip level hardware descriptor structure irq_chip diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index 77436d735013..f31601e8bd89 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -165,6 +165,7 @@ X!Ilib/string.c !Emm/vmalloc.c !Imm/page_alloc.c !Emm/mempool.c +!Emm/dmapool.c !Emm/page-writeback.c !Emm/truncate.c </sect1> @@ -203,65 +204,6 @@ X!Ilib/string.c </sect1> </chapter> - <chapter id="netcore"> - <title>Linux Networking</title> - <sect1><title>Networking Base Types</title> -!Iinclude/linux/net.h - </sect1> - <sect1><title>Socket Buffer Functions</title> -!Iinclude/linux/skbuff.h -!Iinclude/net/sock.h -!Enet/socket.c -!Enet/core/skbuff.c -!Enet/core/sock.c -!Enet/core/datagram.c -!Enet/core/stream.c - </sect1> - <sect1><title>Socket Filter</title> -!Enet/core/filter.c - </sect1> - <sect1><title>Generic Network Statistics</title> -!Iinclude/linux/gen_stats.h -!Enet/core/gen_stats.c -!Enet/core/gen_estimator.c - </sect1> - <sect1><title>SUN RPC subsystem</title> -<!-- The !D functionality is not perfect, garbage has to be protected by comments -!Dnet/sunrpc/sunrpc_syms.c ---> -!Enet/sunrpc/xdr.c -!Enet/sunrpc/svcsock.c -!Enet/sunrpc/sched.c - </sect1> - </chapter> - - <chapter id="netdev"> - <title>Network device support</title> - <sect1><title>Driver Support</title> -!Enet/core/dev.c -!Enet/ethernet/eth.c 
-!Enet/sched/sch_generic.c -!Iinclude/linux/etherdevice.h -!Iinclude/linux/netdevice.h - </sect1> - <sect1><title>PHY Support</title> -!Edrivers/net/phy/phy.c -!Idrivers/net/phy/phy.c -!Edrivers/net/phy/phy_device.c -!Idrivers/net/phy/phy_device.c -!Edrivers/net/phy/mdio_bus.c -!Idrivers/net/phy/mdio_bus.c - </sect1> -<!-- FIXME: Removed for now since no structured comments in source - <sect1><title>Wireless</title> -X!Enet/core/wireless.c - </sect1> ---> - <sect1><title>Synchronous PPP</title> -!Edrivers/net/wan/syncppp.c - </sect1> - </chapter> - <chapter id="modload"> <title>Module Support</title> <sect1><title>Module Loading</title> @@ -371,7 +313,6 @@ X!Iinclude/linux/device.h !Edrivers/base/class.c !Edrivers/base/firmware_class.c !Edrivers/base/transport_class.c -!Edrivers/base/dmapool.c <!-- Cannot be included, because attribute_container_add_class_device_adapter and attribute_container_classdev_to_container @@ -508,11 +449,6 @@ X!Isound/sound_firmware.c !Edrivers/serial/8250.c </chapter> - <chapter id="z85230"> - <title>Z85230 Support Library</title> -!Edrivers/net/wan/z85230.c - </chapter> - <chapter id="fbdev"> <title>Frame Buffer Library</title> @@ -712,24 +648,4 @@ X!Idrivers/video/console/fonts.c !Edrivers/i2c/i2c-core.c </chapter> - <chapter id="splice"> - <title>splice API</title> - <para> - splice is a method for moving blocks of data around inside the - kernel, without continually transferring them between the kernel - and user space. - </para> -!Ffs/splice.c - </chapter> - - <chapter id="pipes"> - <title>pipes API</title> - <para> - Pipe interfaces are all for in-kernel (builtin image) use. - They are not exported for use by modules. 
- </para> -!Iinclude/linux/pipe_fs_i.h -!Ffs/pipe.c - </chapter> - </book> diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 01825ee7db64..2e9d6b41f034 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -717,7 +717,7 @@ used, and when it gets full, throws out the least used one. <para> For our first example, we assume that all operations are in user context (ie. from system calls), so we can sleep. This means we can -use a semaphore to protect the cache and all the objects within +use a mutex to protect the cache and all the objects within it. Here's the code: </para> @@ -725,7 +725,7 @@ it. Here's the code: #include <linux/list.h> #include <linux/slab.h> #include <linux/string.h> -#include <asm/semaphore.h> +#include <linux/mutex.h> #include <asm/errno.h> struct object @@ -737,7 +737,7 @@ struct object }; /* Protects the cache, cache_num, and the objects within it */ -static DECLARE_MUTEX(cache_lock); +static DEFINE_MUTEX(cache_lock); static LIST_HEAD(cache); static unsigned int cache_num = 0; #define MAX_CACHE_SIZE 10 @@ -789,17 +789,17 @@ int cache_add(int id, const char *name) obj->id = id; obj->popularity = 0; - down(&cache_lock); + mutex_lock(&cache_lock); __cache_add(obj); - up(&cache_lock); + mutex_unlock(&cache_lock); return 0; } void cache_delete(int id) { - down(&cache_lock); + mutex_lock(&cache_lock); __cache_delete(__cache_find(id)); - up(&cache_lock); + mutex_unlock(&cache_lock); } int cache_find(int id, char *name) @@ -807,13 +807,13 @@ int cache_find(int id, char *name) struct object *obj; int ret = -ENOENT; - down(&cache_lock); + mutex_lock(&cache_lock); obj = __cache_find(id); if (obj) { ret = 0; strcpy(name, obj->name); } - up(&cache_lock); + mutex_unlock(&cache_lock); return ret; } </programlisting> @@ -853,7 +853,7 @@ The change is shown below, in standard patch format: the int popularity; }; --static DECLARE_MUTEX(cache_lock); +-static 
DEFINE_MUTEX(cache_lock); +static spinlock_t cache_lock = SPIN_LOCK_UNLOCKED; static LIST_HEAD(cache); static unsigned int cache_num = 0; @@ -870,22 +870,22 @@ The change is shown below, in standard patch format: the obj->id = id; obj->popularity = 0; -- down(&cache_lock); +- mutex_lock(&cache_lock); + spin_lock_irqsave(&cache_lock, flags); __cache_add(obj); -- up(&cache_lock); +- mutex_unlock(&cache_lock); + spin_unlock_irqrestore(&cache_lock, flags); return 0; } void cache_delete(int id) { -- down(&cache_lock); +- mutex_lock(&cache_lock); + unsigned long flags; + + spin_lock_irqsave(&cache_lock, flags); __cache_delete(__cache_find(id)); -- up(&cache_lock); +- mutex_unlock(&cache_lock); + spin_unlock_irqrestore(&cache_lock, flags); } @@ -895,14 +895,14 @@ The change is shown below, in standard patch format: the int ret = -ENOENT; + unsigned long flags; -- down(&cache_lock); +- mutex_lock(&cache_lock); + spin_lock_irqsave(&cache_lock, flags); obj = __cache_find(id); if (obj) { ret = 0; strcpy(name, obj->name); } -- up(&cache_lock); +- mutex_unlock(&cache_lock); + spin_unlock_irqrestore(&cache_lock, flags); return ret; } diff --git a/Documentation/DocBook/lsm.tmpl b/Documentation/DocBook/lsm.tmpl index f63822195871..fe7664ce9667 100644 --- a/Documentation/DocBook/lsm.tmpl +++ b/Documentation/DocBook/lsm.tmpl @@ -33,7 +33,7 @@ </authorgroup> </articleinfo> -<sect1><title>Introduction</title> +<sect1 id="Introduction"><title>Introduction</title> <para> In March 2001, the National Security Agency (NSA) gave a presentation diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl index 957cf5c26831..8e145857fc9d 100644 --- a/Documentation/DocBook/mtdnand.tmpl +++ b/Documentation/DocBook/mtdnand.tmpl @@ -80,7 +80,7 @@ struct member has a short description which is marked with an [XXX] identifier. The following chapters explain the meaning of those identifiers. 
</para> - <sect1> + <sect1 id="Function_identifiers_XXX"> <title>Function identifiers [XXX]</title> <para> The functions are marked with [XXX] identifiers in the short @@ -115,7 +115,7 @@ </para></listitem> </itemizedlist> </sect1> - <sect1> + <sect1 id="Struct_member_identifiers_XXX"> <title>Struct member identifiers [XXX]</title> <para> The struct members are marked with [XXX] identifiers in the @@ -159,7 +159,7 @@ basic functions and fill out some really board dependent members in the nand chip description structure. </para> - <sect1> + <sect1 id="Basic_defines"> <title>Basic defines</title> <para> At least you have to provide a mtd structure and @@ -185,7 +185,7 @@ static struct nand_chip board_chip; static unsigned long baseaddr; </programlisting> </sect1> - <sect1> + <sect1 id="Partition_defines"> <title>Partition defines</title> <para> If you want to divide your device into partitions, then @@ -204,7 +204,7 @@ static struct mtd_partition partition_info[] = { }; </programlisting> </sect1> - <sect1> + <sect1 id="Hardware_control_functions"> <title>Hardware control function</title> <para> The hardware control function provides access to the @@ -246,7 +246,7 @@ static void board_hwcontrol(struct mtd_info *mtd, int cmd) } </programlisting> </sect1> - <sect1> + <sect1 id="Device_ready_function"> <title>Device ready function</title> <para> If the hardware interface has the ready busy pin of the NAND chip connected to a @@ -257,7 +257,7 @@ static void board_hwcontrol(struct mtd_info *mtd, int cmd) the function must not be defined and the function pointer this->dev_ready is set to NULL. 
</para> </sect1> - <sect1> + <sect1 id="Init_function"> <title>Init function</title> <para> The init function allocates memory and sets up all the board @@ -325,7 +325,7 @@ out: module_init(board_init); </programlisting> </sect1> - <sect1> + <sect1 id="Exit_function"> <title>Exit function</title> <para> The exit function is only neccecary if the driver is @@ -359,7 +359,7 @@ module_exit(board_cleanup); driver. For a list of functions which can be overridden by the board driver see the documentation of the nand_chip structure. </para> - <sect1> + <sect1 id="Multiple_chip_control"> <title>Multiple chip control</title> <para> The nand driver can control chip arrays. Therefor the @@ -419,9 +419,9 @@ static void board_select_chip (struct mtd_info *mtd, int chip) } </programlisting> </sect1> - <sect1> + <sect1 id="Hardware_ECC_support"> <title>Hardware ECC support</title> - <sect2> + <sect2 id="Functions_and_constants"> <title>Functions and constants</title> <para> The nand driver supports three different types of @@ -475,7 +475,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) </itemizedlist> </para> </sect2> - <sect2> + <sect2 id="Hardware_ECC_with_syndrome_calculation"> <title>Hardware ECC with syndrome calculation</title> <para> Many hardware ECC implementations provide Reed-Solomon @@ -500,7 +500,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) </para> </sect2> </sect1> - <sect1> + <sect1 id="Bad_Block_table_support"> <title>Bad block table support</title> <para> Most NAND chips mark the bad blocks at a defined @@ -552,7 +552,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) allows faster access than always checking the bad block information on the flash chip itself. </para> - <sect2> + <sect2 id="Flash_based_tables"> <title>Flash based tables</title> <para> It may be desired or neccecary to keep a bad block table in FLASH. 
@@ -587,7 +587,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) </itemizedlist> </para> </sect2> - <sect2> + <sect2 id="User_defined_tables"> <title>User defined tables</title> <para> User defined tables are created by filling out a @@ -676,7 +676,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) </para> </sect2> </sect1> - <sect1> + <sect1 id="Spare_area_placement"> <title>Spare area (auto)placement</title> <para> The nand driver implements different possibilities for @@ -730,7 +730,7 @@ struct nand_oobinfo { </para></listitem> </itemizedlist> </para> - <sect2> + <sect2 id="Placement_defined_by_fs_driver"> <title>Placement defined by fs driver</title> <para> The calling function provides a pointer to a nand_oobinfo @@ -760,7 +760,7 @@ struct nand_oobinfo { done according to the given scheme in the nand_oobinfo structure. </para> </sect2> - <sect2> + <sect2 id="Automatic_placement"> <title>Automatic placement</title> <para> Automatic placement uses the built in defaults to place the @@ -774,7 +774,7 @@ struct nand_oobinfo { done according to the default builtin scheme. 
</para> </sect2> - <sect2> + <sect2 id="User_space_placement_selection"> <title>User space placement selection</title> <para> All non ecc functions like mtd->read and mtd->write use an internal @@ -789,9 +789,9 @@ struct nand_oobinfo { </para> </sect2> </sect1> - <sect1> + <sect1 id="Spare_area_autoplacement_default"> <title>Spare area autoplacement default schemes</title> - <sect2> + <sect2 id="pagesize_256"> <title>256 byte pagesize</title> <informaltable><tgroup cols="3"><tbody> <row> @@ -843,7 +843,7 @@ pages this byte is reserved</entry> </row> </tbody></tgroup></informaltable> </sect2> - <sect2> + <sect2 id="pagesize_512"> <title>512 byte pagesize</title> <informaltable><tgroup cols="3"><tbody> <row> @@ -906,7 +906,7 @@ in this page</entry> </row> </tbody></tgroup></informaltable> </sect2> - <sect2> + <sect2 id="pagesize_2048"> <title>2048 byte pagesize</title> <informaltable><tgroup cols="3"><tbody> <row> @@ -1126,9 +1126,9 @@ in this page</entry> <para> This chapter describes the constants which might be relevant for a driver developer. </para> - <sect1> + <sect1 id="Chip_option_constants"> <title>Chip option constants</title> - <sect2> + <sect2 id="Constants_for_chip_id_table"> <title>Constants for chip id table</title> <para> These constants are defined in nand.h. They are ored together to describe @@ -1153,7 +1153,7 @@ in this page</entry> </programlisting> </para> </sect2> - <sect2> + <sect2 id="Constants_for_runtime_options"> <title>Constants for runtime options</title> <para> These constants are defined in nand.h. They are ored together to describe @@ -1171,7 +1171,7 @@ in this page</entry> </sect2> </sect1> - <sect1> + <sect1 id="EEC_selection_constants"> <title>ECC selection constants</title> <para> Use these constants to select the ECC algorithm. 
@@ -1192,7 +1192,7 @@ in this page</entry> </para> </sect1> - <sect1> + <sect1 id="Hardware_control_related_constants"> <title>Hardware control related constants</title> <para> These constants describe the requested hardware access function when @@ -1218,7 +1218,7 @@ in this page</entry> </para> </sect1> - <sect1> + <sect1 id="Bad_block_table_constants"> <title>Bad block table related constants</title> <para> These constants describe the options used for bad block diff --git a/Documentation/DocBook/networking.tmpl b/Documentation/DocBook/networking.tmpl new file mode 100644 index 000000000000..f24f9e85e4ae --- /dev/null +++ b/Documentation/DocBook/networking.tmpl @@ -0,0 +1,106 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" + "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> + +<book id="LinuxNetworking"> + <bookinfo> + <title>Linux Networking and Network Devices APIs</title> + + <legalnotice> + <para> + This documentation is free software; you can redistribute + it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later + version. + </para> + + <para> + This program is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied + warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + </para> + + <para> + You should have received a copy of the GNU General Public + License along with this program; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, + MA 02111-1307 USA + </para> + + <para> + For more details see the file COPYING in the source + distribution of Linux. 
+ </para> + </legalnotice> + </bookinfo> + +<toc></toc> + + <chapter id="netcore"> + <title>Linux Networking</title> + <sect1><title>Networking Base Types</title> +!Iinclude/linux/net.h + </sect1> + <sect1><title>Socket Buffer Functions</title> +!Iinclude/linux/skbuff.h +!Iinclude/net/sock.h +!Enet/socket.c +!Enet/core/skbuff.c +!Enet/core/sock.c +!Enet/core/datagram.c +!Enet/core/stream.c + </sect1> + <sect1><title>Socket Filter</title> +!Enet/core/filter.c + </sect1> + <sect1><title>Generic Network Statistics</title> +!Iinclude/linux/gen_stats.h +!Enet/core/gen_stats.c +!Enet/core/gen_estimator.c + </sect1> + <sect1><title>SUN RPC subsystem</title> +<!-- The !D functionality is not perfect, garbage has to be protected by comments +!Dnet/sunrpc/sunrpc_syms.c +--> +!Enet/sunrpc/xdr.c +!Enet/sunrpc/svc_xprt.c +!Enet/sunrpc/xprt.c +!Enet/sunrpc/sched.c +!Enet/sunrpc/socklib.c +!Enet/sunrpc/stats.c +!Enet/sunrpc/rpc_pipe.c +!Enet/sunrpc/rpcb_clnt.c +!Enet/sunrpc/clnt.c + </sect1> + </chapter> + + <chapter id="netdev"> + <title>Network device support</title> + <sect1><title>Driver Support</title> +!Enet/core/dev.c +!Enet/ethernet/eth.c +!Enet/sched/sch_generic.c +!Iinclude/linux/etherdevice.h +!Iinclude/linux/netdevice.h + </sect1> + <sect1><title>PHY Support</title> +!Edrivers/net/phy/phy.c +!Idrivers/net/phy/phy.c +!Edrivers/net/phy/phy_device.c +!Idrivers/net/phy/phy_device.c +!Edrivers/net/phy/mdio_bus.c +!Idrivers/net/phy/mdio_bus.c + </sect1> +<!-- FIXME: Removed for now since no structured comments in source + <sect1><title>Wireless</title> +X!Enet/core/wireless.c + </sect1> +--> + <sect1><title>Synchronous PPP</title> +!Edrivers/net/wan/syncppp.c + </sect1> + </chapter> + +</book> diff --git a/Documentation/DocBook/procfs-guide.tmpl b/Documentation/DocBook/procfs-guide.tmpl index 2de84dc195a8..1fd6a1ec7591 100644 --- a/Documentation/DocBook/procfs-guide.tmpl +++ b/Documentation/DocBook/procfs-guide.tmpl @@ -85,7 +85,7 @@ - <preface> + <preface id="Preface"> 
<title>Preface</title> <para> @@ -230,7 +230,7 @@ - <sect1> + <sect1 id="Creating_a_symlink"> <title>Creating a symlink</title> <funcsynopsis> @@ -254,7 +254,7 @@ </para> </sect1> - <sect1> + <sect1 id="Creating_a_directory"> <title>Creating a directory</title> <funcsynopsis> @@ -274,7 +274,7 @@ - <sect1> + <sect1 id="Removing_an_entry"> <title>Removing an entry</title> <funcsynopsis> @@ -340,7 +340,7 @@ entry->write_proc = write_proc_foo; - <sect1> + <sect1 id="Reading_data"> <title>Reading data</title> <para> @@ -448,7 +448,7 @@ entry->write_proc = write_proc_foo; - <sect1> + <sect1 id="Writing_data"> <title>Writing data</title> <para> @@ -579,7 +579,7 @@ int foo_read_func(char *page, char **start, off_t off, - <sect1> + <sect1 id="Modules"> <title>Modules</title> <para> @@ -599,7 +599,7 @@ entry->owner = THIS_MODULE; - <sect1> + <sect1 id="Mode_and_ownership"> <title>Mode and ownership</title> <para> diff --git a/Documentation/DocBook/rapidio.tmpl b/Documentation/DocBook/rapidio.tmpl index a8b88c47e809..b9e143e28c64 100644 --- a/Documentation/DocBook/rapidio.tmpl +++ b/Documentation/DocBook/rapidio.tmpl @@ -77,11 +77,11 @@ <chapter id="bugs"> <title>Known Bugs and Limitations</title> - <sect1> + <sect1 id="known_bugs"> <title>Bugs</title> <para>None. ;)</para> </sect1> - <sect1> + <sect1 id="Limitations"> <title>Limitations</title> <para> <orderedlist> @@ -100,7 +100,7 @@ on devices, request/map memory region resources, and manage mailboxes/doorbells. </para> - <sect1> + <sect1 id="Functions"> <title>Functions</title> !Iinclude/linux/rio_drv.h !Edrivers/rapidio/rio-driver.c @@ -116,23 +116,23 @@ subsystem. 
</para> - <sect1><title>Structures</title> + <sect1 id="Structures"><title>Structures</title> !Iinclude/linux/rio.h </sect1> - <sect1><title>Enumeration and Discovery</title> + <sect1 id="Enumeration_and_Discovery"><title>Enumeration and Discovery</title> !Idrivers/rapidio/rio-scan.c </sect1> - <sect1><title>Driver functionality</title> + <sect1 id="Driver_functionality"><title>Driver functionality</title> !Idrivers/rapidio/rio.c !Idrivers/rapidio/rio-access.c </sect1> - <sect1><title>Device model support</title> + <sect1 id="Device_model_support"><title>Device model support</title> !Idrivers/rapidio/rio-driver.c </sect1> - <sect1><title>Sysfs support</title> + <sect1 id="Sysfs_support"><title>Sysfs support</title> !Idrivers/rapidio/rio-sysfs.c </sect1> - <sect1><title>PPC32 support</title> + <sect1 id="PPC32_support"><title>PPC32 support</title> !Iarch/powerpc/kernel/rio.c !Earch/powerpc/sysdev/fsl_rio.c !Iarch/powerpc/sysdev/fsl_rio.c diff --git a/Documentation/DocBook/scsi.tmpl b/Documentation/DocBook/scsi.tmpl index f299ab182bbe..10a150ae2a7e 100644 --- a/Documentation/DocBook/scsi.tmpl +++ b/Documentation/DocBook/scsi.tmpl @@ -12,7 +12,7 @@ <surname>Bottomley</surname> <affiliation> <address> - <email>James.Bottomley@steeleye.com</email> + <email>James.Bottomley@hansenpartnership.com</email> </address> </affiliation> </author> diff --git a/Documentation/DocBook/videobook.tmpl b/Documentation/DocBook/videobook.tmpl index b3d93ee27693..89817795e668 100644 --- a/Documentation/DocBook/videobook.tmpl +++ b/Documentation/DocBook/videobook.tmpl @@ -170,7 +170,7 @@ int __init myradio_init(struct video_init *v) <para> The types available are </para> - <table frame="all"><title>Device Types</title> + <table frame="all" id="Device_Types"><title>Device Types</title> <tgroup cols="3" align="left"> <tbody> <row> @@ -291,7 +291,7 @@ static int radio_ioctl(struct video_device *dev, unsigned int cmd, void *arg) allows the applications to find out what sort of a card they have 
found and to figure out what they want to do about it. The fields in the structure are </para> - <table frame="all"><title>struct video_capability fields</title> + <table frame="all" id="video_capability_fields"><title>struct video_capability fields</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -365,7 +365,7 @@ static int radio_ioctl(struct video_device *dev, unsigned int cmd, void *arg) <para> The video_tuner structure has the following fields </para> - <table frame="all"><title>struct video_tuner fields</title> + <table frame="all" id="video_tuner_fields"><title>struct video_tuner fields</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -398,7 +398,7 @@ static int radio_ioctl(struct video_device *dev, unsigned int cmd, void *arg) </tgroup> </table> - <table frame="all"><title>struct video_tuner flags</title> + <table frame="all" id="video_tuner_flags"><title>struct video_tuner flags</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -421,7 +421,7 @@ static int radio_ioctl(struct video_device *dev, unsigned int cmd, void *arg) </tgroup> </table> - <table frame="all"><title>struct video_tuner modes</title> + <table frame="all" id="video_tuner_modes"><title>struct video_tuner modes</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -572,7 +572,7 @@ static int current_volume=0; <para> Then we fill in the video_audio structure. 
This has the following format </para> - <table frame="all"><title>struct video_audio fields</title> + <table frame="all" id="video_audio_fields"><title>struct video_audio fields</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -607,7 +607,7 @@ static int current_volume=0; </tgroup> </table> - <table frame="all"><title>struct video_audio flags</title> + <table frame="all" id="video_audio_flags"><title>struct video_audio flags</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -625,7 +625,7 @@ static int current_volume=0; </tgroup> </table> - <table frame="all"><title>struct video_audio modes</title> + <table frame="all" id="video_audio_modes"><title>struct video_audio modes</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -775,7 +775,7 @@ module_exit(cleanup); </para> </sect1> </chapter> - <chapter> + <chapter id="Video_Capture_Devices"> <title>Video Capture Devices</title> <sect1 id="introvid"> <title>Video Capture Device Types</title> @@ -855,7 +855,7 @@ static struct video_device my_camera We use the extra video capability flags that did not apply to the radio interface. The video related flags are </para> - <table frame="all"><title>Capture Capabilities</title> + <table frame="all" id="Capture_Capabilities"><title>Capture Capabilities</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -1195,7 +1195,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg) inputs to the video card). Our example card has a single camera input. 
The fields in the structure are </para> - <table frame="all"><title>struct video_channel fields</title> + <table frame="all" id="video_channel_fields"><title>struct video_channel fields</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -1218,7 +1218,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg) </tbody> </tgroup> </table> - <table frame="all"><title>struct video_channel flags</title> + <table frame="all" id="video_channel_flags"><title>struct video_channel flags</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -1229,7 +1229,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg) </tbody> </tgroup> </table> - <table frame="all"><title>struct video_channel types</title> + <table frame="all" id="video_channel_types"><title>struct video_channel types</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -1242,7 +1242,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg) </tbody> </tgroup> </table> - <table frame="all"><title>struct video_channel norms</title> + <table frame="all" id="video_channel_norms"><title>struct video_channel norms</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -1328,7 +1328,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg) for every other pixel in the image. The other common formats the interface defines are </para> - <table frame="all"><title>Framebuffer Encodings</title> + <table frame="all" id="Framebuffer_Encodings"><title>Framebuffer Encodings</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -1466,7 +1466,7 @@ static struct video_buffer capture_fb; display. The video_window structure is used to describe the way the image should be displayed. 
</para> - <table frame="all"><title>struct video_window fields</title> + <table frame="all" id="video_window_fields"><title>struct video_window fields</title> <tgroup cols="2" align="left"> <tbody> <row> @@ -1503,7 +1503,7 @@ static struct video_buffer capture_fb; <para> Each clip is a struct video_clip which has the following fields </para> - <table frame="all"><title>video_clip fields</title> + <table frame="all" id="video_clip_fields"><title>video_clip fields</title> <tgroup cols="2" align="left"> <tbody> <row> diff --git a/Documentation/DocBook/z8530book.tmpl b/Documentation/DocBook/z8530book.tmpl index a507876447aa..42c75ba71ba2 100644 --- a/Documentation/DocBook/z8530book.tmpl +++ b/Documentation/DocBook/z8530book.tmpl @@ -77,7 +77,7 @@ </para> </chapter> - <chapter> + <chapter id="Driver_Modes"> <title>Driver Modes</title> <para> The Z85230 driver layer can drive Z8530, Z85C30 and Z85230 devices @@ -108,7 +108,7 @@ </para> </chapter> - <chapter> + <chapter id="Using_the_Z85230_driver"> <title>Using the Z85230 driver</title> <para> The Z85230 driver provides the back end interface to your board. To @@ -174,7 +174,7 @@ </para> </chapter> - <chapter> + <chapter id="Attaching_Network_Interfaces"> <title>Attaching Network Interfaces</title> <para> If you wish to use the network interface facilities of the driver, @@ -216,7 +216,7 @@ </para> </chapter> - <chapter> + <chapter id="Configuring_And_Activating_The_Port"> <title>Configuring And Activating The Port</title> <para> The Z85230 driver provides helper functions and tables to load the @@ -300,7 +300,7 @@ </para> </chapter> - <chapter> + <chapter id="Network_Layer_Functions"> <title>Network Layer Functions</title> <para> The Z8530 layer provides functions to queue packets for @@ -327,7 +327,7 @@ </para> </chapter> - <chapter> + <chapter id="Porting_The_Z8530_Driver"> <title>Porting The Z8530 Driver</title> <para> The Z8530 driver is written to be portable. 
In DMA mode it makes diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt index d0634a5c3445..c64158ecde43 100644 --- a/Documentation/RCU/NMI-RCU.txt +++ b/Documentation/RCU/NMI-RCU.txt @@ -25,7 +25,7 @@ the NMI handler to take the default machine-specific action. This nmi_callback variable is a global function pointer to the current NMI handler. - fastcall void do_nmi(struct pt_regs * regs, long error_code) + void do_nmi(struct pt_regs * regs, long error_code) { int cpu; diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist index 34e06d2f194f..da10e0714241 100644 --- a/Documentation/SubmitChecklist +++ b/Documentation/SubmitChecklist @@ -20,7 +20,11 @@ kernel patches. 4: ppc64 is a good architecture for cross-compilation checking because it tends to use `unsigned long' for 64-bit quantities. -5: Matches kernel coding style(!) +5: Check your patch for general style as detailed in + Documentation/CodingStyle. Check for trivial violations with the + patch style checker prior to submission (scripts/checkpatch.pl). + You should be able to justify all violations that remain in + your patch. 6: Any new or modified CONFIG options don't muck up the config menu. @@ -79,13 +83,3 @@ kernel patches. 23: Tested after it has been merged into the -mm patchset to make sure that it still works with all of the other queued patches and various changes in the VM, VFS, and other subsystems. - -24: Avoid whitespace damage such as indenting with spaces or whitespace - at the end of lines. You can test this by feeding the patch to - "git apply --check --whitespace=error-all" - -25: Check your patch for general style as detailed in - Documentation/CodingStyle. Check for trivial violations with the - patch style checker prior to submission (scripts/checkpatch.pl). - You should be able to justify all violations that remain in - your patch. 
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c index d6cb1a86fd61..40121b5cca14 100644 --- a/Documentation/accounting/getdelays.c +++ b/Documentation/accounting/getdelays.c @@ -168,7 +168,7 @@ int get_family_id(int sd) char buf[256]; } ans; - int id, rc; + int id = 0, rc; struct nlattr *na; int rep_len; @@ -209,7 +209,7 @@ void print_delayacct(struct taskstats *t) void task_context_switch_counts(struct taskstats *t) { printf("\n\nTask %15s%15s\n" - " %15lu%15lu\n", + " %15llu%15llu\n", "voluntary", "nonvoluntary", t->nvcsw, t->nivcsw); } @@ -399,7 +399,7 @@ int main(int argc, char *argv[]) goto done; } - PRINTF("nlmsghdr size=%d, nlmsg_len=%d, rep_len=%d\n", + PRINTF("nlmsghdr size=%zu, nlmsg_len=%d, rep_len=%d\n", sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len); diff --git a/Documentation/acpi/dsdt-override.txt b/Documentation/acpi/dsdt-override.txt new file mode 100644 index 000000000000..5008f256a2db --- /dev/null +++ b/Documentation/acpi/dsdt-override.txt @@ -0,0 +1,15 @@ +Linux supports two methods of overriding the BIOS DSDT: + +CONFIG_ACPI_CUSTOM_DSDT builds the image into the kernel. + +CONFIG_ACPI_CUSTOM_DSDT_INITRD adds the image to the initrd. + +When to use these methods is described in detail on the +Linux/ACPI home page: +http://www.lesswatts.org/projects/acpi/overridingDSDT.php + +Note that if both options are used, the DSDT supplied +by the INITRD method takes precedence. + +Documentation/acpi/initramfs-add-dsdt.sh is provided for convenience +for use with the CONFIG_ACPI_CUSTOM_DSDT_INITRD method. 
diff --git a/Documentation/acpi/initramfs-add-dsdt.sh b/Documentation/acpi/initramfs-add-dsdt.sh new file mode 100755 index 000000000000..17ef6e838e14 --- /dev/null +++ b/Documentation/acpi/initramfs-add-dsdt.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Adds a DSDT file to the initrd (if it's an initramfs) +# first argument is the name of archive +# second argument is the name of the file to add +# The file will be copied as /DSDT.aml + +# 20060126: fix "Premature end of file" with some old cpio (Roland Robic) +# 20060205: this time it should really work + +# check the arguments +if [ $# -ne 2 ]; then + program_name=$(basename $0) + echo "\ +$program_name: too few arguments +Usage: $program_name initrd-name.img DSDT-to-add.aml +Adds a DSDT file to an initrd (in initramfs format) + + initrd-name.img: filename of the initrd in initramfs format + DSDT-to-add.aml: filename of the DSDT file to add + " 1>&2 + exit 1 +fi + +# we should check it's an initramfs + +tempcpio=$(mktemp -d) +# cleanup on exit, hangup, interrupt, quit, termination +trap 'rm -rf $tempcpio' 0 1 2 3 15 + +# extract the archive +gunzip -c "$1" > "$tempcpio"/initramfs.cpio || exit 1 + +# copy the DSDT file at the root of the directory so that we can call it "/DSDT.aml" +cp -f "$2" "$tempcpio"/DSDT.aml + +# add the file +cd "$tempcpio" +(echo DSDT.aml | cpio --quiet -H newc -o -A -O "$tempcpio"/initramfs.cpio) || exit 1 +cd "$OLDPWD" + +# re-compress the archive +gzip -c "$tempcpio"/initramfs.cpio > "$1" + diff --git a/Documentation/acpi/method-tracing.txt b/Documentation/acpi/method-tracing.txt new file mode 100644 index 000000000000..f6efb1ea559a --- /dev/null +++ b/Documentation/acpi/method-tracing.txt @@ -0,0 +1,26 @@ +/sys/module/acpi/parameters/: + +trace_method_name + The AML method name that the user wants to trace + +trace_debug_layer + The temporary debug_layer used when tracing the method. + Using 0xffffffff by default if it is 0. 
+ +trace_debug_level + The temporary debug_level used when tracing the method. + Using 0x00ffffff by default if it is 0. + +trace_state + The status of the tracing feature. + + "enabled" means this feature is enabled + and the AML method is traced every time it's executed. + + "1" means this feature is enabled and the AML method + will only be traced during the next execution. + + "disabled" means this feature is disabled. + Users can enable/disable this debug tracing feature by + "echo string > /sys/module/acpi/parameters/trace_state". + "string" should be one of "enable", "disable" and "1". diff --git a/Documentation/aoe/mkdevs.sh b/Documentation/aoe/mkdevs.sh index 97374aacacb2..44c0ab702432 100644 --- a/Documentation/aoe/mkdevs.sh +++ b/Documentation/aoe/mkdevs.sh @@ -29,6 +29,8 @@ rm -f $dir/interfaces mknod -m 0200 $dir/interfaces c $MAJOR 4 rm -f $dir/revalidate mknod -m 0200 $dir/revalidate c $MAJOR 5 +rm -f $dir/flush +mknod -m 0200 $dir/flush c $MAJOR 6 export n_partitions mkshelf=`echo $0 | sed 's!mkdevs!mkshelf!'` diff --git a/Documentation/aoe/udev-install.sh b/Documentation/aoe/udev-install.sh index 6449911c6a71..15e86f58c036 100644 --- a/Documentation/aoe/udev-install.sh +++ b/Documentation/aoe/udev-install.sh @@ -23,7 +23,10 @@ fi # /etc/udev/rules.d # rules_d="`sed -n '/^udev_rules=/{ s!udev_rules=!!; s!\"!!g; p; }' $conf`" -if test -z "$rules_d" || test ! -d "$rules_d"; then +if test -z "$rules_d" ; then + rules_d=/etc/udev/rules.d +fi +if test ! -d "$rules_d"; then echo "$me Error: cannot find udev rules directory" 1>&2 exit 1 fi diff --git a/Documentation/aoe/udev.txt b/Documentation/aoe/udev.txt index a7ed1dc4f331..8686e789542e 100644 --- a/Documentation/aoe/udev.txt +++ b/Documentation/aoe/udev.txt @@ -1,6 +1,7 @@ # These rules tell udev what device nodes to create for aoe support. -# They may be installed along the following lines (adjusted to what -# you see on your system). +# They may be installed along the following lines. 
Check the section +# 8 udev manpage to see whether your udev supports SUBSYSTEM, and +# whether it uses one or two equal signs for SUBSYSTEM and KERNEL. # # ecashin@makki ~$ su # Password: @@ -15,10 +16,11 @@ # # aoe char devices -SUBSYSTEM="aoe", KERNEL="discover", NAME="etherd/%k", GROUP="disk", MODE="0220" -SUBSYSTEM="aoe", KERNEL="err", NAME="etherd/%k", GROUP="disk", MODE="0440" -SUBSYSTEM="aoe", KERNEL="interfaces", NAME="etherd/%k", GROUP="disk", MODE="0220" -SUBSYSTEM="aoe", KERNEL="revalidate", NAME="etherd/%k", GROUP="disk", MODE="0220" +SUBSYSTEM=="aoe", KERNEL=="discover", NAME="etherd/%k", GROUP="disk", MODE="0220" +SUBSYSTEM=="aoe", KERNEL=="err", NAME="etherd/%k", GROUP="disk", MODE="0440" +SUBSYSTEM=="aoe", KERNEL=="interfaces", NAME="etherd/%k", GROUP="disk", MODE="0220" +SUBSYSTEM=="aoe", KERNEL=="revalidate", NAME="etherd/%k", GROUP="disk", MODE="0220" +SUBSYSTEM=="aoe", KERNEL=="flush", NAME="etherd/%k", GROUP="disk", MODE="0220" # aoe block devices -KERNEL="etherd*", NAME="%k", GROUP="disk" +KERNEL=="etherd*", NAME="%k", GROUP="disk" diff --git a/Documentation/cgroups.txt b/Documentation/cgroups.txt index 98a26f81fa75..42d7c4cb39cd 100644 --- a/Documentation/cgroups.txt +++ b/Documentation/cgroups.txt @@ -456,7 +456,7 @@ methods are create/destroy. Any others that are null are presumed to be successful no-ops. struct cgroup_subsys_state *create(struct cgroup *cont) -LL=cgroup_mutex +(cgroup_mutex held by caller) Called to create a subsystem state object for a cgroup. The subsystem should allocate its subsystem state object for the passed @@ -471,14 +471,19 @@ it's the root of the hierarchy) and may be an appropriate place for initialization code. 
void destroy(struct cgroup *cont) -LL=cgroup_mutex +(cgroup_mutex held by caller) -The cgroup system is about to destroy the passed cgroup; the -subsystem should do any necessary cleanup +The cgroup system is about to destroy the passed cgroup; the subsystem +should do any necessary cleanup and free its subsystem state +object. By the time this method is called, the cgroup has already been +unlinked from the file system and from the child list of its parent; +cgroup->parent is still valid. (Note - can also be called for a +newly-created cgroup if an error occurs after this subsystem's +create() method has been called for the new cgroup). int can_attach(struct cgroup_subsys *ss, struct cgroup *cont, struct task_struct *task) -LL=cgroup_mutex +(cgroup_mutex held by caller) Called prior to moving a task into a cgroup; if the subsystem returns an error, this will abort the attach operation. If a NULL @@ -489,25 +494,20 @@ remain valid while the caller holds cgroup_mutex. void attach(struct cgroup_subsys *ss, struct cgroup *cont, struct cgroup *old_cont, struct task_struct *task) -LL=cgroup_mutex - Called after the task has been attached to the cgroup, to allow any post-attachment activity that requires memory allocations or blocking. void fork(struct cgroup_subsy *ss, struct task_struct *task) -LL=callback_mutex, maybe read_lock(tasklist_lock) Called when a task is forked into a cgroup. Also called during registration for all existing tasks. void exit(struct cgroup_subsys *ss, struct task_struct *task) -LL=callback_mutex Called during task exit int populate(struct cgroup_subsys *ss, struct cgroup *cont) -LL=none Called after creation of a cgroup to allow a subsystem to populate the cgroup directory with file entries. The subsystem should make @@ -524,7 +524,7 @@ example in cpusets, no task may attach before 'cpus' and 'mems' are set up. 
void bind(struct cgroup_subsys *ss, struct cgroup *root) -LL=callback_mutex +(cgroup_mutex held by caller) Called when a cgroup subsystem is rebound to a different hierarchy and root cgroup. Currently this will only involve movement between diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt new file mode 100644 index 000000000000..b5bbea92a61a --- /dev/null +++ b/Documentation/controllers/memory.txt @@ -0,0 +1,279 @@ +Memory Controller + +Salient features + +a. Enable control of both RSS (mapped) and Page Cache (unmapped) pages +b. The infrastructure allows easy addition of other types of memory to control +c. Provides *zero overhead* for non memory controller users +d. Provides a double LRU: global memory pressure causes reclaim from the + global LRU; a cgroup on hitting a limit, reclaims from the per + cgroup LRU + +NOTE: Swap Cache (unmapped) is not accounted now. + +Benefits and Purpose of the memory controller + +The memory controller isolates the memory behaviour of a group of tasks +from the rest of the system. The article on LWN [12] mentions some probable +uses of the memory controller. The memory controller can be used to + +a. Isolate an application or a group of applications + Memory hungry applications can be isolated and limited to a smaller + amount of memory. +b. Create a cgroup with limited amount of memory, this can be used + as a good alternative to booting with mem=XXXX. +c. Virtualization solutions can control the amount of memory they want + to assign to a virtual machine instance. +d. A CD/DVD burner could control the amount of memory used by the + rest of the system to ensure that burning does not fail due to lack + of available memory. +e. There are several other use cases, find one or use the controller just + for fun (to learn and hack on the VM subsystem). + +1. History + +The memory controller has a long history. A request for comments for the memory +controller was posted by Balbir Singh [1]. 
At the time the RFC was posted +there were several implementations for memory control. The goal of the +RFC was to build consensus and agreement for the minimal features required +for memory control. The first RSS controller was posted by Balbir Singh[2] +in Feb 2007. Pavel Emelianov [3][4][5] has since posted three versions of the +RSS controller. At OLS, at the resource management BoF, everyone suggested +that we handle both page cache and RSS together. Another request was raised +to allow user space handling of OOM. The current memory controller is +at version 6; it combines both mapped (RSS) and unmapped Page +Cache Control [11]. + +2. Memory Control + +Memory is a unique resource in the sense that it is present in a limited +amount. If a task requires a lot of CPU processing, the task can spread +its processing over a period of hours, days, months or years, but with +memory, the same physical memory needs to be reused to accomplish the task. + +The memory controller implementation has been divided into phases. These +are: + +1. Memory controller +2. mlock(2) controller +3. Kernel user memory accounting and slab control +4. user mappings length controller + +The memory controller is the first controller developed. + +2.1. Design + +The core of the design is a counter called the res_counter. The res_counter +tracks the current memory usage and limit of the group of processes associated +with the controller. Each cgroup has a memory controller specific data +structure (mem_cgroup) associated with it. + +2.2. Accounting + + +--------------------+ + | mem_cgroup | + | (res_counter) | + +--------------------+ + / ^ \ + / | \ + +---------------+ | +---------------+ + | mm_struct | |.... 
| mm_struct | + | | | | | + +---------------+ | +---------------+ + | + + --------------+ + | + +---------------+ +------+--------+ + | page +----------> page_cgroup| + | | | | + +---------------+ +---------------+ + + (Figure 1: Hierarchy of Accounting) + + +Figure 1 shows the important aspects of the controller + +1. Accounting happens per cgroup +2. Each mm_struct knows about which cgroup it belongs to +3. Each page has a pointer to the page_cgroup, which in turn knows the + cgroup it belongs to + +The accounting is done as follows: mem_cgroup_charge() is invoked to set up +the necessary data structures and check if the cgroup that is being charged +is over its limit. If it is then reclaim is invoked on the cgroup. +More details can be found in the reclaim section of this document. +If everything goes well, a page meta-data-structure called page_cgroup is +allocated and associated with the page. This routine also adds the page to +the per cgroup LRU. + +2.2.1 Accounting details + +All mapped pages (RSS) and unmapped user pages (Page Cache) are accounted. +RSS pages are accounted at the time of page_add_*_rmap() unless they've already +been accounted for earlier. A file page will be accounted for as Page Cache; +when it's mapped into the page tables of a process, duplicate accounting is +carefully avoided. Page Cache pages are accounted at the time of +add_to_page_cache(). The corresponding routines that remove a page from the +page tables or from Page Cache are used to decrement the accounting counters +of the cgroup. + +2.3 Shared Page Accounting + +Shared pages are accounted on the basis of the first touch approach. The +cgroup that first touches a page is accounted for the page. The principle +behind this approach is that a cgroup that aggressively uses a shared +page will eventually get charged for it (once it is uncharged from +the cgroup that brought it in -- this will happen on memory pressure). 
+ +2.4 Reclaim + +Each cgroup maintains a per cgroup LRU that consists of an active +and inactive list. When a cgroup goes over its limit, we first try +to reclaim memory from the cgroup so as to make space for the new +pages that the cgroup has touched. If the reclaim is unsuccessful, +an OOM routine is invoked to select and kill the bulkiest task in the +cgroup. + +The reclaim algorithm has not been modified for cgroups, except that +pages that are selected for reclaiming come from the per cgroup LRU +list. + +2.5 Locking + +The memory controller uses the following hierarchy + +1. zone->lru_lock is used for selecting pages to be isolated +2. mem->per_zone->lru_lock protects the per cgroup LRU (per zone) +3. lock_page_cgroup() is used to protect page->page_cgroup + +3. User Interface + +0. Configuration + +a. Enable CONFIG_CGROUPS +b. Enable CONFIG_RESOURCE_COUNTERS +c. Enable CONFIG_CGROUP_MEM_CONT + +1. Prepare the cgroups +# mkdir -p /cgroups +# mount -t cgroup none /cgroups -o memory + +2. Make the new group and move bash into it +# mkdir /cgroups/0 +# echo $$ > /cgroups/0/tasks + +Since we are now in the 0 cgroup, +we can alter the memory limit: +# echo -n 4M > /cgroups/0/memory.limit_in_bytes + +NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, +mega or gigabytes. + +# cat /cgroups/0/memory.limit_in_bytes +4194304 Bytes + +NOTE: The interface has now changed to display the usage in bytes +instead of pages + +We can check the usage: +# cat /cgroups/0/memory.usage_in_bytes +1216512 Bytes + +A successful write to this file does not guarantee a successful set of +this limit to the value written into the file. This can be due to a +number of factors, such as rounding up to page boundaries or the total +availability of memory on the system. The user is required to re-read +this file after a write to guarantee the value committed by the kernel. 
+ +# echo -n 1 > memory.limit_in_bytes +# cat memory.limit_in_bytes +4096 Bytes + +The memory.failcnt field gives the number of times that the cgroup limit was +exceeded. + +The memory.stat file gives accounting information. Now, the number of +caches, RSS and Active pages/Inactive pages are shown. + +The memory.force_empty gives an interface to drop *all* charges by force. + +# echo -n 1 > memory.force_empty + +will drop all charges in cgroup. Currently, this is maintained for test. + +4. Testing + +Balbir posted lmbench, AIM9, LTP and vmmstress results [10] and [11]. +Apart from that v6 has been tested with several applications and regular +daily use. The controller has also been tested on the PPC64, x86_64 and +UML platforms. + +4.1 Troubleshooting + +Sometimes a user might find that the application under a cgroup is +terminated. There are several causes for this: + +1. The cgroup limit is too low (just too low to do anything useful) +2. The user is using anonymous memory and swap is turned off or too low + +A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of +some of the pages cached in the cgroup (page cache pages). + +4.2 Task migration + +When a task migrates from one cgroup to another, it's charge is not +carried forward. The pages allocated from the original cgroup still +remain charged to it, the charge is dropped when the page is freed or +reclaimed. + +4.3 Removing a cgroup + +A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a +cgroup might have some charge associated with it, even though all +tasks have migrated away from it. Such charges are automatically dropped at +rmdir() if there are no tasks. + +4.4 Choosing what to account -- Page Cache (unmapped) vs RSS (mapped)? + +The type of memory accounted by the cgroup can be limited to just +mapped pages by writing "1" to memory.control_type field + +echo -n 1 > memory.control_type + +5. TODO + +1. 
Add support for accounting huge pages (as a separate controller) +2. Make per-cgroup scanner reclaim not-shared pages first +3. Teach controller to account for shared-pages +4. Start reclamation when the limit is lowered +5. Start reclamation in the background when the limit is + not yet hit but the usage is getting closer + +Summary + +Overall, the memory controller has been a stable controller and has been +commented and discussed quite extensively in the community. + +References + +1. Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/ +2. Singh, Balbir. Memory Controller (RSS Control), + http://lwn.net/Articles/222762/ +3. Emelianov, Pavel. Resource controllers based on process cgroups + http://lkml.org/lkml/2007/3/6/198 +4. Emelianov, Pavel. RSS controller based on process cgroups (v2) + http://lkml.org/lkml/2007/4/9/74 +5. Emelianov, Pavel. RSS controller based on process cgroups (v3) + http://lkml.org/lkml/2007/5/30/244 +6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/ +7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control + subsystem (v3), http://lwn.net/Articles/235534/ +8. Singh, Balbir. RSS controller V2 test results (lmbench), + http://lkml.org/lkml/2007/5/17/232 +9. Singh, Balbir. RSS controller V2 AIM9 results + http://lkml.org/lkml/2007/5/18/1 +10. Singh, Balbir. Memory controller v6 results, + http://lkml.org/lkml/2007/8/19/36 +11. Singh, Balbir. Memory controller v6, http://lkml.org/lkml/2007/8/17/69 +12. 
Corbet, Jonathan, Controlling memory use in cgroups, + http://lwn.net/Articles/243795/ diff --git a/Documentation/cpuidle/core.txt b/Documentation/cpuidle/core.txt new file mode 100644 index 000000000000..63ecc5dc9d8a --- /dev/null +++ b/Documentation/cpuidle/core.txt @@ -0,0 +1,23 @@ + + Supporting multiple CPU idle levels in kernel + + cpuidle + +General Information: + +Various CPUs today support multiple idle levels that are differentiated +by varying exit latencies and power consumption during idle. +cpuidle is a generic in-kernel infrastructure that separates +idle policy (governor) from idle mechanism (driver) and provides a +standardized infrastructure to support independent development of +governors and drivers. + +cpuidle resides under drivers/cpuidle. + +Boot options: +"cpuidle_sysfs_switch" +enables current_governor interface in /sys/devices/system/cpu/cpuidle/, +which can be used to switch governors at run time. This boot option +is meant for developer testing only. In normal usage, kernel picks the +best governor based on governor ratings. +SEE ALSO: sysfs.txt in this directory. diff --git a/Documentation/cpuidle/driver.txt b/Documentation/cpuidle/driver.txt new file mode 100644 index 000000000000..7a9e09ece931 --- /dev/null +++ b/Documentation/cpuidle/driver.txt @@ -0,0 +1,31 @@ + + + Supporting multiple CPU idle levels in kernel + + cpuidle drivers + + + + +cpuidle driver hooks into the cpuidle infrastructure and handles the +architecture/platform dependent part of CPU idle states. Driver +provides the platform idle state detection capability and also +has mechanisms in place to support actual entry-exit into CPU idle states. + +cpuidle driver initializes the cpuidle_device structure for each CPU device +and registers with cpuidle using cpuidle_register_device. + +It can also support the dynamic changes (like battery <-> AC), by using +cpuidle_pause_and_lock, cpuidle_disable_device and cpuidle_enable_device, +cpuidle_resume_and_unlock. 
+ +Interfaces: +extern int cpuidle_register_driver(struct cpuidle_driver *drv); +extern void cpuidle_unregister_driver(struct cpuidle_driver *drv); +extern int cpuidle_register_device(struct cpuidle_device *dev); +extern void cpuidle_unregister_device(struct cpuidle_device *dev); + +extern void cpuidle_pause_and_lock(void); +extern void cpuidle_resume_and_unlock(void); +extern int cpuidle_enable_device(struct cpuidle_device *dev); +extern void cpuidle_disable_device(struct cpuidle_device *dev); diff --git a/Documentation/cpuidle/governor.txt b/Documentation/cpuidle/governor.txt new file mode 100644 index 000000000000..12c6bd50c9f6 --- /dev/null +++ b/Documentation/cpuidle/governor.txt @@ -0,0 +1,29 @@ + + + + Supporting multiple CPU idle levels in kernel + + cpuidle governors + + + + +cpuidle governor is policy routine that decides what idle state to enter at +any given time. cpuidle core uses different callbacks to the governor. + +* enable() to enable governor for a particular device +* disable() to disable governor for a particular device +* select() to select an idle state to enter +* reflect() called after returning from the idle state, which can be used + by the governor for some record keeping. + +More than one governor can be registered at the same time and +users can switch between drivers using /sysfs interface (when enabled). +More than one governor part is supported for developers to easily experiment +with different governors. By default, most optimal governor based on your +kernel configuration and platform will be selected by cpuidle. 
+ +Interfaces: +extern int cpuidle_register_governor(struct cpuidle_governor *gov); +extern void cpuidle_unregister_governor(struct cpuidle_governor *gov); +struct cpuidle_governor diff --git a/Documentation/cpuidle/sysfs.txt b/Documentation/cpuidle/sysfs.txt new file mode 100644 index 000000000000..50d7b1642759 --- /dev/null +++ b/Documentation/cpuidle/sysfs.txt @@ -0,0 +1,79 @@ + + + Supporting multiple CPU idle levels in kernel + + cpuidle sysfs + +System global cpuidle related information and tunables are under +/sys/devices/system/cpu/cpuidle + +The current interfaces in this directory has self-explanatory names: +* current_driver +* current_governor_ro + +With cpuidle_sysfs_switch boot option (meant for developer testing) +following objects are visible instead. +* current_driver +* available_governors +* current_governor +In this case users can switch the governor at run time by writing +to current_governor. + + +Per logical CPU specific cpuidle information are under +/sys/devices/system/cpu/cpuX/cpuidle +for each online cpu X + +-------------------------------------------------------------------------------- +# ls -lR /sys/devices/system/cpu/cpu0/cpuidle/ +/sys/devices/system/cpu/cpu0/cpuidle/: +total 0 +drwxr-xr-x 2 root root 0 Feb 8 10:42 state0 +drwxr-xr-x 2 root root 0 Feb 8 10:42 state1 +drwxr-xr-x 2 root root 0 Feb 8 10:42 state2 +drwxr-xr-x 2 root root 0 Feb 8 10:42 state3 + +/sys/devices/system/cpu/cpu0/cpuidle/state0: +total 0 +-r--r--r-- 1 root root 4096 Feb 8 10:42 desc +-r--r--r-- 1 root root 4096 Feb 8 10:42 latency +-r--r--r-- 1 root root 4096 Feb 8 10:42 name +-r--r--r-- 1 root root 4096 Feb 8 10:42 power +-r--r--r-- 1 root root 4096 Feb 8 10:42 time +-r--r--r-- 1 root root 4096 Feb 8 10:42 usage + +/sys/devices/system/cpu/cpu0/cpuidle/state1: +total 0 +-r--r--r-- 1 root root 4096 Feb 8 10:42 desc +-r--r--r-- 1 root root 4096 Feb 8 10:42 latency +-r--r--r-- 1 root root 4096 Feb 8 10:42 name +-r--r--r-- 1 root root 4096 Feb 8 10:42 power 
+-r--r--r-- 1 root root 4096 Feb 8 10:42 time +-r--r--r-- 1 root root 4096 Feb 8 10:42 usage + +/sys/devices/system/cpu/cpu0/cpuidle/state2: +total 0 +-r--r--r-- 1 root root 4096 Feb 8 10:42 desc +-r--r--r-- 1 root root 4096 Feb 8 10:42 latency +-r--r--r-- 1 root root 4096 Feb 8 10:42 name +-r--r--r-- 1 root root 4096 Feb 8 10:42 power +-r--r--r-- 1 root root 4096 Feb 8 10:42 time +-r--r--r-- 1 root root 4096 Feb 8 10:42 usage + +/sys/devices/system/cpu/cpu0/cpuidle/state3: +total 0 +-r--r--r-- 1 root root 4096 Feb 8 10:42 desc +-r--r--r-- 1 root root 4096 Feb 8 10:42 latency +-r--r--r-- 1 root root 4096 Feb 8 10:42 name +-r--r--r-- 1 root root 4096 Feb 8 10:42 power +-r--r--r-- 1 root root 4096 Feb 8 10:42 time +-r--r--r-- 1 root root 4096 Feb 8 10:42 usage +-------------------------------------------------------------------------------- + + +* desc : Small description about the idle state (string) +* latency : Latency to exit out of this idle state (in microseconds) +* name : Name of the idle state (string) +* power : Power consumed while in this idle state (in milliwatts) +* time : Total time spent in this idle state (in microseconds) +* usage : Number of times this state was entered (count) diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt index 141bef1c8599..43db6fe12814 100644 --- a/Documentation/cpusets.txt +++ b/Documentation/cpusets.txt @@ -523,21 +523,14 @@ from one cpuset to another, then the kernel will adjust the tasks memory placement, as above, the next time that the kernel attempts to allocate a page of memory for that task. -If a cpuset has its CPUs modified, then each task using that -cpuset does _not_ change its behavior automatically. In order to -minimize the impact on the critical scheduling code in the kernel, -tasks will continue to use their prior CPU placement until they -are rebound to their cpuset, by rewriting their pid to the 'tasks' -file of their cpuset. 
If a task had been bound to some subset of its -cpuset using the sched_setaffinity() call, and if any of that subset -is still allowed in its new cpuset settings, then the task will be -restricted to the intersection of the CPUs it was allowed on before, -and its new cpuset CPU placement. If, on the other hand, there is -no overlap between a tasks prior placement and its new cpuset CPU -placement, then the task will be allowed to run on any CPU allowed -in its new cpuset. If a task is moved from one cpuset to another, -its CPU placement is updated in the same way as if the tasks pid is -rewritten to the 'tasks' file of its current cpuset. +If a cpuset has its 'cpus' modified, then each task in that cpuset +will have its allowed CPU placement changed immediately. Similarly, +if a tasks pid is written to a cpusets 'tasks' file, in either its +current cpuset or another cpuset, then its allowed CPU placement is +changed immediately. If such a task had been bound to some subset +of its cpuset using the sched_setaffinity() call, the task will be +allowed to run on any CPU allowed in its new cpuset, negating the +affect of the prior sched_setaffinity() call. In summary, the memory placement of a task whose cpuset is changed is updated by the kernel, on the next allocation of a page for that task, diff --git a/Documentation/drivers/edac/edac.txt b/Documentation/edac.txt index a5c36842ecef..a5c36842ecef 100644 --- a/Documentation/drivers/edac/edac.txt +++ b/Documentation/edac.txt diff --git a/Documentation/email-clients.txt b/Documentation/email-clients.txt index 113165b48305..2ebb94d6ed8e 100644 --- a/Documentation/email-clients.txt +++ b/Documentation/email-clients.txt @@ -170,7 +170,6 @@ Sylpheed (GUI) - Works well for inlining text (or using attachments). - Allows use of an external editor. -- Not good for IMAP. - Is slow on large folders. - Won't do TLS SMTP auth over a non-SSL connection. - Has a helpful ruler bar in the compose window. 
diff --git a/Documentation/fb/deferred_io.txt b/Documentation/fb/deferred_io.txt index 63883a892120..748328370250 100644 --- a/Documentation/fb/deferred_io.txt +++ b/Documentation/fb/deferred_io.txt @@ -7,10 +7,10 @@ IO. The following example may be a useful explanation of how one such setup works: - userspace app like Xfbdev mmaps framebuffer -- deferred IO and driver sets up nopage and page_mkwrite handlers +- deferred IO and driver sets up fault and page_mkwrite handlers - userspace app tries to write to mmaped vaddress -- we get pagefault and reach nopage handler -- nopage handler finds and returns physical page +- we get pagefault and reach fault handler +- fault handler finds and returns physical page - we get page_mkwrite where we add this page to a list - schedule a workqueue task to be run after a delay - app continues writing to that page with no additional cost. this is diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index a7d9d179131a..4d3aa519eadf 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -6,14 +6,6 @@ be removed from this file. --------------------------- -What: MXSER -When: December 2007 -Why: Old mxser driver is obsoleted by the mxser_new. Give it some time yet - and remove it. -Who: Jiri Slaby <jirislaby@gmail.com> - ---------------------------- - What: dev->power.power_state When: July 2007 Why: Broken design for runtime control over driver power states, confusing @@ -107,17 +99,6 @@ Who: Eric Biederman <ebiederm@xmission.com> --------------------------- -What: a.out interpreter support for ELF executables -When: 2.6.25 -Files: fs/binfmt_elf.c -Why: Using a.out interpreters for ELF executables was a feature for - transition from a.out to ELF. But now it is unlikely to be still - needed anymore and removing it would simplify the hairy ELF - loader code. 
-Who: Andi Kleen <ak@suse.de> - ---------------------------- - What: remove EXPORT_SYMBOL(kernel_thread) When: August 2006 Files: arch/*/kernel/*_ksyms.c @@ -130,15 +111,6 @@ Who: Christoph Hellwig <hch@lst.de> --------------------------- -What: CONFIG_FORCED_INLINING -When: June 2006 -Why: Config option is there to see if gcc is good enough. (in january - 2006). If it is, the behavior should just be the default. If it's not, - the option should just go away entirely. -Who: Arjan van de Ven - ---------------------------- - What: eepro100 network driver When: January 2007 Why: replaced by the e100 driver @@ -200,21 +172,6 @@ Who: Len Brown <len.brown@intel.com> --------------------------- -What: 'time' kernel boot parameter -When: January 2008 -Why: replaced by 'printk.time=<value>' so that printk timestamps can be - enabled or disabled as needed -Who: Randy Dunlap <randy.dunlap@oracle.com> - ---------------------------- - -What: drivers depending on OSS_OBSOLETE -When: options in 2.6.23, code in 2.6.25 -Why: obsolete OSS drivers -Who: Adrian Bunk <bunk@stusta.de> - ---------------------------- - What: libata spindown skipping and warning When: Dec 2008 Why: Some halt(8) implementations synchronize caches for and spin @@ -338,3 +295,14 @@ Why: The support code for the old firmware hurts code readability/maintainabilit and slightly hurts runtime performance. Bugfixes for the old firmware are not provided by Broadcom anymore. Who: Michael Buesch <mb@bu3sch.de> + +--------------------------- + +What: Solaris/SunOS syscall and binary support on Sparc +When: 2.6.26 +Why: Largely unmaintained and almost entirely unused. File system + layering used to divert library and dynamic linker searches to + /usr/gnemul is extremely buggy and unfixable. Making it work + is largely pointless as without a lot of work only the most + trivial of Solaris binaries can work with the emulation code. +Who: David S. 
Miller <davem@davemloft.net> diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 1de155e2dc36..e68021c08fbd 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -32,6 +32,8 @@ directory-locking - info about the locking scheme used for directory operations. dlmfs.txt - info on the userspace interface to the OCFS2 DLM. +dnotify.txt + - info about directory notification in Linux. ecryptfs.txt - docs on eCryptfs: stacked cryptographic filesystem for Linux. ext2.txt @@ -80,6 +82,8 @@ relay.txt - info on relay, for efficient streaming from kernel to user space. romfs.txt - description of the ROMFS filesystem. +sharedsubtree.txt + - a description of shared subtrees for namespaces. smbfs.txt - info on using filesystems with the SMB protocol (Win 3.11 and NT). spufs.txt diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 37c10cba7177..42d4b30b1045 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -90,7 +90,6 @@ of the locking scheme for directory operations. prototypes: struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*read_inode) (struct inode *); void (*dirty_inode) (struct inode *); int (*write_inode) (struct inode *, int); void (*put_inode) (struct inode *); @@ -114,7 +113,6 @@ locking rules: BKL s_lock s_umount alloc_inode: no no no destroy_inode: no -read_inode: no (see below) dirty_inode: no (must not sleep) write_inode: no put_inode: no @@ -133,7 +131,6 @@ show_options: no (vfsmount->sem) quota_read: no no no (see below) quota_write: no no no (see below) -->read_inode() is not a method - it's a callback used in iget(). ->remount_fs() will have the s_umount lock if it's already mounted. When called from get_sb_single, it does NOT have the s_umount lock. 
->quota_read() and ->quota_write() functions are both guaranteed to diff --git a/Documentation/dnotify.txt b/Documentation/filesystems/dnotify.txt index 6984fca6002a..9f5d338ddbb8 100644 --- a/Documentation/dnotify.txt +++ b/Documentation/filesystems/dnotify.txt @@ -69,24 +69,24 @@ Example #include <signal.h> #include <stdio.h> #include <unistd.h> - + static volatile int event_fd; - + static void handler(int sig, siginfo_t *si, void *data) { event_fd = si->si_fd; } - + int main(void) { struct sigaction act; int fd; - + act.sa_sigaction = handler; sigemptyset(&act.sa_mask); act.sa_flags = SA_SIGINFO; sigaction(SIGRTMIN + 1, &act, NULL); - + fd = open(".", O_RDONLY); fcntl(fd, F_SETSIG, SIGRTMIN + 1); fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT); diff --git a/Documentation/filesystems/isofs.txt b/Documentation/filesystems/isofs.txt index 758e50401c16..6973b980ca2a 100644 --- a/Documentation/filesystems/isofs.txt +++ b/Documentation/filesystems/isofs.txt @@ -24,6 +24,7 @@ Mount options unique to the isofs filesystem. map=normal Map non-Rock Ridge filenames to lower case map=acorn As map=normal but also apply Acorn extensions if present mode=xxx Sets the permissions on files to xxx + dmode=xxx Sets the permissions on directories to xxx nojoliet Ignore Joliet extensions if they are present. norock Ignore Rock Ridge extensions if they are present. hide Completely strip hidden files from the file system. diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 0f33c77bc14b..92b888d540a6 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -34,8 +34,8 @@ FOO_I(inode) (see in-tree filesystems for examples). Make them ->alloc_inode and ->destroy_inode in your super_operations. -Keep in mind that now you need explicit initialization of private data - -typically in ->read_inode() and after getting an inode from new_inode(). 
+Keep in mind that now you need explicit initialization of private data +typically between calling iget_locked() and unlocking the inode. At some point that will become mandatory. @@ -173,10 +173,10 @@ should be a non-blocking function that initializes those parts of a newly created inode to allow the test function to succeed. 'data' is passed as an opaque value to both test and set functions. -When the inode has been created by iget5_locked(), it will be returned with -the I_NEW flag set and will still be locked. read_inode has not been -called so the file system still has to finalize the initialization. Once -the inode is initialized it must be unlocked by calling unlock_new_inode(). +When the inode has been created by iget5_locked(), it will be returned with the +I_NEW flag set and will still be locked. The filesystem then needs to finalize +the initialization. Once the inode is initialized it must be unlocked by +calling unlock_new_inode(). The filesystem is responsible for setting (and possibly testing) i_ino when appropriate. There is also a simpler iget_locked function that @@ -184,11 +184,19 @@ just takes the superblock and inode number as arguments and does the test and set for you. e.g. - inode = iget_locked(sb, ino); - if (inode->i_state & I_NEW) { - read_inode_from_disk(inode); - unlock_new_inode(inode); - } + inode = iget_locked(sb, ino); + if (inode->i_state & I_NEW) { + err = read_inode_from_disk(inode); + if (err < 0) { + iget_failed(inode); + return err; + } + unlock_new_inode(inode); + } + +Note that if the process of setting up a new inode fails, then iget_failed() +should be called on the inode to render it dead, and an appropriate error +should be passed back to the caller. 
--- [recommended] diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index e2799b5fafea..5681e2fa1496 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1029,6 +1029,14 @@ nr_inodes Denotes the number of inodes the system has allocated. This number will grow and shrink dynamically. +nr_open +------- + +Denotes the maximum number of file-handles a process can +allocate. Default value is 1024*1024 (1048576) which should be +enough for most machines. Actual limit depends on RLIMIT_NOFILE +resource limit. + nr_free_inodes -------------- diff --git a/Documentation/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt index 736540045dc7..736540045dc7 100644 --- a/Documentation/sharedsubtree.txt +++ b/Documentation/filesystems/sharedsubtree.txt diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 9d019d35728f..81e5be6e6e35 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -151,7 +151,7 @@ The get_sb() method has the following arguments: const char *dev_name: the device name we are mounting. void *data: arbitrary mount options, usually comes as an ASCII - string + string (see "Mount Options" section) struct vfsmount *mnt: a vfs-internal representation of a mount point @@ -182,7 +182,7 @@ A fill_super() method implementation has the following arguments: must initialize this properly. void *data: arbitrary mount options, usually comes as an ASCII - string + string (see "Mount Options" section) int silent: whether or not to be silent on error @@ -203,8 +203,6 @@ struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*read_inode) (struct inode *); - void (*dirty_inode) (struct inode *); int (*write_inode) (struct inode *, int); void (*put_inode) (struct inode *); @@ -242,15 +240,6 @@ or bottom half). 
->alloc_inode was defined and simply undoes anything done by ->alloc_inode. - read_inode: this method is called to read a specific inode from the - mounted filesystem. The i_ino member in the struct inode is - initialized by the VFS to indicate which inode to read. Other - members are filled in by this method. - - You can set this to NULL and use iget5_locked() instead of iget() - to read inodes. This is necessary for filesystems for which the - inode number is not sufficient to identify an inode. - dirty_inode: this method is called by the VFS to mark an inode dirty. write_inode: this method is called when the VFS needs to write an @@ -302,15 +291,16 @@ or bottom half). umount_begin: called when the VFS is unmounting a filesystem. - show_options: called by the VFS to show mount options for /proc/<pid>/mounts. + show_options: called by the VFS to show mount options for + /proc/<pid>/mounts. (see "Mount Options" section) quota_read: called by the VFS to read from filesystem quota file. quota_write: called by the VFS to write to filesystem quota file. -The read_inode() method is responsible for filling in the "i_op" -field. This is a pointer to a "struct inode_operations" which -describes the methods that can be performed on individual inodes. +Whoever sets up the inode is responsible for filling in the "i_op" field. This +is a pointer to a "struct inode_operations" which describes the methods that +can be performed on individual inodes. The Inode Object @@ -980,6 +970,49 @@ manipulate dentries: For further information on dentry locking, please refer to the document Documentation/filesystems/dentry-locking.txt. +Mount Options +============= + +Parsing options +--------------- + +On mount and remount the filesystem is passed a string containing a +comma separated list of mount options. The options can have either of +these forms: + + option + option=value + +The <linux/parser.h> header defines an API that helps parse these +options. 
There are plenty of examples on how to use it in existing +filesystems. + +Showing options +--------------- + +If a filesystem accepts mount options, it must define show_options() +to show all the currently active options. The rules are: + + - options MUST be shown which are not default or their values differ + from the default + + - options MAY be shown which are enabled by default or have their + default value + +Options used only internally between a mount helper and the kernel +(such as file descriptors), or which only have an effect during the +mounting (such as ones controlling the creation of a journal) are exempt +from the above rules. + +The underlying reason for the above rules is to make sure, that a +mount can be accurately replicated (e.g. umounting and mounting again) +based on the information found in /proc/mounts. + +A simple method of saving options at mount/remount time and showing +them is provided with the save_mount_options() and +generic_show_options() helper functions. Please note, that using +these may have drawbacks. For more info see header comments for these +functions in fs/namespace.c. 
Resources ========= diff --git a/Documentation/hwmon/ads7828 b/Documentation/hwmon/ads7828 new file mode 100644 index 000000000000..75bc4beaf447 --- /dev/null +++ b/Documentation/hwmon/ads7828 @@ -0,0 +1,36 @@ +Kernel driver ads7828 +===================== + +Supported chips: + * Texas Instruments/Burr-Brown ADS7828 + Prefix: 'ads7828' + Addresses scanned: I2C 0x48, 0x49, 0x4a, 0x4b + Datasheet: Publicly available at the Texas Instruments website : + http://focus.ti.com/lit/ds/symlink/ads7828.pdf + +Authors: + Steve Hardy <steve@linuxrealtime.co.uk> + +Module Parameters +----------------- + +* se_input: bool (default Y) + Single ended operation - set to N for differential mode +* int_vref: bool (default Y) + Operate with the internal 2.5V reference - set to N for external reference +* vref_mv: int (default 2500) + If using an external reference, set this to the reference voltage in mV + +Description +----------- + +This driver implements support for the Texas Instruments ADS7828. + +This device is a 12-bit 8-channel A-D converter. + +It can operate in single ended mode (8 +ve inputs) or in differential mode, +where 4 differential pairs can be measured. + +The chip also has the facility to use an external voltage reference. This +may be required if your hardware supplies the ADS7828 from a 5V supply, see +the datasheet for more details. 
diff --git a/Documentation/hwmon/it87 b/Documentation/hwmon/it87 index 5b704a40256b..f4ce1fdbeff6 100644 --- a/Documentation/hwmon/it87 +++ b/Documentation/hwmon/it87 @@ -30,7 +30,7 @@ Supported chips: Datasheet: No longer be available Authors: - Christophe Gauthron <chrisg@0-in.com> + Christophe Gauthron Jean Delvare <khali@linux-fr.org> diff --git a/Documentation/hwmon/lm78 b/Documentation/hwmon/lm78 index dfc318a60fd4..60932e26abaa 100644 --- a/Documentation/hwmon/lm78 +++ b/Documentation/hwmon/lm78 @@ -4,12 +4,12 @@ Kernel driver lm78 Supported chips: * National Semiconductor LM78 / LM78-J Prefix: 'lm78' - Addresses scanned: I2C 0x20 - 0x2f, ISA 0x290 (8 I/O ports) + Addresses scanned: I2C 0x28 - 0x2f, ISA 0x290 (8 I/O ports) Datasheet: Publicly available at the National Semiconductor website http://www.national.com/ * National Semiconductor LM79 Prefix: 'lm79' - Addresses scanned: I2C 0x20 - 0x2f, ISA 0x290 (8 I/O ports) + Addresses scanned: I2C 0x28 - 0x2f, ISA 0x290 (8 I/O ports) Datasheet: Publicly available at the National Semiconductor website http://www.national.com/ diff --git a/Documentation/hwmon/lm87 b/Documentation/hwmon/lm87 index c952c57f0e11..ec27aa1b94cb 100644 --- a/Documentation/hwmon/lm87 +++ b/Documentation/hwmon/lm87 @@ -4,8 +4,12 @@ Kernel driver lm87 Supported chips: * National Semiconductor LM87 Prefix: 'lm87' - Addresses scanned: I2C 0x2c - 0x2f + Addresses scanned: I2C 0x2c - 0x2e Datasheet: http://www.national.com/pf/LM/LM87.html + * Analog Devices ADM1024 + Prefix: 'adm1024' + Addresses scanned: I2C 0x2c - 0x2e + Datasheet: http://www.analog.com/en/prod/0,2877,ADM1024,00.html Authors: Frodo Looijaard <frodol@dds.nl>, @@ -19,11 +23,12 @@ Authors: Description ----------- -This driver implements support for the National Semiconductor LM87. +This driver implements support for the National Semiconductor LM87 +and the Analog Devices ADM1024. 
The LM87 implements up to three temperature sensors, up to two fan rotation speed sensors, up to seven voltage sensors, alarms, and some -miscellaneous stuff. +miscellaneous stuff. The ADM1024 is fully compatible. Temperatures are measured in degrees Celsius. Each input has a high and low alarm settings. A high limit produces an alarm when the value diff --git a/Documentation/hwmon/userspace-tools b/Documentation/hwmon/userspace-tools index 19900a8fe679..9865aeedc58f 100644 --- a/Documentation/hwmon/userspace-tools +++ b/Documentation/hwmon/userspace-tools @@ -14,7 +14,7 @@ Lm-sensors Core set of utilities that will allow you to obtain health information, setup monitoring limits etc. You can get them on their homepage -http://www.lm-sensors.nu/ or as a package from your Linux distribution. +http://www.lm-sensors.org/ or as a package from your Linux distribution. If from website: Get lm-sensors from project web site. Please note, you need only userspace diff --git a/Documentation/hwmon/w83627ehf b/Documentation/hwmon/w83627ehf index ccc2bcb61068..d6e1ae30fa6e 100644 --- a/Documentation/hwmon/w83627ehf +++ b/Documentation/hwmon/w83627ehf @@ -23,8 +23,9 @@ W83627DHG super I/O chips. We will refer to them collectively as Winbond chips. The chips implement three temperature sensors, five fan rotation speed sensors, ten analog voltage sensors (only nine for the 627DHG), one -VID (6 pins), alarms with beep warnings (control unimplemented), and -some automatic fan regulation strategies (plus manual fan control mode). +VID (6 pins for the 627EHF/EHG, 8 pins for the 627DHG), alarms with beep +warnings (control unimplemented), and some automatic fan regulation +strategies (plus manual fan control mode). Temperatures are measured in degrees Celsius and measurement resolution is 1 degC for temp1 and 0.5 degC for temp2 and temp3. 
An alarm is triggered when diff --git a/Documentation/hwmon/w83627hf b/Documentation/hwmon/w83627hf index 792231921241..880a59f53da9 100644 --- a/Documentation/hwmon/w83627hf +++ b/Documentation/hwmon/w83627hf @@ -73,5 +73,4 @@ doesn't help, you may just ignore the bogus VID reading with no harm done. For further information on this driver see the w83781d driver documentation. -[1] http://www2.lm-sensors.nu/~lm78/cvs/browse.cgi/lm_sensors2/doc/vid - +[1] http://www.lm-sensors.org/browser/lm-sensors/trunk/doc/vid diff --git a/Documentation/hwmon/w83781d b/Documentation/hwmon/w83781d index b1e9f80098ee..6f800a0283e9 100644 --- a/Documentation/hwmon/w83781d +++ b/Documentation/hwmon/w83781d @@ -4,20 +4,16 @@ Kernel driver w83781d Supported chips: * Winbond W83781D Prefix: 'w83781d' - Addresses scanned: I2C 0x20 - 0x2f, ISA 0x290 (8 I/O ports) + Addresses scanned: I2C 0x28 - 0x2f, ISA 0x290 (8 I/O ports) Datasheet: http://www.winbond-usa.com/products/winbond_products/pdfs/PCIC/w83781d.pdf * Winbond W83782D Prefix: 'w83782d' - Addresses scanned: I2C 0x20 - 0x2f, ISA 0x290 (8 I/O ports) + Addresses scanned: I2C 0x28 - 0x2f, ISA 0x290 (8 I/O ports) Datasheet: http://www.winbond.com/PDF/sheet/w83782d.pdf * Winbond W83783S Prefix: 'w83783s' Addresses scanned: I2C 0x2d Datasheet: http://www.winbond-usa.com/products/winbond_products/pdfs/PCIC/w83783s.pdf - * Winbond W83627HF - Prefix: 'w83627hf' - Addresses scanned: I2C 0x20 - 0x2f, ISA 0x290 (8 I/O ports) - Datasheet: http://www.winbond.com/PDF/sheet/w83627hf.pdf * Asus AS99127F Prefix: 'as99127f' Addresses scanned: I2C 0x28 - 0x2f @@ -50,20 +46,18 @@ force_subclients=bus,caddr,saddr,saddr Description ----------- -This driver implements support for the Winbond W83781D, W83782D, W83783S, -W83627HF chips, and the Asus AS99127F chips. We will refer to them -collectively as W8378* chips. +This driver implements support for the Winbond W83781D, W83782D, W83783S +chips, and the Asus AS99127F chips. 
We will refer to them collectively as +W8378* chips. There is quite some difference between these chips, but they are similar enough that it was sensible to put them together in one driver. -The W83627HF chip is assumed to be identical to the ISA W83782D. The Asus chips are similar to an I2C-only W83782D. Chip #vin #fanin #pwm #temp wchipid vendid i2c ISA as99127f 7 3 0 3 0x31 0x12c3 yes no as99127f rev.2 (type_name = as99127f) 0x31 0x5ca3 yes no w83781d 7 3 0 3 0x10-1 0x5ca3 yes yes -w83627hf 9 3 2 3 0x21 0x5ca3 yes yes(LPC) w83782d 9 3 2-4 3 0x30 0x5ca3 yes yes w83783s 5-6 3 2 1-2 0x40 0x5ca3 yes no @@ -143,9 +137,9 @@ Individual alarm and beep bits: 0x000400: in6 0x000800: fan3 0x001000: chassis -0x002000: temp3 (W83782D and W83627HF only) -0x010000: in7 (W83782D and W83627HF only) -0x020000: in8 (W83782D and W83627HF only) +0x002000: temp3 (W83782D only) +0x010000: in7 (W83782D only) +0x020000: in8 (W83782D only) If an alarm triggers, it will remain triggered until the hardware register is read at least once. This means that the cause for the alarm may diff --git a/Documentation/hwmon/w83l786ng b/Documentation/hwmon/w83l786ng new file mode 100644 index 000000000000..d8f55d7fff10 --- /dev/null +++ b/Documentation/hwmon/w83l786ng @@ -0,0 +1,54 @@ +Kernel driver w83l786ng +===================== + +Supported chips: + * Winbond W83L786NG/W83L786NR + Prefix: 'w83l786ng' + Addresses scanned: I2C 0x2e - 0x2f + Datasheet: http://www.winbond-usa.com/products/winbond_products/pdfs/PCIC/W83L786NRNG09.pdf + +Author: Kevin Lo <kevlo@kevlo.org> + + +Module Parameters +----------------- + +* reset boolean + (default 0) + Use 'reset=1' to reset the chip (via index 0x40, bit 7). The default + behavior is no chip reset to preserve BIOS settings + + +Description +----------- + +This driver implements support for Winbond W83L786NG/W83L786NR chips. + +The driver implements two temperature sensors, two fan rotation speed +sensors, and three voltage sensors. 
+ +Temperatures are measured in degrees Celsius and measurement resolution is 1 +degC for temp1 and temp2. + +Fan rotation speeds are reported in RPM (rotations per minute). Fan readings +can be divided by a programmable divider (1, 2, 4, 8, 16, 32, 64 +or 128 for fan 1/2) to give the readings more range or accuracy. + +Voltage sensors (also known as IN sensors) report their values in millivolts. +An alarm is triggered if the voltage has crossed a programmable minimum +or maximum limit. + +/sys files +---------- + +pwm[1-2] - this file stores PWM duty cycle or DC value (fan speed) in range: + 0 (stop) to 255 (full) +pwm[1-2]_enable - this file controls mode of fan/temperature control: + * 0 Manual Mode + * 1 Thermal Cruise + * 2 Smart Fan II + * 4 FAN_SET +pwm[1-2]_mode - Select PWM or DC mode + * 0 DC + * 1 PWM +tolerance[1-2] - Value in degrees of Celsius (degC) for +- T diff --git a/Documentation/i2c/busses/i2c-piix4 b/Documentation/i2c/busses/i2c-piix4 index cf6b6cb02aa1..ef1efa79b1df 100644 --- a/Documentation/i2c/busses/i2c-piix4 +++ b/Documentation/i2c/busses/i2c-piix4 @@ -95,4 +95,4 @@ of all affected systems, so the only safe solution was to prevent access to the SMBus on all IBM systems (detected using DMI data.)
For additional information, read: -http://www2.lm-sensors.nu/~lm78/cvs/lm_sensors2/README.thinkpad +http://www.lm-sensors.org/browser/lm-sensors/trunk/README.thinkpad diff --git a/Documentation/input/input-programming.txt b/Documentation/input/input-programming.txt index 47fc86830cd7..81905e81585e 100644 --- a/Documentation/input/input-programming.txt +++ b/Documentation/input/input-programming.txt @@ -22,7 +22,7 @@ static struct input_dev *button_dev; static void button_interrupt(int irq, void *dummy, struct pt_regs *fp) { - input_report_key(button_dev, BTN_1, inb(BUTTON_PORT) & 1); + input_report_key(button_dev, BTN_0, inb(BUTTON_PORT) & 1); input_sync(button_dev); } diff --git a/Documentation/iostats.txt b/Documentation/iostats.txt index b963c3b4afa5..5925c3cd030d 100644 --- a/Documentation/iostats.txt +++ b/Documentation/iostats.txt @@ -58,7 +58,7 @@ they should not wrap twice before you notice them. Each set of stats only applies to the indicated device; if you want system-wide stats you'll have to find all the devices and sum them all up. -Field 1 -- # of reads issued +Field 1 -- # of reads completed This is the total number of reads completed successfully. Field 2 -- # of reads merged, field 6 -- # of writes merged Reads and writes which are adjacent to each other may be merged for @@ -132,6 +132,19 @@ words, the number of reads for partitions is counted slightly before time of queuing for partitions, and at completion for whole disks. This is a subtle distinction that is probably uninteresting for most cases. +More significant is the error induced by counting the numbers of +reads/writes before merges for partitions and after for disks. Since a +typical workload usually contains a lot of successive and adjacent requests, +the number of reads/writes issued can be several times higher than the +number of reads/writes completed. + +In 2.6.25, the full statistic set is again available for partitions and +disk and partition statistics are consistent again. 
Since we still don't +keep record of the partition-relative address, an operation is attributed to +the partition which contains the first sector of the request after the +eventual merges. As requests can be merged across partitions, this could lead +to some (probably insignificant) inaccuracy. + Additional notes ---------------- diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8fd5aa40585f..a4fc7fc21439 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -147,8 +147,10 @@ and is between 256 and 4096 characters. It is defined in the file default: 0 acpi_sleep= [HW,ACPI] Sleep options - Format: { s3_bios, s3_mode } - See Documentation/power/video.txt + Format: { s3_bios, s3_mode, s3_beep } + See Documentation/power/video.txt for s3_bios and s3_mode. + s3_beep is for debugging; it makes the PC's speaker beep + as soon as the kernel's real-mode entry point is called. acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode Format: { level | edge | high | low } @@ -175,6 +177,9 @@ and is between 256 and 4096 characters. It is defined in the file acpi_no_auto_ssdt [HW,ACPI] Disable automatic loading of SSDT + acpi_no_initrd_override [KNL,ACPI] + Disable loading custom ACPI tables from the initramfs + acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS Format: To spoof as Windows 98: ="Microsoft Windows" @@ -1968,9 +1973,6 @@ and is between 256 and 4096 characters. It is defined in the file <deci-seconds>: poll all this frequency 0: no polling (default) - time Show timing data prefixed to each printk message line - [deprecated, see 'printk.time'] - tipar.timeout= [HW,PPT] Set communications timeout in tenths of a second (default 15). diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 53a63890aea4..83f515c2905a 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -92,11 +92,12 @@ handler has run.
Up to MAX_STACK_SIZE bytes are copied -- e.g., 64 bytes on i386. Note that the probed function's args may be passed on the stack -or in registers (e.g., for x86_64 or for an i386 fastcall function). -The jprobe will work in either case, so long as the handler's -prototype matches that of the probed function. +or in registers. The jprobe will work in either case, so long as the +handler's prototype matches that of the probed function. -1.3 How Does a Return Probe Work? +1.3 Return Probes + +1.3.1 How Does a Return Probe Work? When you call register_kretprobe(), Kprobes establishes a kprobe at the entry to the function. When the probed function is called and this @@ -107,9 +108,9 @@ At boot time, Kprobes registers a kprobe at the trampoline. When the probed function executes its return instruction, control passes to the trampoline and that probe is hit. Kprobes' trampoline -handler calls the user-specified handler associated with the kretprobe, -then sets the saved instruction pointer to the saved return address, -and that's where execution resumes upon return from the trap. +handler calls the user-specified return handler associated with the +kretprobe, then sets the saved instruction pointer to the saved return +address, and that's where execution resumes upon return from the trap. While the probed function is executing, its return address is stored in an object of type kretprobe_instance. Before calling @@ -131,6 +132,30 @@ zero when the return probe is registered, and is incremented every time the probed function is entered but there is no kretprobe_instance object available for establishing the return probe. +1.3.2 Kretprobe entry-handler + +Kretprobes also provides an optional user-specified handler which runs +on function entry. This handler is specified by setting the entry_handler +field of the kretprobe struct. Whenever the kprobe placed by kretprobe at the +function entry is hit, the user-defined entry_handler, if any, is invoked. 
+If the entry_handler returns 0 (success) then a corresponding return handler +is guaranteed to be called upon function return. If the entry_handler +returns a non-zero error then Kprobes leaves the return address as is, and +the kretprobe has no further effect for that particular function instance. + +Multiple entry and return handler invocations are matched using the unique +kretprobe_instance object associated with them. Additionally, a user +may also specify per return-instance private data to be part of each +kretprobe_instance object. This is especially useful when sharing private +data between corresponding user entry and return handlers. The size of each +private data object can be specified at kretprobe registration time by +setting the data_size field of the kretprobe struct. This data can be +accessed through the data field of each kretprobe_instance object. + +In case probed function is entered but there is no kretprobe_instance +object available, then in addition to incrementing the nmissed count, +the user entry_handler invocation is also skipped. + 2. Architectures Supported Kprobes, jprobes, and return probes are implemented on the following @@ -244,9 +269,9 @@ Kprobes runs the handler whose address is jp->entry. The handler should have the same arg list and return type as the probed function; and just before it returns, it must call jprobe_return(). (The handler never actually returns, since jprobe_return() returns -control to Kprobes.) If the probed function is declared asmlinkage, -fastcall, or anything else that affects how args are passed, the -handler's declaration must match. +control to Kprobes.) If the probed function is declared asmlinkage +or anything else that affects how args are passed, the handler's +declaration must match. register_jprobe() returns 0 on success, or a negative errno otherwise. 
@@ -274,6 +299,8 @@ of interest: - ret_addr: the return address - rp: points to the corresponding kretprobe object - task: points to the corresponding task struct +- data: points to per return-instance private data; see "Kretprobe + entry-handler" for details. The regs_return_value(regs) macro provides a simple abstraction to extract the return value from the appropriate register as defined by @@ -556,23 +583,52 @@ report failed calls to sys_open(). #include <linux/kernel.h> #include <linux/module.h> #include <linux/kprobes.h> +#include <linux/ktime.h> + +/* per-instance private data */ +struct my_data { + ktime_t entry_stamp; +}; static const char *probed_func = "sys_open"; -/* Return-probe handler: If the probed function fails, log the return value. */ -static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs) +/* Timestamp function entry. */ +static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + struct my_data *data; + + if(!current->mm) + return 1; /* skip kernel threads */ + + data = (struct my_data *)ri->data; + data->entry_stamp = ktime_get(); + return 0; +} + +/* If the probed function failed, log the return value and duration. + * Duration may turn out to be zero consistently, depending upon the + * granularity of time accounting on the platform. */ +static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs) { int retval = regs_return_value(regs); + struct my_data *data = (struct my_data *)ri->data; + s64 delta; + ktime_t now; + if (retval < 0) { - printk("%s returns %d\n", probed_func, retval); + now = ktime_get(); + delta = ktime_to_ns(ktime_sub(now, data->entry_stamp)); + printk("%s: return val = %d (duration = %lld ns)\n", + probed_func, retval, delta); } return 0; } static struct kretprobe my_kretprobe = { - .handler = ret_handler, - /* Probe up to 20 instances concurrently. 
*/ - .maxactive = 20 + .handler = return_handler, + .entry_handler = entry_handler, + .data_size = sizeof(struct my_data), + .maxactive = 20, /* probe up to 20 instances concurrently */ }; static int __init kretprobe_init(void) @@ -584,7 +640,7 @@ static int __init kretprobe_init(void) printk("register_kretprobe failed, returned %d\n", ret); return -1; } - printk("Planted return probe at %p\n", my_kretprobe.kp.addr); + printk("Kretprobe active on %s\n", my_kretprobe.kp.symbol_name); return 0; } @@ -594,7 +650,7 @@ static void __exit kretprobe_exit(void) printk("kretprobe unregistered\n"); /* nmissed > 0 suggests that maxactive was set too low. */ printk("Missed probing %d instances of %s\n", - my_kretprobe.nmissed, probed_func); + my_kretprobe.nmissed, probed_func); } module_init(kretprobe_init) diff --git a/Documentation/kref.txt b/Documentation/kref.txt index f38b59d00c63..130b6e87aa7e 100644 --- a/Documentation/kref.txt +++ b/Documentation/kref.txt @@ -141,10 +141,10 @@ The last rule (rule 3) is the nastiest one to handle. Say, for instance, you have a list of items that are each kref-ed, and you wish to get the first one. You can't just pull the first item off the list and kref_get() it. That violates rule 3 because you are not already -holding a valid pointer. You must add locks or semaphores. For -instance: +holding a valid pointer. You must add a mutex (or some other lock). 
+For instance: -static DECLARE_MUTEX(sem); +static DEFINE_MUTEX(mutex); static LIST_HEAD(q); struct my_data { @@ -155,12 +155,12 @@ struct my_data static struct my_data *get_entry() { struct my_data *entry = NULL; - down(&sem); + mutex_lock(&mutex); if (!list_empty(&q)) { entry = container_of(q.next, struct my_q_entry, link); kref_get(&entry->refcount); } - up(&sem); + mutex_unlock(&mutex); return entry; } @@ -174,9 +174,9 @@ static void release_entry(struct kref *ref) static void put_entry(struct my_data *entry) { - down(&sem); + mutex_lock(&mutex); kref_put(&entry->refcount, release_entry); - up(&sem); + mutex_unlock(&mutex); } The kref_put() return value is useful if you do not want to hold the @@ -191,13 +191,13 @@ static void release_entry(struct kref *ref) static void put_entry(struct my_data *entry) { - down(&sem); + mutex_lock(&mutex); if (kref_put(&entry->refcount, release_entry)) { list_del(&entry->link); - up(&sem); + mutex_unlock(&mutex); kfree(entry); } else - up(&sem); + mutex_unlock(&mutex); } This is really more useful if you have to call other routines as part diff --git a/Documentation/laptops/00-INDEX b/Documentation/laptops/00-INDEX new file mode 100644 index 000000000000..729c2c062e10 --- /dev/null +++ b/Documentation/laptops/00-INDEX @@ -0,0 +1,10 @@ +00-INDEX + - This file +acer-wmi.txt + - information on the Acer Laptop WMI Extras driver. +sony-laptop.txt + - Sony Notebook Control Driver (SNC) Readme. +sonypi.txt + - info on Linux Sony Programmable I/O Device support. +thinkpad-acpi.txt + - information on the (IBM and Lenovo) ThinkPad ACPI Extras driver. 
diff --git a/Documentation/laptops/acer-wmi.txt b/Documentation/laptops/acer-wmi.txt new file mode 100644 index 000000000000..b06696329cff --- /dev/null +++ b/Documentation/laptops/acer-wmi.txt @@ -0,0 +1,202 @@ +Acer Laptop WMI Extras Driver +http://code.google.com/p/aceracpi +Version 0.1 +9th February 2008 + +Copyright 2007-2008 Carlos Corbacho <carlos@strangeworlds.co.uk> + +acer-wmi is a driver to allow you to control various parts of your Acer laptop +hardware under Linux which are exposed via ACPI-WMI. + +This driver completely replaces the old out-of-tree acer_acpi, which I am +currently maintaining for bug fixes only on pre-2.6.25 kernels. All development +work is now focused solely on acer-wmi. + +Disclaimer +********** + +Acer and Wistron have provided nothing towards the development of acer_acpi or +acer-wmi. All information we have has been through the efforts of the developers +and the users to discover as much as possible about the hardware. + +As such, I do warn that this could break your hardware - this is extremely +unlikely of course, but please bear this in mind. + +Background +********** + +acer-wmi is derived from acer_acpi, originally developed by Mark +Smith in 2005, then taken over by Carlos Corbacho in 2007, in order to activate +the wireless LAN card under a 64-bit version of Linux, as acerhk[1] (the +previous solution to the problem) relied on making 32 bit BIOS calls which are +not possible in kernel space from a 64 bit OS. + +[1] acerhk: http://www.cakey.de/acerhk/ + +Supported Hardware +****************** + +Please see the website for the current list of known working hardware: + +http://code.google.com/p/aceracpi/wiki/SupportedHardware + +If your laptop is not listed, or listed as unknown, and works with acer-wmi, +please contact me with a copy of the DSDT. + +If your Acer laptop doesn't work with acer-wmi, I would also like to see the +DSDT.
+ +To send me the DSDT, as root/sudo: + +cat /sys/firmware/acpi/DSDT > dsdt + +And send me the resulting 'dsdt' file. + +Usage +***** + +On Acer laptops, acer-wmi should already be autoloaded based on DMI matching. +For non-Acer laptops, until WMI based autoloading support is added, you will +need to manually load acer-wmi. + +acer-wmi creates /sys/devices/platform/acer-wmi, and fills it with various +files whose usage is detailed below, which enables you to control some of the +following (varies between models): + +* the wireless LAN card radio +* inbuilt Bluetooth adapter +* inbuilt 3G card +* mail LED of your laptop +* brightness of the LCD panel + +Wireless +******** + +With regards to wireless, all acer-wmi does is enable the radio on the card. It +is not responsible for the wireless LED - once the radio is enabled, this is +down to the wireless driver for your card. So the behaviour of the wireless LED, +once you enable the radio, will depend on your hardware and driver combination. + +e.g. With the BCM4318 on the Acer Aspire 5020 series: + +ndiswrapper: Light blinks on when transmitting +bcm43xx/b43: Solid light, blinks off when transmitting + +Wireless radio control is unconditionally enabled - all Acer laptops that support +acer-wmi come with built-in wireless. However, should you feel so inclined to +ever wish to remove the card, or swap it out at some point, please get in touch +with me, as we may well be able to gain some data on wireless card detection. 
+ +To read the status of the wireless radio (0=off, 1=on): +cat /sys/devices/platform/acer-wmi/wireless + +To enable the wireless radio: +echo 1 > /sys/devices/platform/acer-wmi/wireless + +To disable the wireless radio: +echo 0 > /sys/devices/platform/acer-wmi/wireless + +To set the state of the wireless radio when loading acer-wmi, pass: +wireless=X (where X is 0 or 1) + +Bluetooth +********* + +For bluetooth, this is an internal USB dongle, so once enabled, you will get +a USB device connection event, and a new USB device appears. When you disable +bluetooth, you get the reverse - a USB device disconnect event, followed by the +device disappearing again. + +Bluetooth is autodetected by acer-wmi, so if you do not have a bluetooth module +installed in your laptop, this file won't exist (please be aware that it is +quite common for Acer not to fit bluetooth to their laptops - so just because +you have a bluetooth button on the laptop, doesn't mean that bluetooth is +installed). + +For the adventurously minded - if you want to buy an internal bluetooth +module off the internet that is compatible with your laptop and fit it, then +it will work just fine with acer-wmi. + +To read the status of the bluetooth module (0=off, 1=on): +cat /sys/devices/platform/acer-wmi/bluetooth + +To enable the bluetooth module: +echo 1 > /sys/devices/platform/acer-wmi/bluetooth + +To disable the bluetooth module: +echo 0 > /sys/devices/platform/acer-wmi/bluetooth + +To set the state of the bluetooth module when loading acer-wmi, pass: +bluetooth=X (where X is 0 or 1) + +3G +** + +3G is currently not autodetected, so the 'threeg' file is always created under +sysfs. So far, no-one in possession of an Acer laptop with 3G built-in appears to +have tried Linux, or reported back, so we don't have any information on this. + +If you have an Acer laptop that does have a 3G card in, please contact me so we +can properly detect these, and find out a bit more about them.
+ +To read the status of the 3G card (0=off, 1=on): +cat /sys/devices/platform/acer-wmi/threeg + +To enable the 3G card: +echo 1 > /sys/devices/platform/acer-wmi/threeg + +To disable the 3G card: +echo 0 > /sys/devices/platform/acer-wmi/threeg + +To set the state of the 3G card when loading acer-wmi, pass: +threeg=X (where X is 0 or 1) + +Mail LED +******** + +This can be found in most older Acer laptops supported by acer-wmi, and many +newer ones - it is built into the 'mail' button, and blinks when active. + +On newer (WMID) laptops though, we have no way of detecting the mail LED. If +your laptop identifies itself in dmesg as a WMID model, then please try loading +acer_acpi with: + +force_series=2490 + +This will use a known alternative method of reading/ writing the mail LED. If +it works, please report back to me with the DMI data from your laptop so this +can be added to acer-wmi. + +The LED is exposed through the LED subsystem, and can be found in: + +/sys/devices/platform/acer-wmi/leds/acer-mail:green/ + +The mail LED is autodetected, so if you don't have one, the LED device won't +be registered. + +If you have a mail LED that is not green, please report this to me. + +Backlight +********* + +The backlight brightness control is available on all acer-wmi supported +hardware. The maximum brightness level is usually 15, but on some newer laptops +it's 10 (this is again autodetected). + +The backlight is exposed through the backlight subsystem, and can be found in: + +/sys/devices/platform/acer-wmi/backlight/acer-wmi/ + +Credits +******* + +Olaf Tauber, who did the real hard work when he developed acerhk +http://www.informatik.hu-berlin.de/~tauber/acerhk +All the authors of laptop ACPI modules in the kernel, whose work +was an inspiration in the early days of acer_acpi +Mathieu Segaud, who solved the problem with having to modprobe the driver +twice in acer_acpi 0.2. 
+Jim Ramsay, who added support for the WMID interface +Mark Smith, who started the original acer_acpi + +And the many people who have used both acer_acpi and acer-wmi. diff --git a/Documentation/sony-laptop.txt b/Documentation/laptops/sony-laptop.txt index 7a5c1a81905c..8b2bc1572d98 100644 --- a/Documentation/sony-laptop.txt +++ b/Documentation/laptops/sony-laptop.txt @@ -114,4 +114,3 @@ Bugs/Limitations: sonypi driver (through /dev/sonypi) does not try to use the sony-laptop driver. In the future, spicctrl could try sonypi first, and if it isn't present, try sony-laptop instead. - diff --git a/Documentation/sonypi.txt b/Documentation/laptops/sonypi.txt index 4857acfc50f1..4857acfc50f1 100644 --- a/Documentation/sonypi.txt +++ b/Documentation/laptops/sonypi.txt diff --git a/Documentation/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt index 10c041ca13c7..6c2477754a2a 100644 --- a/Documentation/thinkpad-acpi.txt +++ b/Documentation/laptops/thinkpad-acpi.txt @@ -1,7 +1,7 @@ ThinkPad ACPI Extras Driver - Version 0.17 - October 04th, 2007 + Version 0.19 + January 06th, 2008 Borislav Deianov <borislav@users.sf.net> Henrique de Moraes Holschuh <hmh@hmh.eng.br> @@ -215,6 +215,11 @@ The following commands can be written to the /proc/acpi/ibm/hotkey file: ... any other 8-hex-digit mask ... echo reset > /proc/acpi/ibm/hotkey -- restore the original mask +The procfs interface does not support NVRAM polling control. So as to +maintain maximum bug-to-bug compatibility, it does not report any masks, +nor does it allow one to manipulate the hot key mask when the firmware +does not support masks at all, even if NVRAM polling is in use. + sysfs notes: hotkey_bios_enabled: @@ -231,17 +236,26 @@ sysfs notes: to this value. hotkey_enable: - Enables/disables the hot keys feature, and reports - current status of the hot keys feature. + Enables/disables the hot keys feature in the ACPI + firmware, and reports current status of the hot keys + feature. 
Has no effect on the NVRAM hot key polling + functionality. 0: disables the hot keys feature / feature disabled 1: enables the hot keys feature / feature enabled hotkey_mask: - bit mask to enable driver-handling and ACPI event - generation for each hot key (see above). Returns the - current status of the hot keys mask, and allows one to - modify it. + bit mask to enable driver-handling (and depending on + the firmware, ACPI event generation) for each hot key + (see above). Returns the current status of the hot keys + mask, and allows one to modify it. + + Note: when NVRAM polling is active, the firmware mask + will be different from the value returned by + hotkey_mask. The driver will retain enabled bits for + hotkeys that are under NVRAM polling even if the + firmware refuses them, and will not set these bits on + the firmware hot key mask. hotkey_all_mask: bit mask that should enable event reporting for all @@ -257,12 +271,48 @@ sysfs notes: handled by the firmware anyway. Echo it to hotkey_mask above, to use. + hotkey_source_mask: + bit mask that selects which hot keys will the driver + poll the NVRAM for. This is auto-detected by the driver + based on the capabilities reported by the ACPI firmware, + but it can be overridden at runtime. + + Hot keys whose bits are set in both hotkey_source_mask + and also on hotkey_mask are polled for in NVRAM. Only a + few hot keys are available through CMOS NVRAM polling. + + Warning: when in NVRAM mode, the volume up/down/mute + keys are synthesized according to changes in the mixer, + so you have to use volume up or volume down to unmute, + as per the ThinkPad volume mixer user interface. When + in ACPI event mode, volume up/down/mute are reported as + separate events, but this behaviour may be corrected in + future releases of this driver, in which case the + ThinkPad volume mixer user interface semantics will be + enforced. + + hotkey_poll_freq: + frequency in Hz for hot key polling. It must be between + 0 and 25 Hz.
Polling is only carried out when strictly + needed. + + Setting hotkey_poll_freq to zero disables polling, and + will cause hot key presses that require NVRAM polling + to never be reported. + + Setting hotkey_poll_freq too low will cause repeated + pressings of the same hot key to be misreported as a + single key press, or to not even be detected at all. + The recommended polling frequency is 10Hz. + hotkey_radio_sw: if the ThinkPad has a hardware radio switch, this attribute will read 0 if the switch is in the "radios disabled" postition, and 1 if the switch is in the "radios enabled" position. + This attribute has poll()/select() support. + hotkey_report_mode: Returns the state of the procfs ACPI event report mode filter for hot keys. If it is set to 1 (the default), @@ -277,6 +327,25 @@ sysfs notes: May return -EPERM (write access locked out by module parameter) or -EACCES (read-only). + wakeup_reason: + Set to 1 if the system is waking up because the user + requested a bay ejection. Set to 2 if the system is + waking up because the user requested the system to + undock. Set to zero for normal wake-ups or wake-ups + due to unknown reasons. + + This attribute has poll()/select() support. + + wakeup_hotunplug_complete: + Set to 1 if the system was woken up because of an + undock or bay ejection request, and that request + was successfully completed. At this point, it might + be useful to send the system back to sleep, at the + user's choice. Refer to HKEY events 0x4003 and + 0x3003, below. + + This attribute has poll()/select() support. + input layer notes: A Hot key is mapped to a single input layer EV_KEY event, possibly @@ -427,6 +496,23 @@ Non hot-key ACPI HKEY event map: The above events are not propagated by the driver, except for legacy compatibility purposes when hotkey_report_mode is set to 1.
+0x2304 System is waking up from suspend to undock +0x2305 System is waking up from suspend to eject bay +0x2404 System is waking up from hibernation to undock +0x2405 System is waking up from hibernation to eject bay + +The above events are never propagated by the driver. + +0x3003 Bay ejection (see 0x2x05) complete, can sleep again +0x4003 Undocked (see 0x2x04), can sleep again +0x5009 Tablet swivel: switched to tablet mode +0x500A Tablet swivel: switched to normal mode +0x500B Tablet pen inserted into its storage bay +0x500C Tablet pen removed from its storage bay +0x5010 Brightness level changed (newer Lenovo BIOSes) + +The above events are propagated by the driver. + Compatibility notes: ibm-acpi and thinkpad-acpi 0.15 (mainline kernels before 2.6.23) never @@ -1263,3 +1349,17 @@ Sysfs interface changelog: and the hwmon class for libsensors4 (lm-sensors 3) compatibility. Moved all hwmon attributes to this new platform device. + +0x020100: Marker for thinkpad-acpi with hot key NVRAM polling + support. If you must, use it to know you should not + start a userspace NVRAM poller (allows to detect when + NVRAM is compiled out by the user because it is + unneeded/undesired in the first place). +0x020101: Marker for thinkpad-acpi with hot key NVRAM polling + and proper hotkey_mask semantics (version 8 of the + NVRAM polling patch). Some development snapshots of + 0.18 had an earlier version that did strange things + to hotkey_mask. + +0x020200: Add poll()/select() support to the following attributes: + hotkey_radio_sw, wakeup_hotunplug_complete, wakeup_reason diff --git a/Documentation/leds-class.txt b/Documentation/leds-class.txt index 8c35c0426110..56757c751d6f 100644 --- a/Documentation/leds-class.txt +++ b/Documentation/leds-class.txt @@ -39,12 +39,33 @@ LED Device Naming Is currently of the form: -"devicename:colour" +"devicename:colour:function" There have been calls for LED properties such as colour to be exported as individual led class attributes.
As a solution which doesn't incur as much overhead, I suggest these become part of the device name. The naming scheme -above leaves scope for further attributes should they be needed. +above leaves scope for further attributes should they be needed. If sections +of the name don't apply, just leave that section blank. + + +Hardware accelerated blink of LEDs +================================== + +Some LEDs can be programmed to blink without any CPU interaction. To +support this feature, a LED driver can optionally implement the +blink_set() function (see <linux/leds.h>). If implemented, triggers can +attempt to use it before falling back to software timers. The blink_set() +function should return 0 if the blink setting is supported, or -EINVAL +otherwise, which means that LED blinking will be handled by software. + +The blink_set() function should choose a user friendly blinking +value if it is called with *delay_on==0 && *delay_off==0 parameters. In +this case the driver should give back the chosen value through delay_on +and delay_off parameters to the leds subsystem. + +Any call to the brightness_set() callback function should cancel the +previously programmed hardware blinking function so setting the brightness +to 0 can also cancel the blinking of the LED. Known Issues @@ -55,10 +76,6 @@ would cause nightmare dependency issues. I see this as a minor issue compared to the benefits the simple trigger functionality brings. The rest of the LED subsystem can be modular. -Some leds can be programmed to flash in hardware. As this isn't a generic -LED device property, this should be exported as a device specific sysfs -attribute rather than part of the class if this functionality is required. - Future Development ================== diff --git a/Documentation/md.txt b/Documentation/md.txt index 5818628207b5..396cdd982c26 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt @@ -416,6 +416,16 @@ also have sectors in total that could need to be processed.
The two numbers are separated by a '/' thus effectively showing one value, a fraction of the process that is complete. + A 'select' on this attribute will return when resync completes, + when it reaches the current sync_max (below) and possibly at + other times. + + sync_max + This is a number of sectors at which point a resync/recovery + process will pause. When a resync is active, the value can + only ever be increased, never decreased. The value of 'max' + effectively disables the limit. + sync_speed This shows the current actual speed, in K/sec, of the current diff --git a/Documentation/mn10300/ABI.txt b/Documentation/mn10300/ABI.txt new file mode 100644 index 000000000000..1fef1f06dfd2 --- /dev/null +++ b/Documentation/mn10300/ABI.txt @@ -0,0 +1,149 @@ + ========================= + MN10300 FUNCTION CALL ABI + ========================= + +======= +GENERAL +======= + +The MN10300/AM33 kernel runs in little-endian mode; big-endian mode is not +supported. + +The stack grows downwards, and should always be 32-bit aligned. There are +separate stack pointer registers for userspace and the kernel. + + +================ +ARGUMENT PASSING +================ + +The first two arguments (assuming up to 32-bits per argument) to a function are +passed in the D0 and D1 registers respectively; all other arguments are passed +on the stack. + +If 64-bit arguments are being passed, then they are never split between +registers and the stack. If the first argument is a 64-bit value, it will be +passed in D0:D1. If the first argument is not a 64-bit value, but the second +is, the second will be passed entirely on the stack and D1 will be unused. + +Arguments smaller than 32-bits are not coalesced within a register or a stack +word. For example, two byte-sized arguments will always be passed in separate +registers or word-sized stack slots.
+ + +================= +CALLING FUNCTIONS +================= + +The caller must allocate twelve bytes on the stack for the callee's use before +it inserts a CALL instruction. The CALL instruction will write into the TOS +word, but won't actually modify the stack pointer; similarly, the RET +instruction reads from the TOS word of the stack, but doesn't move the stack +pointer beyond it. + + + Stack: + | | + | | + |---------------| SP+20 + | 4th Arg | + |---------------| SP+16 + | 3rd Arg | + |---------------| SP+12 + | D1 Save Slot | + |---------------| SP+8 + | D0 Save Slot | + |---------------| SP+4 + | Return Addr | + |---------------| SP + | | + | | + + +The caller must leave space on the stack (hence an allocation of twelve bytes) +in which the callee may store the first two arguments. + + +============ +RETURN VALUE +============ + +The return value is passed in D0 for an integer (or D0:D1 for a 64-bit value), +or A0 for a pointer. + +If the return value is a value larger than 64-bits, or is a structure or an +array, then a hidden first argument will be passed to the callee by the caller: +this will point to a piece of memory large enough to hold the result of the +function. In this case, the callee will return the value in that piece of +memory, and no value will be returned in D0 or A0. + + +=================== +REGISTER CLOBBERING +=================== + +The values in certain registers may be clobbered by the callee, and other +values must be saved: + + Clobber: D0-D1, A0-A1, E0-E3 + Save: D2-D3, A2-A3, E4-E7, SP + +All other non-supervisor-only registers are clobberable (such as MDR, MCRL, +MCRH). + + +================= +SPECIAL REGISTERS +================= + +Certain ordinary registers may carry special usage for the compiler: + + A3: Frame pointer + E2: TLS pointer + + +========== +KERNEL ABI +========== + +The kernel may use a slightly different ABI internally. 
+ + (*) E2 + + If CONFIG_MN10300_CURRENT_IN_E2 is defined, then the current task pointer + will be kept in the E2 register, and that register will be marked + unavailable for the compiler to use as a scratch register. + + Normally the kernel uses something like: + + MOV SP,An + AND 0xFFFFE000,An + MOV (An),Rm // Rm holds current + MOV (yyy,Rm) // Access current->yyy + + To find the address of current; but since this option permits current to + be carried globally in a register, it can use: + + MOV (yyy,E2) // Access current->yyy + + instead. + + +=============== +SYSTEM CALL ABI +=============== + +System calls are called with the following convention: + + REGISTER ENTRY EXIT + =============== ======================= ======================= + D0 Syscall number Return value + A0 1st syscall argument Saved + D1 2nd syscall argument Saved + A3 3rd syscall argument Saved + A2 4th syscall argument Saved + D3 5th syscall argument Saved + D2 6th syscall argument Saved + +All other registers are saved. The layout is a consequence of the way the MOVM +instruction stores registers onto the stack. diff --git a/Documentation/mn10300/compartmentalisation.txt b/Documentation/mn10300/compartmentalisation.txt new file mode 100644 index 000000000000..8958b51dac4b --- /dev/null +++ b/Documentation/mn10300/compartmentalisation.txt @@ -0,0 +1,60 @@ + ========================================= + PART-SPECIFIC SOURCE COMPARTMENTALISATION + ========================================= + +The sources for various parts are compartmentalised at two different levels: + + (1) Processor level + + The "processor level" is a CPU core plus the other on-silicon + peripherals. + + Processor-specific header files are divided among directories in a similar + way to the CPU level: + + (*) include/asm-mn10300/proc-mn103e010/ + + Support for the AM33v2 CPU core.
+ + The appropriate processor is selected by a CONFIG_MN10300_PROC_YYYY option + from the "Processor support" choice menu in the arch/mn10300/Kconfig file. + + + (2) Unit level + + The "unit level" is a processor plus all the external peripherals + controlled by that processor. + + Unit-specific header files are divided among directories in a similar way + to the CPU level; not only that, but specific sources may also be + segregated into separate directories under the arch directory: + + (*) include/asm-mn10300/unit-asb2303/ + (*) arch/mn10300/unit-asb2303/ + + Support for the ASB2303 board with an ASB2308 daughter board. + + (*) include/asm-mn10300/unit-asb2305/ + (*) arch/mn10300/unit-asb2305/ + + Support for the ASB2305 board. + + The appropriate unit is selected by a CONFIG_MN10300_UNIT_ZZZZ option + from the "Unit type" choice menu in the arch/mn10300/Kconfig file. + + +============ +COMPILE TIME +============ + +When the kernel is compiled, symbolic links will be made in the asm header file +directory for this arch: + + include/asm-mn10300/proc => include/asm-mn10300/proc-YYYY/ + include/asm-mn10300/unit => include/asm-mn10300/unit-ZZZZ/ + +So that the header files contained in those directories can be accessed without +lots of #ifdef-age. + +The appropriate arch/mn10300/unit-ZZZZ directory will also be entered by the +compilation process; all other unit-specific directories will be ignored. diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt index aea7e9209667..9d60ab717a7b 100644 --- a/Documentation/power/swsusp.txt +++ b/Documentation/power/swsusp.txt @@ -386,6 +386,11 @@ before suspending; then remount them after resuming. There is a work-around for this problem. For more information, see Documentation/usb/persist.txt. +Q: Can I suspend-to-disk using a swap partition under LVM? + +A: No. You can suspend successfully, but you'll not be able to +resume. uswsusp should be able to work with LVM. See suspend.sf.net.
+ Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were compiled with the similar configuration files. Anyway I found that suspend to disk (and resume) is much slower on 2.6.16 compared to diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index b5e46efeba84..7b4e8a70882c 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt @@ -57,6 +57,7 @@ Table of Contents n) 4xx/Axon EMAC ethernet nodes o) Xilinx IP cores p) Freescale Synchronous Serial Interface + q) USB EHCI controllers VII - Specifying interrupt information for devices 1) interrupts property @@ -2577,6 +2578,20 @@ platforms are moved over to use the flattened-device-tree model. Requred properties: - current-speed : Baud rate of uartlite + v) Xilinx hwicap + + Xilinx hwicap devices provide access to the configuration logic + of the FPGA through the Internal Configuration Access Port + (ICAP). The ICAP enables partial reconfiguration of the FPGA, + readback of the configuration information, and some control over + 'warm boots' of the FPGA fabric. + + Required properties: + - xlnx,family : The family of the FPGA, necessary since the + capabilities of the underlying ICAP hardware + differ between different families. May be + 'virtex2p', 'virtex4', or 'virtex5'. + p) Freescale Synchronous Serial Interface The SSI is a serial device that communicates with audio codecs. It can @@ -2775,6 +2790,33 @@ platforms are moved over to use the flattened-device-tree model. interrupt-parent = < &ipic >; }; + q) USB EHCI controllers + + Required properties: + - compatible : should be "usb-ehci". + - reg : should contain at least address and length of the standard EHCI + register set for the device. Optional platform-dependent registers + (debug-port or other) can be also specified here, but only after + definition of standard EHCI registers. + - interrupts : one EHCI interrupt should be described here. 
+ If device registers are implemented in big endian mode, the device + node should have "big-endian-regs" property. + If controller implementation operates with big endian descriptors, + "big-endian-desc" property should be specified. + If both big endian registers and descriptors are used by the controller + implementation, "big-endian" property can be specified instead of having + both "big-endian-regs" and "big-endian-desc". + + Example (Sequoia 440EPx): + ehci@e0000300 { + compatible = "ibm,usb-ehci-440epx", "usb-ehci"; + interrupt-parent = <&UIC0>; + interrupts = <1a 4>; + reg = <0 e0000300 90 0 e0000390 70>; + big-endian; + }; + + More devices will be defined as this spec matures. VII - Specifying interrupt information for devices diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt index e20b19c1b60d..8deffcd68cb8 100644 --- a/Documentation/rtc.txt +++ b/Documentation/rtc.txt @@ -182,8 +182,8 @@ driver returns ENOIOCTLCMD. Some common examples: since the frequency is stored in the irq_freq member of the rtc_device structure. Your driver needs to initialize the irq_freq member during init. Make sure you check the requested frequency is in range of your - hardware in the irq_set_freq function. If you cannot actually change - the frequency, just return -ENOTTY. + hardware in the irq_set_freq function. If it isn't, return -EINVAL. If + you cannot actually change the frequency, do not define irq_set_freq. If all else fails, check out the rtc-test.c driver! 
@@ -268,8 +268,8 @@ int main(int argc, char **argv) /* This read will block */ retval = read(fd, &data, sizeof(unsigned long)); if (retval == -1) { - perror("read"); - exit(errno); + perror("read"); + exit(errno); } fprintf(stderr, " %d",i); fflush(stderr); @@ -326,11 +326,11 @@ test_READ: rtc_tm.tm_sec %= 60; rtc_tm.tm_min++; } - if (rtc_tm.tm_min == 60) { + if (rtc_tm.tm_min == 60) { rtc_tm.tm_min = 0; rtc_tm.tm_hour++; } - if (rtc_tm.tm_hour == 24) + if (rtc_tm.tm_hour == 24) rtc_tm.tm_hour = 0; retval = ioctl(fd, RTC_ALM_SET, &rtc_tm); @@ -407,8 +407,8 @@ test_PIE: "\n...Periodic IRQ rate is fixed\n"); goto done; } - perror("RTC_IRQP_SET ioctl"); - exit(errno); + perror("RTC_IRQP_SET ioctl"); + exit(errno); } fprintf(stderr, "\n%ldHz:\t", tmp); @@ -417,27 +417,27 @@ test_PIE: /* Enable periodic interrupts */ retval = ioctl(fd, RTC_PIE_ON, 0); if (retval == -1) { - perror("RTC_PIE_ON ioctl"); - exit(errno); + perror("RTC_PIE_ON ioctl"); + exit(errno); } for (i=1; i<21; i++) { - /* This blocks */ - retval = read(fd, &data, sizeof(unsigned long)); - if (retval == -1) { - perror("read"); - exit(errno); - } - fprintf(stderr, " %d",i); - fflush(stderr); - irqcount++; + /* This blocks */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + fprintf(stderr, " %d",i); + fflush(stderr); + irqcount++; } /* Disable periodic interrupts */ retval = ioctl(fd, RTC_PIE_OFF, 0); if (retval == -1) { - perror("RTC_PIE_OFF ioctl"); - exit(errno); + perror("RTC_PIE_OFF ioctl"); + exit(errno); } } diff --git a/Documentation/sched-rt-group.txt b/Documentation/sched-rt-group.txt new file mode 100644 index 000000000000..1c6332f4543c --- /dev/null +++ b/Documentation/sched-rt-group.txt @@ -0,0 +1,59 @@ + + +Real-Time group scheduling. + +The problem space: + +In order to schedule multiple groups of realtime tasks each group must +be assigned a fixed portion of the CPU time available. 
Without a minimum +guarantee a realtime group can obviously fall short. A fuzzy upper limit +is of no use since it cannot be relied upon. Which leaves us with just +the single fixed portion. + +CPU time is divided by means of specifying how much time can be spent +running in a given period. Say a frame fixed realtime renderer must +deliver 25 frames a second, which yields a period of 0.04s. Now say +it will also have to play some music and respond to input, leaving it +with around 80% for the graphics. We can then give this group a runtime +of 0.8 * 0.04s = 0.032s. + +This way the graphics group will have a 0.04s period with a 0.032s runtime +limit. + +Now if the audio thread needs to refill the DMA buffer every 0.005s, but +needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s += 0.00015s. + + +The Interface: + +system wide: + +/proc/sys/kernel/sched_rt_period_us +/proc/sys/kernel/sched_rt_runtime_us + +CONFIG_FAIR_USER_SCHED + +/sys/kernel/uids/<uid>/cpu_rt_runtime_us + +or + +CONFIG_FAIR_CGROUP_SCHED + +/cgroup/<cgroup>/cpu.rt_runtime_us + +[ time is specified in us because the interface is s32; this gives an + operating range of ~35m to 1us ] + +The period takes values in [ 1, INT_MAX ], runtime in [ -1, INT_MAX - 1 ]. + +A runtime of -1 specifies runtime == period, ie. no limit. + +New groups get the period from /proc/sys/kernel/sched_rt_period_us and +a runtime of 0. + +Settings are constrained to: + + \Sum_{i} runtime_{i} / global_period <= global_runtime / global_period + +in order to keep the configuration schedulable. diff --git a/Documentation/scheduler/00-INDEX b/Documentation/scheduler/00-INDEX new file mode 100644 index 000000000000..b5f5ca069b2d --- /dev/null +++ b/Documentation/scheduler/00-INDEX @@ -0,0 +1,16 @@ +00-INDEX + - this file. +sched-arch.txt + - CPU Scheduler implementation hints for architecture specific code. +sched-coding.txt + - reference for various scheduler-related methods in the O(1) scheduler.
+sched-design.txt + - goals, design and implementation of the Linux O(1) scheduler. +sched-design-CFS.txt + - goals, design and implementation of the Complete Fair Scheduler. +sched-domains.txt + - information on scheduling domains. +sched-nice-design.txt + - How and why the scheduler's nice levels are implemented. +sched-stats.txt + - information on schedstats (Linux Scheduler Statistics). diff --git a/Documentation/sched-arch.txt b/Documentation/scheduler/sched-arch.txt index 941615a9769b..941615a9769b 100644 --- a/Documentation/sched-arch.txt +++ b/Documentation/scheduler/sched-arch.txt diff --git a/Documentation/sched-coding.txt b/Documentation/scheduler/sched-coding.txt index cbd8db752acf..cbd8db752acf 100644 --- a/Documentation/sched-coding.txt +++ b/Documentation/scheduler/sched-coding.txt diff --git a/Documentation/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt index 88bcb8767335..88bcb8767335 100644 --- a/Documentation/sched-design-CFS.txt +++ b/Documentation/scheduler/sched-design-CFS.txt diff --git a/Documentation/sched-design.txt b/Documentation/scheduler/sched-design.txt index 1605bf0cba8b..1605bf0cba8b 100644 --- a/Documentation/sched-design.txt +++ b/Documentation/scheduler/sched-design.txt diff --git a/Documentation/sched-domains.txt b/Documentation/scheduler/sched-domains.txt index a9e990ab980f..a9e990ab980f 100644 --- a/Documentation/sched-domains.txt +++ b/Documentation/scheduler/sched-domains.txt diff --git a/Documentation/sched-nice-design.txt b/Documentation/scheduler/sched-nice-design.txt index e2bae5a577e3..e2bae5a577e3 100644 --- a/Documentation/sched-nice-design.txt +++ b/Documentation/scheduler/sched-nice-design.txt diff --git a/Documentation/sched-stats.txt b/Documentation/scheduler/sched-stats.txt index 442e14d35dea..442e14d35dea 100644 --- a/Documentation/sched-stats.txt +++ b/Documentation/scheduler/sched-stats.txt diff --git a/Documentation/scsi/ChangeLog.arcmsr b/Documentation/scsi/ChangeLog.arcmsr index 
cd8403a33ee6..de2bcacfa870 100644 --- a/Documentation/scsi/ChangeLog.arcmsr +++ b/Documentation/scsi/ChangeLog.arcmsr @@ -68,4 +68,45 @@ ** 2. modify the arcmsr_pci_slot_reset function ** 3. modify the arcmsr_pci_ers_disconnect_forepart function ** 4. modify the arcmsr_pci_ers_need_reset_forepart function +** 1.20.00.15 09/27/2007 Erich Chen & Nick Cheng +** 1. add arcmsr_enable_eoi_mode() on adapter Type B +** 2. add readl(reg->iop2drv_doorbell_reg) in arcmsr_handle_hbb_isr() +** in case of the doorbell interrupt clearance is cached +** 1.20.00.15 10/01/2007 Erich Chen & Nick Cheng +** 1. modify acb->devstate[i][j] +** as ARECA_RAID_GOOD instead of +** ARECA_RAID_GONE in arcmsr_alloc_ccb_pool +** 1.20.00.15 11/06/2007 Erich Chen & Nick Cheng +** 1. add conditional declaration for +** arcmsr_pci_error_detected() and +** arcmsr_pci_slot_reset +** 1.20.00.15 11/23/2007 Erich Chen & Nick Cheng +** 1.check if the sg list member number +** exceeds arcmsr default limit in arcmsr_build_ccb() +** 2.change the returned value type of arcmsr_build_ccb() +** from "void" to "int" +** 3.add the conditional check if arcmsr_build_ccb() +** returns FAILED +** 1.20.00.15 12/04/2007 Erich Chen & Nick Cheng +** 1. modify arcmsr_drain_donequeue() to ignore unknown +** command and let kernel process command timeout. +** This could handle IO request violating max. segments +** while Linux XFS over DM-CRYPT. +** Thanks to Milan Broz's comments <mbroz@redhat.com> +** 1.20.00.15 12/24/2007 Erich Chen & Nick Cheng +** 1.fix the portability problems +** 2.fix type B where we should _not_ iounmap() acb->pmu; +** it's not ioremapped. +** 3.add return -ENOMEM if ioremap() fails +** 4.transfer IS_SG64_ADDR w/ cpu_to_le32() +** in arcmsr_build_ccb +** 5. 
modify acb->devstate[i][j] as ARECA_RAID_GONE instead of +** ARECA_RAID_GOOD in arcmsr_alloc_ccb_pool() +** 6.fix arcmsr_cdb->Context as (unsigned long)arcmsr_cdb +** 7.add the checking state of +** (outbound_intstatus & ARCMSR_MU_OUTBOUND_HANDLE_INT) == 0 +** in arcmsr_handle_hba_isr +** 8.replace pci_alloc_consistent()/pci_free_consistent() with kmalloc()/kfree() in arcmsr_iop_message_xfer() +** 9. fix the release of dma memory for type B in arcmsr_free_ccb_pool() +** 10.fix the arcmsr_polling_hbb_ccbdone() ************************************************************************** diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt index 6f70f2b9327e..a6d5354639b2 100644 --- a/Documentation/scsi/scsi_mid_low_api.txt +++ b/Documentation/scsi/scsi_mid_low_api.txt @@ -1407,7 +1407,7 @@ Credits ======= The following people have contributed to this document: Mike Anderson <andmike at us dot ibm dot com> - James Bottomley <James dot Bottomley at steeleye dot com> + James Bottomley <James dot Bottomley at hansenpartnership dot com> Patrick Mansfield <patmans at us dot ibm dot com> Christoph Hellwig <hch at infradead dot org> Doug Ledford <dledford at redhat dot com> diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index aa986a35e994..f99254327ae5 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -23,6 +23,7 @@ Currently, these files are in /proc/sys/fs: - inode-max - inode-nr - inode-state +- nr_open - overflowuid - overflowgid - suid_dumpable @@ -91,6 +92,15 @@ usage of file handles and you don't need to increase the maximum. ============================================================== +nr_open: + +This denotes the maximum number of file-handles a process can +allocate. Default value is 1024*1024 (1048576) which should be +enough for most machines. Actual limit depends on RLIMIT_NOFILE +resource limit. 
+ +============================================================== + inode-max, inode-nr & inode-state: As with file handles, the kernel allocates the inode structures diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 8984a5396271..276a7e637822 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -29,7 +29,7 @@ show up in /proc/sys/kernel: - java-interpreter [ binfmt_java, obsolete ] - kstack_depth_to_print [ X86 only ] - l2cr [ PPC only ] -- modprobe ==> Documentation/kmod.txt +- modprobe ==> Documentation/debugging-modules.txt - msgmax - msgmnb - msgmni @@ -41,6 +41,7 @@ show up in /proc/sys/kernel: - pid_max - powersave-nap [ PPC only ] - printk +- randomize_va_space - real-root-dev ==> Documentation/initrd.txt - reboot-cmd [ SPARC only ] - rtsig-max @@ -280,6 +281,34 @@ send before ratelimiting kicks in. ============================================================== +randomize_va_space: + +This option can be used to select the type of process address +space randomization that is used in the system, for architectures +that support this feature. + +0 - Turn the process address space randomization off by default. + +1 - Make the addresses of mmap base, stack and VDSO page randomized. + This, among other things, implies that shared libraries will be + loaded to random addresses. Also for PIE-linked binaries, the location + of code start is randomized. + + With heap randomization, the situation is a little bit more + complicated. + There are a few legacy applications out there (such as some ancient + versions of libc.so.5 from 1996) that assume that brk area starts + just after the end of the code+bss. These applications break when + start of the brk area is randomized. There are however no known + non-legacy applications that would be broken this way, so for most + systems it is safe to choose full randomization.
However there is + a CONFIG_COMPAT_BRK option for systems with ancient and/or broken + binaries, that makes heap non-randomized, but keeps all other + parts of process address space randomized if randomize_va_space + sysctl is turned on. + +============================================================== + reboot-cmd: (Sparc only) ??? This seems to be a way to give an argument to the Sparc diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 24eac1bc735d..8a4863c4edd4 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -32,6 +32,7 @@ Currently, these files are in /proc/sys/vm: - min_unmapped_ratio - min_slab_ratio - panic_on_oom +- oom_dump_tasks - oom_kill_allocating_task - mmap_min_address - numa_zonelist_order @@ -232,6 +233,27 @@ according to your policy of failover. ============================================================= +oom_dump_tasks + +Enables a system-wide task dump (excluding kernel threads) to be +produced when the kernel performs an OOM-killing and includes such +information as pid, uid, tgid, vm size, rss, cpu, oom_adj score, and +name. This is helpful to determine why the OOM killer was invoked +and to identify the rogue task that caused it. + +If this is set to zero, this information is suppressed. On very +large systems with thousands of tasks it may not be feasible to dump +the memory state information for each one. Such systems should not +be forced to incur a performance penalty in OOM conditions when the +information may not be desired. + +If this is set to non-zero, this information is shown whenever the +OOM killer actually kills a memory-hogging task. + +The default value is 0. 
+ +============================================================= + oom_kill_allocating_task This enables or disables killing the OOM-triggering task in diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt new file mode 100644 index 000000000000..ba9c2da5a8c2 --- /dev/null +++ b/Documentation/thermal/sysfs-api.txt @@ -0,0 +1,245 @@ +Generic Thermal Sysfs driver How To +========================= + +Written by Sujith Thomas <sujith.thomas@intel.com>, Zhang Rui <rui.zhang@intel.com> + +Updated: 2 January 2008 + +Copyright (c) 2008 Intel Corporation + + +0. Introduction + +The generic thermal sysfs provides a set of interfaces for thermal zone devices (sensors) +and thermal cooling devices (fan, processor...) to register with the thermal management +solution and to be a part of it. + +This how-to focuses on enabling new thermal zone and cooling devices to participate +in thermal management. +This solution is platform independent and any type of thermal zone devices and +cooling devices should be able to make use of the infrastructure. + +The main task of the thermal sysfs driver is to expose thermal zone attributes as well +as cooling device attributes to the user space. +An intelligent thermal management application can make decisions based on inputs +from thermal zone attributes (the current temperature and trip point temperature) +and throttle appropriate devices. + +[0-*] denotes any positive number starting from 0 +[1-*] denotes any positive number starting from 1 + +1. thermal sysfs driver interface functions + +1.1 thermal zone device interface +1.1.1 struct thermal_zone_device *thermal_zone_device_register(char *name, int trips, + void *devdata, struct thermal_zone_device_ops *ops) + + This interface function adds a new thermal zone device (sensor) to + /sys/class/thermal folder as thermal_zone[0-*]. + It tries to bind all the thermal cooling devices registered at the same time. + + name: the thermal zone name. 
+ trips: the total number of trip points this thermal zone supports. + devdata: device private data + ops: thermal zone device call-backs. + .bind: bind the thermal zone device with a thermal cooling device. + .unbind: unbind the thermal zone device with a thermal cooling device. + .get_temp: get the current temperature of the thermal zone. + .get_mode: get the current mode (user/kernel) of the thermal zone. + "kernel" means thermal management is done in kernel. + "user" will prevent kernel thermal driver actions upon trip points + so that user applications can take charge of thermal management. + .set_mode: set the mode (user/kernel) of the thermal zone. + .get_trip_type: get the type of certain trip point. + .get_trip_temp: get the temperature above which the certain trip point + will be fired. + +1.1.2 void thermal_zone_device_unregister(struct thermal_zone_device *tz) + + This interface function removes the thermal zone device. + It deletes the corresponding entry form /sys/class/thermal folder and unbind all + the thermal cooling devices it uses. + +1.2 thermal cooling device interface +1.2.1 struct thermal_cooling_device *thermal_cooling_device_register(char *name, + void *devdata, struct thermal_cooling_device_ops *) + + This interface function adds a new thermal cooling device (fan/processor/...) to + /sys/class/thermal/ folder as cooling_device[0-*]. + It tries to bind itself to all the thermal zone devices register at the same time. + name: the cooling device name. + devdata: device private data. + ops: thermal cooling devices call-backs. + .get_max_state: get the Maximum throttle state of the cooling device. + .get_cur_state: get the Current throttle state of the cooling device. + .set_cur_state: set the Current throttle state of the cooling device. + +1.2.2 void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev) + + This interface function remove the thermal cooling device. 
+ It deletes the corresponding entry from /sys/class/thermal folder and unbinds + itself from all the thermal zone devices using it. + +1.3 interface for binding a thermal zone device with a thermal cooling device +1.3.1 int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, + int trip, struct thermal_cooling_device *cdev); + + This interface function binds a thermal cooling device to a certain trip point + of a thermal zone device. + This function is usually called in the thermal zone device .bind callback. + tz: the thermal zone device + cdev: thermal cooling device + trip: indicates which trip point the cooling device is associated with + in this thermal zone. + +1.3.2 int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz, + int trip, struct thermal_cooling_device *cdev); + + This interface function unbinds a thermal cooling device from a certain trip point + of a thermal zone device. + This function is usually called in the thermal zone device .unbind callback. + tz: the thermal zone device + cdev: thermal cooling device + trip: indicates which trip point the cooling device is associated with + in this thermal zone. + +2. sysfs attributes structure + +RO read only value +RW read/write value + +All thermal sysfs attributes will be represented under /sys/class/thermal + +Thermal zone device sys I/F, created once it's registered: +|thermal_zone[0-*]: + |-----type: Type of the thermal zone + |-----temp: Current temperature + |-----mode: Working mode of the thermal zone + |-----trip_point_[0-*]_temp: Trip point temperature + |-----trip_point_[0-*]_type: Trip point type + +Thermal cooling device sys I/F, created once it's registered: +|cooling_device[0-*]: + |-----type : Type of the cooling device(processor/fan/...) + |-----max_state: Maximum cooling state of the cooling device + |-----cur_state: Current cooling state of the cooling device + + +These two dynamic attributes are created/removed in pairs.
+They represent the relationship between a thermal zone and its associated cooling device. +They are created/removed for each successful +thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device execution. + +|thermal_zone[0-*] + |-----cdev[0-*]: The [0-*]th cooling device in the current thermal zone + |-----cdev[0-*]_trip_point: Trip point that cdev[0-*] is associated with + + +*************************** +* Thermal zone attributes * +*************************** + +type Strings which represent the thermal zone type. + This is given by thermal zone driver as part of registration. + Eg: "ACPI thermal zone" indicates it's an ACPI thermal device + RO + Optional + +temp Current temperature as reported by thermal zone (sensor) + Unit: degree Celsius + RO + Required + +mode One of the predefined values in [kernel, user] + This file gives information about the algorithm + that is currently managing the thermal zone. + It can be either the default kernel based algorithm + or a user space application. + RW + Optional + kernel = Thermal management in kernel thermal zone driver. + user = Preventing kernel thermal zone driver actions upon + trip points so that user application can take full + charge of the thermal management. + +trip_point_[0-*]_temp The temperature above which trip point will be fired + Unit: degree Celsius + RO + Optional + +trip_point_[0-*]_type Strings which indicate the type of the trip point + E.g. it can be one of critical, hot, passive, + active[0-*] for ACPI thermal zone. + RO + Optional + +cdev[0-*] Sysfs link to the thermal cooling device node where the sys I/F + for cooling device throttling control is presented. + RO + Optional + +cdev[0-*]_trip_point The trip point with which cdev[0-*] is associated in this thermal zone + -1 means the cooling device is not associated with any trip point. 
+ RO + Optional + +****************************** +* Cooling device attributes * +****************************** + +type String which represents the type of device + eg: For generic ACPI: this should be "Fan", + "Processor" or "LCD" + eg. For memory controller device on intel_menlow platform: + this should be "Memory controller" + RO + Optional + +max_state The maximum permissible cooling state of this cooling device. + RO + Required + +cur_state The current cooling state of this cooling device. + the value can be any integer number between 0 and max_state, + cur_state == 0 means no cooling + cur_state == max_state means the maximum cooling. + RW + Required + +3. A simple implementation + +An ACPI thermal zone may support multiple trip points like critical/hot/passive/active. +If an ACPI thermal zone supports critical, passive, active[0] and active[1] at the same time, +it may register itself as a thermal_zone_device (thermal_zone1) with 4 trip points in all. +It has one processor and one fan, which are both registered as thermal_cooling_device. 
+If the processor is listed in _PSL method, and the fan is listed in _AL0 method, +the sys I/F structure will be built like this: + +/sys/class/thermal: + +|thermal_zone1: + |-----type: ACPI thermal zone + |-----temp: 37 + |-----mode: kernel + |-----trip_point_0_temp: 100 + |-----trip_point_0_type: critical + |-----trip_point_1_temp: 80 + |-----trip_point_1_type: passive + |-----trip_point_2_temp: 70 + |-----trip_point_2_type: active[0] + |-----trip_point_3_temp: 60 + |-----trip_point_3_type: active[1] + |-----cdev0: --->/sys/class/thermal/cooling_device0 + |-----cdev0_trip_point: 1 /* cdev0 can be used for passive */ + |-----cdev1: --->/sys/class/thermal/cooling_device3 + |-----cdev1_trip_point: 2 /* cdev1 can be used for active[0]*/ + +|cooling_device0: + |-----type: Processor + |-----max_state: 8 + |-----cur_state: 0 + +|cooling_device3: + |-----type: Fan + |-----max_state: 2 + |-----cur_state: 0 diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt new file mode 100644 index 000000000000..6223eace3c09 --- /dev/null +++ b/Documentation/unaligned-memory-access.txt @@ -0,0 +1,226 @@ +UNALIGNED MEMORY ACCESSES +========================= + +Linux runs on a wide variety of architectures which have varying behaviour +when it comes to memory access. This document presents some details about +unaligned accesses, why you need to write code that doesn't cause them, +and how to write such code! + + +The definition of an unaligned access +===================================== + +Unaligned memory accesses occur when you try to read N bytes of data starting +from an address that is not evenly divisible by N (i.e. addr % N != 0). +For example, reading 4 bytes of data from address 0x10004 is fine, but +reading 4 bytes of data from address 0x10005 would be an unaligned memory +access. + +The above may seem a little vague, as memory access can happen in different +ways. 
The context here is at the machine code level: certain instructions read +or write a number of bytes to or from memory (e.g. movb, movw, movl in x86 +assembly). As will become clear, it is relatively easy to spot C statements +which will compile to multiple-byte memory access instructions, namely when +dealing with types such as u16, u32 and u64. + + +Natural alignment +================= + +The rule mentioned above forms what we refer to as natural alignment: +When accessing N bytes of memory, the base memory address must be evenly +divisible by N, i.e. addr % N == 0. + +When writing code, assume the target architecture has natural alignment +requirements. + +In reality, only a few architectures require natural alignment on all sizes +of memory access. However, we must consider ALL supported architectures; +writing code that satisfies natural alignment requirements is the easiest way +to achieve full portability. + + +Why unaligned access is bad +=========================== + +The effects of performing an unaligned memory access vary from architecture +to architecture. It would be easy to write a whole document on the differences +here; a summary of the common scenarios is presented below: + + - Some architectures are able to perform unaligned memory accesses + transparently, but there is usually a significant performance cost. + - Some architectures raise processor exceptions when unaligned accesses + happen. The exception handler is able to correct the unaligned access, + at significant cost to performance. + - Some architectures raise processor exceptions when unaligned accesses + happen, but the exceptions do not contain enough information for the + unaligned access to be corrected. + - Some architectures are not capable of unaligned memory access, but will + silently perform a different memory access to the one that was requested, + resulting in a subtle code bug that is hard to detect! 
+ +It should be obvious from the above that if your code causes unaligned +memory accesses to happen, your code will not work correctly on certain +platforms and will cause performance problems on others. + + +Code that does not cause unaligned access +========================================= + +At first, the concepts above may seem a little hard to relate to actual +coding practice. After all, you don't have a great deal of control over +memory addresses of certain variables, etc. + +Fortunately things are not too complex, as in most cases, the compiler +ensures that things will work for you. For example, take the following +structure: + + struct foo { + u16 field1; + u32 field2; + u8 field3; + }; + +Let us assume that an instance of the above structure resides in memory +starting at address 0x10000. With a basic level of understanding, it would +not be unreasonable to expect that accessing field2 would cause an unaligned +access. You'd be expecting field2 to be located at offset 2 bytes into the +structure, i.e. address 0x10002, but that address is not evenly divisible +by 4 (remember, we're reading a 4 byte value here). + +Fortunately, the compiler understands the alignment constraints, so in the +above case it would insert 2 bytes of padding in between field1 and field2. +Therefore, for standard structure types you can always rely on the compiler +to pad structures so that accesses to fields are suitably aligned (assuming +you do not cast the field to a type of different length). + +Similarly, you can also rely on the compiler to align variables and function +parameters to a naturally aligned scheme, based on the size of the type of +the variable. + +At this point, it should be clear that accessing a single byte (u8 or char) +will never cause an unaligned access, because all memory addresses are evenly +divisible by one. 
+ +On a related topic, with the above considerations in mind you may observe +that you could reorder the fields in the structure in order to place fields +where padding would otherwise be inserted, and hence reduce the overall +resident memory size of structure instances. The optimal layout of the +above example is: + + struct foo { + u32 field2; + u16 field1; + u8 field3; + }; + +For a natural alignment scheme, the compiler would only have to add a single +byte of padding at the end of the structure. This padding is added in order +to satisfy alignment constraints for arrays of these structures. + +Another point worth mentioning is the use of __attribute__((packed)) on a +structure type. This GCC-specific attribute tells the compiler never to +insert any padding within structures, useful when you want to use a C struct +to represent some data that comes in a fixed arrangement 'off the wire'. + +You might be inclined to believe that usage of this attribute can easily +lead to unaligned accesses when accessing fields that do not satisfy +architectural alignment requirements. However, again, the compiler is aware +of the alignment constraints and will generate extra instructions to perform +the memory access in a way that does not cause unaligned access. Of course, +the extra instructions obviously cause a loss in performance compared to the +non-packed case, so the packed attribute should only be used when avoiding +structure padding is of importance. + + +Code that causes unaligned access +================================= + +With the above in mind, let's move onto a real life example of a function +that can cause an unaligned memory access. The following function adapted +from include/linux/etherdevice.h is an optimized routine to compare two +ethernet MAC addresses for equality. 
+ +unsigned int compare_ether_addr(const u8 *addr1, const u8 *addr2) +{ + const u16 *a = (const u16 *) addr1; + const u16 *b = (const u16 *) addr2; + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; +} + +In the above function, the reference to a[0] causes 2 bytes (16 bits) to +be read from memory starting at address addr1. Think about what would happen +if addr1 was an odd address such as 0x10003. (Hint: it'd be an unaligned +access.) + +Despite the potential unaligned access problems with the above function, it +is included in the kernel anyway but is understood to only work on +16-bit-aligned addresses. It is up to the caller to ensure this alignment or +not use this function at all. This alignment-unsafe function is still useful +as it is a decent optimization for the cases when you can ensure alignment, +which is true almost all of the time in ethernet networking context. + + +Here is another example of some code that could cause unaligned accesses: + void myfunc(u8 *data, u32 value) + { + [...] + *((u32 *) data) = cpu_to_le32(value); + [...] + } + +This code will cause unaligned accesses every time the data parameter points +to an address that is not evenly divisible by 4. + +In summary, the 2 main scenarios where you may run into unaligned access +problems involve: + 1. Casting variables to types of different lengths + 2. Pointer arithmetic followed by access to at least 2 bytes of data + + +Avoiding unaligned accesses +=========================== + +The easiest way to avoid unaligned access is to use the get_unaligned() and +put_unaligned() macros provided by the <asm/unaligned.h> header file. + +Going back to an earlier example of code that potentially causes unaligned +access: + + void myfunc(u8 *data, u32 value) + { + [...] + *((u32 *) data) = cpu_to_le32(value); + [...] + } + +To avoid the unaligned memory access, you would rewrite it as follows: + + void myfunc(u8 *data, u32 value) + { + [...] 
+ value = cpu_to_le32(value); + put_unaligned(value, (u32 *) data); + [...] + } + +The get_unaligned() macro works similarly. Assuming 'data' is a pointer to +memory and you wish to avoid unaligned access, its usage is as follows: + + u32 value = get_unaligned((u32 *) data); + +These macros work for memory accesses of any length (not just 32 bits as +in the examples above). Be aware that when compared to standard access of +aligned memory, using these macros to access unaligned memory can be costly in +terms of performance. + +If use of such macros is not convenient, another option is to use memcpy(), +where the source or destination (or both) are of type u8* or unsigned char*. +Due to the byte-wise nature of this operation, unaligned accesses are avoided. + +-- +Author: Daniel Drake <dsd@gentoo.org> +With help from: Alan Cox, Avuton Olrich, Heikki Orsila, Jan Engelhardt, +Johannes Berg, Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock, +Uli Kunitz, Vadim Lobanov + diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c index 488c1f31b992..7123fee708ca 100644 --- a/Documentation/vm/slabinfo.c +++ b/Documentation/vm/slabinfo.c @@ -32,6 +32,13 @@ struct slabinfo { int sanity_checks, slab_size, store_user, trace; int order, poison, reclaim_account, red_zone; unsigned long partial, objects, slabs; + unsigned long alloc_fastpath, alloc_slowpath; + unsigned long free_fastpath, free_slowpath; + unsigned long free_frozen, free_add_partial, free_remove_partial; + unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill; + unsigned long cpuslab_flush, deactivate_full, deactivate_empty; + unsigned long deactivate_to_head, deactivate_to_tail; + unsigned long deactivate_remote_frees; int numa[MAX_NODES]; int numa_partial[MAX_NODES]; } slabinfo[MAX_SLABS]; @@ -64,8 +71,10 @@ int show_inverted = 0; int show_single_ref = 0; int show_totals = 0; int sort_size = 0; +int sort_active = 0; int set_debug = 0; int show_ops = 0; +int show_activity 
= 0; /* Debug options */ int sanity = 0; @@ -93,8 +102,10 @@ void usage(void) printf("slabinfo 5/7/2007. (c) 2007 sgi. clameter@sgi.com\n\n" "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n" "-a|--aliases Show aliases\n" + "-A|--activity Most active slabs first\n" "-d<options>|--debug=<options> Set/Clear Debug options\n" - "-e|--empty Show empty slabs\n" + "-D|--display-active Switch line format to activity\n" + "-e|--empty Show empty slabs\n" "-f|--first-alias Show first alias\n" "-h|--help Show usage information\n" "-i|--inverted Inverted list\n" @@ -281,8 +292,11 @@ int line = 0; void first_line(void) { - printf("Name Objects Objsize Space " - "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n"); + if (show_activity) + printf("Name Objects Alloc Free %%Fast\n"); + else + printf("Name Objects Objsize Space " + "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n"); } /* @@ -309,6 +323,12 @@ unsigned long slab_size(struct slabinfo *s) return s->slabs * (page_size << s->order); } +unsigned long slab_activity(struct slabinfo *s) +{ + return s->alloc_fastpath + s->free_fastpath + + s->alloc_slowpath + s->free_slowpath; +} + void slab_numa(struct slabinfo *s, int mode) { int node; @@ -392,6 +412,71 @@ const char *onoff(int x) return "Off"; } +void slab_stats(struct slabinfo *s) +{ + unsigned long total_alloc; + unsigned long total_free; + unsigned long total; + + if (!s->alloc_slab) + return; + + total_alloc = s->alloc_fastpath + s->alloc_slowpath; + total_free = s->free_fastpath + s->free_slowpath; + + if (!total_alloc) + return; + + printf("\n"); + printf("Slab Perf Counter Alloc Free %%Al %%Fr\n"); + printf("--------------------------------------------------\n"); + printf("Fastpath %8lu %8lu %3lu %3lu\n", + s->alloc_fastpath, s->free_fastpath, + s->alloc_fastpath * 100 / total_alloc, + s->free_fastpath * 100 / total_free); + printf("Slowpath %8lu %8lu %3lu %3lu\n", + total_alloc - s->alloc_fastpath, s->free_slowpath, + (total_alloc - s->alloc_fastpath) * 100 / total_alloc, + 
s->free_slowpath * 100 / total_free); + printf("Page Alloc %8lu %8lu %3lu %3lu\n", + s->alloc_slab, s->free_slab, + s->alloc_slab * 100 / total_alloc, + s->free_slab * 100 / total_free); + printf("Add partial %8lu %8lu %3lu %3lu\n", + s->deactivate_to_head + s->deactivate_to_tail, + s->free_add_partial, + (s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc, + s->free_add_partial * 100 / total_free); + printf("Remove partial %8lu %8lu %3lu %3lu\n", + s->alloc_from_partial, s->free_remove_partial, + s->alloc_from_partial * 100 / total_alloc, + s->free_remove_partial * 100 / total_free); + + printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n", + s->deactivate_remote_frees, s->free_frozen, + s->deactivate_remote_frees * 100 / total_alloc, + s->free_frozen * 100 / total_free); + + printf("Total %8lu %8lu\n\n", total_alloc, total_free); + + if (s->cpuslab_flush) + printf("Flushes %8lu\n", s->cpuslab_flush); + + if (s->alloc_refill) + printf("Refill %8lu\n", s->alloc_refill); + + total = s->deactivate_full + s->deactivate_empty + + s->deactivate_to_head + s->deactivate_to_tail; + + if (total) + printf("Deactivate Full=%lu(%lu%%) Empty=%lu(%lu%%) " + "ToHead=%lu(%lu%%) ToTail=%lu(%lu%%)\n", + s->deactivate_full, (s->deactivate_full * 100) / total, + s->deactivate_empty, (s->deactivate_empty * 100) / total, + s->deactivate_to_head, (s->deactivate_to_head * 100) / total, + s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total); +} + void report(struct slabinfo *s) { if (strcmp(s->name, "*") == 0) @@ -430,6 +515,7 @@ void report(struct slabinfo *s) ops(s); show_tracking(s); slab_numa(s, 1); + slab_stats(s); } void slabcache(struct slabinfo *s) @@ -479,13 +565,27 @@ void slabcache(struct slabinfo *s) *p++ = 'T'; *p = 0; - printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n", - s->name, s->objects, s->object_size, size_str, dist_str, - s->objs_per_slab, s->order, - s->slabs ? (s->partial * 100) / s->slabs : 100, - s->slabs ? 
(s->objects * s->object_size * 100) / - (s->slabs * (page_size << s->order)) : 100, - flags); + if (show_activity) { + unsigned long total_alloc; + unsigned long total_free; + + total_alloc = s->alloc_fastpath + s->alloc_slowpath; + total_free = s->free_fastpath + s->free_slowpath; + + printf("%-21s %8ld %8ld %8ld %3ld %3ld \n", + s->name, s->objects, + total_alloc, total_free, + total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0, + total_free ? (s->free_fastpath * 100 / total_free) : 0); + } + else + printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n", + s->name, s->objects, s->object_size, size_str, dist_str, + s->objs_per_slab, s->order, + s->slabs ? (s->partial * 100) / s->slabs : 100, + s->slabs ? (s->objects * s->object_size * 100) / + (s->slabs * (page_size << s->order)) : 100, + flags); } /* @@ -892,6 +992,8 @@ void sort_slabs(void) if (sort_size) result = slab_size(s1) < slab_size(s2); + else if (sort_active) + result = slab_activity(s1) < slab_activity(s2); else result = strcasecmp(s1->name, s2->name); @@ -1074,6 +1176,23 @@ void read_slab_dir(void) free(t); slab->store_user = get_obj("store_user"); slab->trace = get_obj("trace"); + slab->alloc_fastpath = get_obj("alloc_fastpath"); + slab->alloc_slowpath = get_obj("alloc_slowpath"); + slab->free_fastpath = get_obj("free_fastpath"); + slab->free_slowpath = get_obj("free_slowpath"); + slab->free_frozen= get_obj("free_frozen"); + slab->free_add_partial = get_obj("free_add_partial"); + slab->free_remove_partial = get_obj("free_remove_partial"); + slab->alloc_from_partial = get_obj("alloc_from_partial"); + slab->alloc_slab = get_obj("alloc_slab"); + slab->alloc_refill = get_obj("alloc_refill"); + slab->free_slab = get_obj("free_slab"); + slab->cpuslab_flush = get_obj("cpuslab_flush"); + slab->deactivate_full = get_obj("deactivate_full"); + slab->deactivate_empty = get_obj("deactivate_empty"); + slab->deactivate_to_head = get_obj("deactivate_to_head"); + slab->deactivate_to_tail = 
get_obj("deactivate_to_tail"); + slab->deactivate_remote_frees = get_obj("deactivate_remote_frees"); chdir(".."); if (slab->name[0] == ':') alias_targets++; @@ -1124,7 +1243,9 @@ void output_slabs(void) struct option opts[] = { { "aliases", 0, NULL, 'a' }, + { "activity", 0, NULL, 'A' }, { "debug", 2, NULL, 'd' }, + { "display-activity", 0, NULL, 'D' }, { "empty", 0, NULL, 'e' }, { "first-alias", 0, NULL, 'f' }, { "help", 0, NULL, 'h' }, @@ -1149,7 +1270,7 @@ int main(int argc, char *argv[]) page_size = getpagesize(); - while ((c = getopt_long(argc, argv, "ad::efhil1noprstvzTS", + while ((c = getopt_long(argc, argv, "aAd::Defhil1noprstvzTS", opts, NULL)) != -1) switch (c) { case '1': @@ -1158,11 +1279,17 @@ int main(int argc, char *argv[]) case 'a': show_alias = 1; break; + case 'A': + sort_active = 1; + break; case 'd': set_debug = 1; if (!debug_opt_scan(optarg)) fatal("Invalid debug option '%s'\n", optarg); break; + case 'D': + show_activity = 1; + break; case 'e': show_empty = 1; break; diff --git a/Documentation/w1/masters/00-INDEX b/Documentation/w1/masters/00-INDEX index 752613c4cea2..7b0ceaaad7af 100644 --- a/Documentation/w1/masters/00-INDEX +++ b/Documentation/w1/masters/00-INDEX @@ -4,3 +4,5 @@ ds2482 - The Maxim/Dallas Semiconductor DS2482 provides 1-wire busses. ds2490 - The Maxim/Dallas Semiconductor DS2490 builds USB <-> W1 bridges. +w1-gpio + - GPIO 1-wire bus master driver. diff --git a/Documentation/w1/masters/w1-gpio b/Documentation/w1/masters/w1-gpio new file mode 100644 index 000000000000..af5d3b4aa851 --- /dev/null +++ b/Documentation/w1/masters/w1-gpio @@ -0,0 +1,33 @@ +Kernel driver w1-gpio +===================== + +Author: Ville Syrjala <syrjala@sci.fi> + + +Description +----------- + +GPIO 1-wire bus master driver. The driver uses the GPIO API to control the +wire and the GPIO pin can be specified using platform data. 
+ + +Example (mach-at91) +------------------- + +#include <linux/w1-gpio.h> + +static struct w1_gpio_platform_data foo_w1_gpio_pdata = { + .pin = AT91_PIN_PB20, + .is_open_drain = 1, +}; + +static struct platform_device foo_w1_device = { + .name = "w1-gpio", + .id = -1, + .dev.platform_data = &foo_w1_gpio_pdata, +}; + +... + at91_set_GPIO_periph(foo_w1_gpio_pdata.pin, 1); + at91_set_multi_drive(foo_w1_gpio_pdata.pin, 1); + platform_device_register(&foo_w1_device); |