diff options
510 files changed, 13413 insertions, 5339 deletions
diff --git a/Documentation/Smack.txt b/Documentation/Smack.txt new file mode 100644 index 000000000000..989c2fcd8111 --- /dev/null +++ b/Documentation/Smack.txt @@ -0,0 +1,493 @@ + + + "Good for you, you've decided to clean the elevator!" + - The Elevator, from Dark Star + +Smack is the the Simplified Mandatory Access Control Kernel. +Smack is a kernel based implementation of mandatory access +control that includes simplicity in its primary design goals. + +Smack is not the only Mandatory Access Control scheme +available for Linux. Those new to Mandatory Access Control +are encouraged to compare Smack with the other mechanisms +available to determine which is best suited to the problem +at hand. + +Smack consists of three major components: + - The kernel + - A start-up script and a few modified applications + - Configuration data + +The kernel component of Smack is implemented as a Linux +Security Modules (LSM) module. It requires netlabel and +works best with file systems that support extended attributes, +although xattr support is not strictly required. +It is safe to run a Smack kernel under a "vanilla" distribution. +Smack kernels use the CIPSO IP option. Some network +configurations are intolerant of IP options and can impede +access to systems that use them as Smack does. + +The startup script etc-init.d-smack should be installed +in /etc/init.d/smack and should be invoked early in the +start-up process. On Fedora rc5.d/S02smack is recommended. +This script ensures that certain devices have the correct +Smack attributes and loads the Smack configuration if +any is defined. This script invokes two programs that +ensure configuration data is properly formatted. These +programs are /usr/sbin/smackload and /usr/sin/smackcipso. +The system will run just fine without these programs, +but it will be difficult to set access rules properly. + +A version of "ls" that provides a "-M" option to display +Smack labels on long listing is available. + +A hacked version of sshd that allows network logins by users +with specific Smack labels is available. This version does +not work for scp. You must set the /etc/ssh/sshd_config +line: + UsePrivilegeSeparation no + +The format of /etc/smack/usr is: + + username smack + +In keeping with the intent of Smack, configuration data is +minimal and not strictly required. The most important +configuration step is mounting the smackfs pseudo filesystem. + +Add this line to /etc/fstab: + + smackfs /smack smackfs smackfsdef=* 0 0 + +and create the /smack directory for mounting. + +Smack uses extended attributes (xattrs) to store file labels. +The command to set a Smack label on a file is: + + # attr -S -s SMACK64 -V "value" path + +NOTE: Smack labels are limited to 23 characters. The attr command + does not enforce this restriction and can be used to set + invalid Smack labels on files. + +If you don't do anything special all users will get the floor ("_") +label when they log in. If you do want to log in via the hacked ssh +at other labels use the attr command to set the smack value on the +home directory and it's contents. + +You can add access rules in /etc/smack/accesses. They take the form: + + subjectlabel objectlabel access + +access is a combination of the letters rwxa which specify the +kind of access permitted a subject with subjectlabel on an +object with objectlabel. If there is no rule no access is allowed. + +A process can see the smack label it is running with by +reading /proc/self/attr/current. A privileged process can +set the process smack by writing there. + +Look for additional programs on http://schaufler-ca.com + +From the Smack Whitepaper: + +The Simplified Mandatory Access Control Kernel + +Casey Schaufler +casey@schaufler-ca.com + +Mandatory Access Control + +Computer systems employ a variety of schemes to constrain how information is +shared among the people and services using the machine. Some of these schemes +allow the program or user to decide what other programs or users are allowed +access to pieces of data. These schemes are called discretionary access +control mechanisms because the access control is specified at the discretion +of the user. Other schemes do not leave the decision regarding what a user or +program can access up to users or programs. These schemes are called mandatory +access control mechanisms because you don't have a choice regarding the users +or programs that have access to pieces of data. + +Bell & LaPadula + +From the middle of the 1980's until the turn of the century Mandatory Access +Control (MAC) was very closely associated with the Bell & LaPadula security +model, a mathematical description of the United States Department of Defense +policy for marking paper documents. MAC in this form enjoyed a following +within the Capital Beltway and Scandinavian supercomputer centers but was +often sited as failing to address general needs. + +Domain Type Enforcement + +Around the turn of the century Domain Type Enforcement (DTE) became popular. +This scheme organizes users, programs, and data into domains that are +protected from each other. This scheme has been widely deployed as a component +of popular Linux distributions. The administrative overhead required to +maintain this scheme and the detailed understanding of the whole system +necessary to provide a secure domain mapping leads to the scheme being +disabled or used in limited ways in the majority of cases. + +Smack + +Smack is a Mandatory Access Control mechanism designed to provide useful MAC +while avoiding the pitfalls of its predecessors. The limitations of Bell & +LaPadula are addressed by providing a scheme whereby access can be controlled +according to the requirements of the system and its purpose rather than those +imposed by an arcane government policy. The complexity of Domain Type +Enforcement and avoided by defining access controls in terms of the access +modes already in use. + +Smack Terminology + +The jargon used to talk about Smack will be familiar to those who have dealt +with other MAC systems and shouldn't be too difficult for the uninitiated to +pick up. There are four terms that are used in a specific way and that are +especially important: + + Subject: A subject is an active entity on the computer system. + On Smack a subject is a task, which is in turn the basic unit + of execution. + + Object: An object is a passive entity on the computer system. + On Smack files of all types, IPC, and tasks can be objects. + + Access: Any attempt by a subject to put information into or get + information from an object is an access. + + Label: Data that identifies the Mandatory Access Control + characteristics of a subject or an object. + +These definitions are consistent with the traditional use in the security +community. There are also some terms from Linux that are likely to crop up: + + Capability: A task that possesses a capability has permission to + violate an aspect of the system security policy, as identified by + the specific capability. A task that possesses one or more + capabilities is a privileged task, whereas a task with no + capabilities is an unprivileged task. + + Privilege: A task that is allowed to violate the system security + policy is said to have privilege. As of this writing a task can + have privilege either by possessing capabilities or by having an + effective user of root. + +Smack Basics + +Smack is an extension to a Linux system. It enforces additional restrictions +on what subjects can access which objects, based on the labels attached to +each of the subject and the object. + +Labels + +Smack labels are ASCII character strings, one to twenty-three characters in +length. Single character labels using special characters, that being anything +other than a letter or digit, are reserved for use by the Smack development +team. Smack labels are unstructured, case sensitive, and the only operation +ever performed on them is comparison for equality. Smack labels cannot +contain unprintable characters or the "/" (slash) character. + +There are some predefined labels: + + _ Pronounced "floor", a single underscore character. + ^ Pronounced "hat", a single circumflex character. + * Pronounced "star", a single asterisk character. + ? Pronounced "huh", a single question mark character. + +Every task on a Smack system is assigned a label. System tasks, such as +init(8) and systems daemons, are run with the floor ("_") label. User tasks +are assigned labels according to the specification found in the +/etc/smack/user configuration file. + +Access Rules + +Smack uses the traditional access modes of Linux. These modes are read, +execute, write, and occasionally append. There are a few cases where the +access mode may not be obvious. These include: + + Signals: A signal is a write operation from the subject task to + the object task. + Internet Domain IPC: Transmission of a packet is considered a + write operation from the source task to the destination task. + +Smack restricts access based on the label attached to a subject and the label +attached to the object it is trying to access. The rules enforced are, in +order: + + 1. Any access requested by a task labeled "*" is denied. + 2. A read or execute access requested by a task labeled "^" + is permitted. + 3. A read or execute access requested on an object labeled "_" + is permitted. + 4. Any access requested on an object labeled "*" is permitted. + 5. Any access requested by a task on an object with the same + label is permitted. + 6. Any access requested that is explicitly defined in the loaded + rule set is permitted. + 7. Any other access is denied. + +Smack Access Rules + +With the isolation provided by Smack access separation is simple. There are +many interesting cases where limited access by subjects to objects with +different labels is desired. One example is the familiar spy model of +sensitivity, where a scientist working on a highly classified project would be +able to read documents of lower classifications and anything she writes will +be "born" highly classified. To accommodate such schemes Smack includes a +mechanism for specifying rules allowing access between labels. + +Access Rule Format + +The format of an access rule is: + + subject-label object-label access + +Where subject-label is the Smack label of the task, object-label is the Smack +label of the thing being accessed, and access is a string specifying the sort +of access allowed. The Smack labels are limited to 23 characters. The access +specification is searched for letters that describe access modes: + + a: indicates that append access should be granted. + r: indicates that read access should be granted. + w: indicates that write access should be granted. + x: indicates that execute access should be granted. + +Uppercase values for the specification letters are allowed as well. +Access mode specifications can be in any order. Examples of acceptable rules +are: + + TopSecret Secret rx + Secret Unclass R + Manager Game x + User HR w + New Old rRrRr + Closed Off - + +Examples of unacceptable rules are: + + Top Secret Secret rx + Ace Ace r + Odd spells waxbeans + +Spaces are not allowed in labels. Since a subject always has access to files +with the same label specifying a rule for that case is pointless. Only +valid letters (rwxaRWXA) and the dash ('-') character are allowed in +access specifications. The dash is a placeholder, so "a-r" is the same +as "ar". A lone dash is used to specify that no access should be allowed. + +Applying Access Rules + +The developers of Linux rarely define new sorts of things, usually importing +schemes and concepts from other systems. Most often, the other systems are +variants of Unix. Unix has many endearing properties, but consistency of +access control models is not one of them. Smack strives to treat accesses as +uniformly as is sensible while keeping with the spirit of the underlying +mechanism. + +File system objects including files, directories, named pipes, symbolic links, +and devices require access permissions that closely match those used by mode +bit access. To open a file for reading read access is required on the file. To +search a directory requires execute access. Creating a file with write access +requires both read and write access on the containing directory. Deleting a +file requires read and write access to the file and to the containing +directory. It is possible that a user may be able to see that a file exists +but not any of its attributes by the circumstance of having read access to the +containing directory but not to the differently labeled file. This is an +artifact of the file name being data in the directory, not a part of the file. + +IPC objects, message queues, semaphore sets, and memory segments exist in flat +namespaces and access requests are only required to match the object in +question. + +Process objects reflect tasks on the system and the Smack label used to access +them is the same Smack label that the task would use for its own access +attempts. Sending a signal via the kill() system call is a write operation +from the signaler to the recipient. Debugging a process requires both reading +and writing. Creating a new task is an internal operation that results in two +tasks with identical Smack labels and requires no access checks. + +Sockets are data structures attached to processes and sending a packet from +one process to another requires that the sender have write access to the +receiver. The receiver is not required to have read access to the sender. + +Setting Access Rules + +The configuration file /etc/smack/accesses contains the rules to be set at +system startup. The contents are written to the special file /smack/load. +Rules can be written to /smack/load at any time and take effect immediately. +For any pair of subject and object labels there can be only one rule, with the +most recently specified overriding any earlier specification. + +The program smackload is provided to ensure data is formatted +properly when written to /smack/load. This program reads lines +of the form + + subjectlabel objectlabel mode. + +Task Attribute + +The Smack label of a process can be read from /proc/<pid>/attr/current. A +process can read its own Smack label from /proc/self/attr/current. A +privileged process can change its own Smack label by writing to +/proc/self/attr/current but not the label of another process. + +File Attribute + +The Smack label of a filesystem object is stored as an extended attribute +named SMACK64 on the file. This attribute is in the security namespace. It can +only be changed by a process with privilege. + +Privilege + +A process with CAP_MAC_OVERRIDE is privileged. + +Smack Networking + +As mentioned before, Smack enforces access control on network protocol +transmissions. Every packet sent by a Smack process is tagged with its Smack +label. This is done by adding a CIPSO tag to the header of the IP packet. Each +packet received is expected to have a CIPSO tag that identifies the label and +if it lacks such a tag the network ambient label is assumed. Before the packet +is delivered a check is made to determine that a subject with the label on the +packet has write access to the receiving process and if that is not the case +the packet is dropped. + +CIPSO Configuration + +It is normally unnecessary to specify the CIPSO configuration. The default +values used by the system handle all internal cases. Smack will compose CIPSO +label values to match the Smack labels being used without administrative +intervention. Unlabeled packets that come into the system will be given the +ambient label. + +Smack requires configuration in the case where packets from a system that is +not smack that speaks CIPSO may be encountered. Usually this will be a Trusted +Solaris system, but there are other, less widely deployed systems out there. +CIPSO provides 3 important values, a Domain Of Interpretation (DOI), a level, +and a category set with each packet. The DOI is intended to identify a group +of systems that use compatible labeling schemes, and the DOI specified on the +smack system must match that of the remote system or packets will be +discarded. The DOI is 3 by default. The value can be read from /smack/doi and +can be changed by writing to /smack/doi. + +The label and category set are mapped to a Smack label as defined in +/etc/smack/cipso. + +A Smack/CIPSO mapping has the form: + + smack level [category [category]*] + +Smack does not expect the level or category sets to be related in any +particular way and does not assume or assign accesses based on them. Some +examples of mappings: + + TopSecret 7 + TS:A,B 7 1 2 + SecBDE 5 2 4 6 + RAFTERS 7 12 26 + +The ":" and "," characters are permitted in a Smack label but have no special +meaning. + +The mapping of Smack labels to CIPSO values is defined by writing to +/smack/cipso. Again, the format of data written to this special file +is highly restrictive, so the program smackcipso is provided to +ensure the writes are done properly. This program takes mappings +on the standard input and sends them to /smack/cipso properly. + +In addition to explicit mappings Smack supports direct CIPSO mappings. One +CIPSO level is used to indicate that the category set passed in the packet is +in fact an encoding of the Smack label. The level used is 250 by default. The +value can be read from /smack/direct and changed by writing to /smack/direct. + +Socket Attributes + +There are two attributes that are associated with sockets. These attributes +can only be set by privileged tasks, but any task can read them for their own +sockets. + + SMACK64IPIN: The Smack label of the task object. A privileged + program that will enforce policy may set this to the star label. + + SMACK64IPOUT: The Smack label transmitted with outgoing packets. + A privileged program may set this to match the label of another + task with which it hopes to communicate. + +Writing Applications for Smack + +There are three sorts of applications that will run on a Smack system. How an +application interacts with Smack will determine what it will have to do to +work properly under Smack. + +Smack Ignorant Applications + +By far the majority of applications have no reason whatever to care about the +unique properties of Smack. Since invoking a program has no impact on the +Smack label associated with the process the only concern likely to arise is +whether the process has execute access to the program. + +Smack Relevant Applications + +Some programs can be improved by teaching them about Smack, but do not make +any security decisions themselves. The utility ls(1) is one example of such a +program. + +Smack Enforcing Applications + +These are special programs that not only know about Smack, but participate in +the enforcement of system policy. In most cases these are the programs that +set up user sessions. There are also network services that provide information +to processes running with various labels. + +File System Interfaces + +Smack maintains labels on file system objects using extended attributes. The +Smack label of a file, directory, or other file system object can be obtained +using getxattr(2). + + len = getxattr("/", "security.SMACK64", value, sizeof (value)); + +will put the Smack label of the root directory into value. A privileged +process can set the Smack label of a file system object with setxattr(2). + + len = strlen("Rubble"); + rc = setxattr("/foo", "security.SMACK64", "Rubble", len, 0); + +will set the Smack label of /foo to "Rubble" if the program has appropriate +privilege. + +Socket Interfaces + +The socket attributes can be read using fgetxattr(2). + +A privileged process can set the Smack label of outgoing packets with +fsetxattr(2). + + len = strlen("Rubble"); + rc = fsetxattr(fd, "security.SMACK64IPOUT", "Rubble", len, 0); + +will set the Smack label "Rubble" on packets going out from the socket if the +program has appropriate privilege. + + rc = fsetxattr(fd, "security.SMACK64IPIN, "*", strlen("*"), 0); + +will set the Smack label "*" as the object label against which incoming +packets will be checked if the program has appropriate privilege. + +Administration + +Smack supports some mount options: + + smackfsdef=label: specifies the label to give files that lack + the Smack label extended attribute. + + smackfsroot=label: specifies the label to assign the root of the + file system if it lacks the Smack extended attribute. + + smackfshat=label: specifies a label that must have read access to + all labels set on the filesystem. Not yet enforced. + + smackfsfloor=label: specifies a label to which all labels set on the + filesystem must have read access. Not yet enforced. + +These mount options apply to all file system types. + diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 0b1b0c008613..e2799b5fafea 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1315,13 +1315,28 @@ for writeout by the pdflush daemons. It is expressed in 100'ths of a second. Data which has been dirty in-memory for longer than this interval will be written out next time a pdflush daemon wakes up. +highmem_is_dirtyable +-------------------- + +Only present if CONFIG_HIGHMEM is set. + +This defaults to 0 (false), meaning that the ratios set above are calculated +as a percentage of lowmem only. This protects against excessive scanning +in page reclaim, swapping and general VM distress. + +Setting this to 1 can be useful on 32 bit machines where you want to make +random changes within an MMAPed file that is larger than your available +lowmem without causing large quantities of random IO. Is is safe if the +behavior of all programs running on the machine is known and memory will +not be otherwise stressed. + legacy_va_layout ---------------- If non-zero, this sysctl disables the new 32-bit mmap mmap layout - the kernel will use the legacy (2.4) layout for all processes. -lower_zone_protection +lowmem_reserve_ratio --------------------- For some specialised workloads on highmem machines it is dangerous for @@ -1341,25 +1356,71 @@ captured into pinned user memory. mechanism will also defend that region from allocations which could use highmem or lowmem). -The `lower_zone_protection' tunable determines how aggressive the kernel is -in defending these lower zones. The default value is zero - no -protection at all. +The `lowmem_reserve_ratio' tunable determines how aggressive the kernel is +in defending these lower zones. If you have a machine which uses highmem or ISA DMA and your applications are using mlock(), or if you are running with no swap then -you probably should increase the lower_zone_protection setting. - -The units of this tunable are fairly vague. It is approximately equal -to "megabytes," so setting lower_zone_protection=100 will protect around 100 -megabytes of the lowmem zone from user allocations. It will also make -those 100 megabytes unavailable for use by applications and by -pagecache, so there is a cost. - -The effects of this tunable may be observed by monitoring -/proc/meminfo:LowFree. Write a single huge file and observe the point -at which LowFree ceases to fall. - -A reasonable value for lower_zone_protection is 100. +you probably should change the lowmem_reserve_ratio setting. + +The lowmem_reserve_ratio is an array. You can see them by reading this file. +- +% cat /proc/sys/vm/lowmem_reserve_ratio +256 256 32 +- +Note: # of this elements is one fewer than number of zones. Because the highest + zone's value is not necessary for following calculation. + +But, these values are not used directly. The kernel calculates # of protection +pages for each zones from them. These are shown as array of protection pages +in /proc/zoneinfo like followings. (This is an example of x86-64 box). +Each zone has an array of protection pages like this. + +- +Node 0, zone DMA + pages free 1355 + min 3 + low 3 + high 4 + : + : + numa_other 0 + protection: (0, 2004, 2004, 2004) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + pagesets + cpu: 0 pcp: 0 + : +- +These protections are added to score to judge whether this zone should be used +for page allocation or should be reclaimed. + +In this example, if normal pages (index=2) are required to this DMA zone and +pages_high is used for watermark, the kernel judges this zone should not be +used because pages_free(1355) is smaller than watermark + protection[2] +(4 + 2004 = 2008). If this protection value is 0, this zone would be used for +normal page requirement. If requirement is DMA zone(index=0), protection[0] +(=0) is used. + +zone[i]'s protection[j] is calculated by following exprssion. + +(i < j): + zone[i]->protection[j] + = (total sums of present_pages from zone[i+1] to zone[j] on the node) + / lowmem_reserve_ratio[i]; +(i = j): + (should not be protected. = 0; +(i > j): + (not necessary, but looks 0) + +The default values of lowmem_reserve_ratio[i] are + 256 (if zone[i] means DMA or DMA32 zone) + 32 (others). +As above expression, they are reciprocal number of ratio. +256 means 1/256. # of protection pages becomes about "0.39%" of total present +pages of higher zones on the node. + +If you would like to protect more pages, smaller values are effective. +The minimum value is 1 (1/1 -> 100%). page-cluster ------------ diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index 6bc2ba215df9..8da724e2a0ff 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt @@ -32,7 +32,7 @@ The exact capabilities of GPIOs vary between systems. Common options: - Input values are likewise readable (1, 0). Some chips support readback of pins configured as "output", which is very useful in such "wire-OR" cases (to support bidirectional signaling). GPIO controllers may have - input de-glitch logic, sometimes with software controls. + input de-glitch/debounce logic, sometimes with software controls. - Inputs can often be used as IRQ signals, often edge triggered but sometimes level triggered. Such IRQs may be configurable as system @@ -60,10 +60,13 @@ used on a board that's wired differently. Only least-common-denominator functionality can be very portable. Other features are platform-specific, and that can be critical for glue logic. -Plus, this doesn't define an implementation framework, just an interface. +Plus, this doesn't require any implementation framework, just an interface. One platform might implement it as simple inline functions accessing chip registers; another might implement it by delegating through abstractions -used for several very different kinds of GPIO controller. +used for several very different kinds of GPIO controller. (There is some +optional code supporting such an implementation strategy, described later +in this document, but drivers acting as clients to the GPIO interface must +not care how it's implemented.) That said, if the convention is supported on their platform, drivers should use it when possible. Platforms should declare GENERIC_GPIO support in @@ -121,6 +124,11 @@ before tasking is enabled, as part of early board setup. For output GPIOs, the value provided becomes the initial output value. This helps avoid signal glitching during system startup. +For compatibility with legacy interfaces to GPIOs, setting the direction +of a GPIO implicitly requests that GPIO (see below) if it has not been +requested already. That compatibility may be removed in the future; +explicitly requesting GPIOs is strongly preferred. + Setting the direction can fail if the GPIO number is invalid, or when that particular GPIO can't be used in that mode. It's generally a bad idea to rely on boot firmware to have set the direction correctly, since @@ -133,6 +141,7 @@ Spinlock-Safe GPIO access ------------------------- Most GPIO controllers can be accessed with memory read/write instructions. That doesn't need to sleep, and can safely be done from inside IRQ handlers. +(That includes hardirq contexts on RT kernels.) Use these calls to access such GPIOs: @@ -145,7 +154,7 @@ Use these calls to access such GPIOs: The values are boolean, zero for low, nonzero for high. When reading the value of an output pin, the value returned should be what's seen on the pin ... that won't always match the specified output value, because of -issues including wire-OR and output latencies. +issues including open-drain signaling and output latencies. The get/set calls have no error returns because "invalid GPIO" should have been reported earlier from gpio_direction_*(). However, note that not all @@ -170,7 +179,8 @@ get to the head of a queue to transmit a command and get its response. This requires sleeping, which can't be done from inside IRQ handlers. Platforms that support this type of GPIO distinguish them from other GPIOs -by returning nonzero from this call: +by returning nonzero from this call (which requires a valid GPIO number, +either explicitly or implicitly requested): int gpio_cansleep(unsigned gpio); @@ -209,8 +219,11 @@ before tasking is enabled, as part of early board setup. These calls serve two basic purposes. One is marking the signals which are actually in use as GPIOs, for better diagnostics; systems may have several hundred potential GPIOs, but often only a dozen are used on any -given board. Another is to catch conflicts between drivers, reporting -errors when drivers wrongly think they have exclusive use of that signal. +given board. Another is to catch conflicts, identifying errors when +(a) two or more drivers wrongly think they have exclusive use of that +signal, or (b) something wrongly believes it's safe to remove drivers +needed to manage a signal that's in active use. That is, requesting a +GPIO can serve as a kind of lock. These two calls are optional because not not all current Linux platforms offer such functionality in their GPIO support; a valid implementation @@ -223,6 +236,9 @@ Note that requesting a GPIO does NOT cause it to be configured in any way; it just marks that GPIO as in use. Separate code must handle any pin setup (e.g. controlling which pin the GPIO uses, pullup/pulldown). +Also note that it's your responsibility to have stopped using a GPIO +before you free it. + GPIOs mapped to IRQs -------------------- @@ -238,7 +254,7 @@ map between them using calls like: Those return either the corresponding number in the other namespace, or else a negative errno code if the mapping can't be done. (For example, -some GPIOs can't used as IRQs.) It is an unchecked error to use a GPIO +some GPIOs can't be used as IRQs.) It is an unchecked error to use a GPIO number that wasn't set up as an input using gpio_direction_input(), or to use an IRQ number that didn't originally come from gpio_to_irq(). @@ -299,17 +315,110 @@ Related to multiplexing is configuration and enabling of the pullups or pulldowns integrated on some platforms. Not all platforms support them, or support them in the same way; and any given board might use external pullups (or pulldowns) so that the on-chip ones should not be used. +(When a circuit needs 5 kOhm, on-chip 100 kOhm resistors won't do.) There are other system-specific mechanisms that are not specified here, like the aforementioned options for input de-glitching and wire-OR output. Hardware may support reading or writing GPIOs in gangs, but that's usually configuration dependent: for GPIOs sharing the same bank. (GPIOs are commonly grouped in banks of 16 or 32, with a given SOC having several such -banks.) Some systems can trigger IRQs from output GPIOs. Code relying on -such mechanisms will necessarily be nonportable. +banks.) Some systems can trigger IRQs from output GPIOs, or read values +from pins not managed as GPIOs. Code relying on such mechanisms will +necessarily be nonportable. -Dynamic definition of GPIOs is not currently supported; for example, as +Dynamic definition of GPIOs is not currently standard; for example, as a side effect of configuring an add-on board with some GPIO expanders. These calls are purely for kernel space, but a userspace API could be built -on top of it. +on top of them. + + +GPIO implementor's framework (OPTIONAL) +======================================= +As noted earlier, there is an optional implementation framework making it +easier for platforms to support different kinds of GPIO controller using +the same programming interface. + +As a debugging aid, if debugfs is available a /sys/kernel/debug/gpio file +will be found there. That will list all the controllers registered through +this framework, and the state of the GPIOs currently in use. + + +Controller Drivers: gpio_chip +----------------------------- +In this framework each GPIO controller is packaged as a "struct gpio_chip" +with information common to each controller of that type: + + - methods to establish GPIO direction + - methods used to access GPIO values + - flag saying whether calls to its methods may sleep + - optional debugfs dump method (showing extra state like pullup config) + - label for diagnostics + +There is also per-instance data, which may come from device.platform_data: +the number of its first GPIO, and how many GPIOs it exposes. + +The code implementing a gpio_chip should support multiple instances of the +controller, possibly using the driver model. That code will configure each +gpio_chip and issue gpiochip_add(). Removing a GPIO controller should be +rare; use gpiochip_remove() when it is unavoidable. + +Most often a gpio_chip is part of an instance-specific structure with state +not exposed by the GPIO interfaces, such as addressing, power management, +and more. Chips such as codecs will have complex non-GPIO state, + +Any debugfs dump method should normally ignore signals which haven't been +requested as GPIOs. They can use gpiochip_is_requested(), which returns +either NULL or the label associated with that GPIO when it was requested. + + +Platform Support +---------------- +To support this framework, a platform's Kconfig will "select HAVE_GPIO_LIB" +and arrange that its <asm/gpio.h> includes <asm-generic/gpio.h> and defines +three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep(). +They may also want to provide a custom value for ARCH_NR_GPIOS. + +Trivial implementations of those functions can directly use framework +code, which always dispatches through the gpio_chip: + + #define gpio_get_value __gpio_get_value + #define gpio_set_value __gpio_set_value + #define gpio_cansleep __gpio_cansleep + +Fancier implementations could instead define those as inline functions with +logic optimizing access to specific SOC-based GPIOs. For example, if the +referenced GPIO is the constant "12", getting or setting its value could +cost as little as two or three instructions, never sleeping. When such an +optimization is not possible those calls must delegate to the framework +code, costing at least a few dozen instructions. For bitbanged I/O, such +instruction savings can be significant. + +For SOCs, platform-specific code defines and registers gpio_chip instances +for each bank of on-chip GPIOs. Those GPIOs should be numbered/labeled to +match chip vendor documentation, and directly match board schematics. They +may well start at zero and go up to a platform-specific limit. Such GPIOs +are normally integrated into platform initialization to make them always be +available, from arch_initcall() or earlier; they can often serve as IRQs. + + +Board Support +------------- +For external GPIO controllers -- such as I2C or SPI expanders, ASICs, multi +function devices, FPGAs or CPLDs -- most often board-specific code handles +registering controller devices and ensures that their drivers know what GPIO +numbers to use with gpiochip_add(). Their numbers often start right after +platform-specific GPIOs. + +For example, board setup code could create structures identifying the range +of GPIOs that chip will expose, and passes them to each GPIO expander chip +using platform_data. Then the chip driver's probe() routine could pass that +data to gpiochip_add(). + +Initialization order can be important. For example, when a device relies on +an I2C-based GPIO, its probe() routine should only be called after that GPIO +becomes available. That may mean the device should not be registered until +calls for that GPIO can work. One way to address such dependencies is for +such gpio_chip controllers to provide setup() and teardown() callbacks to +board specific code; those board specific callbacks would register devices +once all the necessary resources are available. diff --git a/Documentation/i2c/chips/pca9539 b/Documentation/i2c/chips/pca9539 index c4fce6a13537..1d81c530c4a5 100644 --- a/Documentation/i2c/chips/pca9539 +++ b/Documentation/i2c/chips/pca9539 @@ -1,6 +1,9 @@ Kernel driver pca9539 ===================== +NOTE: this driver is deprecated and will be dropped soon, use +drivers/gpio/pca9539.c instead. + Supported chips: * Philips PCA9539 Prefix: 'pca9539' diff --git a/Documentation/pcmcia/driver-changes.txt b/Documentation/pcmcia/driver-changes.txt index 4739c5c3face..96f155e68750 100644 --- a/Documentation/pcmcia/driver-changes.txt +++ b/Documentation/pcmcia/driver-changes.txt @@ -33,8 +33,8 @@ This file details changes in 2.6 which affect PCMCIA card driver authors: and can be used (e.g. for SET_NETDEV_DEV) by using handle_to_dev(client_handle_t * handle). -* Convert internal I/O port addresses to unsigned long (as of 2.6.11) - ioaddr_t should be replaced by kio_addr_t in PCMCIA card drivers. +* Convert internal I/O port addresses to unsigned int (as of 2.6.11) + ioaddr_t should be replaced by unsigned int in PCMCIA card drivers. * irq_mask and irq_list parameters (as of 2.6.11) The irq_mask and irq_list parameters should no longer be used in diff --git a/Documentation/pm_qos_interface.txt b/Documentation/pm_qos_interface.txt new file mode 100644 index 000000000000..49adb1a33514 --- /dev/null +++ b/Documentation/pm_qos_interface.txt @@ -0,0 +1,59 @@ +PM quality of Service interface. + +This interface provides a kernel and user mode interface for registering +performance expectations by drivers, subsystems and user space applications on +one of the parameters. + +Currently we have {cpu_dma_latency, network_latency, network_throughput} as the +initial set of pm_qos parameters. + +The infrastructure exposes multiple misc device nodes one per implemented +parameter. The set of parameters implement is defined by pm_qos_power_init() +and pm_qos_params.h. This is done because having the available parameters +being runtime configurable or changeable from a driver was seen as too easy to +abuse. + +For each parameter a list of performance requirements is maintained along with +an aggregated target value. The aggregated target value is updated with +changes to the requirement list or elements of the list. Typically the +aggregated target value is simply the max or min of the requirement values held +in the parameter list elements. + +From kernel mode the use of this interface is simple: +pm_qos_add_requirement(param_id, name, target_value): +Will insert a named element in the list for that identified PM_QOS parameter +with the target value. Upon change to this list the new target is recomputed +and any registered notifiers are called only if the target value is now +different. + +pm_qos_update_requirement(param_id, name, new_target_value): +Will search the list identified by the param_id for the named list element and +then update its target value, calling the notification tree if the aggregated +target is changed. with that name is already registered. + +pm_qos_remove_requirement(param_id, name): +Will search the identified list for the named element and remove it, after +removal it will update the aggregate target and call the notification tree if +the target was changed as a result of removing the named requirement. + + +From user mode: +Only processes can register a pm_qos requirement. To provide for automatic +cleanup for process the interface requires the process to register its +parameter requirements in the following way: + +To register the default pm_qos target for the specific parameter, the process +must open one of /dev/[cpu_dma_latency, network_latency, network_throughput] + +As long as the device node is held open that process has a registered +requirement on the parameter. The name of the requirement is "process_<PID>" +derived from the current->pid from within the open system call. + +To change the requested target value the process needs to write a s32 value to +the open device node. This translates to a pm_qos_update_requirement call. + +To remove the user mode request for a target value simply close the device +node. + + + diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 6f31f0a247d0..24eac1bc735d 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -22,6 +22,7 @@ Currently, these files are in /proc/sys/vm: - dirty_background_ratio - dirty_expire_centisecs - dirty_writeback_centisecs +- highmem_is_dirtyable (only if CONFIG_HIGHMEM set) - max_map_count - min_free_kbytes - laptop_mode @@ -40,9 +41,9 @@ Currently, these files are in /proc/sys/vm: ============================================================== dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, -dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode, -block_dump, swap_token_timeout, drop-caches, -hugepages_treat_as_movable: +dirty_writeback_centisecs, highmem_is_dirtyable, +vfs_cache_pressure, laptop_mode, block_dump, swap_token_timeout, +drop-caches, hugepages_treat_as_movable: See Documentation/filesystems/proc.txt diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c index 468b76ce66a1..8ac08311f5a5 100644 --- a/arch/alpha/kernel/pci-noop.c +++ b/arch/alpha/kernel/pci-noop.c @@ -165,7 +165,7 @@ dma_alloc_coherent(struct device *dev, size_t size, ret = (void *)__get_free_pages(gfp, get_order(size)); if (ret) { memset(ret, 0, size); - *dma_handle = virt_to_bus(ret); + *dma_handle = virt_to_phys(ret); } return ret; } @@ -184,7 +184,7 @@ dma_map_sg(struct device *dev, struct scatterlist *sgl, int nents, BUG_ON(!sg_page(sg)); va = sg_virt(sg); - sg_dma_address(sg) = (dma_addr_t)virt_to_bus(va); + sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va); sg_dma_len(sg) = sg->length; } diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 2d00a08d3f08..26d3789dfdd0 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -9,6 +9,7 @@ #include <linux/bootmem.h> #include <linux/scatterlist.h> #include <linux/log2.h> +#include <linux/dma-mapping.h> #include <asm/io.h> #include <asm/hwrpb.h> @@ -470,22 +471,29 @@ EXPORT_SYMBOL(pci_free_consistent); #define SG_ENT_PHYS_ADDRESS(SG) __pa(SG_ENT_VIRT_ADDRESS(SG)) static void -sg_classify(struct scatterlist *sg, struct scatterlist *end, int virt_ok) +sg_classify(struct device *dev, struct scatterlist *sg, struct scatterlist *end, + int virt_ok) { unsigned long next_paddr; struct scatterlist *leader; long leader_flag, leader_length; + unsigned int max_seg_size; leader = sg; leader_flag = 0; leader_length = leader->length; next_paddr = SG_ENT_PHYS_ADDRESS(leader) + leader_length; + /* we will not marge sg without device. */ + max_seg_size = dev ? dma_get_max_seg_size(dev) : 0; for (++sg; sg < end; ++sg) { unsigned long addr, len; addr = SG_ENT_PHYS_ADDRESS(sg); len = sg->length; + if (leader_length + len > max_seg_size) + goto new_segment; + if (next_paddr == addr) { sg->dma_address = -1; leader_length += len; @@ -494,6 +502,7 @@ sg_classify(struct scatterlist *sg, struct scatterlist *end, int virt_ok) leader_flag = 1; leader_length += len; } else { +new_segment: leader->dma_address = leader_flag; leader->dma_length = leader_length; leader = sg; @@ -512,7 +521,7 @@ sg_classify(struct scatterlist *sg, struct scatterlist *end, int virt_ok) in the blanks. */ static int -sg_fill(struct scatterlist *leader, struct scatterlist *end, +sg_fill(struct device *dev, struct scatterlist *leader, struct scatterlist *end, struct scatterlist *out, struct pci_iommu_arena *arena, dma_addr_t max_dma, int dac_allowed) { @@ -562,8 +571,8 @@ sg_fill(struct scatterlist *leader, struct scatterlist *end, /* Otherwise, break up the remaining virtually contiguous hunks into individual direct maps and retry. */ - sg_classify(leader, end, 0); - return sg_fill(leader, end, out, arena, max_dma, dac_allowed); + sg_classify(dev, leader, end, 0); + return sg_fill(dev, leader, end, out, arena, max_dma, dac_allowed); } out->dma_address = arena->dma_base + dma_ofs*PAGE_SIZE + paddr; @@ -619,12 +628,15 @@ pci_map_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, struct pci_iommu_arena *arena; dma_addr_t max_dma; int dac_allowed; + struct device *dev; if (direction == PCI_DMA_NONE) BUG(); dac_allowed = pdev ? pci_dac_dma_supported(pdev, pdev->dma_mask) : 0; + dev = pdev ? &pdev->dev : NULL; + /* Fast path single entry scatterlists. */ if (nents == 1) { sg->dma_length = sg->length; @@ -638,7 +650,7 @@ pci_map_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, end = sg + nents; /* First, prepare information about the entries. */ - sg_classify(sg, end, alpha_mv.mv_pci_tbi != 0); + sg_classify(dev, sg, end, alpha_mv.mv_pci_tbi != 0); /* Second, figure out where we're going to map things. */ if (alpha_mv.mv_pci_tbi) { @@ -658,7 +670,7 @@ pci_map_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, for (out = sg; sg < end; ++sg) { if ((int) sg->dma_address < 0) continue; - if (sg_fill(sg, end, out, arena, max_dma, dac_allowed) < 0) + if (sg_fill(dev, sg, end, out, arena, max_dma, dac_allowed) < 0) goto error; out++; } diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index bd5e68cd61e8..beff6297f788 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -58,7 +58,6 @@ static struct notifier_block alpha_panic_block = { #include <asm/system.h> #include <asm/hwrpb.h> #include <asm/dma.h> -#include <asm/io.h> #include <asm/mmu_context.h> #include <asm/console.h> diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 79de99e32c35..ba914af18c4f 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -495,7 +495,7 @@ sys_call_table: .quad sys_epoll_pwait .quad sys_utimensat /* 475 */ .quad sys_signalfd - .quad sys_timerfd + .quad sys_ni_syscall .quad sys_eventfd .size sys_call_table, . - sys_call_table diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 64d19eff3faa..e19e7744e366 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -385,6 +385,7 @@ config ARCH_PXA depends on MMU select ARCH_MTD_XIP select GENERIC_GPIO + select HAVE_GPIO_LIB select GENERIC_TIME select GENERIC_CLOCKEVENTS select TICK_ONESHOT @@ -1122,6 +1123,8 @@ source "drivers/i2c/Kconfig" source "drivers/spi/Kconfig" +source "drivers/gpio/Kconfig" + source "drivers/w1/Kconfig" source "drivers/power/Kconfig" diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index cecf658e3625..283e14fff993 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -359,7 +359,7 @@ CALL(sys_kexec_load) CALL(sys_utimensat) CALL(sys_signalfd) -/* 350 */ CALL(sys_timerfd) +/* 350 */ CALL(sys_ni_syscall) CALL(sys_eventfd) CALL(sys_fallocate) #ifndef syscalls_counted diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index e9dfbab46cb6..eefae1de334c 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -150,7 +150,7 @@ int __cpuinit __cpu_up(unsigned int cpu) secondary_data.pgdir = 0; *pmd_offset(pgd, PHYS_OFFSET) = __pmd(0); - pgd_free(pgd); + pgd_free(&init_mm, pgd); if (ret) { printk(KERN_CRIT "CPU%u: processor failed to boot\n", cpu); diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile index 8604938bd948..6e0c4f5b5ae6 100644 --- a/arch/arm/mach-pxa/Makefile +++ b/arch/arm/mach-pxa/Makefile @@ -3,7 +3,8 @@ # # Common support (must be linked before board specific support) -obj-y += clock.o devices.o generic.o irq.o dma.o time.o +obj-y += clock.o devices.o generic.o irq.o dma.o \ + time.o gpio.o obj-$(CONFIG_PXA25x) += pxa25x.o obj-$(CONFIG_PXA27x) += pxa27x.o obj-$(CONFIG_PXA3xx) += pxa3xx.o mfp.o smemc.o diff --git a/arch/arm/mach-pxa/generic.c b/arch/arm/mach-pxa/generic.c index 76970598f550..80721c610d41 100644 --- a/arch/arm/mach-pxa/generic.c +++ b/arch/arm/mach-pxa/generic.c @@ -32,7 +32,6 @@ #include <asm/mach/map.h> #include <asm/arch/pxa-regs.h> -#include <asm/arch/gpio.h> #include "generic.h" @@ -67,97 +66,6 @@ unsigned int get_memclk_frequency_10khz(void) EXPORT_SYMBOL(get_memclk_frequency_10khz); /* - * Handy function to set GPIO alternate functions - */ -int pxa_last_gpio; - -int pxa_gpio_mode(int gpio_mode) -{ - unsigned long flags; - int gpio = gpio_mode & GPIO_MD_MASK_NR; - int fn = (gpio_mode & GPIO_MD_MASK_FN) >> 8; - int gafr; - - if (gpio > pxa_last_gpio) - return -EINVAL; - - local_irq_save(flags); - if (gpio_mode & GPIO_DFLT_LOW) - GPCR(gpio) = GPIO_bit(gpio); - else if (gpio_mode & GPIO_DFLT_HIGH) - GPSR(gpio) = GPIO_bit(gpio); - if (gpio_mode & GPIO_MD_MASK_DIR) - GPDR(gpio) |= GPIO_bit(gpio); - else - GPDR(gpio) &= ~GPIO_bit(gpio); - gafr = GAFR(gpio) & ~(0x3 << (((gpio) & 0xf)*2)); - GAFR(gpio) = gafr | (fn << (((gpio) & 0xf)*2)); - local_irq_restore(flags); - - return 0; -} - -EXPORT_SYMBOL(pxa_gpio_mode); - -int gpio_direction_input(unsigned gpio) -{ - unsigned long flags; - u32 mask; - - if (gpio > pxa_last_gpio) - return -EINVAL; - - mask = GPIO_bit(gpio); - local_irq_save(flags); - GPDR(gpio) &= ~mask; - local_irq_restore(flags); - - return 0; -} -EXPORT_SYMBOL(gpio_direction_input); - -int gpio_direction_output(unsigned gpio, int value) -{ - unsigned long flags; - u32 mask; - - if (gpio > pxa_last_gpio) - return -EINVAL; - - mask = GPIO_bit(gpio); - local_irq_save(flags); - if (value) - GPSR(gpio) = mask; - else - GPCR(gpio) = mask; - GPDR(gpio) |= mask; - local_irq_restore(flags); - - return 0; -} -EXPORT_SYMBOL(gpio_direction_output); - -/* - * Return GPIO level - */ -int pxa_gpio_get_value(unsigned gpio) -{ - return __gpio_get_value(gpio); -} - -EXPORT_SYMBOL(pxa_gpio_get_value); - -/* - * Set output GPIO level - */ -void pxa_gpio_set_value(unsigned gpio, int value) -{ - __gpio_set_value(gpio, value); -} - -EXPORT_SYMBOL(pxa_gpio_set_value); - -/* * Routine to safely enable or disable a clock in the CKEN */ void __pxa_set_cken(int clock, int enable) @@ -172,7 +80,6 @@ void __pxa_set_cken(int clock, int enable) local_irq_restore(flags); } - EXPORT_SYMBOL(__pxa_set_cken); /* diff --git a/arch/arm/mach-pxa/generic.h b/arch/arm/mach-pxa/generic.h index 1a16ad3ecee6..b3d10b0e52a0 100644 --- a/arch/arm/mach-pxa/generic.h +++ b/arch/arm/mach-pxa/generic.h @@ -16,6 +16,7 @@ extern void __init pxa_init_irq_low(void); extern void __init pxa_init_irq_high(void); extern void __init pxa_init_irq_gpio(int gpio_nr); extern void __init pxa_init_irq_set_wake(int (*set_wake)(unsigned int, unsigned int)); +extern void __init pxa_init_gpio(int gpio_nr); extern void __init pxa25x_init_irq(void); extern void __init pxa27x_init_irq(void); extern void __init pxa3xx_init_irq(void); diff --git a/arch/arm/mach-pxa/gpio.c b/arch/arm/mach-pxa/gpio.c new file mode 100644 index 000000000000..8638dd7dd076 --- /dev/null +++ b/arch/arm/mach-pxa/gpio.c @@ -0,0 +1,197 @@ +/* + * linux/arch/arm/mach-pxa/gpio.c + * + * Generic PXA GPIO handling + * + * Author: Nicolas Pitre + * Created: Jun 15, 2001 + * Copyright: MontaVista Software Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/module.h> + +#include <asm/gpio.h> +#include <asm/hardware.h> +#include <asm/io.h> +#include <asm/arch/pxa-regs.h> + +#include "generic.h" + + +struct pxa_gpio_chip { + struct gpio_chip chip; + void __iomem *regbase; +}; + +int pxa_last_gpio; + +/* + * Configure pins for GPIO or other functions + */ +int pxa_gpio_mode(int gpio_mode) +{ + unsigned long flags; + int gpio = gpio_mode & GPIO_MD_MASK_NR; + int fn = (gpio_mode & GPIO_MD_MASK_FN) >> 8; + int gafr; + + if (gpio > pxa_last_gpio) + return -EINVAL; + + local_irq_save(flags); + if (gpio_mode & GPIO_DFLT_LOW) + GPCR(gpio) = GPIO_bit(gpio); + else if (gpio_mode & GPIO_DFLT_HIGH) + GPSR(gpio) = GPIO_bit(gpio); + if (gpio_mode & GPIO_MD_MASK_DIR) + GPDR(gpio) |= GPIO_bit(gpio); + else + GPDR(gpio) &= ~GPIO_bit(gpio); + gafr = GAFR(gpio) & ~(0x3 << (((gpio) & 0xf)*2)); + GAFR(gpio) = gafr | (fn << (((gpio) & 0xf)*2)); + local_irq_restore(flags); + + return 0; +} +EXPORT_SYMBOL(pxa_gpio_mode); + +static int pxa_gpio_direction_input(struct gpio_chip *chip, unsigned offset) +{ + unsigned long flags; + u32 mask = 1 << offset; + u32 value; + struct pxa_gpio_chip *pxa; + void __iomem *gpdr; + + pxa = container_of(chip, struct pxa_gpio_chip, chip); + gpdr = pxa->regbase + GPDR_OFFSET; + local_irq_save(flags); + value = __raw_readl(gpdr); + value &= ~mask; + __raw_writel(value, gpdr); + local_irq_restore(flags); + + return 0; +} + +static int pxa_gpio_direction_output(struct gpio_chip *chip, + unsigned offset, int value) +{ + unsigned long flags; + u32 mask = 1 << offset; + u32 tmp; + struct pxa_gpio_chip *pxa; + void __iomem *gpdr; + + pxa = container_of(chip, struct pxa_gpio_chip, chip); + __raw_writel(mask, + pxa->regbase + (value ? GPSR_OFFSET : GPCR_OFFSET)); + gpdr = pxa->regbase + GPDR_OFFSET; + local_irq_save(flags); + tmp = __raw_readl(gpdr); + tmp |= mask; + __raw_writel(tmp, gpdr); + local_irq_restore(flags); + + return 0; +} + +/* + * Return GPIO level + */ +static int pxa_gpio_get(struct gpio_chip *chip, unsigned offset) +{ + u32 mask = 1 << offset; + struct pxa_gpio_chip *pxa; + + pxa = container_of(chip, struct pxa_gpio_chip, chip); + return __raw_readl(pxa->regbase + GPLR_OFFSET) & mask; +} + +/* + * Set output GPIO level + */ +static void pxa_gpio_set(struct gpio_chip *chip, unsigned offset, int value) +{ + u32 mask = 1 << offset; + struct pxa_gpio_chip *pxa; + + pxa = container_of(chip, struct pxa_gpio_chip, chip); + + if (value) + __raw_writel(mask, pxa->regbase + GPSR_OFFSET); + else + __raw_writel(mask, pxa->regbase + GPCR_OFFSET); +} + +static struct pxa_gpio_chip pxa_gpio_chip[] = { + [0] = { + .regbase = GPIO0_BASE, + .chip = { + .label = "gpio-0", + .direction_input = pxa_gpio_direction_input, + .direction_output = pxa_gpio_direction_output, + .get = pxa_gpio_get, + .set = pxa_gpio_set, + .base = 0, + .ngpio = 32, + }, + }, + [1] = { + .regbase = GPIO1_BASE, + .chip = { + .label = "gpio-1", + .direction_input = pxa_gpio_direction_input, + .direction_output = pxa_gpio_direction_output, + .get = pxa_gpio_get, + .set = pxa_gpio_set, + .base = 32, + .ngpio = 32, + }, + }, + [2] = { + .regbase = GPIO2_BASE, + .chip = { + .label = "gpio-2", + .direction_input = pxa_gpio_direction_input, + .direction_output = pxa_gpio_direction_output, + .get = pxa_gpio_get, + .set = pxa_gpio_set, + .base = 64, + .ngpio = 32, /* 21 for PXA25x */ + }, + }, +#if defined(CONFIG_PXA27x) || defined(CONFIG_PXA3xx) + [3] = { + .regbase = GPIO3_BASE, + .chip = { + .label = "gpio-3", + .direction_input = pxa_gpio_direction_input, + .direction_output = pxa_gpio_direction_output, + .get = pxa_gpio_get, + .set = pxa_gpio_set, + .base = 96, + .ngpio = 32, + }, + }, +#endif +}; + +void __init pxa_init_gpio(int gpio_nr) +{ + int i; + + /* add a GPIO chip for each register bank. + * the last PXA25x register only contains 21 GPIOs + */ + for (i = 0; i < gpio_nr; i += 32) { + if (i+32 > gpio_nr) + pxa_gpio_chip[i/32].chip.ngpio = gpio_nr - i; + gpiochip_add(&pxa_gpio_chip[i/32].chip); + } +} diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c index 5a1d5eef10a4..36c6a68beca2 100644 --- a/arch/arm/mach-pxa/irq.c +++ b/arch/arm/mach-pxa/irq.c @@ -311,6 +311,8 @@ void __init pxa_init_irq_gpio(int gpio_nr) /* Install handler for GPIO>=2 edge detect interrupts */ set_irq_chip(IRQ_GPIO_2_x, &pxa_internal_chip_low); set_irq_chained_handler(IRQ_GPIO_2_x, pxa_gpio_demux_handler); + + pxa_init_gpio(gpio_nr); } void __init pxa_init_irq_set_wake(int (*set_wake)(unsigned int, unsigned int)) diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 75952779ce19..303a7ff6bfd2 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -162,7 +162,7 @@ static void unmap_area_sections(unsigned long virt, unsigned long size) * Free the page table, if there was one. */ if ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_TABLE) - pte_free_kernel(pmd_page_vaddr(pmd)); + pte_free_kernel(&init_mm, pmd_page_vaddr(pmd)); } addr += PGDIR_SIZE; diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index 50b9aed6000d..500c9610ab30 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -65,14 +65,14 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) return new_pgd; no_pte: - pmd_free(new_pmd); + pmd_free(mm, new_pmd); no_pmd: free_pages((unsigned long)new_pgd, 2); no_pgd: return NULL; } -void free_pgd_slow(pgd_t *pgd) +void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd) { pmd_t *pmd; struct page *pte; @@ -94,8 +94,8 @@ void free_pgd_slow(pgd_t *pgd) pmd_clear(pmd); dec_zone_page_state(virt_to_page((unsigned long *)pgd), NR_PAGETABLE); pte_lock_deinit(pte); - pte_free(pte); - pmd_free(pmd); + pte_free(mm, pte); + pmd_free(mm, pmd); free: free_pages((unsigned long) pgd, 2); } diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index c816f29154c9..28e0caf4156c 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -82,6 +82,7 @@ config PLATFORM_AT32AP select SUBARCH_AVR32B select MMU select PERFORMANCE_COUNTERS + select HAVE_GPIO_LIB # # CPU types diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c index d61a02da898c..38a8fa31c0b5 100644 --- a/arch/avr32/mach-at32ap/pio.c +++ b/arch/avr32/mach-at32ap/pio.c @@ -24,11 +24,11 @@ #define MAX_NR_PIO_DEVICES 8 struct pio_device { + struct gpio_chip chip; void __iomem *regs; const struct platform_device *pdev; struct clk *clk; u32 pinmux_mask; - u32 gpio_mask; char name[8]; }; @@ -64,7 +64,8 @@ void __init at32_select_periph(unsigned int pin, unsigned int periph, goto fail; } - if (unlikely(test_and_set_bit(pin_index, &pio->pinmux_mask))) { + if (unlikely(test_and_set_bit(pin_index, &pio->pinmux_mask) + || gpiochip_is_requested(&pio->chip, pin_index))) { printk("%s: pin %u is busy\n", pio->name, pin_index); goto fail; } @@ -79,9 +80,6 @@ void __init at32_select_periph(unsigned int pin, unsigned int periph, if (!(flags & AT32_GPIOF_PULLUP)) pio_writel(pio, PUDR, mask); - /* gpio_request NOT allowed */ - set_bit(pin_index, &pio->gpio_mask); - return; fail: @@ -130,9 +128,6 @@ void __init at32_select_gpio(unsigned int pin, unsigned long flags) pio_writel(pio, PER, mask); - /* gpio_request now allowed */ - clear_bit(pin_index, &pio->gpio_mask); - return; fail: @@ -166,96 +161,50 @@ fail: /* GPIO API */ -int gpio_request(unsigned int gpio, const char *label) +static int direction_input(struct gpio_chip *chip, unsigned offset) { - struct pio_device *pio; - unsigned int pin; - - pio = gpio_to_pio(gpio); - if (!pio) - return -ENODEV; + struct pio_device *pio = container_of(chip, struct pio_device, chip); + u32 mask = 1 << offset; - pin = gpio & 0x1f; - if (test_and_set_bit(pin, &pio->gpio_mask)) - return -EBUSY; + if (!(pio_readl(pio, PSR) & mask)) + return -EINVAL; + pio_writel(pio, ODR, mask); return 0; } -EXPORT_SYMBOL(gpio_request); -void gpio_free(unsigned int gpio) +static int gpio_get(struct gpio_chip *chip, unsigned offset) { - struct pio_device *pio; - unsigned int pin; + struct pio_device *pio = container_of(chip, struct pio_device, chip); - pio = gpio_to_pio(gpio); - if (!pio) { - printk(KERN_ERR - "gpio: attempted to free invalid pin %u\n", gpio); - return; - } - - pin = gpio & 0x1f; - if (!test_and_clear_bit(pin, &pio->gpio_mask)) - printk(KERN_ERR "gpio: freeing free or non-gpio pin %s-%u\n", - pio->name, pin); + return (pio_readl(pio, PDSR) >> offset) & 1; } -EXPORT_SYMBOL(gpio_free); -int gpio_direction_input(unsigned int gpio) -{ - struct pio_device *pio; - unsigned int pin; - - pio = gpio_to_pio(gpio); - if (!pio) - return -ENODEV; - - pin = gpio & 0x1f; - pio_writel(pio, ODR, 1 << pin); - - return 0; -} -EXPORT_SYMBOL(gpio_direction_input); +static void gpio_set(struct gpio_chip *chip, unsigned offset, int value); -int gpio_direction_output(unsigned int gpio, int value) +static int direction_output(struct gpio_chip *chip, unsigned offset, int value) { - struct pio_device *pio; - unsigned int pin; - - pio = gpio_to_pio(gpio); - if (!pio) - return -ENODEV; + struct pio_device *pio = container_of(chip, struct pio_device, chip); + u32 mask = 1 << offset; - gpio_set_value(gpio, value); - - pin = gpio & 0x1f; - pio_writel(pio, OER, 1 << pin); + if (!(pio_readl(pio, PSR) & mask)) + return -EINVAL; + gpio_set(chip, offset, value); + pio_writel(pio, OER, mask); return 0; } -EXPORT_SYMBOL(gpio_direction_output); -int gpio_get_value(unsigned int gpio) +static void gpio_set(struct gpio_chip *chip, unsigned offset, int value) { - struct pio_device *pio = &pio_dev[gpio >> 5]; + struct pio_device *pio = container_of(chip, struct pio_device, chip); + u32 mask = 1 << offset; - return (pio_readl(pio, PDSR) >> (gpio & 0x1f)) & 1; -} -EXPORT_SYMBOL(gpio_get_value); - -void gpio_set_value(unsigned int gpio, int value) -{ - struct pio_device *pio = &pio_dev[gpio >> 5]; - u32 mask; - - mask = 1 << (gpio & 0x1f); if (value) pio_writel(pio, SODR, mask); else pio_writel(pio, CODR, mask); } -EXPORT_SYMBOL(gpio_set_value); /*--------------------------------------------------------------------------*/ @@ -339,6 +288,63 @@ gpio_irq_setup(struct pio_device *pio, int irq, int gpio_irq) /*--------------------------------------------------------------------------*/ +#ifdef CONFIG_DEBUG_FS + +#include <linux/seq_file.h> + +/* + * This shows more info than the generic gpio dump code: + * pullups, deglitching, open drain drive. + */ +static void pio_bank_show(struct seq_file *s, struct gpio_chip *chip) +{ + struct pio_device *pio = container_of(chip, struct pio_device, chip); + u32 psr, osr, imr, pdsr, pusr, ifsr, mdsr; + unsigned i; + u32 mask; + char bank; + + psr = pio_readl(pio, PSR); + osr = pio_readl(pio, OSR); + imr = pio_readl(pio, IMR); + pdsr = pio_readl(pio, PDSR); + pusr = pio_readl(pio, PUSR); + ifsr = pio_readl(pio, IFSR); + mdsr = pio_readl(pio, MDSR); + + bank = 'A' + pio->pdev->id; + + for (i = 0, mask = 1; i < 32; i++, mask <<= 1) { + const char *label; + + label = gpiochip_is_requested(chip, i); + if (!label) + continue; + + seq_printf(s, " gpio-%-3d P%c%-2d (%-12s) %s %s %s", + chip->base + i, bank, i, + label, + (osr & mask) ? "out" : "in ", + (mask & pdsr) ? "hi" : "lo", + (mask & pusr) ? " " : "up"); + if (ifsr & mask) + seq_printf(s, " deglitch"); + if ((osr & mdsr) & mask) + seq_printf(s, " open-drain"); + if (imr & mask) + seq_printf(s, " irq-%d edge-both", + gpio_to_irq(chip->base + i)); + seq_printf(s, "\n"); + } +} + +#else +#define pio_bank_show NULL +#endif + + +/*--------------------------------------------------------------------------*/ + static int __init pio_probe(struct platform_device *pdev) { struct pio_device *pio = NULL; @@ -349,6 +355,18 @@ static int __init pio_probe(struct platform_device *pdev) pio = &pio_dev[pdev->id]; BUG_ON(!pio->regs); + pio->chip.label = pio->name; + pio->chip.base = pdev->id * 32; + pio->chip.ngpio = 32; + + pio->chip.direction_input = direction_input; + pio->chip.get = gpio_get; + pio->chip.direction_output = direction_output; + pio->chip.set = gpio_set; + pio->chip.dbg_show = pio_bank_show; + + gpiochip_add(&pio->chip); + gpio_irq_setup(pio, irq, gpio_irq_base); platform_set_drvdata(pdev, pio); @@ -406,12 +424,6 @@ void __init at32_init_pio(struct platform_device *pdev) pio->pdev = pdev; pio->regs = ioremap(regs->start, regs->end - regs->start + 1); - /* - * request_gpio() is only valid for pins that have been - * explicitly configured as GPIO and not previously requested - */ - pio->gpio_mask = ~0UL; - /* start with irqs disabled and acked */ pio_writel(pio, IDR, ~0UL); (void) pio_readl(pio, ISR); diff --git a/arch/avr32/mach-at32ap/pio.h b/arch/avr32/mach-at32ap/pio.h index 50fa3aca32c5..7795116a483a 100644 --- a/arch/avr32/mach-at32ap/pio.h +++ b/arch/avr32/mach-at32ap/pio.h @@ -19,7 +19,7 @@ #define PIO_OSR 0x0018 #define PIO_IFER 0x0020 #define PIO_IFDR 0x0024 -#define PIO_ISFR 0x0028 +#define PIO_IFSR 0x0028 #define PIO_SODR 0x0030 #define PIO_CODR 0x0034 #define PIO_ODSR 0x0038 diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index 56ff51bc8c21..fdd9bf43361e 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -1373,7 +1373,7 @@ ENTRY(_sys_call_table) .long _sys_epoll_pwait .long _sys_utimensat .long _sys_signalfd - .long _sys_timerfd + .long _sys_ni_syscall .long _sys_eventfd /* 350 */ .long _sys_pread64 .long _sys_pwrite64 diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 7f0be4cd5e9a..27b082ac7f11 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -150,6 +150,7 @@ config ETRAX_FLASH_BUSWIDTH Width in bytes of the Flash bus (1, 2 or 4). Is usually 2. source arch/cris/arch-v10/Kconfig +source arch/cris/arch-v32/Kconfig endmenu @@ -157,8 +158,8 @@ source "net/Kconfig" # bring in ETRAX built-in drivers menu "Drivers for built-in interfaces" -# arch/cris/arch is a symlink to correct arch (arch-v10 or arch-v32) -source arch/cris/arch/drivers/Kconfig +source arch/cris/arch-v10/drivers/Kconfig +source arch/cris/arch-v32/drivers/Kconfig endmenu diff --git a/arch/cris/arch-v10/Kconfig b/arch/cris/arch-v10/Kconfig index f1ce6f64401d..1d61faec77cd 100644 --- a/arch/cris/arch-v10/Kconfig +++ b/arch/cris/arch-v10/Kconfig @@ -1,3 +1,5 @@ +if ETRAX_ARCH_V10 + # ETRAX 100LX v1 has a MMU "feature" requiring a low mapping config CRIS_LOW_MAP bool @@ -451,3 +453,5 @@ config ETRAX_POWERBUTTON_BIT default "25" help Configure where power button is connected. + +endif diff --git a/arch/cris/arch-v10/drivers/Kconfig b/arch/cris/arch-v10/drivers/Kconfig index e3c0f2928149..96740ef497d4 100644 --- a/arch/cris/arch-v10/drivers/Kconfig +++ b/arch/cris/arch-v10/drivers/Kconfig @@ -1,3 +1,5 @@ +if ETRAX_ARCH_V10 + config ETRAX_ETHERNET bool "Ethernet support" depends on ETRAX_ARCH_V10 @@ -806,3 +808,5 @@ config ETRAX_DS1302_TRICKLE_CHARGE 1 = 2kohm, 2 = 4kohm, 3 = 4kohm 4 = 1 diode, 8 = 2 diodes Allowed values are (increasing current): 0, 11, 10, 9, 7, 6, 5 + +endif diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S index ec62c951fa3c..d1361dc119e2 100644 --- a/arch/cris/arch-v10/kernel/entry.S +++ b/arch/cris/arch-v10/kernel/entry.S @@ -1167,7 +1167,7 @@ sys_call_table: .long sys_epoll_pwait .long sys_utimensat /* 320 */ .long sys_signalfd - .long sys_timerfd + .long sys_ni_syscall .long sys_eventfd .long sys_fallocate diff --git a/arch/cris/arch-v32/Kconfig b/arch/cris/arch-v32/Kconfig index 4f79d8ed3e1c..d8acaa920e1c 100644 --- a/arch/cris/arch-v32/Kconfig +++ b/arch/cris/arch-v32/Kconfig @@ -1,3 +1,5 @@ +if ETRAX_ARCH_V32 + config ETRAX_DRAM_VIRTUAL_BASE hex depends on ETRAX_ARCH_V32 @@ -294,3 +296,5 @@ config ETRAX_DEF_GIO_PE_OUT help Configures the initial data for the general port E bits. Most products should use 00000 here. + +endif diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig index 9bccb5e2a960..c329cce2a0c3 100644 --- a/arch/cris/arch-v32/drivers/Kconfig +++ b/arch/cris/arch-v32/drivers/Kconfig @@ -1,3 +1,5 @@ +if ETRAX_ARCH_V32 + config ETRAX_ETHERNET bool "Ethernet support" depends on ETRAX_ARCH_V32 @@ -610,3 +612,5 @@ config ETRAX_STREAMCOPROC help This option enables a driver for the stream co-processor for cryptographic operations. + +endif diff --git a/arch/cris/arch-v32/drivers/pci/dma.c b/arch/cris/arch-v32/drivers/pci/dma.c index 66f9500fbc02..e0364654fc44 100644 --- a/arch/cris/arch-v32/drivers/pci/dma.c +++ b/arch/cris/arch-v32/drivers/pci/dma.c @@ -93,7 +93,7 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); if (!dev->dma_mem) - goto out; + goto iounmap_out; dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); if (!dev->dma_mem->bitmap) goto free1_out; @@ -110,6 +110,8 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, free1_out: kfree(dev->dma_mem); + iounmap_out: + iounmap(mem_base); out: return 0; } diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index bf0468cbe713..96f7d70f4473 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -138,6 +138,15 @@ config UCPAGE_OFFSET_C0000000 endchoice +config PAGE_OFFSET + hex + default 0x20000000 if UCPAGE_OFFSET_20000000 + default 0x40000000 if UCPAGE_OFFSET_40000000 + default 0x60000000 if UCPAGE_OFFSET_60000000 + default 0x80000000 if UCPAGE_OFFSET_80000000 + default 0xA0000000 if UCPAGE_OFFSET_A0000000 + default 0xC0000000 + config PROTECT_KERNEL bool "Protect core kernel against userspace" depends on !MMU diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index f42b328b1dd0..ef7527b8b0c7 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -13,7 +13,7 @@ ENTRY(_start) jiffies = jiffies_64 + 4; -__page_offset = 0xc0000000; /* start of area covered by struct pages */ +__page_offset = CONFIG_PAGE_OFFSET; /* start of area covered by struct pages */ __kernel_image_start = __page_offset; /* address at which kernel image resides */ SECTIONS diff --git a/arch/frv/mm/mmu-context.c b/arch/frv/mm/mmu-context.c index 1530a4111e6d..81757d55a5b5 100644 --- a/arch/frv/mm/mmu-context.c +++ b/arch/frv/mm/mmu-context.c @@ -181,7 +181,7 @@ int cxn_pin_by_pid(pid_t pid) /* get a handle on the mm_struct */ read_lock(&tasklist_lock); - tsk = find_task_by_pid(pid); + tsk = find_task_by_vpid(pid); if (tsk) { ret = -EINVAL; diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c index 7787c3cc52c6..1a2e5c8d03a9 100644 --- a/arch/frv/mm/pgalloc.c +++ b/arch/frv/mm/pgalloc.c @@ -140,7 +140,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) return pgd; } -void pgd_free(pgd_t *pgd) +void pgd_free(struct mm_struct *mm, pgd_t *pgd) { /* in the non-PAE case, clear_page_tables() clears user pgd entries */ quicklist_free(0, pgd_dtor, pgd); diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 45bf04eb7d70..c412fe63f8ec 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -1265,7 +1265,7 @@ sba_fill_pdir( * the sglist do both. */ static SBA_INLINE int -sba_coalesce_chunks( struct ioc *ioc, +sba_coalesce_chunks(struct ioc *ioc, struct device *dev, struct scatterlist *startsg, int nents) { @@ -1275,6 +1275,7 @@ sba_coalesce_chunks( struct ioc *ioc, struct scatterlist *dma_sg; /* next DMA stream head */ unsigned long dma_offset, dma_len; /* start/len of DMA stream */ int n_mappings = 0; + unsigned int max_seg_size = dma_get_max_seg_size(dev); while (nents > 0) { unsigned long vaddr = (unsigned long) sba_sg_address(startsg); @@ -1314,6 +1315,9 @@ sba_coalesce_chunks( struct ioc *ioc, > DMA_CHUNK_SIZE) break; + if (dma_len + startsg->length > max_seg_size) + break; + /* ** Then look for virtually contiguous blocks. ** @@ -1441,7 +1445,7 @@ int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int di ** w/o this association, we wouldn't have coherent DMA! ** Access to the virtual address is what forces a two pass algorithm. */ - coalesced = sba_coalesce_chunks(ioc, sglist, nents); + coalesced = sba_coalesce_chunks(ioc, dev, sglist, nents); /* ** Program the I/O Pdir diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index c36f43c94600..f5d3efbfbeda 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1586,7 +1586,7 @@ sys_call_table: data8 sys_epoll_pwait // 1305 data8 sys_utimensat data8 sys_signalfd - data8 sys_timerfd + data8 sys_ni_syscall data8 sys_eventfd .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/arch/m32r/boot/compressed/m32r_sio.c b/arch/m32r/boot/compressed/m32r_sio.c index ee3c8be12fa0..01d877c6868f 100644 --- a/arch/m32r/boot/compressed/m32r_sio.c +++ b/arch/m32r/boot/compressed/m32r_sio.c @@ -17,7 +17,7 @@ static int puts(const char *s) return 0; } -#if defined(CONFIG_PLAT_M32700UT_Alpha) || defined(CONFIG_PLAT_M32700UT) || defined(CONFIG_PLAT_OPSPUT) +#if defined(CONFIG_PLAT_M32700UT) || defined(CONFIG_PLAT_OPSPUT) #include <asm/m32r.h> #include <asm/io.h> @@ -52,7 +52,7 @@ static void putc(char c) } *BOOT_SIO0TXB = c; } -#else /* !(CONFIG_PLAT_M32700UT_Alpha) && !(CONFIG_PLAT_M32700UT) */ +#else /* !(CONFIG_PLAT_M32700UT) */ #if defined(CONFIG_PLAT_MAPPI2) #define SIO0STS (volatile unsigned short *)(0xa0efd000 + 14) #define SIO0TXB (volatile unsigned short *)(0xa0efd000 + 30) diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 8236e42ef711..ffabd01c45eb 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -577,20 +577,6 @@ config MAC_HID depends on INPUT_ADBHID default y -config MAC_ADBKEYCODES - bool "Support for ADB raw keycodes" - depends on INPUT_ADBHID - help - This provides support for sending raw ADB keycodes to console - devices. This is the default up to 2.4.0, but in future this may be - phased out in favor of generic Linux keycodes. If you say Y here, - you can dynamically switch via the - /proc/sys/dev/mac_hid/keyboard_sends_linux_keycodes - sysctl and with the "keyboard_sends_linux_keycodes=" kernel - argument. - - If unsure, say Y here. - config ADB_KEYBOARD bool "Support for ADB keyboard (old driver)" depends on MAC && !INPUT_ADBHID diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index 4a1bd44ff162..2cba605cb59d 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -13,16 +13,15 @@ # Copyright (C) 1994 by Hamish Macdonald # -# test for cross compiling -COMPILE_ARCH = $(shell uname -m) - # override top level makefile AS += -m68020 LDFLAGS := -m m68kelf LDFLAGS_MODULE += -T $(srctree)/arch/m68k/kernel/module.lds -ifneq ($(COMPILE_ARCH),$(ARCH)) - # prefix for cross-compiling binaries - CROSS_COMPILE = m68k-linux-gnu- +ifneq ($(SUBARCH),$(ARCH)) + ifeq ($(CROSS_COMPILE),) + CROSS_COMPILE := $(call cc-cross-prefix, \ + m68k-linux-gnu- m68k-linux- m68k-unknown-linux-gnu-) + endif endif ifdef CONFIG_SUN3 diff --git a/arch/m68k/amiga/Makefile b/arch/m68k/amiga/Makefile index 8b415651edee..6a0d7650f980 100644 --- a/arch/m68k/amiga/Makefile +++ b/arch/m68k/amiga/Makefile @@ -2,6 +2,6 @@ # Makefile for Linux arch/m68k/amiga source directory # -obj-y := config.o amiints.o cia.o chipram.o amisound.o amiga_ksyms.o +obj-y := config.o amiints.o cia.o chipram.o amisound.o obj-$(CONFIG_AMIGA_PCMCIA) += pcmcia.o diff --git a/arch/m68k/amiga/amiga_ksyms.c b/arch/m68k/amiga/amiga_ksyms.c deleted file mode 100644 index 7fdcf6bf3ada..000000000000 --- a/arch/m68k/amiga/amiga_ksyms.c +++ /dev/null @@ -1,33 +0,0 @@ -#include <linux/module.h> -#include <linux/types.h> -#include <asm/ptrace.h> -#include <asm/amigahw.h> -#include <asm/amigaints.h> -#include <asm/amipcmcia.h> - -extern volatile u_short amiga_audio_min_period; -extern u_short amiga_audio_period; - -/* - * Add things here when you find the need for it. - */ -EXPORT_SYMBOL(amiga_model); -EXPORT_SYMBOL(amiga_chipset); -EXPORT_SYMBOL(amiga_hw_present); -EXPORT_SYMBOL(amiga_eclock); -EXPORT_SYMBOL(amiga_colorclock); -EXPORT_SYMBOL(amiga_chip_alloc); -EXPORT_SYMBOL(amiga_chip_free); -EXPORT_SYMBOL(amiga_chip_avail); -EXPORT_SYMBOL(amiga_chip_size); -EXPORT_SYMBOL(amiga_audio_period); -EXPORT_SYMBOL(amiga_audio_min_period); - -#ifdef CONFIG_AMIGA_PCMCIA - EXPORT_SYMBOL(pcmcia_reset); - EXPORT_SYMBOL(pcmcia_copy_tuple); - EXPORT_SYMBOL(pcmcia_program_voltage); - EXPORT_SYMBOL(pcmcia_access_speed); - EXPORT_SYMBOL(pcmcia_write_enable); - EXPORT_SYMBOL(pcmcia_write_disable); -#endif diff --git a/arch/m68k/amiga/amisound.c b/arch/m68k/amiga/amisound.c index 1f5bfb584297..61e5c54625ae 100644 --- a/arch/m68k/amiga/amisound.c +++ b/arch/m68k/amiga/amisound.c @@ -12,6 +12,7 @@ #include <linux/timer.h> #include <linux/init.h> #include <linux/string.h> +#include <linux/module.h> #include <asm/system.h> #include <asm/amigahw.h> @@ -21,7 +22,7 @@ static const signed char sine_data[] = { 0, 39, 75, 103, 121, 127, 121, 103, 75, 39, 0, -39, -75, -103, -121, -127, -121, -103, -75, -39 }; -#define DATA_SIZE (sizeof(sine_data)/sizeof(sine_data[0])) +#define DATA_SIZE ARRAY_SIZE(sine_data) #define custom amiga_custom @@ -31,6 +32,7 @@ static const signed char sine_data[] = { */ volatile unsigned short amiga_audio_min_period = 124; /* Default for pre-OCS */ +EXPORT_SYMBOL(amiga_audio_min_period); #define MAX_PERIOD (65535) @@ -40,6 +42,7 @@ volatile unsigned short amiga_audio_min_period = 124; /* Default for pre-OCS */ */ unsigned short amiga_audio_period = MAX_PERIOD; +EXPORT_SYMBOL(amiga_audio_period); static unsigned long clock_constant; diff --git a/arch/m68k/amiga/chipram.c b/arch/m68k/amiga/chipram.c index fa015d801617..d10726f9038b 100644 --- a/arch/m68k/amiga/chipram.c +++ b/arch/m68k/amiga/chipram.c @@ -13,10 +13,13 @@ #include <linux/ioport.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/module.h> + #include <asm/page.h> #include <asm/amigahw.h> unsigned long amiga_chip_size; +EXPORT_SYMBOL(amiga_chip_size); static struct resource chipram_res = { .name = "Chip RAM", .start = CHIP_PHYSADDR @@ -67,6 +70,7 @@ void *amiga_chip_alloc(unsigned long size, const char *name) #endif return (void *)ZTWO_VADDR(res->start); } +EXPORT_SYMBOL(amiga_chip_alloc); /* @@ -120,6 +124,7 @@ void amiga_chip_free(void *ptr) } printk("amiga_chip_free: trying to free nonexistent region at %p\n", ptr); } +EXPORT_SYMBOL(amiga_chip_free); unsigned long amiga_chip_avail(void) @@ -129,3 +134,5 @@ unsigned long amiga_chip_avail(void) #endif return chipavail; } +EXPORT_SYMBOL(amiga_chip_avail); + diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index 35748531327d..50f5daab46b7 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -23,6 +23,7 @@ #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/zorro.h> +#include <linux/module.h> #include <asm/bootinfo.h> #include <asm/setup.h> @@ -36,13 +37,24 @@ #include <asm/io.h> unsigned long amiga_model; +EXPORT_SYMBOL(amiga_model); + unsigned long amiga_eclock; +EXPORT_SYMBOL(amiga_eclock); + unsigned long amiga_masterclock; + unsigned long amiga_colorclock; +EXPORT_SYMBOL(amiga_colorclock); + unsigned long amiga_chipset; +EXPORT_SYMBOL(amiga_chipset); + unsigned char amiga_vblank; unsigned char amiga_psfreq; + struct amiga_hw_present amiga_hw_present; +EXPORT_SYMBOL(amiga_hw_present); static char s_a500[] __initdata = "A500"; static char s_a500p[] __initdata = "A500+"; diff --git a/arch/m68k/amiga/pcmcia.c b/arch/m68k/amiga/pcmcia.c index 186662ca1a89..7106f0c3639b 100644 --- a/arch/m68k/amiga/pcmcia.c +++ b/arch/m68k/amiga/pcmcia.c @@ -15,6 +15,8 @@ #include <linux/types.h> #include <linux/jiffies.h> #include <linux/timer.h> +#include <linux/module.h> + #include <asm/amigayle.h> #include <asm/amipcmcia.h> @@ -30,6 +32,7 @@ void pcmcia_reset(void) while (time_before(jiffies, reset_start_time + 1*HZ/100)); b = gayle_reset; } +EXPORT_SYMBOL(pcmcia_reset); /* copy a tuple, including tuple header. return nb bytes copied */ @@ -61,6 +64,7 @@ int pcmcia_copy_tuple(unsigned char tuple_id, void *tuple, int max_len) return 0; } +EXPORT_SYMBOL(pcmcia_copy_tuple); void pcmcia_program_voltage(int voltage) { @@ -84,6 +88,7 @@ void pcmcia_program_voltage(int voltage) gayle.config = cfg_byte; } +EXPORT_SYMBOL(pcmcia_program_voltage); void pcmcia_access_speed(int speed) { @@ -101,13 +106,17 @@ void pcmcia_access_speed(int speed) cfg_byte = (cfg_byte & 0xf3) | s; gayle.config = cfg_byte; } +EXPORT_SYMBOL(pcmcia_access_speed); void pcmcia_write_enable(void) { gayle.cardstatus = GAYLE_CS_WR|GAYLE_CS_DA; } +EXPORT_SYMBOL(pcmcia_write_enable); void pcmcia_write_disable(void) { gayle.cardstatus = 0; } +EXPORT_SYMBOL(pcmcia_write_disable); + diff --git a/arch/m68k/atari/Makefile b/arch/m68k/atari/Makefile index 2cb86191f0aa..2cd905efe63a 100644 --- a/arch/m68k/atari/Makefile +++ b/arch/m68k/atari/Makefile @@ -3,7 +3,7 @@ # obj-y := config.o time.o debug.o ataints.o stdma.o \ - atasound.o stram.o atari_ksyms.o + atasound.o stram.o ifeq ($(CONFIG_PCI),y) obj-$(CONFIG_HADES) += hades-pci.o diff --git a/arch/m68k/atari/ataints.c b/arch/m68k/atari/ataints.c index b85ca22024c1..b45593a60bdd 100644 --- a/arch/m68k/atari/ataints.c +++ b/arch/m68k/atari/ataints.c @@ -40,6 +40,7 @@ #include <linux/kernel_stat.h> #include <linux/init.h> #include <linux/seq_file.h> +#include <linux/module.h> #include <asm/system.h> #include <asm/traps.h> @@ -446,6 +447,7 @@ unsigned long atari_register_vme_int(void) free_vme_vec_bitmap |= 1 << i; return VME_SOURCE_BASE + i; } +EXPORT_SYMBOL(atari_register_vme_int); void atari_unregister_vme_int(unsigned long irq) @@ -455,5 +457,6 @@ void atari_unregister_vme_int(unsigned long irq) free_vme_vec_bitmap &= ~(1 << irq); } } +EXPORT_SYMBOL(atari_unregister_vme_int); diff --git a/arch/m68k/atari/atari_ksyms.c b/arch/m68k/atari/atari_ksyms.c deleted file mode 100644 index a04757151538..000000000000 --- a/arch/m68k/atari/atari_ksyms.c +++ /dev/null @@ -1,35 +0,0 @@ -#include <linux/module.h> - -#include <asm/ptrace.h> -#include <asm/traps.h> -#include <asm/atarihw.h> -#include <asm/atariints.h> -#include <asm/atarikb.h> -#include <asm/atari_joystick.h> -#include <asm/atari_stdma.h> -#include <asm/atari_stram.h> - -extern void atari_microwire_cmd( int cmd ); -extern int atari_MFP_init_done; -extern int atari_SCC_init_done; -extern int atari_SCC_reset_done; - -EXPORT_SYMBOL(atari_mch_cookie); -EXPORT_SYMBOL(atari_mch_type); -EXPORT_SYMBOL(atari_hw_present); -EXPORT_SYMBOL(atari_switches); -EXPORT_SYMBOL(atari_dont_touch_floppy_select); -EXPORT_SYMBOL(atari_register_vme_int); -EXPORT_SYMBOL(atari_unregister_vme_int); -EXPORT_SYMBOL(stdma_lock); -EXPORT_SYMBOL(stdma_release); -EXPORT_SYMBOL(stdma_others_waiting); -EXPORT_SYMBOL(stdma_islocked); -EXPORT_SYMBOL(atari_stram_alloc); -EXPORT_SYMBOL(atari_stram_free); - -EXPORT_SYMBOL(atari_MFP_init_done); -EXPORT_SYMBOL(atari_SCC_init_done); -EXPORT_SYMBOL(atari_SCC_reset_done); - -EXPORT_SYMBOL(atari_microwire_cmd); diff --git a/arch/m68k/atari/atasound.c b/arch/m68k/atari/atasound.c index ee04250eb56b..d266fe89c125 100644 --- a/arch/m68k/atari/atasound.c +++ b/arch/m68k/atari/atasound.c @@ -22,6 +22,7 @@ #include <linux/fcntl.h> #include <linux/errno.h> #include <linux/mm.h> +#include <linux/module.h> #include <asm/atarihw.h> #include <asm/system.h> @@ -43,6 +44,7 @@ void atari_microwire_cmd (int cmd) while( tt_microwire.mask != 0x7ff) ; } +EXPORT_SYMBOL(atari_microwire_cmd); /* PSG base frequency */ diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c index e40e5dcaa347..5945e1505558 100644 --- a/arch/m68k/atari/config.c +++ b/arch/m68k/atari/config.c @@ -31,6 +31,7 @@ #include <linux/delay.h> #include <linux/ioport.h> #include <linux/vt_kern.h> +#include <linux/module.h> #include <asm/bootinfo.h> #include <asm/setup.h> @@ -43,10 +44,20 @@ #include <asm/io.h> u_long atari_mch_cookie; +EXPORT_SYMBOL(atari_mch_cookie); + u_long atari_mch_type; +EXPORT_SYMBOL(atari_mch_type); + struct atari_hw_present atari_hw_present; +EXPORT_SYMBOL(atari_hw_present); + u_long atari_switches; +EXPORT_SYMBOL(atari_switches); + int atari_dont_touch_floppy_select; +EXPORT_SYMBOL(atari_dont_touch_floppy_select); + int atari_rtc_year_offset; /* local function prototypes */ diff --git a/arch/m68k/atari/debug.c b/arch/m68k/atari/debug.c index fbeed8c8ecbc..043ddbc61c7b 100644 --- a/arch/m68k/atari/debug.c +++ b/arch/m68k/atari/debug.c @@ -15,17 +15,23 @@ #include <linux/console.h> #include <linux/init.h> #include <linux/delay.h> +#include <linux/module.h> #include <asm/atarihw.h> #include <asm/atariints.h> /* Flag that Modem1 port is already initialized and used */ int atari_MFP_init_done; +EXPORT_SYMBOL(atari_MFP_init_done); + /* Flag that Modem1 port is already initialized and used */ int atari_SCC_init_done; +EXPORT_SYMBOL(atari_SCC_init_done); + /* Can be set somewhere, if a SCC master reset has already be done and should * not be repeated; used by kgdb */ int atari_SCC_reset_done; +EXPORT_SYMBOL(atari_SCC_reset_done); static struct console atari_console_driver = { .name = "debug", diff --git a/arch/m68k/atari/hades-pci.c b/arch/m68k/atari/hades-pci.c index bee2b1443e36..2bbabc008708 100644 --- a/arch/m68k/atari/hades-pci.c +++ b/arch/m68k/atari/hades-pci.c @@ -376,8 +376,8 @@ struct pci_bus_info * __init init_hades_pci(void) */ bus = kzalloc(sizeof(struct pci_bus_info), GFP_KERNEL); - if (!bus) - return NULL; + if (unlikely(!bus)) + goto iounmap_base_virt; /* * Claim resources. The m68k has no separate I/O space, both @@ -385,43 +385,25 @@ struct pci_bus_info * __init init_hades_pci(void) * the I/O resources are requested in memory space as well. */ - if (request_resource(&iomem_resource, &config_space) != 0) - { - kfree(bus); - return NULL; - } + if (unlikely(request_resource(&iomem_resource, &config_space) != 0)) + goto free_bus; - if (request_resource(&iomem_resource, &io_space) != 0) - { - release_resource(&config_space); - kfree(bus); - return NULL; - } + if (unlikely(request_resource(&iomem_resource, &io_space) != 0)) + goto release_config_space; bus->mem_space.start = HADES_MEM_BASE; bus->mem_space.end = HADES_MEM_BASE + HADES_MEM_SIZE - 1; bus->mem_space.name = pci_mem_name; #if 1 - if (request_resource(&iomem_resource, &bus->mem_space) != 0) - { - release_resource(&io_space); - release_resource(&config_space); - kfree(bus); - return NULL; - } + if (unlikely(request_resource(&iomem_resource, &bus->mem_space) != 0)) + goto release_io_space; #endif bus->io_space.start = pci_io_base_virt; bus->io_space.end = pci_io_base_virt + HADES_VIRT_IO_SIZE - 1; bus->io_space.name = pci_io_name; #if 1 - if (request_resource(&ioport_resource, &bus->io_space) != 0) - { - release_resource(&bus->mem_space); - release_resource(&io_space); - release_resource(&config_space); - kfree(bus); - return NULL; - } + if (unlikely(request_resource(&ioport_resource, &bus->io_space) != 0)) + goto release_bus_mem_space; #endif /* * Set hardware dependent functions. @@ -438,5 +420,21 @@ struct pci_bus_info * __init init_hades_pci(void) tt_mfp.active_edge &= ~0x27; return bus; + +release_bus_mem_space: + release_resource(&bus->mem_space); +release_io_space: + release_resource(&io_space); +release_config_space: + release_resource(&config_space); +free_bus: + kfree(bus); +iounmap_base_virt: + iounmap((void *)pci_io_base_virt); + + for (i = 0; i < N_SLOTS; i++) + iounmap((void *)pci_conf_base_virt[i]); + + return NULL; } #endif diff --git a/arch/m68k/atari/stdma.c b/arch/m68k/atari/stdma.c index ab3fd5202b24..d1bd029a34ac 100644 --- a/arch/m68k/atari/stdma.c +++ b/arch/m68k/atari/stdma.c @@ -35,6 +35,7 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/wait.h> +#include <linux/module.h> #include <asm/atari_stdma.h> #include <asm/atariints.h> @@ -91,6 +92,7 @@ void stdma_lock(irq_handler_t handler, void *data) stdma_isr_data = data; local_irq_restore(flags); } +EXPORT_SYMBOL(stdma_lock); /* @@ -117,6 +119,7 @@ void stdma_release(void) local_irq_restore(flags); } +EXPORT_SYMBOL(stdma_release); /* @@ -134,6 +137,7 @@ int stdma_others_waiting(void) { return waitqueue_active(&stdma_wait); } +EXPORT_SYMBOL(stdma_others_waiting); /* @@ -155,6 +159,7 @@ int stdma_islocked(void) { return stdma_locked; } +EXPORT_SYMBOL(stdma_islocked); /* diff --git a/arch/m68k/atari/stram.c b/arch/m68k/atari/stram.c index bf4588cbe371..8dda6515887a 100644 --- a/arch/m68k/atari/stram.c +++ b/arch/m68k/atari/stram.c @@ -20,6 +20,7 @@ #include <linux/bootmem.h> #include <linux/mount.h> #include <linux/blkdev.h> +#include <linux/module.h> #include <asm/setup.h> #include <asm/machdep.h> @@ -208,6 +209,7 @@ void *atari_stram_alloc(long size, const char *owner) } return( addr ); } +EXPORT_SYMBOL(atari_stram_alloc); void atari_stram_free( void *addr ) @@ -237,6 +239,7 @@ void atari_stram_free( void *addr ) printk( KERN_ERR "atari_stram_free: cannot free block at %p " "(called from %p)\n", addr, __builtin_return_address(0) ); } +EXPORT_SYMBOL(atari_stram_free); /* ------------------------------------------------------------------------ */ diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 15b80abfe94a..ff9dffa5b860 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -678,7 +678,6 @@ CONFIG_LOGO_MAC_CLUT224=y # CONFIG_MAC_SCC=y CONFIG_MAC_HID=y -CONFIG_MAC_ADBKEYCODES=y CONFIG_SERIAL_CONSOLE=y # diff --git a/arch/m68k/hp300/Makefile b/arch/m68k/hp300/Makefile index 288b9c67c9bf..96d4244c82fd 100644 --- a/arch/m68k/hp300/Makefile +++ b/arch/m68k/hp300/Makefile @@ -2,4 +2,4 @@ # Makefile for Linux arch/m68k/hp300 source directory # -obj-y := ksyms.o config.o time.o reboot.o +obj-y := config.o time.o reboot.o diff --git a/arch/m68k/hp300/ksyms.c b/arch/m68k/hp300/ksyms.c deleted file mode 100644 index 8202830763d1..000000000000 --- a/arch/m68k/hp300/ksyms.c +++ /dev/null @@ -1,9 +0,0 @@ -/* - * linux/arch/m68k/hp300/ksyms.c - * - * Copyright (C) 1998 Philip Blundell <philb@gnu.org> - * - * This file contains the HP300-specific kernel symbols. None yet. :-) - */ - -#include <linux/module.h> diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index 918f5dbeaef6..6dfa3b3c0e2a 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -742,7 +742,7 @@ sys_call_table: .long sys_epoll_pwait /* 315 */ .long sys_utimensat .long sys_signalfd - .long sys_timerfd + .long sys_ni_syscall .long sys_eventfd .long sys_fallocate /* 320 */ diff --git a/arch/m68k/mac/Makefile b/arch/m68k/mac/Makefile index 995a09d912f5..1d265ba365ad 100644 --- a/arch/m68k/mac/Makefile +++ b/arch/m68k/mac/Makefile @@ -3,4 +3,4 @@ # obj-y := config.o bootparse.o macints.o iop.o via.o oss.o psc.o \ - baboon.o macboing.o debug.o misc.o mac_ksyms.o + baboon.o macboing.o debug.o misc.o diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 01b468b9392e..735a49b4b936 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -58,8 +58,6 @@ extern struct mem_info m68k_memory[NUM_MEMINFO]; extern struct mem_info m68k_ramdisk; -extern char m68k_command_line[CL_SIZE]; - void *mac_env; /* Loaded by the boot asm */ /* The phys. video addr. - might be bogus on some machines */ diff --git a/arch/m68k/mac/mac_ksyms.c b/arch/m68k/mac/mac_ksyms.c deleted file mode 100644 index 6e37ceb0f3b5..000000000000 --- a/arch/m68k/mac/mac_ksyms.c +++ /dev/null @@ -1,8 +0,0 @@ -#include <linux/module.h> -#include <asm/ptrace.h> -#include <asm/traps.h> - -/* Says whether we're using A/UX interrupts or not */ -extern int via_alt_mapping; - -EXPORT_SYMBOL(via_alt_mapping); diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index 8df270e950fa..fa485df4160e 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -28,6 +28,7 @@ #include <linux/delay.h> #include <linux/init.h> #include <linux/ide.h> +#include <linux/module.h> #include <asm/bootinfo.h> #include <asm/macintosh.h> @@ -41,7 +42,9 @@ volatile __u8 *via1, *via2; /* See note in mac_via.h about how this is possibly not useful */ volatile long *via_memory_bogon=(long *)&via_memory_bogon; #endif -int rbv_present, via_alt_mapping; +int rbv_present; +int via_alt_mapping; +EXPORT_SYMBOL(via_alt_mapping); __u8 rbv_clear; /* diff --git a/arch/m68k/mvme16x/Makefile b/arch/m68k/mvme16x/Makefile index 950e82f21640..edb3f6e6ee6a 100644 --- a/arch/m68k/mvme16x/Makefile +++ b/arch/m68k/mvme16x/Makefile @@ -2,4 +2,4 @@ # Makefile for Linux arch/m68k/mvme16x source directory # -obj-y := config.o rtc.o mvme16x_ksyms.o +obj-y := config.o rtc.o diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index daa785161401..24cbc3030454 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -25,6 +25,7 @@ #include <linux/genhd.h> #include <linux/rtc.h> #include <linux/interrupt.h> +#include <linux/module.h> #include <asm/bootinfo.h> #include <asm/system.h> @@ -58,6 +59,7 @@ static irq_handler_t tick_handler; unsigned short mvme16x_config; +EXPORT_SYMBOL(mvme16x_config); int mvme16x_parse_bootinfo(const struct bi_record *bi) diff --git a/arch/m68k/mvme16x/mvme16x_ksyms.c b/arch/m68k/mvme16x/mvme16x_ksyms.c deleted file mode 100644 index 4a8a3634bb47..000000000000 --- a/arch/m68k/mvme16x/mvme16x_ksyms.c +++ /dev/null @@ -1,6 +0,0 @@ -#include <linux/module.h> -#include <linux/types.h> -#include <asm/ptrace.h> -#include <asm/mvme16xhw.h> - -EXPORT_SYMBOL(mvme16x_config); diff --git a/arch/m68knommu/Kconfig.debug b/arch/m68knommu/Kconfig.debug index 9ff47bd09aee..ed6d9a83bfdb 100644 --- a/arch/m68knommu/Kconfig.debug +++ b/arch/m68knommu/Kconfig.debug @@ -21,13 +21,6 @@ config BOOTPARAM_STRING default 'console=ttyS0,19200' depends on BOOTPARAM -config DUMPTOFLASH - bool "Panic/Dump to FLASH" - depends on COLDFIRE - help - Dump any panic of trap output into a flash memory segment - for later analysis. - config NO_KERNEL_MSG bool "Suppress Kernel BUG Messages" help diff --git a/arch/m68knommu/defconfig b/arch/m68knommu/defconfig index 5a0ecaaee3b0..648113075f97 100644 --- a/arch/m68knommu/defconfig +++ b/arch/m68knommu/defconfig @@ -597,7 +597,6 @@ CONFIG_MSDOS_PARTITION=y # CONFIG_FULLDEBUG is not set # CONFIG_HIGHPROFILE is not set # CONFIG_BOOTPARAM is not set -# CONFIG_DUMPTOFLASH is not set # CONFIG_NO_KERNEL_MSG is not set # CONFIG_BDM_DISABLE is not set diff --git a/arch/m68knommu/kernel/m68k_ksyms.c b/arch/m68knommu/kernel/m68k_ksyms.c index f795062aba1e..53fad1490282 100644 --- a/arch/m68knommu/kernel/m68k_ksyms.c +++ b/arch/m68knommu/kernel/m68k_ksyms.c @@ -24,14 +24,6 @@ extern int dump_fpu(struct pt_regs *, elf_fpregset_t *); EXPORT_SYMBOL(__ioremap); EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL(strnlen); -EXPORT_SYMBOL(strrchr); -EXPORT_SYMBOL(strstr); -EXPORT_SYMBOL(strchr); -EXPORT_SYMBOL(strcat); -EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strcmp); -EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(ip_fast_csum); @@ -46,9 +38,6 @@ EXPORT_SYMBOL(csum_partial_copy_nocheck); it's OK to leave it out of version control. */ EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); -EXPORT_SYMBOL(memcmp); -EXPORT_SYMBOL(memscan); -EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(__down_failed); EXPORT_SYMBOL(__down_failed_interruptible); diff --git a/arch/m68knommu/kernel/setup.c b/arch/m68knommu/kernel/setup.c index 332345d7675d..81507c53d4a9 100644 --- a/arch/m68knommu/kernel/setup.c +++ b/arch/m68knommu/kernel/setup.c @@ -64,9 +64,6 @@ void (*mach_power_off)(void); #ifdef CONFIG_M68VZ328 #define CPU "MC68VZ328" #endif -#ifdef CONFIG_M68332 - #define CPU "MC68332" -#endif #ifdef CONFIG_M68360 #define CPU "MC68360" #endif diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68knommu/kernel/syscalltable.S index 9620093514bc..1b02b8820068 100644 --- a/arch/m68knommu/kernel/syscalltable.S +++ b/arch/m68knommu/kernel/syscalltable.S @@ -336,7 +336,7 @@ ENTRY(sys_call_table) .long sys_epoll_pwait /* 315 */ .long sys_utimensat .long sys_signalfd - .long sys_timerfd + .long sys_ni_syscall .long sys_eventfd .long sys_fallocate /* 320 */ diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 82480a1717d8..f798139e888e 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -660,7 +660,7 @@ einval: li v0, -EINVAL sys sys_ioprio_get 2 /* 4315 */ sys sys_utimensat 4 sys sys_signalfd 3 - sys sys_timerfd 4 + sys sys_ni_syscall 0 sys sys_eventfd 1 sys sys_fallocate 6 /* 4320 */ .endm diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index c2c10876da2e..a626be6baea3 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -475,7 +475,7 @@ sys_call_table: PTR sys_ioprio_get PTR sys_utimensat /* 5275 */ PTR sys_signalfd - PTR sys_timerfd + PTR sys_ni_syscall PTR sys_eventfd PTR sys_fallocate .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 01993ec3368b..9d5bcaf1b389 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -401,7 +401,7 @@ EXPORT(sysn32_call_table) PTR sys_ioprio_get PTR compat_sys_utimensat PTR compat_sys_signalfd /* 5280 */ - PTR compat_sys_timerfd + PTR sys_ni_syscall PTR sys_eventfd PTR sys_fallocate .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index dd68afce7da5..fd2019c1ec2d 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -523,7 +523,7 @@ sys_call_table: PTR sys_ioprio_get /* 4315 */ PTR compat_sys_utimensat PTR compat_sys_signalfd - PTR compat_sys_timerfd + PTR sys_ni_syscall PTR sys_eventfd PTR sys32_fallocate /* 4320 */ .size sys_call_table,.-sys_call_table diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b94d4502a477..cf030b004415 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -256,6 +256,9 @@ config IOMMU_VMERGE Most drivers don't have this problem; it is safe to say Y here. +config IOMMU_HELPER + def_bool PPC64 + config HOTPLUG_CPU bool "Support for enabling/disabling CPUs" depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC) diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c index 84239076a5b8..3a317cb0636a 100644 --- a/arch/powerpc/kernel/dma_64.c +++ b/arch/powerpc/kernel/dma_64.c @@ -31,8 +31,8 @@ static inline unsigned long device_to_mask(struct device *dev) static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { - return iommu_alloc_coherent(dev->archdata.dma_data, size, dma_handle, - device_to_mask(dev), flag, + return iommu_alloc_coherent(dev, dev->archdata.dma_data, size, + dma_handle, device_to_mask(dev), flag, dev->archdata.numa_node); } @@ -52,7 +52,7 @@ static dma_addr_t dma_iommu_map_single(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { - return iommu_map_single(dev->archdata.dma_data, vaddr, size, + return iommu_map_single(dev, dev->archdata.dma_data, vaddr, size, device_to_mask(dev), direction); } @@ -68,7 +68,7 @@ static void dma_iommu_unmap_single(struct device *dev, dma_addr_t dma_handle, static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction) { - return iommu_map_sg(dev->archdata.dma_data, sglist, nelems, + return iommu_map_sg(dev, sglist, nelems, device_to_mask(dev), direction); } diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index a3c406aca664..8f1f4e539c4b 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -31,6 +31,7 @@ #include <linux/string.h> #include <linux/dma-mapping.h> #include <linux/bitops.h> +#include <linux/iommu-helper.h> #include <asm/io.h> #include <asm/prom.h> #include <asm/iommu.h> @@ -81,17 +82,19 @@ static int __init setup_iommu(char *str) __setup("protect4gb=", setup_protect4gb); __setup("iommu=", setup_iommu); -static unsigned long iommu_range_alloc(struct iommu_table *tbl, +static unsigned long iommu_range_alloc(struct device *dev, + struct iommu_table *tbl, unsigned long npages, unsigned long *handle, unsigned long mask, unsigned int align_order) { - unsigned long n, end, i, start; + unsigned long n, end, start; unsigned long limit; int largealloc = npages > 15; int pass = 0; unsigned long align_mask; + unsigned long boundary_size; align_mask = 0xffffffffffffffffl >> (64 - align_order); @@ -136,14 +139,17 @@ static unsigned long iommu_range_alloc(struct iommu_table *tbl, start &= mask; } - n = find_next_zero_bit(tbl->it_map, limit, start); - - /* Align allocation */ - n = (n + align_mask) & ~align_mask; - - end = n + npages; + if (dev) + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + 1 << IOMMU_PAGE_SHIFT); + else + boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT); + /* 4GB boundary for iseries_hv_alloc and iseries_hv_map */ - if (unlikely(end >= limit)) { + n = iommu_area_alloc(tbl->it_map, limit, start, npages, + tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT, + align_mask); + if (n == -1) { if (likely(pass < 2)) { /* First failure, just rescan the half of the table. * Second failure, rescan the other half of the table. @@ -158,14 +164,7 @@ static unsigned long iommu_range_alloc(struct iommu_table *tbl, } } - for (i = n; i < end; i++) - if (test_bit(i, tbl->it_map)) { - start = i+1; - goto again; - } - - for (i = n; i < end; i++) - __set_bit(i, tbl->it_map); + end = n + npages; /* Bump the hint to a new block for small allocs. */ if (largealloc) { @@ -184,16 +183,17 @@ static unsigned long iommu_range_alloc(struct iommu_table *tbl, return n; } -static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page, - unsigned int npages, enum dma_data_direction direction, - unsigned long mask, unsigned int align_order) +static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, + void *page, unsigned int npages, + enum dma_data_direction direction, + unsigned long mask, unsigned int align_order) { unsigned long entry, flags; dma_addr_t ret = DMA_ERROR_CODE; spin_lock_irqsave(&(tbl->it_lock), flags); - entry = iommu_range_alloc(tbl, npages, NULL, mask, align_order); + entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order); if (unlikely(entry == DMA_ERROR_CODE)) { spin_unlock_irqrestore(&(tbl->it_lock), flags); @@ -224,7 +224,6 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned int npages) { unsigned long entry, free_entry; - unsigned long i; entry = dma_addr >> IOMMU_PAGE_SHIFT; free_entry = entry - tbl->it_offset; @@ -246,9 +245,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, } ppc_md.tce_free(tbl, entry, npages); - - for (i = 0; i < npages; i++) - __clear_bit(free_entry+i, tbl->it_map); + iommu_area_free(tbl->it_map, free_entry, npages); } static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, @@ -270,16 +267,18 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, spin_unlock_irqrestore(&(tbl->it_lock), flags); } -int iommu_map_sg(struct iommu_table *tbl, struct scatterlist *sglist, +int iommu_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, unsigned long mask, enum dma_data_direction direction) { + struct iommu_table *tbl = dev->archdata.dma_data; dma_addr_t dma_next = 0, dma_addr; unsigned long flags; struct scatterlist *s, *outs, *segstart; int outcount, incount, i; unsigned int align; unsigned long handle; + unsigned int max_seg_size; BUG_ON(direction == DMA_NONE); @@ -298,6 +297,7 @@ int iommu_map_sg(struct iommu_table *tbl, struct scatterlist *sglist, spin_lock_irqsave(&(tbl->it_lock), flags); + max_seg_size = dma_get_max_seg_size(dev); for_each_sg(sglist, s, nelems, i) { unsigned long vaddr, npages, entry, slen; @@ -314,7 +314,7 @@ int iommu_map_sg(struct iommu_table *tbl, struct scatterlist *sglist, if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && slen >= PAGE_SIZE && (vaddr & ~PAGE_MASK) == 0) align = PAGE_SHIFT - IOMMU_PAGE_SHIFT; - entry = iommu_range_alloc(tbl, npages, &handle, + entry = iommu_range_alloc(dev, tbl, npages, &handle, mask >> IOMMU_PAGE_SHIFT, align); DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen); @@ -344,7 +344,8 @@ int iommu_map_sg(struct iommu_table *tbl, struct scatterlist *sglist, /* We cannot merge if: * - allocated dma_addr isn't contiguous to previous allocation */ - if (novmerge || (dma_addr != dma_next)) { + if (novmerge || (dma_addr != dma_next) || + (outs->dma_length + s->length > max_seg_size)) { /* Can't merge: create a new segment */ segstart = s; outcount++; @@ -452,9 +453,6 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) { unsigned long sz; - unsigned long start_index, end_index; - unsigned long entries_per_4g; - unsigned long index; static int welcomed = 0; struct page *page; @@ -476,6 +474,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) #ifdef CONFIG_CRASH_DUMP if (ppc_md.tce_get) { + unsigned long index; unsigned long tceval; unsigned long tcecount = 0; @@ -506,23 +505,6 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size); #endif - /* - * DMA cannot cross 4 GB boundary. Mark last entry of each 4 - * GB chunk as reserved. - */ - if (protect4gb) { - entries_per_4g = 0x100000000l >> IOMMU_PAGE_SHIFT; - - /* Mark the last bit before a 4GB boundary as used */ - start_index = tbl->it_offset | (entries_per_4g - 1); - start_index -= tbl->it_offset; - - end_index = tbl->it_size; - - for (index = start_index; index < end_index - 1; index += entries_per_4g) - __set_bit(index, tbl->it_map); - } - if (!welcomed) { printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n", novmerge ? "disabled" : "enabled"); @@ -570,9 +552,9 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) * need not be page aligned, the dma_addr_t returned will point to the same * byte within the page as vaddr. */ -dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr, - size_t size, unsigned long mask, - enum dma_data_direction direction) +dma_addr_t iommu_map_single(struct device *dev, struct iommu_table *tbl, + void *vaddr, size_t size, unsigned long mask, + enum dma_data_direction direction) { dma_addr_t dma_handle = DMA_ERROR_CODE; unsigned long uaddr; @@ -589,7 +571,7 @@ dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr, ((unsigned long)vaddr & ~PAGE_MASK) == 0) align = PAGE_SHIFT - IOMMU_PAGE_SHIFT; - dma_handle = iommu_alloc(tbl, vaddr, npages, direction, + dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction, mask >> IOMMU_PAGE_SHIFT, align); if (dma_handle == DMA_ERROR_CODE) { if (printk_ratelimit()) { @@ -621,8 +603,9 @@ void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle, * Returns the virtual address of the buffer and sets dma_handle * to the dma address (mapping) of the first page. */ -void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size, - dma_addr_t *dma_handle, unsigned long mask, gfp_t flag, int node) +void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, + size_t size, dma_addr_t *dma_handle, + unsigned long mask, gfp_t flag, int node) { void *ret = NULL; dma_addr_t mapping; @@ -656,7 +639,7 @@ void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size, /* Set up tces to cover the allocated range */ nio_pages = size >> IOMMU_PAGE_SHIFT; io_order = get_iommu_order(size); - mapping = iommu_alloc(tbl, ret, nio_pages, DMA_BIDIRECTIONAL, + mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, mask >> IOMMU_PAGE_SHIFT, io_order); if (mapping == DMA_ERROR_CODE) { free_pages((unsigned long)ret, order); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 64488723162a..f80f90c4d58b 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -86,7 +86,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) return ret; } -void pgd_free(pgd_t *pgd) +void pgd_free(struct mm_struct *mm, pgd_t *pgd) { free_pages((unsigned long)pgd, PGDIR_ORDER); } @@ -123,7 +123,7 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) return ptepage; } -void pte_free_kernel(pte_t *pte) +void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { #ifdef CONFIG_SMP hash_page_sync(); @@ -131,7 +131,7 @@ void pte_free_kernel(pte_t *pte) free_page((unsigned long)pte); } -void pte_free(struct page *ptepage) +void pte_free(struct mm_struct *mm, struct page *ptepage) { #ifdef CONFIG_SMP hash_page_sync(); diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c index 6a0c6f6675cd..11fa3c772ed5 100644 --- a/arch/powerpc/platforms/iseries/iommu.c +++ b/arch/powerpc/platforms/iseries/iommu.c @@ -199,7 +199,7 @@ static struct iommu_table vio_iommu_table; void *iseries_hv_alloc(size_t size, dma_addr_t *dma_handle, gfp_t flag) { - return iommu_alloc_coherent(&vio_iommu_table, size, dma_handle, + return iommu_alloc_coherent(NULL, &vio_iommu_table, size, dma_handle, DMA_32BIT_MASK, flag, -1); } EXPORT_SYMBOL_GPL(iseries_hv_alloc); @@ -213,7 +213,7 @@ EXPORT_SYMBOL_GPL(iseries_hv_free); dma_addr_t iseries_hv_map(void *vaddr, size_t size, enum dma_data_direction direction) { - return iommu_map_single(&vio_iommu_table, vaddr, size, + return iommu_map_single(NULL, &vio_iommu_table, vaddr, size, DMA_32BIT_MASK, direction); } diff --git a/arch/ppc/mm/pgtable.c b/arch/ppc/mm/pgtable.c index fadacfd18806..409fcaa4994a 100644 --- a/arch/ppc/mm/pgtable.c +++ b/arch/ppc/mm/pgtable.c @@ -74,7 +74,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) return ret; } -void pgd_free(pgd_t *pgd) +void pgd_free(struct mm_struct *mm, pgd_t *pgd) { free_pages((unsigned long)pgd, PGDIR_ORDER); } @@ -111,7 +111,7 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) return ptepage; } -void pte_free_kernel(pte_t *pte) +void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { #ifdef CONFIG_SMP hash_page_sync(); @@ -119,7 +119,7 @@ void pte_free_kernel(pte_t *pte) free_page((unsigned long)pte); } -void pte_free(struct page *ptepage) +void pte_free(struct mm_struct *mm, struct page *ptepage) { #ifdef CONFIG_SMP hash_page_sync(); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 6ee1bedbd1bf..062c3d4c0394 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1698,14 +1698,6 @@ compat_sys_signalfd_wrapper: llgfr %r4,%r4 # compat_size_t jg compat_sys_signalfd - .globl compat_sys_timerfd_wrapper -compat_sys_timerfd_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - llgtr %r5,%r5 # struct compat_itimerspec * - jg compat_sys_timerfd - .globl sys_eventfd_wrapper sys_eventfd_wrapper: llgfr %r2,%r2 # unsigned int diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 9e26ed9fe4e7..25eac7802fc4 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -325,5 +325,5 @@ SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes_wrapper) SYSCALL(s390_fallocate,sys_fallocate,sys_fallocate_wrapper) SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat_wrapper) /* 315 */ SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd_wrapper) -SYSCALL(sys_timerfd,sys_timerfd,compat_sys_timerfd_wrapper) +NI_SYSCALL /* 317 old sys_timer_fd */ SYSCALL(sys_eventfd,sys_eventfd,sys_eventfd_wrapper) diff --git a/arch/sparc/kernel/systbls.S b/arch/sparc/kernel/systbls.S index 55722840859c..ee010f4532a0 100644 --- a/arch/sparc/kernel/systbls.S +++ b/arch/sparc/kernel/systbls.S @@ -79,7 +79,7 @@ sys_call_table: /*295*/ .long sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare /*300*/ .long sys_set_robust_list, sys_get_robust_list, sys_migrate_pages, sys_mbind, sys_get_mempolicy /*305*/ .long sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait -/*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd, sys_eventfd, sys_fallocate +/*310*/ .long sys_utimensat, sys_signalfd, sys_ni_syscall, sys_eventfd, sys_fallocate #ifdef CONFIG_SUNOS_EMUL /* Now the SunOS syscall table. */ diff --git a/arch/sparc64/kernel/iommu.c b/arch/sparc64/kernel/iommu.c index 070a4846c0cb..4b9115a4d92e 100644 --- a/arch/sparc64/kernel/iommu.c +++ b/arch/sparc64/kernel/iommu.c @@ -580,7 +580,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, /* Step 1: Prepare scatter list. */ - npages = prepare_sg(sglist, nelems); + npages = prepare_sg(dev, sglist, nelems); /* Step 2: Allocate a cluster and context, if necessary. */ diff --git a/arch/sparc64/kernel/iommu_common.c b/arch/sparc64/kernel/iommu_common.c index efd5dff85f60..72a4acfe8c7b 100644 --- a/arch/sparc64/kernel/iommu_common.c +++ b/arch/sparc64/kernel/iommu_common.c @@ -4,6 +4,7 @@ * Copyright (C) 1999 David S. Miller (davem@redhat.com) */ +#include <linux/dma-mapping.h> #include "iommu_common.h" /* You are _strongly_ advised to enable the following debugging code @@ -201,21 +202,24 @@ void verify_sglist(struct scatterlist *sglist, int nents, iopte_t *iopte, int np } #endif -unsigned long prepare_sg(struct scatterlist *sg, int nents) +unsigned long prepare_sg(struct device *dev, struct scatterlist *sg, int nents) { struct scatterlist *dma_sg = sg; unsigned long prev; u32 dent_addr, dent_len; + unsigned int max_seg_size; prev = (unsigned long) sg_virt(sg); prev += (unsigned long) (dent_len = sg->length); dent_addr = (u32) ((unsigned long)(sg_virt(sg)) & (IO_PAGE_SIZE - 1UL)); + max_seg_size = dma_get_max_seg_size(dev); while (--nents) { unsigned long addr; sg = sg_next(sg); addr = (unsigned long) sg_virt(sg); - if (! VCONTIG(prev, addr)) { + if (! VCONTIG(prev, addr) || + dent_len + sg->length > max_seg_size) { dma_sg->dma_address = dent_addr; dma_sg->dma_length = dent_len; dma_sg = sg_next(dma_sg); diff --git a/arch/sparc64/kernel/iommu_common.h b/arch/sparc64/kernel/iommu_common.h index 75b5a5814522..a90d046e8024 100644 --- a/arch/sparc64/kernel/iommu_common.h +++ b/arch/sparc64/kernel/iommu_common.h @@ -9,6 +9,7 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/scatterlist.h> +#include <linux/device.h> #include <asm/iommu.h> #include <asm/scatterlist.h> @@ -46,4 +47,4 @@ extern void verify_sglist(struct scatterlist *sg, int nents, iopte_t *iopte, int #define VCONTIG(__X, __Y) (((__X) == (__Y)) || \ (((__X) | (__Y)) << (64UL - PAGE_SHIFT)) == 0UL) -extern unsigned long prepare_sg(struct scatterlist *sg, int nents); +extern unsigned long prepare_sg(struct device *dev, struct scatterlist *sg, int nents); diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c index 1aa8e044b105..67d6dce90b1c 100644 --- a/arch/sparc64/kernel/pci_sun4v.c +++ b/arch/sparc64/kernel/pci_sun4v.c @@ -490,7 +490,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, goto bad; /* Step 1: Prepare scatter list. */ - npages = prepare_sg(sglist, nelems); + npages = prepare_sg(dev, sglist, nelems); /* Step 2: Allocate a cluster and context, if necessary. */ spin_lock_irqsave(&iommu->lock, flags); diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S index 06d10907d8ce..b8058906e727 100644 --- a/arch/sparc64/kernel/systbls.S +++ b/arch/sparc64/kernel/systbls.S @@ -80,7 +80,7 @@ sys_call_table32: .word sys_fchmodat, sys_faccessat, compat_sys_pselect6, compat_sys_ppoll, sys_unshare /*300*/ .word compat_sys_set_robust_list, compat_sys_get_robust_list, compat_sys_migrate_pages, compat_sys_mbind, compat_sys_get_mempolicy .word compat_sys_set_mempolicy, compat_sys_kexec_load, compat_sys_move_pages, sys_getcpu, compat_sys_epoll_pwait -/*310*/ .word compat_sys_utimensat, compat_sys_signalfd, compat_sys_timerfd, sys_eventfd, compat_sys_fallocate +/*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_ni_syscall, sys_eventfd, compat_sys_fallocate #endif /* CONFIG_COMPAT */ @@ -152,7 +152,7 @@ sys_call_table: .word sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare /*300*/ .word sys_set_robust_list, sys_get_robust_list, sys_migrate_pages, sys_mbind, sys_get_mempolicy .word sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait -/*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd, sys_eventfd, sys_fallocate +/*310*/ .word sys_utimensat, sys_signalfd, sys_ni_syscall, sys_eventfd, sys_fallocate #if defined(CONFIG_SUNOS_EMUL) || defined(CONFIG_SOLARIS_EMUL) || \ defined(CONFIG_SOLARIS_EMUL_MODULE) diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 55945db1313c..99e51d059a02 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -68,6 +68,10 @@ config IRQ_RELEASE_METHOD bool default y +config HZ + int + default 100 + menu "UML-specific options" config STATIC_LINK @@ -95,23 +99,6 @@ config LD_SCRIPT_DYN default y depends on !LD_SCRIPT_STATIC -config NET - bool "Networking support" - help - Unless you really know what you are doing, you should say Y here. - The reason is that some programs need kernel networking support even - when running on a stand-alone machine that isn't connected to any - other computer. If you are upgrading from an older kernel, you - should consider updating your networking tools too because changes - in the kernel and the tools often go hand in hand. The tools are - contained in the package net-tools, the location and version number - of which are given in <file:Documentation/Changes>. - - For a general introduction to Linux networking, it is highly - recommended to read the NET-HOWTO, available from - <http://www.tldp.org/docs.html#howto>. - - source "fs/Kconfig.binfmt" config HOSTFS @@ -145,7 +132,7 @@ config HPPFS by removing or changing anything in /proc which gives away the identity of a UML. - See <http://user-mode-linux.sf.net/hppfs.html> for more information. + See <http://user-mode-linux.sf.net/old/hppfs.html> for more information. You only need this if you are setting up a UML honeypot. Otherwise, it is safe to say 'N' here. @@ -189,8 +176,7 @@ config MAGIC_SYSRQ config SMP bool "Symmetric multi-processing support (EXPERIMENTAL)" default n - #SMP_BROKEN is for x86_64. - depends on EXPERIMENTAL && (!SMP_BROKEN || (BROKEN && SMP_BROKEN)) + depends on BROKEN help This option enables UML SMP support. It is NOT related to having a real SMP box. Not directly, at least. diff --git a/arch/um/Kconfig.char b/arch/um/Kconfig.char index 9a78d354f0b4..3a4b396d7979 100644 --- a/arch/um/Kconfig.char +++ b/arch/um/Kconfig.char @@ -18,7 +18,7 @@ config SSL lines on the UML that are usually made to show up on the host as ttys or ptys. - See <http://user-mode-linux.sourceforge.net/input.html> for more + See <http://user-mode-linux.sourceforge.net/old/input.html> for more information and command line examples of how to use this facility. Unless you have a specific reason for disabling this, say Y. diff --git a/arch/um/Kconfig.debug b/arch/um/Kconfig.debug index 1f6462ffd3e8..8fce5e536b0f 100644 --- a/arch/um/Kconfig.debug +++ b/arch/um/Kconfig.debug @@ -4,12 +4,12 @@ source "lib/Kconfig.debug" config GPROF bool "Enable gprof support" - depends on DEBUG_INFO + depends on DEBUG_INFO && FRAME_POINTER help This allows profiling of a User-Mode Linux kernel with the gprof utility. - See <http://user-mode-linux.sourceforge.net/gprof.html> for more + See <http://user-mode-linux.sourceforge.net/old/gprof.html> for more details. If you're involved in UML kernel development and want to use gprof, @@ -22,7 +22,7 @@ config GCOV This option allows developers to retrieve coverage data from a UML session. - See <http://user-mode-linux.sourceforge.net/gprof.html> for more + See <http://user-mode-linux.sourceforge.net/old/gprof.html> for more details. If you're involved in UML kernel development and want to use gcov, diff --git a/arch/um/Kconfig.net b/arch/um/Kconfig.net index 66e50026ade9..9e9a4aaa703d 100644 --- a/arch/um/Kconfig.net +++ b/arch/um/Kconfig.net @@ -14,7 +14,7 @@ config UML_NET For more information, including explanations of the networking and sample configurations, see - <http://user-mode-linux.sourceforge.net/networking.html>. + <http://user-mode-linux.sourceforge.net/old/networking.html>. If you'd like to be able to enable networking in the User-Mode linux environment, say Y; otherwise say N. Note that you must @@ -38,7 +38,7 @@ config UML_NET_ETHERTAP CONFIG_NETLINK_DEV configured as Y or M. For more information, see - <http://user-mode-linux.sourceforge.net/networking.html> That site + <http://user-mode-linux.sourceforge.net/old/networking.html> That site has examples of the UML command line to use to enable Ethertap networking. @@ -72,7 +72,7 @@ config UML_NET_SLIP To use this, your host must support slip devices. For more information, see - <http://user-mode-linux.sourceforge.net/networking.html>. That site + <http://user-mode-linux.sourceforge.net/old/networking.html>. has examples of the UML command line to use to enable slip networking, and details of a few quirks with it. @@ -96,7 +96,7 @@ config UML_NET_DAEMON networking daemon on the host. For more information, see - <http://user-mode-linux.sourceforge.net/networking.html> That site + <http://user-mode-linux.sourceforge.net/old/networking.html> That site has examples of the UML command line to use to enable Daemon networking. @@ -144,7 +144,7 @@ config UML_NET_MCAST To use this, your host kernel(s) must support IP Multicasting. For more information, see - <http://user-mode-linux.sourceforge.net/networking.html> That site + <http://user-mode-linux.sourceforge.net/old/networking.html> That site has examples of the UML command line to use to enable Multicast networking, and notes about the security of this approach. @@ -165,7 +165,7 @@ config UML_NET_PCAP installed in order to build the pcap transport into UML. For more information, see - <http://user-mode-linux.sourceforge.net/networking.html> That site + <http://user-mode-linux.sourceforge.net/old/networking.html> That site has examples of the UML command line to use to enable this option. If you intend to use UML as a network monitor for the host, say diff --git a/arch/um/Makefile b/arch/um/Makefile index ba6813a4aa37..cb4af9bf2074 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -49,7 +49,7 @@ SYS_DIR := $(ARCH_DIR)/include/sysdep-$(SUBARCH) # # These apply to USER_CFLAGS to. -KBUILD_CFLAGS += $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \ +KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \ $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ -Din6addr_loopback=kernel_in6addr_loopback \ -Din6addr_any=kernel_in6addr_any @@ -58,7 +58,7 @@ KBUILD_AFLAGS += $(ARCH_INCLUDE) USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -D__KERNEL__,,\ $(patsubst -I%,,$(KBUILD_CFLAGS)))) $(ARCH_INCLUDE) $(MODE_INCLUDE) \ - -D_FILE_OFFSET_BITS=64 + $(filter -I%,$(CFLAGS)) -D_FILE_OFFSET_BITS=64 include $(srctree)/$(ARCH_DIR)/Makefile-$(SUBARCH) @@ -130,7 +130,9 @@ CPPFLAGS_vmlinux.lds = -U$(SUBARCH) -DSTART=$(START) -DELF_ARCH=$(ELF_ARCH) \ # The wrappers will select whether using "malloc" or the kernel allocator. LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc -CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) +LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) + +CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) define cmd_vmlinux__ $(CC) $(CFLAGS_vmlinux) -o $@ \ -Wl,-T,$(vmlinux-lds) $(vmlinux-init) \ @@ -158,7 +160,7 @@ ifneq ($(KBUILD_SRC),) $(Q)mkdir -p $(objtree)/include/asm-um $(Q)ln -fsn $(srctree)/include/asm-um/$(basename $(notdir $@))-$(SUBARCH)$(suffix $@) $@ else - $(Q)cd $(TOPDIR)/$(dir $@) ; \ + $(Q)cd $(srctree)/$(dir $@) ; \ ln -sf $(basename $(notdir $@))-$(SUBARCH)$(suffix $@) $(notdir $@) endif @@ -168,7 +170,7 @@ ifneq ($(KBUILD_SRC),) $(Q)mkdir -p $(objtree)/include/asm-um $(Q)ln -fsn $(srctree)/include/asm-$(HEADER_ARCH) include/asm-um/arch else - $(Q)cd $(TOPDIR)/include/asm-um && ln -fsn ../asm-$(HEADER_ARCH) arch + $(Q)cd $(srctree)/include/asm-um && ln -fsn ../asm-$(HEADER_ARCH) arch endif $(objtree)/$(ARCH_DIR)/include: diff --git a/arch/um/Makefile-tt b/arch/um/Makefile-tt deleted file mode 100644 index 03f7b10cfd0b..000000000000 --- a/arch/um/Makefile-tt +++ /dev/null @@ -1,5 +0,0 @@ -# -# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -# Licensed under the GPL -# - diff --git a/arch/um/defconfig b/arch/um/defconfig index f609edede065..86db2862f222 100644 --- a/arch/um/defconfig +++ b/arch/um/defconfig @@ -77,7 +77,7 @@ CONFIG_LD_SCRIPT_DYN=y CONFIG_NET=y CONFIG_BINFMT_ELF=y CONFIG_BINFMT_MISC=m -# CONFIG_HOSTFS is not set +CONFIG_HOSTFS=y # CONFIG_HPPFS is not set CONFIG_MCONSOLE=y CONFIG_MAGIC_SYSRQ=y @@ -188,7 +188,7 @@ CONFIG_CON_CHAN="xterm" CONFIG_SSL_CHAN="pts" CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y -CONFIG_LEGACY_PTY_COUNT=256 +CONFIG_LEGACY_PTY_COUNT=32 # CONFIG_WATCHDOG is not set CONFIG_UML_SOUND=m CONFIG_SOUND=m @@ -508,7 +508,7 @@ CONFIG_DEBUG_KERNEL=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set -CONFIG_DEBUG_SLAB=y +# CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_SLAB_LEAK is not set # CONFIG_DEBUG_MUTEXES is not set # CONFIG_DEBUG_SPINLOCK is not set diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 83bf15a3dda8..2c898c4d6b6a 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -8,6 +8,7 @@ #include "chan_kern.h" #include "irq_kern.h" #include "irq_user.h" +#include "kern_util.h" #include "os.h" #define LINE_BUFSIZE 4096 @@ -48,7 +49,7 @@ static int write_room(struct line *line) n = line->head - line->tail; if (n <= 0) - n = LINE_BUFSIZE + n; /* The other case */ + n += LINE_BUFSIZE; /* The other case */ return n - 1; } @@ -58,17 +59,10 @@ int line_write_room(struct tty_struct *tty) unsigned long flags; int room; - if (tty->stopped) - return 0; - spin_lock_irqsave(&line->lock, flags); room = write_room(line); spin_unlock_irqrestore(&line->lock, flags); - /*XXX: Warning to remove */ - if (0 == room) - printk(KERN_DEBUG "%s: %s: no room left in buffer\n", - __FUNCTION__,tty->name); return room; } @@ -79,8 +73,7 @@ int line_chars_in_buffer(struct tty_struct *tty) int ret; spin_lock_irqsave(&line->lock, flags); - - /*write_room subtracts 1 for the needed NULL, so we readd it.*/ + /* write_room subtracts 1 for the needed NULL, so we readd it.*/ ret = LINE_BUFSIZE - (write_room(line) + 1); spin_unlock_irqrestore(&line->lock, flags); @@ -184,10 +177,6 @@ void line_flush_buffer(struct tty_struct *tty) unsigned long flags; int err; - /*XXX: copied from line_write, verify if it is correct!*/ - if (tty->stopped) - return; - spin_lock_irqsave(&line->lock, flags); err = flush_buffer(line); spin_unlock_irqrestore(&line->lock, flags); @@ -213,9 +202,6 @@ int line_write(struct tty_struct *tty, const unsigned char *buf, int len) unsigned long flags; int n, ret = 0; - if (tty->stopped) - return 0; - spin_lock_irqsave(&line->lock, flags); if (line->head != line->tail) ret = buffer_data(line, buf, len); @@ -788,9 +774,11 @@ static irqreturn_t winch_interrupt(int irq, void *data) tty = winch->tty; if (tty != NULL) { line = tty->driver_data; - chan_window_size(&line->chan_list, &tty->winsize.ws_row, - &tty->winsize.ws_col); - kill_pgrp(tty->pgrp, SIGWINCH, 1); + if (line != NULL) { + chan_window_size(&line->chan_list, &tty->winsize.ws_row, + &tty->winsize.ws_col); + kill_pgrp(tty->pgrp, SIGWINCH, 1); + } } out: if (winch->fd != -1) diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 0f3c7d14a6e3..ebb265c07e4d 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -1,23 +1,25 @@ /* * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include "linux/console.h" -#include "linux/ctype.h" -#include "linux/interrupt.h" -#include "linux/list.h" -#include "linux/mm.h" -#include "linux/module.h" -#include "linux/notifier.h" -#include "linux/reboot.h" -#include "linux/proc_fs.h" -#include "linux/slab.h" -#include "linux/syscalls.h" -#include "linux/utsname.h" -#include "linux/workqueue.h" -#include "asm/uaccess.h" +#include <linux/console.h> +#include <linux/ctype.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/notifier.h> +#include <linux/reboot.h> +#include <linux/proc_fs.h> +#include <linux/slab.h> +#include <linux/syscalls.h> +#include <linux/utsname.h> +#include <linux/workqueue.h> +#include <linux/mutex.h> +#include <asm/uaccess.h> + #include "init.h" #include "irq_kern.h" #include "irq_user.h" @@ -305,7 +307,9 @@ void mconsole_stop(struct mc_request *req) deactivate_fd(req->originating_fd, MCONSOLE_IRQ); os_set_fd_block(req->originating_fd, 1); mconsole_reply(req, "stopped", 0, 0); - while (mconsole_get_request(req->originating_fd, req)) { + for (;;) { + if (!mconsole_get_request(req->originating_fd, req)) + continue; if (req->cmd->handler == mconsole_go) break; if (req->cmd->handler == mconsole_stop) { @@ -358,7 +362,7 @@ struct unplugged_pages { void *pages[UNPLUGGED_PER_PAGE]; }; -static DECLARE_MUTEX(plug_mem_mutex); +static DEFINE_MUTEX(plug_mem_mutex); static unsigned long long unplugged_pages_count = 0; static LIST_HEAD(unplugged_pages); static int unplug_index = UNPLUGGED_PER_PAGE; @@ -394,7 +398,7 @@ static int mem_config(char *str, char **error_out) diff /= PAGE_SIZE; - down(&plug_mem_mutex); + mutex_lock(&plug_mem_mutex); for (i = 0; i < diff; i++) { struct unplugged_pages *unplugged; void *addr; @@ -451,7 +455,7 @@ static int mem_config(char *str, char **error_out) err = 0; out_unlock: - up(&plug_mem_mutex); + mutex_unlock(&plug_mem_mutex); out: return err; } @@ -741,7 +745,6 @@ void mconsole_stack(struct mc_request *req) { char *ptr = req->request.data; int pid_requested= -1; - struct task_struct *from = NULL; struct task_struct *to = NULL; /* @@ -763,9 +766,7 @@ void mconsole_stack(struct mc_request *req) return; } - from = current; - - to = find_task_by_pid(pid_requested); + to = find_task_by_pid_ns(pid_requested, &init_pid_ns); if ((to == NULL) || (pid_requested == 0)) { mconsole_reply(req, "Couldn't find that pid", 1, 0); return; @@ -795,6 +796,8 @@ static int __init mconsole_init(void) printk(KERN_ERR "Failed to initialize management console\n"); return 1; } + if (os_set_fd_block(sock, 0)) + goto out; register_reboot_notifier(&reboot_notifier); @@ -803,7 +806,7 @@ static int __init mconsole_init(void) "mconsole", (void *)sock); if (err) { printk(KERN_ERR "Failed to get IRQ for management console\n"); - return 1; + goto out; } if (notify_socket != NULL) { @@ -819,6 +822,10 @@ static int __init mconsole_init(void) printk(KERN_INFO "mconsole (version %d) initialized on %s\n", MCONSOLE_VERSION, mconsole_socket_name); return 0; + + out: + os_close_file(sock); + return 1; } __initcall(mconsole_init); diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c index 430c024a19b0..13af2f03ed84 100644 --- a/arch/um/drivers/mconsole_user.c +++ b/arch/um/drivers/mconsole_user.c @@ -83,9 +83,8 @@ int mconsole_get_request(int fd, struct mc_request *req) int len; req->originlen = sizeof(req->origin); - req->len = recvfrom(fd, &req->request, sizeof(req->request), - MSG_DONTWAIT, (struct sockaddr *) req->origin, - &req->originlen); + req->len = recvfrom(fd, &req->request, sizeof(req->request), 0, + (struct sockaddr *) req->origin, &req->originlen); if (req->len < 0) return 0; diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 3c6c44ca1ffa..1e8f41a99511 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -318,7 +318,7 @@ static void setup_etheraddr(char *str, unsigned char *addr, char *name) if (str == NULL) goto random; - for (i = 0;i < 6; i++) { + for (i = 0; i < 6; i++) { addr[i] = simple_strtoul(str, &end, 16); if ((end == str) || ((*end != ':') && (*end != ',') && (*end != '\0'))) { @@ -343,14 +343,13 @@ static void setup_etheraddr(char *str, unsigned char *addr, char *name) } if (!is_local_ether_addr(addr)) { printk(KERN_WARNING - "Warning: attempt to assign a globally valid ethernet " + "Warning: Assigning a globally valid ethernet " "address to a device\n"); - printk(KERN_WARNING "You should better enable the 2nd " - "rightmost bit in the first byte of the MAC,\n"); + printk(KERN_WARNING "You should set the 2nd rightmost bit in " + "the first byte of the MAC,\n"); printk(KERN_WARNING "i.e. %02x:%02x:%02x:%02x:%02x:%02x\n", addr[0] | 0x02, addr[1], addr[2], addr[3], addr[4], addr[5]); - goto random; } return; @@ -368,7 +367,6 @@ static struct platform_driver uml_net_driver = { .name = DRIVER_NAME, }, }; -static int driver_registered; static void net_device_release(struct device *dev) { @@ -383,6 +381,12 @@ static void net_device_release(struct device *dev) free_netdev(netdev); } +/* + * Ensures that platform_driver_register is called only once by + * eth_configure. Will be set in an initcall. + */ +static int driver_registered; + static void eth_configure(int n, void *init, char *mac, struct transport *transport) { diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c index 29185cad9fff..abf2653f5517 100644 --- a/arch/um/drivers/net_user.c +++ b/arch/um/drivers/net_user.c @@ -201,7 +201,7 @@ static int change_tramp(char **argv, char *output, int output_len) close(fds[1]); if (pid > 0) - helper_wait(pid, 0, "change_tramp"); + helper_wait(pid); return pid; } diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c index 330543b3129b..19930081d3d8 100644 --- a/arch/um/drivers/port_kern.c +++ b/arch/um/drivers/port_kern.c @@ -6,6 +6,7 @@ #include "linux/completion.h" #include "linux/interrupt.h" #include "linux/list.h" +#include "linux/mutex.h" #include "asm/atomic.h" #include "init.h" #include "irq_kern.h" @@ -120,7 +121,7 @@ static int port_accept(struct port_list *port) return 0; } -static DECLARE_MUTEX(ports_sem); +static DEFINE_MUTEX(ports_mutex); static LIST_HEAD(ports); static void port_work_proc(struct work_struct *unused) @@ -161,7 +162,7 @@ void *port_data(int port_num) struct port_dev *dev = NULL; int fd; - down(&ports_sem); + mutex_lock(&ports_mutex); list_for_each(ele, &ports) { port = list_entry(ele, struct port_list, list); if (port->port == port_num) @@ -216,7 +217,7 @@ void *port_data(int port_num) out_free: kfree(port); out: - up(&ports_sem); + mutex_unlock(&ports_mutex); return dev; } diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index e942e836f995..71f0959c1535 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -5,6 +5,7 @@ * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. */ +#include <linux/sched.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/miscdevice.h> diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c index b8711e50da80..8b80505a3fb0 100644 --- a/arch/um/drivers/slip_user.c +++ b/arch/um/drivers/slip_user.c @@ -109,7 +109,7 @@ static int slip_tramp(char **argv, int fd) read_output(fds[0], output, output_len); printk("%s", output); - err = helper_wait(pid, 0, argv[0]); + err = helper_wait(pid); close(fds[0]); out_free: diff --git a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c index 89c1be225fda..a0ada8fec72a 100644 --- a/arch/um/drivers/slirp_user.c +++ b/arch/um/drivers/slirp_user.c @@ -98,7 +98,7 @@ static void slirp_close(int fd, void *data) "(%d)\n", pri->pid, errno); } #endif - err = helper_wait(pri->pid, 1, "slirp_close"); + err = helper_wait(pri->pid); if (err < 0) return; diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c index 875d60d0c6a2..f1786e64607f 100644 --- a/arch/um/drivers/ssl.c +++ b/arch/um/drivers/ssl.c @@ -15,7 +15,6 @@ #include "line.h" #include "ssl.h" #include "chan_kern.h" -#include "kern_util.h" #include "kern.h" #include "init.h" #include "irq_user.h" diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c index 656036e90b19..cec0c33cdd39 100644 --- a/arch/um/drivers/stdio_console.c +++ b/arch/um/drivers/stdio_console.c @@ -22,7 +22,6 @@ #include "stdio_console.h" #include "line.h" #include "chan_kern.h" -#include "kern_util.h" #include "irq_user.h" #include "mconsole_kern.h" #include "init.h" diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 99f9f9605e9c..be3a2797dac4 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -49,6 +49,7 @@ #include "irq_user.h" #include "irq_kern.h" #include "ubd_user.h" +#include "kern_util.h" #include "os.h" #include "mem.h" #include "mem_kern.h" @@ -229,7 +230,7 @@ static int proc_ide_read_media(char *page, char **start, off_t off, int count, return len; } -static void make_ide_entries(char *dev_name) +static void make_ide_entries(const char *dev_name) { struct proc_dir_entry *dir, *ent; char name[64]; @@ -244,7 +245,7 @@ static void make_ide_entries(char *dev_name) ent->data = NULL; ent->read_proc = proc_ide_read_media; ent->write_proc = NULL; - sprintf(name,"ide0/%s", dev_name); + snprintf(name, sizeof(name), "ide0/%s", dev_name); proc_symlink(dev_name, proc_ide_root, name); } @@ -437,7 +438,10 @@ __uml_help(ubd_setup, " machine by running 'dd' on the device. <n> must be in the range\n" " 0 to 7. Appending an 'r' to the number will cause that device\n" " to be mounted read-only. For example ubd1r=./ext_fs. Appending\n" -" an 's' will cause data to be written to disk on the host immediately.\n\n" +" an 's' will cause data to be written to disk on the host immediately.\n" +" 'c' will cause the device to be treated as being shared between multiple\n" +" UMLs and file locking will be turned off - this is appropriate for a\n" +" cluster filesystem and inappropriate at almost all other times.\n\n" ); static int udb_setup(char *str) @@ -456,20 +460,6 @@ __uml_help(udb_setup, " in the boot output.\n\n" ); -static int fakehd_set = 0; -static int fakehd(char *str) -{ - printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n"); - fakehd_set = 1; - return 1; -} - -__setup("fakehd", fakehd); -__uml_help(fakehd, -"fakehd\n" -" Change the ubd device name to \"hd\".\n\n" -); - static void do_ubd_request(struct request_queue * q); /* Only changed by ubd_init, which is an initcall. */ @@ -718,8 +708,10 @@ static int ubd_add(int n, char **error_out) ubd_disk_register(fake_major, ubd_dev->size, n, &fake_gendisk[n]); - /* perhaps this should also be under the "if (fake_major)" above */ - /* using the fake_disk->disk_name and also the fakehd_set name */ + /* + * Perhaps this should also be under the "if (fake_major)" above + * using the fake_disk->disk_name + */ if (fake_ide) make_ide_entries(ubd_gendisk[n]->disk_name); diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c index 48fc7452bc1d..b591bb9c41dd 100644 --- a/arch/um/drivers/ubd_user.c +++ b/arch/um/drivers/ubd_user.c @@ -16,7 +16,6 @@ #include <sys/mman.h> #include <sys/param.h> #include "asm/types.h" -#include "kern_util.h" #include "user.h" #include "ubd_user.h" #include "os.h" diff --git a/arch/um/drivers/vde_user.c b/arch/um/drivers/vde_user.c index d9941fe5f931..56533db25343 100644 --- a/arch/um/drivers/vde_user.c +++ b/arch/um/drivers/vde_user.c @@ -80,7 +80,7 @@ void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init) vpri->args = kmalloc(sizeof(struct vde_open_args), UM_GFP_KERNEL); if (vpri->args == NULL) { - printk(UM_KERN_ERR "vde_init_libstuff - vde_open_args" + printk(UM_KERN_ERR "vde_init_libstuff - vde_open_args " "allocation failed"); return; } diff --git a/arch/um/include/arch.h b/arch/um/include/arch.h index 49c601ff2bac..2de92a08a76b 100644 --- a/arch/um/include/arch.h +++ b/arch/um/include/arch.h @@ -10,6 +10,6 @@ extern void arch_check_bugs(void); extern int arch_fixup(unsigned long address, struct uml_pt_regs *regs); -extern int arch_handle_signal(int sig, struct uml_pt_regs *regs); +extern void arch_examine_signal(int sig, struct uml_pt_regs *regs); #endif diff --git a/arch/um/include/as-layout.h b/arch/um/include/as-layout.h index a5cdf953e04a..606bb5c7fdf6 100644 --- a/arch/um/include/as-layout.h +++ b/arch/um/include/as-layout.h @@ -10,23 +10,31 @@ #include "kern_constants.h" /* - * Assembly doesn't want any casting, but C does, so define these - * without casts here, and define new symbols with casts inside the C - * section. + * Stolen from linux/const.h, which can't be directly included since + * this is used in userspace code, which has no access to the kernel + * headers. Changed to be suitable for adding casts to the start, + * rather than "UL" to the end. */ -#define ASM_STUB_CODE (UML_CONFIG_TOP_ADDR - 2 * UM_KERN_PAGE_SIZE) -#define ASM_STUB_DATA (UML_CONFIG_TOP_ADDR - UM_KERN_PAGE_SIZE) -#define ASM_STUB_START ASM_STUB_CODE -/* - * This file is included by the assembly stubs, which just want the - * definitions above. +/* Some constant macros are used in both assembler and + * C code. Therefore we cannot annotate them always with + * 'UL' and other type specifiers unilaterally. We + * use the following macros to deal with this. */ -#ifndef __ASSEMBLY__ -#define STUB_CODE ((unsigned long) ASM_STUB_CODE) -#define STUB_DATA ((unsigned long) ASM_STUB_DATA) -#define STUB_START ((unsigned long) ASM_STUB_START) +#ifdef __ASSEMBLY__ +#define _AC(X, Y) (Y) +#else +#define __AC(X, Y) (X (Y)) +#define _AC(X, Y) __AC(X, Y) +#endif + +#define STUB_START _AC(, 0x100000) +#define STUB_CODE _AC((unsigned long), STUB_START) +#define STUB_DATA _AC((unsigned long), STUB_CODE + UM_KERN_PAGE_SIZE) +#define STUB_END _AC((unsigned long), STUB_DATA + UM_KERN_PAGE_SIZE) + +#ifndef __ASSEMBLY__ #include "sysdep/ptrace.h" diff --git a/arch/um/include/chan_user.h b/arch/um/include/chan_user.h index 5a2263e05bb2..9b9ced85b703 100644 --- a/arch/um/include/chan_user.h +++ b/arch/um/include/chan_user.h @@ -48,7 +48,7 @@ extern void register_winch_irq(int fd, int tty_fd, int pid, #define __channel_help(fn, prefix) \ __uml_help(fn, prefix "[0-9]*=<channel description>\n" \ " Attach a console or serial line to a host channel. See\n" \ -" http://user-mode-linux.sourceforge.net/input.html for a complete\n" \ +" http://user-mode-linux.sourceforge.net/old/input.html for a complete\n" \ " description of this switch.\n\n" \ ); diff --git a/arch/um/include/common-offsets.h b/arch/um/include/common-offsets.h index 0edab695ed4e..b54bd35585c2 100644 --- a/arch/um/include/common-offsets.h +++ b/arch/um/include/common-offsets.h @@ -18,6 +18,7 @@ DEFINE_STR(UM_KERN_WARNING, KERN_WARNING); DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE); DEFINE_STR(UM_KERN_INFO, KERN_INFO); DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG); +DEFINE_STR(UM_KERN_CONT, KERN_CONT); DEFINE(UM_ELF_CLASS, ELF_CLASS); DEFINE(UM_ELFCLASS32, ELFCLASS32); diff --git a/arch/um/include/init.h b/arch/um/include/init.h index cebc6cae9190..b00a95741d41 100644 --- a/arch/um/include/init.h +++ b/arch/um/include/init.h @@ -40,6 +40,20 @@ typedef int (*initcall_t)(void); typedef void (*exitcall_t)(void); +#ifndef __KERNEL__ +#ifndef __section +# define __section(S) __attribute__ ((__section__(#S))) +#endif + +#if __GNUC_MINOR__ >= 3 +# define __used __attribute__((__used__)) +#else +# define __used __attribute__((__unused__)) +#endif + +#else +#include <linux/compiler.h> +#endif /* These are for everybody (although not all archs will actually discard it in modules) */ #define __init __section(.init.text) @@ -127,14 +141,3 @@ extern struct uml_param __uml_setup_start, __uml_setup_end; #endif #endif /* _LINUX_UML_INIT_H */ - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/irq_user.h b/arch/um/include/irq_user.h index 884a9c17eea0..e60b31873de1 100644 --- a/arch/um/include/irq_user.h +++ b/arch/um/include/irq_user.h @@ -14,7 +14,6 @@ struct irq_fd { int fd; int type; int irq; - int pid; int events; int current_events; }; diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h index 74ce8e5370a6..3c341222d252 100644 --- a/arch/um/include/kern_util.h +++ b/arch/um/include/kern_util.h @@ -9,107 +9,61 @@ #include "sysdep/ptrace.h" #include "sysdep/faultinfo.h" -typedef void (*kern_hndl)(int, struct uml_pt_regs *); - -struct kern_handlers { - kern_hndl relay_signal; - kern_hndl winch; - kern_hndl bus_handler; - kern_hndl page_fault; - kern_hndl sigio_handler; - kern_hndl timer_handler; -}; - -extern const struct kern_handlers handlinfo_kern; +extern int uml_exitcode; extern int ncpus; -extern char *gdb_init; extern int kmalloc_ok; -extern int jail; -extern int nsyscalls; -#define UML_ROUND_DOWN(addr) ((void *)(((unsigned long) addr) & PAGE_MASK)) #define UML_ROUND_UP(addr) \ - UML_ROUND_DOWN(((unsigned long) addr) + PAGE_SIZE - 1) + ((((unsigned long) addr) + PAGE_SIZE - 1) & PAGE_MASK) -extern int kernel_fork(unsigned long flags, int (*fn)(void *), void * arg); -extern int kernel_thread_proc(void *data); -extern void syscall_segv(int sig); -extern int current_pid(void); extern unsigned long alloc_stack(int order, int atomic); +extern void free_stack(unsigned long stack, int order); + extern int do_signal(void); -extern int is_stack_fault(unsigned long sp); +extern void copy_sc(struct uml_pt_regs *regs, void *from); +extern void interrupt_end(void); +extern void relay_signal(int sig, struct uml_pt_regs *regs); + extern unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, struct uml_pt_regs *regs); extern int handle_page_fault(unsigned long address, unsigned long ip, int is_write, int is_user, int *code_out); -extern void syscall_ready(void); -extern void set_tracing(void *t, int tracing); -extern int is_tracing(void *task); -extern int segv_syscall(void); -extern void kern_finish_exec(void *task, int new_pid, unsigned long stack); -extern unsigned long page_mask(void); -extern int need_finish_fork(void); -extern void free_stack(unsigned long stack, int order); -extern void add_input_request(int op, void (*proc)(int), void *arg); -extern char *current_cmd(void); -extern void timer_handler(int sig, struct uml_pt_regs *regs); -extern int set_signals(int enable); -extern int pid_to_processor_id(int pid); -extern void deliver_signals(void *t); -extern int next_trap_index(int max); -extern void default_idle(void); -extern void finish_fork(void); -extern void paging_init(void); -extern void init_flush_vm(void); -extern void *syscall_sp(void *t); -extern void syscall_trace(struct uml_pt_regs *regs, int entryexit); + extern unsigned int do_IRQ(int irq, struct uml_pt_regs *regs); -extern void interrupt_end(void); -extern void initial_thread_cb(void (*proc)(void *), void *arg); -extern int debugger_signal(int status, int pid); -extern void debugger_parent_signal(int status, int pid); -extern void child_signal(int pid, int status); -extern int init_ptrace_proxy(int idle_pid, int startup, int stop); -extern int init_parent_proxy(int pid); -extern int singlestepping(void *t); -extern void check_stack_overflow(void *ptr); -extern void relay_signal(int sig, struct uml_pt_regs *regs); -extern int user_context(unsigned long sp); -extern void timer_irq(struct uml_pt_regs *regs); -extern void do_uml_exitcalls(void); -extern int attach_debugger(int idle_pid, int pid, int stop); -extern int config_gdb(char *str); -extern int remove_gdb(void); -extern char *uml_strdup(char *string); -extern void unprotect_kernel_mem(void); -extern void protect_kernel_mem(void); -extern void uml_cleanup(void); -extern void lock_signalled_task(void *t); -extern void IPI_handler(int cpu); -extern int jail_setup(char *line, int *add); -extern void *get_init_task(void); -extern int clear_user_proc(void *buf, int size); -extern int copy_to_user_proc(void *to, void *from, int size); -extern int copy_from_user_proc(void *to, void *from, int size); -extern int strlen_user_proc(char *str); -extern long execute_syscall(void *r); extern int smp_sigio_handler(void); -extern void *get_current(void); -extern struct task_struct *get_task(int pid, int require); -extern void machine_halt(void); +extern void initial_thread_cb(void (*proc)(void *), void *arg); extern int is_syscall(unsigned long addr); +extern void timer_handler(int sig, struct uml_pt_regs *regs); -extern void free_irq(unsigned int, void *); -extern int cpu(void); +extern void timer_handler(int sig, struct uml_pt_regs *regs); -extern void time_init_kern(void); +extern int start_uml(void); +extern void paging_init(void); -/* Are we disallowed to sleep? Used to choose between GFP_KERNEL and GFP_ATOMIC. */ +extern void uml_cleanup(void); +extern void do_uml_exitcalls(void); + +/* + * Are we disallowed to sleep? Used to choose between GFP_KERNEL and + * GFP_ATOMIC. + */ extern int __cant_sleep(void); -extern void sigio_handler(int sig, struct uml_pt_regs *regs); -extern void copy_sc(struct uml_pt_regs *regs, void *from); +extern void *get_current(void); +extern int copy_from_user_proc(void *to, void *from, int size); +extern int cpu(void); +extern char *uml_strdup(const char *string); + extern unsigned long to_irq_stack(unsigned long *mask_out); -unsigned long from_irq_stack(int nested); -extern int start_uml(void); +extern unsigned long from_irq_stack(int nested); + +extern void syscall_trace(struct uml_pt_regs *regs, int entryexit); +extern int singlestepping(void *t); + +extern void segv_handler(int sig, struct uml_pt_regs *regs); +extern void bus_handler(int sig, struct uml_pt_regs *regs); +extern void winch(int sig, struct uml_pt_regs *regs); +extern void fatal_sigsegv(void) __attribute__ ((noreturn)); + + #endif diff --git a/arch/um/include/mem_user.h b/arch/um/include/mem_user.h index a54514d2cc3a..46384acd547b 100644 --- a/arch/um/include/mem_user.h +++ b/arch/um/include/mem_user.h @@ -46,9 +46,6 @@ extern int iomem_size; #define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) -extern unsigned long host_task_size; -extern unsigned long task_size; - extern int init_mem_user(void); extern void setup_memory(void *entry); extern unsigned long find_iomem(char *driver, unsigned long *len_out); @@ -59,9 +56,7 @@ extern void setup_physmem(unsigned long start, unsigned long usable, unsigned long len, unsigned long long highmem); extern void add_iomem(char *name, int fd, unsigned long size); extern unsigned long phys_offset(unsigned long phys); -extern void unmap_physmem(void); extern void map_memory(unsigned long virt, unsigned long phys, unsigned long len, int r, int w, int x); -extern unsigned long get_kmem_end(void); #endif diff --git a/arch/um/include/misc_constants.h b/arch/um/include/misc_constants.h deleted file mode 100644 index 989bc08de36e..000000000000 --- a/arch/um/include/misc_constants.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __MISC_CONSTANT_H_ -#define __MISC_CONSTANT_H_ - -#include <user_constants.h> - -#endif diff --git a/arch/um/include/os.h b/arch/um/include/os.h index 6f0d1c741bca..0b6b62733303 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h @@ -8,7 +8,6 @@ #include <stdarg.h> #include "irq_user.h" -#include "kern_util.h" #include "longjmp.h" #include "mm_id.h" #include "sysdep/tls.h" @@ -128,33 +127,31 @@ static inline struct openflags of_cloexec(struct openflags flags) extern int os_stat_file(const char *file_name, struct uml_stat *buf); extern int os_stat_fd(const int fd, struct uml_stat *buf); extern int os_access(const char *file, int mode); -extern int os_get_exec_close(int fd, int *close_on_exec); extern int os_set_exec_close(int fd); extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg); extern int os_get_ifname(int fd, char *namebuf); extern int os_set_slip(int fd); -extern int os_set_owner(int fd, int pid); extern int os_mode_fd(int fd, int mode); extern int os_seek_file(int fd, unsigned long long offset); -extern int os_open_file(char *file, struct openflags flags, int mode); +extern int os_open_file(const char *file, struct openflags flags, int mode); extern int os_read_file(int fd, void *buf, int len); extern int os_write_file(int fd, const void *buf, int count); -extern int os_file_size(char *file, unsigned long long *size_out); -extern int os_file_modtime(char *file, unsigned long *modtime); +extern int os_file_size(const char *file, unsigned long long *size_out); +extern int os_file_modtime(const char *file, unsigned long *modtime); extern int os_pipe(int *fd, int stream, int close_on_exec); -extern int os_set_fd_async(int fd, int owner); +extern int os_set_fd_async(int fd); extern int os_clear_fd_async(int fd); extern int os_set_fd_block(int fd, int blocking); extern int os_accept_connection(int fd); -extern int os_create_unix_socket(char *file, int len, int close_on_exec); +extern int os_create_unix_socket(const char *file, int len, int close_on_exec); extern int os_shutdown_socket(int fd, int r, int w); extern void os_close_file(int fd); extern int os_rcv_fd(int fd, int *helper_pid_out); extern int create_unix_socket(char *file, int len, int close_on_exec); -extern int os_connect_socket(char *name); +extern int os_connect_socket(const char *name); extern int os_file_type(char *file); -extern int os_file_mode(char *file, struct openflags *mode_out); +extern int os_file_mode(const char *file, struct openflags *mode_out); extern int os_lock_file(int fd, int excl); extern void os_flush_stdout(void); extern int os_stat_filesystem(char *path, long *bsize_out, @@ -168,14 +165,10 @@ extern int os_fchange_dir(int fd); /* start_up.c */ extern void os_early_checks(void); -extern int can_do_skas(void); +extern void can_do_skas(void); extern void os_check_bugs(void); extern void check_host_supports_tls(int *supports_tls, int *tls_min); -/* Make sure they are clear when running in TT mode. Required by - * SEGV_MAYBE_FIXABLE */ -#define clear_can_do_skas() do { ptrace_faultinfo = proc_mm = 0; } while (0) - /* mem.c */ extern int create_mem_file(unsigned long long len); @@ -214,7 +207,7 @@ extern int execvp_noalloc(char *buf, const char *file, char *const argv[]); extern int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv); extern int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, unsigned long *stack_out); -extern int helper_wait(int pid, int nohang, char *pname); +extern int helper_wait(int pid); /* tls.c */ @@ -237,16 +230,12 @@ extern void unblock_signals(void); extern int get_signals(void); extern int set_signals(int enable); -/* trap.c */ -extern void os_fill_handlinfo(struct kern_handlers h); - /* util.c */ extern void stack_protections(unsigned long address); extern int raw(int fd); extern void setup_machinename(char *machine_out); extern void setup_hostinfo(char *buf, int len); -extern int setjmp_wrapper(void (*proc)(void *, void *), ...); -extern void os_dump_core(void); +extern void os_dump_core(void) __attribute__ ((noreturn)); /* time.c */ extern void idle_sleep(unsigned long long nsecs); @@ -275,11 +264,9 @@ extern int protect(struct mm_id * mm_idp, unsigned long addr, extern int is_skas_winch(int pid, int fd, void *data); extern int start_userspace(unsigned long stub_stack); extern int copy_context_skas0(unsigned long stack, int pid); -extern void save_registers(int pid, struct uml_pt_regs *regs); -extern void restore_registers(int pid, struct uml_pt_regs *regs); extern void userspace(struct uml_pt_regs *regs); -extern void map_stub_pages(int fd, unsigned long code, - unsigned long data, unsigned long stack); +extern int map_stub_pages(int fd, unsigned long code, unsigned long data, + unsigned long stack); extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)); extern void switch_threads(jmp_buf *me, jmp_buf *you); extern int start_idle_thread(void *stack, jmp_buf *switch_buf); @@ -298,16 +285,12 @@ extern void os_free_irq_later(struct irq_fd *active_fds, extern int os_get_pollfd(int i); extern void os_set_pollfd(int i, int fd); extern void os_set_ioignore(void); -extern void init_irq_signals(int on_sigstack); /* sigio.c */ extern int add_sigio_fd(int fd); extern int ignore_sigio_fd(int fd); extern void maybe_sigio_broken(int fd, int read); -/* skas/trap */ -extern void sig_handler_common_skas(int sig, void *sc_ptr); - /* sys-x86_64/prctl.c */ extern int os_arch_prctl(int pid, int code, unsigned long *addr); diff --git a/arch/um/include/ptrace_user.h b/arch/um/include/ptrace_user.h index f3450e6bc18d..4bce6e012889 100644 --- a/arch/um/include/ptrace_user.h +++ b/arch/um/include/ptrace_user.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -10,12 +10,6 @@ extern int ptrace_getregs(long pid, unsigned long *regs_out); extern int ptrace_setregs(long pid, unsigned long *regs_in); -extern int ptrace_getfpregs(long pid, unsigned long *regs_out); -extern int ptrace_setfpregs(long pid, unsigned long *regs); -extern void arch_enter_kernel(void *task, int pid); -extern void arch_leave_kernel(void *task, int pid); -extern void ptrace_pokeuser(unsigned long addr, unsigned long data); - /* syscall emulation path in ptrace */ @@ -54,7 +48,8 @@ extern int sysemu_supported; (((int[3][3] ) { \ { PTRACE_SYSCALL, PTRACE_SYSCALL, PTRACE_SINGLESTEP }, \ { PTRACE_SYSEMU, PTRACE_SYSEMU, PTRACE_SINGLESTEP }, \ - { PTRACE_SYSEMU, PTRACE_SYSEMU_SINGLESTEP, PTRACE_SYSEMU_SINGLESTEP }}) \ + { PTRACE_SYSEMU, PTRACE_SYSEMU_SINGLESTEP, \ + PTRACE_SYSEMU_SINGLESTEP } }) \ [sysemu_mode][singlestep_mode]) #endif diff --git a/arch/um/include/registers.h b/arch/um/include/registers.h index 0e27406a43a4..9ea1ae3c8f46 100644 --- a/arch/um/include/registers.h +++ b/arch/um/include/registers.h @@ -9,14 +9,13 @@ #include "sysdep/ptrace.h" #include "sysdep/archsetjmp.h" -extern void init_thread_registers(struct uml_pt_regs *to); extern int save_fp_registers(int pid, unsigned long *fp_regs); extern int restore_fp_registers(int pid, unsigned long *fp_regs); extern int save_fpx_registers(int pid, unsigned long *fp_regs); extern int restore_fpx_registers(int pid, unsigned long *fp_regs); -extern void save_registers(int pid, struct uml_pt_regs *regs); -extern void restore_registers(int pid, struct uml_pt_regs *regs); -extern void init_registers(int pid); +extern int save_registers(int pid, struct uml_pt_regs *regs); +extern int restore_registers(int pid, struct uml_pt_regs *regs); +extern int init_registers(int pid); extern void get_safe_registers(unsigned long *regs); extern unsigned long get_thread_reg(int reg, jmp_buf *buf); diff --git a/arch/um/include/signal_kern.h b/arch/um/include/signal_kern.h deleted file mode 100644 index aeb5d5ab1dfd..000000000000 --- a/arch/um/include/signal_kern.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#ifndef __SIGNAL_KERN_H__ -#define __SIGNAL_KERN_H__ - -extern int have_signals(void *t); - -#endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/skas/mode-skas.h b/arch/um/include/skas/mode-skas.h deleted file mode 100644 index e065feb000df..000000000000 --- a/arch/um/include/skas/mode-skas.h +++ /dev/null @@ -1,11 +0,0 @@ -/* - * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) - * Licensed under the GPL - */ - -#ifndef __MODE_SKAS_H__ -#define __MODE_SKAS_H__ - -extern void kill_off_processes_skas(void); - -#endif diff --git a/arch/um/include/sysdep-i386/syscalls.h b/arch/um/include/sysdep-i386/syscalls.h index 57bd79efbee3..905698197e35 100644 --- a/arch/um/include/sysdep-i386/syscalls.h +++ b/arch/um/include/sysdep-i386/syscalls.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -18,7 +18,8 @@ extern syscall_handler_t old_mmap_i386; extern syscall_handler_t *sys_call_table[]; #define EXECUTE_SYSCALL(syscall, regs) \ - ((long (*)(struct syscall_args)) (*sys_call_table[syscall]))(SYSCALL_ARGS(®s->regs)) + ((long (*)(struct syscall_args)) \ + (*sys_call_table[syscall]))(SYSCALL_ARGS(®s->regs)) extern long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, diff --git a/arch/um/include/sysdep-x86_64/kernel-offsets.h b/arch/um/include/sysdep-x86_64/kernel-offsets.h index c978b589df41..a307237b7964 100644 --- a/arch/um/include/sysdep-x86_64/kernel-offsets.h +++ b/arch/um/include/sysdep-x86_64/kernel-offsets.h @@ -17,16 +17,7 @@ #define OFFSET(sym, str, mem) \ DEFINE(sym, offsetof(struct str, mem)); -#define __NO_STUBS 1 -#undef __SYSCALL -#undef _ASM_X86_64_UNISTD_H_ -#define __SYSCALL(nr, sym) [nr] = 1, -static char syscalls[] = { -#include <asm/arch/unistd.h> -}; - void foo(void) { #include <common-offsets.h> -DEFINE(UM_NR_syscall_max, sizeof(syscalls) - 1); } diff --git a/arch/um/include/sysdep-x86_64/syscalls.h b/arch/um/include/sysdep-x86_64/syscalls.h index cf72256609e4..7cfb0b085655 100644 --- a/arch/um/include/sysdep-x86_64/syscalls.h +++ b/arch/um/include/sysdep-x86_64/syscalls.h @@ -30,6 +30,4 @@ extern long old_mmap(unsigned long addr, unsigned long len, extern syscall_handler_t sys_modify_ldt; extern syscall_handler_t sys_arch_prctl; -#define NR_syscalls (UM_NR_syscall_max + 1) - #endif diff --git a/arch/um/include/um_mmu.h b/arch/um/include/um_mmu.h index 8855d8df512f..82865fcf6872 100644 --- a/arch/um/include/um_mmu.h +++ b/arch/um/include/um_mmu.h @@ -12,10 +12,6 @@ typedef struct mm_context { struct mm_id id; - unsigned long last_page_table; -#ifdef CONFIG_3_LEVEL_PGTABLES - unsigned long last_pmd; -#endif struct uml_ldt ldt; } mm_context_t; diff --git a/arch/um/include/um_uaccess.h b/arch/um/include/um_uaccess.h index fdfc06b85605..2b6fc8e0f071 100644 --- a/arch/um/include/um_uaccess.h +++ b/arch/um/include/um_uaccess.h @@ -6,7 +6,9 @@ #ifndef __ARCH_UM_UACCESS_H #define __ARCH_UM_UACCESS_H -#include "asm/fixmap.h" +#include <asm/elf.h> +#include <asm/fixmap.h> +#include "sysdep/archsetjmp.h" #define __under_task_size(addr, size) \ (((unsigned long) (addr) < TASK_SIZE) && \ diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 8196450451cd..76a62c0cb2bc 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -19,12 +19,13 @@ void flush_thread(void) { void *data = NULL; - unsigned long end = proc_mm ? task_size : STUB_START; int ret; arch_flush_thread(¤t->thread.arch); - ret = unmap(¤t->mm->context.id, 0, end, 1, &data); + ret = unmap(¤t->mm->context.id, 0, STUB_START, 0, &data); + ret = ret || unmap(¤t->mm->context.id, STUB_END, + TASK_SIZE - STUB_END, 1, &data); if (ret) { printk(KERN_ERR "flush_thread - clearing address space failed, " "err = %d\n", ret); diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c index c716b5a6db13..984f80e668ca 100644 --- a/arch/um/kernel/exitcode.c +++ b/arch/um/kernel/exitcode.c @@ -1,15 +1,17 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include "linux/kernel.h" -#include "linux/init.h" -#include "linux/ctype.h" -#include "linux/proc_fs.h" -#include "asm/uaccess.h" +#include <linux/ctype.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/proc_fs.h> +#include <linux/types.h> +#include <asm/uaccess.h> -/* If read and write race, the read will still atomically read a valid +/* + * If read and write race, the read will still atomically read a valid * value. */ int uml_exitcode = 0; @@ -19,18 +21,19 @@ static int read_proc_exitcode(char *page, char **start, off_t off, { int len, val; - /* Save uml_exitcode in a local so that we don't need to guarantee + /* + * Save uml_exitcode in a local so that we don't need to guarantee * that sprintf accesses it atomically. */ val = uml_exitcode; len = sprintf(page, "%d\n", val); len -= off; - if(len <= off+count) + if (len <= off+count) *eof = 1; *start = page + off; - if(len > count) + if (len > count) len = count; - if(len < 0) + if (len < 0) len = 0; return len; } @@ -41,11 +44,11 @@ static int write_proc_exitcode(struct file *file, const char __user *buffer, char *end, buf[sizeof("nnnnn\0")]; int tmp; - if(copy_from_user(buf, buffer, count)) + if (copy_from_user(buf, buffer, count)) return -EFAULT; tmp = simple_strtol(buf, &end, 0); - if((*end != '\0') && !isspace(*end)) + if ((*end != '\0') && !isspace(*end)) return -EINVAL; uml_exitcode = tmp; @@ -57,7 +60,7 @@ static int make_proc_exitcode(void) struct proc_dir_entry *ent; ent = create_proc_entry("exitcode", 0600, &proc_root); - if(ent == NULL){ + if (ent == NULL) { printk(KERN_WARNING "make_proc_exitcode : Failed to register " "/proc/exitcode\n"); return 0; diff --git a/arch/um/kernel/gmon_syms.c b/arch/um/kernel/gmon_syms.c index 734f873cab12..72eccd2a4113 100644 --- a/arch/um/kernel/gmon_syms.c +++ b/arch/um/kernel/gmon_syms.c @@ -1,5 +1,5 @@ -/* - * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -8,12 +8,13 @@ extern void __bb_init_func(void *) __attribute__((weak)); EXPORT_SYMBOL(__bb_init_func); -/* This is defined (and referred to in profiling stub code) only by some GCC +/* + * This is defined (and referred to in profiling stub code) only by some GCC * versions in libgcov. * * Since SuSE backported the fix, we cannot handle it depending on GCC version. - * So, unconditionally export it. But also give it a weak declaration, which will - * be overridden by any other one. + * So, unconditionally export it. But also give it a weak declaration, which + * will be overridden by any other one. */ extern void __gcov_init(void *) __attribute__((weak)); diff --git a/arch/um/kernel/gprof_syms.c b/arch/um/kernel/gprof_syms.c index 9244f018d44c..e2f043d0de6c 100644 --- a/arch/um/kernel/gprof_syms.c +++ b/arch/um/kernel/gprof_syms.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -7,14 +7,3 @@ extern void mcount(void); EXPORT_SYMBOL(mcount); - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c index 16dc43e9d940..fa015565001b 100644 --- a/arch/um/kernel/initrd.c +++ b/arch/um/kernel/initrd.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -7,7 +7,6 @@ #include "linux/bootmem.h" #include "linux/initrd.h" #include "asm/types.h" -#include "kern_util.h" #include "initrd.h" #include "init.h" #include "os.h" @@ -21,18 +20,27 @@ static int __init read_initrd(void) long long size; int err; - if(initrd == NULL) + if (initrd == NULL) return 0; err = os_file_size(initrd, &size); - if(err) + if (err) return 0; + /* + * This is necessary because alloc_bootmem craps out if you + * ask for no memory. + */ + if (size == 0) { + printk(KERN_ERR "\"%\" is a zero-size initrd\n"); + return 0; + } + area = alloc_bootmem(size); - if(area == NULL) + if (area == NULL) return 0; - if(load_initrd(initrd, area, size) == -1) + if (load_initrd(initrd, area, size) == -1) return 0; initrd_start = (unsigned long) area; @@ -59,13 +67,15 @@ int load_initrd(char *filename, void *buf, int size) int fd, n; fd = os_open_file(filename, of_read(OPENFLAGS()), 0); - if(fd < 0){ - printk("Opening '%s' failed - err = %d\n", filename, -fd); + if (fd < 0) { + printk(KERN_ERR "Opening '%s' failed - err = %d\n", filename, + -fd); return -1; } n = os_read_file(fd, buf, size); - if(n != size){ - printk("Read of %d bytes from '%s' failed, err = %d\n", size, + if (n != size) { + printk(KERN_ERR "Read of %d bytes from '%s' failed, " + "err = %d\n", size, filename, -n); return -1; } diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index ba11ccd6a8a3..91587f8db340 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -107,10 +107,9 @@ int activate_fd(int irq, int fd, int type, void *dev_id) struct pollfd *tmp_pfd; struct irq_fd *new_fd, *irq_fd; unsigned long flags; - int pid, events, err, n; + int events, err, n; - pid = os_getpid(); - err = os_set_fd_async(fd, pid); + err = os_set_fd_async(fd); if (err < 0) goto out; @@ -127,7 +126,6 @@ int activate_fd(int irq, int fd, int type, void *dev_id) .fd = fd, .type = type, .irq = irq, - .pid = pid, .events = events, .current_events = 0 } ); diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c index 7c7142ba3bd7..5311ee93ede3 100644 --- a/arch/um/kernel/ksyms.c +++ b/arch/um/kernel/ksyms.c @@ -18,15 +18,11 @@ EXPORT_SYMBOL(set_signals); EXPORT_SYMBOL(get_signals); EXPORT_SYMBOL(kernel_thread); EXPORT_SYMBOL(sys_waitpid); -EXPORT_SYMBOL(task_size); EXPORT_SYMBOL(flush_tlb_range); -EXPORT_SYMBOL(host_task_size); EXPORT_SYMBOL(arch_validate); -EXPORT_SYMBOL(get_kmem_end); EXPORT_SYMBOL(high_physmem); EXPORT_SYMBOL(empty_zero_page); -EXPORT_SYMBOL(um_virt_to_phys); EXPORT_SYMBOL(handle_page_fault); EXPORT_SYMBOL(find_iomem); @@ -40,7 +36,6 @@ EXPORT_SYMBOL(uml_strdup); EXPORT_SYMBOL(os_stat_fd); EXPORT_SYMBOL(os_stat_file); EXPORT_SYMBOL(os_access); -EXPORT_SYMBOL(os_get_exec_close); EXPORT_SYMBOL(os_set_exec_close); EXPORT_SYMBOL(os_getpid); EXPORT_SYMBOL(os_open_file); diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 59822dee438a..d872fdce1d7e 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -1,49 +1,41 @@ /* - * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include "linux/stddef.h" -#include "linux/kernel.h" -#include "linux/mm.h" -#include "linux/bootmem.h" -#include "linux/swap.h" -#include "linux/highmem.h" -#include "linux/gfp.h" -#include "asm/page.h" -#include "asm/fixmap.h" -#include "asm/pgalloc.h" -#include "kern_util.h" +#include <linux/stddef.h> +#include <linux/bootmem.h> +#include <linux/gfp.h> +#include <linux/highmem.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <asm/fixmap.h> +#include <asm/page.h> #include "as-layout.h" +#include "init.h" #include "kern.h" +#include "kern_util.h" #include "mem_user.h" -#include "um_uaccess.h" #include "os.h" -#include "linux/types.h" -#include "linux/string.h" -#include "init.h" -#include "kern_constants.h" /* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */ unsigned long *empty_zero_page = NULL; /* allocated in paging_init and unchanged thereafter */ unsigned long *empty_bad_page = NULL; + +/* + * Initialized during boot, and readonly for initializing page tables + * afterwards + */ pgd_t swapper_pg_dir[PTRS_PER_PGD]; + +/* Initialized at boot time, and readonly after that */ unsigned long long highmem; int kmalloc_ok = 0; +/* Used during early boot */ static unsigned long brk_end; -void unmap_physmem(void) -{ - os_unmap_memory((void *) brk_end, uml_reserved - brk_end); -} - -static void map_cb(void *unused) -{ - map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); -} - #ifdef CONFIG_HIGHMEM static void setup_highmem(unsigned long highmem_start, unsigned long highmem_len) @@ -53,7 +45,7 @@ static void setup_highmem(unsigned long highmem_start, int i; highmem_pfn = __pa(highmem_start) >> PAGE_SHIFT; - for(i = 0; i < highmem_len >> PAGE_SHIFT; i++){ + for (i = 0; i < highmem_len >> PAGE_SHIFT; i++) { page = &mem_map[highmem_pfn + i]; ClearPageReserved(page); init_page_count(page); @@ -65,14 +57,13 @@ static void setup_highmem(unsigned long highmem_start, void __init mem_init(void) { /* clear the zero-page */ - memset((void *) empty_zero_page, 0, PAGE_SIZE); + memset(empty_zero_page, 0, PAGE_SIZE); /* Map in the area just after the brk now that kmalloc is about * to be turned on. */ brk_end = (unsigned long) UML_ROUND_UP(sbrk(0)); - map_cb(NULL); - initial_thread_cb(map_cb, NULL); + map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); free_bootmem(__pa(brk_end), uml_reserved - brk_end); uml_reserved = brk_end; @@ -85,7 +76,7 @@ void __init mem_init(void) #endif num_physpages = totalram_pages; max_pfn = totalram_pages; - printk(KERN_INFO "Memory: %luk available\n", + printk(KERN_INFO "Memory: %luk available\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT-10)); kmalloc_ok = 1; @@ -119,7 +110,7 @@ static void __init one_md_table_init(pud_t *pud) #endif } -static void __init fixrange_init(unsigned long start, unsigned long end, +static void __init fixrange_init(unsigned long start, unsigned long end, pgd_t *pgd_base) { pgd_t *pgd; @@ -138,7 +129,7 @@ static void __init fixrange_init(unsigned long start, unsigned long end, if (pud_none(*pud)) one_md_table_init(pud); pmd = pmd_offset(pud, vaddr); - for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { + for (; (j < PTRS_PER_PMD) && (vaddr < end); pmd++, j++) { one_page_table_init(pmd); vaddr += PMD_SIZE; } @@ -152,7 +143,7 @@ pgprot_t kmap_prot; #define kmap_get_fixmap_pte(vaddr) \ pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)),\ - (vaddr)), (vaddr)) + (vaddr)), (vaddr)) static void __init kmap_init(void) { @@ -197,21 +188,23 @@ static void __init fixaddr_user_init( void) pud_t *pud; pmd_t *pmd; pte_t *pte; - unsigned long paddr, vaddr = FIXADDR_USER_START; + phys_t p; + unsigned long v, vaddr = FIXADDR_USER_START; - if ( ! size ) + if (!size) return; fixrange_init( FIXADDR_USER_START, FIXADDR_USER_END, swapper_pg_dir); - paddr = (unsigned long)alloc_bootmem_low_pages( size); - memcpy( (void *)paddr, (void *)FIXADDR_USER_START, size); - paddr = __pa(paddr); - for ( ; size > 0; size-=PAGE_SIZE, vaddr+=PAGE_SIZE, paddr+=PAGE_SIZE){ + v = (unsigned long) alloc_bootmem_low_pages(size); + memcpy((void *) v , (void *) FIXADDR_USER_START, size); + p = __pa(v); + for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE, + p += PAGE_SIZE) { pgd = swapper_pg_dir + pgd_index(vaddr); pud = pud_offset(pgd, vaddr); pmd = pmd_offset(pud, vaddr); pte = pte_offset_kernel(pmd, vaddr); - pte_set_val( (*pte), paddr, PAGE_READONLY); + pte_set_val(*pte, p, PAGE_READONLY); } #endif } @@ -223,7 +216,7 @@ void __init paging_init(void) empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); - for(i = 0; i < ARRAY_SIZE(zones_size); i++) + for (i = 0; i < ARRAY_SIZE(zones_size); i++) zones_size[i] = 0; zones_size[ZONE_NORMAL] = (end_iomem >> PAGE_SHIFT) - @@ -253,32 +246,33 @@ struct page *arch_validate(struct page *page, gfp_t mask, int order) int i; again: - if(page == NULL) + if (page == NULL) return page; - if(PageHighMem(page)) + if (PageHighMem(page)) return page; addr = (unsigned long) page_address(page); - for(i = 0; i < (1 << order); i++){ + for (i = 0; i < (1 << order); i++) { current->thread.fault_addr = (void *) addr; - if(__do_copy_to_user((void __user *) addr, &zero, + if (__do_copy_to_user((void __user *) addr, &zero, sizeof(zero), ¤t->thread.fault_addr, - ¤t->thread.fault_catcher)){ - if(!(mask & __GFP_WAIT)) + ¤t->thread.fault_catcher)) { + if (!(mask & __GFP_WAIT)) return NULL; else break; } addr += PAGE_SIZE; } - if(i == (1 << order)) + if (i == (1 << order)) return page; page = alloc_pages(mask, order); goto again; } -/* This can't do anything because nothing in the kernel image can be freed +/* + * This can't do anything because nothing in the kernel image can be freed * since it's not in kernel physical memory. */ @@ -290,8 +284,8 @@ void free_initmem(void) void free_initrd_mem(unsigned long start, unsigned long end) { if (start < end) - printk ("Freeing initrd memory: %ldk freed\n", - (end - start) >> 10); + printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", + (end - start) >> 10); for (; start < end; start += PAGE_SIZE) { ClearPageReserved(virt_to_page(start)); init_page_count(virt_to_page(start)); @@ -308,32 +302,31 @@ void show_mem(void) int highmem = 0; struct page *page; - printk("Mem-info:\n"); + printk(KERN_INFO "Mem-info:\n"); show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + printk(KERN_INFO "Free swap: %6ldkB\n", + nr_swap_pages<<(PAGE_SHIFT-10)); pfn = max_mapnr; - while(pfn-- > 0) { + while (pfn-- > 0) { page = pfn_to_page(pfn); total++; - if(PageHighMem(page)) + if (PageHighMem(page)) highmem++; - if(PageReserved(page)) + if (PageReserved(page)) reserved++; - else if(PageSwapCache(page)) + else if (PageSwapCache(page)) cached++; - else if(page_count(page)) + else if (page_count(page)) shared += page_count(page) - 1; } - printk("%d pages of RAM\n", total); - printk("%d pages of HIGHMEM\n", highmem); - printk("%d reserved pages\n", reserved); - printk("%d pages shared\n", shared); - printk("%d pages swap cached\n", cached); + printk(KERN_INFO "%d pages of RAM\n", total); + printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); + printk(KERN_INFO "%d reserved pages\n", reserved); + printk(KERN_INFO "%d pages shared\n", shared); + printk(KERN_INFO "%d pages swap cached\n", cached); } -/* - * Allocate and free page tables. - */ +/* Allocate and free page tables. */ pgd_t *pgd_alloc(struct mm_struct *mm) { @@ -341,14 +334,14 @@ pgd_t *pgd_alloc(struct mm_struct *mm) if (pgd) { memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); - memcpy(pgd + USER_PTRS_PER_PGD, - swapper_pg_dir + USER_PTRS_PER_PGD, + memcpy(pgd + USER_PTRS_PER_PGD, + swapper_pg_dir + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); } return pgd; } -void pgd_free(pgd_t *pgd) +void pgd_free(struct mm_struct *mm, pgd_t *pgd) { free_page((unsigned long) pgd); } @@ -368,3 +361,15 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); return pte; } + +#ifdef CONFIG_3_LEVEL_PGTABLES +pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) +{ + pmd_t *pmd = (pmd_t *) __get_free_page(GFP_KERNEL); + + if (pmd) + memset(pmd, 0, PAGE_SIZE); + + return pmd; +} +#endif diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index e66432f42485..9757085a0220 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -55,16 +55,6 @@ int __init init_maps(unsigned long physmem, unsigned long iomem, return 0; } -/* Changed during early boot */ -static unsigned long kmem_top = 0; - -unsigned long get_kmem_end(void) -{ - if (kmem_top == 0) - kmem_top = host_task_size - 1024 * 1024; - return kmem_top; -} - void map_memory(unsigned long virt, unsigned long phys, unsigned long len, int r, int w, int x) { @@ -174,10 +164,10 @@ __uml_setup("iomem=", parse_iomem, * setup_iomem, both of which run during early boot. Afterwards, it's * unchanged. */ -struct iomem_region *iomem_regions = NULL; +struct iomem_region *iomem_regions; -/* Initialized in parse_iomem */ -int iomem_size = 0; +/* Initialized in parse_iomem and unchanged thereafter */ +int iomem_size; unsigned long find_iomem(char *driver, unsigned long *len_out) { diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 0eae00b3e588..c07961bedb75 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -4,19 +4,21 @@ * Licensed under the GPL */ -#include "linux/stddef.h" -#include "linux/err.h" -#include "linux/hardirq.h" -#include "linux/mm.h" -#include "linux/personality.h" -#include "linux/proc_fs.h" -#include "linux/ptrace.h" -#include "linux/random.h" -#include "linux/sched.h" -#include "linux/tick.h" -#include "linux/threads.h" -#include "asm/pgtable.h" -#include "asm/uaccess.h" +#include <linux/stddef.h> +#include <linux/err.h> +#include <linux/hardirq.h> +#include <linux/gfp.h> +#include <linux/mm.h> +#include <linux/personality.h> +#include <linux/proc_fs.h> +#include <linux/ptrace.h> +#include <linux/random.h> +#include <linux/sched.h> +#include <linux/tick.h> +#include <linux/threads.h> +#include <asm/current.h> +#include <asm/pgtable.h> +#include <asm/uaccess.h> #include "as-layout.h" #include "kern_util.h" #include "os.h" @@ -30,7 +32,7 @@ */ struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { -1, NULL } }; -static inline int external_pid(struct task_struct *task) +static inline int external_pid(void) { /* FIXME: Need to look up userspace_pid by cpu */ return userspace_pid[0]; @@ -40,7 +42,7 @@ int pid_to_processor_id(int pid) { int i; - for(i = 0; i < ncpus; i++) { + for (i = 0; i < ncpus; i++) { if (cpu_tasks[i].pid == pid) return i; } @@ -60,8 +62,6 @@ unsigned long alloc_stack(int order, int atomic) if (atomic) flags = GFP_ATOMIC; page = __get_free_pages(flags, order); - if (page == 0) - return 0; return page; } @@ -80,15 +80,15 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) static inline void set_current(struct task_struct *task) { cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task) - { external_pid(task), task }); + { external_pid(), task }); } -extern void arch_switch_to(struct task_struct *from, struct task_struct *to); +extern void arch_switch_to(struct task_struct *to); void *_switch_to(void *prev, void *next, void *last) { struct task_struct *from = prev; - struct task_struct *to= next; + struct task_struct *to = next; to->thread.prev_sched = from; set_current(to); @@ -99,13 +99,13 @@ void *_switch_to(void *prev, void *next, void *last) switch_threads(&from->thread.switch_buf, &to->thread.switch_buf); - arch_switch_to(current->thread.prev_sched, current); + arch_switch_to(current); if (current->thread.saved_task) show_regs(&(current->thread.regs)); - next= current->thread.saved_task; - prev= current; - } while(current->thread.saved_task); + to = current->thread.saved_task; + from = current; + } while (current->thread.saved_task); return current->thread.prev_sched; @@ -163,8 +163,6 @@ void new_thread_handler(void) void fork_handler(void) { force_flush_all(); - if (current->thread.prev_sched == NULL) - panic("blech"); schedule_tail(current->thread.prev_sched); @@ -173,7 +171,7 @@ void fork_handler(void) * arch_switch_to isn't needed. We could want to apply this to * improve performance. -bb */ - arch_switch_to(current->thread.prev_sched, current); + arch_switch_to(current); current->thread.prev_sched = NULL; @@ -204,7 +202,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, arch_copy_thread(¤t->thread.arch, &p->thread.arch); } else { - init_thread_registers(&p->thread.regs.regs); + get_safe_registers(p->thread.regs.regs.gp); p->thread.request.u.thread = current->thread.request.u.thread; handler = new_thread_handler; } @@ -237,7 +235,7 @@ void default_idle(void) { unsigned long long nsecs; - while(1) { + while (1) { /* endless idle loop with no priority at all */ /* @@ -256,53 +254,10 @@ void default_idle(void) void cpu_idle(void) { - cpu_tasks[current_thread->cpu].pid = os_getpid(); + cpu_tasks[current_thread_info()->cpu].pid = os_getpid(); default_idle(); } -void *um_virt_to_phys(struct task_struct *task, unsigned long addr, - pte_t *pte_out) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - pte_t ptent; - - if (task->mm == NULL) - return ERR_PTR(-EINVAL); - pgd = pgd_offset(task->mm, addr); - if (!pgd_present(*pgd)) - return ERR_PTR(-EINVAL); - - pud = pud_offset(pgd, addr); - if (!pud_present(*pud)) - return ERR_PTR(-EINVAL); - - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) - return ERR_PTR(-EINVAL); - - pte = pte_offset_kernel(pmd, addr); - ptent = *pte; - if (!pte_present(ptent)) - return ERR_PTR(-EINVAL); - - if (pte_out != NULL) - *pte_out = ptent; - return (void *) (pte_val(ptent) & PAGE_MASK) + (addr & ~PAGE_MASK); -} - -char *current_cmd(void) -{ -#if defined(CONFIG_SMP) || defined(CONFIG_HIGHMEM) - return "(Unknown)"; -#else - void *addr = um_virt_to_phys(current, current->mm->arg_start, NULL); - return IS_ERR(addr) ? "(Unknown)": __va((unsigned long) addr); -#endif -} - void dump_thread(struct pt_regs *regs, struct user *u) { } @@ -317,7 +272,7 @@ int user_context(unsigned long sp) unsigned long stack; stack = sp & (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER); - return stack != (unsigned long) current_thread; + return stack != (unsigned long) current_thread_info(); } extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end; @@ -331,7 +286,7 @@ void do_uml_exitcalls(void) (*call)(); } -char *uml_strdup(char *string) +char *uml_strdup(const char *string) { return kstrdup(string, GFP_KERNEL); } @@ -359,7 +314,7 @@ int strlen_user_proc(char __user *str) int smp_sigio_handler(void) { #ifdef CONFIG_SMP - int cpu = current_thread->cpu; + int cpu = current_thread_info()->cpu; IPI_handler(cpu); if (cpu != 0) return 1; @@ -369,7 +324,7 @@ int smp_sigio_handler(void) int cpu(void) { - return current_thread->cpu; + return current_thread_info()->cpu; } static atomic_t using_sysemu = ATOMIC_INIT(0); @@ -435,7 +390,7 @@ int singlestepping(void * t) { struct task_struct *task = t ? t : current; - if ( ! (task->ptrace & PT_DTRACE) ) + if (!(task->ptrace & PT_DTRACE)) return 0; if (task->thread.singlestep_syscall) @@ -459,3 +414,46 @@ unsigned long arch_align_stack(unsigned long sp) return sp & ~0xf; } #endif + +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long stack_page, sp, ip; + bool seen_sched = 0; + + if ((p == NULL) || (p == current) || (p->state == TASK_RUNNING)) + return 0; + + stack_page = (unsigned long) task_stack_page(p); + /* Bail if the process has no kernel stack for some reason */ + if (stack_page == 0) + return 0; + + sp = p->thread.switch_buf->JB_SP; + /* + * Bail if the stack pointer is below the bottom of the kernel + * stack for some reason + */ + if (sp < stack_page) + return 0; + + while (sp < stack_page + THREAD_SIZE) { + ip = *((unsigned long *) sp); + if (in_sched_functions(ip)) + /* Ignore everything until we're above the scheduler */ + seen_sched = 1; + else if (kernel_text_address(ip) && seen_sched) + return ip; + + sp += sizeof(unsigned long); + } + + return 0; +} + +int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu) +{ + int cpu = current_thread_info()->cpu; + + return save_fp_registers(userspace_pid[cpu], (unsigned long *) fpu); +} + diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 04cebcf0679f..00197d3d21ec 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -4,6 +4,7 @@ */ #include "linux/sched.h" +#include "kern_util.h" #include "os.h" #include "skas.h" @@ -11,7 +12,7 @@ void (*pm_power_off)(void); static void kill_off_processes(void) { - if(proc_mm) + if (proc_mm) /* * FIXME: need to loop over userspace_pids */ @@ -21,8 +22,8 @@ static void kill_off_processes(void) int pid, me; me = os_getpid(); - for_each_process(p){ - if(p->mm == NULL) + for_each_process(p) { + if (p->mm == NULL) continue; pid = p->mm->context.id.u.pid; diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c index 89f9866a1354..2b272b63b514 100644 --- a/arch/um/kernel/sigio.c +++ b/arch/um/kernel/sigio.c @@ -1,18 +1,12 @@ /* - * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) * Licensed under the GPL */ -#include "linux/kernel.h" -#include "linux/list.h" -#include "linux/slab.h" -#include "linux/signal.h" -#include "linux/interrupt.h" -#include "init.h" -#include "sigio.h" -#include "irq_user.h" +#include <linux/interrupt.h> #include "irq_kern.h" #include "os.h" +#include "sigio.h" /* Protected by sigio_lock() called from write_sigio_workaround */ static int sigio_irq_fd = -1; @@ -33,9 +27,9 @@ int write_sigio_irq(int fd) err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt, IRQF_DISABLED|IRQF_SAMPLE_RANDOM, "write sigio", NULL); - if(err){ - printk("write_sigio_irq : um_request_irq failed, err = %d\n", - err); + if (err) { + printk(KERN_ERR "write_sigio_irq : um_request_irq failed, " + "err = %d\n", err); return -1; } sigio_irq_fd = fd; diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 19cb97733937..b0fce720c4d0 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -3,12 +3,12 @@ * Licensed under the GPL */ -#include "linux/module.h" -#include "linux/ptrace.h" -#include "linux/sched.h" -#include "asm/siginfo.h" -#include "asm/signal.h" -#include "asm/unistd.h" +#include <linux/module.h> +#include <linux/ptrace.h> +#include <linux/sched.h> +#include <asm/siginfo.h> +#include <asm/signal.h> +#include <asm/unistd.h> #include "frame_kern.h" #include "kern_util.h" #include "sigcontext.h" @@ -36,7 +36,7 @@ static int handle_signal(struct pt_regs *regs, unsigned long signr, /* Did we come from a system call? */ if (PT_REGS_SYSCALL_NR(regs) >= 0) { /* If so, check system call restarting.. */ - switch(PT_REGS_SYSCALL_RET(regs)) { + switch (PT_REGS_SYSCALL_RET(regs)) { case -ERESTART_RESTARTBLOCK: case -ERESTARTNOHAND: PT_REGS_SYSCALL_RET(regs) = -EINTR; @@ -116,7 +116,7 @@ static int kern_do_signal(struct pt_regs *regs) /* Did we come from a system call? */ if (!handled_sig && (PT_REGS_SYSCALL_NR(regs) >= 0)) { /* Restart the system call - no handlers present */ - switch(PT_REGS_SYSCALL_RET(regs)) { + switch (PT_REGS_SYSCALL_RET(regs)) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c index 8d07a7acb909..2c8583c1a344 100644 --- a/arch/um/kernel/skas/clone.c +++ b/arch/um/kernel/skas/clone.c @@ -1,17 +1,20 @@ -#include <sched.h> +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + #include <signal.h> -#include <sys/mman.h> -#include <sys/time.h> +#include <sched.h> #include <asm/unistd.h> +#include <sys/time.h> #include "as-layout.h" +#include "kern_constants.h" #include "ptrace_user.h" -#include "skas.h" #include "stub-data.h" -#include "uml-config.h" #include "sysdep/stub.h" -#include "kern_constants.h" -/* This is in a separate file because it needs to be compiled with any +/* + * This is in a separate file because it needs to be compiled with any * extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled * * Use UM_KERN_PAGE_SIZE instead of PAGE_SIZE because that calls getpagesize @@ -26,25 +29,26 @@ stub_clone_handler(void) err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD, STUB_DATA + UM_KERN_PAGE_SIZE / 2 - sizeof(void *)); - if(err != 0) + if (err != 0) goto out; err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); - if(err) + if (err) goto out; - err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL, + err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL, (long) &data->timer, 0); - if(err) + if (err) goto out; remap_stack(data->fd, data->offset); goto done; out: - /* save current result. - * Parent: pid; - * child: retcode of mmap already saved and it jumps around this + /* + * save current result. + * Parent: pid; + * child: retcode of mmap already saved and it jumps around this * assignment */ data->err = err; diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index f859ec306cd5..78b3e9f69d57 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -34,33 +34,14 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, if (!pte) goto out_pte; - /* - * There's an interaction between the skas0 stub pages, stack - * randomization, and the BUG at the end of exit_mmap. exit_mmap - * checks that the number of page tables freed is the same as had - * been allocated. If the stack is on the last page table page, - * then the stack pte page will be freed, and if not, it won't. To - * avoid having to know where the stack is, or if the process mapped - * something at the top of its address space for some other reason, - * we set TASK_SIZE to end at the start of the last page table. - * This keeps exit_mmap off the last page, but introduces a leak - * of that page. So, we hang onto it here and free it in - * destroy_context_skas. - */ - - mm->context.last_page_table = pmd_page_vaddr(*pmd); -#ifdef CONFIG_3_LEVEL_PGTABLES - mm->context.last_pmd = (unsigned long) __va(pud_val(*pud)); -#endif - *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT)); *pte = pte_mkread(*pte); return 0; out_pmd: - pud_free(pud); + pud_free(mm, pud); out_pte: - pmd_free(pmd); + pmd_free(mm, pmd); out: return -ENOMEM; } @@ -76,24 +57,6 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) stack = get_zeroed_page(GFP_KERNEL); if (stack == 0) goto out; - - /* - * This zeros the entry that pgd_alloc didn't, needed since - * we are about to reinitialize it, and want mm.nr_ptes to - * be accurate. - */ - mm->pgd[USER_PTRS_PER_PGD] = __pgd(0); - - ret = init_stub_pte(mm, STUB_CODE, - (unsigned long) &__syscall_stub_start); - if (ret) - goto out_free; - - ret = init_stub_pte(mm, STUB_DATA, stack); - if (ret) - goto out_free; - - mm->nr_ptes--; } to_mm->id.stack = stack; @@ -114,6 +77,11 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) to_mm->id.u.pid = copy_context_skas0(stack, from_mm->id.u.pid); else to_mm->id.u.pid = start_userspace(stack); + + if (to_mm->id.u.pid < 0) { + ret = to_mm->id.u.pid; + goto out_free; + } } ret = init_new_ldt(to_mm, from_mm); @@ -132,24 +100,87 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) return ret; } +void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) +{ + struct page **pages; + int err, ret; + + if (!skas_needs_stub) + return; + + ret = init_stub_pte(mm, STUB_CODE, + (unsigned long) &__syscall_stub_start); + if (ret) + goto out; + + ret = init_stub_pte(mm, STUB_DATA, mm->context.id.stack); + if (ret) + goto out; + + pages = kmalloc(2 * sizeof(struct page *), GFP_KERNEL); + if (pages == NULL) { + printk(KERN_ERR "arch_dup_mmap failed to allocate 2 page " + "pointers\n"); + goto out; + } + + pages[0] = virt_to_page(&__syscall_stub_start); + pages[1] = virt_to_page(mm->context.id.stack); + + /* dup_mmap already holds mmap_sem */ + err = install_special_mapping(mm, STUB_START, STUB_END - STUB_START, + VM_READ | VM_MAYREAD | VM_EXEC | + VM_MAYEXEC | VM_DONTCOPY, pages); + if (err) { + printk(KERN_ERR "install_special_mapping returned %d\n", err); + goto out_free; + } + return; + +out_free: + kfree(pages); +out: + force_sigsegv(SIGSEGV, current); +} + +void arch_exit_mmap(struct mm_struct *mm) +{ + pte_t *pte; + + pte = virt_to_pte(mm, STUB_CODE); + if (pte != NULL) + pte_clear(mm, STUB_CODE, pte); + + pte = virt_to_pte(mm, STUB_DATA); + if (pte == NULL) + return; + + pte_clear(mm, STUB_DATA, pte); +} + void destroy_context(struct mm_struct *mm) { struct mm_context *mmu = &mm->context; if (proc_mm) os_close_file(mmu->id.u.mm_fd); - else + else { + /* + * If init_new_context wasn't called, this will be + * zero, resulting in a kill(0), which will result in the + * whole UML suddenly dying. Also, cover negative and + * 1 cases, since they shouldn't happen either. + */ + if (mmu->id.u.pid < 2) { + printk(KERN_ERR "corrupt mm_context - pid = %d\n", + mmu->id.u.pid); + return; + } os_kill_ptraced_process(mmu->id.u.pid, 1); + } - if (!proc_mm || !ptrace_faultinfo) { + if (skas_needs_stub) free_page(mmu->id.stack); - pte_lock_deinit(virt_to_page(mmu->last_page_table)); - pte_free_kernel((pte_t *) mmu->last_page_table); - dec_zone_page_state(virt_to_page(mmu->last_page_table), NR_PAGETABLE); -#ifdef CONFIG_3_LEVEL_PGTABLES - pmd_free((pmd_t *) mmu->last_pmd); -#endif - } free_ldt(mmu); } diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index fce389c2342f..2e9852c0d487 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -6,19 +6,25 @@ #include "linux/init.h" #include "linux/sched.h" #include "as-layout.h" +#include "kern.h" #include "os.h" #include "skas.h" int new_mm(unsigned long stack) { - int fd; + int fd, err; fd = os_open_file("/proc/mm", of_cloexec(of_write(OPENFLAGS())), 0); if (fd < 0) return fd; - if (skas_needs_stub) - map_stub_pages(fd, STUB_CODE, STUB_DATA, stack); + if (skas_needs_stub) { + err = map_stub_pages(fd, STUB_CODE, STUB_DATA, stack); + if (err) { + os_close_file(fd); + return err; + } + } return fd; } @@ -49,8 +55,14 @@ int __init start_uml(void) { stack_protections((unsigned long) &cpu0_irqstack); set_sigstack(cpu0_irqstack, THREAD_SIZE); - if (proc_mm) + if (proc_mm) { userspace_pid[0] = start_userspace(0); + if (userspace_pid[0] < 0) { + printf("start_uml - start_userspace returned %d\n", + userspace_pid[0]); + exit(1); + } + } init_new_thread_signals(); diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index 50b476f2b38d..4e3b820bd2be 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -9,6 +9,9 @@ #include "sysdep/ptrace.h" #include "sysdep/syscalls.h" +extern int syscall_table_size; +#define NR_syscalls (syscall_table_size / sizeof(void *)) + void handle_syscall(struct uml_pt_regs *r) { struct pt_regs *regs = container_of(r, struct pt_regs, regs); @@ -17,9 +20,6 @@ void handle_syscall(struct uml_pt_regs *r) syscall_trace(r, 0); - current->thread.nsyscalls++; - nsyscalls++; - /* * This should go in the declaration of syscall, but when I do that, * strace -f -c bash -c 'ls ; ls' breaks, sometimes not tracing diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c index 1d8b119f2d0e..e22c96993db3 100644 --- a/arch/um/kernel/skas/uaccess.c +++ b/arch/um/kernel/skas/uaccess.c @@ -3,128 +3,130 @@ * Licensed under the GPL */ -#include "linux/err.h" -#include "linux/highmem.h" -#include "linux/mm.h" -#include "asm/current.h" -#include "asm/page.h" -#include "asm/pgtable.h" +#include <linux/err.h> +#include <linux/highmem.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <asm/current.h> +#include <asm/page.h> +#include <asm/pgtable.h> #include "kern_util.h" #include "os.h" -extern void *um_virt_to_phys(struct task_struct *task, unsigned long addr, - pte_t *pte_out); - -static unsigned long maybe_map(unsigned long virt, int is_write) +pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr) { - pte_t pte; - int err; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + if (mm == NULL) + return NULL; + + pgd = pgd_offset(mm, addr); + if (!pgd_present(*pgd)) + return NULL; + + pud = pud_offset(pgd, addr); + if (!pud_present(*pud)) + return NULL; - void *phys = um_virt_to_phys(current, virt, &pte); - int dummy_code; + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) + return NULL; + + return pte_offset_kernel(pmd, addr); +} + +static pte_t *maybe_map(unsigned long virt, int is_write) +{ + pte_t *pte = virt_to_pte(current->mm, virt); + int err, dummy_code; - if (IS_ERR(phys) || (is_write && !pte_write(pte))) { + if ((pte == NULL) || !pte_present(*pte) || + (is_write && !pte_write(*pte))) { err = handle_page_fault(virt, 0, is_write, 1, &dummy_code); if (err) - return -1UL; - phys = um_virt_to_phys(current, virt, NULL); + return NULL; + pte = virt_to_pte(current->mm, virt); } - if (IS_ERR(phys)) - phys = (void *) -1; + if (!pte_present(*pte)) + pte = NULL; - return (unsigned long) phys; + return pte; } static int do_op_one_page(unsigned long addr, int len, int is_write, int (*op)(unsigned long addr, int len, void *arg), void *arg) { + jmp_buf buf; struct page *page; - int n; + pte_t *pte; + int n, faulted; - addr = maybe_map(addr, is_write); - if (addr == -1UL) + pte = maybe_map(addr, is_write); + if (pte == NULL) return -1; - page = phys_to_page(addr); + page = pte_page(*pte); addr = (unsigned long) kmap_atomic(page, KM_UML_USERCOPY) + (addr & ~PAGE_MASK); - n = (*op)(addr, len, arg); + current->thread.fault_catcher = &buf; + + faulted = UML_SETJMP(&buf); + if (faulted == 0) + n = (*op)(addr, len, arg); + else + n = -1; + + current->thread.fault_catcher = NULL; kunmap_atomic(page, KM_UML_USERCOPY); return n; } -static void do_buffer_op(void *jmpbuf, void *arg_ptr) +static int buffer_op(unsigned long addr, int len, int is_write, + int (*op)(unsigned long, int, void *), void *arg) { - va_list args; - unsigned long addr; - int len, is_write, size, remain, n; - int (*op)(unsigned long, int, void *); - void *arg; - int *res; - - va_copy(args, *(va_list *)arg_ptr); - addr = va_arg(args, unsigned long); - len = va_arg(args, int); - is_write = va_arg(args, int); - op = va_arg(args, void *); - arg = va_arg(args, void *); - res = va_arg(args, int *); - va_end(args); + int size, remain, n; + size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len); remain = len; - current->thread.fault_catcher = jmpbuf; n = do_op_one_page(addr, size, is_write, op, arg); if (n != 0) { - *res = (n < 0 ? remain : 0); + remain = (n < 0 ? remain : 0); goto out; } addr += size; remain -= size; - if (remain == 0) { - *res = 0; + if (remain == 0) goto out; - } - while(addr < ((addr + remain) & PAGE_MASK)) { + while (addr < ((addr + remain) & PAGE_MASK)) { n = do_op_one_page(addr, PAGE_SIZE, is_write, op, arg); if (n != 0) { - *res = (n < 0 ? remain : 0); + remain = (n < 0 ? remain : 0); goto out; } addr += PAGE_SIZE; remain -= PAGE_SIZE; } - if (remain == 0) { - *res = 0; + if (remain == 0) goto out; - } n = do_op_one_page(addr, remain, is_write, op, arg); - if (n != 0) - *res = (n < 0 ? remain : 0); - else *res = 0; - out: - current->thread.fault_catcher = NULL; -} - -static int buffer_op(unsigned long addr, int len, int is_write, - int (*op)(unsigned long addr, int len, void *arg), - void *arg) -{ - int faulted, res; - - faulted = setjmp_wrapper(do_buffer_op, addr, len, is_write, op, arg, - &res); - if (!faulted) - return res; + if (n != 0) { + remain = (n < 0 ? remain : 0); + goto out; + } - return addr + len - (unsigned long) current->thread.fault_addr; + return 0; + out: + return remain; } static int copy_chunk_from_user(unsigned long from, int len, void *arg) diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c index 36d89cf8d20b..e1062ec36d40 100644 --- a/arch/um/kernel/smp.c +++ b/arch/um/kernel/smp.c @@ -21,7 +21,6 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); #include "asm/smp.h" #include "asm/processor.h" #include "asm/spinlock.h" -#include "kern_util.h" #include "kern.h" #include "irq_user.h" #include "os.h" @@ -61,7 +60,7 @@ void smp_send_stop(void) continue; os_write_file(cpu_data[i].ipi_pipe[1], "S", 1); } - printk(KERN_INFO "done\n"); + printk(KERN_CONT "done\n"); } static cpumask_t smp_commenced_mask = CPU_MASK_NONE; @@ -75,8 +74,7 @@ static int idle_proc(void *cpup) if (err < 0) panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err); - os_set_fd_async(cpu_data[cpu].ipi_pipe[0], - current->thread.mode.tt.extern_pid); + os_set_fd_async(cpu_data[cpu].ipi_pipe[0]); wmb(); if (cpu_test_and_set(cpu, cpu_callin_map)) { @@ -129,8 +127,7 @@ void smp_prepare_cpus(unsigned int maxcpus) if (err < 0) panic("CPU#0 failed to create IPI pipe, errno = %d", -err); - os_set_fd_async(cpu_data[me].ipi_pipe[0], - current->thread.mode.tt.extern_pid); + os_set_fd_async(cpu_data[me].ipi_pipe[0]); for (cpu = 1; cpu < ncpus; cpu++) { printk(KERN_INFO "Booting processor %d...\n", cpu); @@ -143,9 +140,8 @@ void smp_prepare_cpus(unsigned int maxcpus) while (waittime-- && !cpu_isset(cpu, cpu_callin_map)) cpu_relax(); - if (cpu_isset(cpu, cpu_callin_map)) - printk(KERN_INFO "done\n"); - else printk(KERN_INFO "failed\n"); + printk(KERN_INFO "%s\n", + cpu_isset(cpu, cpu_calling_map) ? "done" : "failed"); } } diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index b9d92b2089ae..9cffc628a37e 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -13,9 +13,6 @@ #include "asm/uaccess.h" #include "asm/unistd.h" -/* Unlocked, I don't care if this is a bit off */ -int nsyscalls = 0; - long sys_fork(void) { long ret; diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index 93263571d813..56d43d0a3960 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -1,38 +1,37 @@ -/* - * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include "linux/sched.h" -#include "linux/kernel.h" -#include "linux/module.h" -#include "linux/kallsyms.h" -#include "asm/page.h" -#include "asm/processor.h" +#include <linux/kallsyms.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> #include "sysrq.h" /* Catch non-i386 SUBARCH's. */ #if !defined(CONFIG_UML_X86) || defined(CONFIG_64BIT) void show_trace(struct task_struct *task, unsigned long * stack) { - unsigned long addr; + unsigned long addr; - if (!stack) { + if (!stack) { stack = (unsigned long*) &stack; WARN_ON(1); } - printk("Call Trace: \n"); - while (((long) stack & (THREAD_SIZE-1)) != 0) { - addr = *stack; + printk(KERN_INFO "Call Trace: \n"); + while (((long) stack & (THREAD_SIZE-1)) != 0) { + addr = *stack; if (__kernel_text_address(addr)) { - printk("%08lx: [<%08lx>]", (unsigned long) stack, addr); - print_symbol(" %s", addr); - printk("\n"); - } - stack++; - } - printk("\n"); + printk(KERN_INFO "%08lx: [<%08lx>]", + (unsigned long) stack, addr); + print_symbol(KERN_CONT " %s", addr); + printk(KERN_CONT "\n"); + } + stack++; + } + printk(KERN_INFO "\n"); } #endif @@ -67,14 +66,13 @@ void show_stack(struct task_struct *task, unsigned long *esp) } stack = esp; - for(i = 0; i < kstack_depth_to_print; i++) { + for (i = 0; i < kstack_depth_to_print; i++) { if (kstack_end(stack)) break; if (i && ((i % 8) == 0)) - printk("\n "); + printk("\n" KERN_INFO " "); printk("%08lx ", *stack++); } - printk("Call Trace: \n"); show_trace(task, esp); } diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 1ac746a9eae1..e066e84493b1 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -3,12 +3,12 @@ * Licensed under the GPL */ -#include "linux/clockchips.h" -#include "linux/interrupt.h" -#include "linux/jiffies.h" -#include "linux/threads.h" -#include "asm/irq.h" -#include "asm/param.h" +#include <linux/clockchips.h> +#include <linux/interrupt.h> +#include <linux/jiffies.h> +#include <linux/threads.h> +#include <asm/irq.h> +#include <asm/param.h> #include "kern_util.h" #include "os.h" @@ -32,7 +32,7 @@ void timer_handler(int sig, struct uml_pt_regs *regs) static void itimer_set_mode(enum clock_event_mode mode, struct clock_event_device *evt) { - switch(mode) { + switch (mode) { case CLOCK_EVT_MODE_PERIODIC: set_interval(); break; diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index f4a0e407eee4..d175d0566af0 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -3,9 +3,10 @@ * Licensed under the GPL */ -#include "linux/mm.h" -#include "asm/pgtable.h" -#include "asm/tlbflush.h" +#include <linux/mm.h> +#include <linux/sched.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> #include "as-layout.h" #include "mem_user.h" #include "os.h" @@ -56,7 +57,7 @@ static int do_ops(struct host_vm_change *hvc, int end, for (i = 0; i < end && !ret; i++) { op = &hvc->ops[i]; - switch(op->type) { + switch (op->type) { case MMAP: ret = map(hvc->id, op->u.mmap.addr, op->u.mmap.len, op->u.mmap.prot, op->u.mmap.fd, @@ -183,27 +184,30 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr, pte = pte_offset_kernel(pmd, addr); do { + if ((addr >= STUB_START) && (addr < STUB_END)) + continue; + r = pte_read(*pte); w = pte_write(*pte); x = pte_exec(*pte); if (!pte_young(*pte)) { r = 0; w = 0; - } else if (!pte_dirty(*pte)) { + } else if (!pte_dirty(*pte)) w = 0; - } + prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) | (x ? UM_PROT_EXEC : 0)); if (hvc->force || pte_newpage(*pte)) { if (pte_present(*pte)) ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK, PAGE_SIZE, prot, hvc); - else ret = add_munmap(addr, PAGE_SIZE, hvc); - } - else if (pte_newprot(*pte)) + else + ret = add_munmap(addr, PAGE_SIZE, hvc); + } else if (pte_newprot(*pte)) ret = add_mprotect(addr, PAGE_SIZE, prot, hvc); *pte = pte_mkuptodate(*pte); - } while (pte++, addr += PAGE_SIZE, ((addr != end) && !ret)); + } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret)); return ret; } @@ -225,7 +229,7 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr, } } else ret = update_pte_range(pmd, addr, next, hvc); - } while (pmd++, addr = next, ((addr != end) && !ret)); + } while (pmd++, addr = next, ((addr < end) && !ret)); return ret; } @@ -247,7 +251,7 @@ static inline int update_pud_range(pgd_t *pgd, unsigned long addr, } } else ret = update_pmd_range(pud, addr, next, hvc); - } while (pud++, addr = next, ((addr != end) && !ret)); + } while (pud++, addr = next, ((addr < end) && !ret)); return ret; } @@ -270,7 +274,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, } } else ret = update_pud_range(pgd, addr, next, &hvc); - } while (pgd++, addr = next, ((addr != end_addr) && !ret)); + } while (pgd++, addr = next, ((addr < end_addr) && !ret)); if (!ret) ret = do_ops(&hvc, hvc.index, 1); @@ -485,9 +489,6 @@ void __flush_tlb_one(unsigned long addr) static void fix_range(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr, int force) { - if (!proc_mm && (end_addr > STUB_START)) - end_addr = STUB_START; - fix_range_common(mm, start_addr, end_addr, force); } @@ -499,10 +500,9 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, else fix_range(vma->vm_mm, start, end, 0); } -void flush_tlb_mm(struct mm_struct *mm) +void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + unsigned long end) { - unsigned long end; - /* * Don't bother flushing if this address space is about to be * destroyed. @@ -510,8 +510,17 @@ void flush_tlb_mm(struct mm_struct *mm) if (atomic_read(&mm->mm_users) == 0) return; - end = proc_mm ? task_size : STUB_START; - fix_range(mm, 0, end, 0); + fix_range(mm, start, end, 0); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + struct vm_area_struct *vma = mm->mmap; + + while (vma != NULL) { + fix_range(mm, vma->vm_start, vma->vm_end, 0); + vma = vma->vm_next; + } } void force_flush_all(void) diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index cb3321f8e0a9..44e490419495 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -13,6 +13,7 @@ #include "as-layout.h" #include "kern_util.h" #include "os.h" +#include "skas.h" #include "sysdep/sigcontext.h" /* @@ -128,7 +129,19 @@ static void bad_segv(struct faultinfo fi, unsigned long ip) force_sig_info(SIGSEGV, &si, current); } -static void segv_handler(int sig, struct uml_pt_regs *regs) +void fatal_sigsegv(void) +{ + force_sigsegv(SIGSEGV, current); + do_signal(); + /* + * This is to tell gcc that we're not returning - do_signal + * can, in general, return, but in this case, it's not, since + * we just got a fatal SIGSEGV queued. + */ + os_dump_core(); +} + +void segv_handler(int sig, struct uml_pt_regs *regs) { struct faultinfo * fi = UPT_FAULTINFO(regs); @@ -216,9 +229,6 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, void relay_signal(int sig, struct uml_pt_regs *regs) { - if (arch_handle_signal(sig, regs)) - return; - if (!UPT_IS_USER(regs)) { if (sig == SIGBUS) printk(KERN_ERR "Bus error - the host /dev/shm or /tmp " @@ -226,31 +236,24 @@ void relay_signal(int sig, struct uml_pt_regs *regs) panic("Kernel mode signal %d", sig); } + arch_examine_signal(sig, regs); + current->thread.arch.faultinfo = *UPT_FAULTINFO(regs); force_sig(sig, current); } -static void bus_handler(int sig, struct uml_pt_regs *regs) +void bus_handler(int sig, struct uml_pt_regs *regs) { if (current->thread.fault_catcher != NULL) UML_LONGJMP(current->thread.fault_catcher, 1); else relay_signal(sig, regs); } -static void winch(int sig, struct uml_pt_regs *regs) +void winch(int sig, struct uml_pt_regs *regs) { do_IRQ(WINCH_IRQ, regs); } -const struct kern_handlers handlinfo_kern = { - .relay_signal = relay_signal, - .winch = winch, - .bus_handler = bus_handler, - .page_fault = segv_handler, - .sigio_handler = sigio_handler, - .timer_handler = timer_handler -}; - void trap_init(void) { } diff --git a/arch/um/kernel/uaccess.c b/arch/um/kernel/uaccess.c index d7436aacd26f..f0f4b040d7c5 100644 --- a/arch/um/kernel/uaccess.c +++ b/arch/um/kernel/uaccess.c @@ -1,10 +1,11 @@ /* * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk) - * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -/* These are here rather than tt/uaccess.c because skas mode needs them in +/* + * These are here rather than tt/uaccess.c because skas mode needs them in * order to do SIGBUS recovery when a tmpfs mount runs out of room. */ @@ -25,6 +26,8 @@ int __do_copy_to_user(void *to, const void *from, int n, fault = __do_user_copy(to, from, n, fault_addr, fault_catcher, __do_copy, &faulted); - if(!faulted) return(0); - else return(n - (fault - (unsigned long) to)); + if (!faulted) + return 0; + else + return n - (fault - (unsigned long) to); } diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index f1c71393f578..468aba990dbd 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -3,22 +3,23 @@ * Licensed under the GPL */ -#include "linux/delay.h" -#include "linux/mm.h" -#include "linux/module.h" -#include "linux/seq_file.h" -#include "linux/string.h" -#include "linux/utsname.h" -#include "asm/pgtable.h" -#include "asm/processor.h" -#include "asm/setup.h" -#include "arch.h" +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/string.h> +#include <linux/utsname.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/setup.h> #include "as-layout.h" +#include "arch.h" #include "init.h" #include "kern.h" +#include "kern_util.h" #include "mem_user.h" #include "os.h" -#include "skas.h" #define DEFAULT_COMMAND_LINE "root=98:0" @@ -100,8 +101,6 @@ const struct seq_operations cpuinfo_op = { }; /* Set in linux_main */ -unsigned long host_task_size; -unsigned long task_size; unsigned long uml_physmem; unsigned long uml_reserved; /* Also modified in mem_init */ unsigned long start_vm; @@ -197,20 +196,19 @@ __uml_setup("--help", Usage, " Prints this message.\n\n" ); -static int __init uml_checksetup(char *line, int *add) +static void __init uml_checksetup(char *line, int *add) { struct uml_param *p; p = &__uml_setup_start; - while(p < &__uml_setup_end) { + while (p < &__uml_setup_end) { int n; n = strlen(p->str); if (!strncmp(line, p->str, n) && p->setup_func(line + n, add)) - return 1; + return; p++; } - return 0; } static void __init uml_postsetup(void) @@ -218,13 +216,30 @@ static void __init uml_postsetup(void) initcall_t *p; p = &__uml_postsetup_start; - while(p < &__uml_postsetup_end) { + while (p < &__uml_postsetup_end) { (*p)(); p++; } return; } +static int panic_exit(struct notifier_block *self, unsigned long unused1, + void *unused2) +{ + bust_spinlocks(1); + show_regs(&(current->thread.regs)); + bust_spinlocks(0); + uml_exitcode = 1; + os_dump_core(); + return 0; +} + +static struct notifier_block panic_exit_notifier = { + .notifier_call = panic_exit, + .next = NULL, + .priority = 0 +}; + /* Set during early boot */ unsigned long brk_start; unsigned long end_iomem; @@ -234,20 +249,6 @@ EXPORT_SYMBOL(end_iomem); extern char __binary_start; -static unsigned long set_task_sizes_skas(unsigned long *task_size_out) -{ - /* Round up to the nearest 4M */ - unsigned long host_task_size = ROUND_4M((unsigned long) - &host_task_size); - - if (!skas_needs_stub) - *task_size_out = host_task_size; - else - *task_size_out = STUB_START & PGDIR_MASK; - - return host_task_size; -} - int __init linux_main(int argc, char **argv) { unsigned long avail, diff; @@ -278,13 +279,6 @@ int __init linux_main(int argc, char **argv) printf("UML running in %s mode\n", mode); - host_task_size = set_task_sizes_skas(&task_size); - - /* - * Setting up handlers to 'sig_info' struct - */ - os_fill_handlinfo(handlinfo_kern); - brk_start = (unsigned long) sbrk(0); /* @@ -309,7 +303,7 @@ int __init linux_main(int argc, char **argv) highmem = 0; iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; - max_physmem = get_kmem_end() - uml_physmem - iomem_size - MIN_VMALLOC; + max_physmem = CONFIG_TOP_ADDR - uml_physmem - iomem_size - MIN_VMALLOC; /* * Zones have to begin on a 1 << MAX_ORDER page boundary, @@ -341,7 +335,7 @@ int __init linux_main(int argc, char **argv) } virtmem_size = physmem_size; - avail = get_kmem_end() - start_vm; + avail = CONFIG_TOP_ADDR - start_vm; if (physmem_size > avail) virtmem_size = avail; end_vm = start_vm + virtmem_size; @@ -350,6 +344,9 @@ int __init linux_main(int argc, char **argv) printf("Kernel virtual memory size shrunk to %lu bytes\n", virtmem_size); + atomic_notifier_chain_register(&panic_notifier_list, + &panic_exit_notifier); + uml_postsetup(); stack_protections((unsigned long) &init_thread_info); @@ -358,29 +355,8 @@ int __init linux_main(int argc, char **argv) return start_uml(); } -extern int uml_exitcode; - -static int panic_exit(struct notifier_block *self, unsigned long unused1, - void *unused2) -{ - bust_spinlocks(1); - show_regs(&(current->thread.regs)); - bust_spinlocks(0); - uml_exitcode = 1; - os_dump_core(); - return 0; -} - -static struct notifier_block panic_exit_notifier = { - .notifier_call = panic_exit, - .next = NULL, - .priority = 0 -}; - void __init setup_arch(char **cmdline_p) { - atomic_notifier_chain_register(&panic_notifier_list, - &panic_exit_notifier); paging_init(); strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; diff --git a/arch/um/kernel/umid.c b/arch/um/kernel/umid.c index 039e16efcd55..81e07e2be3ae 100644 --- a/arch/um/kernel/umid.c +++ b/arch/um/kernel/umid.c @@ -1,13 +1,12 @@ -/* - * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include "asm/errno.h" +#include <asm/errno.h> #include "init.h" -#include "os.h" #include "kern.h" -#include "linux/kernel.h" +#include "os.h" /* Changed by set_umid_arg */ static int umid_inited = 0; @@ -16,16 +15,16 @@ static int __init set_umid_arg(char *name, int *add) { int err; - if(umid_inited){ + if (umid_inited) { printf("umid already set\n"); return 0; } *add = 0; err = set_umid(name); - if(err == -EEXIST) + if (err == -EEXIST) printf("umid '%s' already in use\n", name); - else if(!err) + else if (!err) umid_inited = 1; return 0; diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index 8e129af8170d..8a48d6a30064 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -4,7 +4,7 @@ # obj-y = aio.o elf_aux.o execvp.o file.o helper.o irq.o main.o mem.o process.o \ - registers.o sigio.o signal.o start_up.o time.o trap.o tty.o uaccess.o \ + registers.o sigio.o signal.o start_up.o time.o tty.o uaccess.o \ umid.o tls.o user_syms.o util.o drivers/ sys-$(SUBARCH)/ skas/ obj-$(CONFIG_TTY_LOG) += tty_log.o @@ -12,7 +12,7 @@ user-objs-$(CONFIG_TTY_LOG) += tty_log.o USER_OBJS := $(user-objs-y) aio.o elf_aux.o execvp.o file.o helper.o irq.o \ main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \ - trap.o tty.o tls.o uaccess.o umid.o util.o + tty.o tls.o uaccess.o umid.o util.o CFLAGS_user_syms.o += -DSUBARCH_$(SUBARCH) diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c index 93dc0c80ebaf..b8d8c9ca8d4a 100644 --- a/arch/um/os-Linux/aio.c +++ b/arch/um/os-Linux/aio.c @@ -12,6 +12,7 @@ #include "aio.h" #include "init.h" #include "kern_constants.h" +#include "kern_util.h" #include "os.h" #include "user.h" diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c index 07ca0cb472ac..6fb0b174f538 100644 --- a/arch/um/os-Linux/drivers/ethertap_user.c +++ b/arch/um/os-Linux/drivers/ethertap_user.c @@ -131,7 +131,7 @@ static int etap_tramp(char *dev, char *gate, int control_me, } if (c != 1) { printk(UM_KERN_ERR "etap_tramp : uml_net failed\n"); - err = helper_wait(pid, 0, "uml_net"); + err = helper_wait(pid); } return err; } diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c index 1037a3b6386e..2448be03fd7a 100644 --- a/arch/um/os-Linux/drivers/tuntap_user.c +++ b/arch/um/os-Linux/drivers/tuntap_user.c @@ -14,6 +14,7 @@ #include <sys/wait.h> #include <sys/uio.h> #include "kern_constants.h" +#include "kern_util.h" #include "os.h" #include "tuntap.h" #include "user.h" @@ -107,7 +108,7 @@ static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote, "errno = %d\n", errno); return err; } - helper_wait(pid, 0, "tuntap_open_tramp"); + helper_wait(pid); cmsg = CMSG_FIRSTHDR(&msg); if (cmsg == NULL) { @@ -148,7 +149,7 @@ static int tuntap_open(void *data) memset(&ifr, 0, sizeof(ifr)); ifr.ifr_flags = IFF_TAP | IFF_NO_PI; strlcpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name)); - if (ioctl(pri->fd, TUNSETIFF, (void *) &ifr) < 0) { + if (ioctl(pri->fd, TUNSETIFF, &ifr) < 0) { err = -errno; printk(UM_KERN_ERR "TUNSETIFF failed, errno = %d\n", errno); diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index f83462758627..b5afcfd0f861 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -8,18 +8,16 @@ #include <errno.h> #include <fcntl.h> #include <signal.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/socket.h> -#include <sys/un.h> #include <sys/ioctl.h> #include <sys/mount.h> -#include <sys/uio.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/un.h> +#include "kern_constants.h" #include "os.h" #include "user.h" -#include "kern_util.h" -static void copy_stat(struct uml_stat *dst, struct stat64 *src) +static void copy_stat(struct uml_stat *dst, const struct stat64 *src) { *dst = ((struct uml_stat) { .ust_dev = src->st_dev, /* device */ @@ -43,10 +41,10 @@ int os_stat_fd(const int fd, struct uml_stat *ubuf) int err; CATCH_EINTR(err = fstat64(fd, &sbuf)); - if(err < 0) + if (err < 0) return -errno; - if(ubuf != NULL) + if (ubuf != NULL) copy_stat(ubuf, &sbuf); return err; } @@ -56,27 +54,26 @@ int os_stat_file(const char *file_name, struct uml_stat *ubuf) struct stat64 sbuf; int err; - do { - err = stat64(file_name, &sbuf); - } while((err < 0) && (errno == EINTR)) ; - - if(err < 0) + CATCH_EINTR(err = stat64(file_name, &sbuf)); + if (err < 0) return -errno; - if(ubuf != NULL) + if (ubuf != NULL) copy_stat(ubuf, &sbuf); return err; } -int os_access(const char* file, int mode) +int os_access(const char *file, int mode) { int amode, err; - amode=(mode&OS_ACC_R_OK ? R_OK : 0) | (mode&OS_ACC_W_OK ? W_OK : 0) | - (mode&OS_ACC_X_OK ? X_OK : 0) | (mode&OS_ACC_F_OK ? F_OK : 0) ; + amode = (mode & OS_ACC_R_OK ? R_OK : 0) | + (mode & OS_ACC_W_OK ? W_OK : 0) | + (mode & OS_ACC_X_OK ? X_OK : 0) | + (mode & OS_ACC_F_OK ? F_OK : 0); err = access(file, amode); - if(err < 0) + if (err < 0) return -errno; return 0; @@ -88,7 +85,7 @@ int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg) int err; err = ioctl(fd, cmd, arg); - if(err < 0) + if (err < 0) return -errno; return err; @@ -97,7 +94,7 @@ int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg) /* FIXME: ensure namebuf in os_get_if_name is big enough */ int os_get_ifname(int fd, char* namebuf) { - if(ioctl(fd, SIOCGIFNAME, namebuf) < 0) + if (ioctl(fd, SIOCGIFNAME, namebuf) < 0) return -errno; return 0; @@ -108,37 +105,22 @@ int os_set_slip(int fd) int disc, sencap; disc = N_SLIP; - if(ioctl(fd, TIOCSETD, &disc) < 0) + if (ioctl(fd, TIOCSETD, &disc) < 0) return -errno; sencap = 0; - if(ioctl(fd, SIOCSIFENCAP, &sencap) < 0) + if (ioctl(fd, SIOCSIFENCAP, &sencap) < 0) return -errno; return 0; } -int os_set_owner(int fd, int pid) -{ - if(fcntl(fd, F_SETOWN, pid) < 0){ - int save_errno = errno; - - if(fcntl(fd, F_GETOWN, 0) != pid) - return -save_errno; - } - - return 0; -} - int os_mode_fd(int fd, int mode) { int err; - do { - err = fchmod(fd, mode); - } while((err < 0) && (errno==EINTR)) ; - - if(err < 0) + CATCH_EINTR(err = fchmod(fd, mode)); + if (err < 0) return -errno; return 0; @@ -150,64 +132,73 @@ int os_file_type(char *file) int err; err = os_stat_file(file, &buf); - if(err < 0) + if (err < 0) return err; - if(S_ISDIR(buf.ust_mode)) + if (S_ISDIR(buf.ust_mode)) return OS_TYPE_DIR; - else if(S_ISLNK(buf.ust_mode)) + else if (S_ISLNK(buf.ust_mode)) return OS_TYPE_SYMLINK; - else if(S_ISCHR(buf.ust_mode)) + else if (S_ISCHR(buf.ust_mode)) return OS_TYPE_CHARDEV; - else if(S_ISBLK(buf.ust_mode)) + else if (S_ISBLK(buf.ust_mode)) return OS_TYPE_BLOCKDEV; - else if(S_ISFIFO(buf.ust_mode)) + else if (S_ISFIFO(buf.ust_mode)) return OS_TYPE_FIFO; - else if(S_ISSOCK(buf.ust_mode)) + else if (S_ISSOCK(buf.ust_mode)) return OS_TYPE_SOCK; else return OS_TYPE_FILE; } -int os_file_mode(char *file, struct openflags *mode_out) +int os_file_mode(const char *file, struct openflags *mode_out) { int err; *mode_out = OPENFLAGS(); err = access(file, W_OK); - if(err && (errno != EACCES)) + if (err && (errno != EACCES)) return -errno; - else if(!err) + else if (!err) *mode_out = of_write(*mode_out); err = access(file, R_OK); - if(err && (errno != EACCES)) + if (err && (errno != EACCES)) return -errno; - else if(!err) + else if (!err) *mode_out = of_read(*mode_out); return err; } -int os_open_file(char *file, struct openflags flags, int mode) +int os_open_file(const char *file, struct openflags flags, int mode) { int fd, err, f = 0; - if(flags.r && flags.w) f = O_RDWR; - else if(flags.r) f = O_RDONLY; - else if(flags.w) f = O_WRONLY; + if (flags.r && flags.w) + f = O_RDWR; + else if (flags.r) + f = O_RDONLY; + else if (flags.w) + f = O_WRONLY; else f = 0; - if(flags.s) f |= O_SYNC; - if(flags.c) f |= O_CREAT; - if(flags.t) f |= O_TRUNC; - if(flags.e) f |= O_EXCL; + if (flags.s) + f |= O_SYNC; + if (flags.c) + f |= O_CREAT; + if (flags.t) + f |= O_TRUNC; + if (flags.e) + f |= O_EXCL; + if (flags.a) + f |= O_APPEND; fd = open64(file, f, mode); - if(fd < 0) + if (fd < 0) return -errno; - if(flags.cl && fcntl(fd, F_SETFD, 1)){ + if (flags.cl && fcntl(fd, F_SETFD, 1)) { err = -errno; close(fd); return err; @@ -216,7 +207,7 @@ int os_open_file(char *file, struct openflags flags, int mode) return fd; } -int os_connect_socket(char *name) +int os_connect_socket(const char *name) { struct sockaddr_un sock; int fd, err; @@ -225,13 +216,13 @@ int os_connect_socket(char *name) snprintf(sock.sun_path, sizeof(sock.sun_path), "%s", name); fd = socket(AF_UNIX, SOCK_STREAM, 0); - if(fd < 0) { + if (fd < 0) { err = -errno; goto out; } err = connect(fd, (struct sockaddr *) &sock, sizeof(sock)); - if(err) { + if (err) { err = -errno; goto out_close; } @@ -254,7 +245,7 @@ int os_seek_file(int fd, unsigned long long offset) unsigned long long actual; actual = lseek64(fd, offset, SEEK_SET); - if(actual != offset) + if (actual != offset) return -errno; return 0; } @@ -263,7 +254,7 @@ int os_read_file(int fd, void *buf, int len) { int n = read(fd, buf, len); - if(n < 0) + if (n < 0) return -errno; return n; } @@ -272,37 +263,38 @@ int os_write_file(int fd, const void *buf, int len) { int n = write(fd, (void *) buf, len); - if(n < 0) + if (n < 0) return -errno; return n; } -int os_file_size(char *file, unsigned long long *size_out) +int os_file_size(const char *file, unsigned long long *size_out) { struct uml_stat buf; int err; err = os_stat_file(file, &buf); - if(err < 0){ - printk("Couldn't stat \"%s\" : err = %d\n", file, -err); + if (err < 0) { + printk(UM_KERN_ERR "Couldn't stat \"%s\" : err = %d\n", file, + -err); return err; } - if(S_ISBLK(buf.ust_mode)){ + if (S_ISBLK(buf.ust_mode)) { int fd; long blocks; fd = open(file, O_RDONLY, 0); - if(fd < 0) { + if (fd < 0) { err = -errno; - printk("Couldn't open \"%s\", errno = %d\n", file, - errno); + printk(UM_KERN_ERR "Couldn't open \"%s\", " + "errno = %d\n", file, errno); return err; } - if(ioctl(fd, BLKGETSIZE, &blocks) < 0){ + if (ioctl(fd, BLKGETSIZE, &blocks) < 0) { err = -errno; - printk("Couldn't get the block size of \"%s\", " - "errno = %d\n", file, errno); + printk(UM_KERN_ERR "Couldn't get the block size of " + "\"%s\", errno = %d\n", file, errno); close(fd); return err; } @@ -314,14 +306,15 @@ int os_file_size(char *file, unsigned long long *size_out) return 0; } -int os_file_modtime(char *file, unsigned long *modtime) +int os_file_modtime(const char *file, unsigned long *modtime) { struct uml_stat buf; int err; err = os_stat_file(file, &buf); - if(err < 0){ - printk("Couldn't stat \"%s\" : err = %d\n", file, -err); + if (err < 0) { + printk(UM_KERN_ERR "Couldn't stat \"%s\" : err = %d\n", file, + -err); return err; } @@ -329,26 +322,13 @@ int os_file_modtime(char *file, unsigned long *modtime) return 0; } -int os_get_exec_close(int fd, int *close_on_exec) -{ - int ret; - - CATCH_EINTR(ret = fcntl(fd, F_GETFD)); - - if(ret < 0) - return -errno; - - *close_on_exec = (ret & FD_CLOEXEC) ? 1 : 0; - return ret; -} - int os_set_exec_close(int fd) { int err; CATCH_EINTR(err = fcntl(fd, F_SETFD, FD_CLOEXEC)); - if(err < 0) + if (err < 0) return -errno; return err; } @@ -358,53 +338,51 @@ int os_pipe(int *fds, int stream, int close_on_exec) int err, type = stream ? SOCK_STREAM : SOCK_DGRAM; err = socketpair(AF_UNIX, type, 0, fds); - if(err < 0) + if (err < 0) return -errno; - if(!close_on_exec) + if (!close_on_exec) return 0; err = os_set_exec_close(fds[0]); - if(err < 0) + if (err < 0) goto error; err = os_set_exec_close(fds[1]); - if(err < 0) + if (err < 0) goto error; return 0; error: - printk("os_pipe : Setting FD_CLOEXEC failed, err = %d\n", -err); + printk(UM_KERN_ERR "os_pipe : Setting FD_CLOEXEC failed, err = %d\n", + -err); close(fds[1]); close(fds[0]); return err; } -int os_set_fd_async(int fd, int owner) +int os_set_fd_async(int fd) { - int err; + int err, flags; + + flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; - /* XXX This should do F_GETFL first */ - if(fcntl(fd, F_SETFL, O_ASYNC | O_NONBLOCK) < 0){ + flags |= O_ASYNC | O_NONBLOCK; + if (fcntl(fd, F_SETFL, flags) < 0) { err = -errno; - printk("os_set_fd_async : failed to set O_ASYNC and " - "O_NONBLOCK on fd # %d, errno = %d\n", fd, errno); + printk(UM_KERN_ERR "os_set_fd_async : failed to set O_ASYNC " + "and O_NONBLOCK on fd # %d, errno = %d\n", fd, errno); return err; } -#ifdef notdef - if(fcntl(fd, F_SETFD, 1) < 0){ - printk("os_set_fd_async : Setting FD_CLOEXEC failed, " - "errno = %d\n", errno); - } -#endif - if((fcntl(fd, F_SETSIG, SIGIO) < 0) || - (fcntl(fd, F_SETOWN, owner) < 0)){ + if ((fcntl(fd, F_SETSIG, SIGIO) < 0) || + (fcntl(fd, F_SETOWN, os_getpid()) < 0)) { err = -errno; - printk("os_set_fd_async : Failed to fcntl F_SETOWN " - "(or F_SETSIG) fd %d to pid %d, errno = %d\n", fd, - owner, errno); + printk(UM_KERN_ERR "os_set_fd_async : Failed to fcntl F_SETOWN " + "(or F_SETSIG) fd %d, errno = %d\n", fd, errno); return err; } @@ -413,10 +391,14 @@ int os_set_fd_async(int fd, int owner) int os_clear_fd_async(int fd) { - int flags = fcntl(fd, F_GETFL); + int flags; + + flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; flags &= ~(O_ASYNC | O_NONBLOCK); - if(fcntl(fd, F_SETFL, flags) < 0) + if (fcntl(fd, F_SETFL, flags) < 0) return -errno; return 0; } @@ -426,11 +408,15 @@ int os_set_fd_block(int fd, int blocking) int flags; flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; - if(blocking) flags &= ~O_NONBLOCK; - else flags |= O_NONBLOCK; + if (blocking) + flags &= ~O_NONBLOCK; + else + flags |= O_NONBLOCK; - if(fcntl(fd, F_SETFL, flags) < 0) + if (fcntl(fd, F_SETFL, flags) < 0) return -errno; return 0; @@ -441,7 +427,7 @@ int os_accept_connection(int fd) int new; new = accept(fd, NULL, 0); - if(new < 0) + if (new < 0) return -errno; return new; } @@ -462,15 +448,17 @@ int os_shutdown_socket(int fd, int r, int w) { int what, err; - if(r && w) what = SHUT_RDWR; - else if(r) what = SHUT_RD; - else if(w) what = SHUT_WR; - else { - printk("os_shutdown_socket : neither r or w was set\n"); + if (r && w) + what = SHUT_RDWR; + else if (r) + what = SHUT_RD; + else if (w) + what = SHUT_WR; + else return -EINVAL; - } + err = shutdown(fd, what); - if(err < 0) + if (err < 0) return -errno; return 0; } @@ -494,19 +482,20 @@ int os_rcv_fd(int fd, int *helper_pid_out) msg.msg_flags = 0; n = recvmsg(fd, &msg, 0); - if(n < 0) + if (n < 0) return -errno; - else if(n != iov.iov_len) + else if (n != iov.iov_len) *helper_pid_out = -1; cmsg = CMSG_FIRSTHDR(&msg); - if(cmsg == NULL){ - printk("rcv_fd didn't receive anything, error = %d\n", errno); + if (cmsg == NULL) { + printk(UM_KERN_ERR "rcv_fd didn't receive anything, " + "error = %d\n", errno); return -1; } - if((cmsg->cmsg_level != SOL_SOCKET) || - (cmsg->cmsg_type != SCM_RIGHTS)){ - printk("rcv_fd didn't receive a descriptor\n"); + if ((cmsg->cmsg_level != SOL_SOCKET) || + (cmsg->cmsg_type != SCM_RIGHTS)) { + printk(UM_KERN_ERR "rcv_fd didn't receive a descriptor\n"); return -1; } @@ -514,29 +503,28 @@ int os_rcv_fd(int fd, int *helper_pid_out) return new; } -int os_create_unix_socket(char *file, int len, int close_on_exec) +int os_create_unix_socket(const char *file, int len, int close_on_exec) { struct sockaddr_un addr; int sock, err; sock = socket(PF_UNIX, SOCK_DGRAM, 0); - if(sock < 0) + if (sock < 0) return -errno; - if(close_on_exec) { + if (close_on_exec) { err = os_set_exec_close(sock); - if(err < 0) - printk("create_unix_socket : close_on_exec failed, " - "err = %d", -err); + if (err < 0) + printk(UM_KERN_ERR "create_unix_socket : " + "close_on_exec failed, err = %d", -err); } addr.sun_family = AF_UNIX; - /* XXX Be more careful about overflow */ snprintf(addr.sun_path, len, "%s", file); err = bind(sock, (struct sockaddr *) &addr, sizeof(addr)); - if(err < 0) + if (err < 0) return -errno; return sock; @@ -557,17 +545,18 @@ int os_lock_file(int fd, int excl) int err, save; err = fcntl(fd, F_SETLK, &lock); - if(!err) + if (!err) goto out; save = -errno; err = fcntl(fd, F_GETLK, &lock); - if(err){ + if (err) { err = -errno; goto out; } - printk("F_SETLK failed, file already locked by pid %d\n", lock.l_pid); + printk(UM_KERN_ERR "F_SETLK failed, file already locked by pid %d\n", + lock.l_pid); err = save; out: return err; diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index fba3f0fefeef..f4bd349d4412 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c @@ -1,22 +1,19 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <errno.h> #include <sched.h> -#include <limits.h> -#include <sys/signal.h> -#include <sys/wait.h> #include <sys/socket.h> -#include "user.h" +#include <sys/wait.h> +#include "kern_constants.h" #include "kern_util.h" #include "os.h" #include "um_malloc.h" -#include "kern_constants.h" +#include "user.h" struct helper_data { void (*pre_exec)(void*); @@ -30,21 +27,19 @@ static int helper_child(void *arg) { struct helper_data *data = arg; char **argv = data->argv; - int errval; + int err; if (data->pre_exec != NULL) (*data->pre_exec)(data->pre_data); - errval = execvp_noalloc(data->buf, argv[0], argv); - printk("helper_child - execvp of '%s' failed - errno = %d\n", argv[0], - -errval); - write(data->fd, &errval, sizeof(errval)); - kill(os_getpid(), SIGKILL); + err = execvp_noalloc(data->buf, argv[0], argv); + + /* If the exec succeeds, we don't get here */ + write(data->fd, &err, sizeof(err)); + return 0; } -/* Returns either the pid of the child process we run or -E* on failure. - * XXX The alloc_stack here breaks if this is called in the tracing thread, so - * we need to receive a preallocated stack (a local buffer is ok). */ +/* Returns either the pid of the child process we run or -E* on failure. */ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv) { struct helper_data data; @@ -58,14 +53,15 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv) ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds); if (ret < 0) { ret = -errno; - printk("run_helper : pipe failed, errno = %d\n", errno); + printk(UM_KERN_ERR "run_helper : pipe failed, errno = %d\n", + errno); goto out_free; } ret = os_set_exec_close(fds[1]); if (ret < 0) { - printk("run_helper : setting FD_CLOEXEC failed, ret = %d\n", - -ret); + printk(UM_KERN_ERR "run_helper : setting FD_CLOEXEC failed, " + "ret = %d\n", -ret); goto out_close; } @@ -79,7 +75,8 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv) pid = clone(helper_child, (void *) sp, CLONE_VM, &data); if (pid < 0) { ret = -errno; - printk("run_helper : clone failed, errno = %d\n", errno); + printk(UM_KERN_ERR "run_helper : clone failed, errno = %d\n", + errno); goto out_free2; } @@ -96,10 +93,9 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv) } else { if (n < 0) { n = -errno; - printk("run_helper : read on pipe failed, ret = %d\n", - -n); + printk(UM_KERN_ERR "run_helper : read on pipe failed, " + "ret = %d\n", -n); ret = n; - kill(pid, SIGKILL); } CATCH_EINTR(waitpid(pid, NULL, __WCLONE)); } @@ -129,50 +125,40 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, pid = clone(proc, (void *) sp, flags, arg); if (pid < 0) { err = -errno; - printk("run_helper_thread : clone failed, errno = %d\n", - errno); + printk(UM_KERN_ERR "run_helper_thread : clone failed, " + "errno = %d\n", errno); return err; } if (stack_out == NULL) { CATCH_EINTR(pid = waitpid(pid, &status, __WCLONE)); if (pid < 0) { err = -errno; - printk("run_helper_thread - wait failed, errno = %d\n", - errno); + printk(UM_KERN_ERR "run_helper_thread - wait failed, " + "errno = %d\n", errno); pid = err; } if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) - printk("run_helper_thread - thread returned status " - "0x%x\n", status); + printk(UM_KERN_ERR "run_helper_thread - thread " + "returned status 0x%x\n", status); free_stack(stack, 0); } else *stack_out = stack; return pid; } -int helper_wait(int pid, int nohang, char *pname) +int helper_wait(int pid) { int ret, status; int wflags = __WCLONE; - if (nohang) - wflags |= WNOHANG; - - if (!pname) - pname = "helper_wait"; - CATCH_EINTR(ret = waitpid(pid, &status, wflags)); if (ret < 0) { - printk(UM_KERN_ERR "%s : waitpid process %d failed, " - "errno = %d\n", pname, pid, errno); + printk(UM_KERN_ERR "helper_wait : waitpid process %d failed, " + "errno = %d\n", pid, errno); return -errno; - } else if (nohang && ret == 0) { - printk(UM_KERN_ERR "%s : process %d has not exited\n", - pname, pid); - return -ECHILD; } else if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { - printk(UM_KERN_ERR "%s : process %d didn't exit with " - "status 0\n", pname, pid); + printk(UM_KERN_ERR "helper_wait : process %d exited with " + "status 0x%x\n", pid, status); return -ECHILD; } else return 0; diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c index 6aa6f95d6524..0348b975e81c 100644 --- a/arch/um/os-Linux/irq.c +++ b/arch/um/os-Linux/irq.c @@ -1,23 +1,19 @@ /* - * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ #include <stdlib.h> -#include <unistd.h> #include <errno.h> +#include <poll.h> #include <signal.h> #include <string.h> -#include <sys/poll.h> -#include <sys/types.h> -#include <sys/time.h> -#include "kern_util.h" -#include "user.h" -#include "process.h" -#include "sigio.h" #include "irq_user.h" +#include "kern_constants.h" #include "os.h" +#include "process.h" #include "um_malloc.h" +#include "user.h" /* * Locked by irq_lock in arch/um/kernel/irq.c. Changed by os_create_pollfd @@ -36,7 +32,7 @@ int os_waiting_for_events(struct irq_fd *active_fds) if (n < 0) { err = -errno; if (errno != EINTR) - printk("sigio_handler: os_waiting_for_events:" + printk(UM_KERN_ERR "os_waiting_for_events:" " poll returned %d, errno = %d\n", n, errno); return err; } @@ -95,24 +91,26 @@ void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg, struct irq_fd *old_fd = *prev; if ((pollfds[i].fd != -1) && (pollfds[i].fd != (*prev)->fd)) { - printk("os_free_irq_by_cb - mismatch between " - "active_fds and pollfds, fd %d vs %d\n", + printk(UM_KERN_ERR "os_free_irq_by_cb - " + "mismatch between active_fds and " + "pollfds, fd %d vs %d\n", (*prev)->fd, pollfds[i].fd); goto out; } pollfds_num--; - /* This moves the *whole* array after pollfds[i] + /* + * This moves the *whole* array after pollfds[i] * (though it doesn't spot as such)! */ memmove(&pollfds[i], &pollfds[i + 1], (pollfds_num - i) * sizeof(pollfds[0])); - if(*last_irq_ptr2 == &old_fd->next) + if (*last_irq_ptr2 == &old_fd->next) *last_irq_ptr2 = prev; *prev = (*prev)->next; - if(old_fd->type == IRQ_WRITE) + if (old_fd->type == IRQ_WRITE) ignore_sigio_fd(old_fd->fd); kfree(old_fd); continue; @@ -138,14 +136,3 @@ void os_set_ioignore(void) { signal(SIGIO, SIG_IGN); } - -void init_irq_signals(int on_sigstack) -{ - int flags; - - flags = on_sigstack ? SA_ONSTACK : 0; - - set_handler(SIGIO, (__sighandler_t) sig_handler, flags | SA_RESTART, - SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1); - signal(SIGWINCH, SIG_IGN); -} diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index 82c3778627b8..abb9b0ffd960 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -73,7 +73,7 @@ static void install_fatal_handler(int sig) action.sa_handler = last_ditch_exit; if (sigaction(sig, &action, NULL) < 0) { printf("failed to install handler for signal %d - errno = %d\n", - errno); + sig, errno); exit(1); } } @@ -92,7 +92,8 @@ static void setup_env_path(void) * just use the default + /usr/lib/uml */ if (!old_path || (path_len = strlen(old_path)) == 0) { - putenv("PATH=:/bin:/usr/bin/" UML_LIB_PATH); + if (putenv("PATH=:/bin:/usr/bin/" UML_LIB_PATH)) + perror("couldn't putenv"); return; } @@ -100,15 +101,16 @@ static void setup_env_path(void) path_len += strlen("PATH=" UML_LIB_PATH) + 1; new_path = malloc(path_len); if (!new_path) { - perror("coudn't malloc to set a new PATH"); + perror("couldn't malloc to set a new PATH"); return; } snprintf(new_path, path_len, "PATH=%s" UML_LIB_PATH, old_path); - putenv(new_path); + if (putenv(new_path)) { + perror("couldn't putenv to set a new PATH"); + free(new_path); + } } -extern int uml_exitcode; - extern void scan_elf_aux( char **envp); int __init main(int argc, char **argv, char **envp) diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c index 436f8d20b20f..eedc2d88ef8a 100644 --- a/arch/um/os-Linux/mem.c +++ b/arch/um/os-Linux/mem.c @@ -9,7 +9,6 @@ #include <sys/types.h> #include <sys/mman.h> #include <sys/statfs.h> -#include "kern_util.h" #include "user.h" #include "mem_user.h" #include "init.h" @@ -30,7 +29,7 @@ static char *tempdir = NULL; static void __init find_tempdir(void) { - char *dirs[] = { "TMP", "TEMP", "TMPDIR", NULL }; + const char *dirs[] = { "TMP", "TEMP", "TMPDIR", NULL }; int i; char *dir = NULL; @@ -59,9 +58,10 @@ static void __init find_tempdir(void) * read the file as needed. If there's an error, -errno is returned; * if the end of the file is reached, 0 is returned. */ -static int next(int fd, char *buf, int size, char c) +static int next(int fd, char *buf, size_t size, char c) { - int n, len; + ssize_t n; + size_t len; char *ptr; while((ptr = strchr(buf, c)) == NULL){ @@ -172,13 +172,15 @@ int __init make_tempfile(const char *template, char **out_tempname, which_tmpdir(); tempname = malloc(MAXPATHLEN); + if (!tempname) + goto out; find_tempdir(); if (template[0] != '/') strcpy(tempname, tempdir); else tempname[0] = '\0'; - strcat(tempname, template); + strncat(tempname, template, MAXPATHLEN-1-strlen(tempname)); fd = mkstemp(tempname); if(fd < 0){ fprintf(stderr, "open - cannot create %s: %s\n", tempname, @@ -268,6 +270,7 @@ void __init check_tmpexec(void) if(addr == MAP_FAILED){ err = errno; perror("failed"); + close(fd); if(err == EPERM) printf("%s must be not mounted noexec\n",tempdir); exit(1); diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index bda5c3150d6c..abf6beae3df1 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -249,7 +249,10 @@ void init_new_thread_signals(void) SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1); signal(SIGHUP, SIG_IGN); - init_irq_signals(1); + set_handler(SIGIO, (__sighandler_t) sig_handler, + SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, + SIGVTALRM, -1); + signal(SIGWINCH, SIG_IGN); } int run_kernel_thread(int (*fn)(void *), void *arg, jmp_buf **jmp_ptr) diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c index a32ba6ab1211..830fe6a1518a 100644 --- a/arch/um/os-Linux/registers.c +++ b/arch/um/os-Linux/registers.c @@ -8,47 +8,41 @@ #include <string.h> #include <sys/ptrace.h> #include "sysdep/ptrace.h" -#include "user.h" -/* This is set once at boot time and not changed thereafter */ - -static unsigned long exec_regs[MAX_REG_NR]; - -void init_thread_registers(struct uml_pt_regs *to) -{ - memcpy(to->gp, exec_regs, sizeof(to->gp)); -} - -void save_registers(int pid, struct uml_pt_regs *regs) +int save_registers(int pid, struct uml_pt_regs *regs) { int err; err = ptrace(PTRACE_GETREGS, pid, 0, regs->gp); if (err < 0) - panic("save_registers - saving registers failed, errno = %d\n", - errno); + return -errno; + return 0; } -void restore_registers(int pid, struct uml_pt_regs *regs) +int restore_registers(int pid, struct uml_pt_regs *regs) { int err; err = ptrace(PTRACE_SETREGS, pid, 0, regs->gp); if (err < 0) - panic("restore_registers - saving registers failed, " - "errno = %d\n", errno); + return -errno; + return 0; } -void init_registers(int pid) +/* This is set once at boot time and not changed thereafter */ + +static unsigned long exec_regs[MAX_REG_NR]; + +int init_registers(int pid) { int err; err = ptrace(PTRACE_GETREGS, pid, 0, exec_regs); - if (err) - panic("check_ptrace : PTRACE_GETREGS failed, errno = %d", - errno); + if (err < 0) + return -errno; arch_init_registers(pid); + return 0; } void get_safe_registers(unsigned long *regs) diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c index dc03e9cccb63..abf47a7c4abd 100644 --- a/arch/um/os-Linux/sigio.c +++ b/arch/um/os-Linux/sigio.c @@ -1,34 +1,33 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ #include <unistd.h> -#include <stdlib.h> -#include <termios.h> +#include <errno.h> +#include <fcntl.h> +#include <poll.h> #include <pty.h> +#include <sched.h> #include <signal.h> -#include <fcntl.h> -#include <errno.h> #include <string.h> -#include <sched.h> -#include <sys/socket.h> -#include <sys/poll.h> -#include "init.h" -#include "user.h" +#include "kern_constants.h" #include "kern_util.h" -#include "sigio.h" +#include "init.h" #include "os.h" +#include "sigio.h" #include "um_malloc.h" -#include "init.h" +#include "user.h" -/* Protected by sigio_lock(), also used by sigio_cleanup, which is an +/* + * Protected by sigio_lock(), also used by sigio_cleanup, which is an * exitcall. */ static int write_sigio_pid = -1; static unsigned long write_sigio_stack; -/* These arrays are initialized before the sigio thread is started, and +/* + * These arrays are initialized before the sigio thread is started, and * the descriptors closed after it is killed. So, it can't see them change. * On the UML side, they are changed under the sigio_lock. */ @@ -43,7 +42,8 @@ struct pollfds { int used; }; -/* Protected by sigio_lock(). Used by the sigio thread, but the UML thread +/* + * Protected by sigio_lock(). Used by the sigio thread, but the UML thread * synchronizes with it. */ static struct pollfds current_poll; @@ -57,23 +57,26 @@ static int write_sigio_thread(void *unused) int i, n, respond_fd; char c; - signal(SIGWINCH, SIG_IGN); + signal(SIGWINCH, SIG_IGN); fds = ¤t_poll; - while(1){ + while (1) { n = poll(fds->poll, fds->used, -1); - if(n < 0){ - if(errno == EINTR) continue; - printk("write_sigio_thread : poll returned %d, " - "errno = %d\n", n, errno); + if (n < 0) { + if (errno == EINTR) + continue; + printk(UM_KERN_ERR "write_sigio_thread : poll returned " + "%d, errno = %d\n", n, errno); } - for(i = 0; i < fds->used; i++){ + for (i = 0; i < fds->used; i++) { p = &fds->poll[i]; - if(p->revents == 0) continue; - if(p->fd == sigio_private[1]){ + if (p->revents == 0) + continue; + if (p->fd == sigio_private[1]) { CATCH_EINTR(n = read(sigio_private[1], &c, sizeof(c))); - if(n != sizeof(c)) - printk("write_sigio_thread : " + if (n != sizeof(c)) + printk(UM_KERN_ERR + "write_sigio_thread : " "read on socket failed, " "err = %d\n", errno); tmp = current_poll; @@ -89,9 +92,10 @@ static int write_sigio_thread(void *unused) } CATCH_EINTR(n = write(respond_fd, &c, sizeof(c))); - if(n != sizeof(c)) - printk("write_sigio_thread : write on socket " - "failed, err = %d\n", errno); + if (n != sizeof(c)) + printk(UM_KERN_ERR "write_sigio_thread : " + "write on socket failed, err = %d\n", + errno); } } @@ -102,12 +106,13 @@ static int need_poll(struct pollfds *polls, int n) { struct pollfd *new; - if(n <= polls->size) + if (n <= polls->size) return 0; new = kmalloc(n * sizeof(struct pollfd), UM_GFP_ATOMIC); - if(new == NULL){ - printk("need_poll : failed to allocate new pollfds\n"); + if (new == NULL) { + printk(UM_KERN_ERR "need_poll : failed to allocate new " + "pollfds\n"); return -ENOMEM; } @@ -119,7 +124,8 @@ static int need_poll(struct pollfds *polls, int n) return 0; } -/* Must be called with sigio_lock held, because it's needed by the marked +/* + * Must be called with sigio_lock held, because it's needed by the marked * critical section. */ static void update_thread(void) @@ -129,15 +135,17 @@ static void update_thread(void) char c; flags = set_signals(0); - n = write(sigio_private[0], &c, sizeof(c)); - if(n != sizeof(c)){ - printk("update_thread : write failed, err = %d\n", errno); + CATCH_EINTR(n = write(sigio_private[0], &c, sizeof(c))); + if (n != sizeof(c)) { + printk(UM_KERN_ERR "update_thread : write failed, err = %d\n", + errno); goto fail; } CATCH_EINTR(n = read(sigio_private[0], &c, sizeof(c))); - if(n != sizeof(c)){ - printk("update_thread : read failed, err = %d\n", errno); + if (n != sizeof(c)) { + printk(UM_KERN_ERR "update_thread : read failed, err = %d\n", + errno); goto fail; } @@ -164,23 +172,23 @@ int add_sigio_fd(int fd) int err = 0, i, n; sigio_lock(); - for(i = 0; i < all_sigio_fds.used; i++){ - if(all_sigio_fds.poll[i].fd == fd) + for (i = 0; i < all_sigio_fds.used; i++) { + if (all_sigio_fds.poll[i].fd == fd) break; } - if(i == all_sigio_fds.used) + if (i == all_sigio_fds.used) goto out; p = &all_sigio_fds.poll[i]; - for(i = 0; i < current_poll.used; i++){ - if(current_poll.poll[i].fd == fd) + for (i = 0; i < current_poll.used; i++) { + if (current_poll.poll[i].fd == fd) goto out; } n = current_poll.used; err = need_poll(&next_poll, n + 1); - if(err) + if (err) goto out; memcpy(next_poll.poll, current_poll.poll, @@ -198,27 +206,29 @@ int ignore_sigio_fd(int fd) struct pollfd *p; int err = 0, i, n = 0; - /* This is called from exitcalls elsewhere in UML - if + /* + * This is called from exitcalls elsewhere in UML - if * sigio_cleanup has already run, then update_thread will hang * or fail because the thread is no longer running. */ - if(write_sigio_pid == -1) + if (write_sigio_pid == -1) return -EIO; sigio_lock(); - for(i = 0; i < current_poll.used; i++){ - if(current_poll.poll[i].fd == fd) break; + for (i = 0; i < current_poll.used; i++) { + if (current_poll.poll[i].fd == fd) + break; } - if(i == current_poll.used) + if (i == current_poll.used) goto out; err = need_poll(&next_poll, current_poll.used - 1); - if(err) + if (err) goto out; - for(i = 0; i < current_poll.used; i++){ + for (i = 0; i < current_poll.used; i++) { p = ¤t_poll.poll[i]; - if(p->fd != fd) + if (p->fd != fd) next_poll.poll[n++] = *p; } next_poll.used = current_poll.used - 1; @@ -235,7 +245,8 @@ static struct pollfd *setup_initial_poll(int fd) p = kmalloc(sizeof(struct pollfd), UM_GFP_KERNEL); if (p == NULL) { - printk("setup_initial_poll : failed to allocate poll\n"); + printk(UM_KERN_ERR "setup_initial_poll : failed to allocate " + "poll\n"); return NULL; } *p = ((struct pollfd) { .fd = fd, @@ -261,27 +272,29 @@ static void write_sigio_workaround(void) return; err = os_pipe(l_write_sigio_fds, 1, 1); - if(err < 0){ - printk("write_sigio_workaround - os_pipe 1 failed, " + if (err < 0) { + printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 1 failed, " "err = %d\n", -err); return; } err = os_pipe(l_sigio_private, 1, 1); - if(err < 0){ - printk("write_sigio_workaround - os_pipe 2 failed, " + if (err < 0) { + printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 2 failed, " "err = %d\n", -err); goto out_close1; } p = setup_initial_poll(l_sigio_private[1]); - if(!p) + if (!p) goto out_close2; sigio_lock(); - /* Did we race? Don't try to optimize this, please, it's not so likely - * to happen, and no more than once at the boot. */ - if(write_sigio_pid != -1) + /* + * Did we race? Don't try to optimize this, please, it's not so likely + * to happen, and no more than once at the boot. + */ + if (write_sigio_pid != -1) goto out_free; current_poll = ((struct pollfds) { .poll = p, @@ -333,19 +346,19 @@ void maybe_sigio_broken(int fd, int read) { int err; - if(!isatty(fd)) + if (!isatty(fd)) return; - if((read || pty_output_sigio) && (!read || pty_close_sigio)) + if ((read || pty_output_sigio) && (!read || pty_close_sigio)) return; write_sigio_workaround(); sigio_lock(); err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1); - if(err){ - printk("maybe_sigio_broken - failed to add pollfd for " - "descriptor %d\n", fd); + if (err) { + printk(UM_KERN_ERR "maybe_sigio_broken - failed to add pollfd " + "for descriptor %d\n", fd); goto out; } @@ -388,7 +401,7 @@ static void openpty_cb(void *arg) struct openpty_arg *info = arg; info->err = 0; - if(openpty(&info->master, &info->slave, NULL, NULL, NULL)) + if (openpty(&info->master, &info->slave, NULL, NULL, NULL)) info->err = -errno; } @@ -397,17 +410,17 @@ static int async_pty(int master, int slave) int flags; flags = fcntl(master, F_GETFL); - if(flags < 0) + if (flags < 0) return -errno; - if((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) || - (fcntl(master, F_SETOWN, os_getpid()) < 0)) + if ((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) || + (fcntl(master, F_SETOWN, os_getpid()) < 0)) return -errno; - if((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)) + if ((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)) return -errno; - return(0); + return 0; } static void __init check_one_sigio(void (*proc)(int, int)) @@ -417,34 +430,49 @@ static void __init check_one_sigio(void (*proc)(int, int)) int master, slave, err; initial_thread_cb(openpty_cb, &pty); - if(pty.err){ - printk("openpty failed, errno = %d\n", -pty.err); + if (pty.err) { + printk(UM_KERN_ERR "check_one_sigio failed, errno = %d\n", + -pty.err); return; } master = pty.master; slave = pty.slave; - if((master == -1) || (slave == -1)){ - printk("openpty failed to allocate a pty\n"); + if ((master == -1) || (slave == -1)) { + printk(UM_KERN_ERR "check_one_sigio failed to allocate a " + "pty\n"); return; } /* Not now, but complain so we now where we failed. */ err = raw(master); - if (err < 0) - panic("check_sigio : __raw failed, errno = %d\n", -err); + if (err < 0) { + printk(UM_KERN_ERR "check_one_sigio : raw failed, errno = %d\n", + -err); + return; + } err = async_pty(master, slave); - if(err < 0) - panic("tty_fds : sigio_async failed, err = %d\n", -err); + if (err < 0) { + printk(UM_KERN_ERR "check_one_sigio : sigio_async failed, " + "err = %d\n", -err); + return; + } + + if (sigaction(SIGIO, NULL, &old) < 0) { + printk(UM_KERN_ERR "check_one_sigio : sigaction 1 failed, " + "errno = %d\n", errno); + return; + } - if(sigaction(SIGIO, NULL, &old) < 0) - panic("check_sigio : sigaction 1 failed, errno = %d\n", errno); new = old; new.sa_handler = handler; - if(sigaction(SIGIO, &new, NULL) < 0) - panic("check_sigio : sigaction 2 failed, errno = %d\n", errno); + if (sigaction(SIGIO, &new, NULL) < 0) { + printk(UM_KERN_ERR "check_one_sigio : sigaction 2 failed, " + "errno = %d\n", errno); + return; + } got_sigio = 0; (*proc)(master, slave); @@ -452,8 +480,9 @@ static void __init check_one_sigio(void (*proc)(int, int)) close(master); close(slave); - if(sigaction(SIGIO, &old, NULL) < 0) - panic("check_sigio : sigaction 3 failed, errno = %d\n", errno); + if (sigaction(SIGIO, &old, NULL) < 0) + printk(UM_KERN_ERR "check_one_sigio : sigaction 3 failed, " + "errno = %d\n", errno); } static void tty_output(int master, int slave) @@ -461,42 +490,45 @@ static void tty_output(int master, int slave) int n; char buf[512]; - printk("Checking that host ptys support output SIGIO..."); + printk(UM_KERN_INFO "Checking that host ptys support output SIGIO..."); memset(buf, 0, sizeof(buf)); - while(write(master, buf, sizeof(buf)) > 0) ; - if(errno != EAGAIN) - panic("tty_output : write failed, errno = %d\n", errno); - while(((n = read(slave, buf, sizeof(buf))) > 0) && !got_sigio) ; + while (write(master, buf, sizeof(buf)) > 0) ; + if (errno != EAGAIN) + printk(UM_KERN_ERR "tty_output : write failed, errno = %d\n", + errno); + while (((n = read(slave, buf, sizeof(buf))) > 0) && !got_sigio) + ; - if(got_sigio){ - printk("Yes\n"); + if (got_sigio) { + printk(UM_KERN_CONT "Yes\n"); pty_output_sigio = 1; - } - else if(n == -EAGAIN) - printk("No, enabling workaround\n"); - else panic("tty_output : read failed, err = %d\n", n); + } else if (n == -EAGAIN) + printk(UM_KERN_CONT "No, enabling workaround\n"); + else + printk(UM_KERN_CONT "tty_output : read failed, err = %d\n", n); } static void tty_close(int master, int slave) { - printk("Checking that host ptys support SIGIO on close..."); + printk(UM_KERN_INFO "Checking that host ptys support SIGIO on " + "close..."); close(slave); - if(got_sigio){ - printk("Yes\n"); + if (got_sigio) { + printk(UM_KERN_CONT "Yes\n"); pty_close_sigio = 1; - } - else printk("No, enabling workaround\n"); + } else + printk(UM_KERN_CONT "No, enabling workaround\n"); } void __init check_sigio(void) { - if((os_access("/dev/ptmx", OS_ACC_R_OK) < 0) && - (os_access("/dev/ptyp0", OS_ACC_R_OK) < 0)){ - printk("No pseudo-terminals available - skipping pty SIGIO " - "check\n"); + if ((access("/dev/ptmx", R_OK) < 0) && + (access("/dev/ptyp0", R_OK) < 0)) { + printk(UM_KERN_WARNING "No pseudo-terminals available - " + "skipping pty SIGIO check\n"); return; } check_one_sigio(tty_output); diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index e9800b0b5689..0fb0cc8d4757 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -9,11 +9,47 @@ #include <errno.h> #include <signal.h> #include <strings.h> +#include "as-layout.h" +#include "kern_util.h" #include "os.h" #include "sysdep/barrier.h" #include "sysdep/sigcontext.h" #include "user.h" +/* Copied from linux/compiler-gcc.h since we can't include it directly */ +#define barrier() __asm__ __volatile__("": : :"memory") + +void (*sig_info[NSIG])(int, struct uml_pt_regs *) = { + [SIGTRAP] = relay_signal, + [SIGFPE] = relay_signal, + [SIGILL] = relay_signal, + [SIGWINCH] = winch, + [SIGBUS] = bus_handler, + [SIGSEGV] = segv_handler, + [SIGIO] = sigio_handler, + [SIGVTALRM] = timer_handler }; + +static void sig_handler_common(int sig, struct sigcontext *sc) +{ + struct uml_pt_regs r; + int save_errno = errno; + + r.is_user = 0; + if (sig == SIGSEGV) { + /* For segfaults, we want the data from the sigcontext. */ + copy_sc(&r, sc); + GET_FAULTINFO_FROM_SC(r.faultinfo, sc); + } + + /* enable signals if sig isn't IRQ signal */ + if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGVTALRM)) + unblock_signals(); + + (*sig_info[sig])(sig, &r); + + errno = save_errno; +} + /* * These are the asynchronous signals. SIGPROF is excluded because we want to * be able to profile all of UML, not just the non-critical sections. If @@ -26,13 +62,8 @@ #define SIGVTALRM_BIT 1 #define SIGVTALRM_MASK (1 << SIGVTALRM_BIT) -/* - * These are used by both the signal handlers and - * block/unblock_signals. I don't want modifications cached in a - * register - they must go straight to memory. - */ -static volatile int signals_enabled = 1; -static volatile int pending = 0; +static int signals_enabled; +static unsigned int signals_pending; void sig_handler(int sig, struct sigcontext *sc) { @@ -40,13 +71,13 @@ void sig_handler(int sig, struct sigcontext *sc) enabled = signals_enabled; if (!enabled && (sig == SIGIO)) { - pending |= SIGIO_MASK; + signals_pending |= SIGIO_MASK; return; } block_signals(); - sig_handler_common_skas(sig, sc); + sig_handler_common(sig, sc); set_signals(enabled); } @@ -68,7 +99,7 @@ void alarm_handler(int sig, struct sigcontext *sc) enabled = signals_enabled; if (!signals_enabled) { - pending |= SIGVTALRM_MASK; + signals_pending |= SIGVTALRM_MASK; return; } @@ -94,16 +125,6 @@ void set_sigstack(void *sig_stack, int size) panic("enabling signal stack failed, errno = %d\n", errno); } -void remove_sigstack(void) -{ - stack_t stack = ((stack_t) { .ss_flags = SS_DISABLE, - .ss_sp = NULL, - .ss_size = 0 }); - - if (sigaltstack(&stack, NULL) != 0) - panic("disabling signal stack failed, errno = %d\n", errno); -} - void (*handlers[_NSIG])(int sig, struct sigcontext *sc); void handle_signal(int sig, struct sigcontext *sc) @@ -166,6 +187,9 @@ void set_handler(int sig, void (*handler)(int), int flags, ...) sigaddset(&action.sa_mask, mask); va_end(ap); + if (sig == SIGSEGV) + flags |= SA_NODEFER; + action.sa_flags = flags; action.sa_restorer = NULL; if (sigaction(sig, &action, NULL) < 0) @@ -179,12 +203,14 @@ void set_handler(int sig, void (*handler)(int), int flags, ...) int change_sig(int signal, int on) { - sigset_t sigset, old; + sigset_t sigset; sigemptyset(&sigset); sigaddset(&sigset, signal); - sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, &old); - return !sigismember(&old, signal); + if (sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, NULL) < 0) + return -errno; + + return 0; } void block_signals(void) @@ -196,7 +222,7 @@ void block_signals(void) * This might matter if gcc figures out how to inline this and * decides to shuffle this code into the caller. */ - mb(); + barrier(); } void unblock_signals(void) @@ -209,36 +235,26 @@ void unblock_signals(void) /* * We loop because the IRQ handler returns with interrupts off. So, * interrupts may have arrived and we need to re-enable them and - * recheck pending. + * recheck signals_pending. */ while(1) { /* * Save and reset save_pending after enabling signals. This - * way, pending won't be changed while we're reading it. + * way, signals_pending won't be changed while we're reading it. */ signals_enabled = 1; /* - * Setting signals_enabled and reading pending must + * Setting signals_enabled and reading signals_pending must * happen in this order. */ - mb(); - - save_pending = pending; - if (save_pending == 0) { - /* - * This must return with signals enabled, so - * this barrier ensures that writes are - * flushed out before the return. This might - * matter if gcc figures out how to inline - * this (unlikely, given its size) and decides - * to shuffle this code into the caller. - */ - mb(); + barrier(); + + save_pending = signals_pending; + if (save_pending == 0) return; - } - pending = 0; + signals_pending = 0; /* * We have pending interrupts, so disable signals, as the @@ -254,7 +270,7 @@ void unblock_signals(void) * back here. */ if (save_pending & SIGIO_MASK) - sig_handler_common_skas(SIGIO, NULL); + sig_handler_common(SIGIO, NULL); if (save_pending & SIGVTALRM_MASK) real_alarm_handler(NULL); diff --git a/arch/um/os-Linux/skas/Makefile b/arch/um/os-Linux/skas/Makefile index 5fd8d4dad66a..d2ea3409e072 100644 --- a/arch/um/os-Linux/skas/Makefile +++ b/arch/um/os-Linux/skas/Makefile @@ -1,10 +1,10 @@ # -# Copyright (C) 2002 - 2004 Jeff Dike (jdike@addtoit.com) +# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) # Licensed under the GPL # -obj-y := mem.o process.o trap.o +obj-y := mem.o process.o -USER_OBJS := mem.o process.o trap.o +USER_OBJS := $(obj-y) include arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index e8b7a97e83d3..d36c89c24a45 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -15,6 +15,7 @@ #include "as-layout.h" #include "chan_user.h" #include "kern_constants.h" +#include "kern_util.h" #include "mem.h" #include "os.h" #include "process.h" @@ -37,27 +38,27 @@ int is_skas_winch(int pid, int fd, void *data) static int ptrace_dump_regs(int pid) { - unsigned long regs[MAX_REG_NR]; - int i; + unsigned long regs[MAX_REG_NR]; + int i; - if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0) - return -errno; + if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0) + return -errno; printk(UM_KERN_ERR "Stub registers -\n"); for (i = 0; i < ARRAY_SIZE(regs); i++) printk(UM_KERN_ERR "\t%d - %lx\n", i, regs[i]); - return 0; + return 0; } /* * Signals that are OK to receive in the stub - we'll just continue it. * SIGWINCH will happen when UML is inside a detached screen. */ -#define STUB_SIG_MASK ((1 << SIGVTALRM) | (1 << SIGWINCH)) +#define STUB_SIG_MASK (1 << SIGVTALRM) /* Signals that the stub will finish with - anything else is an error */ -#define STUB_DONE_MASK ((1 << SIGUSR1) | (1 << SIGTRAP)) +#define STUB_DONE_MASK (1 << SIGTRAP) void wait_stub_done(int pid) { @@ -72,9 +73,11 @@ void wait_stub_done(int pid) break; err = ptrace(PTRACE_CONT, pid, 0, 0); - if (err) - panic("wait_stub_done : continue failed, errno = %d\n", - errno); + if (err) { + printk(UM_KERN_ERR "wait_stub_done : continue failed, " + "errno = %d\n", errno); + fatal_sigsegv(); + } } if (((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0) @@ -85,8 +88,10 @@ bad_wait: if (err) printk(UM_KERN_ERR "Failed to get registers from stub, " "errno = %d\n", -err); - panic("wait_stub_done : failed to wait for SIGUSR1/SIGTRAP, pid = %d, " - "n = %d, errno = %d, status = 0x%x\n", pid, n, errno, status); + printk(UM_KERN_ERR "wait_stub_done : failed to wait for SIGTRAP, " + "pid = %d, n = %d, errno = %d, status = 0x%x\n", pid, n, errno, + status); + fatal_sigsegv(); } extern unsigned long current_stub_stack(void); @@ -97,9 +102,11 @@ void get_skas_faultinfo(int pid, struct faultinfo * fi) if (ptrace_faultinfo) { err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); - if (err) - panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, " - "errno = %d\n", errno); + if (err) { + printk(UM_KERN_ERR "get_skas_faultinfo - " + "PTRACE_FAULTINFO failed, errno = %d\n", errno); + fatal_sigsegv(); + } /* Special handling for i386, which has different structs */ if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo)) @@ -109,9 +116,11 @@ void get_skas_faultinfo(int pid, struct faultinfo * fi) } else { err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV); - if (err) - panic("Failed to continue stub, pid = %d, errno = %d\n", - pid, errno); + if (err) { + printk(UM_KERN_ERR "Failed to continue stub, pid = %d, " + "errno = %d\n", pid, errno); + fatal_sigsegv(); + } wait_stub_done(pid); /* @@ -137,6 +146,9 @@ static void handle_trap(int pid, struct uml_pt_regs *regs, { int err, status; + if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END)) + fatal_sigsegv(); + /* Mark this as a syscall */ UPT_SYSCALL_NR(regs) = PT_SYSCALL_NR(regs->gp); @@ -144,25 +156,31 @@ static void handle_trap(int pid, struct uml_pt_regs *regs, { err = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET, __NR_getpid); - if (err < 0) - panic("handle_trap - nullifying syscall failed, " - "errno = %d\n", errno); + if (err < 0) { + printk(UM_KERN_ERR "handle_trap - nullifying syscall " + "failed, errno = %d\n", errno); + fatal_sigsegv(); + } err = ptrace(PTRACE_SYSCALL, pid, 0, 0); - if (err < 0) - panic("handle_trap - continuing to end of syscall " - "failed, errno = %d\n", errno); + if (err < 0) { + printk(UM_KERN_ERR "handle_trap - continuing to end of " + "syscall failed, errno = %d\n", errno); + fatal_sigsegv(); + } CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); if ((err < 0) || !WIFSTOPPED(status) || - (WSTOPSIG(status) != SIGTRAP + 0x80)) { - err = ptrace_dump_regs(pid); - if (err) - printk(UM_KERN_ERR "Failed to get registers " + (WSTOPSIG(status) != SIGTRAP + 0x80)) { + err = ptrace_dump_regs(pid); + if (err) + printk(UM_KERN_ERR "Failed to get registers " "from process, errno = %d\n", -err); - panic("handle_trap - failed to wait at end of syscall, " - "errno = %d, status = %d\n", errno, status); - } + printk(UM_KERN_ERR "handle_trap - failed to wait at " + "end of syscall, errno = %d, status = %d\n", + errno, status); + fatal_sigsegv(); + } } handle_syscall(regs); @@ -178,10 +196,13 @@ static int userspace_tramp(void *stack) ptrace(PTRACE_TRACEME, 0, 0, 0); signal(SIGTERM, SIG_DFL); + signal(SIGWINCH, SIG_IGN); err = set_interval(); - if (err) - panic("userspace_tramp - setting timer failed, errno = %d\n", - err); + if (err) { + printk(UM_KERN_ERR "userspace_tramp - setting timer failed, " + "errno = %d\n", err); + exit(1); + } if (!proc_mm) { /* @@ -221,16 +242,14 @@ static int userspace_tramp(void *stack) set_sigstack((void *) STUB_DATA, UM_KERN_PAGE_SIZE); sigemptyset(&sa.sa_mask); - sigaddset(&sa.sa_mask, SIGIO); - sigaddset(&sa.sa_mask, SIGWINCH); - sigaddset(&sa.sa_mask, SIGVTALRM); - sigaddset(&sa.sa_mask, SIGUSR1); - sa.sa_flags = SA_ONSTACK; + sa.sa_flags = SA_ONSTACK | SA_NODEFER; sa.sa_handler = (void *) v; sa.sa_restorer = NULL; - if (sigaction(SIGSEGV, &sa, NULL) < 0) - panic("userspace_tramp - setting SIGSEGV handler " - "failed - errno = %d\n", errno); + if (sigaction(SIGSEGV, &sa, NULL) < 0) { + printk(UM_KERN_ERR "userspace_tramp - setting SIGSEGV " + "handler failed - errno = %d\n", errno); + exit(1); + } } kill(os_getpid(), SIGSTOP); @@ -246,13 +265,18 @@ int start_userspace(unsigned long stub_stack) { void *stack; unsigned long sp; - int pid, status, n, flags; + int pid, status, n, flags, err; stack = mmap(NULL, UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (stack == MAP_FAILED) - panic("start_userspace : mmap failed, errno = %d", errno); + if (stack == MAP_FAILED) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : mmap failed, " + "errno = %d\n", errno); + return err; + } + sp = (unsigned long) stack + UM_KERN_PAGE_SIZE - sizeof(void *); flags = CLONE_FILES; @@ -262,29 +286,50 @@ int start_userspace(unsigned long stub_stack) flags |= SIGCHLD; pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); - if (pid < 0) - panic("start_userspace : clone failed, errno = %d", errno); + if (pid < 0) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : clone failed, " + "errno = %d\n", errno); + return err; + } do { CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); - if (n < 0) - panic("start_userspace : wait failed, errno = %d", - errno); + if (n < 0) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : wait failed, " + "errno = %d\n", errno); + goto out_kill; + } } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGVTALRM)); - if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) - panic("start_userspace : expected SIGSTOP, got status = %d", - status); + if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { + err = -EINVAL; + printk(UM_KERN_ERR "start_userspace : expected SIGSTOP, got " + "status = %d\n", status); + goto out_kill; + } if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, - (void *) PTRACE_O_TRACESYSGOOD) < 0) - panic("start_userspace : PTRACE_OLDSETOPTIONS failed, " - "errno = %d\n", errno); + (void *) PTRACE_O_TRACESYSGOOD) < 0) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : PTRACE_OLDSETOPTIONS " + "failed, errno = %d\n", errno); + goto out_kill; + } - if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) - panic("start_userspace : munmap failed, errno = %d\n", errno); + if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : munmap failed, " + "errno = %d\n", errno); + goto out_kill; + } return pid; + + out_kill: + os_kill_ptraced_process(pid, 1); + return err; } void userspace(struct uml_pt_regs *regs) @@ -302,7 +347,16 @@ void userspace(struct uml_pt_regs *regs) nsecs += os_nsecs(); while (1) { - restore_registers(pid, regs); + /* + * This can legitimately fail if the process loads a + * bogus value into a segment register. It will + * segfault and PTRACE_GETREGS will read that value + * out of the process. However, PTRACE_SETREGS will + * fail. In this case, there is nothing to do but + * just kill the process. + */ + if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) + fatal_sigsegv(); /* Now we set local_using_sysemu to be used for one loop */ local_using_sysemu = get_using_sysemu(); @@ -310,19 +364,26 @@ void userspace(struct uml_pt_regs *regs) op = SELECT_PTRACE_OPERATION(local_using_sysemu, singlestepping(NULL)); - err = ptrace(op, pid, 0, 0); - if (err) - panic("userspace - could not resume userspace process, " - "pid=%d, ptrace operation = %d, errno = %d\n", - pid, op, errno); + if (ptrace(op, pid, 0, 0)) { + printk(UM_KERN_ERR "userspace - ptrace continue " + "failed, op = %d, errno = %d\n", op, errno); + fatal_sigsegv(); + } CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); - if (err < 0) - panic("userspace - waitpid failed, errno = %d\n", - errno); + if (err < 0) { + printk(UM_KERN_ERR "userspace - wait failed, " + "errno = %d\n", errno); + fatal_sigsegv(); + } regs->is_user = 1; - save_registers(pid, regs); + if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) { + printk(UM_KERN_ERR "userspace - PTRACE_GETREGS failed, " + "errno = %d\n", errno); + fatal_sigsegv(); + } + UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ if (WIFSTOPPED(status)) { @@ -345,7 +406,7 @@ void userspace(struct uml_pt_regs *regs) break; case SIGVTALRM: now = os_nsecs(); - if(now < nsecs) + if (now < nsecs) break; block_signals(); (*sig_info[sig])(sig, regs); @@ -368,6 +429,7 @@ void userspace(struct uml_pt_regs *regs) default: printk(UM_KERN_ERR "userspace - child stopped " "with signal %d\n", sig); + fatal_sigsegv(); } pid = userspace_pid[0]; interrupt_end(); @@ -419,9 +481,12 @@ int copy_context_skas0(unsigned long new_stack, int pid) .it_interval = tv }) }); err = ptrace_setregs(pid, thread_regs); - if (err < 0) - panic("copy_context_skas0 : PTRACE_SETREGS failed, " - "pid = %d, errno = %d\n", pid, -err); + if (err < 0) { + err = -errno; + printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_SETREGS " + "failed, pid = %d, errno = %d\n", pid, -err); + return err; + } /* set a well known return code for detection of child write failure */ child_data->err = 12345678; @@ -431,31 +496,47 @@ int copy_context_skas0(unsigned long new_stack, int pid) * parent's stack, and check, if bad result. */ err = ptrace(PTRACE_CONT, pid, 0, 0); - if (err) - panic("Failed to continue new process, pid = %d, " - "errno = %d\n", pid, errno); + if (err) { + err = -errno; + printk(UM_KERN_ERR "Failed to continue new process, pid = %d, " + "errno = %d\n", pid, errno); + return err; + } + wait_stub_done(pid); pid = data->err; - if (pid < 0) - panic("copy_context_skas0 - stub-parent reports error %d\n", - -pid); + if (pid < 0) { + printk(UM_KERN_ERR "copy_context_skas0 - stub-parent reports " + "error %d\n", -pid); + return pid; + } /* * Wait, until child has finished too: read child's result from * child's stack and check it. */ wait_stub_done(pid); - if (child_data->err != STUB_DATA) - panic("copy_context_skas0 - stub-child reports error %ld\n", - child_data->err); + if (child_data->err != STUB_DATA) { + printk(UM_KERN_ERR "copy_context_skas0 - stub-child reports " + "error %ld\n", child_data->err); + err = child_data->err; + goto out_kill; + } if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, - (void *)PTRACE_O_TRACESYSGOOD) < 0) - panic("copy_context_skas0 : PTRACE_OLDSETOPTIONS failed, " - "errno = %d\n", errno); + (void *)PTRACE_O_TRACESYSGOOD) < 0) { + err = -errno; + printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_OLDSETOPTIONS " + "failed, errno = %d\n", errno); + goto out_kill; + } return pid; + + out_kill: + os_kill_ptraced_process(pid, 1); + return err; } /* @@ -463,8 +544,8 @@ int copy_context_skas0(unsigned long new_stack, int pid) * available. Opening /proc/mm creates a new mm_context, which lacks * the stub-pages. Thus, we map them using /proc/mm-fd */ -void map_stub_pages(int fd, unsigned long code, - unsigned long data, unsigned long stack) +int map_stub_pages(int fd, unsigned long code, unsigned long data, + unsigned long stack) { struct proc_mm_op mmop; int n; @@ -488,8 +569,9 @@ void map_stub_pages(int fd, unsigned long code, printk(UM_KERN_ERR "mmap args - addr = 0x%lx, fd = %d, " "offset = %llx\n", code, code_fd, (unsigned long long) code_offset); - panic("map_stub_pages : /proc/mm map for code failed, " - "err = %d\n", n); + printk(UM_KERN_ERR "map_stub_pages : /proc/mm map for code " + "failed, err = %d\n", n); + return -n; } if (stack) { @@ -507,10 +589,15 @@ void map_stub_pages(int fd, unsigned long code, .offset = map_offset } } }); CATCH_EINTR(n = write(fd, &mmop, sizeof(mmop))); - if (n != sizeof(mmop)) - panic("map_stub_pages : /proc/mm map for data failed, " - "err = %d\n", errno); + if (n != sizeof(mmop)) { + n = errno; + printk(UM_KERN_ERR "map_stub_pages : /proc/mm map for " + "data failed, err = %d\n", n); + return -n; + } } + + return 0; } void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)) @@ -571,7 +658,9 @@ int start_idle_thread(void *stack, jmp_buf *switch_buf) kmalloc_ok = 0; return 1; default: - panic("Bad sigsetjmp return in start_idle_thread - %d\n", n); + printk(UM_KERN_ERR "Bad sigsetjmp return in " + "start_idle_thread - %d\n", n); + fatal_sigsegv(); } longjmp(*switch_buf, 1); } @@ -614,9 +703,11 @@ void __switch_mm(struct mm_id *mm_idp) if (proc_mm) { err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, mm_idp->u.mm_fd); - if (err) - panic("__switch_mm - PTRACE_SWITCH_MM failed, " - "errno = %d\n", errno); + if (err) { + printk(UM_KERN_ERR "__switch_mm - PTRACE_SWITCH_MM " + "failed, errno = %d\n", errno); + fatal_sigsegv(); + } } else userspace_pid[0] = mm_idp->u.pid; } diff --git a/arch/um/os-Linux/skas/trap.c b/arch/um/os-Linux/skas/trap.c deleted file mode 100644 index 3b1b9244f468..000000000000 --- a/arch/um/os-Linux/skas/trap.c +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL - */ - -#if 0 -#include "kern_util.h" -#include "skas.h" -#include "ptrace_user.h" -#include "sysdep/ptrace_user.h" -#endif - -#include <errno.h> -#include <signal.h> -#include "sysdep/ptrace.h" -#include "kern_constants.h" -#include "as-layout.h" -#include "os.h" -#include "sigcontext.h" -#include "task.h" - -static struct uml_pt_regs ksig_regs[UM_NR_CPUS]; - -void sig_handler_common_skas(int sig, void *sc_ptr) -{ - struct sigcontext *sc = sc_ptr; - struct uml_pt_regs *r; - void (*handler)(int, struct uml_pt_regs *); - int save_user, save_errno = errno; - - /* - * This is done because to allow SIGSEGV to be delivered inside a SEGV - * handler. This can happen in copy_user, and if SEGV is disabled, - * the process will die. - * XXX Figure out why this is better than SA_NODEFER - */ - if (sig == SIGSEGV) { - change_sig(SIGSEGV, 1); - /* - * For segfaults, we want the data from the - * sigcontext. In this case, we don't want to mangle - * the process registers, so use a static set of - * registers. For other signals, the process - * registers are OK. - */ - r = &ksig_regs[cpu()]; - copy_sc(r, sc_ptr); - } - else r = TASK_REGS(get_current()); - - save_user = r->is_user; - r->is_user = 0; - if ((sig == SIGFPE) || (sig == SIGSEGV) || (sig == SIGBUS) || - (sig == SIGILL) || (sig == SIGTRAP)) - GET_FAULTINFO_FROM_SC(r->faultinfo, sc); - - change_sig(SIGUSR1, 1); - - handler = sig_info[sig]; - - /* unblock SIGVTALRM, SIGIO if sig isn't IRQ signal */ - if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGVTALRM)) - unblock_signals(); - - handler(sig, r); - - errno = save_errno; - r->is_user = save_user; -} diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 7b81f6c08a5e..b616e15638fb 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -60,10 +60,11 @@ static int ptrace_child(void) * the UML code itself. */ ret = 2; - _exit(ret); + + exit(ret); } -static void fatal_perror(char *str) +static void fatal_perror(const char *str) { perror(str); exit(1); @@ -341,6 +342,8 @@ static void __init check_coredump_limit(void) void __init os_early_checks(void) { + int pid; + /* Print out the core dump limits early */ check_coredump_limit(); @@ -350,6 +353,11 @@ void __init os_early_checks(void) * kernel is running. */ check_tmpexec(); + + pid = start_ptraced_child(); + if (init_registers(pid)) + fatal("Failed to initialize default registers"); + stop_ptraced_child(pid, 1, 1); } static int __init noprocmm_cmd_param(char *str, int* add) @@ -411,7 +419,6 @@ static inline void check_skas3_ptrace_faultinfo(void) non_fatal("found\n"); } - init_registers(pid); stop_ptraced_child(pid, 1, 1); } @@ -466,7 +473,7 @@ static inline void check_skas3_proc_mm(void) else non_fatal("found\n"); } -int can_do_skas(void) +void can_do_skas(void) { non_fatal("Checking for the skas3 patch in the host:\n"); @@ -476,8 +483,6 @@ int can_do_skas(void) if (!proc_mm || !ptrace_faultinfo || !ptrace_ldt) skas_needs_stub = 1; - - return 1; } int __init parse_iomem(char *str, int *add) diff --git a/arch/um/os-Linux/trap.c b/arch/um/os-Linux/trap.c deleted file mode 100644 index 2a1c9843e32e..000000000000 --- a/arch/um/os-Linux/trap.c +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL - */ - -#include <signal.h> -#include "os.h" -#include "sysdep/ptrace.h" - -/* Initialized from linux_main() */ -void (*sig_info[NSIG])(int, struct uml_pt_regs *); - -void os_fill_handlinfo(struct kern_handlers h) -{ - sig_info[SIGTRAP] = h.relay_signal; - sig_info[SIGFPE] = h.relay_signal; - sig_info[SIGILL] = h.relay_signal; - sig_info[SIGWINCH] = h.winch; - sig_info[SIGBUS] = h.bus_handler; - sig_info[SIGSEGV] = h.page_fault; - sig_info[SIGIO] = h.sigio_handler; - sig_info[SIGVTALRM] = h.timer_handler; -} diff --git a/arch/um/os-Linux/tty.c b/arch/um/os-Linux/tty.c index 4cfdd18ea1ef..b09ff66a77ee 100644 --- a/arch/um/os-Linux/tty.c +++ b/arch/um/os-Linux/tty.c @@ -1,13 +1,16 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ #include <stdlib.h> +#include <unistd.h> #include <errno.h> +#include <fcntl.h> +#include "kern_constants.h" +#include "kern_util.h" #include "os.h" #include "user.h" -#include "kern_util.h" struct grantpt_info { int fd; @@ -26,36 +29,34 @@ static void grantpt_cb(void *arg) int get_pty(void) { struct grantpt_info info; - int fd; - - fd = os_open_file("/dev/ptmx", of_rdwr(OPENFLAGS()), 0); - if(fd < 0){ - printk("get_pty : Couldn't open /dev/ptmx - err = %d\n", -fd); - return(fd); + int fd, err; + + fd = open("/dev/ptmx", O_RDWR); + if (fd < 0) { + err = -errno; + printk(UM_KERN_ERR "get_pty : Couldn't open /dev/ptmx - " + "err = %d\n", errno); + return err; } info.fd = fd; initial_thread_cb(grantpt_cb, &info); - if(info.res < 0){ - printk("get_pty : Couldn't grant pty - errno = %d\n", - -info.err); - return(-1); + if (info.res < 0) { + err = -info.err; + printk(UM_KERN_ERR "get_pty : Couldn't grant pty - " + "errno = %d\n", -info.err); + goto out; } - if(unlockpt(fd) < 0){ - printk("get_pty : Couldn't unlock pty - errno = %d\n", errno); - return(-1); + + if (unlockpt(fd) < 0) { + err = -errno; + printk(UM_KERN_ERR "get_pty : Couldn't unlock pty - " + "errno = %d\n", errno); + goto out; } - return(fd); + return fd; +out: + close(fd); + return err; } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/os-Linux/tty_log.c b/arch/um/os-Linux/tty_log.c index d11a55baa6bd..cc648e6fd3a2 100644 --- a/arch/um/os-Linux/tty_log.c +++ b/arch/um/os-Linux/tty_log.c @@ -12,7 +12,6 @@ #include <sys/time.h> #include "init.h" #include "user.h" -#include "kern_util.h" #include "os.h" #define TTY_LOG_DIR "./" diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c index 3e058ce9ffb6..a6f31d476993 100644 --- a/arch/um/os-Linux/util.c +++ b/arch/um/os-Linux/util.c @@ -88,21 +88,6 @@ void setup_hostinfo(char *buf, int len) host.release, host.version, host.machine); } -int setjmp_wrapper(void (*proc)(void *, void *), ...) -{ - va_list args; - jmp_buf buf; - int n; - - n = UML_SETJMP(&buf); - if(n == 0){ - va_start(args, proc); - (*proc)(&buf, &args); - } - va_end(args); - return n; -} - void os_dump_core(void) { int pid; diff --git a/arch/um/sys-i386/bug.c b/arch/um/sys-i386/bug.c index a4360b5207db..8d4f273f1219 100644 --- a/arch/um/sys-i386/bug.c +++ b/arch/um/sys-i386/bug.c @@ -4,6 +4,7 @@ */ #include <linux/uaccess.h> +#include <asm/errno.h> /* Mostly copied from i386/x86_86 - eliminated the eip < PAGE_OFFSET because * that's not relevant in skas mode. diff --git a/arch/um/sys-i386/bugs.c b/arch/um/sys-i386/bugs.c index 806895d73bcc..a74442d13762 100644 --- a/arch/um/sys-i386/bugs.c +++ b/arch/um/sys-i386/bugs.c @@ -3,171 +3,47 @@ * Licensed under the GPL */ -#include <errno.h> #include <signal.h> -#include <string.h> #include "kern_constants.h" -#include "os.h" +#include "kern_util.h" +#include "longjmp.h" #include "task.h" #include "user.h" - -#define MAXTOKEN 64 +#include "sysdep/ptrace.h" /* Set during early boot */ int host_has_cmov = 1; -int host_has_xmm = 0; +static jmp_buf cmov_test_return; -static char token(int fd, char *buf, int len, char stop) +static void cmov_sigill_test_handler(int sig) { - int n; - char *ptr, *end, c; - - ptr = buf; - end = &buf[len]; - do { - n = os_read_file(fd, ptr, sizeof(*ptr)); - c = *ptr++; - if (n != sizeof(*ptr)) { - if (n == 0) - return 0; - printk(UM_KERN_ERR "Reading /proc/cpuinfo failed, " - "err = %d\n", -n); - if (n < 0) - return n; - else return -EIO; - } - } while ((c != '\n') && (c != stop) && (ptr < end)); - - if (ptr == end) { - printk(UM_KERN_ERR "Failed to find '%c' in /proc/cpuinfo\n", - stop); - return -1; - } - *(ptr - 1) = '\0'; - return c; -} - -static int find_cpuinfo_line(int fd, char *key, char *scratch, int len) -{ - int n; - char c; - - scratch[len - 1] = '\0'; - while (1) { - c = token(fd, scratch, len - 1, ':'); - if (c <= 0) - return 0; - else if (c != ':') { - printk(UM_KERN_ERR "Failed to find ':' in " - "/proc/cpuinfo\n"); - return 0; - } - - if (!strncmp(scratch, key, strlen(key))) - return 1; - - do { - n = os_read_file(fd, &c, sizeof(c)); - if (n != sizeof(c)) { - printk(UM_KERN_ERR "Failed to find newline in " - "/proc/cpuinfo, err = %d\n", -n); - return 0; - } - } while (c != '\n'); - } - return 0; + host_has_cmov = 0; + longjmp(cmov_test_return, 1); } -static int check_cpu_flag(char *feature, int *have_it) -{ - char buf[MAXTOKEN], c; - int fd, len = ARRAY_SIZE(buf); - - printk(UM_KERN_INFO "Checking for host processor %s support...", - feature); - fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0); - if (fd < 0) { - printk(UM_KERN_ERR "Couldn't open /proc/cpuinfo, err = %d\n", - -fd); - return 0; - } - - *have_it = 0; - if (!find_cpuinfo_line(fd, "flags", buf, ARRAY_SIZE(buf))) - goto out; - - c = token(fd, buf, len - 1, ' '); - if (c < 0) - goto out; - else if (c != ' ') { - printk(UM_KERN_ERR "Failed to find ' ' in /proc/cpuinfo\n"); - goto out; - } - - while (1) { - c = token(fd, buf, len - 1, ' '); - if (c < 0) - goto out; - else if (c == '\n') - break; - - if (!strcmp(buf, feature)) { - *have_it = 1; - goto out; - } - } - out: - if (*have_it == 0) - printk("No\n"); - else if (*have_it == 1) - printk("Yes\n"); - os_close_file(fd); - return 1; -} - -#if 0 /* - * This doesn't work in tt mode, plus it's causing compilation problems - * for some people. - */ -static void disable_lcall(void) +void arch_check_bugs(void) { - struct modify_ldt_ldt_s ldt; - int err; + struct sigaction old, new; - bzero(&ldt, sizeof(ldt)); - ldt.entry_number = 7; - ldt.base_addr = 0; - ldt.limit = 0; - err = modify_ldt(1, &ldt, sizeof(ldt)); - if (err) - printk(UM_KERN_ERR "Failed to disable lcall7 - errno = %d\n", - errno); -} -#endif + printk(UM_KERN_INFO "Checking for host processor cmov support..."); + new.sa_handler = cmov_sigill_test_handler; -void arch_init_thread(void) -{ -#if 0 - disable_lcall(); -#endif -} + /* Make sure that SIGILL is enabled after the handler longjmps back */ + new.sa_flags = SA_NODEFER; + sigemptyset(&new.sa_mask); + sigaction(SIGILL, &new, &old); -void arch_check_bugs(void) -{ - int have_it; + if (setjmp(cmov_test_return) == 0) { + unsigned long foo = 0; + __asm__ __volatile__("cmovz %0, %1" : "=r" (foo) : "0" (foo)); + printk(UM_KERN_CONT "Yes\n"); + } else + printk(UM_KERN_CONT "No\n"); - if (os_access("/proc/cpuinfo", OS_ACC_R_OK) < 0) { - printk(UM_KERN_ERR "/proc/cpuinfo not available - skipping CPU " - "capability checks\n"); - return; - } - if (check_cpu_flag("cmov", &have_it)) - host_has_cmov = have_it; - if (check_cpu_flag("xmm", &have_it)) - host_has_xmm = have_it; + sigaction(SIGILL, &old, &new); } -int arch_handle_signal(int sig, struct uml_pt_regs *regs) +void arch_examine_signal(int sig, struct uml_pt_regs *regs) { unsigned char tmp[2]; @@ -176,24 +52,25 @@ int arch_handle_signal(int sig, struct uml_pt_regs *regs) * SIGILL in init. */ if ((sig != SIGILL) || (TASK_PID(get_current()) != 1)) - return 0; + return; + + if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2)) { + printk(UM_KERN_ERR "SIGILL in init, could not read " + "instructions!\n"); + return; + } - if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2)) - panic("SIGILL in init, could not read instructions!\n"); if ((tmp[0] != 0x0f) || ((tmp[1] & 0xf0) != 0x40)) - return 0; + return; if (host_has_cmov == 0) - panic("SIGILL caused by cmov, which this processor doesn't " - "implement, boot a filesystem compiled for older " - "processors"); + printk(UM_KERN_ERR "SIGILL caused by cmov, which this " + "processor doesn't implement. Boot a filesystem " + "compiled for older processors"); else if (host_has_cmov == 1) - panic("SIGILL caused by cmov, which this processor claims to " - "implement"); - else if (host_has_cmov == -1) - panic("SIGILL caused by cmov, couldn't tell if this processor " - "implements it, boot a filesystem compiled for older " - "processors"); - else panic("Bad value for host_has_cmov (%d)", host_has_cmov); - return 0; + printk(UM_KERN_ERR "SIGILL caused by cmov, which this " + "processor claims to implement"); + else + printk(UM_KERN_ERR "Bad value for host_has_cmov (%d)", + host_has_cmov); } diff --git a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c index 67c0958eb984..a34263e6b08d 100644 --- a/arch/um/sys-i386/ldt.c +++ b/arch/um/sys-i386/ldt.c @@ -3,8 +3,9 @@ * Licensed under the GPL */ -#include "linux/mm.h" -#include "asm/unistd.h" +#include <linux/mm.h> +#include <linux/sched.h> +#include <asm/unistd.h> #include "os.h" #include "proc_mm.h" #include "skas.h" @@ -146,7 +147,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount) if (ptrace_ldt) return read_ldt_from_host(ptr, bytecount); - down(&ldt->semaphore); + mutex_lock(&ldt->lock); if (ldt->entry_count <= LDT_DIRECT_ENTRIES) { size = LDT_ENTRY_SIZE*LDT_DIRECT_ENTRIES; if (size > bytecount) @@ -170,7 +171,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount) ptr += size; } } - up(&ldt->semaphore); + mutex_unlock(&ldt->lock); if (bytecount == 0 || err == -EFAULT) goto out; @@ -228,7 +229,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int func) } if (!ptrace_ldt) - down(&ldt->semaphore); + mutex_lock(&ldt->lock); err = write_ldt_entry(mm_idp, func, &ldt_info, &addr, 1); if (err) @@ -288,7 +289,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int func) err = 0; out_unlock: - up(&ldt->semaphore); + mutex_unlock(&ldt->lock); out: return err; } @@ -395,7 +396,7 @@ long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm) if (!ptrace_ldt) - init_MUTEX(&new_mm->ldt.semaphore); + mutex_init(&new_mm->ldt.lock); if (!from_mm) { memset(&desc, 0, sizeof(desc)); @@ -455,7 +456,7 @@ long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm) * i.e., we have to use the stub for modify_ldt, which * can't handle the big read buffer of up to 64kB. */ - down(&from_mm->ldt.semaphore); + mutex_lock(&from_mm->ldt.lock); if (from_mm->ldt.entry_count <= LDT_DIRECT_ENTRIES) memcpy(new_mm->ldt.u.entries, from_mm->ldt.u.entries, sizeof(new_mm->ldt.u.entries)); @@ -474,7 +475,7 @@ long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm) } } new_mm->ldt.entry_count = from_mm->ldt.entry_count; - up(&from_mm->ldt.semaphore); + mutex_unlock(&from_mm->ldt.lock); } out: diff --git a/arch/um/sys-i386/ptrace.c b/arch/um/sys-i386/ptrace.c index bd3da8a61f64..6b4499906a6c 100644 --- a/arch/um/sys-i386/ptrace.c +++ b/arch/um/sys-i386/ptrace.c @@ -8,11 +8,11 @@ #include "asm/uaccess.h" #include "skas.h" -extern int arch_switch_tls(struct task_struct *from, struct task_struct *to); +extern int arch_switch_tls(struct task_struct *to); -void arch_switch_to(struct task_struct *from, struct task_struct *to) +void arch_switch_to(struct task_struct *to) { - int err = arch_switch_tls(from, to); + int err = arch_switch_tls(to); if (!err) return; diff --git a/arch/um/sys-i386/ptrace_user.c b/arch/um/sys-i386/ptrace_user.c index 5cf97bc229b9..0b10c3e74028 100644 --- a/arch/um/sys-i386/ptrace_user.c +++ b/arch/um/sys-i386/ptrace_user.c @@ -19,17 +19,3 @@ int ptrace_setregs(long pid, unsigned long *regs) return -errno; return 0; } - -int ptrace_getfpregs(long pid, unsigned long *regs) -{ - if (ptrace(PTRACE_GETFPREGS, pid, 0, regs) < 0) - return -errno; - return 0; -} - -int ptrace_setfpregs(long pid, unsigned long *regs) -{ - if (ptrace(PTRACE_SETFPREGS, pid, 0, regs) < 0) - return -errno; - return 0; -} diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c index 19053d46cb60..fd0c25ad6af3 100644 --- a/arch/um/sys-i386/signal.c +++ b/arch/um/sys-i386/signal.c @@ -168,12 +168,13 @@ static int copy_sc_from_user(struct pt_regs *regs, struct sigcontext __user *from) { struct sigcontext sc; - int err; + int err, pid; err = copy_from_user(&sc, from, sizeof(sc)); if (err) return err; + pid = userspace_pid[current_thread_info()->cpu]; copy_sc(®s->regs, &sc); if (have_fpx_regs) { struct user_fxsr_struct fpx; @@ -187,8 +188,7 @@ static int copy_sc_from_user(struct pt_regs *regs, if (err) return 1; - err = restore_fpx_registers(userspace_pid[current_thread->cpu], - (unsigned long *) &fpx); + err = restore_fpx_registers(pid, (unsigned long *) &fpx); if (err < 0) { printk(KERN_ERR "copy_sc_from_user - " "restore_fpx_registers failed, errno = %d\n", @@ -204,8 +204,7 @@ static int copy_sc_from_user(struct pt_regs *regs, if (err) return 1; - err = restore_fp_registers(userspace_pid[current_thread->cpu], - (unsigned long *) &fp); + err = restore_fp_registers(pid, (unsigned long *) &fp); if (err < 0) { printk(KERN_ERR "copy_sc_from_user - " "restore_fp_registers failed, errno = %d\n", @@ -223,7 +222,7 @@ static int copy_sc_to_user(struct sigcontext __user *to, { struct sigcontext sc; struct faultinfo * fi = ¤t->thread.arch.faultinfo; - int err; + int err, pid; sc.gs = REGS_GS(regs->regs.gp); sc.fs = REGS_FS(regs->regs.gp); @@ -249,11 +248,11 @@ static int copy_sc_to_user(struct sigcontext __user *to, to_fp = (to_fp ? to_fp : (struct _fpstate __user *) (to + 1)); sc.fpstate = to_fp; + pid = userspace_pid[current_thread_info()->cpu]; if (have_fpx_regs) { struct user_fxsr_struct fpx; - err = save_fpx_registers(userspace_pid[current_thread->cpu], - (unsigned long *) &fpx); + err = save_fpx_registers(pid, (unsigned long *) &fpx); if (err < 0){ printk(KERN_ERR "copy_sc_to_user - save_fpx_registers " "failed, errno = %d\n", err); @@ -276,8 +275,7 @@ static int copy_sc_to_user(struct sigcontext __user *to, else { struct user_i387_struct fp; - err = save_fp_registers(userspace_pid[current_thread->cpu], - (unsigned long *) &fp); + err = save_fp_registers(pid, (unsigned long *) &fp); if (copy_to_user(to_fp, &fp, sizeof(struct user_i387_struct))) return 1; } diff --git a/arch/um/sys-i386/stub.S b/arch/um/sys-i386/stub.S index e730772c401b..7699e89f660f 100644 --- a/arch/um/sys-i386/stub.S +++ b/arch/um/sys-i386/stub.S @@ -7,7 +7,7 @@ .globl batch_syscall_stub batch_syscall_stub: /* load pointer to first operation */ - mov $(ASM_STUB_DATA+8), %esp + mov $(STUB_DATA+8), %esp again: /* load length of additional data */ @@ -15,12 +15,12 @@ again: /* if(length == 0) : end of list */ /* write possible 0 to header */ - mov %eax, ASM_STUB_DATA+4 + mov %eax, STUB_DATA+4 cmpl $0, %eax jz done /* save current pointer */ - mov %esp, ASM_STUB_DATA+4 + mov %esp, STUB_DATA+4 /* skip additional data */ add %eax, %esp @@ -46,7 +46,7 @@ again: done: /* save return value */ - mov %eax, ASM_STUB_DATA + mov %eax, STUB_DATA /* stop */ int3 diff --git a/arch/um/sys-i386/stub_segv.c b/arch/um/sys-i386/stub_segv.c index b3999cb76bfd..28ccf737a79f 100644 --- a/arch/um/sys-i386/stub_segv.c +++ b/arch/um/sys-i386/stub_segv.c @@ -1,32 +1,17 @@ /* - * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include <signal.h> -#include <sys/select.h> /* The only way I can see to get sigset_t */ -#include <asm/unistd.h> -#include "as-layout.h" -#include "uml-config.h" #include "sysdep/stub.h" #include "sysdep/sigcontext.h" -#include "sysdep/faultinfo.h" void __attribute__ ((__section__ (".__syscall_stub"))) stub_segv_handler(int sig) { struct sigcontext *sc = (struct sigcontext *) (&sig + 1); - int pid; GET_FAULTINFO_FROM_SC(*((struct faultinfo *) STUB_DATA), sc); - pid = stub_syscall0(__NR_getpid); - stub_syscall2(__NR_kill, pid, SIGUSR1); - - /* Load pointer to sigcontext into esp, since we need to leave - * the stack in its original form when we do the sigreturn here, by - * hand. - */ - __asm__ __volatile__("mov %0,%%esp ; movl %1, %%eax ; " - "int $0x80" : : "a" (sc), "g" (__NR_sigreturn)); + trap_myself(); } diff --git a/arch/um/sys-i386/sys_call_table.S b/arch/um/sys-i386/sys_call_table.S index 12d4148dba39..00e5f5203eea 100644 --- a/arch/um/sys-i386/sys_call_table.S +++ b/arch/um/sys-i386/sys_call_table.S @@ -9,4 +9,9 @@ #define old_mmap old_mmap_i386 +.section .rodata,"a" + #include "../../x86/kernel/syscall_table_32.S" + +ENTRY(syscall_table_size) +.long .-sys_call_table diff --git a/arch/um/sys-i386/tls.c b/arch/um/sys-i386/tls.c index fcaff86b000c..c6c7131e563b 100644 --- a/arch/um/sys-i386/tls.c +++ b/arch/um/sys-i386/tls.c @@ -26,6 +26,11 @@ int do_set_thread_area(struct user_desc *info) cpu = get_cpu(); ret = os_set_thread_area(info, userspace_pid[cpu]); put_cpu(); + + if (ret) + printk(KERN_ERR "PTRACE_SET_THREAD_AREA failed, err = %d, " + "index = %d\n", ret, info->entry_number); + return ret; } @@ -37,6 +42,11 @@ int do_get_thread_area(struct user_desc *info) cpu = get_cpu(); ret = os_get_thread_area(info, userspace_pid[cpu]); put_cpu(); + + if (ret) + printk(KERN_ERR "PTRACE_GET_THREAD_AREA failed, err = %d, " + "index = %d\n", ret, info->entry_number); + return ret; } @@ -172,7 +182,7 @@ void clear_flushed_tls(struct task_struct *task) * SKAS patch. */ -int arch_switch_tls(struct task_struct *from, struct task_struct *to) +int arch_switch_tls(struct task_struct *to) { if (!host_supports_tls) return 0; @@ -225,7 +235,8 @@ out: } /* XXX: use do_get_thread_area to read the host value? I'm not at all sure! */ -static int get_tls_entry(struct task_struct* task, struct user_desc *info, int idx) +static int get_tls_entry(struct task_struct *task, struct user_desc *info, + int idx) { struct thread_struct *t = &task->thread; @@ -263,7 +274,7 @@ clear: goto out; } -asmlinkage int sys_set_thread_area(struct user_desc __user *user_desc) +int sys_set_thread_area(struct user_desc __user *user_desc) { struct user_desc info; int idx, ret; @@ -298,7 +309,7 @@ asmlinkage int sys_set_thread_area(struct user_desc __user *user_desc) * i386. However the only possible error are caused by bugs. */ int ptrace_set_thread_area(struct task_struct *child, int idx, - struct user_desc __user *user_desc) + struct user_desc __user *user_desc) { struct user_desc info; @@ -311,7 +322,7 @@ int ptrace_set_thread_area(struct task_struct *child, int idx, return set_tls_entry(child, &info, idx, 0); } -asmlinkage int sys_get_thread_area(struct user_desc __user *user_desc) +int sys_get_thread_area(struct user_desc __user *user_desc) { struct user_desc info; int idx, ret; @@ -355,10 +366,9 @@ out: return ret; } - /* - * XXX: This part is probably common to i386 and x86-64. Don't create a common - * file for now, do that when implementing x86-64 support. + * This code is really i386-only, but it detects and logs x86_64 GDT indexes + * if a 32-bit UML is running on a 64-bit host. */ static int __init __setup_host_supports_tls(void) { @@ -367,13 +377,16 @@ static int __init __setup_host_supports_tls(void) printk(KERN_INFO "Host TLS support detected\n"); printk(KERN_INFO "Detected host type: "); switch (host_gdt_entry_tls_min) { - case GDT_ENTRY_TLS_MIN_I386: - printk("i386\n"); - break; - case GDT_ENTRY_TLS_MIN_X86_64: - printk("x86_64\n"); - break; + case GDT_ENTRY_TLS_MIN_I386: + printk(KERN_CONT "i386"); + break; + case GDT_ENTRY_TLS_MIN_X86_64: + printk(KERN_CONT "x86_64"); + break; } + printk(KERN_CONT " (GDT indexes %d to %d)\n", + host_gdt_entry_tls_min, + host_gdt_entry_tls_min + GDT_ENTRY_TLS_ENTRIES); } else printk(KERN_ERR " Host TLS support NOT detected! " "TLS support inside UML will not work\n"); diff --git a/arch/um/sys-ppc/Makefile b/arch/um/sys-ppc/Makefile index a9814a7ae60e..08901526e893 100644 --- a/arch/um/sys-ppc/Makefile +++ b/arch/um/sys-ppc/Makefile @@ -6,7 +6,7 @@ OBJ = built-in.o OBJS = ptrace.o sigcontext.o semaphore.o checksum.o miscthings.o misc.o \ ptrace_user.o sysrq.o -EXTRA_AFLAGS := -DCONFIG_PPC32 -I. -I$(TOPDIR)/arch/ppc/kernel +EXTRA_AFLAGS := -DCONFIG_PPC32 -I. -I$(srctree)/arch/ppc/kernel all: $(OBJ) @@ -22,25 +22,25 @@ sigcontext.o: sigcontext.c semaphore.c: rm -f $@ - ln -s $(TOPDIR)/arch/ppc/kernel/$@ $@ + ln -s $(srctree)/arch/ppc/kernel/$@ $@ checksum.S: rm -f $@ - ln -s $(TOPDIR)/arch/ppc/lib/$@ $@ + ln -s $(srctree)/arch/ppc/lib/$@ $@ mk_defs.c: rm -f $@ - ln -s $(TOPDIR)/arch/ppc/kernel/$@ $@ + ln -s $(srctree)/arch/ppc/kernel/$@ $@ ppc_defs.head: rm -f $@ - ln -s $(TOPDIR)/arch/ppc/kernel/$@ $@ + ln -s $(srctree)/arch/ppc/kernel/$@ $@ ppc_defs.h: mk_defs.c ppc_defs.head \ - $(TOPDIR)/include/asm-ppc/mmu.h \ - $(TOPDIR)/include/asm-ppc/processor.h \ - $(TOPDIR)/include/asm-ppc/pgtable.h \ - $(TOPDIR)/include/asm-ppc/ptrace.h + $(srctree)/include/asm-ppc/mmu.h \ + $(srctree)/include/asm-ppc/processor.h \ + $(srctree)/include/asm-ppc/pgtable.h \ + $(srctree)/include/asm-ppc/ptrace.h # $(CC) $(CFLAGS) -S mk_defs.c cp ppc_defs.head ppc_defs.h # for bk, this way we can write to the file even if it's not checked out @@ -56,13 +56,13 @@ ppc_defs.h: mk_defs.c ppc_defs.head \ checksum.o: checksum.S rm -f asm - ln -s $(TOPDIR)/include/asm-ppc asm + ln -s $(srctree)/include/asm-ppc asm $(CC) $(EXTRA_AFLAGS) $(KBUILD_AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o rm -f asm misc.o: misc.S ppc_defs.h rm -f asm - ln -s $(TOPDIR)/include/asm-ppc asm + ln -s $(srctree)/include/asm-ppc asm $(CC) $(EXTRA_AFLAGS) $(KBUILD_AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o rm -f asm diff --git a/arch/um/sys-x86_64/bug.c b/arch/um/sys-x86_64/bug.c index a4360b5207db..e8034e363d83 100644 --- a/arch/um/sys-x86_64/bug.c +++ b/arch/um/sys-x86_64/bug.c @@ -5,7 +5,8 @@ #include <linux/uaccess.h> -/* Mostly copied from i386/x86_86 - eliminated the eip < PAGE_OFFSET because +/* + * Mostly copied from i386/x86_86 - eliminated the eip < PAGE_OFFSET because * that's not relevant in skas mode. */ diff --git a/arch/um/sys-x86_64/bugs.c b/arch/um/sys-x86_64/bugs.c index 506b6765bbcb..44e02ba2a265 100644 --- a/arch/um/sys-x86_64/bugs.c +++ b/arch/um/sys-x86_64/bugs.c @@ -6,15 +6,10 @@ #include "sysdep/ptrace.h" -void arch_init_thread(void) -{ -} - void arch_check_bugs(void) { } -int arch_handle_signal(int sig, struct uml_pt_regs *regs) +void arch_examine_signal(int sig, struct uml_pt_regs *regs) { - return 0; } diff --git a/arch/um/sys-x86_64/ptrace.c b/arch/um/sys-x86_64/ptrace.c index b7631b0e9ddc..f3458d7d1c5a 100644 --- a/arch/um/sys-x86_64/ptrace.c +++ b/arch/um/sys-x86_64/ptrace.c @@ -5,13 +5,12 @@ * Licensed under the GPL */ -#define __FRAME_OFFSETS -#include <asm/ptrace.h> +#include <linux/mm.h> #include <linux/sched.h> #include <linux/errno.h> -#include <linux/mm.h> +#define __FRAME_OFFSETS +#include <asm/ptrace.h> #include <asm/uaccess.h> -#include <asm/elf.h> /* * determines which flags the user has access to. @@ -24,12 +23,14 @@ int putreg(struct task_struct *child, int regno, unsigned long value) unsigned long tmp; #ifdef TIF_IA32 - /* Some code in the 64bit emulation may not be 64bit clean. - Don't take any chances. */ + /* + * Some code in the 64bit emulation may not be 64bit clean. + * Don't take any chances. + */ if (test_tsk_thread_flag(child, TIF_IA32)) value &= 0xffffffff; #endif - switch (regno){ + switch (regno) { case FS: case GS: case DS: @@ -66,7 +67,7 @@ int poke_user(struct task_struct *child, long addr, long data) if (addr < MAX_REG_OFFSET) return putreg(child, addr, data); else if ((addr >= offsetof(struct user, u_debugreg[0])) && - (addr <= offsetof(struct user, u_debugreg[7]))){ + (addr <= offsetof(struct user, u_debugreg[7]))) { addr -= offsetof(struct user, u_debugreg[0]); addr = addr >> 2; if ((addr == 4) || (addr == 5)) @@ -108,11 +109,10 @@ int peek_user(struct task_struct *child, long addr, long data) return -EIO; tmp = 0; /* Default return condition */ - if (addr < MAX_REG_OFFSET){ + if (addr < MAX_REG_OFFSET) tmp = getreg(child, addr); - } else if ((addr >= offsetof(struct user, u_debugreg[0])) && - (addr <= offsetof(struct user, u_debugreg[7]))){ + (addr <= offsetof(struct user, u_debugreg[7]))) { addr -= offsetof(struct user, u_debugreg[0]); addr = addr >> 2; tmp = child->thread.arch.debugregs[addr]; @@ -127,8 +127,9 @@ int is_syscall(unsigned long addr) int n; n = copy_from_user(&instr, (void __user *) addr, sizeof(instr)); - if (n){ - /* access_process_vm() grants access to vsyscall and stub, + if (n) { + /* + * access_process_vm() grants access to vsyscall and stub, * while copy_from_user doesn't. Maybe access_process_vm is * slow, but that doesn't matter, since it will be called only * in case of singlestepping, if copy_from_user failed. @@ -155,7 +156,7 @@ int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) return err; n = copy_to_user(buf, fpregs, sizeof(fpregs)); - if(n > 0) + if (n > 0) return -EFAULT; return n; diff --git a/arch/um/sys-x86_64/ptrace_user.c b/arch/um/sys-x86_64/ptrace_user.c index b5f9c33e311e..c57a496d3f5b 100644 --- a/arch/um/sys-x86_64/ptrace_user.c +++ b/arch/um/sys-x86_64/ptrace_user.c @@ -4,55 +4,19 @@ * Licensed under the GPL */ -#include <stddef.h> #include <errno.h> #include "ptrace_user.h" -#include "user.h" -#include "kern_constants.h" int ptrace_getregs(long pid, unsigned long *regs_out) { - if(ptrace(PTRACE_GETREGS, pid, 0, regs_out) < 0) - return(-errno); - return(0); -} - -int ptrace_setregs(long pid, unsigned long *regs) -{ - if(ptrace(PTRACE_SETREGS, pid, 0, regs) < 0) - return(-errno); + if (ptrace(PTRACE_GETREGS, pid, 0, regs_out) < 0) + return -errno; return(0); } -int ptrace_setfpregs(long pid, unsigned long *regs) +int ptrace_setregs(long pid, unsigned long *regs_out) { - if (ptrace(PTRACE_SETFPREGS, pid, 0, regs) < 0) + if (ptrace(PTRACE_SETREGS, pid, 0, regs_out) < 0) return -errno; - return 0; -} - -void ptrace_pokeuser(unsigned long addr, unsigned long data) -{ - panic("ptrace_pokeuser"); -} - -#define DS 184 -#define ES 192 -#define __USER_DS 0x2b - -void arch_enter_kernel(void *task, int pid) -{ -} - -void arch_leave_kernel(void *task, int pid) -{ -#ifdef UM_USER_CS - if(ptrace(PTRACE_POKEUSR, pid, CS, UM_USER_CS) < 0) - printk("POKEUSR CS failed"); -#endif - - if(ptrace(PTRACE_POKEUSR, pid, DS, __USER_DS) < 0) - printk("POKEUSR DS failed"); - if(ptrace(PTRACE_POKEUSR, pid, ES, __USER_DS) < 0) - printk("POKEUSR ES failed"); + return(0); } diff --git a/arch/um/sys-x86_64/signal.c b/arch/um/sys-x86_64/signal.c index 14070181407b..1a899a7ed7a6 100644 --- a/arch/um/sys-x86_64/signal.c +++ b/arch/um/sys-x86_64/signal.c @@ -81,7 +81,7 @@ static int copy_sc_from_user(struct pt_regs *regs, if (err) return 1; - err = restore_fp_registers(userspace_pid[current_thread->cpu], + err = restore_fp_registers(userspace_pid[current_thread_info()->cpu], (unsigned long *) &fp); if (err < 0) { printk(KERN_ERR "copy_sc_from_user - " @@ -143,7 +143,7 @@ static int copy_sc_to_user(struct sigcontext __user *to, if (err) return 1; - err = save_fp_registers(userspace_pid[current_thread->cpu], + err = save_fp_registers(userspace_pid[current_thread_info()->cpu], (unsigned long *) &fp); if (err < 0) { printk(KERN_ERR "copy_sc_from_user - restore_fp_registers " diff --git a/arch/um/sys-x86_64/stub.S b/arch/um/sys-x86_64/stub.S index 4afe204a6af7..568768763155 100644 --- a/arch/um/sys-x86_64/stub.S +++ b/arch/um/sys-x86_64/stub.S @@ -8,18 +8,18 @@ syscall_stub: /* We don't have 64-bit constants, so this constructs the address * we need. */ - movq $(ASM_STUB_DATA >> 32), %rbx + movq $(STUB_DATA >> 32), %rbx salq $32, %rbx - movq $(ASM_STUB_DATA & 0xffffffff), %rcx + movq $(STUB_DATA & 0xffffffff), %rcx or %rcx, %rbx movq %rax, (%rbx) int3 .globl batch_syscall_stub batch_syscall_stub: - mov $(ASM_STUB_DATA >> 32), %rbx + mov $(STUB_DATA >> 32), %rbx sal $32, %rbx - mov $(ASM_STUB_DATA & 0xffffffff), %rax + mov $(STUB_DATA & 0xffffffff), %rax or %rax, %rbx /* load pointer to first operation */ mov %rbx, %rsp diff --git a/arch/um/sys-x86_64/stub_segv.c b/arch/um/sys-x86_64/stub_segv.c index 3afb590f0072..ced051afc705 100644 --- a/arch/um/sys-x86_64/stub_segv.c +++ b/arch/um/sys-x86_64/stub_segv.c @@ -1,51 +1,22 @@ /* - * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include <stddef.h> #include <signal.h> -#include <asm/unistd.h> #include "as-layout.h" -#include "uml-config.h" -#include "sysdep/sigcontext.h" -#include "sysdep/faultinfo.h" #include "sysdep/stub.h" - -/* Copied from sys-x86_64/signal.c - Can't find an equivalent definition - * in the libc headers anywhere. - */ -struct rt_sigframe -{ - char *pretcode; - struct ucontext uc; - struct siginfo info; -}; - -/* Copied here from <linux/kernel.h> - we're userspace. */ -#define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ - (type *)( (char *)__mptr - offsetof(type,member) );}) +#include "sysdep/faultinfo.h" +#include "sysdep/sigcontext.h" void __attribute__ ((__section__ (".__syscall_stub"))) stub_segv_handler(int sig) { struct ucontext *uc; - int pid; __asm__ __volatile__("movq %%rdx, %0" : "=g" (uc) :); GET_FAULTINFO_FROM_SC(*((struct faultinfo *) STUB_DATA), &uc->uc_mcontext); - - pid = stub_syscall0(__NR_getpid); - stub_syscall2(__NR_kill, pid, SIGUSR1); - - /* sys_sigreturn expects that the stack pointer will be 8 bytes into - * the signal frame. So, we use the ucontext pointer, which we know - * already, to get the signal frame pointer, and add 8 to that. - */ - __asm__ __volatile__("movq %0, %%rsp; movq %1, %%rax ; syscall": : - "g" ((unsigned long) - container_of(uc, struct rt_sigframe, uc) + 8), - "g" (__NR_rt_sigreturn)); + trap_myself(); } + diff --git a/arch/um/sys-x86_64/syscall_table.c b/arch/um/sys-x86_64/syscall_table.c index 71b2ae4ad5de..c128eb897008 100644 --- a/arch/um/sys-x86_64/syscall_table.c +++ b/arch/um/sys-x86_64/syscall_table.c @@ -1,5 +1,7 @@ -/* System call table for UML/x86-64, copied from arch/x86_64/kernel/syscall.c - * with some changes for UML. */ +/* + * System call table for UML/x86-64, copied from arch/x86_64/kernel/syscall.c + * with some changes for UML. + */ #include <linux/linkage.h> #include <linux/sys.h> @@ -8,22 +10,26 @@ #define __NO_STUBS -/* Below you can see, in terms of #define's, the differences between the x86-64 - * and the UML syscall table. */ +/* + * Below you can see, in terms of #define's, the differences between the x86-64 + * and the UML syscall table. + */ /* Not going to be implemented by UML, since we have no hardware. */ #define stub_iopl sys_ni_syscall #define sys_ioperm sys_ni_syscall -/* The UML TLS problem. Note that x86_64 does not implement this, so the below - * is needed only for the ia32 compatibility. */ -/*#define sys_set_thread_area sys_ni_syscall -#define sys_get_thread_area sys_ni_syscall*/ +/* + * The UML TLS problem. Note that x86_64 does not implement this, so the below + * is needed only for the ia32 compatibility. + */ /* On UML we call it this way ("old" means it's not mmap2) */ #define sys_mmap old_mmap -/* On x86-64 sys_uname is actually sys_newuname plus a compatibility trick. - * See arch/x86_64/kernel/sys_x86_64.c */ +/* + * On x86-64 sys_uname is actually sys_newuname plus a compatibility trick. + * See arch/x86_64/kernel/sys_x86_64.c + */ #define sys_uname sys_uname64 #define stub_clone sys_clone @@ -46,8 +52,19 @@ typedef void (*sys_call_ptr_t)(void); extern void sys_ni_syscall(void); -sys_call_ptr_t sys_call_table[UM_NR_syscall_max+1] __cacheline_aligned = { - /* Smells like a like a compiler bug -- it doesn't work when the & below is removed. */ - [0 ... UM_NR_syscall_max] = &sys_ni_syscall, +/* + * We used to have a trick here which made sure that holes in the + * x86_64 table were filled in with sys_ni_syscall, but a comment in + * unistd_64.h says that holes aren't allowed, so the trick was + * removed. + * The trick looked like this + * [0 ... UM_NR_syscall_max] = &sys_ni_syscall + * before including unistd_64.h - the later initializations overwrote + * the sys_ni_syscall filler. + */ + +sys_call_ptr_t sys_call_table[] __cacheline_aligned = { #include <asm-x86/unistd_64.h> }; + +int syscall_table_size = sizeof(sys_call_table); diff --git a/arch/um/sys-x86_64/syscalls.c b/arch/um/sys-x86_64/syscalls.c index 86f6b18410ee..f1199fd34d38 100644 --- a/arch/um/sys-x86_64/syscalls.c +++ b/arch/um/sys-x86_64/syscalls.c @@ -48,7 +48,9 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) switch (code) { case ARCH_SET_FS: case ARCH_SET_GS: - restore_registers(pid, ¤t->thread.regs.regs); + ret = restore_registers(pid, ¤t->thread.regs.regs); + if (ret) + return ret; break; case ARCH_GET_FS: case ARCH_GET_GS: @@ -70,10 +72,10 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) switch (code) { case ARCH_SET_FS: current->thread.arch.fs = (unsigned long) ptr; - save_registers(pid, ¤t->thread.regs.regs); + ret = save_registers(pid, ¤t->thread.regs.regs); break; case ARCH_SET_GS: - save_registers(pid, ¤t->thread.regs.regs); + ret = save_registers(pid, ¤t->thread.regs.regs); break; case ARCH_GET_FS: ret = put_user(tmp, addr); @@ -105,7 +107,7 @@ long sys_clone(unsigned long clone_flags, unsigned long newsp, return ret; } -void arch_switch_to(struct task_struct *from, struct task_struct *to) +void arch_switch_to(struct task_struct *to) { if ((to->thread.arch.fs == 0) || (to->mm == NULL)) return; diff --git a/arch/um/sys-x86_64/sysrq.c b/arch/um/sys-x86_64/sysrq.c index 765444031819..f4f82beb3508 100644 --- a/arch/um/sys-x86_64/sysrq.c +++ b/arch/um/sys-x86_64/sysrq.c @@ -4,32 +4,33 @@ * Licensed under the GPL */ -#include "linux/kernel.h" -#include "linux/utsname.h" -#include "linux/module.h" -#include "asm/current.h" -#include "asm/ptrace.h" +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/utsname.h> +#include <asm/current.h> +#include <asm/ptrace.h> #include "sysrq.h" -void __show_regs(struct pt_regs * regs) +void __show_regs(struct pt_regs *regs) { printk("\n"); print_modules(); - printk("Pid: %d, comm: %.20s %s %s\n", task_pid_nr(current), + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s\n", task_pid_nr(current), current->comm, print_tainted(), init_utsname()->release); - printk("RIP: %04lx:[<%016lx>] ", PT_REGS_CS(regs) & 0xffff, + printk(KERN_INFO "RIP: %04lx:[<%016lx>]\n", PT_REGS_CS(regs) & 0xffff, PT_REGS_RIP(regs)); - printk("\nRSP: %016lx EFLAGS: %08lx\n", PT_REGS_RSP(regs), + printk(KERN_INFO "RSP: %016lx EFLAGS: %08lx\n", PT_REGS_RSP(regs), PT_REGS_EFLAGS(regs)); - printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", + printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", PT_REGS_RAX(regs), PT_REGS_RBX(regs), PT_REGS_RCX(regs)); - printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", + printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", PT_REGS_RDX(regs), PT_REGS_RSI(regs), PT_REGS_RDI(regs)); - printk("RBP: %016lx R08: %016lx R09: %016lx\n", + printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", PT_REGS_RBP(regs), PT_REGS_R8(regs), PT_REGS_R9(regs)); - printk("R10: %016lx R11: %016lx R12: %016lx\n", + printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", PT_REGS_R10(regs), PT_REGS_R11(regs), PT_REGS_R12(regs)); - printk("R13: %016lx R14: %016lx R15: %016lx\n", + printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", PT_REGS_R13(regs), PT_REGS_R14(regs), PT_REGS_R15(regs)); } diff --git a/arch/um/sys-x86_64/um_module.c b/arch/um/sys-x86_64/um_module.c index 8b8eff1bd977..3dead392a415 100644 --- a/arch/um/sys-x86_64/um_module.c +++ b/arch/um/sys-x86_64/um_module.c @@ -1,7 +1,7 @@ #include <linux/vmalloc.h> #include <linux/moduleloader.h> -/*Copied from i386 arch/i386/kernel/module.c */ +/* Copied from i386 arch/i386/kernel/module.c */ void *module_alloc(unsigned long size) { if (size == 0) @@ -13,7 +13,9 @@ void *module_alloc(unsigned long size) void module_free(struct module *mod, void *module_region) { vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ + /* + * FIXME: If module_region == mod->init_region, trim exception + * table entries. + */ } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 59eef1c7fdaa..434821187cfc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -465,6 +465,9 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT Calgary anyway, pass 'iommu=calgary' on the kernel command line. If unsure, say Y. +config IOMMU_HELPER + def_bool (CALGARY_IOMMU || GART_IOMMU) + # need this always selected by IOMMU for the VIA workaround config SWIOTLB bool diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 0db0a6291bbd..8022d3c695c0 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -722,7 +722,9 @@ ia32_sys_call_table: .quad sys_epoll_pwait .quad compat_sys_utimensat /* 320 */ .quad compat_sys_signalfd - .quad compat_sys_timerfd + .quad sys_timerfd_create .quad sys_eventfd .quad sys32_fallocate + .quad compat_sys_timerfd_settime /* 325 */ + .quad compat_sys_timerfd_gettime ia32_syscall_end: diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 1fe7f043ebde..1b5464c2434f 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -35,6 +35,7 @@ #include <linux/pci.h> #include <linux/delay.h> #include <linux/scatterlist.h> +#include <linux/iommu-helper.h> #include <asm/gart.h> #include <asm/calgary.h> #include <asm/tce.h> @@ -260,22 +261,28 @@ static void iommu_range_reserve(struct iommu_table *tbl, spin_unlock_irqrestore(&tbl->it_lock, flags); } -static unsigned long iommu_range_alloc(struct iommu_table *tbl, - unsigned int npages) +static unsigned long iommu_range_alloc(struct device *dev, + struct iommu_table *tbl, + unsigned int npages) { unsigned long flags; unsigned long offset; + unsigned long boundary_size; + + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + PAGE_SIZE) >> PAGE_SHIFT; BUG_ON(npages == 0); spin_lock_irqsave(&tbl->it_lock, flags); - offset = find_next_zero_string(tbl->it_map, tbl->it_hint, - tbl->it_size, npages); + offset = iommu_area_alloc(tbl->it_map, tbl->it_size, tbl->it_hint, + npages, 0, boundary_size, 0); if (offset == ~0UL) { tbl->chip_ops->tce_cache_blast(tbl); - offset = find_next_zero_string(tbl->it_map, 0, - tbl->it_size, npages); + + offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0, + npages, 0, boundary_size, 0); if (offset == ~0UL) { printk(KERN_WARNING "Calgary: IOMMU full.\n"); spin_unlock_irqrestore(&tbl->it_lock, flags); @@ -286,7 +293,6 @@ static unsigned long iommu_range_alloc(struct iommu_table *tbl, } } - set_bit_string(tbl->it_map, offset, npages); tbl->it_hint = offset + npages; BUG_ON(tbl->it_hint > tbl->it_size); @@ -295,13 +301,13 @@ static unsigned long iommu_range_alloc(struct iommu_table *tbl, return offset; } -static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *vaddr, - unsigned int npages, int direction) +static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, + void *vaddr, unsigned int npages, int direction) { unsigned long entry; dma_addr_t ret = bad_dma_address; - entry = iommu_range_alloc(tbl, npages); + entry = iommu_range_alloc(dev, tbl, npages); if (unlikely(entry == bad_dma_address)) goto error; @@ -354,7 +360,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, badbit, tbl, dma_addr, entry, npages); } - __clear_bit_string(tbl->it_map, entry, npages); + iommu_area_free(tbl->it_map, entry, npages); spin_unlock_irqrestore(&tbl->it_lock, flags); } @@ -438,7 +444,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, vaddr = (unsigned long) sg_virt(s); npages = num_dma_pages(vaddr, s->length); - entry = iommu_range_alloc(tbl, npages); + entry = iommu_range_alloc(dev, tbl, npages); if (entry == bad_dma_address) { /* makes sure unmap knows to stop */ s->dma_length = 0; @@ -476,7 +482,7 @@ static dma_addr_t calgary_map_single(struct device *dev, void *vaddr, npages = num_dma_pages(uaddr, size); if (translation_enabled(tbl)) - dma_handle = iommu_alloc(tbl, vaddr, npages, direction); + dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction); else dma_handle = virt_to_bus(vaddr); @@ -516,7 +522,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, if (translation_enabled(tbl)) { /* set up tces to cover the allocated range */ - mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL); + mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); if (mapping == bad_dma_address) goto free; diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 845cbecd68e9..65f6acb025c8 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -25,6 +25,7 @@ #include <linux/bitops.h> #include <linux/kdebug.h> #include <linux/scatterlist.h> +#include <linux/iommu-helper.h> #include <asm/atomic.h> #include <asm/io.h> #include <asm/mtrr.h> @@ -82,17 +83,24 @@ AGPEXTERN __u32 *agp_gatt_table; static unsigned long next_bit; /* protected by iommu_bitmap_lock */ static int need_flush; /* global flush state. set for each gart wrap */ -static unsigned long alloc_iommu(int size) +static unsigned long alloc_iommu(struct device *dev, int size) { unsigned long offset, flags; + unsigned long boundary_size; + unsigned long base_index; + + base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), + PAGE_SIZE) >> PAGE_SHIFT; + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + PAGE_SIZE) >> PAGE_SHIFT; spin_lock_irqsave(&iommu_bitmap_lock, flags); - offset = find_next_zero_string(iommu_gart_bitmap, next_bit, - iommu_pages, size); + offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, + size, base_index, boundary_size, 0); if (offset == -1) { need_flush = 1; - offset = find_next_zero_string(iommu_gart_bitmap, 0, - iommu_pages, size); + offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, + size, base_index, boundary_size, 0); } if (offset != -1) { set_bit_string(iommu_gart_bitmap, offset, size); @@ -114,7 +122,7 @@ static void free_iommu(unsigned long offset, int size) unsigned long flags; spin_lock_irqsave(&iommu_bitmap_lock, flags); - __clear_bit_string(iommu_gart_bitmap, offset, size); + iommu_area_free(iommu_gart_bitmap, offset, size); spin_unlock_irqrestore(&iommu_bitmap_lock, flags); } @@ -235,7 +243,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, size_t size, int dir) { unsigned long npages = to_pages(phys_mem, size); - unsigned long iommu_page = alloc_iommu(npages); + unsigned long iommu_page = alloc_iommu(dev, npages); int i; if (iommu_page == -1) { @@ -355,10 +363,11 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, } /* Map multiple scatterlist entries continuous into the first. */ -static int __dma_map_cont(struct scatterlist *start, int nelems, - struct scatterlist *sout, unsigned long pages) +static int __dma_map_cont(struct device *dev, struct scatterlist *start, + int nelems, struct scatterlist *sout, + unsigned long pages) { - unsigned long iommu_start = alloc_iommu(pages); + unsigned long iommu_start = alloc_iommu(dev, pages); unsigned long iommu_page = iommu_start; struct scatterlist *s; int i; @@ -394,8 +403,8 @@ static int __dma_map_cont(struct scatterlist *start, int nelems, } static inline int -dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout, - unsigned long pages, int need) +dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, + struct scatterlist *sout, unsigned long pages, int need) { if (!need) { BUG_ON(nelems != 1); @@ -403,7 +412,7 @@ dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout, sout->dma_length = start->length; return 0; } - return __dma_map_cont(start, nelems, sout, pages); + return __dma_map_cont(dev, start, nelems, sout, pages); } /* @@ -416,6 +425,8 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) struct scatterlist *s, *ps, *start_sg, *sgmap; int need = 0, nextneed, i, out, start; unsigned long pages = 0; + unsigned int seg_size; + unsigned int max_seg_size; if (nents == 0) return 0; @@ -426,6 +437,8 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) out = 0; start = 0; start_sg = sgmap = sg; + seg_size = 0; + max_seg_size = dma_get_max_seg_size(dev); ps = NULL; /* shut up gcc */ for_each_sg(sg, s, nents, i) { dma_addr_t addr = sg_phys(s); @@ -443,11 +456,13 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) * offset. */ if (!iommu_merge || !nextneed || !need || s->offset || + (s->length + seg_size > max_seg_size) || (ps->offset + ps->length) % PAGE_SIZE) { - if (dma_map_cont(start_sg, i - start, sgmap, - pages, need) < 0) + if (dma_map_cont(dev, start_sg, i - start, + sgmap, pages, need) < 0) goto error; out++; + seg_size = 0; sgmap = sg_next(sgmap); pages = 0; start = i; @@ -455,11 +470,12 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) } } + seg_size += s->length; need = nextneed; pages += to_pages(s->offset, s->length); ps = s; } - if (dma_map_cont(start_sg, i - start, sgmap, pages, need) < 0) + if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) goto error; out++; flush_gart(); diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 8344c70adf61..adff5562f5fd 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -321,6 +321,8 @@ ENTRY(sys_call_table) .long sys_epoll_pwait .long sys_utimensat /* 320 */ .long sys_signalfd - .long sys_timerfd + .long sys_timerfd_create .long sys_eventfd .long sys_fallocate + .long sys_timerfd_settime /* 325 */ + .long sys_timerfd_gettime diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8f94a0b89dff..cf5308148689 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1739,7 +1739,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, if (bytes == 8) { gpa_t gpa; struct page *page; - char *addr; + char *kaddr; u64 val; down_read(¤t->mm->mmap_sem); @@ -1754,9 +1754,9 @@ static int emulator_cmpxchg_emulated(unsigned long addr, val = *(u64 *)new; page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); - addr = kmap_atomic(page, KM_USER0); - set_64bit((u64 *)(addr + offset_in_page(gpa)), val); - kunmap_atomic(addr, KM_USER0); + kaddr = kmap_atomic(page, KM_USER0); + set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); + kunmap_atomic(kaddr, KM_USER0); kvm_release_page_dirty(page); emul_write: up_read(¤t->mm->mmap_sem); diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 4876182daf8a..25df1c1989fe 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -21,7 +21,7 @@ else lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o lib-y += thunk_64.o clear_page_64.o copy_page_64.o - lib-y += bitstr_64.o bitops_64.o + lib-y += bitops_64.o lib-y += memmove_64.o memset_64.o lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o endif diff --git a/arch/x86/lib/bitstr_64.c b/arch/x86/lib/bitstr_64.c deleted file mode 100644 index 7445caf1b5de..000000000000 --- a/arch/x86/lib/bitstr_64.c +++ /dev/null @@ -1,28 +0,0 @@ -#include <linux/module.h> -#include <linux/bitops.h> - -/* Find string of zero bits in a bitmap */ -unsigned long -find_next_zero_string(unsigned long *bitmap, long start, long nbits, int len) -{ - unsigned long n, end, i; - - again: - n = find_next_zero_bit(bitmap, nbits, start); - if (n == -1) - return -1; - - /* could test bitsliced, but it's hardly worth it */ - end = n+len; - if (end > nbits) - return -1; - for (i = n+1; i < end; i++) { - if (test_bit(i, bitmap)) { - start = i+1; - goto again; - } - } - return n; -} - -EXPORT_SYMBOL(find_next_zero_string); diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index c7db504be1ea..6c1914622a88 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -272,7 +272,7 @@ static void pgd_dtor(void *pgd) * preallocate which never got a corresponding vma will need to be * freed manually. */ -static void pgd_mop_up_pmds(pgd_t *pgdp) +static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) { int i; @@ -285,7 +285,7 @@ static void pgd_mop_up_pmds(pgd_t *pgdp) pgdp[i] = native_make_pgd(0); paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT); - pmd_free(pmd); + pmd_free(mm, pmd); } } } @@ -313,7 +313,7 @@ static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) pmd_t *pmd = pmd_alloc_one(mm, addr); if (!pmd) { - pgd_mop_up_pmds(pgd); + pgd_mop_up_pmds(mm, pgd); return 0; } @@ -333,7 +333,7 @@ static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) return 1; } -static void pgd_mop_up_pmds(pgd_t *pgd) +static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) { } #endif /* CONFIG_X86_PAE */ @@ -352,9 +352,9 @@ pgd_t *pgd_alloc(struct mm_struct *mm) return pgd; } -void pgd_free(pgd_t *pgd) +void pgd_free(struct mm_struct *mm, pgd_t *pgd) { - pgd_mop_up_pmds(pgd); + pgd_mop_up_pmds(mm, pgd); quicklist_free(0, pgd_dtor, pgd); } diff --git a/drivers/Kconfig b/drivers/Kconfig index 3f8a231fe754..d74d9fbb9fd2 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -52,6 +52,8 @@ source "drivers/i2c/Kconfig" source "drivers/spi/Kconfig" +source "drivers/gpio/Kconfig" + source "drivers/w1/Kconfig" source "drivers/power/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 0ee9a8a4095e..f1c11db52a57 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -5,6 +5,7 @@ # Rewritten to use lists instead of if-statements. # +obj-$(CONFIG_HAVE_GPIO_LIB) += gpio/ obj-$(CONFIG_PCI) += pci/ obj-$(CONFIG_PARISC) += parisc/ obj-$(CONFIG_RAPIDIO) += rapidio/ diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index eb1f82f79153..199ea2146153 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -38,7 +38,7 @@ #include <linux/dmi.h> #include <linux/moduleparam.h> #include <linux/sched.h> /* need_resched() */ -#include <linux/latency.h> +#include <linux/pm_qos_params.h> #include <linux/clockchips.h> #include <linux/cpuidle.h> @@ -648,7 +648,8 @@ static void acpi_processor_idle(void) if (cx->promotion.state && ((cx->promotion.state - pr->power.states) <= max_cstate)) { if (sleep_ticks > cx->promotion.threshold.ticks && - cx->promotion.state->latency <= system_latency_constraint()) { + cx->promotion.state->latency <= + pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) { cx->promotion.count++; cx->demotion.count = 0; if (cx->promotion.count >= @@ -692,7 +693,8 @@ static void acpi_processor_idle(void) * or if the latency of the current state is unacceptable */ if ((pr->power.state - pr->power.states) > max_cstate || - pr->power.state->latency > system_latency_constraint()) { + pr->power.state->latency > + pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) { if (cx->demotion.state) next_state = cx->demotion.state; } @@ -1200,7 +1202,7 @@ static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset) "maximum allowed latency: %d usec\n", pr->power.state ? pr->power.state - pr->power.states : 0, max_cstate, (unsigned)pr->power.bm_activity, - system_latency_constraint()); + pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)); seq_puts(seq, "states:\n"); @@ -1718,8 +1720,9 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr, "ACPI: processor limited to max C-state %d\n", max_cstate); first_run++; -#if !defined (CONFIG_CPU_IDLE) && defined (CONFIG_SMP) - register_latency_notifier(&acpi_processor_latency_notifier); +#if !defined(CONFIG_CPU_IDLE) && defined(CONFIG_SMP) + pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, + &acpi_processor_latency_notifier); #endif } @@ -1806,7 +1809,8 @@ int acpi_processor_power_exit(struct acpi_processor *pr, */ cpu_idle_wait(); #ifdef CONFIG_SMP - unregister_latency_notifier(&acpi_processor_latency_notifier); + pm_qos_remove_notifier(PM_QOS_CPU_DMA_LATENCY, + &acpi_processor_latency_notifier); #endif } #endif diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c index 96e614a1c169..59e65edc5820 100644 --- a/drivers/ata/sata_inic162x.c +++ b/drivers/ata/sata_inic162x.c @@ -108,17 +108,6 @@ struct inic_port_priv { u8 cached_pirq_mask; }; -static int inic_slave_config(struct scsi_device *sdev) -{ - /* This controller is braindamaged. dma_boundary is 0xffff - * like others but it will lock up the whole machine HARD if - * 65536 byte PRD entry is fed. Reduce maximum segment size. - */ - blk_queue_max_segment_size(sdev->request_queue, 65536 - 512); - - return ata_scsi_slave_config(sdev); -} - static struct scsi_host_template inic_sht = { .module = THIS_MODULE, .name = DRV_NAME, @@ -132,7 +121,7 @@ static struct scsi_host_template inic_sht = { .use_clustering = ATA_SHT_USE_CLUSTERING, .proc_name = DRV_NAME, .dma_boundary = ATA_DMA_BOUNDARY, - .slave_configure = inic_slave_config, + .slave_configure = ata_scsi_slave_config, .slave_destroy = ata_scsi_slave_destroy, .bios_param = ata_std_bios_param, }; @@ -730,6 +719,18 @@ static int inic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return rc; } + /* + * This controller is braindamaged. dma_boundary is 0xffff + * like others but it will lock up the whole machine HARD if + * 65536 byte PRD entry is fed. Reduce maximum segment size. + */ + rc = pci_set_dma_max_seg_size(pdev, 65536 - 512); + if (rc) { + dev_printk(KERN_ERR, &pdev->dev, + "failed to set the maximum segment size.\n"); + return rc; + } + rc = init_controller(iomap[MMIO_BAR], hpriv->cached_hctl); if (rc) { dev_printk(KERN_ERR, &pdev->dev, diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c index a18f9b8c9e12..7703d6e06fd9 100644 --- a/drivers/bluetooth/bt3c_cs.c +++ b/drivers/bluetooth/bt3c_cs.c @@ -704,7 +704,7 @@ static int next_tuple(struct pcmcia_device *handle, tuple_t *tuple, cisparse_t * static int bt3c_config(struct pcmcia_device *link) { - static kio_addr_t base[5] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 }; + static unsigned int base[5] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 }; bt3c_info_t *info = link->priv; tuple_t tuple; u_short buf[256]; diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c index dade1626865b..68d1d258e6a4 100644 --- a/drivers/bluetooth/btuart_cs.c +++ b/drivers/bluetooth/btuart_cs.c @@ -634,7 +634,7 @@ static int next_tuple(struct pcmcia_device *handle, tuple_t *tuple, cisparse_t * static int btuart_config(struct pcmcia_device *link) { - static kio_addr_t base[5] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 }; + static unsigned int base[5] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 }; btuart_info_t *info = link->priv; tuple_t tuple; u_short buf[256]; diff --git a/drivers/char/agp/alpha-agp.c b/drivers/char/agp/alpha-agp.c index aa8f3a39a704..e77c17838c8a 100644 --- a/drivers/char/agp/alpha-agp.c +++ b/drivers/char/agp/alpha-agp.c @@ -11,29 +11,28 @@ #include "agp.h" -static struct page *alpha_core_agp_vm_nopage(struct vm_area_struct *vma, - unsigned long address, - int *type) +static int alpha_core_agp_vm_fault(struct vm_area_struct *vma, + struct vm_fault *vmf) { alpha_agp_info *agp = agp_bridge->dev_private_data; dma_addr_t dma_addr; unsigned long pa; struct page *page; - dma_addr = address - vma->vm_start + agp->aperture.bus_base; + dma_addr = (unsigned long)vmf->virtual_address - vma->vm_start + + agp->aperture.bus_base; pa = agp->ops->translate(agp, dma_addr); if (pa == (unsigned long)-EINVAL) - return NULL; /* no translation */ + return VM_FAULT_SIGBUS; /* no translation */ /* * Get the page, inc the use count, and return it */ page = virt_to_page(__va(pa)); get_page(page); - if (type) - *type = VM_FAULT_MINOR; - return page; + vmf->page = page; + return 0; } static struct aper_size_info_fixed alpha_core_agp_sizes[] = @@ -42,7 +41,7 @@ static struct aper_size_info_fixed alpha_core_agp_sizes[] = }; struct vm_operations_struct alpha_core_agp_vm_ops = { - .nopage = alpha_core_agp_vm_nopage, + .fault = alpha_core_agp_vm_fault, }; diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 0118b9817a95..84cdf9025737 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -234,11 +234,11 @@ static DEVICE_ATTR(rng_available, S_IRUGO, NULL); -static void unregister_miscdev(void) +static void unregister_miscdev(bool suspended) { device_remove_file(rng_miscdev.this_device, &dev_attr_rng_available); device_remove_file(rng_miscdev.this_device, &dev_attr_rng_current); - misc_deregister(&rng_miscdev); + __misc_deregister(&rng_miscdev, suspended); } static int register_miscdev(void) @@ -313,7 +313,7 @@ out: } EXPORT_SYMBOL_GPL(hwrng_register); -void hwrng_unregister(struct hwrng *rng) +void __hwrng_unregister(struct hwrng *rng, bool suspended) { int err; @@ -332,11 +332,11 @@ void hwrng_unregister(struct hwrng *rng) } } if (list_empty(&rng_list)) - unregister_miscdev(); + unregister_miscdev(suspended); mutex_unlock(&rng_mutex); } -EXPORT_SYMBOL_GPL(hwrng_unregister); +EXPORT_SYMBOL_GPL(__hwrng_unregister); MODULE_DESCRIPTION("H/W Random Number Generator (RNG) driver"); diff --git a/drivers/char/misc.c b/drivers/char/misc.c index 71c8cd7fa15f..a39101feb2ed 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -232,8 +232,9 @@ int misc_register(struct miscdevice * misc) } /** - * misc_deregister - unregister a miscellaneous device + * __misc_deregister - unregister a miscellaneous device * @misc: device to unregister + * @suspended: to be set if the function is used during suspend/resume * * Unregister a miscellaneous device that was previously * successfully registered with misc_register(). Success @@ -241,7 +242,7 @@ int misc_register(struct miscdevice * misc) * indicates an error. */ -int misc_deregister(struct miscdevice * misc) +int __misc_deregister(struct miscdevice *misc, bool suspended) { int i = misc->minor; @@ -250,7 +251,11 @@ int misc_deregister(struct miscdevice * misc) mutex_lock(&misc_mtx); list_del(&misc->list); - device_destroy(misc_class, MKDEV(MISC_MAJOR, misc->minor)); + if (suspended) + destroy_suspended_device(misc_class, + MKDEV(MISC_MAJOR, misc->minor)); + else + device_destroy(misc_class, MKDEV(MISC_MAJOR, misc->minor)); if (i < DYNAMIC_MINORS && i>0) { misc_minors[i>>3] &= ~(1 << (misc->minor & 7)); } @@ -259,7 +264,7 @@ int misc_deregister(struct miscdevice * misc) } EXPORT_SYMBOL(misc_register); -EXPORT_SYMBOL(misc_deregister); +EXPORT_SYMBOL(__misc_deregister); static int __init misc_init(void) { diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c index 02518da6a386..454d7324ba40 100644 --- a/drivers/char/pcmcia/cm4000_cs.c +++ b/drivers/char/pcmcia/cm4000_cs.c @@ -308,7 +308,8 @@ static unsigned int calc_baudv(unsigned char fidi) return (wcrcf / wbrcf); } -static unsigned short io_read_num_rec_bytes(ioaddr_t iobase, unsigned short *s) +static unsigned short io_read_num_rec_bytes(unsigned int iobase, + unsigned short *s) { unsigned short tmp; @@ -426,7 +427,7 @@ static struct card_fixup card_fixups[] = { static void set_cardparameter(struct cm4000_dev *dev) { int i; - ioaddr_t iobase = dev->p_dev->io.BasePort1; + unsigned int iobase = dev->p_dev->io.BasePort1; u_int8_t stopbits = 0x02; /* ISO default */ DEBUGP(3, dev, "-> set_cardparameter\n"); @@ -459,7 +460,7 @@ static int set_protocol(struct cm4000_dev *dev, struct ptsreq *ptsreq) unsigned short num_bytes_read; unsigned char pts_reply[4]; ssize_t rc; - ioaddr_t iobase = dev->p_dev->io.BasePort1; + unsigned int iobase = dev->p_dev->io.BasePort1; rc = 0; @@ -610,7 +611,7 @@ exit_setprotocol: return rc; } -static int io_detect_cm4000(ioaddr_t iobase, struct cm4000_dev *dev) +static int io_detect_cm4000(unsigned int iobase, struct cm4000_dev *dev) { /* note: statemachine is assumed to be reset */ @@ -671,7 +672,7 @@ static void terminate_monitor(struct cm4000_dev *dev) static void monitor_card(unsigned long p) { struct cm4000_dev *dev = (struct cm4000_dev *) p; - ioaddr_t iobase = dev->p_dev->io.BasePort1; + unsigned int iobase = dev->p_dev->io.BasePort1; unsigned short s; struct ptsreq ptsreq; int i, atrc; @@ -933,7 +934,7 @@ static ssize_t cmm_read(struct file *filp, __user char *buf, size_t count, loff_t *ppos) { struct cm4000_dev *dev = filp->private_data; - ioaddr_t iobase = dev->p_dev->io.BasePort1; + unsigned int iobase = dev->p_dev->io.BasePort1; ssize_t rc; int i, j, k; @@ -1054,7 +1055,7 @@ static ssize_t cmm_write(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { struct cm4000_dev *dev = (struct cm4000_dev *) filp->private_data; - ioaddr_t iobase = dev->p_dev->io.BasePort1; + unsigned int iobase = dev->p_dev->io.BasePort1; unsigned short s; unsigned char tmp; unsigned char infolen; @@ -1408,7 +1409,7 @@ static int cmm_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { struct cm4000_dev *dev = filp->private_data; - ioaddr_t iobase = dev->p_dev->io.BasePort1; + unsigned int iobase = dev->p_dev->io.BasePort1; struct pcmcia_device *link; int size; int rc; diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index d2fabe7863a9..2a98d99cbd46 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -12,7 +12,7 @@ #include <linux/mutex.h> #include <linux/sched.h> #include <linux/notifier.h> -#include <linux/latency.h> +#include <linux/pm_qos_params.h> #include <linux/cpu.h> #include <linux/cpuidle.h> @@ -265,7 +265,10 @@ static struct notifier_block cpuidle_latency_notifier = { .notifier_call = cpuidle_latency_notify, }; -#define latency_notifier_init(x) do { register_latency_notifier(x); } while (0) +static inline void latency_notifier_init(struct notifier_block *n) +{ + pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n); +} #else /* CONFIG_SMP */ diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index eb666ecae7c9..ba7b9a6b17a1 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -14,7 +14,7 @@ #include <linux/kernel.h> #include <linux/cpuidle.h> -#include <linux/latency.h> +#include <linux/pm_qos_params.h> #include <linux/moduleparam.h> #include <linux/jiffies.h> @@ -81,7 +81,8 @@ static int ladder_select_state(struct cpuidle_device *dev) /* consider promotion */ if (last_idx < dev->state_count - 1 && last_residency > last_state->threshold.promotion_time && - dev->states[last_idx + 1].exit_latency <= system_latency_constraint()) { + dev->states[last_idx + 1].exit_latency <= + pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) { last_state->stats.promotion_count++; last_state->stats.demotion_count = 0; if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) { diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 299d45c3bdd2..78d77c5dc35c 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -8,7 +8,7 @@ #include <linux/kernel.h> #include <linux/cpuidle.h> -#include <linux/latency.h> +#include <linux/pm_qos_params.h> #include <linux/time.h> #include <linux/ktime.h> #include <linux/hrtimer.h> @@ -48,7 +48,7 @@ static int menu_select(struct cpuidle_device *dev) break; if (s->target_residency > data->predicted_us) break; - if (s->exit_latency > system_latency_constraint()) + if (s->exit_latency > pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) break; } diff --git a/drivers/dio/dio.c b/drivers/dio/dio.c index 17502d6efae7..07f274f853d9 100644 --- a/drivers/dio/dio.c +++ b/drivers/dio/dio.c @@ -88,8 +88,6 @@ static struct dioname names[] = #undef DIONAME #undef DIOFBNAME -#define NUMNAMES (sizeof(names) / sizeof(struct dioname)) - static const char *unknowndioname = "unknown DIO board -- please email <linux-m68k@lists.linux-m68k.org>!"; @@ -97,7 +95,7 @@ static const char *dio_getname(int id) { /* return pointer to a constant string describing the board with given ID */ unsigned int i; - for (i = 0; i < NUMNAMES; i++) + for (i = 0; i < ARRAY_SIZE(names); i++) if (names[i].id == id) return names[i].name; diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig new file mode 100644 index 000000000000..74fac0f5c348 --- /dev/null +++ b/drivers/gpio/Kconfig @@ -0,0 +1,72 @@ +# +# GPIO infrastructure and expanders +# + +config HAVE_GPIO_LIB + bool + help + Platforms select gpiolib if they use this infrastructure + for all their GPIOs, usually starting with ones integrated + into SOC processors. + +menu "GPIO Support" + depends on HAVE_GPIO_LIB + +config DEBUG_GPIO + bool "Debug GPIO calls" + depends on DEBUG_KERNEL + help + Say Y here to add some extra checks and diagnostics to GPIO calls. + The checks help ensure that GPIOs have been properly initialized + before they are used and that sleeping calls aren not made from + nonsleeping contexts. They can make bitbanged serial protocols + slower. The diagnostics help catch the type of setup errors + that are most common when setting up new platforms or boards. + +# put expanders in the right section, in alphabetical order + +comment "I2C GPIO expanders:" + +config GPIO_PCA9539 + tristate "PCA9539 16-bit I/O port" + depends on I2C + help + Say yes here to support the PCA9539 16-bit I/O port. These + parts are made by NXP and TI. + + This driver can also be built as a module. If so, the module + will be called pca9539. + +config GPIO_PCF857X + tristate "PCF857x, PCA857x, and PCA967x I2C GPIO expanders" + depends on I2C + help + Say yes here to provide access to most "quasi-bidirectional" I2C + GPIO expanders used for additional digital outputs or inputs. + Most of these parts are from NXP, though TI is a second source for + some of them. Compatible models include: + + 8 bits: pcf8574, pcf8574a, pca8574, pca8574a, + pca9670, pca9672, pca9674, pca9674a + + 16 bits: pcf8575, pcf8575c, pca8575, + pca9671, pca9673, pca9675 + + Your board setup code will need to declare the expanders in + use, and assign numbers to the GPIOs they expose. Those GPIOs + can then be used from drivers and other kernel code, just like + other GPIOs, but only accessible from task contexts. + + This driver provides an in-kernel interface to those GPIOs using + platform-neutral GPIO calls. + +comment "SPI GPIO expanders:" + +config GPIO_MCP23S08 + tristate "Microchip MCP23S08 I/O expander" + depends on SPI_MASTER + help + SPI driver for Microchip MCP23S08 I/O expander. This provides + a GPIO interface supporting inputs and outputs. + +endmenu diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile new file mode 100644 index 000000000000..470ecd6aa778 --- /dev/null +++ b/drivers/gpio/Makefile @@ -0,0 +1,9 @@ +# gpio support: dedicated expander chips, etc + +ccflags-$(CONFIG_DEBUG_GPIO) += -DDEBUG + +obj-$(CONFIG_HAVE_GPIO_LIB) += gpiolib.o + +obj-$(CONFIG_GPIO_MCP23S08) += mcp23s08.o +obj-$(CONFIG_GPIO_PCA9539) += pca9539.o +obj-$(CONFIG_GPIO_PCF857X) += pcf857x.o diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c new file mode 100644 index 000000000000..d8db2f8ee411 --- /dev/null +++ b/drivers/gpio/gpiolib.c @@ -0,0 +1,567 @@ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/irq.h> +#include <linux/spinlock.h> + +#include <asm/gpio.h> + + +/* Optional implementation infrastructure for GPIO interfaces. + * + * Platforms may want to use this if they tend to use very many GPIOs + * that aren't part of a System-On-Chip core; or across I2C/SPI/etc. + * + * When kernel footprint or instruction count is an issue, simpler + * implementations may be preferred. The GPIO programming interface + * allows for inlining speed-critical get/set operations for common + * cases, so that access to SOC-integrated GPIOs can sometimes cost + * only an instruction or two per bit. + */ + + +/* When debugging, extend minimal trust to callers and platform code. + * Also emit diagnostic messages that may help initial bringup, when + * board setup or driver bugs are most common. + * + * Otherwise, minimize overhead in what may be bitbanging codepaths. + */ +#ifdef DEBUG +#define extra_checks 1 +#else +#define extra_checks 0 +#endif + +/* gpio_lock prevents conflicts during gpio_desc[] table updates. + * While any GPIO is requested, its gpio_chip is not removable; + * each GPIO's "requested" flag serves as a lock and refcount. + */ +static DEFINE_SPINLOCK(gpio_lock); + +struct gpio_desc { + struct gpio_chip *chip; + unsigned long flags; +/* flag symbols are bit numbers */ +#define FLAG_REQUESTED 0 +#define FLAG_IS_OUT 1 + +#ifdef CONFIG_DEBUG_FS + const char *label; +#endif +}; +static struct gpio_desc gpio_desc[ARCH_NR_GPIOS]; + +static inline void desc_set_label(struct gpio_desc *d, const char *label) +{ +#ifdef CONFIG_DEBUG_FS + d->label = label; +#endif +} + +/* Warn when drivers omit gpio_request() calls -- legal but ill-advised + * when setting direction, and otherwise illegal. Until board setup code + * and drivers use explicit requests everywhere (which won't happen when + * those calls have no teeth) we can't avoid autorequesting. This nag + * message should motivate switching to explicit requests... + */ +static void gpio_ensure_requested(struct gpio_desc *desc) +{ + if (test_and_set_bit(FLAG_REQUESTED, &desc->flags) == 0) { + pr_warning("GPIO-%d autorequested\n", (int)(desc - gpio_desc)); + desc_set_label(desc, "[auto]"); + } +} + +/* caller holds gpio_lock *OR* gpio is marked as requested */ +static inline struct gpio_chip *gpio_to_chip(unsigned gpio) +{ + return gpio_desc[gpio].chip; +} + +/** + * gpiochip_add() - register a gpio_chip + * @chip: the chip to register, with chip->base initialized + * Context: potentially before irqs or kmalloc will work + * + * Returns a negative errno if the chip can't be registered, such as + * because the chip->base is invalid or already associated with a + * different chip. Otherwise it returns zero as a success code. + */ +int gpiochip_add(struct gpio_chip *chip) +{ + unsigned long flags; + int status = 0; + unsigned id; + + /* NOTE chip->base negative is reserved to mean a request for + * dynamic allocation. We don't currently support that. + */ + + if (chip->base < 0 || (chip->base + chip->ngpio) >= ARCH_NR_GPIOS) { + status = -EINVAL; + goto fail; + } + + spin_lock_irqsave(&gpio_lock, flags); + + /* these GPIO numbers must not be managed by another gpio_chip */ + for (id = chip->base; id < chip->base + chip->ngpio; id++) { + if (gpio_desc[id].chip != NULL) { + status = -EBUSY; + break; + } + } + if (status == 0) { + for (id = chip->base; id < chip->base + chip->ngpio; id++) { + gpio_desc[id].chip = chip; + gpio_desc[id].flags = 0; + } + } + + spin_unlock_irqrestore(&gpio_lock, flags); +fail: + /* failures here can mean systems won't boot... */ + if (status) + pr_err("gpiochip_add: gpios %d..%d (%s) not registered\n", + chip->base, chip->base + chip->ngpio, + chip->label ? : "generic"); + return status; +} +EXPORT_SYMBOL_GPL(gpiochip_add); + +/** + * gpiochip_remove() - unregister a gpio_chip + * @chip: the chip to unregister + * + * A gpio_chip with any GPIOs still requested may not be removed. + */ +int gpiochip_remove(struct gpio_chip *chip) +{ + unsigned long flags; + int status = 0; + unsigned id; + + spin_lock_irqsave(&gpio_lock, flags); + + for (id = chip->base; id < chip->base + chip->ngpio; id++) { + if (test_bit(FLAG_REQUESTED, &gpio_desc[id].flags)) { + status = -EBUSY; + break; + } + } + if (status == 0) { + for (id = chip->base; id < chip->base + chip->ngpio; id++) + gpio_desc[id].chip = NULL; + } + + spin_unlock_irqrestore(&gpio_lock, flags); + return status; +} +EXPORT_SYMBOL_GPL(gpiochip_remove); + + +/* These "optional" allocation calls help prevent drivers from stomping + * on each other, and help provide better diagnostics in debugfs. + * They're called even less than the "set direction" calls. + */ +int gpio_request(unsigned gpio, const char *label) +{ + struct gpio_desc *desc; + int status = -EINVAL; + unsigned long flags; + + spin_lock_irqsave(&gpio_lock, flags); + + if (gpio >= ARCH_NR_GPIOS) + goto done; + desc = &gpio_desc[gpio]; + if (desc->chip == NULL) + goto done; + + /* NOTE: gpio_request() can be called in early boot, + * before IRQs are enabled. + */ + + if (test_and_set_bit(FLAG_REQUESTED, &desc->flags) == 0) { + desc_set_label(desc, label ? : "?"); + status = 0; + } else + status = -EBUSY; + +done: + if (status) + pr_debug("gpio_request: gpio-%d (%s) status %d\n", + gpio, label ? : "?", status); + spin_unlock_irqrestore(&gpio_lock, flags); + return status; +} +EXPORT_SYMBOL_GPL(gpio_request); + +void gpio_free(unsigned gpio) +{ + unsigned long flags; + struct gpio_desc *desc; + + if (gpio >= ARCH_NR_GPIOS) { + WARN_ON(extra_checks); + return; + } + + spin_lock_irqsave(&gpio_lock, flags); + + desc = &gpio_desc[gpio]; + if (desc->chip && test_and_clear_bit(FLAG_REQUESTED, &desc->flags)) + desc_set_label(desc, NULL); + else + WARN_ON(extra_checks); + + spin_unlock_irqrestore(&gpio_lock, flags); +} +EXPORT_SYMBOL_GPL(gpio_free); + + +/** + * gpiochip_is_requested - return string iff signal was requested + * @chip: controller managing the signal + * @offset: of signal within controller's 0..(ngpio - 1) range + * + * Returns NULL if the GPIO is not currently requested, else a string. + * If debugfs support is enabled, the string returned is the label passed + * to gpio_request(); otherwise it is a meaningless constant. + * + * This function is for use by GPIO controller drivers. The label can + * help with diagnostics, and knowing that the signal is used as a GPIO + * can help avoid accidentally multiplexing it to another controller. + */ +const char *gpiochip_is_requested(struct gpio_chip *chip, unsigned offset) +{ + unsigned gpio = chip->base + offset; + + if (gpio >= ARCH_NR_GPIOS || gpio_desc[gpio].chip != chip) + return NULL; + if (test_bit(FLAG_REQUESTED, &gpio_desc[gpio].flags) == 0) + return NULL; +#ifdef CONFIG_DEBUG_FS + return gpio_desc[gpio].label; +#else + return "?"; +#endif +} +EXPORT_SYMBOL_GPL(gpiochip_is_requested); + + +/* Drivers MUST set GPIO direction before making get/set calls. In + * some cases this is done in early boot, before IRQs are enabled. + * + * As a rule these aren't called more than once (except for drivers + * using the open-drain emulation idiom) so these are natural places + * to accumulate extra debugging checks. Note that we can't (yet) + * rely on gpio_request() having been called beforehand. + */ + +int gpio_direction_input(unsigned gpio) +{ + unsigned long flags; + struct gpio_chip *chip; + struct gpio_desc *desc = &gpio_desc[gpio]; + int status = -EINVAL; + + spin_lock_irqsave(&gpio_lock, flags); + + if (gpio >= ARCH_NR_GPIOS) + goto fail; + chip = desc->chip; + if (!chip || !chip->get || !chip->direction_input) + goto fail; + gpio -= chip->base; + if (gpio >= chip->ngpio) + goto fail; + gpio_ensure_requested(desc); + + /* now we know the gpio is valid and chip won't vanish */ + + spin_unlock_irqrestore(&gpio_lock, flags); + + might_sleep_if(extra_checks && chip->can_sleep); + + status = chip->direction_input(chip, gpio); + if (status == 0) + clear_bit(FLAG_IS_OUT, &desc->flags); + return status; +fail: + spin_unlock_irqrestore(&gpio_lock, flags); + if (status) + pr_debug("%s: gpio-%d status %d\n", + __FUNCTION__, gpio, status); + return status; +} +EXPORT_SYMBOL_GPL(gpio_direction_input); + +int gpio_direction_output(unsigned gpio, int value) +{ + unsigned long flags; + struct gpio_chip *chip; + struct gpio_desc *desc = &gpio_desc[gpio]; + int status = -EINVAL; + + spin_lock_irqsave(&gpio_lock, flags); + + if (gpio >= ARCH_NR_GPIOS) + goto fail; + chip = desc->chip; + if (!chip || !chip->set || !chip->direction_output) + goto fail; + gpio -= chip->base; + if (gpio >= chip->ngpio) + goto fail; + gpio_ensure_requested(desc); + + /* now we know the gpio is valid and chip won't vanish */ + + spin_unlock_irqrestore(&gpio_lock, flags); + + might_sleep_if(extra_checks && chip->can_sleep); + + status = chip->direction_output(chip, gpio, value); + if (status == 0) + set_bit(FLAG_IS_OUT, &desc->flags); + return status; +fail: + spin_unlock_irqrestore(&gpio_lock, flags); + if (status) + pr_debug("%s: gpio-%d status %d\n", + __FUNCTION__, gpio, status); + return status; +} +EXPORT_SYMBOL_GPL(gpio_direction_output); + + +/* I/O calls are only valid after configuration completed; the relevant + * "is this a valid GPIO" error checks should already have been done. + * + * "Get" operations are often inlinable as reading a pin value register, + * and masking the relevant bit in that register. + * + * When "set" operations are inlinable, they involve writing that mask to + * one register to set a low value, or a different register to set it high. + * Otherwise locking is needed, so there may be little value to inlining. + * + *------------------------------------------------------------------------ + * + * IMPORTANT!!! The hot paths -- get/set value -- assume that callers + * have requested the GPIO. That can include implicit requesting by + * a direction setting call. Marking a gpio as requested locks its chip + * in memory, guaranteeing that these table lookups need no more locking + * and that gpiochip_remove() will fail. + * + * REVISIT when debugging, consider adding some instrumentation to ensure + * that the GPIO was actually requested. + */ + +/** + * __gpio_get_value() - return a gpio's value + * @gpio: gpio whose value will be returned + * Context: any + * + * This is used directly or indirectly to implement gpio_get_value(). + * It returns the zero or nonzero value provided by the associated + * gpio_chip.get() method; or zero if no such method is provided. + */ +int __gpio_get_value(unsigned gpio) +{ + struct gpio_chip *chip; + + chip = gpio_to_chip(gpio); + WARN_ON(extra_checks && chip->can_sleep); + return chip->get ? chip->get(chip, gpio - chip->base) : 0; +} +EXPORT_SYMBOL_GPL(__gpio_get_value); + +/** + * __gpio_set_value() - assign a gpio's value + * @gpio: gpio whose value will be assigned + * @value: value to assign + * Context: any + * + * This is used directly or indirectly to implement gpio_set_value(). + * It invokes the associated gpio_chip.set() method. + */ +void __gpio_set_value(unsigned gpio, int value) +{ + struct gpio_chip *chip; + + chip = gpio_to_chip(gpio); + WARN_ON(extra_checks && chip->can_sleep); + chip->set(chip, gpio - chip->base, value); +} +EXPORT_SYMBOL_GPL(__gpio_set_value); + +/** + * __gpio_cansleep() - report whether gpio value access will sleep + * @gpio: gpio in question + * Context: any + * + * This is used directly or indirectly to implement gpio_cansleep(). It + * returns nonzero if access reading or writing the GPIO value can sleep. + */ +int __gpio_cansleep(unsigned gpio) +{ + struct gpio_chip *chip; + + /* only call this on GPIOs that are valid! */ + chip = gpio_to_chip(gpio); + + return chip->can_sleep; +} +EXPORT_SYMBOL_GPL(__gpio_cansleep); + + + +/* There's no value in making it easy to inline GPIO calls that may sleep. + * Common examples include ones connected to I2C or SPI chips. + */ + +int gpio_get_value_cansleep(unsigned gpio) +{ + struct gpio_chip *chip; + + might_sleep_if(extra_checks); + chip = gpio_to_chip(gpio); + return chip->get(chip, gpio - chip->base); +} +EXPORT_SYMBOL_GPL(gpio_get_value_cansleep); + +void gpio_set_value_cansleep(unsigned gpio, int value) +{ + struct gpio_chip *chip; + + might_sleep_if(extra_checks); + chip = gpio_to_chip(gpio); + chip->set(chip, gpio - chip->base, value); +} +EXPORT_SYMBOL_GPL(gpio_set_value_cansleep); + + +#ifdef CONFIG_DEBUG_FS + +#include <linux/debugfs.h> +#include <linux/seq_file.h> + + +static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip) +{ + unsigned i; + unsigned gpio = chip->base; + struct gpio_desc *gdesc = &gpio_desc[gpio]; + int is_out; + + for (i = 0; i < chip->ngpio; i++, gpio++, gdesc++) { + if (!test_bit(FLAG_REQUESTED, &gdesc->flags)) + continue; + + is_out = test_bit(FLAG_IS_OUT, &gdesc->flags); + seq_printf(s, " gpio-%-3d (%-12s) %s %s", + gpio, gdesc->label, + is_out ? "out" : "in ", + chip->get + ? (chip->get(chip, i) ? "hi" : "lo") + : "? "); + + if (!is_out) { + int irq = gpio_to_irq(gpio); + struct irq_desc *desc = irq_desc + irq; + + /* This races with request_irq(), set_irq_type(), + * and set_irq_wake() ... but those are "rare". + * + * More significantly, trigger type flags aren't + * currently maintained by genirq. + */ + if (irq >= 0 && desc->action) { + char *trigger; + + switch (desc->status & IRQ_TYPE_SENSE_MASK) { + case IRQ_TYPE_NONE: + trigger = "(default)"; + break; + case IRQ_TYPE_EDGE_FALLING: + trigger = "edge-falling"; + break; + case IRQ_TYPE_EDGE_RISING: + trigger = "edge-rising"; + break; + case IRQ_TYPE_EDGE_BOTH: + trigger = "edge-both"; + break; + case IRQ_TYPE_LEVEL_HIGH: + trigger = "level-high"; + break; + case IRQ_TYPE_LEVEL_LOW: + trigger = "level-low"; + break; + default: + trigger = "?trigger?"; + break; + } + + seq_printf(s, " irq-%d %s%s", + irq, trigger, + (desc->status & IRQ_WAKEUP) + ? " wakeup" : ""); + } + } + + seq_printf(s, "\n"); + } +} + +static int gpiolib_show(struct seq_file *s, void *unused) +{ + struct gpio_chip *chip = NULL; + unsigned gpio; + int started = 0; + + /* REVISIT this isn't locked against gpio_chip removal ... */ + + for (gpio = 0; gpio < ARCH_NR_GPIOS; gpio++) { + if (chip == gpio_desc[gpio].chip) + continue; + chip = gpio_desc[gpio].chip; + if (!chip) + continue; + + seq_printf(s, "%sGPIOs %d-%d, %s%s:\n", + started ? "\n" : "", + chip->base, chip->base + chip->ngpio - 1, + chip->label ? : "generic", + chip->can_sleep ? ", can sleep" : ""); + started = 1; + if (chip->dbg_show) + chip->dbg_show(s, chip); + else + gpiolib_dbg_show(s, chip); + } + return 0; +} + +static int gpiolib_open(struct inode *inode, struct file *file) +{ + return single_open(file, gpiolib_show, NULL); +} + +static struct file_operations gpiolib_operations = { + .open = gpiolib_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init gpiolib_debugfs_init(void) +{ + /* /sys/kernel/debug/gpio */ + (void) debugfs_create_file("gpio", S_IFREG | S_IRUGO, + NULL, NULL, &gpiolib_operations); + return 0; +} +subsys_initcall(gpiolib_debugfs_init); + +#endif /* DEBUG_FS */ diff --git a/drivers/gpio/mcp23s08.c b/drivers/gpio/mcp23s08.c new file mode 100644 index 000000000000..bb60e8c1a1f0 --- /dev/null +++ b/drivers/gpio/mcp23s08.c @@ -0,0 +1,357 @@ +/* + * mcp23s08.c - SPI gpio expander driver + */ + +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/workqueue.h> +#include <linux/mutex.h> + +#include <linux/spi/spi.h> +#include <linux/spi/mcp23s08.h> + +#include <asm/gpio.h> + + +/* Registers are all 8 bits wide. + * + * The mcp23s17 has twice as many bits, and can be configured to work + * with either 16 bit registers or with two adjacent 8 bit banks. + * + * Also, there are I2C versions of both chips. + */ +#define MCP_IODIR 0x00 /* init/reset: all ones */ +#define MCP_IPOL 0x01 +#define MCP_GPINTEN 0x02 +#define MCP_DEFVAL 0x03 +#define MCP_INTCON 0x04 +#define MCP_IOCON 0x05 +# define IOCON_SEQOP (1 << 5) +# define IOCON_HAEN (1 << 3) +# define IOCON_ODR (1 << 2) +# define IOCON_INTPOL (1 << 1) +#define MCP_GPPU 0x06 +#define MCP_INTF 0x07 +#define MCP_INTCAP 0x08 +#define MCP_GPIO 0x09 +#define MCP_OLAT 0x0a + +struct mcp23s08 { + struct spi_device *spi; + u8 addr; + + /* lock protects the cached values */ + struct mutex lock; + u8 cache[11]; + + struct gpio_chip chip; + + struct work_struct work; +}; + +static int mcp23s08_read(struct mcp23s08 *mcp, unsigned reg) +{ + u8 tx[2], rx[1]; + int status; + + tx[0] = mcp->addr | 0x01; + tx[1] = reg; + status = spi_write_then_read(mcp->spi, tx, sizeof tx, rx, sizeof rx); + return (status < 0) ? status : rx[0]; +} + +static int mcp23s08_write(struct mcp23s08 *mcp, unsigned reg, u8 val) +{ + u8 tx[3]; + + tx[0] = mcp->addr; + tx[1] = reg; + tx[2] = val; + return spi_write_then_read(mcp->spi, tx, sizeof tx, NULL, 0); +} + +static int +mcp23s08_read_regs(struct mcp23s08 *mcp, unsigned reg, u8 *vals, unsigned n) +{ + u8 tx[2]; + + if ((n + reg) > sizeof mcp->cache) + return -EINVAL; + tx[0] = mcp->addr | 0x01; + tx[1] = reg; + return spi_write_then_read(mcp->spi, tx, sizeof tx, vals, n); +} + +/*----------------------------------------------------------------------*/ + +static int mcp23s08_direction_input(struct gpio_chip *chip, unsigned offset) +{ + struct mcp23s08 *mcp = container_of(chip, struct mcp23s08, chip); + int status; + + mutex_lock(&mcp->lock); + mcp->cache[MCP_IODIR] |= (1 << offset); + status = mcp23s08_write(mcp, MCP_IODIR, mcp->cache[MCP_IODIR]); + mutex_unlock(&mcp->lock); + return status; +} + +static int mcp23s08_get(struct gpio_chip *chip, unsigned offset) +{ + struct mcp23s08 *mcp = container_of(chip, struct mcp23s08, chip); + int status; + + mutex_lock(&mcp->lock); + + /* REVISIT reading this clears any IRQ ... */ + status = mcp23s08_read(mcp, MCP_GPIO); + if (status < 0) + status = 0; + else { + mcp->cache[MCP_GPIO] = status; + status = !!(status & (1 << offset)); + } + mutex_unlock(&mcp->lock); + return status; +} + +static int __mcp23s08_set(struct mcp23s08 *mcp, unsigned mask, int value) +{ + u8 olat = mcp->cache[MCP_OLAT]; + + if (value) + olat |= mask; + else + olat &= ~mask; + mcp->cache[MCP_OLAT] = olat; + return mcp23s08_write(mcp, MCP_OLAT, olat); +} + +static void mcp23s08_set(struct gpio_chip *chip, unsigned offset, int value) +{ + struct mcp23s08 *mcp = container_of(chip, struct mcp23s08, chip); + u8 mask = 1 << offset; + + mutex_lock(&mcp->lock); + __mcp23s08_set(mcp, mask, value); + mutex_unlock(&mcp->lock); +} + +static int +mcp23s08_direction_output(struct gpio_chip *chip, unsigned offset, int value) +{ + struct mcp23s08 *mcp = container_of(chip, struct mcp23s08, chip); + u8 mask = 1 << offset; + int status; + + mutex_lock(&mcp->lock); + status = __mcp23s08_set(mcp, mask, value); + if (status == 0) { + mcp->cache[MCP_IODIR] &= ~mask; + status = mcp23s08_write(mcp, MCP_IODIR, mcp->cache[MCP_IODIR]); + } + mutex_unlock(&mcp->lock); + return status; +} + +/*----------------------------------------------------------------------*/ + +#ifdef CONFIG_DEBUG_FS + +#include <linux/seq_file.h> + +/* + * This shows more info than the generic gpio dump code: + * pullups, deglitching, open drain drive. + */ +static void mcp23s08_dbg_show(struct seq_file *s, struct gpio_chip *chip) +{ + struct mcp23s08 *mcp; + char bank; + unsigned t; + unsigned mask; + + mcp = container_of(chip, struct mcp23s08, chip); + + /* NOTE: we only handle one bank for now ... */ + bank = '0' + ((mcp->addr >> 1) & 0x3); + + mutex_lock(&mcp->lock); + t = mcp23s08_read_regs(mcp, 0, mcp->cache, sizeof mcp->cache); + if (t < 0) { + seq_printf(s, " I/O ERROR %d\n", t); + goto done; + } + + for (t = 0, mask = 1; t < 8; t++, mask <<= 1) { + const char *label; + + label = gpiochip_is_requested(chip, t); + if (!label) + continue; + + seq_printf(s, " gpio-%-3d P%c.%d (%-12s) %s %s %s", + chip->base + t, bank, t, label, + (mcp->cache[MCP_IODIR] & mask) ? "in " : "out", + (mcp->cache[MCP_GPIO] & mask) ? "hi" : "lo", + (mcp->cache[MCP_GPPU] & mask) ? " " : "up"); + /* NOTE: ignoring the irq-related registers */ + seq_printf(s, "\n"); + } +done: + mutex_unlock(&mcp->lock); +} + +#else +#define mcp23s08_dbg_show NULL +#endif + +/*----------------------------------------------------------------------*/ + +static int mcp23s08_probe(struct spi_device *spi) +{ + struct mcp23s08 *mcp; + struct mcp23s08_platform_data *pdata; + int status; + int do_update = 0; + + pdata = spi->dev.platform_data; + if (!pdata || pdata->slave > 3 || !pdata->base) + return -ENODEV; + + mcp = kzalloc(sizeof *mcp, GFP_KERNEL); + if (!mcp) + return -ENOMEM; + + mutex_init(&mcp->lock); + + mcp->spi = spi; + mcp->addr = 0x40 | (pdata->slave << 1); + + mcp->chip.label = "mcp23s08", + + mcp->chip.direction_input = mcp23s08_direction_input; + mcp->chip.get = mcp23s08_get; + mcp->chip.direction_output = mcp23s08_direction_output; + mcp->chip.set = mcp23s08_set; + mcp->chip.dbg_show = mcp23s08_dbg_show; + + mcp->chip.base = pdata->base; + mcp->chip.ngpio = 8; + mcp->chip.can_sleep = 1; + + spi_set_drvdata(spi, mcp); + + /* verify MCP_IOCON.SEQOP = 0, so sequential reads work */ + status = mcp23s08_read(mcp, MCP_IOCON); + if (status < 0) + goto fail; + if (status & IOCON_SEQOP) { + status &= ~IOCON_SEQOP; + status = mcp23s08_write(mcp, MCP_IOCON, (u8) status); + if (status < 0) + goto fail; + } + + /* configure ~100K pullups */ + status = mcp23s08_write(mcp, MCP_GPPU, pdata->pullups); + if (status < 0) + goto fail; + + status = mcp23s08_read_regs(mcp, 0, mcp->cache, sizeof mcp->cache); + if (status < 0) + goto fail; + + /* disable inverter on input */ + if (mcp->cache[MCP_IPOL] != 0) { + mcp->cache[MCP_IPOL] = 0; + do_update = 1; + } + + /* disable irqs */ + if (mcp->cache[MCP_GPINTEN] != 0) { + mcp->cache[MCP_GPINTEN] = 0; + do_update = 1; + } + + if (do_update) { + u8 tx[4]; + + tx[0] = mcp->addr; + tx[1] = MCP_IPOL; + memcpy(&tx[2], &mcp->cache[MCP_IPOL], sizeof(tx) - 2); + status = spi_write_then_read(mcp->spi, tx, sizeof tx, NULL, 0); + + /* FIXME check status... */ + } + + status = gpiochip_add(&mcp->chip); + + /* NOTE: these chips have a relatively sane IRQ framework, with + * per-signal masking and level/edge triggering. It's not yet + * handled here... + */ + + if (pdata->setup) { + status = pdata->setup(spi, mcp->chip.base, + mcp->chip.ngpio, pdata->context); + if (status < 0) + dev_dbg(&spi->dev, "setup --> %d\n", status); + } + + return 0; + +fail: + kfree(mcp); + return status; +} + +static int mcp23s08_remove(struct spi_device *spi) +{ + struct mcp23s08 *mcp = spi_get_drvdata(spi); + struct mcp23s08_platform_data *pdata = spi->dev.platform_data; + int status = 0; + + if (pdata->teardown) { + status = pdata->teardown(spi, + mcp->chip.base, mcp->chip.ngpio, + pdata->context); + if (status < 0) { + dev_err(&spi->dev, "%s --> %d\n", "teardown", status); + return status; + } + } + + status = gpiochip_remove(&mcp->chip); + if (status == 0) + kfree(mcp); + else + dev_err(&spi->dev, "%s --> %d\n", "remove", status); + return status; +} + +static struct spi_driver mcp23s08_driver = { + .probe = mcp23s08_probe, + .remove = mcp23s08_remove, + .driver = { + .name = "mcp23s08", + .owner = THIS_MODULE, + }, +}; + +/*----------------------------------------------------------------------*/ + +static int __init mcp23s08_init(void) +{ + return spi_register_driver(&mcp23s08_driver); +} +module_init(mcp23s08_init); + +static void __exit mcp23s08_exit(void) +{ + spi_unregister_driver(&mcp23s08_driver); +} +module_exit(mcp23s08_exit); + +MODULE_LICENSE("GPL"); + diff --git a/drivers/gpio/pca9539.c b/drivers/gpio/pca9539.c new file mode 100644 index 000000000000..3e85c92a7d59 --- /dev/null +++ b/drivers/gpio/pca9539.c @@ -0,0 +1,271 @@ +/* + * pca9539.c - 16-bit I/O port with interrupt and reset + * + * Copyright (C) 2005 Ben Gardner <bgardner@wabtec.com> + * Copyright (C) 2007 Marvell International Ltd. + * + * Derived from drivers/i2c/chips/pca9539.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/i2c.h> +#include <linux/i2c/pca9539.h> + +#include <asm/gpio.h> + + +#define NR_PCA9539_GPIOS 16 + +#define PCA9539_INPUT 0 +#define PCA9539_OUTPUT 2 +#define PCA9539_INVERT 4 +#define PCA9539_DIRECTION 6 + +struct pca9539_chip { + unsigned gpio_start; + uint16_t reg_output; + uint16_t reg_direction; + + struct i2c_client *client; + struct gpio_chip gpio_chip; +}; + +/* NOTE: we can't currently rely on fault codes to come from SMBus + * calls, so we map all errors to EIO here and return zero otherwise. + */ +static int pca9539_write_reg(struct pca9539_chip *chip, int reg, uint16_t val) +{ + if (i2c_smbus_write_word_data(chip->client, reg, val) < 0) + return -EIO; + else + return 0; +} + +static int pca9539_read_reg(struct pca9539_chip *chip, int reg, uint16_t *val) +{ + int ret; + + ret = i2c_smbus_read_word_data(chip->client, reg); + if (ret < 0) { + dev_err(&chip->client->dev, "failed reading register\n"); + return -EIO; + } + + *val = (uint16_t)ret; + return 0; +} + +static int pca9539_gpio_direction_input(struct gpio_chip *gc, unsigned off) +{ + struct pca9539_chip *chip; + uint16_t reg_val; + int ret; + + chip = container_of(gc, struct pca9539_chip, gpio_chip); + + reg_val = chip->reg_direction | (1u << off); + ret = pca9539_write_reg(chip, PCA9539_DIRECTION, reg_val); + if (ret) + return ret; + + chip->reg_direction = reg_val; + return 0; +} + +static int pca9539_gpio_direction_output(struct gpio_chip *gc, + unsigned off, int val) +{ + struct pca9539_chip *chip; + uint16_t reg_val; + int ret; + + chip = container_of(gc, struct pca9539_chip, gpio_chip); + + /* set output level */ + if (val) + reg_val = chip->reg_output | (1u << off); + else + reg_val = chip->reg_output & ~(1u << off); + + ret = pca9539_write_reg(chip, PCA9539_OUTPUT, reg_val); + if (ret) + return ret; + + chip->reg_output = reg_val; + + /* then direction */ + reg_val = chip->reg_direction & ~(1u << off); + ret = pca9539_write_reg(chip, PCA9539_DIRECTION, reg_val); + if (ret) + return ret; + + chip->reg_direction = reg_val; + return 0; +} + +static int pca9539_gpio_get_value(struct gpio_chip *gc, unsigned off) +{ + struct pca9539_chip *chip; + uint16_t reg_val; + int ret; + + chip = container_of(gc, struct pca9539_chip, gpio_chip); + + ret = pca9539_read_reg(chip, PCA9539_INPUT, ®_val); + if (ret < 0) { + /* NOTE: diagnostic already emitted; that's all we should + * do unless gpio_*_value_cansleep() calls become different + * from their nonsleeping siblings (and report faults). + */ + return 0; + } + + return (reg_val & (1u << off)) ? 1 : 0; +} + +static void pca9539_gpio_set_value(struct gpio_chip *gc, unsigned off, int val) +{ + struct pca9539_chip *chip; + uint16_t reg_val; + int ret; + + chip = container_of(gc, struct pca9539_chip, gpio_chip); + + if (val) + reg_val = chip->reg_output | (1u << off); + else + reg_val = chip->reg_output & ~(1u << off); + + ret = pca9539_write_reg(chip, PCA9539_OUTPUT, reg_val); + if (ret) + return; + + chip->reg_output = reg_val; +} + +static int pca9539_init_gpio(struct pca9539_chip *chip) +{ + struct gpio_chip *gc; + + gc = &chip->gpio_chip; + + gc->direction_input = pca9539_gpio_direction_input; + gc->direction_output = pca9539_gpio_direction_output; + gc->get = pca9539_gpio_get_value; + gc->set = pca9539_gpio_set_value; + + gc->base = chip->gpio_start; + gc->ngpio = NR_PCA9539_GPIOS; + gc->label = "pca9539"; + + return gpiochip_add(gc); +} + +static int __devinit pca9539_probe(struct i2c_client *client) +{ + struct pca9539_platform_data *pdata; + struct pca9539_chip *chip; + int ret; + + pdata = client->dev.platform_data; + if (pdata == NULL) + return -ENODEV; + + chip = kzalloc(sizeof(struct pca9539_chip), GFP_KERNEL); + if (chip == NULL) + return -ENOMEM; + + chip->client = client; + + chip->gpio_start = pdata->gpio_base; + + /* initialize cached registers from their original values. + * we can't share this chip with another i2c master. + */ + ret = pca9539_read_reg(chip, PCA9539_OUTPUT, &chip->reg_output); + if (ret) + goto out_failed; + + ret = pca9539_read_reg(chip, PCA9539_DIRECTION, &chip->reg_direction); + if (ret) + goto out_failed; + + /* set platform specific polarity inversion */ + ret = pca9539_write_reg(chip, PCA9539_INVERT, pdata->invert); + if (ret) + goto out_failed; + + ret = pca9539_init_gpio(chip); + if (ret) + goto out_failed; + + if (pdata->setup) { + ret = pdata->setup(client, chip->gpio_chip.base, + chip->gpio_chip.ngpio, pdata->context); + if (ret < 0) + dev_warn(&client->dev, "setup failed, %d\n", ret); + } + + i2c_set_clientdata(client, chip); + return 0; + +out_failed: + kfree(chip); + return ret; +} + +static int pca9539_remove(struct i2c_client *client) +{ + struct pca9539_platform_data *pdata = client->dev.platform_data; + struct pca9539_chip *chip = i2c_get_clientdata(client); + int ret = 0; + + if (pdata->teardown) { + ret = pdata->teardown(client, chip->gpio_chip.base, + chip->gpio_chip.ngpio, pdata->context); + if (ret < 0) { + dev_err(&client->dev, "%s failed, %d\n", + "teardown", ret); + return ret; + } + } + + ret = gpiochip_remove(&chip->gpio_chip); + if (ret) { + dev_err(&client->dev, "%s failed, %d\n", + "gpiochip_remove()", ret); + return ret; + } + + kfree(chip); + return 0; +} + +static struct i2c_driver pca9539_driver = { + .driver = { + .name = "pca9539", + }, + .probe = pca9539_probe, + .remove = pca9539_remove, +}; + +static int __init pca9539_init(void) +{ + return i2c_add_driver(&pca9539_driver); +} +module_init(pca9539_init); + +static void __exit pca9539_exit(void) +{ + i2c_del_driver(&pca9539_driver); +} +module_exit(pca9539_exit); + +MODULE_AUTHOR("eric miao <eric.miao@marvell.com>"); +MODULE_DESCRIPTION("GPIO expander driver for PCA9539"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpio/pcf857x.c b/drivers/gpio/pcf857x.c new file mode 100644 index 000000000000..c6b3b5378384 --- /dev/null +++ b/drivers/gpio/pcf857x.c @@ -0,0 +1,330 @@ +/* + * pcf857x - driver for pcf857x, pca857x, and pca967x I2C GPIO expanders + * + * Copyright (C) 2007 David Brownell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/i2c.h> +#include <linux/i2c/pcf857x.h> + +#include <asm/gpio.h> + + +/* + * The pcf857x, pca857x, and pca967x chips only expose one read and one + * write register. Writing a "one" bit (to match the reset state) lets + * that pin be used as an input; it's not an open-drain model, but acts + * a bit like one. This is described as "quasi-bidirectional"; read the + * chip documentation for details. + * + * Many other I2C GPIO expander chips (like the pca953x models) have + * more complex register models and more conventional circuitry using + * push/pull drivers. They often use the same 0x20..0x27 addresses as + * pcf857x parts, making the "legacy" I2C driver model problematic. + */ +struct pcf857x { + struct gpio_chip chip; + struct i2c_client *client; + unsigned out; /* software latch */ +}; + +/*-------------------------------------------------------------------------*/ + +/* Talk to 8-bit I/O expander */ + +static int pcf857x_input8(struct gpio_chip *chip, unsigned offset) +{ + struct pcf857x *gpio = container_of(chip, struct pcf857x, chip); + + gpio->out |= (1 << offset); + return i2c_smbus_write_byte(gpio->client, gpio->out); +} + +static int pcf857x_get8(struct gpio_chip *chip, unsigned offset) +{ + struct pcf857x *gpio = container_of(chip, struct pcf857x, chip); + s32 value; + + value = i2c_smbus_read_byte(gpio->client); + return (value < 0) ? 0 : (value & (1 << offset)); +} + +static int pcf857x_output8(struct gpio_chip *chip, unsigned offset, int value) +{ + struct pcf857x *gpio = container_of(chip, struct pcf857x, chip); + unsigned bit = 1 << offset; + + if (value) + gpio->out |= bit; + else + gpio->out &= ~bit; + return i2c_smbus_write_byte(gpio->client, gpio->out); +} + +static void pcf857x_set8(struct gpio_chip *chip, unsigned offset, int value) +{ + pcf857x_output8(chip, offset, value); +} + +/*-------------------------------------------------------------------------*/ + +/* Talk to 16-bit I/O expander */ + +static int i2c_write_le16(struct i2c_client *client, u16 word) +{ + u8 buf[2] = { word & 0xff, word >> 8, }; + int status; + + status = i2c_master_send(client, buf, 2); + return (status < 0) ? status : 0; +} + +static int i2c_read_le16(struct i2c_client *client) +{ + u8 buf[2]; + int status; + + status = i2c_master_recv(client, buf, 2); + if (status < 0) + return status; + return (buf[1] << 8) | buf[0]; +} + +static int pcf857x_input16(struct gpio_chip *chip, unsigned offset) +{ + struct pcf857x *gpio = container_of(chip, struct pcf857x, chip); + + gpio->out |= (1 << offset); + return i2c_write_le16(gpio->client, gpio->out); +} + +static int pcf857x_get16(struct gpio_chip *chip, unsigned offset) +{ + struct pcf857x *gpio = container_of(chip, struct pcf857x, chip); + int value; + + value = i2c_read_le16(gpio->client); + return (value < 0) ? 0 : (value & (1 << offset)); +} + +static int pcf857x_output16(struct gpio_chip *chip, unsigned offset, int value) +{ + struct pcf857x *gpio = container_of(chip, struct pcf857x, chip); + unsigned bit = 1 << offset; + + if (value) + gpio->out |= bit; + else + gpio->out &= ~bit; + return i2c_write_le16(gpio->client, gpio->out); +} + +static void pcf857x_set16(struct gpio_chip *chip, unsigned offset, int value) +{ + pcf857x_output16(chip, offset, value); +} + +/*-------------------------------------------------------------------------*/ + +static int pcf857x_probe(struct i2c_client *client) +{ + struct pcf857x_platform_data *pdata; + struct pcf857x *gpio; + int status; + + pdata = client->dev.platform_data; + if (!pdata) + return -ENODEV; + + /* Allocate, initialize, and register this gpio_chip. */ + gpio = kzalloc(sizeof *gpio, GFP_KERNEL); + if (!gpio) + return -ENOMEM; + + gpio->chip.base = pdata->gpio_base; + gpio->chip.can_sleep = 1; + + /* NOTE: the OnSemi jlc1562b is also largely compatible with + * these parts, notably for output. It has a low-resolution + * DAC instead of pin change IRQs; and its inputs can be the + * result of comparators. + */ + + /* 8574 addresses are 0x20..0x27; 8574a uses 0x38..0x3f; + * 9670, 9672, 9764, and 9764a use quite a variety. + * + * NOTE: we don't distinguish here between *4 and *4a parts. + */ + if (strcmp(client->name, "pcf8574") == 0 + || strcmp(client->name, "pca8574") == 0 + || strcmp(client->name, "pca9670") == 0 + || strcmp(client->name, "pca9672") == 0 + || strcmp(client->name, "pca9674") == 0 + ) { + gpio->chip.ngpio = 8; + gpio->chip.direction_input = pcf857x_input8; + gpio->chip.get = pcf857x_get8; + gpio->chip.direction_output = pcf857x_output8; + gpio->chip.set = pcf857x_set8; + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE)) + status = -EIO; + + /* fail if there's no chip present */ + else + status = i2c_smbus_read_byte(client); + + /* '75/'75c addresses are 0x20..0x27, just like the '74; + * the '75c doesn't have a current source pulling high. + * 9671, 9673, and 9765 use quite a variety of addresses. + * + * NOTE: we don't distinguish here between '75 and '75c parts. + */ + } else if (strcmp(client->name, "pcf8575") == 0 + || strcmp(client->name, "pca8575") == 0 + || strcmp(client->name, "pca9671") == 0 + || strcmp(client->name, "pca9673") == 0 + || strcmp(client->name, "pca9675") == 0 + ) { + gpio->chip.ngpio = 16; + gpio->chip.direction_input = pcf857x_input16; + gpio->chip.get = pcf857x_get16; + gpio->chip.direction_output = pcf857x_output16; + gpio->chip.set = pcf857x_set16; + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) + status = -EIO; + + /* fail if there's no chip present */ + else + status = i2c_read_le16(client); + + } else + status = -ENODEV; + + if (status < 0) + goto fail; + + gpio->chip.label = client->name; + + gpio->client = client; + i2c_set_clientdata(client, gpio); + + /* NOTE: these chips have strange "quasi-bidirectional" I/O pins. + * We can't actually know whether a pin is configured (a) as output + * and driving the signal low, or (b) as input and reporting a low + * value ... without knowing the last value written since the chip + * came out of reset (if any). We can't read the latched output. + * + * In short, the only reliable solution for setting up pin direction + * is to do it explicitly. The setup() method can do that, but it + * may cause transient glitching since it can't know the last value + * written (some pins may need to be driven low). + * + * Using pdata->n_latch avoids that trouble. When left initialized + * to zero, our software copy of the "latch" then matches the chip's + * all-ones reset state. Otherwise it flags pins to be driven low. + */ + gpio->out = ~pdata->n_latch; + + status = gpiochip_add(&gpio->chip); + if (status < 0) + goto fail; + + /* NOTE: these chips can issue "some pin-changed" IRQs, which we + * don't yet even try to use. Among other issues, the relevant + * genirq state isn't available to modular drivers; and most irq + * methods can't be called from sleeping contexts. + */ + + dev_info(&client->dev, "gpios %d..%d on a %s%s\n", + gpio->chip.base, + gpio->chip.base + gpio->chip.ngpio - 1, + client->name, + client->irq ? " (irq ignored)" : ""); + + /* Let platform code set up the GPIOs and their users. + * Now is the first time anyone could use them. + */ + if (pdata->setup) { + status = pdata->setup(client, + gpio->chip.base, gpio->chip.ngpio, + pdata->context); + if (status < 0) + dev_warn(&client->dev, "setup --> %d\n", status); + } + + return 0; + +fail: + dev_dbg(&client->dev, "probe error %d for '%s'\n", + status, client->name); + kfree(gpio); + return status; +} + +static int pcf857x_remove(struct i2c_client *client) +{ + struct pcf857x_platform_data *pdata = client->dev.platform_data; + struct pcf857x *gpio = i2c_get_clientdata(client); + int status = 0; + + if (pdata->teardown) { + status = pdata->teardown(client, + gpio->chip.base, gpio->chip.ngpio, + pdata->context); + if (status < 0) { + dev_err(&client->dev, "%s --> %d\n", + "teardown", status); + return status; + } + } + + status = gpiochip_remove(&gpio->chip); + if (status == 0) + kfree(gpio); + else + dev_err(&client->dev, "%s --> %d\n", "remove", status); + return status; +} + +static struct i2c_driver pcf857x_driver = { + .driver = { + .name = "pcf857x", + .owner = THIS_MODULE, + }, + .probe = pcf857x_probe, + .remove = pcf857x_remove, +}; + +static int __init pcf857x_init(void) +{ + return i2c_add_driver(&pcf857x_driver); +} +module_init(pcf857x_init); + +static void __exit pcf857x_exit(void) +{ + i2c_del_driver(&pcf857x_driver); +} +module_exit(pcf857x_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("David Brownell"); diff --git a/drivers/i2c/chips/Kconfig b/drivers/i2c/chips/Kconfig index bd7082c2443d..b21593f93586 100644 --- a/drivers/i2c/chips/Kconfig +++ b/drivers/i2c/chips/Kconfig @@ -54,8 +54,8 @@ config PCF8575 hardware. If unsure, say N. config SENSORS_PCA9539 - tristate "Philips PCA9539 16-bit I/O port" - depends on EXPERIMENTAL + tristate "Philips PCA9539 16-bit I/O port (DEPRECATED)" + depends on EXPERIMENTAL && GPIO_PCA9539 = "n" help If you say yes here you get support for the Philips PCA9539 16-bit I/O port. @@ -63,6 +63,9 @@ config SENSORS_PCA9539 This driver can also be built as a module. If so, the module will be called pca9539. + This driver is deprecated and will be dropped soon. Use + drivers/gpio/pca9539.c instead. + config SENSORS_PCF8591 tristate "Philips PCF8591" depends on EXPERIMENTAL diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 64c66b3769c9..4a938780dfc3 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -137,12 +137,14 @@ err_out: EXPORT_SYMBOL_GPL(led_classdev_register); /** - * led_classdev_unregister - unregisters a object of led_properties class. + * __led_classdev_unregister - unregisters a object of led_properties class. * @led_cdev: the led device to unregister + * @suspended: indicates whether system-wide suspend or resume is in progress * * Unregisters a previously registered via led_classdev_register object. */ -void led_classdev_unregister(struct led_classdev *led_cdev) +void __led_classdev_unregister(struct led_classdev *led_cdev, + bool suspended) { device_remove_file(led_cdev->dev, &dev_attr_brightness); #ifdef CONFIG_LEDS_TRIGGERS @@ -153,13 +155,16 @@ void led_classdev_unregister(struct led_classdev *led_cdev) up_write(&led_cdev->trigger_lock); #endif - device_unregister(led_cdev->dev); + if (suspended) + device_pm_schedule_removal(led_cdev->dev); + else + device_unregister(led_cdev->dev); down_write(&leds_list_lock); list_del(&led_cdev->node); up_write(&leds_list_lock); } -EXPORT_SYMBOL_GPL(led_classdev_unregister); +EXPORT_SYMBOL_GPL(__led_classdev_unregister); static int __init leds_init(void) { diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c index 01b8eca7ccd5..6e6dd17ab572 100644 --- a/drivers/macintosh/via-macii.c +++ b/drivers/macintosh/via-macii.c @@ -111,7 +111,7 @@ static enum macii_state { static struct adb_request *current_req; /* first request struct in the queue */ static struct adb_request *last_req; /* last request struct in the queue */ static unsigned char reply_buf[16]; /* storage for autopolled replies */ -static unsigned char *reply_ptr; /* next byte in req->data or reply_buf */ +static unsigned char *reply_ptr; /* next byte in reply_buf or req->reply */ static int reading_reply; /* store reply in reply_buf else req->reply */ static int data_index; /* index of the next byte to send from req->data */ static int reply_len; /* number of bytes received in reply_buf or req->reply */ diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c index d48c396bdabb..901c824bfe6d 100644 --- a/drivers/net/cxgb3/cxgb3_offload.c +++ b/drivers/net/cxgb3/cxgb3_offload.c @@ -1070,9 +1070,7 @@ void *cxgb_alloc_mem(unsigned long size) */ void cxgb_free_mem(void *addr) { - unsigned long p = (unsigned long)addr; - - if (p >= VMALLOC_START && p < VMALLOC_END) + if (is_vmalloc_addr(addr)) vfree(addr); else kfree(addr); diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c index 36a7ba3134ce..3b78a3819bb3 100644 --- a/drivers/net/pcmcia/3c574_cs.c +++ b/drivers/net/pcmcia/3c574_cs.c @@ -230,10 +230,11 @@ static char mii_preamble_required = 0; static int tc574_config(struct pcmcia_device *link); static void tc574_release(struct pcmcia_device *link); -static void mdio_sync(kio_addr_t ioaddr, int bits); -static int mdio_read(kio_addr_t ioaddr, int phy_id, int location); -static void mdio_write(kio_addr_t ioaddr, int phy_id, int location, int value); -static unsigned short read_eeprom(kio_addr_t ioaddr, int index); +static void mdio_sync(unsigned int ioaddr, int bits); +static int mdio_read(unsigned int ioaddr, int phy_id, int location); +static void mdio_write(unsigned int ioaddr, int phy_id, int location, + int value); +static unsigned short read_eeprom(unsigned int ioaddr, int index); static void tc574_wait_for_completion(struct net_device *dev, int cmd); static void tc574_reset(struct net_device *dev); @@ -341,7 +342,7 @@ static int tc574_config(struct pcmcia_device *link) tuple_t tuple; __le16 buf[32]; int last_fn, last_ret, i, j; - kio_addr_t ioaddr; + unsigned int ioaddr; __be16 *phys_addr; char *cardname; __u32 config; @@ -515,7 +516,7 @@ static int tc574_resume(struct pcmcia_device *link) static void dump_status(struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; EL3WINDOW(1); printk(KERN_INFO " irq status %04x, rx status %04x, tx status " "%02x, tx free %04x\n", inw(ioaddr+EL3_STATUS), @@ -544,7 +545,7 @@ static void tc574_wait_for_completion(struct net_device *dev, int cmd) /* Read a word from the EEPROM using the regular EEPROM access register. Assume that we are in register window zero. */ -static unsigned short read_eeprom(kio_addr_t ioaddr, int index) +static unsigned short read_eeprom(unsigned int ioaddr, int index) { int timer; outw(EEPROM_Read + index, ioaddr + Wn0EepromCmd); @@ -572,9 +573,9 @@ static unsigned short read_eeprom(kio_addr_t ioaddr, int index) /* Generate the preamble required for initial synchronization and a few older transceivers. */ -static void mdio_sync(kio_addr_t ioaddr, int bits) +static void mdio_sync(unsigned int ioaddr, int bits) { - kio_addr_t mdio_addr = ioaddr + Wn4_PhysicalMgmt; + unsigned int mdio_addr = ioaddr + Wn4_PhysicalMgmt; /* Establish sync by sending at least 32 logic ones. */ while (-- bits >= 0) { @@ -583,12 +584,12 @@ static void mdio_sync(kio_addr_t ioaddr, int bits) } } -static int mdio_read(kio_addr_t ioaddr, int phy_id, int location) +static int mdio_read(unsigned int ioaddr, int phy_id, int location) { int i; int read_cmd = (0xf6 << 10) | (phy_id << 5) | location; unsigned int retval = 0; - kio_addr_t mdio_addr = ioaddr + Wn4_PhysicalMgmt; + unsigned int mdio_addr = ioaddr + Wn4_PhysicalMgmt; if (mii_preamble_required) mdio_sync(ioaddr, 32); @@ -608,10 +609,10 @@ static int mdio_read(kio_addr_t ioaddr, int phy_id, int location) return (retval>>1) & 0xffff; } -static void mdio_write(kio_addr_t ioaddr, int phy_id, int location, int value) +static void mdio_write(unsigned int ioaddr, int phy_id, int location, int value) { int write_cmd = 0x50020000 | (phy_id << 23) | (location << 18) | value; - kio_addr_t mdio_addr = ioaddr + Wn4_PhysicalMgmt; + unsigned int mdio_addr = ioaddr + Wn4_PhysicalMgmt; int i; if (mii_preamble_required) @@ -637,7 +638,7 @@ static void tc574_reset(struct net_device *dev) { struct el3_private *lp = netdev_priv(dev); int i; - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; unsigned long flags; tc574_wait_for_completion(dev, TotalReset|0x10); @@ -695,7 +696,7 @@ static void tc574_reset(struct net_device *dev) mdio_write(ioaddr, lp->phys, 4, lp->advertising); if (!auto_polarity) { /* works for TDK 78Q2120 series MII's */ - int i = mdio_read(ioaddr, lp->phys, 16) | 0x20; + i = mdio_read(ioaddr, lp->phys, 16) | 0x20; mdio_write(ioaddr, lp->phys, 16, i); } @@ -741,7 +742,7 @@ static int el3_open(struct net_device *dev) static void el3_tx_timeout(struct net_device *dev) { struct el3_private *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; printk(KERN_NOTICE "%s: Transmit timed out!\n", dev->name); dump_status(dev); @@ -756,7 +757,7 @@ static void el3_tx_timeout(struct net_device *dev) static void pop_tx_status(struct net_device *dev) { struct el3_private *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; int i; /* Clear the Tx status stack. */ @@ -779,7 +780,7 @@ static void pop_tx_status(struct net_device *dev) static int el3_start_xmit(struct sk_buff *skb, struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; struct el3_private *lp = netdev_priv(dev); unsigned long flags; @@ -813,7 +814,7 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) { struct net_device *dev = (struct net_device *) dev_id; struct el3_private *lp = netdev_priv(dev); - kio_addr_t ioaddr; + unsigned int ioaddr; unsigned status; int work_budget = max_interrupt_work; int handled = 0; @@ -907,7 +908,7 @@ static void media_check(unsigned long arg) { struct net_device *dev = (struct net_device *) arg; struct el3_private *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; unsigned long flags; unsigned short /* cable, */ media, partner; @@ -996,7 +997,7 @@ static struct net_device_stats *el3_get_stats(struct net_device *dev) static void update_stats(struct net_device *dev) { struct el3_private *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; u8 rx, tx, up; DEBUG(2, "%s: updating the statistics.\n", dev->name); @@ -1033,7 +1034,7 @@ static void update_stats(struct net_device *dev) static int el3_rx(struct net_device *dev, int worklimit) { struct el3_private *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; short rx_status; DEBUG(3, "%s: in rx_packet(), status %4.4x, rx_status %4.4x.\n", @@ -1094,7 +1095,7 @@ static const struct ethtool_ops netdev_ethtool_ops = { static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { struct el3_private *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; u16 *data = (u16 *)&rq->ifr_ifru; int phy = lp->phys & 0x1f; @@ -1148,7 +1149,7 @@ static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) static void set_rx_mode(struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; if (dev->flags & IFF_PROMISC) outw(SetRxFilter | RxStation | RxMulticast | RxBroadcast | RxProm, @@ -1161,7 +1162,7 @@ static void set_rx_mode(struct net_device *dev) static int el3_close(struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; struct el3_private *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c index e862d14ece79..1b1abb19c911 100644 --- a/drivers/net/pcmcia/3c589_cs.c +++ b/drivers/net/pcmcia/3c589_cs.c @@ -145,7 +145,7 @@ DRV_NAME ".c " DRV_VERSION " 2001/10/13 00:08:50 (David Hinds)"; static int tc589_config(struct pcmcia_device *link); static void tc589_release(struct pcmcia_device *link); -static u16 read_eeprom(kio_addr_t ioaddr, int index); +static u16 read_eeprom(unsigned int ioaddr, int index); static void tc589_reset(struct net_device *dev); static void media_check(unsigned long arg); static int el3_config(struct net_device *dev, struct ifmap *map); @@ -254,7 +254,7 @@ static int tc589_config(struct pcmcia_device *link) __le16 buf[32]; __be16 *phys_addr; int last_fn, last_ret, i, j, multi = 0, fifo; - kio_addr_t ioaddr; + unsigned int ioaddr; char *ram_split[] = {"5:3", "3:1", "1:1", "3:5"}; DECLARE_MAC_BUF(mac); @@ -403,7 +403,7 @@ static void tc589_wait_for_completion(struct net_device *dev, int cmd) Read a word from the EEPROM using the regular EEPROM access register. Assume that we are in register window zero. */ -static u16 read_eeprom(kio_addr_t ioaddr, int index) +static u16 read_eeprom(unsigned int ioaddr, int index) { int i; outw(EEPROM_READ + index, ioaddr + 10); @@ -421,7 +421,7 @@ static u16 read_eeprom(kio_addr_t ioaddr, int index) static void tc589_set_xcvr(struct net_device *dev, int if_port) { struct el3_private *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; EL3WINDOW(0); switch (if_port) { @@ -443,7 +443,7 @@ static void tc589_set_xcvr(struct net_device *dev, int if_port) static void dump_status(struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; EL3WINDOW(1); printk(KERN_INFO " irq status %04x, rx status %04x, tx status " "%02x tx free %04x\n", inw(ioaddr+EL3_STATUS), @@ -459,7 +459,7 @@ static void dump_status(struct net_device *dev) /* Reset and restore all of the 3c589 registers. */ static void tc589_reset(struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; int i; EL3WINDOW(0); @@ -567,7 +567,7 @@ static int el3_open(struct net_device *dev) static void el3_tx_timeout(struct net_device *dev) { struct el3_private *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; printk(KERN_WARNING "%s: Transmit timed out!\n", dev->name); dump_status(dev); @@ -582,7 +582,7 @@ static void el3_tx_timeout(struct net_device *dev) static void pop_tx_status(struct net_device *dev) { struct el3_private *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; int i; /* Clear the Tx status stack. */ @@ -604,7 +604,7 @@ static void pop_tx_status(struct net_device *dev) static int el3_start_xmit(struct sk_buff *skb, struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; struct el3_private *priv = netdev_priv(dev); unsigned long flags; @@ -641,7 +641,7 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) { struct net_device *dev = (struct net_device *) dev_id; struct el3_private *lp = netdev_priv(dev); - kio_addr_t ioaddr; + unsigned int ioaddr; __u16 status; int i = 0, handled = 1; @@ -727,7 +727,7 @@ static void media_check(unsigned long arg) { struct net_device *dev = (struct net_device *)(arg); struct el3_private *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; u16 media, errs; unsigned long flags; @@ -828,7 +828,7 @@ static struct net_device_stats *el3_get_stats(struct net_device *dev) static void update_stats(struct net_device *dev) { struct el3_private *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; DEBUG(2, "%s: updating the statistics.\n", dev->name); /* Turn off statistics updates while reading. */ @@ -855,7 +855,7 @@ static void update_stats(struct net_device *dev) static int el3_rx(struct net_device *dev) { struct el3_private *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; int worklimit = 32; short rx_status; @@ -909,7 +909,7 @@ static void set_multicast_list(struct net_device *dev) { struct el3_private *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; u16 opts = SetRxFilter | RxStation | RxBroadcast; if (!pcmcia_dev_present(link)) return; @@ -924,7 +924,7 @@ static int el3_close(struct net_device *dev) { struct el3_private *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; DEBUG(1, "%s: shutting down ethercard.\n", dev->name); diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c index 6d342f6c14f6..e8a63e483a2b 100644 --- a/drivers/net/pcmcia/axnet_cs.c +++ b/drivers/net/pcmcia/axnet_cs.c @@ -96,8 +96,8 @@ static irqreturn_t ei_irq_wrapper(int irq, void *dev_id); static void ei_watchdog(u_long arg); static void axnet_reset_8390(struct net_device *dev); -static int mdio_read(kio_addr_t addr, int phy_id, int loc); -static void mdio_write(kio_addr_t addr, int phy_id, int loc, int value); +static int mdio_read(unsigned int addr, int phy_id, int loc); +static void mdio_write(unsigned int addr, int phy_id, int loc, int value); static void get_8390_hdr(struct net_device *, struct e8390_pkt_hdr *, int); @@ -203,7 +203,7 @@ static void axnet_detach(struct pcmcia_device *link) static int get_prom(struct pcmcia_device *link) { struct net_device *dev = link->priv; - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; int i, j; /* This is based on drivers/net/ne.c */ @@ -473,7 +473,7 @@ static int axnet_resume(struct pcmcia_device *link) #define MDIO_MASK 0x0f #define MDIO_ENB_IN 0x02 -static void mdio_sync(kio_addr_t addr) +static void mdio_sync(unsigned int addr) { int bits; for (bits = 0; bits < 32; bits++) { @@ -482,7 +482,7 @@ static void mdio_sync(kio_addr_t addr) } } -static int mdio_read(kio_addr_t addr, int phy_id, int loc) +static int mdio_read(unsigned int addr, int phy_id, int loc) { u_int cmd = (0xf6<<10)|(phy_id<<5)|loc; int i, retval = 0; @@ -501,7 +501,7 @@ static int mdio_read(kio_addr_t addr, int phy_id, int loc) return (retval>>1) & 0xffff; } -static void mdio_write(kio_addr_t addr, int phy_id, int loc, int value) +static void mdio_write(unsigned int addr, int phy_id, int loc, int value) { u_int cmd = (0x05<<28)|(phy_id<<23)|(loc<<18)|(1<<17)|value; int i; @@ -575,7 +575,7 @@ static int axnet_close(struct net_device *dev) static void axnet_reset_8390(struct net_device *dev) { - kio_addr_t nic_base = dev->base_addr; + unsigned int nic_base = dev->base_addr; int i; ei_status.txing = ei_status.dmaing = 0; @@ -610,8 +610,8 @@ static void ei_watchdog(u_long arg) { struct net_device *dev = (struct net_device *)(arg); axnet_dev_t *info = PRIV(dev); - kio_addr_t nic_base = dev->base_addr; - kio_addr_t mii_addr = nic_base + AXNET_MII_EEP; + unsigned int nic_base = dev->base_addr; + unsigned int mii_addr = nic_base + AXNET_MII_EEP; u_short link; if (!netif_device_present(dev)) goto reschedule; @@ -681,7 +681,7 @@ static int axnet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { axnet_dev_t *info = PRIV(dev); u16 *data = (u16 *)&rq->ifr_ifru; - kio_addr_t mii_addr = dev->base_addr + AXNET_MII_EEP; + unsigned int mii_addr = dev->base_addr + AXNET_MII_EEP; switch (cmd) { case SIOCGMIIPHY: data[0] = info->phy_id; @@ -703,7 +703,7 @@ static void get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr, int ring_page) { - kio_addr_t nic_base = dev->base_addr; + unsigned int nic_base = dev->base_addr; outb_p(0, nic_base + EN0_RSARLO); /* On page boundary */ outb_p(ring_page, nic_base + EN0_RSARHI); @@ -721,7 +721,7 @@ static void get_8390_hdr(struct net_device *dev, static void block_input(struct net_device *dev, int count, struct sk_buff *skb, int ring_offset) { - kio_addr_t nic_base = dev->base_addr; + unsigned int nic_base = dev->base_addr; int xfer_count = count; char *buf = skb->data; @@ -744,7 +744,7 @@ static void block_input(struct net_device *dev, int count, static void block_output(struct net_device *dev, int count, const u_char *buf, const int start_page) { - kio_addr_t nic_base = dev->base_addr; + unsigned int nic_base = dev->base_addr; #ifdef PCMCIA_DEBUG if (ei_debug > 4) @@ -991,7 +991,7 @@ static int ax_open(struct net_device *dev) * * Opposite of ax_open(). Only used when "ifconfig <devname> down" is done. */ -int ax_close(struct net_device *dev) +static int ax_close(struct net_device *dev) { unsigned long flags; @@ -1014,7 +1014,7 @@ int ax_close(struct net_device *dev) * completed (or failed) - i.e. never posted a Tx related interrupt. */ -void ei_tx_timeout(struct net_device *dev) +static void ei_tx_timeout(struct net_device *dev) { long e8390_base = dev->base_addr; struct ei_device *ei_local = (struct ei_device *) netdev_priv(dev); @@ -1087,8 +1087,8 @@ static int ei_start_xmit(struct sk_buff *skb, struct net_device *dev) ei_local->irqlock = 1; - send_length = ETH_ZLEN < length ? length : ETH_ZLEN; - + send_length = max(length, ETH_ZLEN); + /* * We have two Tx slots available for use. Find the first free * slot, and then perform some sanity checks. With two Tx bufs, diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index 949c6df74c97..8f328a03847b 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -298,7 +298,8 @@ do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) static int mfc_try_io_port(struct pcmcia_device *link) { int i, ret; - static const kio_addr_t serial_base[5] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 }; + static const unsigned int serial_base[5] = + { 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 }; for (i = 0; i < 5; i++) { link->io.BasePort2 = serial_base[i]; @@ -316,7 +317,7 @@ static int mfc_try_io_port(struct pcmcia_device *link) static int ungermann_try_io_port(struct pcmcia_device *link) { int ret; - kio_addr_t ioaddr; + unsigned int ioaddr; /* Ungermann-Bass Access/CARD accepts 0x300,0x320,0x340,0x360 0x380,0x3c0 only for ioport. @@ -342,7 +343,7 @@ static int fmvj18x_config(struct pcmcia_device *link) cisparse_t parse; u_short buf[32]; int i, last_fn = 0, last_ret = 0, ret; - kio_addr_t ioaddr; + unsigned int ioaddr; cardtype_t cardtype; char *card_name = "unknown"; u_char *node_id; @@ -610,7 +611,7 @@ static int fmvj18x_setup_mfc(struct pcmcia_device *link) u_char __iomem *base; int i, j; struct net_device *dev = link->priv; - kio_addr_t ioaddr; + unsigned int ioaddr; /* Allocate a small memory window */ req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; @@ -735,7 +736,7 @@ static irqreturn_t fjn_interrupt(int dummy, void *dev_id) { struct net_device *dev = dev_id; local_info_t *lp = netdev_priv(dev); - kio_addr_t ioaddr; + unsigned int ioaddr; unsigned short tx_stat, rx_stat; ioaddr = dev->base_addr; @@ -789,7 +790,7 @@ static irqreturn_t fjn_interrupt(int dummy, void *dev_id) static void fjn_tx_timeout(struct net_device *dev) { struct local_info_t *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; printk(KERN_NOTICE "%s: transmit timed out with status %04x, %s?\n", dev->name, htons(inw(ioaddr + TX_STATUS)), @@ -819,7 +820,7 @@ static void fjn_tx_timeout(struct net_device *dev) static int fjn_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct local_info_t *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; short length = skb->len; if (length < ETH_ZLEN) @@ -892,7 +893,7 @@ static int fjn_start_xmit(struct sk_buff *skb, struct net_device *dev) static void fjn_reset(struct net_device *dev) { struct local_info_t *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; int i; DEBUG(4, "fjn_reset(%s) called.\n",dev->name); @@ -971,7 +972,7 @@ static void fjn_reset(struct net_device *dev) static void fjn_rx(struct net_device *dev) { struct local_info_t *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; int boguscount = 10; /* 5 -> 10: by agy 19940922 */ DEBUG(4, "%s: in rx_packet(), rx_status %02x.\n", @@ -1125,7 +1126,7 @@ static int fjn_close(struct net_device *dev) { struct local_info_t *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; DEBUG(4, "fjn_close('%s').\n", dev->name); @@ -1168,7 +1169,7 @@ static struct net_device_stats *fjn_get_stats(struct net_device *dev) static void set_rx_mode(struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; u_char mc_filter[8]; /* Multicast hash filter */ u_long flags; int i; @@ -1197,8 +1198,7 @@ static void set_rx_mode(struct net_device *dev) outb(1, ioaddr + RX_MODE); /* Ignore almost all multicasts. */ } else { struct dev_mc_list *mclist; - int i; - + memset(mc_filter, 0, sizeof(mc_filter)); for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count; i++, mclist = mclist->next) { diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c index a355a93b908b..cfcbea9b7e2e 100644 --- a/drivers/net/pcmcia/nmclan_cs.c +++ b/drivers/net/pcmcia/nmclan_cs.c @@ -518,7 +518,7 @@ mace_read assuming that during normal operation, the MACE is always in bank 0. ---------------------------------------------------------------------------- */ -static int mace_read(mace_private *lp, kio_addr_t ioaddr, int reg) +static int mace_read(mace_private *lp, unsigned int ioaddr, int reg) { int data = 0xFF; unsigned long flags; @@ -545,7 +545,8 @@ mace_write are assuming that during normal operation, the MACE is always in bank 0. ---------------------------------------------------------------------------- */ -static void mace_write(mace_private *lp, kio_addr_t ioaddr, int reg, int data) +static void mace_write(mace_private *lp, unsigned int ioaddr, int reg, + int data) { unsigned long flags; @@ -567,7 +568,7 @@ static void mace_write(mace_private *lp, kio_addr_t ioaddr, int reg, int data) mace_init Resets the MACE chip. ---------------------------------------------------------------------------- */ -static int mace_init(mace_private *lp, kio_addr_t ioaddr, char *enet_addr) +static int mace_init(mace_private *lp, unsigned int ioaddr, char *enet_addr) { int i; int ct = 0; @@ -657,7 +658,7 @@ static int nmclan_config(struct pcmcia_device *link) tuple_t tuple; u_char buf[64]; int i, last_ret, last_fn; - kio_addr_t ioaddr; + unsigned int ioaddr; DECLARE_MAC_BUF(mac); DEBUG(0, "nmclan_config(0x%p)\n", link); @@ -839,7 +840,7 @@ mace_open ---------------------------------------------------------------------------- */ static int mace_open(struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; mace_private *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; @@ -862,7 +863,7 @@ mace_close ---------------------------------------------------------------------------- */ static int mace_close(struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; mace_private *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; @@ -935,7 +936,7 @@ static void mace_tx_timeout(struct net_device *dev) static int mace_start_xmit(struct sk_buff *skb, struct net_device *dev) { mace_private *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; netif_stop_queue(dev); @@ -996,7 +997,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) { struct net_device *dev = (struct net_device *) dev_id; mace_private *lp = netdev_priv(dev); - kio_addr_t ioaddr; + unsigned int ioaddr; int status; int IntrCnt = MACE_MAX_IR_ITERATIONS; @@ -1140,7 +1141,7 @@ mace_rx static int mace_rx(struct net_device *dev, unsigned char RxCnt) { mace_private *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; unsigned char rx_framecnt; unsigned short rx_status; @@ -1302,7 +1303,7 @@ update_stats card's SRAM fast enough. If this happens, something is seriously wrong with the hardware. ---------------------------------------------------------------------------- */ -static void update_stats(kio_addr_t ioaddr, struct net_device *dev) +static void update_stats(unsigned int ioaddr, struct net_device *dev) { mace_private *lp = netdev_priv(dev); @@ -1448,7 +1449,7 @@ static void restore_multicast_list(struct net_device *dev) mace_private *lp = netdev_priv(dev); int num_addrs = lp->multicast_num_addrs; int *ladrf = lp->multicast_ladrf; - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; int i; DEBUG(2, "%s: restoring Rx mode to %d addresses.\n", @@ -1540,7 +1541,7 @@ static void set_multicast_list(struct net_device *dev) static void restore_multicast_list(struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; mace_private *lp = netdev_priv(dev); DEBUG(2, "%s: restoring Rx mode to %d addresses.\n", dev->name, diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index 9ba56aa26a1b..6323988dfa1d 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -349,7 +349,7 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link) static hw_info_t *get_prom(struct pcmcia_device *link) { struct net_device *dev = link->priv; - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; u_char prom[32]; int i, j; @@ -425,7 +425,7 @@ static hw_info_t *get_dl10019(struct pcmcia_device *link) static hw_info_t *get_ax88190(struct pcmcia_device *link) { struct net_device *dev = link->priv; - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; int i, j; /* Not much of a test, but the alternatives are messy */ @@ -521,7 +521,7 @@ static int pcnet_config(struct pcmcia_device *link) int i, last_ret, last_fn, start_pg, stop_pg, cm_offset; int has_shmem = 0; u_short buf[64]; - hw_info_t *hw_info; + hw_info_t *local_hw_info; DECLARE_MAC_BUF(mac); DEBUG(0, "pcnet_config(0x%p)\n", link); @@ -590,23 +590,23 @@ static int pcnet_config(struct pcmcia_device *link) dev->if_port = 0; } - hw_info = get_hwinfo(link); - if (hw_info == NULL) - hw_info = get_prom(link); - if (hw_info == NULL) - hw_info = get_dl10019(link); - if (hw_info == NULL) - hw_info = get_ax88190(link); - if (hw_info == NULL) - hw_info = get_hwired(link); - - if (hw_info == NULL) { + local_hw_info = get_hwinfo(link); + if (local_hw_info == NULL) + local_hw_info = get_prom(link); + if (local_hw_info == NULL) + local_hw_info = get_dl10019(link); + if (local_hw_info == NULL) + local_hw_info = get_ax88190(link); + if (local_hw_info == NULL) + local_hw_info = get_hwired(link); + + if (local_hw_info == NULL) { printk(KERN_NOTICE "pcnet_cs: unable to read hardware net" " address for io base %#3lx\n", dev->base_addr); goto failed; } - info->flags = hw_info->flags; + info->flags = local_hw_info->flags; /* Check for user overrides */ info->flags |= (delay_output) ? DELAY_OUTPUT : 0; if ((link->manf_id == MANFID_SOCKET) && @@ -756,7 +756,7 @@ static int pcnet_resume(struct pcmcia_device *link) #define MDIO_DATA_READ 0x10 #define MDIO_MASK 0x0f -static void mdio_sync(kio_addr_t addr) +static void mdio_sync(unsigned int addr) { int bits, mask = inb(addr) & MDIO_MASK; for (bits = 0; bits < 32; bits++) { @@ -765,7 +765,7 @@ static void mdio_sync(kio_addr_t addr) } } -static int mdio_read(kio_addr_t addr, int phy_id, int loc) +static int mdio_read(unsigned int addr, int phy_id, int loc) { u_int cmd = (0x06<<10)|(phy_id<<5)|loc; int i, retval = 0, mask = inb(addr) & MDIO_MASK; @@ -784,7 +784,7 @@ static int mdio_read(kio_addr_t addr, int phy_id, int loc) return (retval>>1) & 0xffff; } -static void mdio_write(kio_addr_t addr, int phy_id, int loc, int value) +static void mdio_write(unsigned int addr, int phy_id, int loc, int value) { u_int cmd = (0x05<<28)|(phy_id<<23)|(loc<<18)|(1<<17)|value; int i, mask = inb(addr) & MDIO_MASK; @@ -818,10 +818,10 @@ static void mdio_write(kio_addr_t addr, int phy_id, int loc, int value) #define DL19FDUPLX 0x0400 /* DL10019 Full duplex mode */ -static int read_eeprom(kio_addr_t ioaddr, int location) +static int read_eeprom(unsigned int ioaddr, int location) { int i, retval = 0; - kio_addr_t ee_addr = ioaddr + DLINK_EEPROM; + unsigned int ee_addr = ioaddr + DLINK_EEPROM; int read_cmd = location | (EE_READ_CMD << 8); outb(0, ee_addr); @@ -852,10 +852,10 @@ static int read_eeprom(kio_addr_t ioaddr, int location) In ASIC mode, EE_ADOT is used to output the data to the ASIC. */ -static void write_asic(kio_addr_t ioaddr, int location, short asic_data) +static void write_asic(unsigned int ioaddr, int location, short asic_data) { int i; - kio_addr_t ee_addr = ioaddr + DLINK_EEPROM; + unsigned int ee_addr = ioaddr + DLINK_EEPROM; short dataval; int read_cmd = location | (EE_READ_CMD << 8); @@ -897,7 +897,7 @@ static void write_asic(kio_addr_t ioaddr, int location, short asic_data) static void set_misc_reg(struct net_device *dev) { - kio_addr_t nic_base = dev->base_addr; + unsigned int nic_base = dev->base_addr; pcnet_dev_t *info = PRIV(dev); u_char tmp; @@ -936,7 +936,7 @@ static void set_misc_reg(struct net_device *dev) static void mii_phy_probe(struct net_device *dev) { pcnet_dev_t *info = PRIV(dev); - kio_addr_t mii_addr = dev->base_addr + DLINK_GPIO; + unsigned int mii_addr = dev->base_addr + DLINK_GPIO; int i; u_int tmp, phyid; @@ -1014,7 +1014,7 @@ static int pcnet_close(struct net_device *dev) static void pcnet_reset_8390(struct net_device *dev) { - kio_addr_t nic_base = dev->base_addr; + unsigned int nic_base = dev->base_addr; int i; ei_status.txing = ei_status.dmaing = 0; @@ -1074,8 +1074,8 @@ static void ei_watchdog(u_long arg) { struct net_device *dev = (struct net_device *)arg; pcnet_dev_t *info = PRIV(dev); - kio_addr_t nic_base = dev->base_addr; - kio_addr_t mii_addr = nic_base + DLINK_GPIO; + unsigned int nic_base = dev->base_addr; + unsigned int mii_addr = nic_base + DLINK_GPIO; u_short link; if (!netif_device_present(dev)) goto reschedule; @@ -1177,7 +1177,7 @@ static int ei_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { pcnet_dev_t *info = PRIV(dev); u16 *data = (u16 *)&rq->ifr_ifru; - kio_addr_t mii_addr = dev->base_addr + DLINK_GPIO; + unsigned int mii_addr = dev->base_addr + DLINK_GPIO; switch (cmd) { case SIOCGMIIPHY: data[0] = info->phy_id; @@ -1199,7 +1199,7 @@ static void dma_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr, int ring_page) { - kio_addr_t nic_base = dev->base_addr; + unsigned int nic_base = dev->base_addr; if (ei_status.dmaing) { printk(KERN_NOTICE "%s: DMAing conflict in dma_block_input." @@ -1230,7 +1230,7 @@ static void dma_get_8390_hdr(struct net_device *dev, static void dma_block_input(struct net_device *dev, int count, struct sk_buff *skb, int ring_offset) { - kio_addr_t nic_base = dev->base_addr; + unsigned int nic_base = dev->base_addr; int xfer_count = count; char *buf = skb->data; @@ -1285,7 +1285,7 @@ static void dma_block_input(struct net_device *dev, int count, static void dma_block_output(struct net_device *dev, int count, const u_char *buf, const int start_page) { - kio_addr_t nic_base = dev->base_addr; + unsigned int nic_base = dev->base_addr; pcnet_dev_t *info = PRIV(dev); #ifdef PCMCIA_DEBUG int retries = 0; diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c index c9868e9dac4c..f18eca9831e8 100644 --- a/drivers/net/pcmcia/smc91c92_cs.c +++ b/drivers/net/pcmcia/smc91c92_cs.c @@ -295,7 +295,7 @@ static int s9k_config(struct net_device *dev, struct ifmap *map); static void smc_set_xcvr(struct net_device *dev, int if_port); static void smc_reset(struct net_device *dev); static void media_check(u_long arg); -static void mdio_sync(kio_addr_t addr); +static void mdio_sync(unsigned int addr); static int mdio_read(struct net_device *dev, int phy_id, int loc); static void mdio_write(struct net_device *dev, int phy_id, int loc, int value); static int smc_link_ok(struct net_device *dev); @@ -601,8 +601,8 @@ static void mot_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; struct smc_private *smc = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; - kio_addr_t iouart = link->io.BasePort2; + unsigned int ioaddr = dev->base_addr; + unsigned int iouart = link->io.BasePort2; /* Set UART base address and force map with COR bit 1 */ writeb(iouart & 0xff, smc->base + MOT_UART + CISREG_IOBASE_0); @@ -621,7 +621,7 @@ static void mot_config(struct pcmcia_device *link) static int mot_setup(struct pcmcia_device *link) { struct net_device *dev = link->priv; - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; int i, wait, loop; u_int addr; @@ -754,7 +754,7 @@ free_cfg_mem: static int osi_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; - static const kio_addr_t com[4] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8 }; + static const unsigned int com[4] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8 }; int i, j; link->conf.Attributes |= CONF_ENABLE_SPKR; @@ -900,7 +900,7 @@ static int smc91c92_resume(struct pcmcia_device *link) static int check_sig(struct pcmcia_device *link) { struct net_device *dev = link->priv; - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; int width; u_short s; @@ -960,7 +960,7 @@ static int smc91c92_config(struct pcmcia_device *link) struct smc_private *smc = netdev_priv(dev); char *name; int i, j, rev; - kio_addr_t ioaddr; + unsigned int ioaddr; u_long mir; DECLARE_MAC_BUF(mac); @@ -1136,7 +1136,7 @@ static void smc91c92_release(struct pcmcia_device *link) #define MDIO_DATA_WRITE1 (MDIO_DIR_WRITE | MDIO_DATA_OUT) #define MDIO_DATA_READ 0x02 -static void mdio_sync(kio_addr_t addr) +static void mdio_sync(unsigned int addr) { int bits; for (bits = 0; bits < 32; bits++) { @@ -1147,7 +1147,7 @@ static void mdio_sync(kio_addr_t addr) static int mdio_read(struct net_device *dev, int phy_id, int loc) { - kio_addr_t addr = dev->base_addr + MGMT; + unsigned int addr = dev->base_addr + MGMT; u_int cmd = (0x06<<10)|(phy_id<<5)|loc; int i, retval = 0; @@ -1167,7 +1167,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int loc) static void mdio_write(struct net_device *dev, int phy_id, int loc, int value) { - kio_addr_t addr = dev->base_addr + MGMT; + unsigned int addr = dev->base_addr + MGMT; u_int cmd = (0x05<<28)|(phy_id<<23)|(loc<<18)|(1<<17)|value; int i; @@ -1193,7 +1193,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int loc, int value) #ifdef PCMCIA_DEBUG static void smc_dump(struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; u_short i, w, save; save = inw(ioaddr + BANK_SELECT); for (w = 0; w < 4; w++) { @@ -1248,7 +1248,7 @@ static int smc_close(struct net_device *dev) { struct smc_private *smc = netdev_priv(dev); struct pcmcia_device *link = smc->p_dev; - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; DEBUG(0, "%s: smc_close(), status %4.4x.\n", dev->name, inw(ioaddr + BANK_SELECT)); @@ -1285,7 +1285,7 @@ static void smc_hardware_send_packet(struct net_device * dev) { struct smc_private *smc = netdev_priv(dev); struct sk_buff *skb = smc->saved_skb; - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; u_char packet_no; if (!skb) { @@ -1349,7 +1349,7 @@ static void smc_hardware_send_packet(struct net_device * dev) static void smc_tx_timeout(struct net_device *dev) { struct smc_private *smc = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; printk(KERN_NOTICE "%s: SMC91c92 transmit timed out, " "Tx_status %2.2x status %4.4x.\n", @@ -1364,7 +1364,7 @@ static void smc_tx_timeout(struct net_device *dev) static int smc_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct smc_private *smc = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; u_short num_pages; short time_out, ir; unsigned long flags; @@ -1434,7 +1434,7 @@ static int smc_start_xmit(struct sk_buff *skb, struct net_device *dev) static void smc_tx_err(struct net_device * dev) { struct smc_private *smc = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; int saved_packet = inw(ioaddr + PNR_ARR) & 0xff; int packet_no = inw(ioaddr + FIFO_PORTS) & 0x7f; int tx_status; @@ -1478,7 +1478,7 @@ static void smc_tx_err(struct net_device * dev) static void smc_eph_irq(struct net_device *dev) { struct smc_private *smc = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; u_short card_stats, ephs; SMC_SELECT_BANK(0); @@ -1513,7 +1513,7 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) { struct net_device *dev = dev_id; struct smc_private *smc = netdev_priv(dev); - kio_addr_t ioaddr; + unsigned int ioaddr; u_short saved_bank, saved_pointer, mask, status; unsigned int handled = 1; char bogus_cnt = INTR_WORK; /* Work we are willing to do. */ @@ -1633,7 +1633,7 @@ irq_done: static void smc_rx(struct net_device *dev) { struct smc_private *smc = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; int rx_status; int packet_length; /* Caution: not frame length, rather words to transfer from the chip. */ @@ -1738,7 +1738,7 @@ static void fill_multicast_tbl(int count, struct dev_mc_list *addrs, static void set_rx_mode(struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; struct smc_private *smc = netdev_priv(dev); u_int multicast_table[ 2 ] = { 0, }; unsigned long flags; @@ -1804,7 +1804,7 @@ static int s9k_config(struct net_device *dev, struct ifmap *map) static void smc_set_xcvr(struct net_device *dev, int if_port) { struct smc_private *smc = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; u_short saved_bank; saved_bank = inw(ioaddr + BANK_SELECT); @@ -1827,7 +1827,7 @@ static void smc_set_xcvr(struct net_device *dev, int if_port) static void smc_reset(struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; struct smc_private *smc = netdev_priv(dev); int i; @@ -1904,7 +1904,7 @@ static void media_check(u_long arg) { struct net_device *dev = (struct net_device *) arg; struct smc_private *smc = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; u_short i, media, saved_bank; u_short link; unsigned long flags; @@ -2021,7 +2021,7 @@ reschedule: static int smc_link_ok(struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; struct smc_private *smc = netdev_priv(dev); if (smc->cfg & CFG_MII_SELECT) { @@ -2035,7 +2035,7 @@ static int smc_link_ok(struct net_device *dev) static int smc_netdev_get_ecmd(struct net_device *dev, struct ethtool_cmd *ecmd) { u16 tmp; - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; ecmd->supported = (SUPPORTED_TP | SUPPORTED_AUI | SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full); @@ -2057,7 +2057,7 @@ static int smc_netdev_get_ecmd(struct net_device *dev, struct ethtool_cmd *ecmd) static int smc_netdev_set_ecmd(struct net_device *dev, struct ethtool_cmd *ecmd) { u16 tmp; - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; if (ecmd->speed != SPEED_10) return -EINVAL; @@ -2100,7 +2100,7 @@ static void smc_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info static int smc_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) { struct smc_private *smc = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; u16 saved_bank = inw(ioaddr + BANK_SELECT); int ret; @@ -2118,7 +2118,7 @@ static int smc_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) static int smc_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) { struct smc_private *smc = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; u16 saved_bank = inw(ioaddr + BANK_SELECT); int ret; @@ -2136,7 +2136,7 @@ static int smc_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) static u32 smc_get_link(struct net_device *dev) { struct smc_private *smc = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; u16 saved_bank = inw(ioaddr + BANK_SELECT); u32 ret; @@ -2164,7 +2164,7 @@ static int smc_nway_reset(struct net_device *dev) { struct smc_private *smc = netdev_priv(dev); if (smc->cfg & CFG_MII_SELECT) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; u16 saved_bank = inw(ioaddr + BANK_SELECT); int res; @@ -2196,7 +2196,7 @@ static int smc_ioctl (struct net_device *dev, struct ifreq *rq, int cmd) struct mii_ioctl_data *mii = if_mii(rq); int rc = 0; u16 saved_bank; - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; if (!netif_running(dev)) return -EINVAL; diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c index 1f09bea6db5a..d041f831a18d 100644 --- a/drivers/net/pcmcia/xirc2ps_cs.c +++ b/drivers/net/pcmcia/xirc2ps_cs.c @@ -273,12 +273,12 @@ INT_MODULE_PARM(lockup_hack, 0); /* anti lockup hack */ static unsigned maxrx_bytes = 22000; /* MII management prototypes */ -static void mii_idle(kio_addr_t ioaddr); -static void mii_putbit(kio_addr_t ioaddr, unsigned data); -static int mii_getbit(kio_addr_t ioaddr); -static void mii_wbits(kio_addr_t ioaddr, unsigned data, int len); -static unsigned mii_rd(kio_addr_t ioaddr, u_char phyaddr, u_char phyreg); -static void mii_wr(kio_addr_t ioaddr, u_char phyaddr, u_char phyreg, +static void mii_idle(unsigned int ioaddr); +static void mii_putbit(unsigned int ioaddr, unsigned data); +static int mii_getbit(unsigned int ioaddr); +static void mii_wbits(unsigned int ioaddr, unsigned data, int len); +static unsigned mii_rd(unsigned int ioaddr, u_char phyaddr, u_char phyreg); +static void mii_wr(unsigned int ioaddr, u_char phyaddr, u_char phyreg, unsigned data, int len); /* @@ -403,7 +403,7 @@ next_tuple(struct pcmcia_device *handle, tuple_t *tuple, cisparse_t *parse) static void PrintRegisters(struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; if (pc_debug > 1) { int i, page; @@ -439,7 +439,7 @@ PrintRegisters(struct net_device *dev) * Turn around for read */ static void -mii_idle(kio_addr_t ioaddr) +mii_idle(unsigned int ioaddr) { PutByte(XIRCREG2_GPR2, 0x04|0); /* drive MDCK low */ udelay(1); @@ -451,7 +451,7 @@ mii_idle(kio_addr_t ioaddr) * Write a bit to MDI/O */ static void -mii_putbit(kio_addr_t ioaddr, unsigned data) +mii_putbit(unsigned int ioaddr, unsigned data) { #if 1 if (data) { @@ -484,7 +484,7 @@ mii_putbit(kio_addr_t ioaddr, unsigned data) * Get a bit from MDI/O */ static int -mii_getbit(kio_addr_t ioaddr) +mii_getbit(unsigned int ioaddr) { unsigned d; @@ -497,7 +497,7 @@ mii_getbit(kio_addr_t ioaddr) } static void -mii_wbits(kio_addr_t ioaddr, unsigned data, int len) +mii_wbits(unsigned int ioaddr, unsigned data, int len) { unsigned m = 1 << (len-1); for (; m; m >>= 1) @@ -505,7 +505,7 @@ mii_wbits(kio_addr_t ioaddr, unsigned data, int len) } static unsigned -mii_rd(kio_addr_t ioaddr, u_char phyaddr, u_char phyreg) +mii_rd(unsigned int ioaddr, u_char phyaddr, u_char phyreg) { int i; unsigned data=0, m; @@ -527,7 +527,8 @@ mii_rd(kio_addr_t ioaddr, u_char phyaddr, u_char phyreg) } static void -mii_wr(kio_addr_t ioaddr, u_char phyaddr, u_char phyreg, unsigned data, int len) +mii_wr(unsigned int ioaddr, u_char phyaddr, u_char phyreg, unsigned data, + int len) { int i; @@ -726,7 +727,7 @@ xirc2ps_config(struct pcmcia_device * link) local_info_t *local = netdev_priv(dev); tuple_t tuple; cisparse_t parse; - kio_addr_t ioaddr; + unsigned int ioaddr; int err, i; u_char buf[64]; cistpl_lan_node_id_t *node_id = (cistpl_lan_node_id_t*)parse.funce.data; @@ -1104,7 +1105,7 @@ xirc2ps_interrupt(int irq, void *dev_id) { struct net_device *dev = (struct net_device *)dev_id; local_info_t *lp = netdev_priv(dev); - kio_addr_t ioaddr; + unsigned int ioaddr; u_char saved_page; unsigned bytes_rcvd; unsigned int_status, eth_status, rx_status, tx_status; @@ -1209,7 +1210,7 @@ xirc2ps_interrupt(int irq, void *dev_id) unsigned i; u_long *p = skb_put(skb, pktlen); register u_long a; - kio_addr_t edpreg = ioaddr+XIRCREG_EDP-2; + unsigned int edpreg = ioaddr+XIRCREG_EDP-2; for (i=0; i < len ; i += 4, p++) { a = inl(edpreg); __asm__("rorl $16,%0\n\t" @@ -1346,7 +1347,7 @@ static int do_start_xmit(struct sk_buff *skb, struct net_device *dev) { local_info_t *lp = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; int okay; unsigned freespace; unsigned pktlen = skb->len; @@ -1415,7 +1416,7 @@ do_get_stats(struct net_device *dev) static void set_addresses(struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; local_info_t *lp = netdev_priv(dev); struct dev_mc_list *dmi = dev->mc_list; unsigned char *addr; @@ -1459,7 +1460,7 @@ set_addresses(struct net_device *dev) static void set_multicast_list(struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; SelectPage(0x42); if (dev->flags & IFF_PROMISC) { /* snoop */ @@ -1543,7 +1544,7 @@ static int do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { local_info_t *local = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; u16 *data = (u16 *)&rq->ifr_ifru; DEBUG(1, "%s: ioctl(%-.6s, %#04x) %04x %04x %04x %04x\n", @@ -1575,7 +1576,7 @@ static void hardreset(struct net_device *dev) { local_info_t *local = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; SelectPage(4); udelay(1); @@ -1592,7 +1593,7 @@ static void do_reset(struct net_device *dev, int full) { local_info_t *local = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; unsigned value; DEBUG(0, "%s: do_reset(%p,%d)\n", dev? dev->name:"eth?", dev, full); @@ -1753,7 +1754,7 @@ static int init_mii(struct net_device *dev) { local_info_t *local = netdev_priv(dev); - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; unsigned control, status, linkpartner; int i; @@ -1826,7 +1827,7 @@ static void do_powerdown(struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; DEBUG(0, "do_powerdown(%p)\n", dev); @@ -1838,7 +1839,7 @@ do_powerdown(struct net_device *dev) static int do_stop(struct net_device *dev) { - kio_addr_t ioaddr = dev->base_addr; + unsigned int ioaddr = dev->base_addr; local_info_t *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; diff --git a/drivers/net/wireless/b43/b43.h b/drivers/net/wireless/b43/b43.h index 32a24f5c4fa6..08a011f0834a 100644 --- a/drivers/net/wireless/b43/b43.h +++ b/drivers/net/wireless/b43/b43.h @@ -724,6 +724,7 @@ struct b43_wldev { bool short_preamble; /* TRUE, if short preamble is enabled. */ bool short_slot; /* TRUE, if short slot timing is enabled. */ bool radio_hw_enable; /* saved state of radio hardware enabled state */ + bool suspend_in_progress; /* TRUE, if we are in a suspend/resume cycle */ /* PHY/Radio device. */ struct b43_phy phy; diff --git a/drivers/net/wireless/b43/leds.c b/drivers/net/wireless/b43/leds.c index 4b590d8c65ff..0908335892db 100644 --- a/drivers/net/wireless/b43/leds.c +++ b/drivers/net/wireless/b43/leds.c @@ -116,7 +116,10 @@ static void b43_unregister_led(struct b43_led *led) { if (!led->dev) return; - led_classdev_unregister(&led->led_dev); + if (led->dev->suspend_in_progress) + led_classdev_unregister_suspended(&led->led_dev); + else + led_classdev_unregister(&led->led_dev); b43_led_turn_off(led->dev, led->index, led->activelow); led->dev = NULL; } diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 64c154d080d8..ef65c41af00f 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -38,6 +38,7 @@ #include <linux/wireless.h> #include <linux/workqueue.h> #include <linux/skbuff.h> +#include <linux/io.h> #include <linux/dma-mapping.h> #include <asm/unaligned.h> @@ -2554,10 +2555,10 @@ static int b43_rng_read(struct hwrng *rng, u32 * data) return (sizeof(u16)); } -static void b43_rng_exit(struct b43_wl *wl) +static void b43_rng_exit(struct b43_wl *wl, bool suspended) { if (wl->rng_initialized) - hwrng_unregister(&wl->rng); + __hwrng_unregister(&wl->rng, suspended); } static int b43_rng_init(struct b43_wl *wl) @@ -3417,8 +3418,10 @@ static void b43_wireless_core_exit(struct b43_wldev *dev) macctl |= B43_MACCTL_PSM_JMP0; b43_write32(dev, B43_MMIO_MACCTL, macctl); - b43_leds_exit(dev); - b43_rng_exit(dev->wl); + if (!dev->suspend_in_progress) { + b43_leds_exit(dev); + b43_rng_exit(dev->wl, false); + } b43_dma_free(dev); b43_chip_exit(dev); b43_radio_turn_off(dev, 1); @@ -3534,11 +3537,13 @@ static int b43_wireless_core_init(struct b43_wldev *dev) ssb_bus_powerup(bus, 1); /* Enable dynamic PCTL */ b43_upload_card_macaddress(dev); b43_security_init(dev); - b43_rng_init(wl); + if (!dev->suspend_in_progress) + b43_rng_init(wl); b43_set_status(dev, B43_STAT_INITIALIZED); - b43_leds_init(dev); + if (!dev->suspend_in_progress) + b43_leds_init(dev); out: return err; @@ -4135,6 +4140,7 @@ static int b43_suspend(struct ssb_device *dev, pm_message_t state) b43dbg(wl, "Suspending...\n"); mutex_lock(&wl->mutex); + wldev->suspend_in_progress = true; wldev->suspend_init_status = b43_status(wldev); if (wldev->suspend_init_status >= B43_STAT_STARTED) b43_wireless_core_stop(wldev); @@ -4166,15 +4172,17 @@ static int b43_resume(struct ssb_device *dev) if (wldev->suspend_init_status >= B43_STAT_STARTED) { err = b43_wireless_core_start(wldev); if (err) { + b43_leds_exit(wldev); + b43_rng_exit(wldev->wl, true); b43_wireless_core_exit(wldev); b43err(wl, "Resume failed at core start\n"); goto out; } } - mutex_unlock(&wl->mutex); - b43dbg(wl, "Device resumed.\n"); - out: + out: + wldev->suspend_in_progress = false; + mutex_unlock(&wl->mutex); return err; } diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c index 2ab107f45793..5bf9e00b070c 100644 --- a/drivers/net/wireless/ipw2100.c +++ b/drivers/net/wireless/ipw2100.c @@ -162,7 +162,7 @@ that only one external action is invoked at a time. #include <linux/firmware.h> #include <linux/acpi.h> #include <linux/ctype.h> -#include <linux/latency.h> +#include <linux/pm_qos_params.h> #include "ipw2100.h" @@ -1701,7 +1701,7 @@ static int ipw2100_up(struct ipw2100_priv *priv, int deferred) /* the ipw2100 hardware really doesn't want power management delays * longer than 175usec */ - modify_acceptable_latency("ipw2100", 175); + pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100", 175); /* If the interrupt is enabled, turn it off... */ spin_lock_irqsave(&priv->low_lock, flags); @@ -1856,7 +1856,8 @@ static void ipw2100_down(struct ipw2100_priv *priv) ipw2100_disable_interrupts(priv); spin_unlock_irqrestore(&priv->low_lock, flags); - modify_acceptable_latency("ipw2100", INFINITE_LATENCY); + pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100", + PM_QOS_DEFAULT_VALUE); /* We have to signal any supplicant if we are disassociating */ if (associated) @@ -6554,7 +6555,8 @@ static int __init ipw2100_init(void) if (ret) goto out; - set_acceptable_latency("ipw2100", INFINITE_LATENCY); + pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100", + PM_QOS_DEFAULT_VALUE); #ifdef CONFIG_IPW2100_DEBUG ipw2100_debug_level = debug; ret = driver_create_file(&ipw2100_pci_driver.driver, @@ -6576,7 +6578,7 @@ static void __exit ipw2100_exit(void) &driver_attr_debug_level); #endif pci_unregister_driver(&ipw2100_pci_driver); - remove_acceptable_latency("ipw2100"); + pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100"); } module_init(ipw2100_init); diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c index d2fa079fbc4c..f479c1af6782 100644 --- a/drivers/net/wireless/netwave_cs.c +++ b/drivers/net/wireless/netwave_cs.c @@ -195,7 +195,7 @@ static int netwave_pcmcia_config(struct pcmcia_device *arg); /* Runs after card static void netwave_detach(struct pcmcia_device *p_dev); /* Destroy instance */ /* Hardware configuration */ -static void netwave_doreset(kio_addr_t iobase, u_char __iomem *ramBase); +static void netwave_doreset(unsigned int iobase, u_char __iomem *ramBase); static void netwave_reset(struct net_device *dev); /* Misc device stuff */ @@ -309,7 +309,7 @@ static inline void wait_WOC(unsigned int iobase) } static void netwave_snapshot(netwave_private *priv, u_char __iomem *ramBase, - kio_addr_t iobase) { + unsigned int iobase) { u_short resultBuffer; /* if time since last snapshot is > 1 sec. (100 jiffies?) then take @@ -340,7 +340,7 @@ static void netwave_snapshot(netwave_private *priv, u_char __iomem *ramBase, static struct iw_statistics *netwave_get_wireless_stats(struct net_device *dev) { unsigned long flags; - kio_addr_t iobase = dev->base_addr; + unsigned int iobase = dev->base_addr; netwave_private *priv = netdev_priv(dev); u_char __iomem *ramBase = priv->ramBase; struct iw_statistics* wstats; @@ -471,7 +471,7 @@ static int netwave_set_nwid(struct net_device *dev, char *extra) { unsigned long flags; - kio_addr_t iobase = dev->base_addr; + unsigned int iobase = dev->base_addr; netwave_private *priv = netdev_priv(dev); u_char __iomem *ramBase = priv->ramBase; @@ -518,7 +518,7 @@ static int netwave_set_scramble(struct net_device *dev, char *key) { unsigned long flags; - kio_addr_t iobase = dev->base_addr; + unsigned int iobase = dev->base_addr; netwave_private *priv = netdev_priv(dev); u_char __iomem *ramBase = priv->ramBase; @@ -621,7 +621,7 @@ static int netwave_get_snap(struct net_device *dev, char *extra) { unsigned long flags; - kio_addr_t iobase = dev->base_addr; + unsigned int iobase = dev->base_addr; netwave_private *priv = netdev_priv(dev); u_char __iomem *ramBase = priv->ramBase; @@ -874,7 +874,7 @@ static int netwave_resume(struct pcmcia_device *link) * * Proper hardware reset of the card. */ -static void netwave_doreset(kio_addr_t ioBase, u_char __iomem *ramBase) +static void netwave_doreset(unsigned int ioBase, u_char __iomem *ramBase) { /* Reset card */ wait_WOC(ioBase); @@ -892,7 +892,7 @@ static void netwave_reset(struct net_device *dev) { /* u_char state; */ netwave_private *priv = netdev_priv(dev); u_char __iomem *ramBase = priv->ramBase; - kio_addr_t iobase = dev->base_addr; + unsigned int iobase = dev->base_addr; DEBUG(0, "netwave_reset: Done with hardware reset\n"); @@ -973,7 +973,7 @@ static int netwave_hw_xmit(unsigned char* data, int len, netwave_private *priv = netdev_priv(dev); u_char __iomem * ramBase = priv->ramBase; - kio_addr_t iobase = dev->base_addr; + unsigned int iobase = dev->base_addr; /* Disable interrupts & save flags */ spin_lock_irqsave(&priv->spinlock, flags); @@ -1065,7 +1065,7 @@ static int netwave_start_xmit(struct sk_buff *skb, struct net_device *dev) { */ static irqreturn_t netwave_interrupt(int irq, void* dev_id) { - kio_addr_t iobase; + unsigned int iobase; u_char __iomem *ramBase; struct net_device *dev = (struct net_device *)dev_id; struct netwave_private *priv = netdev_priv(dev); @@ -1235,7 +1235,7 @@ static int netwave_rx(struct net_device *dev) { netwave_private *priv = netdev_priv(dev); u_char __iomem *ramBase = priv->ramBase; - kio_addr_t iobase = dev->base_addr; + unsigned int iobase = dev->base_addr; u_char rxStatus; struct sk_buff *skb = NULL; unsigned int curBuffer, @@ -1388,7 +1388,7 @@ module_exit(exit_netwave_cs); */ static void set_multicast_list(struct net_device *dev) { - kio_addr_t iobase = dev->base_addr; + unsigned int iobase = dev->base_addr; netwave_private *priv = netdev_priv(dev); u_char __iomem * ramBase = priv->ramBase; u_char rcvMode = 0; diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c index c2037b2a05bf..06eea6ab7bf0 100644 --- a/drivers/net/wireless/wavelan_cs.c +++ b/drivers/net/wireless/wavelan_cs.c @@ -149,7 +149,7 @@ psa_write(struct net_device * dev, net_local *lp = netdev_priv(dev); u_char __iomem *ptr = lp->mem + PSA_ADDR + (o << 1); int count = 0; - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; /* As there seem to have no flag PSA_BUSY as in the ISA model, we are * oblige to verify this address to know when the PSA is ready... */ volatile u_char __iomem *verify = lp->mem + PSA_ADDR + @@ -708,7 +708,7 @@ static void wl_update_history(wavepoint_history *wavepoint, unsigned char sigqua /* Perform a handover to a new WavePoint */ static void wv_roam_handover(wavepoint_history *wavepoint, net_local *lp) { - kio_addr_t base = lp->dev->base_addr; + unsigned int base = lp->dev->base_addr; mm_t m; unsigned long flags; @@ -821,7 +821,7 @@ wv_82593_cmd(struct net_device * dev, int cmd, int result) { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; int status; int wait_completed; long spin; @@ -945,7 +945,7 @@ read_ringbuf(struct net_device * dev, char * buf, int len) { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; int ring_ptr = addr; int chunk_len; char * buf_ptr = buf; @@ -1096,7 +1096,7 @@ wv_psa_show(psa_t * p) static void wv_mmc_show(struct net_device * dev) { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; net_local * lp = netdev_priv(dev); mmr_t m; @@ -1275,7 +1275,7 @@ wv_packet_info(u_char * p, /* Packet to dump */ static inline void wv_init_info(struct net_device * dev) { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; psa_t psa; DECLARE_MAC_BUF(mac); @@ -1294,7 +1294,7 @@ wv_init_info(struct net_device * dev) #ifdef DEBUG_BASIC_SHOW /* Now, let's go for the basic stuff */ - printk(KERN_NOTICE "%s: WaveLAN: port %#lx, irq %d, " + printk(KERN_NOTICE "%s: WaveLAN: port %#x, irq %d, " "hw_addr %s", dev->name, base, dev->irq, print_mac(mac, dev->dev_addr)); @@ -1828,7 +1828,7 @@ static int wavelan_set_nwid(struct net_device *dev, union iwreq_data *wrqu, char *extra) { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; net_local *lp = netdev_priv(dev); psa_t psa; mm_t m; @@ -1918,7 +1918,7 @@ static int wavelan_set_freq(struct net_device *dev, union iwreq_data *wrqu, char *extra) { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; net_local *lp = netdev_priv(dev); unsigned long flags; int ret; @@ -1948,7 +1948,7 @@ static int wavelan_get_freq(struct net_device *dev, union iwreq_data *wrqu, char *extra) { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; net_local *lp = netdev_priv(dev); psa_t psa; unsigned long flags; @@ -1994,7 +1994,7 @@ static int wavelan_set_sens(struct net_device *dev, union iwreq_data *wrqu, char *extra) { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; net_local *lp = netdev_priv(dev); psa_t psa; unsigned long flags; @@ -2060,7 +2060,7 @@ static int wavelan_set_encode(struct net_device *dev, union iwreq_data *wrqu, char *extra) { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; net_local *lp = netdev_priv(dev); unsigned long flags; psa_t psa; @@ -2130,7 +2130,7 @@ static int wavelan_get_encode(struct net_device *dev, union iwreq_data *wrqu, char *extra) { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; net_local *lp = netdev_priv(dev); psa_t psa; unsigned long flags; @@ -2349,7 +2349,7 @@ static int wavelan_get_range(struct net_device *dev, union iwreq_data *wrqu, char *extra) { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; net_local *lp = netdev_priv(dev); struct iw_range *range = (struct iw_range *) extra; unsigned long flags; @@ -2425,7 +2425,7 @@ static int wavelan_set_qthr(struct net_device *dev, union iwreq_data *wrqu, char *extra) { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; net_local *lp = netdev_priv(dev); psa_t psa; unsigned long flags; @@ -2701,7 +2701,7 @@ static const struct iw_handler_def wavelan_handler_def = static iw_stats * wavelan_get_wireless_stats(struct net_device * dev) { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; net_local * lp = netdev_priv(dev); mmr_t m; iw_stats * wstats; @@ -2764,7 +2764,7 @@ wv_start_of_frame(struct net_device * dev, int rfp, /* end of frame */ int wrap) /* start of buffer */ { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; int rp; int len; @@ -2925,7 +2925,7 @@ wv_packet_read(struct net_device * dev, static inline void wv_packet_rcv(struct net_device * dev) { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; net_local * lp = netdev_priv(dev); int newrfp; int rp; @@ -3062,7 +3062,7 @@ wv_packet_write(struct net_device * dev, short length) { net_local * lp = netdev_priv(dev); - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; unsigned long flags; int clen = length; register u_short xmtdata_base = TX_BASE; @@ -3183,7 +3183,7 @@ wavelan_packet_xmit(struct sk_buff * skb, static inline int wv_mmc_init(struct net_device * dev) { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; psa_t psa; mmw_t m; int configured; @@ -3377,7 +3377,7 @@ wv_mmc_init(struct net_device * dev) static int wv_ru_stop(struct net_device * dev) { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; net_local * lp = netdev_priv(dev); unsigned long flags; int status; @@ -3440,7 +3440,7 @@ wv_ru_stop(struct net_device * dev) static int wv_ru_start(struct net_device * dev) { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; net_local * lp = netdev_priv(dev); unsigned long flags; @@ -3528,7 +3528,7 @@ wv_ru_start(struct net_device * dev) static int wv_82593_config(struct net_device * dev) { - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; net_local * lp = netdev_priv(dev); struct i82593_conf_block cfblk; int ret = TRUE; @@ -3765,7 +3765,7 @@ static int wv_hw_config(struct net_device * dev) { net_local * lp = netdev_priv(dev); - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; unsigned long flags; int ret = FALSE; @@ -4047,7 +4047,7 @@ wavelan_interrupt(int irq, { struct net_device * dev = dev_id; net_local * lp; - kio_addr_t base; + unsigned int base; int status0; u_int tx_status; @@ -4306,7 +4306,7 @@ static void wavelan_watchdog(struct net_device * dev) { net_local * lp = netdev_priv(dev); - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; unsigned long flags; int aborted = FALSE; @@ -4382,7 +4382,7 @@ wavelan_open(struct net_device * dev) { net_local * lp = netdev_priv(dev); struct pcmcia_device * link = lp->link; - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; #ifdef DEBUG_CALLBACK_TRACE printk(KERN_DEBUG "%s: ->wavelan_open(dev=0x%x)\n", dev->name, @@ -4436,7 +4436,7 @@ static int wavelan_close(struct net_device * dev) { struct pcmcia_device * link = ((net_local *)netdev_priv(dev))->link; - kio_addr_t base = dev->base_addr; + unsigned int base = dev->base_addr; #ifdef DEBUG_CALLBACK_TRACE printk(KERN_DEBUG "%s: ->wavelan_close(dev=0x%x)\n", dev->name, diff --git a/drivers/nubus/Makefile b/drivers/nubus/Makefile index f5ef03cf9879..21bda2031e7e 100644 --- a/drivers/nubus/Makefile +++ b/drivers/nubus/Makefile @@ -4,5 +4,4 @@ obj-y := nubus.o -obj-$(CONFIG_MODULES) += nubus_syms.o obj-$(CONFIG_PROC_FS) += proc.o diff --git a/drivers/nubus/nubus.c b/drivers/nubus/nubus.c index f4076aeb2098..2f047e573d86 100644 --- a/drivers/nubus/nubus.c +++ b/drivers/nubus/nubus.c @@ -14,6 +14,7 @@ #include <linux/errno.h> #include <linux/init.h> #include <linux/delay.h> +#include <linux/module.h> #include <asm/setup.h> #include <asm/system.h> #include <asm/page.h> @@ -186,6 +187,7 @@ void nubus_get_rsrc_mem(void *dest, const struct nubus_dirent* dirent, len--; } } +EXPORT_SYMBOL(nubus_get_rsrc_mem); void nubus_get_rsrc_str(void *dest, const struct nubus_dirent* dirent, int len) @@ -200,6 +202,7 @@ void nubus_get_rsrc_str(void *dest, const struct nubus_dirent* dirent, len--; } } +EXPORT_SYMBOL(nubus_get_rsrc_str); int nubus_get_root_dir(const struct nubus_board* board, struct nubus_dir* dir) @@ -209,6 +212,7 @@ int nubus_get_root_dir(const struct nubus_board* board, dir->mask = board->lanes; return 0; } +EXPORT_SYMBOL(nubus_get_root_dir); /* This is a slyly renamed version of the above */ int nubus_get_func_dir(const struct nubus_dev* dev, @@ -219,6 +223,7 @@ int nubus_get_func_dir(const struct nubus_dev* dev, dir->mask = dev->board->lanes; return 0; } +EXPORT_SYMBOL(nubus_get_func_dir); int nubus_get_board_dir(const struct nubus_board* board, struct nubus_dir* dir) @@ -237,6 +242,7 @@ int nubus_get_board_dir(const struct nubus_board* board, return -1; return 0; } +EXPORT_SYMBOL(nubus_get_board_dir); int nubus_get_subdir(const struct nubus_dirent *ent, struct nubus_dir *dir) @@ -246,6 +252,7 @@ int nubus_get_subdir(const struct nubus_dirent *ent, dir->mask = ent->mask; return 0; } +EXPORT_SYMBOL(nubus_get_subdir); int nubus_readdir(struct nubus_dir *nd, struct nubus_dirent *ent) { @@ -274,12 +281,14 @@ int nubus_readdir(struct nubus_dir *nd, struct nubus_dirent *ent) ent->mask = nd->mask; return 0; } +EXPORT_SYMBOL(nubus_readdir); int nubus_rewinddir(struct nubus_dir* dir) { dir->ptr = dir->base; return 0; } +EXPORT_SYMBOL(nubus_rewinddir); /* Driver interface functions, more or less like in pci.c */ @@ -303,6 +312,7 @@ nubus_find_device(unsigned short category, } return NULL; } +EXPORT_SYMBOL(nubus_find_device); struct nubus_dev* nubus_find_type(unsigned short category, @@ -320,6 +330,7 @@ nubus_find_type(unsigned short category, } return NULL; } +EXPORT_SYMBOL(nubus_find_type); struct nubus_dev* nubus_find_slot(unsigned int slot, @@ -335,6 +346,7 @@ nubus_find_slot(unsigned int slot, } return NULL; } +EXPORT_SYMBOL(nubus_find_slot); int nubus_find_rsrc(struct nubus_dir* dir, unsigned char rsrc_type, @@ -346,6 +358,7 @@ nubus_find_rsrc(struct nubus_dir* dir, unsigned char rsrc_type, } return -1; } +EXPORT_SYMBOL(nubus_find_rsrc); /* Initialization functions - decide which slots contain stuff worth looking at, and print out lots and lots of information from the diff --git a/drivers/nubus/nubus_syms.c b/drivers/nubus/nubus_syms.c deleted file mode 100644 index 9204f04fbf0b..000000000000 --- a/drivers/nubus/nubus_syms.c +++ /dev/null @@ -1,28 +0,0 @@ -/* Exported symbols for NuBus services - - (c) 1999 David Huggins-Daines <dhd@debian.org> */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/nubus.h> - -#ifdef CONFIG_PROC_FS -EXPORT_SYMBOL(nubus_proc_attach_device); -EXPORT_SYMBOL(nubus_proc_detach_device); -#endif - -MODULE_LICENSE("GPL"); - -EXPORT_SYMBOL(nubus_find_device); -EXPORT_SYMBOL(nubus_find_type); -EXPORT_SYMBOL(nubus_find_slot); -EXPORT_SYMBOL(nubus_get_root_dir); -EXPORT_SYMBOL(nubus_get_board_dir); -EXPORT_SYMBOL(nubus_get_func_dir); -EXPORT_SYMBOL(nubus_readdir); -EXPORT_SYMBOL(nubus_find_rsrc); -EXPORT_SYMBOL(nubus_rewinddir); -EXPORT_SYMBOL(nubus_get_subdir); -EXPORT_SYMBOL(nubus_get_rsrc_mem); -EXPORT_SYMBOL(nubus_get_rsrc_str); - diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c index 5271a4a7af26..e07492be1f4a 100644 --- a/drivers/nubus/proc.c +++ b/drivers/nubus/proc.c @@ -22,6 +22,8 @@ #include <linux/nubus.h> #include <linux/proc_fs.h> #include <linux/init.h> +#include <linux/module.h> + #include <asm/uaccess.h> #include <asm/byteorder.h> @@ -140,6 +142,7 @@ int nubus_proc_attach_device(struct nubus_dev *dev) return 0; } +EXPORT_SYMBOL(nubus_proc_attach_device); /* FIXME: this is certainly broken! */ int nubus_proc_detach_device(struct nubus_dev *dev) @@ -154,6 +157,7 @@ int nubus_proc_detach_device(struct nubus_dev *dev) } return 0; } +EXPORT_SYMBOL(nubus_proc_detach_device); void __init proc_bus_nubus_add_devices(void) { diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index ca52307b8f40..d08b284de196 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -941,7 +941,7 @@ ccio_map_sg(struct device *dev, struct scatterlist *sglist, int nents, ** w/o this association, we wouldn't have coherent DMA! ** Access to the virtual address is what forces a two pass algorithm. */ - coalesced = iommu_coalesce_chunks(ioc, sglist, nents, ccio_alloc_range); + coalesced = iommu_coalesce_chunks(ioc, dev, sglist, nents, ccio_alloc_range); /* ** Program the I/O Pdir diff --git a/drivers/parisc/iommu-helpers.h b/drivers/parisc/iommu-helpers.h index 0a1f99a2e93e..97ba8286c596 100644 --- a/drivers/parisc/iommu-helpers.h +++ b/drivers/parisc/iommu-helpers.h @@ -95,12 +95,14 @@ iommu_fill_pdir(struct ioc *ioc, struct scatterlist *startsg, int nents, */ static inline unsigned int -iommu_coalesce_chunks(struct ioc *ioc, struct scatterlist *startsg, int nents, +iommu_coalesce_chunks(struct ioc *ioc, struct device *dev, + struct scatterlist *startsg, int nents, int (*iommu_alloc_range)(struct ioc *, size_t)) { struct scatterlist *contig_sg; /* contig chunk head */ unsigned long dma_offset, dma_len; /* start/len of DMA stream */ unsigned int n_mappings = 0; + unsigned int max_seg_size = dma_get_max_seg_size(dev); while (nents > 0) { @@ -142,6 +144,9 @@ iommu_coalesce_chunks(struct ioc *ioc, struct scatterlist *startsg, int nents, IOVP_SIZE) > DMA_CHUNK_SIZE)) break; + if (startsg->length + dma_len > max_seg_size) + break; + /* ** Next see if we can append the next chunk (i.e. ** it must end on one page and begin on another diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index e527a0e1d6c0..d06627c3f353 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -946,7 +946,7 @@ sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, ** w/o this association, we wouldn't have coherent DMA! ** Access to the virtual address is what forces a two pass algorithm. */ - coalesced = iommu_coalesce_chunks(ioc, sglist, nents, sba_alloc_range); + coalesced = iommu_coalesce_chunks(ioc, dev, sglist, nents, sba_alloc_range); /* ** Program the I/O Pdir diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 04aac7782468..ae3df46eaabf 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1451,6 +1451,22 @@ pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) } #endif +#ifndef HAVE_ARCH_PCI_SET_DMA_MAX_SEGMENT_SIZE +int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size) +{ + return dma_set_max_seg_size(&dev->dev, size); +} +EXPORT_SYMBOL(pci_set_dma_max_seg_size); +#endif + +#ifndef HAVE_ARCH_PCI_SET_DMA_SEGMENT_BOUNDARY +int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask) +{ + return dma_set_seg_boundary(&dev->dev, mask); +} +EXPORT_SYMBOL(pci_set_dma_seg_boundary); +#endif + /** * pcix_get_max_mmrbc - get PCI-X maximum designed memory read byte count * @dev: PCI device to query diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 7f5dab34d315..4d23b9fb551b 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -933,8 +933,12 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) set_dev_node(&dev->dev, pcibus_to_node(bus)); dev->dev.dma_mask = &dev->dma_mask; + dev->dev.dma_parms = &dev->dma_parms; dev->dev.coherent_dma_mask = 0xffffffffull; + pci_set_dma_max_seg_size(dev, 65536); + pci_set_dma_seg_boundary(dev, 0xffffffff); + /* Fix up broken headers */ pci_fixup_device(pci_fixup_header, dev); diff --git a/drivers/pcmcia/at91_cf.c b/drivers/pcmcia/at91_cf.c index eb6abd3f9221..385e145e1acc 100644 --- a/drivers/pcmcia/at91_cf.c +++ b/drivers/pcmcia/at91_cf.c @@ -21,9 +21,9 @@ #include <asm/hardware.h> #include <asm/io.h> #include <asm/sizes.h> +#include <asm/gpio.h> #include <asm/arch/board.h> -#include <asm/arch/gpio.h> #include <asm/arch/at91rm9200_mc.h> @@ -56,7 +56,7 @@ struct at91_cf_socket { static inline int at91_cf_present(struct at91_cf_socket *cf) { - return !at91_get_gpio_value(cf->board->det_pin); + return !gpio_get_value(cf->board->det_pin); } /*--------------------------------------------------------------------------*/ @@ -100,9 +100,9 @@ static int at91_cf_get_status(struct pcmcia_socket *s, u_int *sp) int vcc = cf->board->vcc_pin; *sp = SS_DETECT | SS_3VCARD; - if (!rdy || at91_get_gpio_value(rdy)) + if (!rdy || gpio_get_value(rdy)) *sp |= SS_READY; - if (!vcc || at91_get_gpio_value(vcc)) + if (!vcc || gpio_get_value(vcc)) *sp |= SS_POWERON; } else *sp = 0; @@ -121,10 +121,10 @@ at91_cf_set_socket(struct pcmcia_socket *sock, struct socket_state_t *s) if (cf->board->vcc_pin) { switch (s->Vcc) { case 0: - at91_set_gpio_value(cf->board->vcc_pin, 0); + gpio_set_value(cf->board->vcc_pin, 0); break; case 33: - at91_set_gpio_value(cf->board->vcc_pin, 1); + gpio_set_value(cf->board->vcc_pin, 1); break; default: return -EINVAL; @@ -132,7 +132,7 @@ at91_cf_set_socket(struct pcmcia_socket *sock, struct socket_state_t *s) } /* toggle reset if needed */ - at91_set_gpio_value(cf->board->rst_pin, s->flags & SS_RESET); + gpio_set_value(cf->board->rst_pin, s->flags & SS_RESET); pr_debug("%s: Vcc %d, io_irq %d, flags %04x csc %04x\n", driver_name, s->Vcc, s->io_irq, s->flags, s->csc_mask); @@ -239,11 +239,24 @@ static int __init at91_cf_probe(struct platform_device *pdev) platform_set_drvdata(pdev, cf); /* must be a GPIO; ergo must trigger on both edges */ - status = request_irq(board->det_pin, at91_cf_irq, 0, driver_name, cf); + status = gpio_request(board->det_pin, "cf_det"); if (status < 0) goto fail0; + status = request_irq(board->det_pin, at91_cf_irq, 0, driver_name, cf); + if (status < 0) + goto fail00; device_init_wakeup(&pdev->dev, 1); + status = gpio_request(board->rst_pin, "cf_rst"); + if (status < 0) + goto fail0a; + + if (board->vcc_pin) { + status = gpio_request(board->vcc_pin, "cf_vcc"); + if (status < 0) + goto fail0b; + } + /* * The card driver will request this irq later as needed. * but it causes lots of "irqNN: nobody cared" messages @@ -251,16 +264,20 @@ static int __init at91_cf_probe(struct platform_device *pdev) * (Note: DK board doesn't wire the IRQ pin...) */ if (board->irq_pin) { + status = gpio_request(board->irq_pin, "cf_irq"); + if (status < 0) + goto fail0c; status = request_irq(board->irq_pin, at91_cf_irq, IRQF_SHARED, driver_name, cf); if (status < 0) - goto fail0a; + goto fail0d; cf->socket.pci_irq = board->irq_pin; } else cf->socket.pci_irq = NR_IRQS + 1; /* pcmcia layer only remaps "real" memory not iospace */ - cf->socket.io_offset = (unsigned long) ioremap(cf->phys_baseaddr + CF_IO_PHYS, SZ_2K); + cf->socket.io_offset = (unsigned long) + ioremap(cf->phys_baseaddr + CF_IO_PHYS, SZ_2K); if (!cf->socket.io_offset) { status = -ENXIO; goto fail1; @@ -296,11 +313,21 @@ fail2: fail1: if (cf->socket.io_offset) iounmap((void __iomem *) cf->socket.io_offset); - if (board->irq_pin) + if (board->irq_pin) { free_irq(board->irq_pin, cf); +fail0d: + gpio_free(board->irq_pin); + } +fail0c: + if (board->vcc_pin) + gpio_free(board->vcc_pin); +fail0b: + gpio_free(board->rst_pin); fail0a: device_init_wakeup(&pdev->dev, 0); free_irq(board->det_pin, cf); +fail00: + gpio_free(board->det_pin); fail0: kfree(cf); return status; @@ -313,13 +340,18 @@ static int __exit at91_cf_remove(struct platform_device *pdev) struct resource *io = cf->socket.io[0].res; pcmcia_unregister_socket(&cf->socket); - if (board->irq_pin) + release_mem_region(io->start, io->end + 1 - io->start); + iounmap((void __iomem *) cf->socket.io_offset); + if (board->irq_pin) { free_irq(board->irq_pin, cf); + gpio_free(board->irq_pin); + } + if (board->vcc_pin) + gpio_free(board->vcc_pin); + gpio_free(board->rst_pin); device_init_wakeup(&pdev->dev, 0); free_irq(board->det_pin, cf); - iounmap((void __iomem *) cf->socket.io_offset); - release_mem_region(io->start, io->end + 1 - io->start); - + gpio_free(board->det_pin); kfree(cf); return 0; } diff --git a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c index a1bd763b4e33..714baaeb6da1 100644 --- a/drivers/pcmcia/cardbus.c +++ b/drivers/pcmcia/cardbus.c @@ -143,7 +143,7 @@ int read_cb_mem(struct pcmcia_socket * s, int space, u_int addr, u_int len, void /* Config space? */ if (space == 0) { if (addr + len > 0x100) - goto fail; + goto failput; for (; len; addr++, ptr++, len--) pci_read_config_byte(dev, addr, ptr); return 0; @@ -171,6 +171,8 @@ int read_cb_mem(struct pcmcia_socket * s, int space, u_int addr, u_int len, void memcpy_fromio(ptr, s->cb_cis_virt + addr, len); return 0; +failput: + pci_dev_put(dev); fail: memset(ptr, 0xff, len); return -1; diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 15c18f5246d6..5a85871f5ee9 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -865,11 +865,12 @@ static int pcmcia_load_firmware(struct pcmcia_device *dev, char * filename) ds_dbg(1, "trying to load CIS file %s\n", filename); if (strlen(filename) > 14) { - printk(KERN_WARNING "pcmcia: CIS filename is too long\n"); + printk(KERN_WARNING "pcmcia: CIS filename is too long [%s]\n", + filename); return -EINVAL; } - snprintf(path, 20, "%s", filename); + snprintf(path, sizeof(path), "%s", filename); if (request_firmware(&fw, path, &dev->dev) == 0) { if (fw->size >= CISTPL_MAX_CIS_SIZE) { @@ -1130,8 +1131,6 @@ static int runtime_suspend(struct device *dev) down(&dev->sem); rc = pcmcia_dev_suspend(dev, PMSG_SUSPEND); up(&dev->sem); - if (!rc) - dev->power.power_state.event = PM_EVENT_SUSPEND; return rc; } @@ -1142,8 +1141,6 @@ static void runtime_resume(struct device *dev) down(&dev->sem); rc = pcmcia_dev_resume(dev); up(&dev->sem); - if (!rc) - dev->power.power_state.event = PM_EVENT_ON; } /************************ per-device sysfs output ***************************/ @@ -1265,6 +1262,9 @@ static int pcmcia_dev_suspend(struct device * dev, pm_message_t state) struct pcmcia_driver *p_drv = NULL; int ret = 0; + if (p_dev->suspended) + return 0; + ds_dbg(2, "suspending %s\n", dev->bus_id); if (dev->driver) @@ -1301,6 +1301,9 @@ static int pcmcia_dev_resume(struct device * dev) struct pcmcia_driver *p_drv = NULL; int ret = 0; + if (!p_dev->suspended) + return 0; + ds_dbg(2, "resuming %s\n", dev->bus_id); if (dev->driver) diff --git a/drivers/pcmcia/i82092.c b/drivers/pcmcia/i82092.c index df21e2d16f87..749515534cc0 100644 --- a/drivers/pcmcia/i82092.c +++ b/drivers/pcmcia/i82092.c @@ -82,7 +82,7 @@ struct socket_info { 1 = empty socket, 2 = card but not initialized, 3 = operational card */ - kio_addr_t io_base; /* base io address of the socket */ + unsigned int io_base; /* base io address of the socket */ struct pcmcia_socket socket; struct pci_dev *dev; /* The PCI device for the socket */ diff --git a/drivers/pcmcia/i82365.c b/drivers/pcmcia/i82365.c index 839bb1c0db58..32a2ab119798 100644 --- a/drivers/pcmcia/i82365.c +++ b/drivers/pcmcia/i82365.c @@ -164,7 +164,7 @@ struct i82365_socket { u_short type, flags; struct pcmcia_socket socket; unsigned int number; - kio_addr_t ioaddr; + unsigned int ioaddr; u_short psock; u_char cs_irq, intr; union { @@ -238,7 +238,7 @@ static u_char i365_get(u_short sock, u_short reg) unsigned long flags; spin_lock_irqsave(&bus_lock,flags); { - kio_addr_t port = socket[sock].ioaddr; + unsigned int port = socket[sock].ioaddr; u_char val; reg = I365_REG(socket[sock].psock, reg); outb(reg, port); val = inb(port+1); @@ -252,7 +252,7 @@ static void i365_set(u_short sock, u_short reg, u_char data) unsigned long flags; spin_lock_irqsave(&bus_lock,flags); { - kio_addr_t port = socket[sock].ioaddr; + unsigned int port = socket[sock].ioaddr; u_char val = I365_REG(socket[sock].psock, reg); outb(val, port); outb(data, port+1); spin_unlock_irqrestore(&bus_lock,flags); @@ -588,7 +588,7 @@ static int to_cycles(int ns) /*====================================================================*/ -static int __init identify(kio_addr_t port, u_short sock) +static int __init identify(unsigned int port, u_short sock) { u_char val; int type = -1; @@ -659,7 +659,7 @@ static int __init identify(kio_addr_t port, u_short sock) static int __init is_alive(u_short sock) { u_char stat; - kio_addr_t start, stop; + unsigned int start, stop; stat = i365_get(sock, I365_STATUS); start = i365_get_pair(sock, I365_IO(0)+I365_W_START); @@ -678,7 +678,7 @@ static int __init is_alive(u_short sock) /*====================================================================*/ -static void __init add_socket(kio_addr_t port, int psock, int type) +static void __init add_socket(unsigned int port, int psock, int type) { socket[sockets].ioaddr = port; socket[sockets].psock = psock; @@ -698,7 +698,7 @@ static void __init add_pcic(int ns, int type) base = sockets-ns; if (base == 0) printk("\n"); printk(KERN_INFO " %s", pcic[type].name); - printk(" ISA-to-PCMCIA at port %#lx ofs 0x%02x", + printk(" ISA-to-PCMCIA at port %#x ofs 0x%02x", t->ioaddr, t->psock*0x40); printk(", %d socket%s\n", ns, ((ns > 1) ? "s" : "")); @@ -772,7 +772,7 @@ static struct pnp_dev *i82365_pnpdev; static void __init isa_probe(void) { int i, j, sock, k, ns, id; - kio_addr_t port; + unsigned int port; #ifdef CONFIG_PNP struct isapnp_device_id *devid; struct pnp_dev *dev; @@ -1053,7 +1053,7 @@ static int i365_set_io_map(u_short sock, struct pccard_io_map *io) u_char map, ioctl; debug(1, "SetIOMap(%d, %d, %#2.2x, %d ns, " - "%#lx-%#lx)\n", sock, io->map, io->flags, + "%#x-%#x)\n", sock, io->map, io->flags, io->speed, io->start, io->stop); map = io->map; if ((map > 1) || (io->start > 0xffff) || (io->stop > 0xffff) || diff --git a/drivers/pcmcia/m32r_cfc.c b/drivers/pcmcia/m32r_cfc.c index 91da15b5a81e..3616da227152 100644 --- a/drivers/pcmcia/m32r_cfc.c +++ b/drivers/pcmcia/m32r_cfc.c @@ -58,7 +58,7 @@ typedef struct pcc_socket { u_short type, flags; struct pcmcia_socket socket; unsigned int number; - kio_addr_t ioaddr; + unsigned int ioaddr; u_long mapaddr; u_long base; /* PCC register base */ u_char cs_irq1, cs_irq2, intr; @@ -298,7 +298,8 @@ static int __init is_alive(u_short sock) return 0; } -static void add_pcc_socket(ulong base, int irq, ulong mapaddr, kio_addr_t ioaddr) +static void add_pcc_socket(ulong base, int irq, ulong mapaddr, + unsigned int ioaddr) { pcc_socket_t *t = &socket[pcc_sockets]; @@ -738,7 +739,7 @@ static int __init init_m32r_pcc(void) #else /* CONFIG_PLAT_USRV */ { ulong base, mapaddr; - kio_addr_t ioaddr; + unsigned int ioaddr; for (i = 0 ; i < M32R_MAX_PCC ; i++) { base = (ulong)PLD_CFRSTCR; diff --git a/drivers/pcmcia/m32r_pcc.c b/drivers/pcmcia/m32r_pcc.c index ec4c1253ebbb..2b42b7155e34 100644 --- a/drivers/pcmcia/m32r_pcc.c +++ b/drivers/pcmcia/m32r_pcc.c @@ -65,7 +65,7 @@ typedef struct pcc_socket { u_short type, flags; struct pcmcia_socket socket; unsigned int number; - kio_addr_t ioaddr; + unsigned int ioaddr; u_long mapaddr; u_long base; /* PCC register base */ u_char cs_irq, intr; @@ -310,7 +310,8 @@ static int __init is_alive(u_short sock) return 0; } -static void add_pcc_socket(ulong base, int irq, ulong mapaddr, kio_addr_t ioaddr) +static void add_pcc_socket(ulong base, int irq, ulong mapaddr, + unsigned int ioaddr) { pcc_socket_t *t = &socket[pcc_sockets]; @@ -491,7 +492,7 @@ static int _pcc_set_io_map(u_short sock, struct pccard_io_map *io) u_char map; debug(3, "m32r-pcc: SetIOMap(%d, %d, %#2.2x, %d ns, " - "%#lx-%#lx)\n", sock, io->map, io->flags, + "%#x-%#x)\n", sock, io->map, io->flags, io->speed, io->start, io->stop); map = io->map; diff --git a/drivers/pcmcia/m8xx_pcmcia.c b/drivers/pcmcia/m8xx_pcmcia.c index 4ea426a25909..ac70d2cb7dd4 100644 --- a/drivers/pcmcia/m8xx_pcmcia.c +++ b/drivers/pcmcia/m8xx_pcmcia.c @@ -1174,8 +1174,10 @@ static int __init m8xx_probe(struct of_device *ofdev, pcmcia_schlvl = irq_of_parse_and_map(np, 0); hwirq = irq_map[pcmcia_schlvl].hwirq; - if (pcmcia_schlvl < 0) + if (pcmcia_schlvl < 0) { + iounmap(pcmcia); return -EINVAL; + } m8xx_pgcrx[0] = &pcmcia->pcmc_pgcra; m8xx_pgcrx[1] = &pcmcia->pcmc_pgcrb; @@ -1189,6 +1191,7 @@ static int __init m8xx_probe(struct of_device *ofdev, driver_name, socket)) { pcmcia_error("Cannot allocate IRQ %u for SCHLVL!\n", pcmcia_schlvl); + iounmap(pcmcia); return -1; } @@ -1284,6 +1287,7 @@ static int m8xx_remove(struct of_device *ofdev) } for (i = 0; i < PCMCIA_SOCKETS_NO; i++) pcmcia_unregister_socket(&socket[i].socket); + iounmap(pcmcia); free_irq(pcmcia_schlvl, NULL); diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index 0ce39de834c4..1d128fbd1a92 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -65,23 +65,23 @@ extern int ds_pc_debug; * Special stuff for managing IO windows, because they are scarce */ -static int alloc_io_space(struct pcmcia_socket *s, u_int attr, ioaddr_t *base, - ioaddr_t num, u_int lines) +static int alloc_io_space(struct pcmcia_socket *s, u_int attr, + unsigned int *base, unsigned int num, u_int lines) { int i; - kio_addr_t try, align; + unsigned int try, align; align = (*base) ? (lines ? 1<<lines : 0) : 1; if (align && (align < num)) { if (*base) { - ds_dbg(s, 0, "odd IO request: num %#x align %#lx\n", + ds_dbg(s, 0, "odd IO request: num %#x align %#x\n", num, align); align = 0; } else while (align && (align < num)) align <<= 1; } if (*base & ~(align-1)) { - ds_dbg(s, 0, "odd IO request: base %#x align %#lx\n", + ds_dbg(s, 0, "odd IO request: base %#x align %#x\n", *base, align); align = 0; } @@ -132,8 +132,8 @@ static int alloc_io_space(struct pcmcia_socket *s, u_int attr, ioaddr_t *base, } /* alloc_io_space */ -static void release_io_space(struct pcmcia_socket *s, ioaddr_t base, - ioaddr_t num) +static void release_io_space(struct pcmcia_socket *s, unsigned int base, + unsigned int num) { int i; diff --git a/drivers/pcmcia/rsrc_nonstatic.c b/drivers/pcmcia/rsrc_nonstatic.c index bfcaad6021cf..a8d100707721 100644 --- a/drivers/pcmcia/rsrc_nonstatic.c +++ b/drivers/pcmcia/rsrc_nonstatic.c @@ -186,15 +186,16 @@ static int sub_interval(struct resource_map *map, u_long base, u_long num) ======================================================================*/ #ifdef CONFIG_PCMCIA_PROBE -static void do_io_probe(struct pcmcia_socket *s, kio_addr_t base, kio_addr_t num) +static void do_io_probe(struct pcmcia_socket *s, unsigned int base, + unsigned int num) { struct resource *res; struct socket_data *s_data = s->resource_data; - kio_addr_t i, j, bad; + unsigned int i, j, bad; int any; u_char *b, hole, most; - printk(KERN_INFO "cs: IO port probe %#lx-%#lx:", + printk(KERN_INFO "cs: IO port probe %#x-%#x:", base, base+num-1); /* First, what does a floating port look like? */ @@ -233,7 +234,7 @@ static void do_io_probe(struct pcmcia_socket *s, kio_addr_t base, kio_addr_t num } else { if (bad) { sub_interval(&s_data->io_db, bad, i-bad); - printk(" %#lx-%#lx", bad, i-1); + printk(" %#x-%#x", bad, i-1); bad = 0; } } @@ -244,7 +245,7 @@ static void do_io_probe(struct pcmcia_socket *s, kio_addr_t base, kio_addr_t num return; } else { sub_interval(&s_data->io_db, bad, i-bad); - printk(" %#lx-%#lx", bad, i-1); + printk(" %#x-%#x", bad, i-1); } } diff --git a/drivers/pcmcia/tcic.c b/drivers/pcmcia/tcic.c index 749ac3710914..5792bd5c54f9 100644 --- a/drivers/pcmcia/tcic.c +++ b/drivers/pcmcia/tcic.c @@ -719,7 +719,7 @@ static int tcic_set_io_map(struct pcmcia_socket *sock, struct pccard_io_map *io) u_short base, len, ioctl; debug(1, "SetIOMap(%d, %d, %#2.2x, %d ns, " - "%#lx-%#lx)\n", psock, io->map, io->flags, + "%#x-%#x)\n", psock, io->map, io->flags, io->speed, io->start, io->stop); if ((io->map > 1) || (io->start > 0xffff) || (io->stop > 0xffff) || (io->stop < io->start)) return -EINVAL; diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 0e8267c1e915..fb0886140dd7 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -449,9 +449,6 @@ static int aac_slave_configure(struct scsi_device *sdev) else if (depth < 2) depth = 2; scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, depth); - if (!(((struct aac_dev *)host->hostdata)->adapter_info.options & - AAC_OPT_NEW_COMM)) - blk_queue_max_segment_size(sdev->request_queue, 65536); } else scsi_adjust_queue_depth(sdev, 0, 1); @@ -1133,6 +1130,12 @@ static int __devinit aac_probe_one(struct pci_dev *pdev, if (error < 0) goto out_deinit; + if (!(aac->adapter_info.options & AAC_OPT_NEW_COMM)) { + error = pci_set_dma_max_seg_size(pdev, 65536); + if (error) + goto out_deinit; + } + /* * Lets override negotiations and drop the maximum SG limit to 34 */ diff --git a/drivers/scsi/pcmcia/fdomain_stub.c b/drivers/scsi/pcmcia/fdomain_stub.c index 4b82b2021981..d8b99351b053 100644 --- a/drivers/scsi/pcmcia/fdomain_stub.c +++ b/drivers/scsi/pcmcia/fdomain_stub.c @@ -130,7 +130,7 @@ static int fdomain_config(struct pcmcia_device *link) cisparse_t parse; int i, last_ret, last_fn; u_char tuple_data[64]; - char str[16]; + char str[22]; struct Scsi_Host *host; DEBUG(0, "fdomain_config(0x%p)\n", link); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index b12fb310e399..f243fc30c908 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1569,6 +1569,7 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, request_fn_proc *request_fn) { struct request_queue *q; + struct device *dev = shost->shost_gendev.parent; q = blk_init_queue(request_fn, NULL); if (!q) @@ -1583,6 +1584,9 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, blk_queue_max_sectors(q, shost->max_sectors); blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost)); blk_queue_segment_boundary(q, shost->dma_boundary); + dma_set_seg_boundary(dev, shost->dma_boundary); + + blk_queue_max_segment_size(q, dma_get_max_seg_size(dev)); if (!shost->use_clustering) clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index f94109cbb46e..b8a4bd94f51d 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -2047,7 +2047,7 @@ serial8250_set_termios(struct uart_port *port, struct ktermios *termios, * Oxford Semi 952 rev B workaround */ if (up->bugs & UART_BUG_QUOT && (quot & 0xff) == 0) - quot ++; + quot++; if (up->capabilities & UART_CAP_FIFO && up->port.fifosize > 1) { if (baud < 2400) @@ -2662,16 +2662,17 @@ static int __devinit serial8250_probe(struct platform_device *dev) memset(&port, 0, sizeof(struct uart_port)); for (i = 0; p && p->flags != 0; p++, i++) { - port.iobase = p->iobase; - port.membase = p->membase; - port.irq = p->irq; - port.uartclk = p->uartclk; - port.regshift = p->regshift; - port.iotype = p->iotype; - port.flags = p->flags; - port.mapbase = p->mapbase; - port.hub6 = p->hub6; - port.dev = &dev->dev; + port.iobase = p->iobase; + port.membase = p->membase; + port.irq = p->irq; + port.uartclk = p->uartclk; + port.regshift = p->regshift; + port.iotype = p->iotype; + port.flags = p->flags; + port.mapbase = p->mapbase; + port.hub6 = p->hub6; + port.private_data = p->private_data; + port.dev = &dev->dev; if (share_irqs) port.flags |= UPF_SHARE_IRQ; ret = serial8250_register_port(&port); @@ -2812,15 +2813,16 @@ int serial8250_register_port(struct uart_port *port) if (uart) { uart_remove_one_port(&serial8250_reg, &uart->port); - uart->port.iobase = port->iobase; - uart->port.membase = port->membase; - uart->port.irq = port->irq; - uart->port.uartclk = port->uartclk; - uart->port.fifosize = port->fifosize; - uart->port.regshift = port->regshift; - uart->port.iotype = port->iotype; - uart->port.flags = port->flags | UPF_BOOT_AUTOCONF; - uart->port.mapbase = port->mapbase; + uart->port.iobase = port->iobase; + uart->port.membase = port->membase; + uart->port.irq = port->irq; + uart->port.uartclk = port->uartclk; + uart->port.fifosize = port->fifosize; + uart->port.regshift = port->regshift; + uart->port.iotype = port->iotype; + uart->port.flags = port->flags | UPF_BOOT_AUTOCONF; + uart->port.mapbase = port->mapbase; + uart->port.private_data = port->private_data; if (port->dev) uart->port.dev = port->dev; diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index ceb03c9e749f..0a4ac2b6eb5a 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -106,6 +106,32 @@ setup_port(struct serial_private *priv, struct uart_port *port, } /* + * ADDI-DATA GmbH communication cards <info@addi-data.com> + */ +static int addidata_apci7800_setup(struct serial_private *priv, + struct pciserial_board *board, + struct uart_port *port, int idx) +{ + unsigned int bar = 0, offset = board->first_offset; + bar = FL_GET_BASE(board->flags); + + if (idx < 2) { + offset += idx * board->uart_offset; + } else if ((idx >= 2) && (idx < 4)) { + bar += 1; + offset += ((idx - 2) * board->uart_offset); + } else if ((idx >= 4) && (idx < 6)) { + bar += 2; + offset += ((idx - 4) * board->uart_offset); + } else if (idx >= 6) { + bar += 3; + offset += ((idx - 6) * board->uart_offset); + } + + return setup_port(priv, port, bar, offset, board->reg_shift); +} + +/* * AFAVLAB uses a different mixture of BARs and offsets * Not that ugly ;) -- HW */ @@ -752,6 +778,16 @@ pci_default_setup(struct serial_private *priv, struct pciserial_board *board, */ static struct pci_serial_quirk pci_serial_quirks[] = { /* + * ADDI-DATA GmbH communication cards <info@addi-data.com> + */ + { + .vendor = PCI_VENDOR_ID_ADDIDATA_OLD, + .device = PCI_DEVICE_ID_ADDIDATA_APCI7800, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = addidata_apci7800_setup, + }, + /* * AFAVLAB cards - these may be called via parport_serial * It is not clear whether this applies to all products. */ @@ -1179,6 +1215,12 @@ static struct pciserial_board pci_boards[] __devinitdata = { .base_baud = 115200, .uart_offset = 8, }, + [pbn_b0_8_115200] = { + .flags = FL_BASE0, + .num_ports = 8, + .base_baud = 115200, + .uart_offset = 8, + }, [pbn_b0_1_921600] = { .flags = FL_BASE0, @@ -2697,6 +2739,97 @@ static struct pci_device_id serial_pci_tbl[] = { pbn_pasemi_1682M }, /* + * ADDI-DATA GmbH communication cards <info@addi-data.com> + */ + { PCI_VENDOR_ID_ADDIDATA, + PCI_DEVICE_ID_ADDIDATA_APCI7500, + PCI_ANY_ID, + PCI_ANY_ID, + 0, + 0, + pbn_b0_4_115200 }, + + { PCI_VENDOR_ID_ADDIDATA, + PCI_DEVICE_ID_ADDIDATA_APCI7420, + PCI_ANY_ID, + PCI_ANY_ID, + 0, + 0, + pbn_b0_2_115200 }, + + { PCI_VENDOR_ID_ADDIDATA, + PCI_DEVICE_ID_ADDIDATA_APCI7300, + PCI_ANY_ID, + PCI_ANY_ID, + 0, + 0, + pbn_b0_1_115200 }, + + { PCI_VENDOR_ID_ADDIDATA_OLD, + PCI_DEVICE_ID_ADDIDATA_APCI7800, + PCI_ANY_ID, + PCI_ANY_ID, + 0, + 0, + pbn_b1_8_115200 }, + + { PCI_VENDOR_ID_ADDIDATA, + PCI_DEVICE_ID_ADDIDATA_APCI7500_2, + PCI_ANY_ID, + PCI_ANY_ID, + 0, + 0, + pbn_b0_4_115200 }, + + { PCI_VENDOR_ID_ADDIDATA, + PCI_DEVICE_ID_ADDIDATA_APCI7420_2, + PCI_ANY_ID, + PCI_ANY_ID, + 0, + 0, + pbn_b0_2_115200 }, + + { PCI_VENDOR_ID_ADDIDATA, + PCI_DEVICE_ID_ADDIDATA_APCI7300_2, + PCI_ANY_ID, + PCI_ANY_ID, + 0, + 0, + pbn_b0_1_115200 }, + + { PCI_VENDOR_ID_ADDIDATA, + PCI_DEVICE_ID_ADDIDATA_APCI7500_3, + PCI_ANY_ID, + PCI_ANY_ID, + 0, + 0, + pbn_b0_4_115200 }, + + { PCI_VENDOR_ID_ADDIDATA, + PCI_DEVICE_ID_ADDIDATA_APCI7420_3, + PCI_ANY_ID, + PCI_ANY_ID, + 0, + 0, + pbn_b0_2_115200 }, + + { PCI_VENDOR_ID_ADDIDATA, + PCI_DEVICE_ID_ADDIDATA_APCI7300_3, + PCI_ANY_ID, + PCI_ANY_ID, + 0, + 0, + pbn_b0_1_115200 }, + + { PCI_VENDOR_ID_ADDIDATA, + PCI_DEVICE_ID_ADDIDATA_APCI7800_3, + PCI_ANY_ID, + PCI_ANY_ID, + 0, + 0, + pbn_b0_8_115200 }, + + /* * These entries match devices with class COMMUNICATION_SERIAL, * COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL */ diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c index 1de098e75497..6f09cbd7fc48 100644 --- a/drivers/serial/8250_pnp.c +++ b/drivers/serial/8250_pnp.c @@ -414,8 +414,9 @@ static int __devinit check_resources(struct pnp_option *option) */ static int __devinit serial_pnp_guess_board(struct pnp_dev *dev, int *flags) { - if (!(check_name(pnp_dev_name(dev)) || (dev->card && check_name(dev->card->name)))) - return -ENODEV; + if (!(check_name(pnp_dev_name(dev)) || + (dev->card && check_name(dev->card->name)))) + return -ENODEV; if (check_resources(dev->independent)) return 0; @@ -452,8 +453,9 @@ serial_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) return -ENODEV; #ifdef SERIAL_DEBUG_PNP - printk("Setup PNP port: port %x, mem 0x%lx, irq %d, type %d\n", - port.iobase, port.mapbase, port.irq, port.iotype); + printk(KERN_DEBUG + "Setup PNP port: port %x, mem 0x%lx, irq %d, type %d\n", + port.iobase, port.mapbase, port.irq, port.iotype); #endif port.flags |= UPF_SKIP_TEST | UPF_BOOT_AUTOCONF; diff --git a/drivers/serial/mcf.c b/drivers/serial/mcf.c index 051fcc2f5ba8..e76fc72c9b36 100644 --- a/drivers/serial/mcf.c +++ b/drivers/serial/mcf.c @@ -434,7 +434,7 @@ static struct uart_ops mcf_uart_ops = { static struct mcf_uart mcf_ports[3]; -#define MCF_MAXPORTS (sizeof(mcf_ports) / sizeof(struct mcf_uart)) +#define MCF_MAXPORTS ARRAY_SIZE(mcf_ports) /****************************************************************************/ #if defined(CONFIG_SERIAL_MCF_CONSOLE) diff --git a/drivers/serial/mpsc.c b/drivers/serial/mpsc.c index 4d643c926657..cb3a91967742 100644 --- a/drivers/serial/mpsc.c +++ b/drivers/serial/mpsc.c @@ -612,6 +612,7 @@ static void mpsc_hw_init(struct mpsc_port_info *pi) /* No preamble, 16x divider, low-latency, */ writel(0x04400400, pi->mpsc_base + MPSC_MMCRH); + mpsc_set_baudrate(pi, pi->default_baud); if (pi->mirror_regs) { pi->MPSC_CHR_1_m = 0; diff --git a/drivers/serial/s3c2410.c b/drivers/serial/s3c2410.c index e773c8e14962..45de19366030 100644 --- a/drivers/serial/s3c2410.c +++ b/drivers/serial/s3c2410.c @@ -1527,7 +1527,7 @@ static inline void s3c2440_serial_exit(void) #define s3c2440_uart_inf_at NULL #endif /* CONFIG_CPU_S3C2440 */ -#if defined(CONFIG_CPU_S3C2412) || defined(CONFIG_CPU_S3C2413) +#if defined(CONFIG_CPU_S3C2412) static int s3c2412_serial_setsource(struct uart_port *port, struct s3c24xx_uart_clksrc *clk) diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c index 3bb5d241dd40..276da148c57e 100644 --- a/drivers/serial/serial_core.c +++ b/drivers/serial/serial_core.c @@ -371,7 +371,8 @@ uart_get_baud_rate(struct uart_port *port, struct ktermios *termios, */ termios->c_cflag &= ~CBAUD; if (old) { - termios->c_cflag |= old->c_cflag & CBAUD; + baud = tty_termios_baud_rate(old); + tty_termios_encode_baud_rate(termios, baud, baud); old = NULL; continue; } @@ -380,7 +381,7 @@ uart_get_baud_rate(struct uart_port *port, struct ktermios *termios, * As a last resort, if the quotient is zero, * default to 9600 bps */ - termios->c_cflag |= B9600; + tty_termios_encode_baud_rate(termios, 9600, 9600); } return 0; @@ -1977,6 +1978,7 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *port) if (state->info && state->info->flags & UIF_INITIALIZED) { const struct uart_ops *ops = port->ops; + int tries; state->info->flags = (state->info->flags & ~UIF_INITIALIZED) | UIF_SUSPENDED; @@ -1990,9 +1992,14 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *port) /* * Wait for the transmitter to empty. */ - while (!ops->tx_empty(port)) { + for (tries = 3; !ops->tx_empty(port) && tries; tries--) { msleep(10); } + if (!tries) + printk(KERN_ERR "%s%s%s%d: Unable to drain transmitter\n", + port->dev ? port->dev->bus_id : "", + port->dev ? ": " : "", + drv->dev_name, port->line); ops->shutdown(port); } @@ -2029,8 +2036,6 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port) } port->suspended = 0; - uart_change_pm(state, 0); - /* * Re-enable the console device after suspending. */ @@ -2049,6 +2054,7 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port) if (state->info && state->info->tty && termios.c_cflag == 0) termios = *state->info->tty->termios; + uart_change_pm(state, 0); port->ops->set_termios(port, &termios, NULL); console_start(port->cons); } @@ -2057,6 +2063,7 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port) const struct uart_ops *ops = port->ops; int ret; + uart_change_pm(state, 0); ops->set_mctrl(port, 0); ret = ops->startup(port); if (ret == 0) { @@ -2150,10 +2157,11 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, /* * Ensure that the modem control lines are de-activated. + * keep the DTR setting that is set in uart_set_options() * We probably don't need a spinlock around this, but */ spin_lock_irqsave(&port->lock, flags); - port->ops->set_mctrl(port, 0); + port->ops->set_mctrl(port, port->mctrl & TIOCM_DTR); spin_unlock_irqrestore(&port->lock, flags); /* diff --git a/drivers/serial/serial_cs.c b/drivers/serial/serial_cs.c index d8b660061c13..164d2a42eb59 100644 --- a/drivers/serial/serial_cs.c +++ b/drivers/serial/serial_cs.c @@ -389,7 +389,7 @@ static void serial_detach(struct pcmcia_device *link) /*====================================================================*/ static int setup_serial(struct pcmcia_device *handle, struct serial_info * info, - kio_addr_t iobase, int irq) + unsigned int iobase, int irq) { struct uart_port port; int line; @@ -456,7 +456,7 @@ next_tuple(struct pcmcia_device *handle, tuple_t * tuple, cisparse_t * parse) static int simple_config(struct pcmcia_device *link) { - static const kio_addr_t base[5] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 }; + static const unsigned int base[5] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 }; static const int size_table[2] = { 8, 16 }; struct serial_info *info = link->priv; struct serial_cfg_mem *cfg_mem; @@ -480,7 +480,7 @@ static int simple_config(struct pcmcia_device *link) /* If the card is already configured, look up the port and irq */ i = pcmcia_get_configuration_info(link, &config); if ((i == CS_SUCCESS) && (config.Attributes & CONF_VALID_CLIENT)) { - kio_addr_t port = 0; + unsigned int port = 0; if ((config.BasePort2 != 0) && (config.NumPorts2 == 8)) { port = config.BasePort2; info->slave = 1; diff --git a/fs/buffer.c b/fs/buffer.c index 456c9ab7705b..826baf4f04bc 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1798,7 +1798,7 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to) start = max(from, block_start); size = min(to, block_end) - start; - zero_user_page(page, start, size, KM_USER0); + zero_user(page, start, size); set_buffer_uptodate(bh); } @@ -1861,19 +1861,10 @@ static int __block_prepare_write(struct inode *inode, struct page *page, mark_buffer_dirty(bh); continue; } - if (block_end > to || block_start < from) { - void *kaddr; - - kaddr = kmap_atomic(page, KM_USER0); - if (block_end > to) - memset(kaddr+to, 0, - block_end-to); - if (block_start < from) - memset(kaddr+block_start, - 0, from-block_start); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - } + if (block_end > to || block_start < from) + zero_user_segments(page, + to, block_end, + block_start, from); continue; } } @@ -2104,8 +2095,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block) SetPageError(page); } if (!buffer_mapped(bh)) { - zero_user_page(page, i * blocksize, blocksize, - KM_USER0); + zero_user(page, i * blocksize, blocksize); if (!err) set_buffer_uptodate(bh); continue; @@ -2218,7 +2208,7 @@ int cont_expand_zero(struct file *file, struct address_space *mapping, &page, &fsdata); if (err) goto out; - zero_user_page(page, zerofrom, len, KM_USER0); + zero_user(page, zerofrom, len); err = pagecache_write_end(file, mapping, curpos, len, len, page, fsdata); if (err < 0) @@ -2245,7 +2235,7 @@ int cont_expand_zero(struct file *file, struct address_space *mapping, &page, &fsdata); if (err) goto out; - zero_user_page(page, zerofrom, len, KM_USER0); + zero_user(page, zerofrom, len); err = pagecache_write_end(file, mapping, curpos, len, len, page, fsdata); if (err < 0) @@ -2422,7 +2412,6 @@ int nobh_write_begin(struct file *file, struct address_space *mapping, unsigned block_in_page; unsigned block_start, block_end; sector_t block_in_file; - char *kaddr; int nr_reads = 0; int ret = 0; int is_mapped_to_disk = 1; @@ -2493,13 +2482,8 @@ int nobh_write_begin(struct file *file, struct address_space *mapping, continue; } if (buffer_new(bh) || !buffer_mapped(bh)) { - kaddr = kmap_atomic(page, KM_USER0); - if (block_start < from) - memset(kaddr+block_start, 0, from-block_start); - if (block_end > to) - memset(kaddr + to, 0, block_end - to); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_segments(page, block_start, from, + to, block_end); continue; } if (buffer_uptodate(bh)) @@ -2636,7 +2620,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block, * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); + zero_user_segment(page, offset, PAGE_CACHE_SIZE); out: ret = mpage_writepage(page, get_block, wbc); if (ret == -EAGAIN) @@ -2709,7 +2693,7 @@ has_buffers: if (page_has_buffers(page)) goto has_buffers; } - zero_user_page(page, offset, length, KM_USER0); + zero_user(page, offset, length); set_page_dirty(page); err = 0; @@ -2785,7 +2769,7 @@ int block_truncate_page(struct address_space *mapping, goto unlock; } - zero_user_page(page, offset, length, KM_USER0); + zero_user(page, offset, length); mark_buffer_dirty(bh); err = 0; @@ -2831,7 +2815,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block, * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); + zero_user_segment(page, offset, PAGE_CACHE_SIZE); return __block_write_full_page(inode, page, get_block, wbc); } @@ -3169,7 +3153,7 @@ static void recalc_bh_state(void) struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) { - struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, + struct buffer_head *ret = kmem_cache_alloc(bh_cachep, set_migrateflags(gfp_flags, __GFP_RECLAIMABLE)); if (ret) { INIT_LIST_HEAD(&ret->b_assoc_buffers); @@ -3257,12 +3241,24 @@ int bh_submit_read(struct buffer_head *bh) } EXPORT_SYMBOL(bh_submit_read); +static void +init_buffer_head(struct kmem_cache *cachep, void *data) +{ + struct buffer_head *bh = data; + + memset(bh, 0, sizeof(*bh)); + INIT_LIST_HEAD(&bh->b_assoc_buffers); +} + void __init buffer_init(void) { int nrpages; - bh_cachep = KMEM_CACHE(buffer_head, - SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); + bh_cachep = kmem_cache_create("buffer_head", + sizeof(struct buffer_head), 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| + SLAB_MEM_SPREAD), + init_buffer_head); /* * Limit the bh occupancy to 10% of ZONE_NORMAL diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index d9567ba2960b..47f2621001e4 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1386,7 +1386,7 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from) if (!page) return -ENOMEM; - zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); + zero_user_segment(page, offset, PAGE_CACHE_SIZE); unlock_page(page); page_cache_release(page); return rc; diff --git a/fs/compat.c b/fs/compat.c index 5216c3fd7517..69baca5ad608 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -2206,19 +2206,41 @@ asmlinkage long compat_sys_signalfd(int ufd, #ifdef CONFIG_TIMERFD -asmlinkage long compat_sys_timerfd(int ufd, int clockid, int flags, - const struct compat_itimerspec __user *utmr) +asmlinkage long compat_sys_timerfd_settime(int ufd, int flags, + const struct compat_itimerspec __user *utmr, + struct compat_itimerspec __user *otmr) { + int error; struct itimerspec t; struct itimerspec __user *ut; if (get_compat_itimerspec(&t, utmr)) return -EFAULT; - ut = compat_alloc_user_space(sizeof(*ut)); - if (copy_to_user(ut, &t, sizeof(t))) + ut = compat_alloc_user_space(2 * sizeof(struct itimerspec)); + if (copy_to_user(&ut[0], &t, sizeof(t))) return -EFAULT; + error = sys_timerfd_settime(ufd, flags, &ut[0], &ut[1]); + if (!error && otmr) + error = (copy_from_user(&t, &ut[1], sizeof(struct itimerspec)) || + put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0; + + return error; +} + +asmlinkage long compat_sys_timerfd_gettime(int ufd, + struct compat_itimerspec __user *otmr) +{ + int error; + struct itimerspec t; + struct itimerspec __user *ut; - return sys_timerfd(ufd, clockid, flags, ut); + ut = compat_alloc_user_space(sizeof(struct itimerspec)); + error = sys_timerfd_gettime(ufd, ut); + if (!error) + error = (copy_from_user(&t, ut, sizeof(struct itimerspec)) || + put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0; + + return error; } #endif /* CONFIG_TIMERFD */ diff --git a/fs/direct-io.c b/fs/direct-io.c index acf0da1bd257..9e81addbd6ea 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -878,8 +878,8 @@ do_holes: page_cache_release(page); goto out; } - zero_user_page(page, block_in_page << blkbits, - 1 << blkbits, KM_USER0); + zero_user(page, block_in_page << blkbits, + 1 << blkbits); dio->block_in_file++; block_in_page++; goto next_block; diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 32c5711d79a3..0535412d8c64 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -257,8 +257,7 @@ static int fill_zeros_to_end_of_page(struct page *page, unsigned int to) end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE; if (to > end_byte_in_page) end_byte_in_page = to; - zero_user_page(page, end_byte_in_page, - PAGE_CACHE_SIZE - end_byte_in_page, KM_USER0); + zero_user_segment(page, end_byte_in_page, PAGE_CACHE_SIZE); out: return 0; } @@ -307,7 +306,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, */ if ((i_size_read(page->mapping->host) == prev_page_end_size) && (from != 0)) { - zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); + zero_user(page, 0, PAGE_CACHE_SIZE); } out: return rc; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 81c04abfb1aa..a415f42d32cf 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -353,7 +353,7 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq) spin_unlock_irqrestore(&psw->lock, flags); /* Do really wake up now */ - wake_up(wq); + wake_up_nested(wq, 1 + wake_nests); /* Remove the current task from the list */ spin_lock_irqsave(&psw->lock, flags); diff --git a/fs/exec.c b/fs/exec.c index 282240afe99e..be923e4bc389 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -760,7 +760,7 @@ static int de_thread(struct task_struct *tsk) */ read_lock(&tasklist_lock); spin_lock_irq(lock); - if (sig->flags & SIGNAL_GROUP_EXIT) { + if (signal_group_exit(sig)) { /* * Another group action in progress, just * return so that the signal is processed. @@ -778,6 +778,7 @@ static int de_thread(struct task_struct *tsk) if (unlikely(tsk->group_leader == task_child_reaper(tsk))) task_active_pid_ns(tsk)->child_reaper = tsk; + sig->group_exit_task = tsk; zap_other_threads(tsk); read_unlock(&tasklist_lock); @@ -802,7 +803,6 @@ static int de_thread(struct task_struct *tsk) } sig->notify_count = count; - sig->group_exit_task = tsk; while (atomic_read(&sig->count) > count) { __set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock_irq(lock); @@ -871,15 +871,10 @@ static int de_thread(struct task_struct *tsk) leader->exit_state = EXIT_DEAD; write_unlock_irq(&tasklist_lock); - } + } sig->group_exit_task = NULL; sig->notify_count = 0; - /* - * There may be one thread left which is just exiting, - * but it's safe to stop telling the group to kill themselves. - */ - sig->flags = 0; no_thread_group: exit_itimers(sig); @@ -947,12 +942,13 @@ static void flush_old_files(struct files_struct * files) spin_unlock(&files->file_lock); } -void get_task_comm(char *buf, struct task_struct *tsk) +char *get_task_comm(char *buf, struct task_struct *tsk) { /* buf must be at least sizeof(tsk->comm) in size */ task_lock(tsk); strncpy(buf, tsk->comm, sizeof(tsk->comm)); task_unlock(tsk); + return buf; } void set_task_comm(struct task_struct *tsk, char *buf) @@ -1548,7 +1544,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, int err = -EAGAIN; spin_lock_irq(&tsk->sighand->siglock); - if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) { + if (!signal_group_exit(tsk->signal)) { tsk->signal->group_exit_code = exit_code; zap_process(tsk); err = 0; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 9b162cd6c16c..077535439288 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1845,7 +1845,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, */ if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode) && PageUptodate(page)) { - zero_user_page(page, offset, length, KM_USER0); + zero_user(page, offset, length); set_page_dirty(page); goto unlock; } @@ -1898,7 +1898,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, goto unlock; } - zero_user_page(page, offset, length, KM_USER0); + zero_user(page, offset, length); BUFFER_TRACE(bh, "zeroed end of block"); err = 0; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bb717cbb749c..05c4145dd27d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1840,7 +1840,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, */ if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode) && PageUptodate(page)) { - zero_user_page(page, offset, length, KM_USER0); + zero_user(page, offset, length); set_page_dirty(page); goto unlock; } @@ -1893,7 +1893,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, goto unlock; } - zero_user_page(page, offset, length, KM_USER0); + zero_user(page, offset, length); BUFFER_TRACE(bh, "zeroed end of block"); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 300324bd563c..0b3064079fa5 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -284,7 +284,17 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) * soon as the queue becomes uncongested. */ inode->i_state |= I_DIRTY_PAGES; - requeue_io(inode); + if (wbc->nr_to_write <= 0) { + /* + * slice used up: queue for next turn + */ + requeue_io(inode); + } else { + /* + * somehow blocked: retry later + */ + redirty_tail(inode); + } } else { /* * Otherwise fully redirty the inode so that @@ -334,9 +344,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) WARN_ON(inode->i_state & I_WILL_FREE); if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) { - struct address_space *mapping = inode->i_mapping; - int ret; - /* * We're skipping this inode because it's locked, and we're not * doing writeback-for-data-integrity. Move it to s_more_io so @@ -345,15 +352,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * completed a full scan of s_io. */ requeue_io(inode); - - /* - * Even if we don't actually write the inode itself here, - * we can at least start some of the data writeout.. - */ - spin_unlock(&inode_lock); - ret = do_writepages(mapping, wbc); - spin_lock(&inode_lock); - return ret; + return 0; } /* @@ -479,8 +478,12 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) iput(inode); cond_resched(); spin_lock(&inode_lock); - if (wbc->nr_to_write <= 0) + if (wbc->nr_to_write <= 0) { + wbc->more_io = 1; break; + } + if (!list_empty(&sb->s_more_io)) + wbc->more_io = 1; } return; /* Leave any unwritten inodes on s_io */ } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index e4effc47abfc..e9456ebd3bb6 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -932,7 +932,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping) if (!gfs2_is_writeback(ip)) gfs2_trans_add_bh(ip->i_gl, bh, 0); - zero_user_page(page, offset, length, KM_USER0); + zero_user(page, offset, length); unlock: unlock_page(page); diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 38dbe99a30ed..ac772b6d9dbb 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -446,7 +446,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) * so we need to supply one here. It doesn't happen often. */ if (unlikely(page->index)) { - zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); + zero_user(page, 0, PAGE_CACHE_SIZE); return 0; } diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index 35c1a9f33f47..53fd0a67c11a 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c @@ -285,17 +285,17 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd) return err; times[0].tv_sec = atime_ts.tv_sec; - times[0].tv_usec = atime_ts.tv_nsec * 1000; + times[0].tv_usec = atime_ts.tv_nsec / 1000; times[1].tv_sec = mtime_ts.tv_sec; - times[1].tv_usec = mtime_ts.tv_nsec * 1000; + times[1].tv_usec = mtime_ts.tv_nsec / 1000; if (attrs->ia_valid & HOSTFS_ATTR_ATIME_SET) { times[0].tv_sec = attrs->ia_atime.tv_sec; - times[0].tv_usec = attrs->ia_atime.tv_nsec * 1000; + times[0].tv_usec = attrs->ia_atime.tv_nsec / 1000; } if (attrs->ia_valid & HOSTFS_ATTR_MTIME_SET) { times[1].tv_sec = attrs->ia_mtime.tv_sec; - times[1].tv_usec = attrs->ia_mtime.tv_nsec * 1000; + times[1].tv_usec = attrs->ia_mtime.tv_nsec / 1000; } if (fd >= 0) { diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 09ee07f02663..3b3cc28cdefc 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -768,7 +768,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) case Opt_mode: if (match_octal(&args[0], &option)) goto bad_val; - pconfig->mode = option & 0777U; + pconfig->mode = option & 01777U; break; case Opt_size: { diff --git a/fs/libfs.c b/fs/libfs.c index 6e68b700958d..5523bde96387 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -341,13 +341,10 @@ int simple_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { if (!PageUptodate(page)) { - if (to - from != PAGE_CACHE_SIZE) { - void *kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr, 0, from); - memset(kaddr + to, 0, PAGE_CACHE_SIZE - to); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - } + if (to - from != PAGE_CACHE_SIZE) + zero_user_segments(page, + 0, from, + to, PAGE_CACHE_SIZE); } return 0; } diff --git a/fs/mpage.c b/fs/mpage.c index d54f8f897224..5df564366f36 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -276,9 +276,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, } if (first_hole != blocks_per_page) { - zero_user_page(page, first_hole << blkbits, - PAGE_CACHE_SIZE - (first_hole << blkbits), - KM_USER0); + zero_user_segment(page, first_hole << blkbits, PAGE_CACHE_SIZE); if (first_hole == 0) { SetPageUptodate(page); unlock_page(page); @@ -571,8 +569,7 @@ page_is_mapped: if (page->index > end_index || !offset) goto confused; - zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, - KM_USER0); + zero_user_segment(page, offset, PAGE_CACHE_SIZE); } /* diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 8fd6dfbe1bc3..3d7d9631e125 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -79,7 +79,7 @@ void nfs_readdata_release(void *data) static int nfs_return_empty_page(struct page *page) { - zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); + zero_user(page, 0, PAGE_CACHE_SIZE); SetPageUptodate(page); unlock_page(page); return 0; @@ -103,10 +103,10 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) pglen = PAGE_CACHE_SIZE - base; for (;;) { if (remainder <= pglen) { - zero_user_page(*pages, base, remainder, KM_USER0); + zero_user(*pages, base, remainder); break; } - zero_user_page(*pages, base, pglen, KM_USER0); + zero_user(*pages, base, pglen); pages++; remainder -= pglen; pglen = PAGE_CACHE_SIZE; @@ -130,7 +130,7 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, return PTR_ERR(new); } if (len < PAGE_CACHE_SIZE) - zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); + zero_user_segment(page, len, PAGE_CACHE_SIZE); nfs_list_add_request(new, &one_request); if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) @@ -532,7 +532,7 @@ readpage_async_filler(void *data, struct page *page) goto out_error; if (len < PAGE_CACHE_SIZE) - zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); + zero_user_segment(page, len, PAGE_CACHE_SIZE); nfs_pageio_add_request(desc->pgio, new); return 0; out_error: diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 522efff3e2c5..b144b1957dd9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -665,9 +665,7 @@ zero_page: * then we need to zero any uninitalised data. */ if (req->wb_pgbase == 0 && req->wb_bytes != PAGE_CACHE_SIZE && !PageUptodate(req->wb_page)) - zero_user_page(req->wb_page, req->wb_bytes, - PAGE_CACHE_SIZE - req->wb_bytes, - KM_USER0); + zero_user_segment(req->wb_page, req->wb_bytes, PAGE_CACHE_SIZE); return req; } diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 21928056e35e..d13403e33622 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -11,8 +11,6 @@ #include <linux/nfsd/nfsd.h> #include <linux/nfsd/export.h> -#define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE)) - int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) { struct exp_flavor_info *f; @@ -69,10 +67,12 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) ret = set_current_groups(cred.cr_group_info); put_group_info(cred.cr_group_info); if ((cred.cr_uid)) { - cap_t(current->cap_effective) &= ~CAP_NFSD_MASK; + current->cap_effective = + cap_drop_nfsd_set(current->cap_effective); } else { - cap_t(current->cap_effective) |= (CAP_NFSD_MASK & - current->cap_permitted); + current->cap_effective = + cap_raise_nfsd_set(current->cap_effective, + current->cap_permitted); } return ret; } diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index ad87cb01299b..00e9ccde8e42 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -87,13 +87,17 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) /* Check for the current buffer head overflowing. */ if (unlikely(file_ofs + bh->b_size > init_size)) { int ofs; + void *kaddr; ofs = 0; if (file_ofs < init_size) ofs = init_size - file_ofs; local_irq_save(flags); - zero_user_page(page, bh_offset(bh) + ofs, - bh->b_size - ofs, KM_BIO_SRC_IRQ); + kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); + memset(kaddr + bh_offset(bh) + ofs, 0, + bh->b_size - ofs); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); local_irq_restore(flags); } } else { @@ -334,7 +338,7 @@ handle_hole: bh->b_blocknr = -1UL; clear_buffer_mapped(bh); handle_zblock: - zero_user_page(page, i * blocksize, blocksize, KM_USER0); + zero_user(page, i * blocksize, blocksize); if (likely(!err)) set_buffer_uptodate(bh); } while (i++, iblock++, (bh = bh->b_this_page) != head); @@ -410,7 +414,7 @@ retry_readpage: /* Is the page fully outside i_size? (truncate in progress) */ if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)) { - zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); + zero_user(page, 0, PAGE_CACHE_SIZE); ntfs_debug("Read outside i_size - truncated?"); goto done; } @@ -459,7 +463,7 @@ retry_readpage: * ok to ignore the compressed flag here. */ if (unlikely(page->index > 0)) { - zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); + zero_user(page, 0, PAGE_CACHE_SIZE); goto done; } if (!NInoAttr(ni)) @@ -788,8 +792,7 @@ lock_retry_remap: if (err == -ENOENT || lcn == LCN_ENOENT) { bh->b_blocknr = -1; clear_buffer_dirty(bh); - zero_user_page(page, bh_offset(bh), blocksize, - KM_USER0); + zero_user(page, bh_offset(bh), blocksize); set_buffer_uptodate(bh); err = 0; continue; @@ -1414,8 +1417,7 @@ retry_writepage: if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) { /* The page straddles i_size. */ unsigned int ofs = i_size & ~PAGE_CACHE_MASK; - zero_user_page(page, ofs, PAGE_CACHE_SIZE - ofs, - KM_USER0); + zero_user_segment(page, ofs, PAGE_CACHE_SIZE); } /* Handle mst protected attributes. */ if (NInoMstProtected(ni)) diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index d1619d05eb23..33ff314cc507 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -565,7 +565,7 @@ int ntfs_read_compressed_block(struct page *page) if (xpage >= max_page) { kfree(bhs); kfree(pages); - zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); + zero_user(page, 0, PAGE_CACHE_SIZE); ntfs_debug("Compressed read outside i_size - truncated?"); SetPageUptodate(page); unlock_page(page); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 6cd08dfdc2ed..3c5550cd11d6 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -607,8 +607,8 @@ do_next_page: ntfs_submit_bh_for_read(bh); *wait_bh++ = bh; } else { - zero_user_page(page, bh_offset(bh), - blocksize, KM_USER0); + zero_user(page, bh_offset(bh), + blocksize); set_buffer_uptodate(bh); } } @@ -683,9 +683,8 @@ map_buffer_cached: ntfs_submit_bh_for_read(bh); *wait_bh++ = bh; } else { - zero_user_page(page, - bh_offset(bh), - blocksize, KM_USER0); + zero_user(page, bh_offset(bh), + blocksize); set_buffer_uptodate(bh); } } @@ -703,8 +702,8 @@ map_buffer_cached: */ if (bh_end <= pos || bh_pos >= end) { if (!buffer_uptodate(bh)) { - zero_user_page(page, bh_offset(bh), - blocksize, KM_USER0); + zero_user(page, bh_offset(bh), + blocksize); set_buffer_uptodate(bh); } mark_buffer_dirty(bh); @@ -743,8 +742,7 @@ map_buffer_cached: if (!buffer_uptodate(bh)) set_buffer_uptodate(bh); } else if (!buffer_uptodate(bh)) { - zero_user_page(page, bh_offset(bh), blocksize, - KM_USER0); + zero_user(page, bh_offset(bh), blocksize); set_buffer_uptodate(bh); } continue; @@ -868,8 +866,8 @@ rl_not_mapped_enoent: if (!buffer_uptodate(bh)) set_buffer_uptodate(bh); } else if (!buffer_uptodate(bh)) { - zero_user_page(page, bh_offset(bh), - blocksize, KM_USER0); + zero_user(page, bh_offset(bh), + blocksize); set_buffer_uptodate(bh); } continue; @@ -1128,8 +1126,8 @@ rl_not_mapped_enoent: if (likely(bh_pos < initialized_size)) ofs = initialized_size - bh_pos; - zero_user_page(page, bh_offset(bh) + ofs, - blocksize - ofs, KM_USER0); + zero_user_segment(page, bh_offset(bh) + ofs, + blocksize); } } else /* if (unlikely(!buffer_uptodate(bh))) */ err = -EIO; @@ -1269,8 +1267,8 @@ rl_not_mapped_enoent: if (PageUptodate(page)) set_buffer_uptodate(bh); else { - zero_user_page(page, bh_offset(bh), - blocksize, KM_USER0); + zero_user(page, bh_offset(bh), + blocksize); set_buffer_uptodate(bh); } } @@ -1330,7 +1328,7 @@ err_out: len = PAGE_CACHE_SIZE; if (len > bytes) len = bytes; - zero_user_page(*pages, 0, len, KM_USER0); + zero_user(*pages, 0, len); } goto out; } @@ -1451,7 +1449,7 @@ err_out: len = PAGE_CACHE_SIZE; if (len > bytes) len = bytes; - zero_user_page(*pages, 0, len, KM_USER0); + zero_user(*pages, 0, len); } goto out; } diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index e38e402e4103..cd0be3f5c3cd 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h @@ -85,8 +85,7 @@ static inline void *ntfs_malloc_nofs_nofail(unsigned long size) static inline void ntfs_free(void *addr) { - if (likely(((unsigned long)addr < VMALLOC_START) || - ((unsigned long)addr >= VMALLOC_END ))) { + if (!is_vmalloc_addr(addr)) { kfree(addr); /* free_page((unsigned long)addr); */ return; diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 64713e149e46..447206eb5c2e 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5670,7 +5670,7 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, mlog_errno(ret); if (zero) - zero_user_page(page, from, to - from, KM_USER0); + zero_user_segment(page, from, to); /* * Need to set the buffers we zero'd into uptodate diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index bc7b4cbbe8ec..82243127eebf 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -307,7 +307,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) * XXX sys_readahead() seems to get that wrong? */ if (start >= i_size_read(inode)) { - zero_user_page(page, 0, PAGE_SIZE, KM_USER0); + zero_user(page, 0, PAGE_SIZE); SetPageUptodate(page); ret = 0; goto out_alloc; @@ -869,7 +869,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, if (block_start >= to) break; - zero_user_page(page, block_start, bh->b_size, KM_USER0); + zero_user(page, block_start, bh->b_size); set_buffer_uptodate(bh); mark_buffer_dirty(bh); @@ -1034,7 +1034,7 @@ static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to start = max(from, block_start); end = min(to, block_end); - zero_user_page(page, start, end - start, KM_USER0); + zero_user_segment(page, start, end); set_buffer_uptodate(bh); } diff --git a/fs/proc/array.c b/fs/proc/array.c index b380313092bd..6ba2746e4517 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -281,14 +281,23 @@ static inline char *task_sig(struct task_struct *p, char *buffer) return buffer; } +static char *render_cap_t(const char *header, kernel_cap_t *a, char *buffer) +{ + unsigned __capi; + + buffer += sprintf(buffer, "%s", header); + CAP_FOR_EACH_U32(__capi) { + buffer += sprintf(buffer, "%08x", + a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]); + } + return buffer + sprintf(buffer, "\n"); +} + static inline char *task_cap(struct task_struct *p, char *buffer) { - return buffer + sprintf(buffer, "CapInh:\t%016x\n" - "CapPrm:\t%016x\n" - "CapEff:\t%016x\n", - cap_t(p->cap_inheritable), - cap_t(p->cap_permitted), - cap_t(p->cap_effective)); + buffer = render_cap_t("CapInh:\t", &p->cap_inheritable, buffer); + buffer = render_cap_t("CapPrm:\t", &p->cap_permitted, buffer); + return render_cap_t("CapEff:\t", &p->cap_effective, buffer); } static inline char *task_context_switch_counts(struct task_struct *p, diff --git a/fs/proc/base.c b/fs/proc/base.c index 33537487f5ab..c59852b38787 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -88,10 +88,6 @@ * in /proc for a task before it execs a suid executable. */ - -/* Worst case buffer size needed for holding an integer. */ -#define PROC_NUMBUF 13 - struct pid_entry { char *name; int len; @@ -787,7 +783,7 @@ out_no_task: } #endif -static loff_t mem_lseek(struct file * file, loff_t offset, int orig) +loff_t mem_lseek(struct file *file, loff_t offset, int orig) { switch (orig) { case 0: @@ -935,42 +931,6 @@ static const struct file_operations proc_oom_adjust_operations = { .write = oom_adjust_write, }; -#ifdef CONFIG_MMU -static ssize_t clear_refs_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct task_struct *task; - char buffer[PROC_NUMBUF], *end; - struct mm_struct *mm; - - memset(buffer, 0, sizeof(buffer)); - if (count > sizeof(buffer) - 1) - count = sizeof(buffer) - 1; - if (copy_from_user(buffer, buf, count)) - return -EFAULT; - if (!simple_strtol(buffer, &end, 0)) - return -EINVAL; - if (*end == '\n') - end++; - task = get_proc_task(file->f_path.dentry->d_inode); - if (!task) - return -ESRCH; - mm = get_task_mm(task); - if (mm) { - clear_refs_smap(mm); - mmput(mm); - } - put_task_struct(task); - if (end - buffer == 0) - return -EIO; - return end - buffer; -} - -static struct file_operations proc_clear_refs_operations = { - .write = clear_refs_write, -}; -#endif - #ifdef CONFIG_AUDITSYSCALL #define TMPBUFLEN 21 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, @@ -2289,9 +2249,10 @@ static const struct pid_entry tgid_base_stuff[] = { LNK("exe", exe), REG("mounts", S_IRUGO, mounts), REG("mountstats", S_IRUSR, mountstats), -#ifdef CONFIG_MMU +#ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, clear_refs), REG("smaps", S_IRUGO, smaps), + REG("pagemap", S_IRUSR, pagemap), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, attr_dir), @@ -2360,7 +2321,8 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) name.len = snprintf(buf, sizeof(buf), "%d", pid); dentry = d_hash_and_lookup(mnt->mnt_root, &name); if (dentry) { - shrink_dcache_parent(dentry); + if (!(current->flags & PF_EXITING)) + shrink_dcache_parent(dentry); d_drop(dentry); dput(dentry); } @@ -2617,9 +2579,10 @@ static const struct pid_entry tid_base_stuff[] = { LNK("root", root), LNK("exe", exe), REG("mounts", S_IRUGO, mounts), -#ifdef CONFIG_MMU +#ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, clear_refs), REG("smaps", S_IRUGO, smaps), + REG("pagemap", S_IRUSR, pagemap), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, attr_dir), diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 05b3e9006262..7d57e8069924 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -52,15 +52,13 @@ extern int proc_tid_stat(struct task_struct *, char *); extern int proc_tgid_stat(struct task_struct *, char *); extern int proc_pid_status(struct task_struct *, char *); extern int proc_pid_statm(struct task_struct *, char *); +extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); extern const struct file_operations proc_maps_operations; extern const struct file_operations proc_numa_maps_operations; extern const struct file_operations proc_smaps_operations; - -extern const struct file_operations proc_maps_operations; -extern const struct file_operations proc_numa_maps_operations; -extern const struct file_operations proc_smaps_operations; - +extern const struct file_operations proc_clear_refs_operations; +extern const struct file_operations proc_pagemap_operations; void free_proc_entry(struct proc_dir_entry *de); diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 1be73082edd3..7dd26e18cbfd 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -325,7 +325,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) if (m == NULL) { if (clear_user(buffer, tsz)) return -EFAULT; - } else if ((start >= VMALLOC_START) && (start < VMALLOC_END)) { + } else if (is_vmalloc_addr((void *)start)) { char * elf_buf; struct vm_struct *m; unsigned long curstart = start; diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 3462bfde89f6..51288db37a0c 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -46,6 +46,7 @@ #include <linux/vmalloc.h> #include <linux/crash_dump.h> #include <linux/pid_namespace.h> +#include <linux/bootmem.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/io.h> @@ -675,6 +676,137 @@ static const struct file_operations proc_sysrq_trigger_operations = { }; #endif +#ifdef CONFIG_PROC_PAGE_MONITOR +#define KPMSIZE sizeof(u64) +#define KPMMASK (KPMSIZE - 1) +/* /proc/kpagecount - an array exposing page counts + * + * Each entry is a u64 representing the corresponding + * physical page count. + */ +static ssize_t kpagecount_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + u64 __user *out = (u64 __user *)buf; + struct page *ppage; + unsigned long src = *ppos; + unsigned long pfn; + ssize_t ret = 0; + u64 pcount; + + pfn = src / KPMSIZE; + count = min_t(size_t, count, (max_pfn * KPMSIZE) - src); + if (src & KPMMASK || count & KPMMASK) + return -EIO; + + while (count > 0) { + ppage = NULL; + if (pfn_valid(pfn)) + ppage = pfn_to_page(pfn); + pfn++; + if (!ppage) + pcount = 0; + else + pcount = atomic_read(&ppage->_count); + + if (put_user(pcount, out++)) { + ret = -EFAULT; + break; + } + + count -= KPMSIZE; + } + + *ppos += (char __user *)out - buf; + if (!ret) + ret = (char __user *)out - buf; + return ret; +} + +static struct file_operations proc_kpagecount_operations = { + .llseek = mem_lseek, + .read = kpagecount_read, +}; + +/* /proc/kpageflags - an array exposing page flags + * + * Each entry is a u64 representing the corresponding + * physical page flags. + */ + +/* These macros are used to decouple internal flags from exported ones */ + +#define KPF_LOCKED 0 +#define KPF_ERROR 1 +#define KPF_REFERENCED 2 +#define KPF_UPTODATE 3 +#define KPF_DIRTY 4 +#define KPF_LRU 5 +#define KPF_ACTIVE 6 +#define KPF_SLAB 7 +#define KPF_WRITEBACK 8 +#define KPF_RECLAIM 9 +#define KPF_BUDDY 10 + +#define kpf_copy_bit(flags, srcpos, dstpos) (((flags >> srcpos) & 1) << dstpos) + +static ssize_t kpageflags_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + u64 __user *out = (u64 __user *)buf; + struct page *ppage; + unsigned long src = *ppos; + unsigned long pfn; + ssize_t ret = 0; + u64 kflags, uflags; + + pfn = src / KPMSIZE; + count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src); + if (src & KPMMASK || count & KPMMASK) + return -EIO; + + while (count > 0) { + ppage = NULL; + if (pfn_valid(pfn)) + ppage = pfn_to_page(pfn); + pfn++; + if (!ppage) + kflags = 0; + else + kflags = ppage->flags; + + uflags = kpf_copy_bit(KPF_LOCKED, PG_locked, kflags) | + kpf_copy_bit(kflags, KPF_ERROR, PG_error) | + kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) | + kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) | + kpf_copy_bit(kflags, KPF_DIRTY, PG_dirty) | + kpf_copy_bit(kflags, KPF_LRU, PG_lru) | + kpf_copy_bit(kflags, KPF_ACTIVE, PG_active) | + kpf_copy_bit(kflags, KPF_SLAB, PG_slab) | + kpf_copy_bit(kflags, KPF_WRITEBACK, PG_writeback) | + kpf_copy_bit(kflags, KPF_RECLAIM, PG_reclaim) | + kpf_copy_bit(kflags, KPF_BUDDY, PG_buddy); + + if (put_user(uflags, out++)) { + ret = -EFAULT; + break; + } + + count -= KPMSIZE; + } + + *ppos += (char __user *)out - buf; + if (!ret) + ret = (char __user *)out - buf; + return ret; +} + +static struct file_operations proc_kpageflags_operations = { + .llseek = mem_lseek, + .read = kpageflags_read, +}; +#endif /* CONFIG_PROC_PAGE_MONITOR */ + struct proc_dir_entry *proc_root_kcore; void create_seq_entry(char *name, mode_t mode, const struct file_operations *f) @@ -755,6 +887,10 @@ void __init proc_misc_init(void) (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; } #endif +#ifdef CONFIG_PROC_PAGE_MONITOR + create_seq_entry("kpagecount", S_IRUSR, &proc_kpagecount_operations); + create_seq_entry("kpageflags", S_IRUSR, &proc_kpageflags_operations); +#endif #ifdef CONFIG_PROC_VMCORE proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL); if (proc_vmcore) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 8043a3eab52c..38338ed98cc6 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -5,7 +5,10 @@ #include <linux/highmem.h> #include <linux/ptrace.h> #include <linux/pagemap.h> +#include <linux/ptrace.h> #include <linux/mempolicy.h> +#include <linux/swap.h> +#include <linux/swapops.h> #include <asm/elf.h> #include <asm/uaccess.h> @@ -114,24 +117,124 @@ static void pad_len_spaces(struct seq_file *m, int len) seq_printf(m, "%*c", len, ' '); } -struct mem_size_stats +static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) { - unsigned long resident; - unsigned long shared_clean; - unsigned long shared_dirty; - unsigned long private_clean; - unsigned long private_dirty; - unsigned long referenced; -}; + if (vma && vma != priv->tail_vma) { + struct mm_struct *mm = vma->vm_mm; + up_read(&mm->mmap_sem); + mmput(mm); + } +} -struct pmd_walker { - struct vm_area_struct *vma; - void *private; - void (*action)(struct vm_area_struct *, pmd_t *, unsigned long, - unsigned long, void *); -}; +static void *m_start(struct seq_file *m, loff_t *pos) +{ + struct proc_maps_private *priv = m->private; + unsigned long last_addr = m->version; + struct mm_struct *mm; + struct vm_area_struct *vma, *tail_vma = NULL; + loff_t l = *pos; + + /* Clear the per syscall fields in priv */ + priv->task = NULL; + priv->tail_vma = NULL; + + /* + * We remember last_addr rather than next_addr to hit with + * mmap_cache most of the time. We have zero last_addr at + * the beginning and also after lseek. We will have -1 last_addr + * after the end of the vmas. + */ + + if (last_addr == -1UL) + return NULL; + + priv->task = get_pid_task(priv->pid, PIDTYPE_PID); + if (!priv->task) + return NULL; + + mm = mm_for_maps(priv->task); + if (!mm) + return NULL; + + tail_vma = get_gate_vma(priv->task); + priv->tail_vma = tail_vma; + + /* Start with last addr hint */ + vma = find_vma(mm, last_addr); + if (last_addr && vma) { + vma = vma->vm_next; + goto out; + } + + /* + * Check the vma index is within the range and do + * sequential scan until m_index. + */ + vma = NULL; + if ((unsigned long)l < mm->map_count) { + vma = mm->mmap; + while (l-- && vma) + vma = vma->vm_next; + goto out; + } + + if (l != mm->map_count) + tail_vma = NULL; /* After gate vma */ + +out: + if (vma) + return vma; + + /* End of vmas has been reached */ + m->version = (tail_vma != NULL)? 0: -1UL; + up_read(&mm->mmap_sem); + mmput(mm); + return tail_vma; +} -static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) +static void *m_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct proc_maps_private *priv = m->private; + struct vm_area_struct *vma = v; + struct vm_area_struct *tail_vma = priv->tail_vma; + + (*pos)++; + if (vma && (vma != tail_vma) && vma->vm_next) + return vma->vm_next; + vma_stop(priv, vma); + return (vma != tail_vma)? tail_vma: NULL; +} + +static void m_stop(struct seq_file *m, void *v) +{ + struct proc_maps_private *priv = m->private; + struct vm_area_struct *vma = v; + + vma_stop(priv, vma); + if (priv->task) + put_task_struct(priv->task); +} + +static int do_maps_open(struct inode *inode, struct file *file, + struct seq_operations *ops) +{ + struct proc_maps_private *priv; + int ret = -ENOMEM; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (priv) { + priv->pid = proc_pid(inode); + ret = seq_open(file, ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = priv; + } else { + kfree(priv); + } + } + return ret; +} + +static int show_map(struct seq_file *m, void *v) { struct proc_maps_private *priv = m->private; struct task_struct *task = priv->task; @@ -191,41 +294,71 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats } seq_putc(m, '\n'); - if (mss) - seq_printf(m, - "Size: %8lu kB\n" - "Rss: %8lu kB\n" - "Shared_Clean: %8lu kB\n" - "Shared_Dirty: %8lu kB\n" - "Private_Clean: %8lu kB\n" - "Private_Dirty: %8lu kB\n" - "Referenced: %8lu kB\n", - (vma->vm_end - vma->vm_start) >> 10, - mss->resident >> 10, - mss->shared_clean >> 10, - mss->shared_dirty >> 10, - mss->private_clean >> 10, - mss->private_dirty >> 10, - mss->referenced >> 10); - if (m->count < m->size) /* vma is copied successfully */ m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; return 0; } -static int show_map(struct seq_file *m, void *v) +static struct seq_operations proc_pid_maps_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_map +}; + +static int maps_open(struct inode *inode, struct file *file) { - return show_map_internal(m, v, NULL); + return do_maps_open(inode, file, &proc_pid_maps_op); } -static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end, - void *private) +const struct file_operations proc_maps_operations = { + .open = maps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +/* + * Proportional Set Size(PSS): my share of RSS. + * + * PSS of a process is the count of pages it has in memory, where each + * page is divided by the number of processes sharing it. So if a + * process has 1000 pages all to itself, and 1000 shared with one other + * process, its PSS will be 1500. + * + * To keep (accumulated) division errors low, we adopt a 64bit + * fixed-point pss counter to minimize division errors. So (pss >> + * PSS_SHIFT) would be the real byte count. + * + * A shift of 12 before division means (assuming 4K page size): + * - 1M 3-user-pages add up to 8KB errors; + * - supports mapcount up to 2^24, or 16M; + * - supports PSS up to 2^52 bytes, or 4PB. + */ +#define PSS_SHIFT 12 + +#ifdef CONFIG_PROC_PAGE_MONITOR +struct mem_size_stats +{ + struct vm_area_struct *vma; + unsigned long resident; + unsigned long shared_clean; + unsigned long shared_dirty; + unsigned long private_clean; + unsigned long private_dirty; + unsigned long referenced; + u64 pss; +}; + +static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, + void *private) { struct mem_size_stats *mss = private; + struct vm_area_struct *vma = mss->vma; pte_t *pte, ptent; spinlock_t *ptl; struct page *page; + int mapcount; pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) { @@ -242,26 +375,88 @@ static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd, /* Accumulate the size in pages that have been accessed. */ if (pte_young(ptent) || PageReferenced(page)) mss->referenced += PAGE_SIZE; - if (page_mapcount(page) >= 2) { + mapcount = page_mapcount(page); + if (mapcount >= 2) { if (pte_dirty(ptent)) mss->shared_dirty += PAGE_SIZE; else mss->shared_clean += PAGE_SIZE; + mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; } else { if (pte_dirty(ptent)) mss->private_dirty += PAGE_SIZE; else mss->private_clean += PAGE_SIZE; + mss->pss += (PAGE_SIZE << PSS_SHIFT); } } pte_unmap_unlock(pte - 1, ptl); cond_resched(); + return 0; } -static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end, - void *private) +static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range }; + +static int show_smap(struct seq_file *m, void *v) { + struct vm_area_struct *vma = v; + struct mem_size_stats mss; + int ret; + + memset(&mss, 0, sizeof mss); + mss.vma = vma; + if (vma->vm_mm && !is_vm_hugetlb_page(vma)) + walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end, + &smaps_walk, &mss); + + ret = show_map(m, v); + if (ret) + return ret; + + seq_printf(m, + "Size: %8lu kB\n" + "Rss: %8lu kB\n" + "Pss: %8lu kB\n" + "Shared_Clean: %8lu kB\n" + "Shared_Dirty: %8lu kB\n" + "Private_Clean: %8lu kB\n" + "Private_Dirty: %8lu kB\n" + "Referenced: %8lu kB\n", + (vma->vm_end - vma->vm_start) >> 10, + mss.resident >> 10, + (unsigned long)(mss.pss >> (10 + PSS_SHIFT)), + mss.shared_clean >> 10, + mss.shared_dirty >> 10, + mss.private_clean >> 10, + mss.private_dirty >> 10, + mss.referenced >> 10); + + return ret; +} + +static struct seq_operations proc_pid_smaps_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_smap +}; + +static int smaps_open(struct inode *inode, struct file *file) +{ + return do_maps_open(inode, file, &proc_pid_smaps_op); +} + +const struct file_operations proc_smaps_operations = { + .open = smaps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, void *private) +{ + struct vm_area_struct *vma = private; pte_t *pte, ptent; spinlock_t *ptl; struct page *page; @@ -282,235 +477,248 @@ static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd, } pte_unmap_unlock(pte - 1, ptl); cond_resched(); + return 0; } -static inline void walk_pmd_range(struct pmd_walker *walker, pud_t *pud, - unsigned long addr, unsigned long end) +static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range }; + +static ssize_t clear_refs_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) { - pmd_t *pmd; - unsigned long next; + struct task_struct *task; + char buffer[PROC_NUMBUF], *end; + struct mm_struct *mm; + struct vm_area_struct *vma; - for (pmd = pmd_offset(pud, addr); addr != end; - pmd++, addr = next) { - next = pmd_addr_end(addr, end); - if (pmd_none_or_clear_bad(pmd)) - continue; - walker->action(walker->vma, pmd, addr, next, walker->private); + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) + return -EFAULT; + if (!simple_strtol(buffer, &end, 0)) + return -EINVAL; + if (*end == '\n') + end++; + task = get_proc_task(file->f_path.dentry->d_inode); + if (!task) + return -ESRCH; + mm = get_task_mm(task); + if (mm) { + down_read(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) + if (!is_vm_hugetlb_page(vma)) + walk_page_range(mm, vma->vm_start, vma->vm_end, + &clear_refs_walk, vma); + flush_tlb_mm(mm); + up_read(&mm->mmap_sem); + mmput(mm); } + put_task_struct(task); + if (end - buffer == 0) + return -EIO; + return end - buffer; } -static inline void walk_pud_range(struct pmd_walker *walker, pgd_t *pgd, - unsigned long addr, unsigned long end) -{ - pud_t *pud; - unsigned long next; +const struct file_operations proc_clear_refs_operations = { + .write = clear_refs_write, +}; - for (pud = pud_offset(pgd, addr); addr != end; - pud++, addr = next) { - next = pud_addr_end(addr, end); - if (pud_none_or_clear_bad(pud)) - continue; - walk_pmd_range(walker, pud, addr, next); +struct pagemapread { + char __user *out, *end; +}; + +#define PM_ENTRY_BYTES sizeof(u64) +#define PM_RESERVED_BITS 3 +#define PM_RESERVED_OFFSET (64 - PM_RESERVED_BITS) +#define PM_RESERVED_MASK (((1LL<<PM_RESERVED_BITS)-1) << PM_RESERVED_OFFSET) +#define PM_SPECIAL(nr) (((nr) << PM_RESERVED_OFFSET) | PM_RESERVED_MASK) +#define PM_NOT_PRESENT PM_SPECIAL(1LL) +#define PM_SWAP PM_SPECIAL(2LL) +#define PM_END_OF_BUFFER 1 + +static int add_to_pagemap(unsigned long addr, u64 pfn, + struct pagemapread *pm) +{ + /* + * Make sure there's room in the buffer for an + * entire entry. Otherwise, only copy part of + * the pfn. + */ + if (pm->out + PM_ENTRY_BYTES >= pm->end) { + if (copy_to_user(pm->out, &pfn, pm->end - pm->out)) + return -EFAULT; + pm->out = pm->end; + return PM_END_OF_BUFFER; } + + if (put_user(pfn, pm->out)) + return -EFAULT; + pm->out += PM_ENTRY_BYTES; + return 0; } -/* - * walk_page_range - walk the page tables of a VMA with a callback - * @vma - VMA to walk - * @action - callback invoked for every bottom-level (PTE) page table - * @private - private data passed to the callback function - * - * Recursively walk the page table for the memory area in a VMA, calling - * a callback for every bottom-level (PTE) page table. - */ -static inline void walk_page_range(struct vm_area_struct *vma, - void (*action)(struct vm_area_struct *, - pmd_t *, unsigned long, - unsigned long, void *), - void *private) +static int pagemap_pte_hole(unsigned long start, unsigned long end, + void *private) { - unsigned long addr = vma->vm_start; - unsigned long end = vma->vm_end; - struct pmd_walker walker = { - .vma = vma, - .private = private, - .action = action, - }; - pgd_t *pgd; - unsigned long next; - - for (pgd = pgd_offset(vma->vm_mm, addr); addr != end; - pgd++, addr = next) { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - continue; - walk_pud_range(&walker, pgd, addr, next); + struct pagemapread *pm = private; + unsigned long addr; + int err = 0; + for (addr = start; addr < end; addr += PAGE_SIZE) { + err = add_to_pagemap(addr, PM_NOT_PRESENT, pm); + if (err) + break; } + return err; } -static int show_smap(struct seq_file *m, void *v) +u64 swap_pte_to_pagemap_entry(pte_t pte) { - struct vm_area_struct *vma = v; - struct mem_size_stats mss; - - memset(&mss, 0, sizeof mss); - if (vma->vm_mm && !is_vm_hugetlb_page(vma)) - walk_page_range(vma, smaps_pte_range, &mss); - return show_map_internal(m, v, &mss); + swp_entry_t e = pte_to_swp_entry(pte); + return PM_SWAP | swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); } -void clear_refs_smap(struct mm_struct *mm) +static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, + void *private) { - struct vm_area_struct *vma; + struct pagemapread *pm = private; + pte_t *pte; + int err = 0; + + for (; addr != end; addr += PAGE_SIZE) { + u64 pfn = PM_NOT_PRESENT; + pte = pte_offset_map(pmd, addr); + if (is_swap_pte(*pte)) + pfn = swap_pte_to_pagemap_entry(*pte); + else if (pte_present(*pte)) + pfn = pte_pfn(*pte); + /* unmap so we're not in atomic when we copy to userspace */ + pte_unmap(pte); + err = add_to_pagemap(addr, pfn, pm); + if (err) + return err; + } - down_read(&mm->mmap_sem); - for (vma = mm->mmap; vma; vma = vma->vm_next) - if (vma->vm_mm && !is_vm_hugetlb_page(vma)) - walk_page_range(vma, clear_refs_pte_range, NULL); - flush_tlb_mm(mm); - up_read(&mm->mmap_sem); + cond_resched(); + + return err; } -static void *m_start(struct seq_file *m, loff_t *pos) +static struct mm_walk pagemap_walk = { + .pmd_entry = pagemap_pte_range, + .pte_hole = pagemap_pte_hole +}; + +/* + * /proc/pid/pagemap - an array mapping virtual pages to pfns + * + * For each page in the address space, this file contains one 64-bit + * entry representing the corresponding physical page frame number + * (PFN) if the page is present. If there is a swap entry for the + * physical page, then an encoding of the swap file number and the + * page's offset into the swap file are returned. If no page is + * present at all, PM_NOT_PRESENT is returned. This allows determining + * precisely which pages are mapped (or in swap) and comparing mapped + * pages between processes. + * + * Efficient users of this interface will use /proc/pid/maps to + * determine which areas of memory are actually mapped and llseek to + * skip over unmapped regions. + */ +static ssize_t pagemap_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) { - struct proc_maps_private *priv = m->private; - unsigned long last_addr = m->version; + struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); + struct page **pages, *page; + unsigned long uaddr, uend; struct mm_struct *mm; - struct vm_area_struct *vma, *tail_vma = NULL; - loff_t l = *pos; - - /* Clear the per syscall fields in priv */ - priv->task = NULL; - priv->tail_vma = NULL; + struct pagemapread pm; + int pagecount; + int ret = -ESRCH; - /* - * We remember last_addr rather than next_addr to hit with - * mmap_cache most of the time. We have zero last_addr at - * the beginning and also after lseek. We will have -1 last_addr - * after the end of the vmas. - */ + if (!task) + goto out; - if (last_addr == -1UL) - return NULL; + ret = -EACCES; + if (!ptrace_may_attach(task)) + goto out; - priv->task = get_pid_task(priv->pid, PIDTYPE_PID); - if (!priv->task) - return NULL; + ret = -EINVAL; + /* file position must be aligned */ + if (*ppos % PM_ENTRY_BYTES) + goto out; - mm = mm_for_maps(priv->task); + ret = 0; + mm = get_task_mm(task); if (!mm) - return NULL; - - priv->tail_vma = tail_vma = get_gate_vma(priv->task); - - /* Start with last addr hint */ - if (last_addr && (vma = find_vma(mm, last_addr))) { - vma = vma->vm_next; goto out; - } - /* - * Check the vma index is within the range and do - * sequential scan until m_index. - */ - vma = NULL; - if ((unsigned long)l < mm->map_count) { - vma = mm->mmap; - while (l-- && vma) - vma = vma->vm_next; - goto out; - } + ret = -ENOMEM; + uaddr = (unsigned long)buf & PAGE_MASK; + uend = (unsigned long)(buf + count); + pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; + pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL); + if (!pages) + goto out_task; - if (l != mm->map_count) - tail_vma = NULL; /* After gate vma */ + down_read(¤t->mm->mmap_sem); + ret = get_user_pages(current, current->mm, uaddr, pagecount, + 1, 0, pages, NULL); + up_read(¤t->mm->mmap_sem); -out: - if (vma) - return vma; + if (ret < 0) + goto out_free; - /* End of vmas has been reached */ - m->version = (tail_vma != NULL)? 0: -1UL; - up_read(&mm->mmap_sem); - mmput(mm); - return tail_vma; -} + pm.out = buf; + pm.end = buf + count; -static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) -{ - if (vma && vma != priv->tail_vma) { - struct mm_struct *mm = vma->vm_mm; - up_read(&mm->mmap_sem); - mmput(mm); + if (!ptrace_may_attach(task)) { + ret = -EIO; + } else { + unsigned long src = *ppos; + unsigned long svpfn = src / PM_ENTRY_BYTES; + unsigned long start_vaddr = svpfn << PAGE_SHIFT; + unsigned long end_vaddr = TASK_SIZE_OF(task); + + /* watch out for wraparound */ + if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) + start_vaddr = end_vaddr; + + /* + * The odds are that this will stop walking way + * before end_vaddr, because the length of the + * user buffer is tracked in "pm", and the walk + * will stop when we hit the end of the buffer. + */ + ret = walk_page_range(mm, start_vaddr, end_vaddr, + &pagemap_walk, &pm); + if (ret == PM_END_OF_BUFFER) + ret = 0; + /* don't need mmap_sem for these, but this looks cleaner */ + *ppos += pm.out - buf; + if (!ret) + ret = pm.out - buf; } -} - -static void *m_next(struct seq_file *m, void *v, loff_t *pos) -{ - struct proc_maps_private *priv = m->private; - struct vm_area_struct *vma = v; - struct vm_area_struct *tail_vma = priv->tail_vma; - - (*pos)++; - if (vma && (vma != tail_vma) && vma->vm_next) - return vma->vm_next; - vma_stop(priv, vma); - return (vma != tail_vma)? tail_vma: NULL; -} - -static void m_stop(struct seq_file *m, void *v) -{ - struct proc_maps_private *priv = m->private; - struct vm_area_struct *vma = v; - vma_stop(priv, vma); - if (priv->task) - put_task_struct(priv->task); -} - -static struct seq_operations proc_pid_maps_op = { - .start = m_start, - .next = m_next, - .stop = m_stop, - .show = show_map -}; - -static struct seq_operations proc_pid_smaps_op = { - .start = m_start, - .next = m_next, - .stop = m_stop, - .show = show_smap -}; - -static int do_maps_open(struct inode *inode, struct file *file, - struct seq_operations *ops) -{ - struct proc_maps_private *priv; - int ret = -ENOMEM; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (priv) { - priv->pid = proc_pid(inode); - ret = seq_open(file, ops); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = priv; - } else { - kfree(priv); - } + for (; pagecount; pagecount--) { + page = pages[pagecount-1]; + if (!PageReserved(page)) + SetPageDirty(page); + page_cache_release(page); } + mmput(mm); +out_free: + kfree(pages); +out_task: + put_task_struct(task); +out: return ret; } -static int maps_open(struct inode *inode, struct file *file) -{ - return do_maps_open(inode, file, &proc_pid_maps_op); -} - -const struct file_operations proc_maps_operations = { - .open = maps_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, +const struct file_operations proc_pagemap_operations = { + .llseek = mem_lseek, /* borrow this */ + .read = pagemap_read, }; +#endif /* CONFIG_PROC_PAGE_MONITOR */ #ifdef CONFIG_NUMA extern int show_numa_map(struct seq_file *m, void *v); @@ -545,15 +753,3 @@ const struct file_operations proc_numa_maps_operations = { .release = seq_release_private, }; #endif - -static int smaps_open(struct inode *inode, struct file *file) -{ - return do_maps_open(inode, file, &proc_pid_smaps_op); -} - -const struct file_operations proc_smaps_operations = { - .open = smaps_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 231fd5ccadc5..195309857e63 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2143,7 +2143,7 @@ int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) /* if we are not on a block boundary */ if (length) { length = blocksize - length; - zero_user_page(page, offset, length, KM_USER0); + zero_user(page, offset, length); if (buffer_mapped(bh) && bh->b_blocknr != 0) { mark_buffer_dirty(bh); } @@ -2367,7 +2367,7 @@ static int reiserfs_write_full_page(struct page *page, unlock_page(page); return 0; } - zero_user_page(page, last_offset, PAGE_CACHE_SIZE - last_offset, KM_USER0); + zero_user_segment(page, last_offset, PAGE_CACHE_SIZE); } bh = head; block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); diff --git a/fs/timerfd.c b/fs/timerfd.c index 61983f3b107c..10c80b59ec4b 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -25,13 +25,15 @@ struct timerfd_ctx { struct hrtimer tmr; ktime_t tintv; wait_queue_head_t wqh; + u64 ticks; int expired; + int clockid; }; /* * This gets called when the timer event triggers. We set the "expired" * flag, but we do not re-arm the timer (in case it's necessary, - * tintv.tv64 != 0) until the timer is read. + * tintv.tv64 != 0) until the timer is accessed. */ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) { @@ -40,13 +42,24 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) spin_lock_irqsave(&ctx->wqh.lock, flags); ctx->expired = 1; + ctx->ticks++; wake_up_locked(&ctx->wqh); spin_unlock_irqrestore(&ctx->wqh.lock, flags); return HRTIMER_NORESTART; } -static void timerfd_setup(struct timerfd_ctx *ctx, int clockid, int flags, +static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) +{ + ktime_t now, remaining; + + now = ctx->tmr.base->get_time(); + remaining = ktime_sub(ctx->tmr.expires, now); + + return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; +} + +static void timerfd_setup(struct timerfd_ctx *ctx, int flags, const struct itimerspec *ktmr) { enum hrtimer_mode htmode; @@ -57,8 +70,9 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int clockid, int flags, texp = timespec_to_ktime(ktmr->it_value); ctx->expired = 0; + ctx->ticks = 0; ctx->tintv = timespec_to_ktime(ktmr->it_interval); - hrtimer_init(&ctx->tmr, clockid, htmode); + hrtimer_init(&ctx->tmr, ctx->clockid, htmode); ctx->tmr.expires = texp; ctx->tmr.function = timerfd_tmrproc; if (texp.tv64 != 0) @@ -83,7 +97,7 @@ static unsigned int timerfd_poll(struct file *file, poll_table *wait) poll_wait(file, &ctx->wqh, wait); spin_lock_irqsave(&ctx->wqh.lock, flags); - if (ctx->expired) + if (ctx->ticks) events |= POLLIN; spin_unlock_irqrestore(&ctx->wqh.lock, flags); @@ -102,11 +116,11 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, return -EINVAL; spin_lock_irq(&ctx->wqh.lock); res = -EAGAIN; - if (!ctx->expired && !(file->f_flags & O_NONBLOCK)) { + if (!ctx->ticks && !(file->f_flags & O_NONBLOCK)) { __add_wait_queue(&ctx->wqh, &wait); for (res = 0;;) { set_current_state(TASK_INTERRUPTIBLE); - if (ctx->expired) { + if (ctx->ticks) { res = 0; break; } @@ -121,22 +135,21 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, __remove_wait_queue(&ctx->wqh, &wait); __set_current_state(TASK_RUNNING); } - if (ctx->expired) { - ctx->expired = 0; - if (ctx->tintv.tv64 != 0) { + if (ctx->ticks) { + ticks = ctx->ticks; + if (ctx->expired && ctx->tintv.tv64) { /* * If tintv.tv64 != 0, this is a periodic timer that * needs to be re-armed. We avoid doing it in the timer * callback to avoid DoS attacks specifying a very * short timer period. */ - ticks = (u64) - hrtimer_forward(&ctx->tmr, - hrtimer_cb_get_time(&ctx->tmr), - ctx->tintv); + ticks += hrtimer_forward_now(&ctx->tmr, + ctx->tintv) - 1; hrtimer_restart(&ctx->tmr); - } else - ticks = 1; + } + ctx->expired = 0; + ctx->ticks = 0; } spin_unlock_irq(&ctx->wqh.lock); if (ticks) @@ -150,76 +163,132 @@ static const struct file_operations timerfd_fops = { .read = timerfd_read, }; -asmlinkage long sys_timerfd(int ufd, int clockid, int flags, - const struct itimerspec __user *utmr) +static struct file *timerfd_fget(int fd) +{ + struct file *file; + + file = fget(fd); + if (!file) + return ERR_PTR(-EBADF); + if (file->f_op != &timerfd_fops) { + fput(file); + return ERR_PTR(-EINVAL); + } + + return file; +} + +asmlinkage long sys_timerfd_create(int clockid, int flags) { - int error; + int error, ufd; struct timerfd_ctx *ctx; struct file *file; struct inode *inode; - struct itimerspec ktmr; - - if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) - return -EFAULT; + if (flags) + return -EINVAL; if (clockid != CLOCK_MONOTONIC && clockid != CLOCK_REALTIME) return -EINVAL; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + init_waitqueue_head(&ctx->wqh); + ctx->clockid = clockid; + hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); + + error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]", + &timerfd_fops, ctx); + if (error) { + kfree(ctx); + return error; + } + + return ufd; +} + +asmlinkage long sys_timerfd_settime(int ufd, int flags, + const struct itimerspec __user *utmr, + struct itimerspec __user *otmr) +{ + struct file *file; + struct timerfd_ctx *ctx; + struct itimerspec ktmr, kotmr; + + if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) + return -EFAULT; + if (!timespec_valid(&ktmr.it_value) || !timespec_valid(&ktmr.it_interval)) return -EINVAL; - if (ufd == -1) { - ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - - init_waitqueue_head(&ctx->wqh); - - timerfd_setup(ctx, clockid, flags, &ktmr); - - /* - * When we call this, the initialization must be complete, since - * anon_inode_getfd() will install the fd. - */ - error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]", - &timerfd_fops, ctx); - if (error) - goto err_tmrcancel; - } else { - file = fget(ufd); - if (!file) - return -EBADF; - ctx = file->private_data; - if (file->f_op != &timerfd_fops) { - fput(file); - return -EINVAL; - } - /* - * We need to stop the existing timer before reprogramming - * it to the new values. - */ - for (;;) { - spin_lock_irq(&ctx->wqh.lock); - if (hrtimer_try_to_cancel(&ctx->tmr) >= 0) - break; - spin_unlock_irq(&ctx->wqh.lock); - cpu_relax(); - } - /* - * Re-program the timer to the new value ... - */ - timerfd_setup(ctx, clockid, flags, &ktmr); + file = timerfd_fget(ufd); + if (IS_ERR(file)) + return PTR_ERR(file); + ctx = file->private_data; + /* + * We need to stop the existing timer before reprogramming + * it to the new values. + */ + for (;;) { + spin_lock_irq(&ctx->wqh.lock); + if (hrtimer_try_to_cancel(&ctx->tmr) >= 0) + break; spin_unlock_irq(&ctx->wqh.lock); - fput(file); + cpu_relax(); } - return ufd; + /* + * If the timer is expired and it's periodic, we need to advance it + * because the caller may want to know the previous expiration time. + * We do not update "ticks" and "expired" since the timer will be + * re-programmed again in the following timerfd_setup() call. + */ + if (ctx->expired && ctx->tintv.tv64) + hrtimer_forward_now(&ctx->tmr, ctx->tintv); -err_tmrcancel: - hrtimer_cancel(&ctx->tmr); - kfree(ctx); - return error; + kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); + kotmr.it_interval = ktime_to_timespec(ctx->tintv); + + /* + * Re-program the timer to the new value ... + */ + timerfd_setup(ctx, flags, &ktmr); + + spin_unlock_irq(&ctx->wqh.lock); + fput(file); + if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) + return -EFAULT; + + return 0; +} + +asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr) +{ + struct file *file; + struct timerfd_ctx *ctx; + struct itimerspec kotmr; + + file = timerfd_fget(ufd); + if (IS_ERR(file)) + return PTR_ERR(file); + ctx = file->private_data; + + spin_lock_irq(&ctx->wqh.lock); + if (ctx->expired && ctx->tintv.tv64) { + ctx->expired = 0; + ctx->ticks += + hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1; + hrtimer_restart(&ctx->tmr); + } + kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); + kotmr.it_interval = ktime_to_timespec(ctx->tintv); + spin_unlock_irq(&ctx->wqh.lock); + fput(file); + + return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; } diff --git a/fs/xattr.c b/fs/xattr.c index 6645b7313b33..f7c8f87bb390 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -105,6 +105,33 @@ out: EXPORT_SYMBOL_GPL(vfs_setxattr); ssize_t +xattr_getsecurity(struct inode *inode, const char *name, void *value, + size_t size) +{ + void *buffer = NULL; + ssize_t len; + + if (!value || !size) { + len = security_inode_getsecurity(inode, name, &buffer, false); + goto out_noalloc; + } + + len = security_inode_getsecurity(inode, name, &buffer, true); + if (len < 0) + return len; + if (size < len) { + len = -ERANGE; + goto out; + } + memcpy(value, buffer, len); +out: + security_release_secctx(buffer, len); +out_noalloc: + return len; +} +EXPORT_SYMBOL_GPL(xattr_getsecurity); + +ssize_t vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) { struct inode *inode = dentry->d_inode; @@ -118,23 +145,23 @@ vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) if (error) return error; - if (inode->i_op->getxattr) - error = inode->i_op->getxattr(dentry, name, value, size); - else - error = -EOPNOTSUPP; - if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) { const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; - int ret = security_inode_getsecurity(inode, suffix, value, - size, error); + int ret = xattr_getsecurity(inode, suffix, value, size); /* * Only overwrite the return value if a security module * is actually active. */ - if (ret != -EOPNOTSUPP) - error = ret; + if (ret == -EOPNOTSUPP) + goto nolsm; + return ret; } +nolsm: + if (inode->i_op->getxattr) + error = inode->i_op->getxattr(dentry, name, value, size); + else + error = -EOPNOTSUPP; return error; } diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index ed2b16dff914..e040f1ce1b6a 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -92,8 +92,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize, void kmem_free(void *ptr, size_t size) { - if (((unsigned long)ptr < VMALLOC_START) || - ((unsigned long)ptr >= VMALLOC_END)) { + if (!is_vmalloc_addr(ptr)) { kfree(ptr); } else { vfree(ptr); diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index a49dd8d4b069..0382c19d6523 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -709,8 +709,7 @@ static inline struct page * mem_to_page( void *addr) { - if (((unsigned long)addr < VMALLOC_START) || - ((unsigned long)addr >= VMALLOC_END)) { + if ((!is_vmalloc_addr(addr))) { return virt_to_page(addr); } else { return vmalloc_to_page(addr); diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index d6a8dddb2268..6f614f35f650 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -155,7 +155,7 @@ xfs_iozero( if (status) break; - zero_user_page(page, offset, bytes, KM_USER0); + zero_user(page, offset, bytes); status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, page, fsdata); diff --git a/include/asm-alpha/atomic.h b/include/asm-alpha/atomic.h index f5cb7b878af2..ca88e54dec93 100644 --- a/include/asm-alpha/atomic.h +++ b/include/asm-alpha/atomic.h @@ -100,7 +100,7 @@ static __inline__ void atomic64_sub(long i, atomic64_t * v) /* * Same as above, but return the result value */ -static __inline__ long atomic_add_return(int i, atomic_t * v) +static inline int atomic_add_return(int i, atomic_t *v) { long temp, result; smp_mb(); diff --git a/include/asm-alpha/pci.h b/include/asm-alpha/pci.h index 30ee7669b19f..d5b10ef64364 100644 --- a/include/asm-alpha/pci.h +++ b/include/asm-alpha/pci.h @@ -4,6 +4,7 @@ #ifdef __KERNEL__ #include <linux/spinlock.h> +#include <linux/dma-mapping.h> #include <asm/scatterlist.h> #include <asm/machvec.h> diff --git a/include/asm-alpha/pgalloc.h b/include/asm-alpha/pgalloc.h index 471864e8d4c3..fdbedacc7375 100644 --- a/include/asm-alpha/pgalloc.h +++ b/include/asm-alpha/pgalloc.h @@ -31,7 +31,7 @@ pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) extern pgd_t *pgd_alloc(struct mm_struct *mm); static inline void -pgd_free(pgd_t *pgd) +pgd_free(struct mm_struct *mm, pgd_t *pgd) { free_page((unsigned long)pgd); } @@ -44,7 +44,7 @@ pmd_alloc_one(struct mm_struct *mm, unsigned long address) } static inline void -pmd_free(pmd_t *pmd) +pmd_free(struct mm_struct *mm, pmd_t *pmd) { free_page((unsigned long)pmd); } @@ -52,7 +52,7 @@ pmd_free(pmd_t *pmd) extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr); static inline void -pte_free_kernel(pte_t *pte) +pte_free_kernel(struct mm_struct *mm, pte_t *pte) { free_page((unsigned long)pte); } @@ -67,7 +67,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr) } static inline void -pte_free(struct page *page) +pte_free(struct mm_struct *mm, struct page *page) { __free_page(page); } diff --git a/include/asm-alpha/tlb.h b/include/asm-alpha/tlb.h index aa91335533e0..c13636575fba 100644 --- a/include/asm-alpha/tlb.h +++ b/include/asm-alpha/tlb.h @@ -9,7 +9,7 @@ #include <asm-generic/tlb.h> -#define __pte_free_tlb(tlb,pte) pte_free(pte) -#define __pmd_free_tlb(tlb,pmd) pmd_free(pmd) +#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, pte) +#define __pmd_free_tlb(tlb, pmd) pmd_free((tlb)->mm, pmd) #endif diff --git a/include/asm-alpha/tlbflush.h b/include/asm-alpha/tlbflush.h index b9e9147226f7..9d87aaa08c0d 100644 --- a/include/asm-alpha/tlbflush.h +++ b/include/asm-alpha/tlbflush.h @@ -142,6 +142,10 @@ extern void flush_tlb_range(struct vm_area_struct *, unsigned long, #endif /* CONFIG_SMP */ -#define flush_tlb_kernel_range(start, end) flush_tlb_all() +static inline void flush_tlb_kernel_range(unsigned long start, + unsigned long end) +{ + flush_tlb_all(); +} #endif /* _ALPHA_TLBFLUSH_H */ diff --git a/include/asm-alpha/unistd.h b/include/asm-alpha/unistd.h index 29bf2fdc91c0..5b5c17485942 100644 --- a/include/asm-alpha/unistd.h +++ b/include/asm-alpha/unistd.h @@ -442,7 +442,6 @@ #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_GETHOSTNAME -#define __ARCH_WANT_SYS_SOCKETCALL #define __ARCH_WANT_SYS_FADVISE64 #define __ARCH_WANT_SYS_GETPGRP #define __ARCH_WANT_SYS_OLD_GETRLIMIT diff --git a/include/asm-arm/arch-pxa/gpio.h b/include/asm-arm/arch-pxa/gpio.h index 9dbc2dc794f7..bdbf5f9ffdd5 100644 --- a/include/asm-arm/arch-pxa/gpio.h +++ b/include/asm-arm/arch-pxa/gpio.h @@ -28,43 +28,35 @@ #include <asm/irq.h> #include <asm/hardware.h> -static inline int gpio_request(unsigned gpio, const char *label) -{ - return 0; -} +#include <asm-generic/gpio.h> -static inline void gpio_free(unsigned gpio) -{ - return; -} -extern int gpio_direction_input(unsigned gpio); -extern int gpio_direction_output(unsigned gpio, int value); +/* NOTE: some PXAs have fewer on-chip GPIOs (like PXA255, with 85). + * Those cases currently cause holes in the GPIO number space. + */ +#define NR_BUILTIN_GPIO 128 -static inline int __gpio_get_value(unsigned gpio) +static inline int gpio_get_value(unsigned gpio) { - return GPLR(gpio) & GPIO_bit(gpio); + if (__builtin_constant_p(gpio) && (gpio < NR_BUILTIN_GPIO)) + return GPLR(gpio) & GPIO_bit(gpio); + else + return __gpio_get_value(gpio); } -#define gpio_get_value(gpio) \ - (__builtin_constant_p(gpio) ? \ - __gpio_get_value(gpio) : \ - pxa_gpio_get_value(gpio)) - -static inline void __gpio_set_value(unsigned gpio, int value) +static inline void gpio_set_value(unsigned gpio, int value) { - if (value) - GPSR(gpio) = GPIO_bit(gpio); - else - GPCR(gpio) = GPIO_bit(gpio); + if (__builtin_constant_p(gpio) && (gpio < NR_BUILTIN_GPIO)) { + if (value) + GPSR(gpio) = GPIO_bit(gpio); + else + GPCR(gpio) = GPIO_bit(gpio); + } else { + __gpio_set_value(gpio, value); + } } -#define gpio_set_value(gpio,value) \ - (__builtin_constant_p(gpio) ? \ - __gpio_set_value(gpio, value) : \ - pxa_gpio_set_value(gpio, value)) - -#include <asm-generic/gpio.h> /* cansleep wrappers */ +#define gpio_cansleep __gpio_cansleep #define gpio_to_irq(gpio) IRQ_GPIO(gpio) #define irq_to_gpio(irq) IRQ_TO_GPIO(irq) diff --git a/include/asm-arm/arch-pxa/pxa-regs.h b/include/asm-arm/arch-pxa/pxa-regs.h index 16ed24dbda4e..ac175b4d10cb 100644 --- a/include/asm-arm/arch-pxa/pxa-regs.h +++ b/include/asm-arm/arch-pxa/pxa-regs.h @@ -1131,6 +1131,19 @@ * General Purpose I/O */ +#define GPIO0_BASE ((void __iomem *)io_p2v(0x40E00000)) +#define GPIO1_BASE ((void __iomem *)io_p2v(0x40E00004)) +#define GPIO2_BASE ((void __iomem *)io_p2v(0x40E00008)) +#define GPIO3_BASE ((void __iomem *)io_p2v(0x40E00100)) + +#define GPLR_OFFSET 0x00 +#define GPDR_OFFSET 0x0C +#define GPSR_OFFSET 0x18 +#define GPCR_OFFSET 0x24 +#define GRER_OFFSET 0x30 +#define GFER_OFFSET 0x3C +#define GEDR_OFFSET 0x48 + #define GPLR0 __REG(0x40E00000) /* GPIO Pin-Level Register GPIO<31:0> */ #define GPLR1 __REG(0x40E00004) /* GPIO Pin-Level Register GPIO<63:32> */ #define GPLR2 __REG(0x40E00008) /* GPIO Pin-Level Register GPIO<80:64> */ diff --git a/include/asm-arm/pgalloc.h b/include/asm-arm/pgalloc.h index 4d4394552911..fb6c6e3222bd 100644 --- a/include/asm-arm/pgalloc.h +++ b/include/asm-arm/pgalloc.h @@ -27,14 +27,14 @@ * Since we have only two-level page tables, these are trivial */ #define pmd_alloc_one(mm,addr) ({ BUG(); ((pmd_t *)2); }) -#define pmd_free(pmd) do { } while (0) +#define pmd_free(mm, pmd) do { } while (0) #define pgd_populate(mm,pmd,pte) BUG() extern pgd_t *get_pgd_slow(struct mm_struct *mm); -extern void free_pgd_slow(pgd_t *pgd); +extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd); #define pgd_alloc(mm) get_pgd_slow(mm) -#define pgd_free(pgd) free_pgd_slow(pgd) +#define pgd_free(mm, pgd) free_pgd_slow(mm, pgd) /* * Allocate one PTE table. @@ -83,7 +83,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr) /* * Free one PTE table. */ -static inline void pte_free_kernel(pte_t *pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { if (pte) { pte -= PTRS_PER_PTE; @@ -91,7 +91,7 @@ static inline void pte_free_kernel(pte_t *pte) } } -static inline void pte_free(struct page *pte) +static inline void pte_free(struct mm_struct *mm, struct page *pte) { __free_page(pte); } diff --git a/include/asm-arm/tlb.h b/include/asm-arm/tlb.h index cb740025d413..36bd402a21cb 100644 --- a/include/asm-arm/tlb.h +++ b/include/asm-arm/tlb.h @@ -85,8 +85,8 @@ tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) } #define tlb_remove_page(tlb,page) free_page_and_swap_cache(page) -#define pte_free_tlb(tlb,ptep) pte_free(ptep) -#define pmd_free_tlb(tlb,pmdp) pmd_free(pmdp) +#define pte_free_tlb(tlb, ptep) pte_free((tlb)->mm, ptep) +#define pmd_free_tlb(tlb, pmdp) pmd_free((tlb)->mm, pmdp) #define tlb_migrate_finish(mm) do { } while (0) diff --git a/include/asm-avr32/arch-at32ap/at32ap700x.h b/include/asm-avr32/arch-at32ap/at32ap700x.h index 99684d6f3967..31e48b0e7324 100644 --- a/include/asm-avr32/arch-at32ap/at32ap700x.h +++ b/include/asm-avr32/arch-at32ap/at32ap700x.h @@ -13,8 +13,6 @@ #define GPIO_PERIPH_A 0 #define GPIO_PERIPH_B 1 -#define NR_GPIO_CONTROLLERS 4 - /* * Pin numbers identifying specific GPIO pins on the chip. They can * also be converted to IRQ numbers by passing them through diff --git a/include/asm-avr32/arch-at32ap/gpio.h b/include/asm-avr32/arch-at32ap/gpio.h index af7f9535bab3..0180f584ef03 100644 --- a/include/asm-avr32/arch-at32ap/gpio.h +++ b/include/asm-avr32/arch-at32ap/gpio.h @@ -5,20 +5,36 @@ #include <asm/irq.h> -/* Arch-neutral GPIO API */ -int __must_check gpio_request(unsigned int gpio, const char *label); -void gpio_free(unsigned int gpio); +/* Some GPIO chips can manage IRQs; some can't. The exact numbers can + * be changed if needed, but for the moment they're not configurable. + */ +#define ARCH_NR_GPIOS (NR_GPIO_IRQS + 2 * 32) -int gpio_direction_input(unsigned int gpio); -int gpio_direction_output(unsigned int gpio, int value); -int gpio_get_value(unsigned int gpio); -void gpio_set_value(unsigned int gpio, int value); -#include <asm-generic/gpio.h> /* cansleep wrappers */ +/* Arch-neutral GPIO API, supporting both "native" and external GPIOs. */ +#include <asm-generic/gpio.h> + +static inline int gpio_get_value(unsigned int gpio) +{ + return __gpio_get_value(gpio); +} + +static inline void gpio_set_value(unsigned int gpio, int value) +{ + __gpio_set_value(gpio, value); +} + +static inline int gpio_cansleep(unsigned int gpio) +{ + return __gpio_cansleep(gpio); +} + static inline int gpio_to_irq(unsigned int gpio) { - return gpio + GPIO_IRQ_BASE; + if (gpio < NR_GPIO_IRQS) + return gpio + GPIO_IRQ_BASE; + return -EINVAL; } static inline int irq_to_gpio(unsigned int irq) diff --git a/include/asm-avr32/arch-at32ap/irq.h b/include/asm-avr32/arch-at32ap/irq.h index 5adffab9a577..608e350368c7 100644 --- a/include/asm-avr32/arch-at32ap/irq.h +++ b/include/asm-avr32/arch-at32ap/irq.h @@ -3,11 +3,11 @@ #define EIM_IRQ_BASE NR_INTERNAL_IRQS #define NR_EIM_IRQS 32 - #define AT32_EXTINT(n) (EIM_IRQ_BASE + (n)) #define GPIO_IRQ_BASE (EIM_IRQ_BASE + NR_EIM_IRQS) -#define NR_GPIO_IRQS (5 * 32) +#define NR_GPIO_CTLR (5 /*internal*/ + 1 /*external*/) +#define NR_GPIO_IRQS (NR_GPIO_CTLR * 32) #define NR_IRQS (GPIO_IRQ_BASE + NR_GPIO_IRQS) diff --git a/include/asm-avr32/pgalloc.h b/include/asm-avr32/pgalloc.h index 0e680f47209f..b77e364b4c44 100644 --- a/include/asm-avr32/pgalloc.h +++ b/include/asm-avr32/pgalloc.h @@ -30,7 +30,7 @@ static __inline__ pgd_t *pgd_alloc(struct mm_struct *mm) return kcalloc(USER_PTRS_PER_PGD, sizeof(pgd_t), GFP_KERNEL); } -static inline void pgd_free(pgd_t *pgd) +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { kfree(pgd); } @@ -55,12 +55,12 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, return pte; } -static inline void pte_free_kernel(pte_t *pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { free_page((unsigned long)pte); } -static inline void pte_free(struct page *pte) +static inline void pte_free(struct mm_struct *mm, struct page *pte) { __free_page(pte); } diff --git a/include/asm-cris/bitops.h b/include/asm-cris/bitops.h index e2f49c27ed29..75ea6e096483 100644 --- a/include/asm-cris/bitops.h +++ b/include/asm-cris/bitops.h @@ -24,13 +24,6 @@ #include <linux/compiler.h> /* - * Some hacks to defeat gcc over-optimizations.. - */ -struct __dummy { unsigned long a[100]; }; -#define ADDR (*(struct __dummy *) addr) -#define CONST_ADDR (*(const struct __dummy *) addr) - -/* * set_bit - Atomically set a bit in memory * @nr: the bit to set * @addr: the address to start counting from diff --git a/include/asm-cris/pgalloc.h b/include/asm-cris/pgalloc.h index deaddfe79bbc..8ddd66f81773 100644 --- a/include/asm-cris/pgalloc.h +++ b/include/asm-cris/pgalloc.h @@ -16,7 +16,7 @@ static inline pgd_t *pgd_alloc (struct mm_struct *mm) return (pgd_t *)get_zeroed_page(GFP_KERNEL); } -static inline void pgd_free (pgd_t *pgd) +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { free_page((unsigned long)pgd); } @@ -34,12 +34,12 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long add return pte; } -static inline void pte_free_kernel(pte_t *pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { free_page((unsigned long)pte); } -static inline void pte_free(struct page *pte) +static inline void pte_free(struct mm_struct *mm, struct page *pte) { __free_page(pte); } diff --git a/include/asm-frv/dma-mapping.h b/include/asm-frv/dma-mapping.h index bcb2df68496e..2e8966ca030d 100644 --- a/include/asm-frv/dma-mapping.h +++ b/include/asm-frv/dma-mapping.h @@ -17,16 +17,6 @@ void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); /* - * These macros should be used after a pci_map_sg call has been done - * to get bus addresses of each of the SG entries and their lengths. - * You should only work with the number of sg entries pci_map_sg - * returns, or alternatively stop on the first sg_dma_len(sg) which - * is 0. - */ -#define sg_dma_address(sg) ((sg)->dma_address) -#define sg_dma_len(sg) ((sg)->length) - -/* * Map a single buffer of the indicated size for DMA in streaming mode. * The 32-bit bus address to use is returned. * diff --git a/include/asm-frv/page.h b/include/asm-frv/page.h index 213d92fd652a..bd9bd2d9cc78 100644 --- a/include/asm-frv/page.h +++ b/include/asm-frv/page.h @@ -76,10 +76,6 @@ extern unsigned long max_pfn; #endif /* __ASSEMBLY__ */ -#ifdef CONFIG_CONTIGUOUS_PAGE_ALLOC -#define WANT_PAGE_VIRTUAL 1 -#endif - #include <asm-generic/memory_model.h> #include <asm-generic/page.h> diff --git a/include/asm-frv/pgalloc.h b/include/asm-frv/pgalloc.h index ce982a6c610f..e89620ef08ca 100644 --- a/include/asm-frv/pgalloc.h +++ b/include/asm-frv/pgalloc.h @@ -31,18 +31,18 @@ do { \ */ extern pgd_t *pgd_alloc(struct mm_struct *); -extern void pgd_free(pgd_t *); +extern void pgd_free(struct mm_struct *mm, pgd_t *); extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long); extern struct page *pte_alloc_one(struct mm_struct *, unsigned long); -static inline void pte_free_kernel(pte_t *pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { free_page((unsigned long)pte); } -static inline void pte_free(struct page *pte) +static inline void pte_free(struct mm_struct *mm, struct page *pte) { __free_page(pte); } @@ -55,7 +55,7 @@ static inline void pte_free(struct page *pte) * (In the PAE case we free the pmds as part of the pgd.) */ #define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *) 2); }) -#define pmd_free(x) do { } while (0) +#define pmd_free(mm, x) do { } while (0) #define __pmd_free_tlb(tlb,x) do { } while (0) #endif /* CONFIG_MMU */ diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index 3c402afb9e74..6c0682ed5fc9 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h @@ -226,7 +226,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) * inside the pgd, so has no extra memory associated with it. */ #define pud_alloc_one(mm, address) NULL -#define pud_free(x) do { } while (0) +#define pud_free(mm, x) do { } while (0) #define __pud_free_tlb(tlb, x) do { } while (0) /* diff --git a/include/asm-frv/scatterlist.h b/include/asm-frv/scatterlist.h index 2e7143b5a7ad..4bca8a28546c 100644 --- a/include/asm-frv/scatterlist.h +++ b/include/asm-frv/scatterlist.h @@ -31,6 +31,16 @@ struct scatterlist { unsigned int length; }; +/* + * These macros should be used after a pci_map_sg call has been done + * to get bus addresses of each of the SG entries and their lengths. + * You should only work with the number of sg entries pci_map_sg + * returns, or alternatively stop on the first sg_dma_len(sg) which + * is 0. + */ +#define sg_dma_address(sg) ((sg)->dma_address) +#define sg_dma_len(sg) ((sg)->length) + #define ISA_DMA_THRESHOLD (0xffffffffUL) #endif /* !_ASM_SCATTERLIST_H */ diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h index 7b88d3931e34..9d40e879f99e 100644 --- a/include/asm-generic/4level-fixup.h +++ b/include/asm-generic/4level-fixup.h @@ -28,7 +28,7 @@ #undef pud_free_tlb #define pud_free_tlb(tlb, x) do { } while (0) -#define pud_free(x) do { } while (0) +#define pud_free(mm, x) do { } while (0) #define __pud_free_tlb(tlb, x) do { } while (0) #undef pud_addr_end diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index 2d0aab1d8611..f29a502f4a6c 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -1,6 +1,102 @@ #ifndef _ASM_GENERIC_GPIO_H #define _ASM_GENERIC_GPIO_H +#ifdef CONFIG_HAVE_GPIO_LIB + +/* Platforms may implement their GPIO interface with library code, + * at a small performance cost for non-inlined operations and some + * extra memory (for code and for per-GPIO table entries). + * + * While the GPIO programming interface defines valid GPIO numbers + * to be in the range 0..MAX_INT, this library restricts them to the + * smaller range 0..ARCH_NR_GPIOS. + */ + +#ifndef ARCH_NR_GPIOS +#define ARCH_NR_GPIOS 256 +#endif + +struct seq_file; + +/** + * struct gpio_chip - abstract a GPIO controller + * @label: for diagnostics + * @direction_input: configures signal "offset" as input, or returns error + * @get: returns value for signal "offset"; for output signals this + * returns either the value actually sensed, or zero + * @direction_output: configures signal "offset" as output, or returns error + * @set: assigns output value for signal "offset" + * @dbg_show: optional routine to show contents in debugfs; default code + * will be used when this is omitted, but custom code can show extra + * state (such as pullup/pulldown configuration). + * @base: identifies the first GPIO number handled by this chip; or, if + * negative during registration, requests dynamic ID allocation. + * @ngpio: the number of GPIOs handled by this controller; the last GPIO + * handled is (base + ngpio - 1). + * @can_sleep: flag must be set iff get()/set() methods sleep, as they + * must while accessing GPIO expander chips over I2C or SPI + * + * A gpio_chip can help platforms abstract various sources of GPIOs so + * they can all be accessed through a common programing interface. + * Example sources would be SOC controllers, FPGAs, multifunction + * chips, dedicated GPIO expanders, and so on. + * + * Each chip controls a number of signals, identified in method calls + * by "offset" values in the range 0..(@ngpio - 1). When those signals + * are referenced through calls like gpio_get_value(gpio), the offset + * is calculated by subtracting @base from the gpio number. + */ +struct gpio_chip { + char *label; + + int (*direction_input)(struct gpio_chip *chip, + unsigned offset); + int (*get)(struct gpio_chip *chip, + unsigned offset); + int (*direction_output)(struct gpio_chip *chip, + unsigned offset, int value); + void (*set)(struct gpio_chip *chip, + unsigned offset, int value); + void (*dbg_show)(struct seq_file *s, + struct gpio_chip *chip); + int base; + u16 ngpio; + unsigned can_sleep:1; +}; + +extern const char *gpiochip_is_requested(struct gpio_chip *chip, + unsigned offset); + +/* add/remove chips */ +extern int gpiochip_add(struct gpio_chip *chip); +extern int __must_check gpiochip_remove(struct gpio_chip *chip); + + +/* Always use the library code for GPIO management calls, + * or when sleeping may be involved. + */ +extern int gpio_request(unsigned gpio, const char *label); +extern void gpio_free(unsigned gpio); + +extern int gpio_direction_input(unsigned gpio); +extern int gpio_direction_output(unsigned gpio, int value); + +extern int gpio_get_value_cansleep(unsigned gpio); +extern void gpio_set_value_cansleep(unsigned gpio, int value); + + +/* A platform's <asm/gpio.h> code may want to inline the I/O calls when + * the GPIO is constant and refers to some always-present controller, + * giving direct access to chip registers and tight bitbanging loops. + */ +extern int __gpio_get_value(unsigned gpio); +extern void __gpio_set_value(unsigned gpio, int value); + +extern int __gpio_cansleep(unsigned gpio); + + +#else + /* platforms that don't directly support access to GPIOs through I2C, SPI, * or other blocking infrastructure can use these wrappers. */ @@ -22,4 +118,6 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value) gpio_set_value(gpio, value); } +#endif + #endif /* _ASM_GENERIC_GPIO_H */ diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h index 29ff5d84d8c3..087325ede76c 100644 --- a/include/asm-generic/pgtable-nopmd.h +++ b/include/asm-generic/pgtable-nopmd.h @@ -54,7 +54,7 @@ static inline pmd_t * pmd_offset(pud_t * pud, unsigned long address) * inside the pud, so has no extra memory associated with it. */ #define pmd_alloc_one(mm, address) NULL -#define pmd_free(x) do { } while (0) +#define pmd_free(mm, x) do { } while (0) #define __pmd_free_tlb(tlb, x) do { } while (0) #undef pmd_addr_end diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h index 566464500558..87cf449a6df3 100644 --- a/include/asm-generic/pgtable-nopud.h +++ b/include/asm-generic/pgtable-nopud.h @@ -51,7 +51,7 @@ static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address) * inside the pgd, so has no extra memory associated with it. */ #define pud_alloc_one(mm, address) NULL -#define pud_free(x) do { } while (0) +#define pud_free(mm, x) do { } while (0) #define __pud_free_tlb(tlb, x) do { } while (0) #undef pud_addr_end diff --git a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h index 67552cad5173..556d988123ac 100644 --- a/include/asm-ia64/pgalloc.h +++ b/include/asm-ia64/pgalloc.h @@ -27,7 +27,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) return quicklist_alloc(0, GFP_KERNEL, NULL); } -static inline void pgd_free(pgd_t * pgd) +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { quicklist_free(0, NULL, pgd); } @@ -44,11 +44,11 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) return quicklist_alloc(0, GFP_KERNEL, NULL); } -static inline void pud_free(pud_t * pud) +static inline void pud_free(struct mm_struct *mm, pud_t *pud) { quicklist_free(0, NULL, pud); } -#define __pud_free_tlb(tlb, pud) pud_free(pud) +#define __pud_free_tlb(tlb, pud) pud_free((tlb)->mm, pud) #endif /* CONFIG_PGTABLE_4 */ static inline void @@ -62,12 +62,12 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) return quicklist_alloc(0, GFP_KERNEL, NULL); } -static inline void pmd_free(pmd_t * pmd) +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { quicklist_free(0, NULL, pmd); } -#define __pmd_free_tlb(tlb, pmd) pmd_free(pmd) +#define __pmd_free_tlb(tlb, pmd) pmd_free((tlb)->mm, pmd) static inline void pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, struct page *pte) @@ -94,12 +94,12 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, return quicklist_alloc(0, GFP_KERNEL, NULL); } -static inline void pte_free(struct page *pte) +static inline void pte_free(struct mm_struct *mm, struct page *pte) { quicklist_free_page(0, NULL, pte); } -static inline void pte_free_kernel(pte_t * pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { quicklist_free(0, NULL, pte); } @@ -109,6 +109,6 @@ static inline void check_pgt_cache(void) quicklist_trim(0, NULL, 25, 16); } -#define __pte_free_tlb(tlb, pte) pte_free(pte) +#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, pte) #endif /* _ASM_IA64_PGALLOC_H */ diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index be3b0ae43270..666385b68820 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h @@ -31,7 +31,8 @@ * each (assuming 8KB page size), for a total of 8TB of user virtual * address space. */ -#define TASK_SIZE (current->thread.task_size) +#define TASK_SIZE_OF(tsk) ((tsk)->thread.task_size) +#define TASK_SIZE TASK_SIZE_OF(current) /* * This decides where the kernel will search for a free chunk of vm diff --git a/include/asm-m32r/irq.h b/include/asm-m32r/irq.h index 2f93f4743add..242028b4d86a 100644 --- a/include/asm-m32r/irq.h +++ b/include/asm-m32r/irq.h @@ -3,7 +3,7 @@ #define _ASM_M32R_IRQ_H -#if defined(CONFIG_PLAT_M32700UT_Alpha) || defined(CONFIG_PLAT_USRV) +#if defined(CONFIG_PLAT_USRV) /* * IRQ definitions for M32700UT * M32700 Chip: 64 interrupts diff --git a/include/asm-m32r/m32700ut/m32700ut_pld.h b/include/asm-m32r/m32700ut/m32700ut_pld.h index d39121279a1a..57623beb44cb 100644 --- a/include/asm-m32r/m32700ut/m32700ut_pld.h +++ b/include/asm-m32r/m32700ut/m32700ut_pld.h @@ -13,9 +13,7 @@ * this archive for more details. */ -#if defined(CONFIG_PLAT_M32700UT_Alpha) -#define PLD_PLAT_BASE 0x08c00000 -#elif defined(CONFIG_PLAT_M32700UT) || defined(CONFIG_PLAT_USRV) +#if defined(CONFIG_PLAT_M32700UT) || defined(CONFIG_PLAT_USRV) #define PLD_PLAT_BASE 0x04c00000 #else #error "no platform configuration" diff --git a/include/asm-m32r/pgalloc.h b/include/asm-m32r/pgalloc.h index 943ba63c1ebc..e5921adfad1b 100644 --- a/include/asm-m32r/pgalloc.h +++ b/include/asm-m32r/pgalloc.h @@ -24,7 +24,7 @@ static __inline__ pgd_t *pgd_alloc(struct mm_struct *mm) return pgd; } -static __inline__ void pgd_free(pgd_t *pgd) +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { free_page((unsigned long)pgd); } @@ -46,17 +46,17 @@ static __inline__ struct page *pte_alloc_one(struct mm_struct *mm, return pte; } -static __inline__ void pte_free_kernel(pte_t *pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { free_page((unsigned long)pte); } -static __inline__ void pte_free(struct page *pte) +static inline void pte_free(struct mm_struct *mm, struct page *pte) { __free_page(pte); } -#define __pte_free_tlb(tlb, pte) pte_free((pte)) +#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, (pte)) /* * allocating and freeing a pmd is trivial: the 1-entry pmd is @@ -65,7 +65,7 @@ static __inline__ void pte_free(struct page *pte) */ #define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) -#define pmd_free(x) do { } while (0) +#define pmd_free(mm, x) do { } while (0) #define __pmd_free_tlb(tlb, x) do { } while (0) #define pgd_populate(mm, pmd, pte) BUG() diff --git a/include/asm-m68k/macintosh.h b/include/asm-m68k/macintosh.h index 27d11da2b479..28b0f49ee521 100644 --- a/include/asm-m68k/macintosh.h +++ b/include/asm-m68k/macintosh.h @@ -14,8 +14,6 @@ extern void mac_init_IRQ(void); extern int mac_irq_pending(unsigned int); extern void mac_identify(void); extern void mac_report_hardware(void); -extern void mac_debugging_penguin(int); -extern void mac_boom(int); /* * Floppy driver magic hook - probably shouldnt be here diff --git a/include/asm-m68k/motorola_pgalloc.h b/include/asm-m68k/motorola_pgalloc.h index 5158412cd54d..500ec9b8b189 100644 --- a/include/asm-m68k/motorola_pgalloc.h +++ b/include/asm-m68k/motorola_pgalloc.h @@ -22,7 +22,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long ad return pte; } -static inline void pte_free_kernel(pte_t *pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { cache_page(pte); free_page((unsigned long) pte); @@ -47,7 +47,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long add return page; } -static inline void pte_free(struct page *page) +static inline void pte_free(struct mm_struct *mm, struct page *page) { cache_page(kmap(page)); kunmap(page); @@ -67,7 +67,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) return get_pointer_table(); } -static inline int pmd_free(pmd_t *pmd) +static inline int pmd_free(struct mm_struct *mm, pmd_t *pmd) { return free_pointer_table(pmd); } @@ -78,9 +78,9 @@ static inline int __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) } -static inline void pgd_free(pgd_t *pgd) +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { - pmd_free((pmd_t *)pgd); + pmd_free(mm, (pmd_t *)pgd); } static inline pgd_t *pgd_alloc(struct mm_struct *mm) diff --git a/include/asm-m68k/sun3_pgalloc.h b/include/asm-m68k/sun3_pgalloc.h index fd8241117649..a5a91e72714b 100644 --- a/include/asm-m68k/sun3_pgalloc.h +++ b/include/asm-m68k/sun3_pgalloc.h @@ -21,12 +21,12 @@ extern const char bad_pmd_string[]; #define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) -static inline void pte_free_kernel(pte_t * pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { free_page((unsigned long) pte); } -static inline void pte_free(struct page *page) +static inline void pte_free(struct mm_struct *mm, struct page *page) { __free_page(page); } @@ -72,10 +72,10 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *p * allocating and freeing a pmd is trivial: the 1-entry pmd is * inside the pgd, so has no extra memory associated with it. */ -#define pmd_free(x) do { } while (0) +#define pmd_free(mm, x) do { } while (0) #define __pmd_free_tlb(tlb, x) do { } while (0) -static inline void pgd_free(pgd_t * pgd) +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { free_page((unsigned long) pgd); } diff --git a/include/asm-m68knommu/mcfne.h b/include/asm-m68knommu/mcfne.h index c920ccdb61fe..431f63aadd0e 100644 --- a/include/asm-m68knommu/mcfne.h +++ b/include/asm-m68knommu/mcfne.h @@ -60,17 +60,6 @@ #define NE2000_BYTE volatile unsigned char #endif -#if defined(CONFIG_CFV240) -#define NE2000_ADDR 0x40010000 -#define NE2000_ADDR1 0x40010001 -#define NE2000_ODDOFFSET 0x00000000 -#define NE2000_IRQ 1 -#define NE2000_IRQ_VECTOR 0x19 -#define NE2000_IRQ_PRIORITY 2 -#define NE2000_IRQ_LEVEL 1 -#define NE2000_BYTE volatile unsigned char -#endif - #if defined(CONFIG_M5307C3) #define NE2000_ADDR 0x40000300 #define NE2000_ODDOFFSET 0x00010000 @@ -173,13 +162,8 @@ void ne2000_outsw(unsigned int addr, void *vbuf, unsigned long len); * On most NE2000 implementations on ColdFire boards the chip is * mapped in kinda funny, due to its ISA heritage. */ -#ifdef CONFIG_CFV240 -#define NE2000_PTR(addr) (NE2000_ADDR + ((addr & 0x3f) << 1) + 1) -#define NE2000_DATA_PTR(addr) (NE2000_ADDR + ((addr & 0x3f) << 1)) -#else #define NE2000_PTR(addr) ((addr&0x1)?(NE2000_ODDOFFSET+addr-1):(addr)) #define NE2000_DATA_PTR(addr) (addr) -#endif void ne2000_outb(unsigned int val, unsigned int addr) @@ -285,17 +269,6 @@ void ne2000_irqsetup(int irq) } #endif -#if defined(CONFIG_CFV240) -void ne2000_irqsetup(int irq) -{ - volatile unsigned char *icrp; - - icrp = (volatile unsigned char *) (MCF_MBAR + MCFSIM_ICR1); - *icrp = MCFSIM_ICR_LEVEL1 | MCFSIM_ICR_PRI2 | MCFSIM_ICR_AUTOVEC; - mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_EINT1); -} -#endif - #if defined(CONFIG_M5206e) && defined(CONFIG_NETtel) void ne2000_irqsetup(int irq) { diff --git a/include/asm-m68knommu/mcfsim.h b/include/asm-m68knommu/mcfsim.h index 1074ae717f74..da3f2ceff3a4 100644 --- a/include/asm-m68knommu/mcfsim.h +++ b/include/asm-m68knommu/mcfsim.h @@ -17,9 +17,7 @@ * Include 5204, 5206/e, 5235, 5249, 5270/5271, 5272, 5280/5282, * 5307 or 5407 specific addresses. */ -#if defined(CONFIG_M5204) -#include <asm/m5204sim.h> -#elif defined(CONFIG_M5206) || defined(CONFIG_M5206e) +#if defined(CONFIG_M5206) || defined(CONFIG_M5206e) #include <asm/m5206sim.h> #elif defined(CONFIG_M520x) #include <asm/m520xsim.h> diff --git a/include/asm-m68knommu/mcftimer.h b/include/asm-m68knommu/mcftimer.h index 6f4d796e03db..0f90f6d2227a 100644 --- a/include/asm-m68knommu/mcftimer.h +++ b/include/asm-m68knommu/mcftimer.h @@ -16,7 +16,7 @@ /* * Get address specific defines for this ColdFire member. */ -#if defined(CONFIG_M5204) || defined(CONFIG_M5206) || defined(CONFIG_M5206e) +#if defined(CONFIG_M5206) || defined(CONFIG_M5206e) #define MCFTIMER_BASE1 0x100 /* Base address of TIMER1 */ #define MCFTIMER_BASE2 0x120 /* Base address of TIMER2 */ #elif defined(CONFIG_M5272) diff --git a/include/asm-m68knommu/mcfuart.h b/include/asm-m68knommu/mcfuart.h index 8a7a67703ac3..ef2293873612 100644 --- a/include/asm-m68knommu/mcfuart.h +++ b/include/asm-m68knommu/mcfuart.h @@ -19,7 +19,7 @@ #if defined(CONFIG_M5272) #define MCFUART_BASE1 0x100 /* Base address of UART1 */ #define MCFUART_BASE2 0x140 /* Base address of UART2 */ -#elif defined(CONFIG_M5204) || defined(CONFIG_M5206) || defined(CONFIG_M5206e) +#elif defined(CONFIG_M5206) || defined(CONFIG_M5206e) #if defined(CONFIG_NETtel) #define MCFUART_BASE1 0x180 /* Base address of UART1 */ #define MCFUART_BASE2 0x140 /* Base address of UART2 */ diff --git a/include/asm-m68knommu/system.h b/include/asm-m68knommu/system.h index 15b4c7d45c94..ee2dc07bae0e 100644 --- a/include/asm-m68knommu/system.h +++ b/include/asm-m68knommu/system.h @@ -207,23 +207,6 @@ cmpxchg(volatile int *p, int old, int new) } -#ifdef CONFIG_M68332 -#define HARD_RESET_NOW() ({ \ - local_irq_disable(); \ - asm(" \ - movew #0x0000, 0xfffa6a; \ - reset; \ - /*movew #0x1557, 0xfffa44;*/ \ - /*movew #0x0155, 0xfffa46;*/ \ - moveal #0, %a0; \ - movec %a0, %vbr; \ - moveal 0, %sp; \ - moveal 4, %a0; \ - jmp (%a0); \ - "); \ -}) -#endif - #if defined( CONFIG_M68328 ) || defined( CONFIG_M68EZ328 ) || \ defined (CONFIG_M68360) || defined( CONFIG_M68VZ328 ) #define HARD_RESET_NOW() ({ \ diff --git a/include/asm-mips/pgalloc.h b/include/asm-mips/pgalloc.h index 81b72122207a..c4efeced8396 100644 --- a/include/asm-mips/pgalloc.h +++ b/include/asm-mips/pgalloc.h @@ -58,7 +58,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) return ret; } -static inline void pgd_free(pgd_t *pgd) +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { free_pages((unsigned long)pgd, PGD_ORDER); } @@ -85,12 +85,12 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, return pte; } -static inline void pte_free_kernel(pte_t *pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { free_pages((unsigned long)pte, PTE_ORDER); } -static inline void pte_free(struct page *pte) +static inline void pte_free(struct mm_struct *mm, struct page *pte) { __free_pages(pte, PTE_ORDER); } @@ -103,7 +103,7 @@ static inline void pte_free(struct page *pte) * allocating and freeing a pmd is trivial: the 1-entry pmd is * inside the pgd, so has no extra memory associated with it. */ -#define pmd_free(x) do { } while (0) +#define pmd_free(mm, x) do { } while (0) #define __pmd_free_tlb(tlb, x) do { } while (0) #endif @@ -120,12 +120,12 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) return pmd; } -static inline void pmd_free(pmd_t *pmd) +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { free_pages((unsigned long)pmd, PMD_ORDER); } -#define __pmd_free_tlb(tlb, x) pmd_free(x) +#define __pmd_free_tlb(tlb, x) pmd_free((tlb)->mm, x) #endif diff --git a/include/asm-mips/processor.h b/include/asm-mips/processor.h index 83bc94534084..36f42de59409 100644 --- a/include/asm-mips/processor.h +++ b/include/asm-mips/processor.h @@ -65,6 +65,8 @@ extern unsigned int vced_count, vcei_count; #define TASK_UNMAPPED_BASE \ (test_thread_flag(TIF_32BIT_ADDR) ? \ PAGE_ALIGN(TASK_SIZE32 / 3) : PAGE_ALIGN(TASK_SIZE / 3)) +#define TASK_SIZE_OF(tsk) \ + (test_tsk_thread_flag(tsk, TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE) #endif #define NUM_FPU_REGS 32 diff --git a/include/asm-parisc/pgalloc.h b/include/asm-parisc/pgalloc.h index 1af1a41e0723..aab66f1bea14 100644 --- a/include/asm-parisc/pgalloc.h +++ b/include/asm-parisc/pgalloc.h @@ -43,7 +43,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) return actual_pgd; } -static inline void pgd_free(pgd_t *pgd) +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { #ifdef CONFIG_64BIT pgd -= PTRS_PER_PGD; @@ -70,7 +70,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) return pmd; } -static inline void pmd_free(pmd_t *pmd) +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { #ifdef CONFIG_64BIT if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED) @@ -91,7 +91,7 @@ static inline void pmd_free(pmd_t *pmd) */ #define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) -#define pmd_free(x) do { } while (0) +#define pmd_free(mm, x) do { } while (0) #define pgd_populate(mm, pmd, pte) BUG() #endif @@ -130,12 +130,12 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) return pte; } -static inline void pte_free_kernel(pte_t *pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { free_page((unsigned long)pte); } -#define pte_free(page) pte_free_kernel(page_address(page)) +#define pte_free(mm, page) pte_free_kernel(page_address(page)) #define check_pgt_cache() do { } while (0) diff --git a/include/asm-parisc/processor.h b/include/asm-parisc/processor.h index 6b294fb07a23..3bb06e898fde 100644 --- a/include/asm-parisc/processor.h +++ b/include/asm-parisc/processor.h @@ -32,7 +32,8 @@ #endif #define current_text_addr() ({ void *pc; current_ia(pc); pc; }) -#define TASK_SIZE (current->thread.task_size) +#define TASK_SIZE_OF(tsk) ((tsk)->thread.task_size) +#define TASK_SIZE TASK_SIZE_OF(current) #define TASK_UNMAPPED_BASE (current->thread.map_base) #define DEFAULT_TASK_SIZE32 (0xFFF00000UL) diff --git a/include/asm-parisc/tlb.h b/include/asm-parisc/tlb.h index 33107a248e1f..383b1db310ee 100644 --- a/include/asm-parisc/tlb.h +++ b/include/asm-parisc/tlb.h @@ -21,7 +21,7 @@ do { if (!(tlb)->fullmm) \ #include <asm-generic/tlb.h> -#define __pmd_free_tlb(tlb, pmd) pmd_free(pmd) -#define __pte_free_tlb(tlb, pte) pte_free(pte) +#define __pmd_free_tlb(tlb, pmd) pmd_free((tlb)->mm, pmd) +#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, pte) #endif diff --git a/include/asm-powerpc/iommu.h b/include/asm-powerpc/iommu.h index 7a3cef785abd..852e15f51a1e 100644 --- a/include/asm-powerpc/iommu.h +++ b/include/asm-powerpc/iommu.h @@ -79,19 +79,19 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, int nid); -extern int iommu_map_sg(struct iommu_table *tbl, struct scatterlist *sglist, +extern int iommu_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, unsigned long mask, enum dma_data_direction direction); extern void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, int nelems, enum dma_data_direction direction); -extern void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size, - dma_addr_t *dma_handle, unsigned long mask, - gfp_t flag, int node); +extern void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, + size_t size, dma_addr_t *dma_handle, + unsigned long mask, gfp_t flag, int node); extern void iommu_free_coherent(struct iommu_table *tbl, size_t size, void *vaddr, dma_addr_t dma_handle); -extern dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr, - size_t size, unsigned long mask, +extern dma_addr_t iommu_map_single(struct device *dev, struct iommu_table *tbl, + void *vaddr, size_t size, unsigned long mask, enum dma_data_direction direction); extern void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction); diff --git a/include/asm-powerpc/nvram.h b/include/asm-powerpc/nvram.h index 4e7059cc6113..efde5ac82f7b 100644 --- a/include/asm-powerpc/nvram.h +++ b/include/asm-powerpc/nvram.h @@ -58,6 +58,9 @@ struct nvram_header { }; #ifdef __KERNEL__ + +#include <linux/list.h> + struct nvram_partition { struct list_head partition; struct nvram_header header; diff --git a/include/asm-powerpc/pgalloc-32.h b/include/asm-powerpc/pgalloc-32.h index e1307432163c..c162a4c37b39 100644 --- a/include/asm-powerpc/pgalloc-32.h +++ b/include/asm-powerpc/pgalloc-32.h @@ -6,14 +6,14 @@ extern void __bad_pte(pmd_t *pmd); extern pgd_t *pgd_alloc(struct mm_struct *mm); -extern void pgd_free(pgd_t *pgd); +extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); /* * We don't have any real pmd's, and this code never triggers because * the pgd will always be present.. */ /* #define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) */ -#define pmd_free(x) do { } while (0) +#define pmd_free(mm, x) do { } while (0) #define __pmd_free_tlb(tlb,x) do { } while (0) /* #define pgd_populate(mm, pmd, pte) BUG() */ @@ -31,10 +31,10 @@ extern void pgd_free(pgd_t *pgd); extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr); extern struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr); -extern void pte_free_kernel(pte_t *pte); -extern void pte_free(struct page *pte); +extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte); +extern void pte_free(struct mm_struct *mm, struct page *pte); -#define __pte_free_tlb(tlb, pte) pte_free((pte)) +#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, (pte)) #define check_pgt_cache() do { } while (0) diff --git a/include/asm-powerpc/pgalloc-64.h b/include/asm-powerpc/pgalloc-64.h index 43214c8085b7..5afae8593931 100644 --- a/include/asm-powerpc/pgalloc-64.h +++ b/include/asm-powerpc/pgalloc-64.h @@ -29,7 +29,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL); } -static inline void pgd_free(pgd_t *pgd) +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { subpage_prot_free(pgd); kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd); @@ -45,7 +45,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) GFP_KERNEL|__GFP_REPEAT); } -static inline void pud_free(pud_t *pud) +static inline void pud_free(struct mm_struct *mm, pud_t *pud) { kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud); } @@ -81,7 +81,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) GFP_KERNEL|__GFP_REPEAT); } -static inline void pmd_free(pmd_t *pmd) +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd); } @@ -99,12 +99,12 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, return pte ? virt_to_page(pte) : NULL; } -static inline void pte_free_kernel(pte_t *pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { free_page((unsigned long)pte); } -static inline void pte_free(struct page *ptepage) +static inline void pte_free(struct mm_struct *mm, struct page *ptepage) { __free_page(ptepage); } diff --git a/include/asm-powerpc/processor.h b/include/asm-powerpc/processor.h index dba7c948189d..1f4765d6546f 100644 --- a/include/asm-powerpc/processor.h +++ b/include/asm-powerpc/processor.h @@ -99,8 +99,9 @@ extern struct task_struct *last_task_used_spe; */ #define TASK_SIZE_USER32 (0x0000000100000000UL - (1*PAGE_SIZE)) -#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ +#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \ TASK_SIZE_USER32 : TASK_SIZE_USER64) +#define TASK_SIZE TASK_SIZE_OF(current) /* This decides where the kernel will search for a free chunk of vm * space during mmap's. diff --git a/include/asm-powerpc/systbl.h b/include/asm-powerpc/systbl.h index 0c8b0d679139..e996521fb3a6 100644 --- a/include/asm-powerpc/systbl.h +++ b/include/asm-powerpc/systbl.h @@ -309,7 +309,7 @@ SYSCALL_SPU(getcpu) COMPAT_SYS(epoll_pwait) COMPAT_SYS_SPU(utimensat) COMPAT_SYS_SPU(signalfd) -COMPAT_SYS_SPU(timerfd) +SYSCALL(ni_syscall) SYSCALL_SPU(eventfd) COMPAT_SYS_SPU(sync_file_range2) COMPAT_SYS(fallocate) diff --git a/include/asm-ppc/pgalloc.h b/include/asm-ppc/pgalloc.h index 44d88a98e87c..7c39a95829c7 100644 --- a/include/asm-ppc/pgalloc.h +++ b/include/asm-ppc/pgalloc.h @@ -7,14 +7,14 @@ extern void __bad_pte(pmd_t *pmd); extern pgd_t *pgd_alloc(struct mm_struct *mm); -extern void pgd_free(pgd_t *pgd); +extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); /* * We don't have any real pmd's, and this code never triggers because * the pgd will always be present.. */ #define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) -#define pmd_free(x) do { } while (0) +#define pmd_free(mm, x) do { } while (0) #define __pmd_free_tlb(tlb,x) do { } while (0) #define pgd_populate(mm, pmd, pte) BUG() @@ -32,10 +32,10 @@ extern void pgd_free(pgd_t *pgd); extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr); extern struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr); -extern void pte_free_kernel(pte_t *pte); -extern void pte_free(struct page *pte); +extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte); +extern void pte_free(struct mm_struct *mm, struct page *pte); -#define __pte_free_tlb(tlb, pte) pte_free((pte)) +#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, (pte)) #define check_pgt_cache() do { } while (0) diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h index 709dd1740956..6f6619ba8980 100644 --- a/include/asm-s390/pgalloc.h +++ b/include/asm-s390/pgalloc.h @@ -57,10 +57,10 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) } #define pud_alloc_one(mm,address) ({ BUG(); ((pud_t *)2); }) -#define pud_free(x) do { } while (0) +#define pud_free(mm, x) do { } while (0) #define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) -#define pmd_free(x) do { } while (0) +#define pmd_free(mm, x) do { } while (0) #define pgd_populate(mm, pgd, pud) BUG() #define pgd_populate_kernel(mm, pgd, pud) BUG() @@ -76,7 +76,7 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) } #define pud_alloc_one(mm,address) ({ BUG(); ((pud_t *)2); }) -#define pud_free(x) do { } while (0) +#define pud_free(mm, x) do { } while (0) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) { @@ -85,7 +85,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) crst_table_init(crst, _SEGMENT_ENTRY_EMPTY); return (pmd_t *) crst; } -#define pmd_free(pmd) crst_table_free((unsigned long *) pmd) +#define pmd_free(mm, pmd) crst_table_free((unsigned long *)pmd) #define pgd_populate(mm, pgd, pud) BUG() #define pgd_populate_kernel(mm, pgd, pud) BUG() @@ -115,7 +115,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) crst_table_init(crst, pgd_entry_type(mm)); return (pgd_t *) crst; } -#define pgd_free(pgd) crst_table_free((unsigned long *) pgd) +#define pgd_free(mm, pgd) crst_table_free((unsigned long *) pgd) static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) @@ -151,9 +151,9 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page) #define pte_alloc_one(mm, vmaddr) \ virt_to_page(page_table_alloc(s390_noexec)) -#define pte_free_kernel(pte) \ +#define pte_free_kernel(mm, pte) \ page_table_free((unsigned long *) pte) -#define pte_free(pte) \ +#define pte_free(mm, pte) \ page_table_free((unsigned long *) page_to_phys((struct page *) pte)) #endif /* _S390_PGALLOC_H */ diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h index c86b982aef5a..4f744609cd11 100644 --- a/include/asm-s390/processor.h +++ b/include/asm-s390/processor.h @@ -70,8 +70,9 @@ extern int get_cpu_capability(unsigned int *); #else /* __s390x__ */ -# define TASK_SIZE (test_thread_flag(TIF_31BIT) ? \ +# define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_31BIT) ? \ (0x80000000UL) : (0x40000000000UL)) +# define TASK_SIZE TASK_SIZE_OF(current) # define TASK_UNMAPPED_BASE (TASK_SIZE / 2) # define DEFAULT_TASK_SIZE (0x40000000000UL) diff --git a/include/asm-s390/tlb.h b/include/asm-s390/tlb.h index 618693cfc10f..985de2b88279 100644 --- a/include/asm-s390/tlb.h +++ b/include/asm-s390/tlb.h @@ -65,9 +65,9 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb, if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pmds < TLB_NR_PTRS)) __tlb_flush_mm(tlb->mm); while (tlb->nr_ptes > 0) - pte_free(tlb->array[--tlb->nr_ptes]); + pte_free(tlb->mm, tlb->array[--tlb->nr_ptes]); while (tlb->nr_pmds < TLB_NR_PTRS) - pmd_free((pmd_t *) tlb->array[tlb->nr_pmds++]); + pmd_free(tlb->mm, (pmd_t *) tlb->array[tlb->nr_pmds++]); } static inline void tlb_finish_mmu(struct mmu_gather *tlb, @@ -102,7 +102,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, struct page *page) if (tlb->nr_ptes >= tlb->nr_pmds) tlb_flush_mmu(tlb, 0, 0); } else - pte_free(page); + pte_free(tlb->mm, page); } /* @@ -117,7 +117,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) if (tlb->nr_ptes >= tlb->nr_pmds) tlb_flush_mmu(tlb, 0, 0); } else - pmd_free(pmd); + pmd_free(tlb->mm, pmd); #endif } diff --git a/include/asm-sh/pgalloc.h b/include/asm-sh/pgalloc.h index 18b613c57cf5..59ca16d77a1d 100644 --- a/include/asm-sh/pgalloc.h +++ b/include/asm-sh/pgalloc.h @@ -36,7 +36,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) return quicklist_alloc(QUICK_PGD, GFP_KERNEL | __GFP_REPEAT, pgd_ctor); } -static inline void pgd_free(pgd_t *pgd) +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { quicklist_free(QUICK_PGD, NULL, pgd); } @@ -54,12 +54,12 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, return pg ? virt_to_page(pg) : NULL; } -static inline void pte_free_kernel(pte_t *pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { quicklist_free(QUICK_PT, NULL, pte); } -static inline void pte_free(struct page *pte) +static inline void pte_free(struct mm_struct *mm, struct page *pte) { quicklist_free_page(QUICK_PT, NULL, pte); } @@ -71,7 +71,7 @@ static inline void pte_free(struct page *pte) * inside the pgd, so has no extra memory associated with it. */ -#define pmd_free(x) do { } while (0) +#define pmd_free(mm, x) do { } while (0) #define __pmd_free_tlb(tlb,x) do { } while (0) static inline void check_pgt_cache(void) diff --git a/include/asm-sparc/pgalloc.h b/include/asm-sparc/pgalloc.h index a449cd4912d1..b5fbdd36447f 100644 --- a/include/asm-sparc/pgalloc.h +++ b/include/asm-sparc/pgalloc.h @@ -32,7 +32,7 @@ BTFIXUPDEF_CALL(pgd_t *, get_pgd_fast, void) BTFIXUPDEF_CALL(void, free_pgd_fast, pgd_t *) #define free_pgd_fast(pgd) BTFIXUP_CALL(free_pgd_fast)(pgd) -#define pgd_free(pgd) free_pgd_fast(pgd) +#define pgd_free(mm, pgd) free_pgd_fast(pgd) #define pgd_alloc(mm) get_pgd_fast() BTFIXUPDEF_CALL(void, pgd_set, pgd_t *, pmd_t *) @@ -45,8 +45,8 @@ BTFIXUPDEF_CALL(pmd_t *, pmd_alloc_one, struct mm_struct *, unsigned long) BTFIXUPDEF_CALL(void, free_pmd_fast, pmd_t *) #define free_pmd_fast(pmd) BTFIXUP_CALL(free_pmd_fast)(pmd) -#define pmd_free(pmd) free_pmd_fast(pmd) -#define __pmd_free_tlb(tlb, pmd) pmd_free(pmd) +#define pmd_free(mm, pmd) free_pmd_fast(pmd) +#define __pmd_free_tlb(tlb, pmd) pmd_free((tlb)->mm, pmd) BTFIXUPDEF_CALL(void, pmd_populate, pmd_t *, struct page *) #define pmd_populate(MM, PMD, PTE) BTFIXUP_CALL(pmd_populate)(PMD, PTE) @@ -59,10 +59,10 @@ BTFIXUPDEF_CALL(pte_t *, pte_alloc_one_kernel, struct mm_struct *, unsigned long #define pte_alloc_one_kernel(mm, addr) BTFIXUP_CALL(pte_alloc_one_kernel)(mm, addr) BTFIXUPDEF_CALL(void, free_pte_fast, pte_t *) -#define pte_free_kernel(pte) BTFIXUP_CALL(free_pte_fast)(pte) +#define pte_free_kernel(mm, pte) BTFIXUP_CALL(free_pte_fast)(pte) BTFIXUPDEF_CALL(void, pte_free, struct page *) -#define pte_free(pte) BTFIXUP_CALL(pte_free)(pte) -#define __pte_free_tlb(tlb, pte) pte_free(pte) +#define pte_free(mm, pte) BTFIXUP_CALL(pte_free)(pte) +#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, pte) #endif /* _SPARC_PGALLOC_H */ diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h index 5d66b858a965..b48f73c2274e 100644 --- a/include/asm-sparc64/pgalloc.h +++ b/include/asm-sparc64/pgalloc.h @@ -20,7 +20,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) return quicklist_alloc(0, GFP_KERNEL, NULL); } -static inline void pgd_free(pgd_t *pgd) +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { quicklist_free(0, NULL, pgd); } @@ -32,7 +32,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) return quicklist_alloc(0, GFP_KERNEL, NULL); } -static inline void pmd_free(pmd_t *pmd) +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { quicklist_free(0, NULL, pmd); } @@ -50,12 +50,12 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, return pg ? virt_to_page(pg) : NULL; } -static inline void pte_free_kernel(pte_t *pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { quicklist_free(0, NULL, pte); } -static inline void pte_free(struct page *ptepage) +static inline void pte_free(struct mm_struct *mm, struct page *ptepage) { quicklist_free_page(0, NULL, ptepage); } diff --git a/include/asm-sparc64/tlb.h b/include/asm-sparc64/tlb.h index 349d1d3e9c27..ec81cdedef2c 100644 --- a/include/asm-sparc64/tlb.h +++ b/include/asm-sparc64/tlb.h @@ -100,8 +100,8 @@ static inline void tlb_remove_page(struct mmu_gather *mp, struct page *page) } #define tlb_remove_tlb_entry(mp,ptep,addr) do { } while (0) -#define pte_free_tlb(mp,ptepage) pte_free(ptepage) -#define pmd_free_tlb(mp,pmdp) pmd_free(pmdp) +#define pte_free_tlb(mp, ptepage) pte_free((mp)->mm, ptepage) +#define pmd_free_tlb(mp, pmdp) pmd_free((mp)->mm, pmdp) #define pud_free_tlb(tlb,pudp) __pud_free_tlb(tlb,pudp) #define tlb_migrate_finish(mm) do { } while (0) diff --git a/include/asm-um/a.out.h b/include/asm-um/a.out.h index 9281dd8eb334..f42ff14577fa 100644 --- a/include/asm-um/a.out.h +++ b/include/asm-um/a.out.h @@ -13,11 +13,9 @@ extern unsigned long stacksizelim; -extern unsigned long host_task_size; - #define STACK_ROOM (stacksizelim) -#define STACK_TOP task_size +#define STACK_TOP (TASK_SIZE - 2 * PAGE_SIZE) #define STACK_TOP_MAX STACK_TOP diff --git a/include/asm-um/current.h b/include/asm-um/current.h index 8fd72f69ce65..c2191d9aa03d 100644 --- a/include/asm-um/current.h +++ b/include/asm-um/current.h @@ -1,32 +1,13 @@ -/* - * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ #ifndef __UM_CURRENT_H #define __UM_CURRENT_H -#ifndef __ASSEMBLY__ - -#include "asm/page.h" #include "linux/thread_info.h" #define current (current_thread_info()->task) -/*Backward compatibility - it's used inside arch/um.*/ -#define current_thread current_thread_info() - -#endif /* __ASSEMBLY__ */ - #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/include/asm-um/elf-i386.h b/include/asm-um/elf-i386.h index ca94a136dfe8..23d6893e8617 100644 --- a/include/asm-um/elf-i386.h +++ b/include/asm-um/elf-i386.h @@ -1,11 +1,11 @@ /* - * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ #ifndef __UM_ELF_I386_H #define __UM_ELF_I386_H -#include <linux/sched.h> +#include <asm/user.h> #include "skas.h" #define R_386_NONE 0 @@ -46,7 +46,7 @@ typedef struct user_i387_struct elf_fpregset_t; PT_REGS_EDI(regs) = 0; \ PT_REGS_EBP(regs) = 0; \ PT_REGS_EAX(regs) = 0; \ -} while(0) +} while (0) #define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 @@ -74,14 +74,9 @@ typedef struct user_i387_struct elf_fpregset_t; pr_reg[14] = PT_REGS_EFLAGS(regs); \ pr_reg[15] = PT_REGS_SP(regs); \ pr_reg[16] = PT_REGS_SS(regs); \ -} while(0); +} while (0); -static inline int elf_core_copy_fpregs(struct task_struct *t, - elf_fpregset_t *fpu) -{ - int cpu = ((struct thread_info *) t->stack)->cpu; - return save_fp_registers(userspace_pid[cpu], (unsigned long *) fpu); -} +extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu); #define ELF_CORE_COPY_FPREGS(t, fpu) elf_core_copy_fpregs(t, fpu) @@ -91,7 +86,7 @@ extern long elf_aux_hwcap; extern char * elf_aux_platform; #define ELF_PLATFORM (elf_aux_platform) -#define SET_PERSONALITY(ex, ibcs2) do ; while(0) +#define SET_PERSONALITY(ex, ibcs2) do { } while (0) extern unsigned long vsyscall_ehdr; extern unsigned long vsyscall_end; @@ -166,14 +161,3 @@ if ( vsyscall_ehdr ) { \ } #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/include/asm-um/elf-x86_64.h b/include/asm-um/elf-x86_64.h index 3c9d543eb61e..3b2d5224a7e1 100644 --- a/include/asm-um/elf-x86_64.h +++ b/include/asm-um/elf-x86_64.h @@ -7,7 +7,6 @@ #ifndef __UM_ELF_X86_64_H #define __UM_ELF_X86_64_H -#include <linux/sched.h> #include <asm/user.h> #include "skas.h" @@ -96,12 +95,7 @@ typedef struct user_i387_struct elf_fpregset_t; (pr_reg)[25] = 0; \ (pr_reg)[26] = 0; -static inline int elf_core_copy_fpregs(struct task_struct *t, - elf_fpregset_t *fpu) -{ - int cpu = current_thread->cpu; - return save_fp_registers(userspace_pid[cpu], (unsigned long *) fpu); -} +extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu); #define ELF_CORE_COPY_FPREGS(t, fpu) elf_core_copy_fpregs(t, fpu) diff --git a/include/asm-um/fixmap.h b/include/asm-um/fixmap.h index d352a35cfafb..89a87c18b927 100644 --- a/include/asm-um/fixmap.h +++ b/include/asm-um/fixmap.h @@ -1,9 +1,10 @@ #ifndef __UM_FIXMAP_H #define __UM_FIXMAP_H +#include <asm/system.h> #include <asm/kmap_types.h> #include <asm/archparam.h> -#include <asm/elf.h> +#include <asm/page.h> /* * Here we define all the compile-time 'special' virtual @@ -55,9 +56,8 @@ extern void __set_fixmap (enum fixed_addresses idx, * the start of the fixmap, and leave one page empty * at the top of mem.. */ -extern unsigned long get_kmem_end(void); -#define FIXADDR_TOP (get_kmem_end() - 0x2000) +#define FIXADDR_TOP (CONFIG_TOP_ADDR - 2 * PAGE_SIZE) #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) diff --git a/include/asm-um/ldt.h b/include/asm-um/ldt.h index b2553f3e87eb..52af512f5e7d 100644 --- a/include/asm-um/ldt.h +++ b/include/asm-um/ldt.h @@ -8,7 +8,7 @@ #ifndef __ASM_LDT_H #define __ASM_LDT_H -#include "asm/semaphore.h" +#include <linux/mutex.h> #include "asm/host_ldt.h" extern void ldt_host_info(void); @@ -27,7 +27,7 @@ struct ldt_entry { typedef struct uml_ldt { int entry_count; - struct semaphore semaphore; + struct mutex lock; union { struct ldt_entry * pages[LDT_PAGES_MAX]; struct ldt_entry entries[LDT_DIRECT_ENTRIES]; diff --git a/include/asm-um/linkage.h b/include/asm-um/linkage.h index cdb3024a699a..7dfce37adc8b 100644 --- a/include/asm-um/linkage.h +++ b/include/asm-um/linkage.h @@ -3,10 +3,4 @@ #include "asm/arch/linkage.h" - -/* <linux/linkage.h> will pick sane defaults */ -#ifdef CONFIG_GPROF -#undef fastcall -#endif - #endif diff --git a/include/asm-um/mmu_context.h b/include/asm-um/mmu_context.h index 5f3b863aef9a..6686fc524ca1 100644 --- a/include/asm-um/mmu_context.h +++ b/include/asm-um/mmu_context.h @@ -6,11 +6,12 @@ #ifndef __UM_MMU_CONTEXT_H #define __UM_MMU_CONTEXT_H -#include <asm-generic/mm_hooks.h> - #include "linux/sched.h" #include "um_mmu.h" +extern void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); +extern void arch_exit_mmap(struct mm_struct *mm); + #define get_mmu_context(task) do ; while(0) #define activate_context(tsk) do ; while(0) @@ -30,6 +31,8 @@ static inline void activate_mm(struct mm_struct *old, struct mm_struct *new) */ if (old != new && (current->flags & PF_BORROWED_MM)) __switch_mm(&new->context.id); + + arch_dup_mmap(old, new); } static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, diff --git a/include/asm-um/page.h b/include/asm-um/page.h index 4b424c75fca5..fe2374d705d1 100644 --- a/include/asm-um/page.h +++ b/include/asm-um/page.h @@ -30,7 +30,7 @@ struct page; #if defined(CONFIG_3_LEVEL_PGTABLES) && !defined(CONFIG_64BIT) typedef struct { unsigned long pte_low, pte_high; } pte_t; -typedef struct { unsigned long long pmd; } pmd_t; +typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pgd; } pgd_t; #define pte_val(x) ((x).pte_low | ((unsigned long long) (x).pte_high << 32)) @@ -106,8 +106,8 @@ extern unsigned long uml_physmem; #define __pa(virt) to_phys((void *) (unsigned long) (virt)) #define __va(phys) to_virt((unsigned long) (phys)) -#define phys_to_pfn(p) ((p) >> PAGE_SHIFT) -#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT) +#define phys_to_pfn(p) ((pfn_t) ((p) >> PAGE_SHIFT)) +#define pfn_to_phys(pfn) ((phys_t) ((pfn) << PAGE_SHIFT)) #define pfn_valid(pfn) ((pfn) < max_mapnr) #define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v))) diff --git a/include/asm-um/param.h b/include/asm-um/param.h index f914e7d67b01..4cd4a226f8c1 100644 --- a/include/asm-um/param.h +++ b/include/asm-um/param.h @@ -10,7 +10,7 @@ #define MAXHOSTNAMELEN 64 /* max length of hostname */ #ifdef __KERNEL__ -#define HZ 100 +#define HZ CONFIG_HZ #define USER_HZ 100 /* .. some user interfaces are in "ticks" */ #define CLOCKS_PER_SEC (USER_HZ) /* frequency at which times() counts */ #endif diff --git a/include/asm-um/pgalloc.h b/include/asm-um/pgalloc.h index 14904876e8fb..4f3e62b02861 100644 --- a/include/asm-um/pgalloc.h +++ b/include/asm-um/pgalloc.h @@ -23,17 +23,17 @@ * Allocate and free page tables. */ extern pgd_t *pgd_alloc(struct mm_struct *); -extern void pgd_free(pgd_t *pgd); +extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long); extern struct page *pte_alloc_one(struct mm_struct *, unsigned long); -static inline void pte_free_kernel(pte_t *pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { free_page((unsigned long) pte); } -static inline void pte_free(struct page *pte) +static inline void pte_free(struct mm_struct *mm, struct page *pte) { __free_page(pte); } @@ -42,7 +42,7 @@ static inline void pte_free(struct page *pte) #ifdef CONFIG_3_LEVEL_PGTABLES -static inline void pmd_free(pmd_t *pmd) +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { free_page((unsigned long)pmd); } diff --git a/include/asm-um/pgtable-2level.h b/include/asm-um/pgtable-2level.h index 172a75fde512..f534b73e753e 100644 --- a/include/asm-um/pgtable-2level.h +++ b/include/asm-um/pgtable-2level.h @@ -41,9 +41,6 @@ static inline void pgd_mkuptodate(pgd_t pgd) { } #define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot)) #define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot)) -#define pmd_page_vaddr(pmd) \ - ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) - /* * Bits 0 through 4 are taken */ diff --git a/include/asm-um/pgtable-3level.h b/include/asm-um/pgtable-3level.h index 3ebafbaacb24..0446f456b428 100644 --- a/include/asm-um/pgtable-3level.h +++ b/include/asm-um/pgtable-3level.h @@ -11,7 +11,11 @@ /* PGDIR_SHIFT determines what a third-level page table entry can map */ +#ifdef CONFIG_64BIT #define PGDIR_SHIFT 30 +#else +#define PGDIR_SHIFT 31 +#endif #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) @@ -28,9 +32,15 @@ */ #define PTRS_PER_PTE 512 +#ifdef CONFIG_64BIT #define PTRS_PER_PMD 512 -#define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE) #define PTRS_PER_PGD 512 +#else +#define PTRS_PER_PMD 1024 +#define PTRS_PER_PGD 1024 +#endif + +#define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE) #define FIRST_USER_ADDRESS 0 #define pte_ERROR(e) \ @@ -49,7 +59,12 @@ #define pud_populate(mm, pud, pmd) \ set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd))) +#ifdef CONFIG_64BIT #define set_pud(pudptr, pudval) set_64bit((phys_t *) (pudptr), pud_val(pudval)) +#else +#define set_pud(pudptr, pudval) (*(pudptr) = (pudval)) +#endif + static inline int pgd_newpage(pgd_t pgd) { return(pgd_val(pgd) & _PAGE_NEWPAGE); @@ -57,17 +72,14 @@ static inline int pgd_newpage(pgd_t pgd) static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; } +#ifdef CONFIG_64BIT #define set_pmd(pmdptr, pmdval) set_64bit((phys_t *) (pmdptr), pmd_val(pmdval)) +#else +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) +#endif -static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) -{ - pmd_t *pmd = (pmd_t *) __get_free_page(GFP_KERNEL); - - if(pmd) - memset(pmd, 0, PAGE_SIZE); - - return pmd; -} +struct mm_struct; +extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address); static inline void pud_clear (pud_t *pud) { @@ -75,8 +87,7 @@ static inline void pud_clear (pud_t *pud) } #define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK) -#define pud_page_vaddr(pud) \ - ((struct page *) __va(pud_val(pud) & PAGE_MASK)) +#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PAGE_MASK)) /* Find an entry in the second-level page table.. */ #define pmd_offset(pud, address) ((pmd_t *) pud_page_vaddr(*(pud)) + \ diff --git a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h index 830fc6e5d49d..4102b443e925 100644 --- a/include/asm-um/pgtable.h +++ b/include/asm-um/pgtable.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Copyright 2003 PathScale, Inc. * Derived from include/asm-i386/pgtable.h * Licensed under the GPL @@ -8,11 +8,7 @@ #ifndef __UM_PGTABLE_H #define __UM_PGTABLE_H -#include "linux/sched.h" -#include "linux/linkage.h" -#include "asm/processor.h" -#include "asm/page.h" -#include "asm/fixmap.h" +#include <asm/fixmap.h> #define _PAGE_PRESENT 0x001 #define _PAGE_NEWPAGE 0x002 @@ -34,22 +30,11 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -extern void *um_virt_to_phys(struct task_struct *task, unsigned long virt, - pte_t *pte_out); - /* zero page used for uninitialized stuff */ extern unsigned long *empty_zero_page; #define pgtable_cache_init() do ; while (0) -/* - * pgd entries used up by user/kernel: - */ - -#define USER_PGD_PTRS (TASK_SIZE >> PGDIR_SHIFT) -#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) - -#ifndef __ASSEMBLY__ /* Just any arbitrary offset to the start of the vmalloc VM area: the * current 8MB value just means that there will be a 8MB "hole" after the * physical memory until the kernel virtual memory starts. That means that @@ -62,16 +47,12 @@ extern unsigned long end_iomem; #define VMALLOC_OFFSET (__va_space) #define VMALLOC_START ((end_iomem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) - #ifdef CONFIG_HIGHMEM # define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) #else # define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) #endif -#define REGION_SHIFT (sizeof(pte_t) * 8 - 4) -#define REGION_MASK (((unsigned long) 0xf) << REGION_SHIFT) - #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) @@ -81,11 +62,12 @@ extern unsigned long end_iomem; #define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) #define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) #define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) -#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED) /* - * The i386 can't do page protection for execute, and considers that the same are read. - * Also, write permissions imply read permissions. This is the closest we can get.. + * The i386 can't do page protection for execute, and considers that the same + * are read. + * Also, write permissions imply read permissions. This is the closest we can + * get.. */ #define __P000 PAGE_NONE #define __P001 PAGE_READONLY @@ -106,40 +88,16 @@ extern unsigned long end_iomem; #define __S111 PAGE_SHARED /* - * Define this if things work differently on an i386 and an i486: - * it will (on an i486) warn about kernel memory accesses that are - * done without a 'access_ok(VERIFY_WRITE,..)' - */ -#undef TEST_VERIFY_AREA - -/* page table for 0-4MB for everybody */ -extern unsigned long pg0[1024]; - -/* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ - #define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) -/* number of bits that fit into a memory pointer */ -#define BITS_PER_PTR (8*sizeof(unsigned long)) - -/* to align the pointer to a pointer address */ -#define PTR_MASK (~(sizeof(void*)-1)) - -/* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */ -/* 64-bit machines, beware! SRB. */ -#define SIZEOF_PTR_LOG2 3 - -/* to find an entry in a page-table */ -#define PAGE_PTR(address) \ -((unsigned long)(address)>>(PAGE_SHIFT-SIZEOF_PTR_LOG2)&PTR_MASK&~PAGE_MASK) - #define pte_clear(mm,addr,xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEWPAGE)) #define pmd_none(x) (!((unsigned long)pmd_val(x) & ~_PAGE_NEWPAGE)) #define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) + #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) #define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0) @@ -149,14 +107,9 @@ extern unsigned long pg0[1024]; #define pud_newpage(x) (pud_val(x) & _PAGE_NEWPAGE) #define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEWPAGE) -#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) - #define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK) #define pte_page(x) pfn_to_page(pte_pfn(x)) -#define pte_address(x) (__va(pte_val(x) & PAGE_MASK)) -#define mk_phys(a, r) ((a) + (((unsigned long) r) << REGION_SHIFT)) -#define phys_addr(p) ((p) & ~REGION_MASK) #define pte_present(x) pte_get_bits(x, (_PAGE_PRESENT | _PAGE_PROTNONE)) @@ -309,7 +262,8 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval) #define phys_to_page(phys) pfn_to_page(phys_to_pfn(phys)) #define __virt_to_page(virt) phys_to_page(__pa(virt)) -#define page_to_phys(page) pfn_to_phys(page_to_pfn(page)) +#define page_to_phys(page) pfn_to_phys((pfn_t) page_to_pfn(page)) +#define virt_to_page(addr) __virt_to_page((const unsigned long) addr) #define mk_pte(page, pgprot) \ ({ pte_t pte; \ @@ -325,8 +279,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) return pte; } -#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) - /* * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] * @@ -335,8 +287,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) */ #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) -#define pgd_index_k(addr) pgd_index(addr) - /* * pgd_offset() returns a (pgd_t *) * pgd_index() is used get the offset into the pgd page's array of pgd_t's; @@ -355,8 +305,12 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * this macro returns the index of the entry in the pmd page which would * control the given virtual address */ +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) +#define pmd_page_vaddr(pmd) \ + ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) + /* * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] * @@ -372,6 +326,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define pte_unmap(pte) do { } while (0) #define pte_unmap_nested(pte) do { } while (0) +struct mm_struct; +extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); + #define update_mmu_cache(vma,address,pte) do ; while (0) /* Encode and de-code a swap entry */ @@ -388,29 +345,4 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #include <asm-generic/pgtable.h> -#include <asm-generic/pgtable-nopud.h> - -#ifdef CONFIG_HIGHMEM -/* Clear a kernel PTE and flush it from the TLB */ -#define kpte_clear_flush(ptep, vaddr) \ -do { \ - pte_clear(&init_mm, vaddr, ptep); \ - __flush_tlb_one(vaddr); \ -} while (0) #endif - -#endif -#endif - -#define virt_to_page(addr) __virt_to_page((const unsigned long) addr) - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/include/asm-um/processor-generic.h b/include/asm-um/processor-generic.h index 78c0599cc80c..b7d9a16a7451 100644 --- a/include/asm-um/processor-generic.h +++ b/include/asm-um/processor-generic.h @@ -11,6 +11,7 @@ struct pt_regs; struct task_struct; #include "asm/ptrace.h" +#include "asm/pgtable.h" #include "registers.h" #include "sysdep/archsetjmp.h" @@ -26,7 +27,6 @@ struct thread_struct { * as of 2.6.11). */ int forking; - int nsyscalls; struct pt_regs regs; int singlestep_syscall; void *fault_addr; @@ -58,7 +58,6 @@ struct thread_struct { #define INIT_THREAD \ { \ .forking = 0, \ - .nsyscalls = 0, \ .regs = EMPTY_REGS, \ .fault_addr = NULL, \ .prev_sched = NULL, \ @@ -68,10 +67,6 @@ struct thread_struct { .request = { 0 } \ } -typedef struct { - unsigned long seg; -} mm_segment_t; - extern struct task_struct *alloc_task_struct(void); static inline void release_thread(struct task_struct *task) @@ -97,9 +92,7 @@ static inline void mm_copy_segments(struct mm_struct *from_mm, /* * User space process size: 3GB (default). */ -extern unsigned long task_size; - -#define TASK_SIZE (task_size) +#define TASK_SIZE (CONFIG_TOP_ADDR & PGDIR_MASK) /* This decides where the kernel will search for a free chunk of vm * space during mmap's. @@ -128,6 +121,6 @@ extern struct cpuinfo_um cpu_data[]; #define KSTK_REG(tsk, reg) get_thread_reg(reg, &tsk->thread.switch_buf) -#define get_wchan(p) (0) +extern unsigned long get_wchan(struct task_struct *p); #endif diff --git a/include/asm-um/processor-i386.h b/include/asm-um/processor-i386.h index 595f1c3e1e40..a2b7fe13fe1e 100644 --- a/include/asm-um/processor-i386.h +++ b/include/asm-um/processor-i386.h @@ -10,7 +10,6 @@ #include "asm/host_ldt.h" #include "asm/segment.h" -extern int host_has_xmm; extern int host_has_cmov; /* include faultinfo structure */ diff --git a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h index 6e5fd5c892d0..356b83e2c22e 100644 --- a/include/asm-um/thread_info.h +++ b/include/asm-um/thread_info.h @@ -1,5 +1,5 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -8,8 +8,9 @@ #ifndef __ASSEMBLY__ -#include <asm/processor.h> #include <asm/types.h> +#include <asm/page.h> +#include <asm/uaccess.h> struct thread_info { struct task_struct *task; /* main task structure */ @@ -75,8 +76,8 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling - * TIF_NEED_RESCHED +#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling + * TIF_NEED_RESCHED */ #define TIF_RESTART_BLOCK 4 #define TIF_MEMDIE 5 diff --git a/include/asm-um/tlb.h b/include/asm-um/tlb.h index c640033bc1fd..39fc475df6c9 100644 --- a/include/asm-um/tlb.h +++ b/include/asm-um/tlb.h @@ -1,6 +1,126 @@ #ifndef __UM_TLB_H #define __UM_TLB_H -#include <asm/arch/tlb.h> +#include <linux/swap.h> +#include <asm/percpu.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> + +#define tlb_start_vma(tlb, vma) do { } while (0) +#define tlb_end_vma(tlb, vma) do { } while (0) +#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) + +/* struct mmu_gather is an opaque type used by the mm code for passing around + * any data needed by arch specific code for tlb_remove_page. + */ +struct mmu_gather { + struct mm_struct *mm; + unsigned int need_flush; /* Really unmapped some ptes? */ + unsigned long start; + unsigned long end; + unsigned int fullmm; /* non-zero means full mm flush */ +}; + +/* Users of the generic TLB shootdown code must declare this storage space. */ +DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); + +static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, + unsigned long address) +{ + if (tlb->start > address) + tlb->start = address; + if (tlb->end < address + PAGE_SIZE) + tlb->end = address + PAGE_SIZE; +} + +static inline void init_tlb_gather(struct mmu_gather *tlb) +{ + tlb->need_flush = 0; + + tlb->start = TASK_SIZE; + tlb->end = 0; + + if (tlb->fullmm) { + tlb->start = 0; + tlb->end = TASK_SIZE; + } +} + +/* tlb_gather_mmu + * Return a pointer to an initialized struct mmu_gather. + */ +static inline struct mmu_gather * +tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) +{ + struct mmu_gather *tlb = &get_cpu_var(mmu_gathers); + + tlb->mm = mm; + tlb->fullmm = full_mm_flush; + + init_tlb_gather(tlb); + + return tlb; +} + +extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + unsigned long end); + +static inline void +tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +{ + if (!tlb->need_flush) + return; + + flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end); + init_tlb_gather(tlb); +} + +/* tlb_finish_mmu + * Called at the end of the shootdown operation to free up any resources + * that were required. + */ +static inline void +tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +{ + tlb_flush_mmu(tlb, start, end); + + /* keep the page table cache within bounds */ + check_pgt_cache(); + + put_cpu_var(mmu_gathers); +} + +/* tlb_remove_page + * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), + * while handling the additional races in SMP caused by other CPUs + * caching valid mappings in their TLBs. + */ +static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + tlb->need_flush = 1; + free_page_and_swap_cache(page); + return; +} + +/** + * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation. + * + * Record the fact that pte's were really umapped in ->need_flush, so we can + * later optimise away the tlb invalidate. This helps when userspace is + * unmapping already-unmapped pages, which happens quite a lot. + */ +#define tlb_remove_tlb_entry(tlb, ptep, address) \ + do { \ + tlb->need_flush = 1; \ + __tlb_remove_tlb_entry(tlb, ptep, address); \ + } while (0) + +#define pte_free_tlb(tlb, ptep) __pte_free_tlb(tlb, ptep) + +#define pud_free_tlb(tlb, pudp) __pud_free_tlb(tlb, pudp) + +#define pmd_free_tlb(tlb, pmdp) __pmd_free_tlb(tlb, pmdp) + +#define tlb_migrate_finish(mm) do {} while (0) #endif diff --git a/include/asm-um/uaccess.h b/include/asm-um/uaccess.h index 077032d4fc47..b9a895d6fa1d 100644 --- a/include/asm-um/uaccess.h +++ b/include/asm-um/uaccess.h @@ -6,7 +6,15 @@ #ifndef __UM_UACCESS_H #define __UM_UACCESS_H -#include "linux/sched.h" +#include <asm/errno.h> +#include <asm/processor.h> + +/* thread_info has a mm_segment_t in it, so put the definition up here */ +typedef struct { + unsigned long seg; +} mm_segment_t; + +#include "linux/thread_info.h" #define VERIFY_READ 0 #define VERIFY_WRITE 1 diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h index 48adbf56ca60..aaf15194d536 100644 --- a/include/asm-x86/bitops_64.h +++ b/include/asm-x86/bitops_64.h @@ -37,12 +37,6 @@ static inline long __scanbit(unsigned long val, unsigned long max) ((off)+(__scanbit(~(((*(unsigned long *)addr)) >> (off)),(size)-(off)))) : \ find_next_zero_bit(addr,size,off))) -/* - * Find string of zero bits in a bitmap. -1 when not found. - */ -extern unsigned long -find_next_zero_string(unsigned long *bitmap, long start, long nbits, int len); - static inline void set_bit_string(unsigned long *bitmap, unsigned long i, int len) { @@ -53,16 +47,6 @@ static inline void set_bit_string(unsigned long *bitmap, unsigned long i, } } -static inline void __clear_bit_string(unsigned long *bitmap, unsigned long i, - int len) -{ - unsigned long end = i + len; - while (i < end) { - __clear_bit(i, bitmap); - i++; - } -} - /** * ffz - find first zero in word. * @word: The word to search diff --git a/include/asm-x86/highmem.h b/include/asm-x86/highmem.h index c25cfcaab589..479767c9195f 100644 --- a/include/asm-x86/highmem.h +++ b/include/asm-x86/highmem.h @@ -38,11 +38,6 @@ extern pte_t *pkmap_page_table; * easily, subsequent pte tables have to be allocated in one physical * chunk of RAM. */ -#ifdef CONFIG_X86_PAE -#define LAST_PKMAP 512 -#else -#define LAST_PKMAP 1024 -#endif /* * Ordering is: * @@ -58,7 +53,6 @@ extern pte_t *pkmap_page_table; * VMALLOC_START * high_memory */ -#define PKMAP_BASE ( (FIXADDR_BOOT_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK ) #define LAST_PKMAP_MASK (LAST_PKMAP-1) #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) diff --git a/include/asm-x86/pgalloc_32.h b/include/asm-x86/pgalloc_32.h index 6c21ef951dab..bab12718a913 100644 --- a/include/asm-x86/pgalloc_32.h +++ b/include/asm-x86/pgalloc_32.h @@ -36,17 +36,17 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *p * Allocate and free page tables. */ extern pgd_t *pgd_alloc(struct mm_struct *); -extern void pgd_free(pgd_t *pgd); +extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long); extern struct page *pte_alloc_one(struct mm_struct *, unsigned long); -static inline void pte_free_kernel(pte_t *pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { free_page((unsigned long)pte); } -static inline void pte_free(struct page *pte) +static inline void pte_free(struct mm_struct *mm, struct page *pte) { __free_page(pte); } @@ -63,7 +63,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); } -static inline void pmd_free(pmd_t *pmd) +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); free_page((unsigned long)pmd); diff --git a/include/asm-x86/pgalloc_64.h b/include/asm-x86/pgalloc_64.h index 8bb564687860..315314ce4bfb 100644 --- a/include/asm-x86/pgalloc_64.h +++ b/include/asm-x86/pgalloc_64.h @@ -17,7 +17,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *p set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT))); } -static inline void pmd_free(pmd_t *pmd) +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); free_page((unsigned long)pmd); @@ -33,7 +33,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); } -static inline void pud_free (pud_t *pud) +static inline void pud_free(struct mm_struct *mm, pud_t *pud) { BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); free_page((unsigned long)pud); @@ -77,7 +77,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) return pgd; } -static inline void pgd_free(pgd_t *pgd) +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { BUG_ON((unsigned long)pgd & (PAGE_SIZE-1)); pgd_list_del(pgd); @@ -100,13 +100,13 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long add /* Should really implement gc for free page table pages. This could be done with a reference count in struct page. */ -static inline void pte_free_kernel(pte_t *pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { BUG_ON((unsigned long)pte & (PAGE_SIZE-1)); free_page((unsigned long)pte); } -static inline void pte_free(struct page *pte) +static inline void pte_free(struct mm_struct *mm, struct page *pte) { __free_page(pte); } diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index 935630d17304..80dd438642f6 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h @@ -66,6 +66,14 @@ void paging_init(void); #define VMALLOC_OFFSET (8*1024*1024) #define VMALLOC_START (((unsigned long) high_memory + \ 2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1)) +#ifdef CONFIG_X86_PAE +#define LAST_PKMAP 512 +#else +#define LAST_PKMAP 1024 +#endif + +#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK) + #ifdef CONFIG_HIGHMEM # define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) #else diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h index 8d8f9b5adbb9..984123a68f7c 100644 --- a/include/asm-x86/unistd_32.h +++ b/include/asm-x86/unistd_32.h @@ -327,9 +327,11 @@ #define __NR_epoll_pwait 319 #define __NR_utimensat 320 #define __NR_signalfd 321 -#define __NR_timerfd 322 +#define __NR_timerfd_create 322 #define __NR_eventfd 323 #define __NR_fallocate 324 +#define __NR_timerfd_settime 325 +#define __NR_timerfd_gettime 326 #ifdef __KERNEL__ diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h index 5ff4d3e24c34..3883ceb54ef5 100644 --- a/include/asm-x86/unistd_64.h +++ b/include/asm-x86/unistd_64.h @@ -629,12 +629,17 @@ __SYSCALL(__NR_utimensat, sys_utimensat) __SYSCALL(__NR_epoll_pwait, sys_epoll_pwait) #define __NR_signalfd 282 __SYSCALL(__NR_signalfd, sys_signalfd) -#define __NR_timerfd 283 -__SYSCALL(__NR_timerfd, sys_timerfd) +#define __NR_timerfd_create 283 +__SYSCALL(__NR_timerfd_create, sys_timerfd_create) #define __NR_eventfd 284 __SYSCALL(__NR_eventfd, sys_eventfd) #define __NR_fallocate 285 __SYSCALL(__NR_fallocate, sys_fallocate) +#define __NR_timerfd_settime 286 +__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) +#define __NR_timerfd_gettime 287 +__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) + #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/include/asm-xtensa/pgalloc.h b/include/asm-xtensa/pgalloc.h index 3e5b56525102..1d51ba5463f9 100644 --- a/include/asm-xtensa/pgalloc.h +++ b/include/asm-xtensa/pgalloc.h @@ -31,7 +31,7 @@ pgd_alloc(struct mm_struct *mm) return (pgd_t*) __get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER); } -static inline void pgd_free(pgd_t *pgd) +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { free_page((unsigned long)pgd); } @@ -52,12 +52,12 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, return virt_to_page(pte_alloc_one_kernel(mm, addr)); } -static inline void pte_free_kernel(pte_t *pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { kmem_cache_free(pgtable_cache, pte); } -static inline void pte_free(struct page *page) +static inline void pte_free(struct mm_struct *mm, struct page *page) { kmem_cache_free(pgtable_cache, page_address(page)); } diff --git a/include/asm-xtensa/tlb.h b/include/asm-xtensa/tlb.h index 4830232017af..31c220faca02 100644 --- a/include/asm-xtensa/tlb.h +++ b/include/asm-xtensa/tlb.h @@ -42,6 +42,6 @@ #include <asm-generic/tlb.h> -#define __pte_free_tlb(tlb,pte) pte_free(pte) +#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, pte) #endif /* _XTENSA_TLB_H */ diff --git a/include/linux/capability.h b/include/linux/capability.h index bb017edffd56..7d50ff6d269f 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -14,7 +14,6 @@ #define _LINUX_CAPABILITY_H #include <linux/types.h> -#include <linux/compiler.h> struct task_struct; @@ -23,13 +22,20 @@ struct task_struct; kernel might be somewhat backwards compatible, but don't bet on it. */ -/* XXX - Note, cap_t, is defined by POSIX to be an "opaque" pointer to +/* Note, cap_t, is defined by POSIX (draft) to be an "opaque" pointer to a set of three capability sets. The transposition of 3*the following structure to such a composite is better handled in a user library since the draft standard requires the use of malloc/free etc.. */ -#define _LINUX_CAPABILITY_VERSION 0x19980330 +#define _LINUX_CAPABILITY_VERSION_1 0x19980330 +#define _LINUX_CAPABILITY_U32S_1 1 + +#define _LINUX_CAPABILITY_VERSION_2 0x20071026 +#define _LINUX_CAPABILITY_U32S_2 2 + +#define _LINUX_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_2 +#define _LINUX_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_2 typedef struct __user_cap_header_struct { __u32 version; @@ -42,41 +48,42 @@ typedef struct __user_cap_data_struct { __u32 inheritable; } __user *cap_user_data_t; + #define XATTR_CAPS_SUFFIX "capability" #define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX -#define XATTR_CAPS_SZ (3*sizeof(__le32)) #define VFS_CAP_REVISION_MASK 0xFF000000 +#define VFS_CAP_FLAGS_MASK ~VFS_CAP_REVISION_MASK +#define VFS_CAP_FLAGS_EFFECTIVE 0x000001 + #define VFS_CAP_REVISION_1 0x01000000 +#define VFS_CAP_U32_1 1 +#define XATTR_CAPS_SZ_1 (sizeof(__le32)*(1 + 2*VFS_CAP_U32_1)) -#define VFS_CAP_REVISION VFS_CAP_REVISION_1 +#define VFS_CAP_REVISION_2 0x02000000 +#define VFS_CAP_U32_2 2 +#define XATTR_CAPS_SZ_2 (sizeof(__le32)*(1 + 2*VFS_CAP_U32_2)) + +#define XATTR_CAPS_SZ XATTR_CAPS_SZ_2 +#define VFS_CAP_U32 VFS_CAP_U32_2 +#define VFS_CAP_REVISION VFS_CAP_REVISION_2 -#define VFS_CAP_FLAGS_MASK ~VFS_CAP_REVISION_MASK -#define VFS_CAP_FLAGS_EFFECTIVE 0x000001 struct vfs_cap_data { - __u32 magic_etc; /* Little endian */ - __u32 permitted; /* Little endian */ - __u32 inheritable; /* Little endian */ + __le32 magic_etc; /* Little endian */ + struct { + __le32 permitted; /* Little endian */ + __le32 inheritable; /* Little endian */ + } data[VFS_CAP_U32]; }; #ifdef __KERNEL__ -/* #define STRICT_CAP_T_TYPECHECKS */ - -#ifdef STRICT_CAP_T_TYPECHECKS - typedef struct kernel_cap_struct { - __u32 cap; + __u32 cap[_LINUX_CAPABILITY_U32S]; } kernel_cap_t; -#else - -typedef __u32 kernel_cap_t; - -#endif - -#define _USER_CAP_HEADER_SIZE (2*sizeof(__u32)) +#define _USER_CAP_HEADER_SIZE (sizeof(struct __user_cap_header_struct)) #define _KERNEL_CAP_T_SIZE (sizeof(kernel_cap_t)) #endif @@ -119,10 +126,6 @@ typedef __u32 kernel_cap_t; #define CAP_FSETID 4 -/* Used to decide between falling back on the old suser() or fsuser(). */ - -#define CAP_FS_MASK 0x1f - /* Overrides the restriction that the real or effective user ID of a process sending a signal must match the real or effective user ID of the process receiving the signal. */ @@ -145,8 +148,14 @@ typedef __u32 kernel_cap_t; ** Linux-specific capabilities **/ -/* Transfer any capability in your permitted set to any pid, - remove any capability in your permitted set from any pid */ +/* Without VFS support for capabilities: + * Transfer any capability in your permitted set to any pid, + * remove any capability in your permitted set from any pid + * With VFS support for capabilities (neither of above, but) + * Add any capability from current's capability bounding set + * to the current process' inheritable set + * Allow taking bits out of capability bounding set + */ #define CAP_SETPCAP 8 @@ -195,7 +204,6 @@ typedef __u32 kernel_cap_t; #define CAP_IPC_OWNER 15 /* Insert and remove kernel modules - modify kernel without limit */ -/* Modify cap_bset */ #define CAP_SYS_MODULE 16 /* Allow ioperm/iopl access */ @@ -307,74 +315,183 @@ typedef __u32 kernel_cap_t; #define CAP_SETFCAP 31 +/* Override MAC access. + The base kernel enforces no MAC policy. + An LSM may enforce a MAC policy, and if it does and it chooses + to implement capability based overrides of that policy, this is + the capability it should use to do so. */ + +#define CAP_MAC_OVERRIDE 32 + +/* Allow MAC configuration or state changes. + The base kernel requires no MAC configuration. + An LSM may enforce a MAC policy, and if it does and it chooses + to implement capability based checks on modifications to that + policy or the data required to maintain it, this is the + capability it should use to do so. */ + +#define CAP_MAC_ADMIN 33 + +#define CAP_LAST_CAP CAP_MAC_ADMIN + +#define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) + +/* + * Bit location of each capability (used by user-space library and kernel) + */ + +#define CAP_TO_INDEX(x) ((x) >> 5) /* 1 << 5 == bits in __u32 */ +#define CAP_TO_MASK(x) (1 << ((x) & 31)) /* mask for indexed __u32 */ + #ifdef __KERNEL__ /* * Internal kernel functions only */ -#ifdef STRICT_CAP_T_TYPECHECKS +#define CAP_FOR_EACH_U32(__capi) \ + for (__capi = 0; __capi < _LINUX_CAPABILITY_U32S; ++__capi) + +# define CAP_FS_MASK_B0 (CAP_TO_MASK(CAP_CHOWN) \ + | CAP_TO_MASK(CAP_DAC_OVERRIDE) \ + | CAP_TO_MASK(CAP_DAC_READ_SEARCH) \ + | CAP_TO_MASK(CAP_FOWNER) \ + | CAP_TO_MASK(CAP_FSETID)) + +# define CAP_FS_MASK_B1 (CAP_TO_MASK(CAP_MAC_OVERRIDE)) + +#if _LINUX_CAPABILITY_U32S != 2 +# error Fix up hand-coded capability macro initializers +#else /* HAND-CODED capability initializers */ + +# define CAP_EMPTY_SET {{ 0, 0 }} +# define CAP_FULL_SET {{ ~0, ~0 }} +# define CAP_INIT_EFF_SET {{ ~CAP_TO_MASK(CAP_SETPCAP), ~0 }} +# define CAP_FS_SET {{ CAP_FS_MASK_B0, CAP_FS_MASK_B1 } } +# define CAP_NFSD_SET {{ CAP_FS_MASK_B0|CAP_TO_MASK(CAP_SYS_RESOURCE), \ + CAP_FS_MASK_B1 } } + +#endif /* _LINUX_CAPABILITY_U32S != 2 */ + +#define CAP_INIT_INH_SET CAP_EMPTY_SET + +# define cap_clear(c) do { (c) = __cap_empty_set; } while (0) +# define cap_set_full(c) do { (c) = __cap_full_set; } while (0) +# define cap_set_init_eff(c) do { (c) = __cap_init_eff_set; } while (0) + +#define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) +#define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag)) +#define cap_raised(c, flag) ((c).cap[CAP_TO_INDEX(flag)] & CAP_TO_MASK(flag)) + +#define CAP_BOP_ALL(c, a, b, OP) \ +do { \ + unsigned __capi; \ + CAP_FOR_EACH_U32(__capi) { \ + c.cap[__capi] = a.cap[__capi] OP b.cap[__capi]; \ + } \ +} while (0) + +#define CAP_UOP_ALL(c, a, OP) \ +do { \ + unsigned __capi; \ + CAP_FOR_EACH_U32(__capi) { \ + c.cap[__capi] = OP a.cap[__capi]; \ + } \ +} while (0) + +static inline kernel_cap_t cap_combine(const kernel_cap_t a, + const kernel_cap_t b) +{ + kernel_cap_t dest; + CAP_BOP_ALL(dest, a, b, |); + return dest; +} -#define to_cap_t(x) { x } -#define cap_t(x) (x).cap +static inline kernel_cap_t cap_intersect(const kernel_cap_t a, + const kernel_cap_t b) +{ + kernel_cap_t dest; + CAP_BOP_ALL(dest, a, b, &); + return dest; +} -#else +static inline kernel_cap_t cap_drop(const kernel_cap_t a, + const kernel_cap_t drop) +{ + kernel_cap_t dest; + CAP_BOP_ALL(dest, a, drop, &~); + return dest; +} -#define to_cap_t(x) (x) -#define cap_t(x) (x) +static inline kernel_cap_t cap_invert(const kernel_cap_t c) +{ + kernel_cap_t dest; + CAP_UOP_ALL(dest, c, ~); + return dest; +} -#endif +static inline int cap_isclear(const kernel_cap_t a) +{ + unsigned __capi; + CAP_FOR_EACH_U32(__capi) { + if (a.cap[__capi] != 0) + return 0; + } + return 1; +} -#define CAP_EMPTY_SET to_cap_t(0) -#define CAP_FULL_SET to_cap_t(~0) -#define CAP_INIT_EFF_SET to_cap_t(~0 & ~CAP_TO_MASK(CAP_SETPCAP)) -#define CAP_INIT_INH_SET to_cap_t(0) +static inline int cap_issubset(const kernel_cap_t a, const kernel_cap_t set) +{ + kernel_cap_t dest; + dest = cap_drop(a, set); + return cap_isclear(dest); +} -#define CAP_TO_MASK(x) (1 << (x)) -#define cap_raise(c, flag) (cap_t(c) |= CAP_TO_MASK(flag)) -#define cap_lower(c, flag) (cap_t(c) &= ~CAP_TO_MASK(flag)) -#define cap_raised(c, flag) (cap_t(c) & CAP_TO_MASK(flag)) +/* Used to decide between falling back on the old suser() or fsuser(). */ -static inline kernel_cap_t cap_combine(kernel_cap_t a, kernel_cap_t b) +static inline int cap_is_fs_cap(int cap) { - kernel_cap_t dest; - cap_t(dest) = cap_t(a) | cap_t(b); - return dest; + const kernel_cap_t __cap_fs_set = CAP_FS_SET; + return !!(CAP_TO_MASK(cap) & __cap_fs_set.cap[CAP_TO_INDEX(cap)]); } -static inline kernel_cap_t cap_intersect(kernel_cap_t a, kernel_cap_t b) +static inline kernel_cap_t cap_drop_fs_set(const kernel_cap_t a) { - kernel_cap_t dest; - cap_t(dest) = cap_t(a) & cap_t(b); - return dest; + const kernel_cap_t __cap_fs_set = CAP_FS_SET; + return cap_drop(a, __cap_fs_set); } -static inline kernel_cap_t cap_drop(kernel_cap_t a, kernel_cap_t drop) +static inline kernel_cap_t cap_raise_fs_set(const kernel_cap_t a, + const kernel_cap_t permitted) { - kernel_cap_t dest; - cap_t(dest) = cap_t(a) & ~cap_t(drop); - return dest; + const kernel_cap_t __cap_fs_set = CAP_FS_SET; + return cap_combine(a, + cap_intersect(permitted, __cap_fs_set)); } -static inline kernel_cap_t cap_invert(kernel_cap_t c) +static inline kernel_cap_t cap_drop_nfsd_set(const kernel_cap_t a) { - kernel_cap_t dest; - cap_t(dest) = ~cap_t(c); - return dest; + const kernel_cap_t __cap_fs_set = CAP_NFSD_SET; + return cap_drop(a, __cap_fs_set); } -#define cap_isclear(c) (!cap_t(c)) -#define cap_issubset(a,set) (!(cap_t(a) & ~cap_t(set))) - -#define cap_clear(c) do { cap_t(c) = 0; } while(0) -#define cap_set_full(c) do { cap_t(c) = ~0; } while(0) -#define cap_mask(c,mask) do { cap_t(c) &= cap_t(mask); } while(0) +static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a, + const kernel_cap_t permitted) +{ + const kernel_cap_t __cap_nfsd_set = CAP_NFSD_SET; + return cap_combine(a, + cap_intersect(permitted, __cap_nfsd_set)); +} -#define cap_is_fs_cap(c) (CAP_TO_MASK(c) & CAP_FS_MASK) +extern const kernel_cap_t __cap_empty_set; +extern const kernel_cap_t __cap_full_set; +extern const kernel_cap_t __cap_init_eff_set; int capable(int cap); int __capable(struct task_struct *t, int cap); +extern long cap_prctl_drop(unsigned long cap); + #endif /* __KERNEL__ */ #endif /* !_LINUX_CAPABILITY_H */ diff --git a/include/linux/compat.h b/include/linux/compat.h index d38655f2be70..ae0a483bef9b 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -279,8 +279,11 @@ asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename, asmlinkage long compat_sys_signalfd(int ufd, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); -asmlinkage long compat_sys_timerfd(int ufd, int clockid, int flags, - const struct compat_itimerspec __user *utmr); +asmlinkage long compat_sys_timerfd_settime(int ufd, int flags, + const struct compat_itimerspec __user *utmr, + struct compat_itimerspec __user *otmr); +asmlinkage long compat_sys_timerfd_gettime(int ufd, + struct compat_itimerspec __user *otmr); #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ diff --git a/include/linux/device.h b/include/linux/device.h index 479c0b31593c..2258d89bf523 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -410,6 +410,15 @@ extern int devres_release_group(struct device *dev, void *id); extern void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp); extern void devm_kfree(struct device *dev, void *p); +struct device_dma_parameters { + /* + * a low level driver may set these to teach IOMMU code about + * sg limitations. + */ + unsigned int max_segment_size; + unsigned long segment_boundary_mask; +}; + struct device { struct klist klist_children; struct klist_node knode_parent; /* node in sibling list */ @@ -445,6 +454,8 @@ struct device { 64 bit addresses for consistent allocations such descriptors. */ + struct device_dma_parameters *dma_parms; + struct list_head dma_pools; /* dma pools (if dma'ble) */ struct dma_coherent_mem *dma_mem; /* internal for coherent mem diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 4470950892be..332030709623 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -60,6 +60,36 @@ static inline int is_device_dma_capable(struct device *dev) extern u64 dma_get_required_mask(struct device *dev); +static inline unsigned int dma_get_max_seg_size(struct device *dev) +{ + return dev->dma_parms ? dev->dma_parms->max_segment_size : 65536; +} + +static inline unsigned int dma_set_max_seg_size(struct device *dev, + unsigned int size) +{ + if (dev->dma_parms) { + dev->dma_parms->max_segment_size = size; + return 0; + } else + return -EIO; +} + +static inline unsigned long dma_get_seg_boundary(struct device *dev) +{ + return dev->dma_parms ? + dev->dma_parms->segment_boundary_mask : 0xffffffff; +} + +static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) +{ + if (dev->dma_parms) { + dev->dma_parms->segment_boundary_mask = mask; + return 0; + } else + return -EIO; +} + /* flags for the coherent memory api */ #define DMA_MEMORY_MAP 0x01 #define DMA_MEMORY_IO 0x02 diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 7e93a9ae7064..0c6ce515185d 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -228,5 +228,7 @@ extern void FASTCALL(free_cold_page(struct page *page)); void page_alloc_init(void); void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); +void drain_all_pages(void); +void drain_local_pages(void *dummy); #endif /* __LINUX_GFP_H */ diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 1fcb0033179e..7dcbc82f3b7b 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -68,8 +68,6 @@ static inline void clear_user_highpage(struct page *page, unsigned long vaddr) void *addr = kmap_atomic(page, KM_USER0); clear_user_page(addr, vaddr, page); kunmap_atomic(addr, KM_USER0); - /* Make sure this page is cleared on other CPU's too before using it */ - smp_wmb(); } #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE @@ -124,28 +122,40 @@ static inline void clear_highpage(struct page *page) kunmap_atomic(kaddr, KM_USER0); } -/* - * Same but also flushes aliased cache contents to RAM. - * - * This must be a macro because KM_USER0 and friends aren't defined if - * !CONFIG_HIGHMEM - */ -#define zero_user_page(page, offset, size, km_type) \ - do { \ - void *kaddr; \ - \ - BUG_ON((offset) + (size) > PAGE_SIZE); \ - \ - kaddr = kmap_atomic(page, km_type); \ - memset((char *)kaddr + (offset), 0, (size)); \ - flush_dcache_page(page); \ - kunmap_atomic(kaddr, (km_type)); \ - } while (0) +static inline void zero_user_segments(struct page *page, + unsigned start1, unsigned end1, + unsigned start2, unsigned end2) +{ + void *kaddr = kmap_atomic(page, KM_USER0); + + BUG_ON(end1 > PAGE_SIZE || end2 > PAGE_SIZE); + + if (end1 > start1) + memset(kaddr + start1, 0, end1 - start1); + + if (end2 > start2) + memset(kaddr + start2, 0, end2 - start2); + + kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(page); +} + +static inline void zero_user_segment(struct page *page, + unsigned start, unsigned end) +{ + zero_user_segments(page, start, end, 0, 0); +} + +static inline void zero_user(struct page *page, + unsigned start, unsigned size) +{ + zero_user_segments(page, start, start + size, 0, 0); +} static inline void __deprecated memclear_highpage_flush(struct page *page, unsigned int offset, unsigned int size) { - zero_user_page(page, offset, size, KM_USER0); + zero_user(page, offset, size); } #ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE @@ -160,8 +170,6 @@ static inline void copy_user_highpage(struct page *to, struct page *from, copy_user_page(vto, vfrom, vaddr, to); kunmap_atomic(vfrom, KM_USER0); kunmap_atomic(vto, KM_USER1); - /* Make sure this page is cleared on other CPU's too before using it */ - smp_wmb(); } #endif diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index f79dcba4b2c1..8371b664b41f 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -301,9 +301,16 @@ static inline int hrtimer_is_queued(struct hrtimer *timer) } /* Forward a hrtimer so it expires after now: */ -extern unsigned long +extern u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); +/* Forward a hrtimer so it expires after the hrtimer's current now */ +static inline u64 hrtimer_forward_now(struct hrtimer *timer, + ktime_t interval) +{ + return hrtimer_forward(timer, timer->base->get_time(), interval); +} + /* Precise sleep: */ extern long hrtimer_nanosleep(struct timespec *rqtp, struct timespec *rmtp, @@ -322,9 +329,9 @@ extern void hrtimer_run_pending(void); extern void __init hrtimers_init(void); #if BITS_PER_LONG < 64 -extern unsigned long ktime_divns(const ktime_t kt, s64 div); +extern u64 ktime_divns(const ktime_t kt, s64 div); #else /* BITS_PER_LONG < 64 */ -# define ktime_divns(kt, div) (unsigned long)((kt).tv64 / (div)) +# define ktime_divns(kt, div) (u64)((kt).tv64 / (div)) #endif /* Show pending timers: */ diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 85d11916e9ea..42131820bb89 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -44,7 +44,15 @@ struct hwrng { /** Register a new Hardware Random Number Generator driver. */ extern int hwrng_register(struct hwrng *rng); /** Unregister a Hardware Random Number Generator driver. */ -extern void hwrng_unregister(struct hwrng *rng); +extern void __hwrng_unregister(struct hwrng *rng, bool suspended); +static inline void hwrng_unregister(struct hwrng *rng) +{ + __hwrng_unregister(rng, false); +} +static inline void hwrng_unregister_suspended(struct hwrng *rng) +{ + __hwrng_unregister(rng, true); +} #endif /* __KERNEL__ */ #endif /* LINUX_HWRANDOM_H_ */ diff --git a/include/linux/i2c/pca9539.h b/include/linux/i2c/pca9539.h new file mode 100644 index 000000000000..611d84ab7a30 --- /dev/null +++ b/include/linux/i2c/pca9539.h @@ -0,0 +1,18 @@ +/* platform data for the PCA9539 16-bit I/O expander driver */ + +struct pca9539_platform_data { + /* number of the first GPIO */ + unsigned gpio_base; + + /* initial polarity inversion setting */ + uint16_t invert; + + void *context; /* param to setup/teardown */ + + int (*setup)(struct i2c_client *client, + unsigned gpio, unsigned ngpio, + void *context); + int (*teardown)(struct i2c_client *client, + unsigned gpio, unsigned ngpio, + void *context); +}; diff --git a/include/linux/i2c/pcf857x.h b/include/linux/i2c/pcf857x.h new file mode 100644 index 000000000000..ba8ea6e16476 --- /dev/null +++ b/include/linux/i2c/pcf857x.h @@ -0,0 +1,45 @@ +#ifndef __LINUX_PCF857X_H +#define __LINUX_PCF857X_H + +/** + * struct pcf857x_platform_data - data to set up pcf857x driver + * @gpio_base: number of the chip's first GPIO + * @n_latch: optional bit-inverse of initial register value; if + * you leave this initialized to zero the driver will act + * like the chip was just reset + * @setup: optional callback issued once the GPIOs are valid + * @teardown: optional callback issued before the GPIOs are invalidated + * @context: optional parameter passed to setup() and teardown() + * + * In addition to the I2C_BOARD_INFO() state appropriate to each chip, + * the i2c_board_info used with the pcf875x driver must provide the + * chip "type" ("pcf8574", "pcf8574a", "pcf8575", "pcf8575c") and its + * platform_data (pointer to one of these structures) with at least + * the gpio_base value initialized. + * + * The @setup callback may be used with the kind of board-specific glue + * which hands the (now-valid) GPIOs to other drivers, or which puts + * devices in their initial states using these GPIOs. + * + * These GPIO chips are only "quasi-bidirectional"; read the chip specs + * to understand the behavior. They don't have separate registers to + * record which pins are used for input or output, record which output + * values are driven, or provide access to input values. That must be + * inferred by reading the chip's value and knowing the last value written + * to it. If you leave n_latch initialized to zero, that last written + * value is presumed to be all ones (as if the chip were just reset). + */ +struct pcf857x_platform_data { + unsigned gpio_base; + unsigned n_latch; + + int (*setup)(struct i2c_client *client, + int gpio, unsigned ngpio, + void *context); + int (*teardown)(struct i2c_client *client, + int gpio, unsigned ngpio, + void *context); + void *context; +}; + +#endif /* __LINUX_PCF857X_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index f42663eaf655..1f74e1d7415f 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -121,6 +121,18 @@ extern struct group_info init_groups; #else #define INIT_IDS #endif + +#ifdef CONFIG_SECURITY_FILE_CAPABILITIES +/* + * Because of the reduced scope of CAP_SETPCAP when filesystem + * capabilities are in effect, it is safe to allow CAP_SETPCAP to + * be available in the default configuration. + */ +# define CAP_INIT_BSET CAP_FULL_SET +#else +# define CAP_INIT_BSET CAP_INIT_EFF_SET +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -156,6 +168,7 @@ extern struct group_info init_groups; .cap_effective = CAP_INIT_EFF_SET, \ .cap_inheritable = CAP_INIT_INH_SET, \ .cap_permitted = CAP_FULL_SET, \ + .cap_bset = CAP_INIT_BSET, \ .keep_capabilities = 0, \ .user = INIT_USER, \ .comm = "swapper", \ diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h new file mode 100644 index 000000000000..4dd4c04ff2f4 --- /dev/null +++ b/include/linux/iommu-helper.h @@ -0,0 +1,7 @@ +extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, + unsigned long start, unsigned int nr, + unsigned long shift, + unsigned long boundary_size, + unsigned long align_mask); +extern void iommu_area_free(unsigned long *map, unsigned long start, + unsigned int nr); diff --git a/include/linux/latency.h b/include/linux/latency.h deleted file mode 100644 index c08b52bb55b0..000000000000 --- a/include/linux/latency.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * latency.h: Explicit system-wide latency-expectation infrastructure - * - * (C) Copyright 2006 Intel Corporation - * Author: Arjan van de Ven <arjan@linux.intel.com> - * - */ - -#ifndef _INCLUDE_GUARD_LATENCY_H_ -#define _INCLUDE_GUARD_LATENCY_H_ - -#include <linux/notifier.h> - -void set_acceptable_latency(char *identifier, int usecs); -void modify_acceptable_latency(char *identifier, int usecs); -void remove_acceptable_latency(char *identifier); -void synchronize_acceptable_latency(void); -int system_latency_constraint(void); - -int register_latency_notifier(struct notifier_block * nb); -int unregister_latency_notifier(struct notifier_block * nb); - -#define INFINITE_LATENCY 1000000 - -#endif diff --git a/include/linux/leds.h b/include/linux/leds.h index b4130ff58d0c..00f89fd6c52a 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -54,7 +54,15 @@ struct led_classdev { extern int led_classdev_register(struct device *parent, struct led_classdev *led_cdev); -extern void led_classdev_unregister(struct led_classdev *led_cdev); +extern void __led_classdev_unregister(struct led_classdev *led_cdev, bool sus); +static inline void led_classdev_unregister(struct led_classdev *lcd) +{ + __led_classdev_unregister(lcd, false); +} +static inline void led_classdev_unregister_suspended(struct led_classdev *lcd) +{ + __led_classdev_unregister(lcd, true); +} extern void led_classdev_suspend(struct led_classdev *led_cdev); extern void led_classdev_resume(struct led_classdev *led_cdev); diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index dff9ea32606a..24b30b9b4f8a 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -43,7 +43,15 @@ struct miscdevice { }; extern int misc_register(struct miscdevice * misc); -extern int misc_deregister(struct miscdevice * misc); +extern int __misc_deregister(struct miscdevice *misc, bool suspended); +static inline int misc_deregister(struct miscdevice *misc) +{ + return __misc_deregister(misc, false); +} +static inline int misc_deregister_suspended(struct miscdevice *misc) +{ + return __misc_deregister(misc, true); +} #define MODULE_ALIAS_MISCDEV(minor) \ MODULE_ALIAS("char-major-" __stringify(MISC_MAJOR) \ diff --git a/include/linux/mm.h b/include/linux/mm.h index 1bba6789a50a..89d7c691b93a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -227,10 +227,22 @@ static inline int put_page_testzero(struct page *page) */ static inline int get_page_unless_zero(struct page *page) { - VM_BUG_ON(PageCompound(page)); + VM_BUG_ON(PageTail(page)); return atomic_inc_not_zero(&page->_count); } +/* Support for virtually mapped pages */ +struct page *vmalloc_to_page(const void *addr); +unsigned long vmalloc_to_pfn(const void *addr); + +/* Determine if an address is within the vmalloc range */ +static inline int is_vmalloc_addr(const void *x) +{ + unsigned long addr = (unsigned long)x; + + return addr >= VMALLOC_START && addr < VMALLOC_END; +} + static inline struct page *compound_head(struct page *page) { if (unlikely(PageTail(page))) @@ -706,6 +718,28 @@ unsigned long unmap_vmas(struct mmu_gather **tlb, struct vm_area_struct *start_vma, unsigned long start_addr, unsigned long end_addr, unsigned long *nr_accounted, struct zap_details *); + +/** + * mm_walk - callbacks for walk_page_range + * @pgd_entry: if set, called for each non-empty PGD (top-level) entry + * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry + * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry + * @pte_entry: if set, called for each non-empty PTE (4th-level) entry + * @pte_hole: if set, called for each hole at all levels + * + * (see walk_page_range for more details) + */ +struct mm_walk { + int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, void *); + int (*pud_entry)(pud_t *, unsigned long, unsigned long, void *); + int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, void *); + int (*pte_entry)(pte_t *, unsigned long, unsigned long, void *); + int (*pte_hole)(unsigned long, unsigned long, void *); +}; + +int walk_page_range(const struct mm_struct *, unsigned long addr, + unsigned long end, const struct mm_walk *walk, + void *private); void free_pgd_range(struct mmu_gather **tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma, @@ -1089,8 +1123,6 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma) pgprot_t vm_get_page_prot(unsigned long vm_flags); struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); -struct page *vmalloc_to_page(void *addr); -unsigned long vmalloc_to_pfn(void *addr); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4c4522a51a3b..8d8d1977736e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -113,7 +113,7 @@ struct per_cpu_pages { }; struct per_cpu_pageset { - struct per_cpu_pages pcp[2]; /* 0: hot. 1: cold */ + struct per_cpu_pages pcp; #ifdef CONFIG_NUMA s8 expire; #endif diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index e9fddb42f26c..139d49d2f078 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -343,7 +343,8 @@ struct sdio_device_id { __u8 class; /* Standard interface or SDIO_ANY_ID */ __u16 vendor; /* Vendor or SDIO_ANY_ID */ __u16 device; /* Device ID or SDIO_ANY_ID */ - kernel_ulong_t driver_data; /* Data private to the driver */ + kernel_ulong_t driver_data /* Data private to the driver */ + __attribute__((aligned(sizeof(kernel_ulong_t)))); }; /* SSB core, see drivers/ssb/ */ diff --git a/include/linux/nubus.h b/include/linux/nubus.h index cdb3e9b8db54..c4355076d1a5 100644 --- a/include/linux/nubus.h +++ b/include/linux/nubus.h @@ -132,10 +132,12 @@ enum nubus_drhw { NUBUS_DRHW_RDIUS_DCGX = 0x027C, /* Radius DirectColor/GX */ NUBUS_DRHW_RDIUS_PC8 = 0x0291, /* Radius PrecisionColor 8 */ NUBUS_DRHW_LAPIS_PCS8 = 0x0292, /* Lapis ProColorServer 8 */ - NUBUS_DRHW_RASTER_24LXI = 0x02A0, /* RasterOps 8/24 XLi */ + NUBUS_DRHW_RASTER_24XLI = 0x02A0, /* RasterOps 8/24 XLi */ NUBUS_DRHW_RASTER_PBPGT = 0x02A5, /* RasterOps PaintBoard Prism GT */ NUBUS_DRHW_EMACH_FSX = 0x02AE, /* E-Machines Futura SX */ + NUBUS_DRHW_RASTER_24XLTV = 0x02B7, /* RasterOps 24XLTV */ NUBUS_DRHW_SMAC_THUND24 = 0x02CB, /* SuperMac Thunder/24 */ + NUBUS_DRHW_SMAC_THUNDLGHT = 0x03D9, /* SuperMac ThunderLight */ NUBUS_DRHW_RDIUS_PC24XP = 0x0406, /* Radius PrecisionColor 24Xp */ NUBUS_DRHW_RDIUS_PC24X = 0x040A, /* Radius PrecisionColor 24X */ NUBUS_DRHW_RDIUS_PC8XJ = 0x040B, /* Radius PrecisionColor 8XJ */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 209d3a47f50f..bbad43fb8181 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -131,16 +131,52 @@ #define ClearPageReferenced(page) clear_bit(PG_referenced, &(page)->flags) #define TestClearPageReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags) -#define PageUptodate(page) test_bit(PG_uptodate, &(page)->flags) +static inline int PageUptodate(struct page *page) +{ + int ret = test_bit(PG_uptodate, &(page)->flags); + + /* + * Must ensure that the data we read out of the page is loaded + * _after_ we've loaded page->flags to check for PageUptodate. + * We can skip the barrier if the page is not uptodate, because + * we wouldn't be reading anything from it. + * + * See SetPageUptodate() for the other side of the story. + */ + if (ret) + smp_rmb(); + + return ret; +} + +static inline void __SetPageUptodate(struct page *page) +{ + smp_wmb(); + __set_bit(PG_uptodate, &(page)->flags); #ifdef CONFIG_S390 + page_clear_dirty(page); +#endif +} + static inline void SetPageUptodate(struct page *page) { +#ifdef CONFIG_S390 if (!test_and_set_bit(PG_uptodate, &page->flags)) page_clear_dirty(page); -} #else -#define SetPageUptodate(page) set_bit(PG_uptodate, &(page)->flags) + /* + * Memory barrier must be issued before setting the PG_uptodate bit, + * so that all previous stores issued in order to bring the page + * uptodate are actually visible before PageUptodate becomes true. + * + * s390 doesn't need an explicit smp_wmb here because the test and + * set bit already provides full barriers. + */ + smp_wmb(); + set_bit(PG_uptodate, &(page)->flags); #endif +} + #define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags) #define PageDirty(page) test_bit(PG_dirty, &(page)->flags) diff --git a/include/linux/pci.h b/include/linux/pci.h index cee75c0ff6e7..7215d3b1f4af 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -159,6 +159,8 @@ struct pci_dev { this if your device has broken DMA or supports 64-bit transfers. */ + struct device_dma_parameters dma_parms; + pci_power_t current_state; /* Current operating state. In ACPI-speak, this is D0-D3, D0 being fully functional, and D3 being off. */ @@ -580,6 +582,8 @@ void pci_intx(struct pci_dev *dev, int enable); void pci_msi_off(struct pci_dev *dev); int pci_set_dma_mask(struct pci_dev *dev, u64 mask); int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask); +int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size); +int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask); int pcix_get_max_mmrbc(struct pci_dev *dev); int pcix_get_mmrbc(struct pci_dev *dev); int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc); @@ -822,6 +826,18 @@ static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) return -EIO; } +static inline int pci_set_dma_max_seg_size(struct pci_dev *dev, + unsigned int size) +{ + return -EIO; +} + +static inline int pci_set_dma_seg_boundary(struct pci_dev *dev, + unsigned long mask) +{ + return -EIO; +} + static inline int pci_assign_resource(struct pci_dev *dev, int i) { return -EBUSY; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 41f6f28690f6..39d32837265b 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2043,6 +2043,23 @@ #define PCI_VENDOR_ID_QUICKNET 0x15e2 #define PCI_DEVICE_ID_QUICKNET_XJ 0x0500 +/* + * ADDI-DATA GmbH communication cards <info@addi-data.com> + */ +#define PCI_VENDOR_ID_ADDIDATA_OLD 0x10E8 +#define PCI_VENDOR_ID_ADDIDATA 0x15B8 +#define PCI_DEVICE_ID_ADDIDATA_APCI7500 0x7000 +#define PCI_DEVICE_ID_ADDIDATA_APCI7420 0x7001 +#define PCI_DEVICE_ID_ADDIDATA_APCI7300 0x7002 +#define PCI_DEVICE_ID_ADDIDATA_APCI7800 0x818E +#define PCI_DEVICE_ID_ADDIDATA_APCI7500_2 0x7009 +#define PCI_DEVICE_ID_ADDIDATA_APCI7420_2 0x700A +#define PCI_DEVICE_ID_ADDIDATA_APCI7300_2 0x700B +#define PCI_DEVICE_ID_ADDIDATA_APCI7500_3 0x700C +#define PCI_DEVICE_ID_ADDIDATA_APCI7420_3 0x700D +#define PCI_DEVICE_ID_ADDIDATA_APCI7300_3 0x700E +#define PCI_DEVICE_ID_ADDIDATA_APCI7800_3 0x700F + #define PCI_VENDOR_ID_PDC 0x15e9 #define PCI_VENDOR_ID_FARSITE 0x1619 diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h new file mode 100644 index 000000000000..2e4e97bd19f7 --- /dev/null +++ b/include/linux/pm_qos_params.h @@ -0,0 +1,25 @@ +/* interface for the pm_qos_power infrastructure of the linux kernel. + * + * Mark Gross + */ +#include <linux/list.h> +#include <linux/notifier.h> +#include <linux/miscdevice.h> + +#define PM_QOS_RESERVED 0 +#define PM_QOS_CPU_DMA_LATENCY 1 +#define PM_QOS_NETWORK_LATENCY 2 +#define PM_QOS_NETWORK_THROUGHPUT 3 + +#define PM_QOS_NUM_CLASSES 4 +#define PM_QOS_DEFAULT_VALUE -1 + +int pm_qos_add_requirement(int qos, char *name, s32 value); +int pm_qos_update_requirement(int qos, char *name, s32 new_value); +void pm_qos_remove_requirement(int qos, char *name); + +int pm_qos_requirement(int qos); + +int pm_qos_add_notifier(int qos, struct notifier_block *notifier); +int pm_qos_remove_notifier(int qos, struct notifier_block *notifier); + diff --git a/include/linux/prctl.h b/include/linux/prctl.h index e2eff9079fe9..3800639775ae 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -63,4 +63,8 @@ #define PR_GET_SECCOMP 21 #define PR_SET_SECCOMP 22 +/* Get/set the capability bounding set */ +#define PR_CAPBSET_READ 23 +#define PR_CAPBSET_DROP 24 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 8f92546b403d..e43551516831 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -19,6 +19,8 @@ struct completion; */ #define FIRST_PROCESS_ENTRY 256 +/* Worst case buffer size needed for holding an integer. */ +#define PROC_NUMBUF 13 /* * We always define these enumerators @@ -117,7 +119,6 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); unsigned long task_vsize(struct mm_struct *); int task_statm(struct mm_struct *, int *, int *, int *, int *); char *task_mem(struct mm_struct *, char *); -void clear_refs_smap(struct mm_struct *mm); struct proc_dir_entry *de_get(struct proc_dir_entry *de); void de_put(struct proc_dir_entry *de); diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 85ea63f462af..b93b541cf111 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -59,8 +59,6 @@ extern void machine_crash_shutdown(struct pt_regs *); * Architecture independent implemenations of sys_reboot commands. */ -extern void kernel_shutdown_prepare(enum system_states state); - extern void kernel_restart(char *cmd); extern void kernel_halt(void); extern void kernel_power_off(void); diff --git a/include/linux/sched.h b/include/linux/sched.h index af6947e69b40..9c13be3a21e8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -555,6 +555,13 @@ struct signal_struct { #define SIGNAL_STOP_CONTINUED 0x00000004 /* SIGCONT since WCONTINUED reap */ #define SIGNAL_GROUP_EXIT 0x00000008 /* group exit in progress */ +/* If true, all threads except ->group_exit_task have pending SIGKILL */ +static inline int signal_group_exit(const struct signal_struct *sig) +{ + return (sig->flags & SIGNAL_GROUP_EXIT) || + (sig->group_exit_task != NULL); +} + /* * Some day this will be a full-fledged user tracking system.. */ @@ -1091,7 +1098,7 @@ struct task_struct { uid_t uid,euid,suid,fsuid; gid_t gid,egid,sgid,fsgid; struct group_info *group_info; - kernel_cap_t cap_effective, cap_inheritable, cap_permitted; + kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; unsigned keep_capabilities:1; struct user_struct *user; #ifdef CONFIG_KEYS @@ -1770,7 +1777,7 @@ extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned lon struct task_struct *fork_idle(int); extern void set_task_comm(struct task_struct *tsk, char *from); -extern void get_task_comm(char *to, struct task_struct *tsk); +extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP extern void wait_task_inactive(struct task_struct * p); @@ -2080,6 +2087,10 @@ static inline void migration_init(void) } #endif +#ifndef TASK_SIZE_OF +#define TASK_SIZE_OF(tsk) TASK_SIZE +#endif + #endif /* __KERNEL__ */ #endif diff --git a/include/linux/security.h b/include/linux/security.h index d24974262dc6..fe52cdeab0a6 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -40,11 +40,6 @@ #define ROOTCONTEXT_MNT 0x04 #define DEFCONTEXT_MNT 0x08 -/* - * Bounding set - */ -extern kernel_cap_t cap_bset; - extern unsigned securebits; struct ctl_table; @@ -423,15 +418,12 @@ struct request_sock; * identified by @name for @dentry. * Return 0 if permission is granted. * @inode_getsecurity: - * Copy the extended attribute representation of the security label - * associated with @name for @inode into @buffer. @buffer may be - * NULL to request the size of the buffer required. @size indicates - * the size of @buffer in bytes. Note that @name is the remainder - * of the attribute name after the security. prefix has been removed. - * @err is the return value from the preceding fs getxattr call, - * and can be used by the security module to determine whether it - * should try and canonicalize the attribute value. - * Return number of bytes used/required on success. + * Retrieve a copy of the extended attribute representation of the + * security label associated with @name for @inode via @buffer. Note that + * @name is the remainder of the attribute name after the security prefix + * has been removed. @alloc is used to specify of the call should return a + * value via the buffer or just the value length Return size of buffer on + * success. * @inode_setsecurity: * Set the security label associated with @name for @inode from the * extended attribute value @value. @size indicates the size of the @@ -1304,7 +1296,7 @@ struct security_operations { int (*inode_removexattr) (struct dentry *dentry, char *name); int (*inode_need_killpriv) (struct dentry *dentry); int (*inode_killpriv) (struct dentry *dentry); - int (*inode_getsecurity)(const struct inode *inode, const char *name, void *buffer, size_t size, int err); + int (*inode_getsecurity)(const struct inode *inode, const char *name, void **buffer, bool alloc); int (*inode_setsecurity)(struct inode *inode, const char *name, const void *value, size_t size, int flags); int (*inode_listsecurity)(struct inode *inode, char *buffer, size_t buffer_size); @@ -1565,7 +1557,7 @@ int security_inode_listxattr(struct dentry *dentry); int security_inode_removexattr(struct dentry *dentry, char *name); int security_inode_need_killpriv(struct dentry *dentry); int security_inode_killpriv(struct dentry *dentry); -int security_inode_getsecurity(const struct inode *inode, const char *name, void *buffer, size_t size, int err); +int security_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc); int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags); int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size); int security_file_permission(struct file *file, int mask); @@ -1967,7 +1959,7 @@ static inline int security_inode_killpriv(struct dentry *dentry) return cap_inode_killpriv(dentry); } -static inline int security_inode_getsecurity(const struct inode *inode, const char *name, void *buffer, size_t size, int err) +static inline int security_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc) { return -EOPNOTSUPP; } diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index afe0f6d9b9bc..00b65c0a82ca 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -23,6 +23,7 @@ struct plat_serial8250_port { resource_size_t mapbase; /* resource base */ unsigned int irq; /* interrupt number */ unsigned int uartclk; /* UART clock rate */ + void *private_data; unsigned char regshift; /* register shift */ unsigned char iotype; /* UPIO_* */ unsigned char hub6; diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h new file mode 100644 index 000000000000..835ddf47d45c --- /dev/null +++ b/include/linux/spi/mcp23s08.h @@ -0,0 +1,24 @@ + +/* FIXME driver should be able to handle all four slaves that + * can be hooked up to each chipselect, as well as IRQs... + */ + +struct mcp23s08_platform_data { + /* four slaves can share one SPI chipselect */ + u8 slave; + + /* number assigned to the first GPIO */ + unsigned base; + + /* pins with pullups */ + u8 pullups; + + void *context; /* param to setup/teardown */ + + int (*setup)(struct spi_device *spi, + int gpio, unsigned ngpio, + void *context); + int (*teardown)(struct spi_device *spi, + int gpio, unsigned ngpio, + void *context); +}; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 646ce2d068d4..1d7d4c5797ee 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -130,7 +130,6 @@ struct pbe { }; /* mm/page_alloc.c */ -extern void drain_local_pages(void); extern void mark_free_pages(struct zone *zone); /** diff --git a/include/linux/swap.h b/include/linux/swap.h index 4f3838adbb30..353153ea0bd5 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -158,9 +158,6 @@ struct swap_list_t { /* Swap 50% full? Release swapcache more aggressively.. */ #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages) -/* linux/mm/memory.c */ -extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); - /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; extern unsigned long totalreserve_pages; @@ -223,16 +220,17 @@ extern struct address_space swapper_space; #define total_swapcache_pages swapper_space.nrpages extern void show_swap_cache_info(void); extern int add_to_swap(struct page *, gfp_t); +extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); extern void __delete_from_swap_cache(struct page *); extern void delete_from_swap_cache(struct page *); -extern int move_to_swap_cache(struct page *, swp_entry_t); -extern int move_from_swap_cache(struct page *, unsigned long, - struct address_space *); extern void free_page_and_swap_cache(struct page *); extern void free_pages_and_swap_cache(struct page **, int); -extern struct page * lookup_swap_cache(swp_entry_t); -extern struct page * read_swap_cache_async(swp_entry_t, struct vm_area_struct *vma, - unsigned long addr); +extern struct page *lookup_swap_cache(swp_entry_t); +extern struct page *read_swap_cache_async(swp_entry_t, gfp_t, + struct vm_area_struct *vma, unsigned long addr); +extern struct page *swapin_readahead(swp_entry_t, gfp_t, + struct vm_area_struct *vma, unsigned long addr); + /* linux/mm/swapfile.c */ extern long total_swap_pages; extern unsigned int nr_swapfiles; @@ -306,7 +304,7 @@ static inline void swap_free(swp_entry_t swp) { } -static inline struct page *read_swap_cache_async(swp_entry_t swp, +static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, struct vm_area_struct *vma, unsigned long addr) { return NULL; @@ -317,22 +315,12 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp) return NULL; } -static inline int valid_swaphandles(swp_entry_t entry, unsigned long *offset) -{ - return 0; -} - #define can_share_swap_page(p) (page_mapcount(p) == 1) -static inline int move_to_swap_cache(struct page *page, swp_entry_t entry) -{ - return 1; -} - -static inline int move_from_swap_cache(struct page *page, unsigned long index, - struct address_space *mapping) +static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, + gfp_t gfp_mask) { - return 1; + return -1; } static inline void __delete_from_swap_cache(struct page *page) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index ceb6cc5ceebb..7bf2d149d209 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -42,6 +42,12 @@ static inline pgoff_t swp_offset(swp_entry_t entry) return entry.val & SWP_OFFSET_MASK(entry); } +/* check whether a pte points to a swap entry */ +static inline int is_swap_pte(pte_t pte) +{ + return !pte_none(pte) && !pte_present(pte) && !pte_file(pte); +} + /* * Convert the arch-dependent pte representation of a swp_entry_t into an * arch-independent swp_entry_t. diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 61def7c8fbb3..4c2577bd1c85 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -607,8 +607,11 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, size_t len); asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache); asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask); -asmlinkage long sys_timerfd(int ufd, int clockid, int flags, - const struct itimerspec __user *utmr); +asmlinkage long sys_timerfd_create(int clockid, int flags); +asmlinkage long sys_timerfd_settime(int ufd, int flags, + const struct itimerspec __user *utmr, + struct itimerspec __user *otmr); +asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr); asmlinkage long sys_eventfd(unsigned int count); asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index bf4ae4e138f7..571f01d20a86 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -102,7 +102,6 @@ enum KERN_NODENAME=7, KERN_DOMAINNAME=8, - KERN_CAP_BSET=14, /* int: capability bounding set */ KERN_PANIC=15, /* int: panic timeout */ KERN_REALROOTDEV=16, /* real root device to mount after initrd */ @@ -965,8 +964,6 @@ extern int proc_dostring(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); extern int proc_dointvec(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); -extern int proc_dointvec_bset(struct ctl_table *, int, struct file *, - void __user *, size_t *, loff_t *); extern int proc_dointvec_minmax(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); extern int proc_dointvec_jiffies(struct ctl_table *, int, struct file *, diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 89338b468d0d..ce8e7da05807 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -45,11 +45,11 @@ extern void *vmalloc_32_user(unsigned long size); extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot); -extern void vfree(void *addr); +extern void vfree(const void *addr); extern void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot); -extern void vunmap(void *addr); +extern void vunmap(const void *addr); extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); @@ -71,7 +71,7 @@ extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, extern struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, int node, gfp_t gfp_mask); -extern struct vm_struct *remove_vm_area(void *addr); +extern struct vm_struct *remove_vm_area(const void *addr); extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages); diff --git a/include/linux/wait.h b/include/linux/wait.h index 1f4fb0a81ecd..33a2aa9e02f2 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -162,6 +162,22 @@ wait_queue_head_t *FASTCALL(bit_waitqueue(void *, int)); #define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL) #define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE, 1) +#ifdef CONFIG_DEBUG_LOCK_ALLOC +/* + * macro to avoid include hell + */ +#define wake_up_nested(x, s) \ +do { \ + unsigned long flags; \ + \ + spin_lock_irqsave_nested(&(x)->lock, flags, (s)); \ + wake_up_locked(x); \ + spin_unlock_irqrestore(&(x)->lock, flags); \ +} while (0) +#else +#define wake_up_nested(x, s) wake_up(x) +#endif + #define __wait_event(wq, condition) \ do { \ DEFINE_WAIT(__wait); \ diff --git a/include/linux/writeback.h b/include/linux/writeback.h index c6148bbf1250..b7b3362f7717 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -62,6 +62,7 @@ struct writeback_control { unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned for_writepages:1; /* This is a writepages() call */ unsigned range_cyclic:1; /* range_start is cyclic */ + unsigned more_io:1; /* more io to be dispatched */ }; /* @@ -100,6 +101,7 @@ extern int dirty_background_ratio; extern int vm_dirty_ratio; extern int dirty_writeback_interval; extern int dirty_expire_interval; +extern int vm_highmem_is_dirtyable; extern int block_dump; extern int laptop_mode; diff --git a/include/linux/xattr.h b/include/linux/xattr.h index def131a5ac70..df6b95d2218e 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -46,6 +46,7 @@ struct xattr_handler { size_t size, int flags); }; +ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); ssize_t vfs_getxattr(struct dentry *, char *, void *, size_t); ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); int vfs_setxattr(struct dentry *, char *, void *, size_t, int); diff --git a/include/net/netlabel.h b/include/net/netlabel.h index b3213c7c5309..0ca67d73c7ad 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -36,6 +36,8 @@ #include <net/netlink.h> #include <asm/atomic.h> +struct cipso_v4_doi; + /* * NetLabel - A management interface for maintaining network packet label * mapping tables for explicit packet labling protocols. @@ -103,12 +105,6 @@ struct netlbl_audit { uid_t loginuid; }; -/* Domain mapping definition struct */ -struct netlbl_dom_map; - -/* Domain mapping operations */ -int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); - /* * LSM security attributes */ @@ -344,6 +340,19 @@ static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr) #ifdef CONFIG_NETLABEL /* + * LSM configuration operations + */ +int netlbl_cfg_map_del(const char *domain, struct netlbl_audit *audit_info); +int netlbl_cfg_unlbl_add_map(const char *domain, + struct netlbl_audit *audit_info); +int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def, + struct netlbl_audit *audit_info); +int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def, + const char *domain, + struct netlbl_audit *audit_info); +int netlbl_cfg_cipsov4_del(u32 doi, struct netlbl_audit *audit_info); + +/* * LSM security attribute operations */ int netlbl_secattr_catmap_walk(struct netlbl_lsm_secattr_catmap *catmap, @@ -378,6 +387,32 @@ void netlbl_cache_invalidate(void); int netlbl_cache_add(const struct sk_buff *skb, const struct netlbl_lsm_secattr *secattr); #else +static inline int netlbl_cfg_map_del(const char *domain, + struct netlbl_audit *audit_info) +{ + return -ENOSYS; +} +static inline int netlbl_cfg_unlbl_add_map(const char *domain, + struct netlbl_audit *audit_info) +{ + return -ENOSYS; +} +static inline int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def, + struct netlbl_audit *audit_info) +{ + return -ENOSYS; +} +static inline int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def, + const char *domain, + struct netlbl_audit *audit_info) +{ + return -ENOSYS; +} +static inline int netlbl_cfg_cipsov4_del(u32 doi, + struct netlbl_audit *audit_info) +{ + return -ENOSYS; +} static inline int netlbl_secattr_catmap_walk( struct netlbl_lsm_secattr_catmap *catmap, u32 offset) diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h index d5838c30d20f..87a260e3699e 100644 --- a/include/pcmcia/cs.h +++ b/include/pcmcia/cs.h @@ -147,11 +147,11 @@ typedef struct config_req_t { /* For RequestIO and ReleaseIO */ typedef struct io_req_t { - ioaddr_t BasePort1; - ioaddr_t NumPorts1; + u_int BasePort1; + u_int NumPorts1; u_int Attributes1; - ioaddr_t BasePort2; - ioaddr_t NumPorts2; + u_int BasePort2; + u_int NumPorts2; u_int Attributes2; u_int IOAddrLines; } io_req_t; diff --git a/include/pcmcia/cs_types.h b/include/pcmcia/cs_types.h index 5f388035687d..9a6bcc4952f0 100644 --- a/include/pcmcia/cs_types.h +++ b/include/pcmcia/cs_types.h @@ -27,7 +27,6 @@ typedef u_int ioaddr_t; #else typedef u_short ioaddr_t; #endif -typedef unsigned long kio_addr_t; typedef u_short socket_t; typedef u_int event_t; diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index 6e84258b94de..f95dca077c1c 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -92,7 +92,7 @@ typedef struct pccard_io_map { u_char map; u_char flags; u_short speed; - kio_addr_t start, stop; + u_int start, stop; } pccard_io_map; typedef struct pccard_mem_map { @@ -155,7 +155,7 @@ extern struct pccard_resource_ops pccard_iodyn_ops; struct pcmcia_socket; typedef struct io_window_t { - kio_addr_t InUse, Config; + u_int InUse, Config; struct resource *res; } io_window_t; @@ -208,7 +208,7 @@ struct pcmcia_socket { u_int features; u_int irq_mask; u_int map_size; - kio_addr_t io_offset; + u_int io_offset; u_char pci_irq; struct pci_dev * cb_dev; diff --git a/init/Kconfig b/init/Kconfig index b2acdeb2d312..87f50df58893 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -582,7 +582,6 @@ config SIGNALFD config TIMERFD bool "Enable timerfd() system call" if EMBEDDED select ANON_INODES - depends on BROKEN default y help Enable the timerfd() system call that allows to receive timer @@ -657,11 +656,9 @@ config SLOB depends on EMBEDDED bool "SLOB (Simple Allocator)" help - SLOB replaces the SLAB allocator with a drastically simpler - allocator. SLOB is more space efficient than SLAB but does not - scale well (single lock for all operations) and is also highly - susceptible to fragmentation. SLUB can accomplish a higher object - density. It is usually better to use SLUB instead of SLOB. + SLOB replaces the stock allocator with a drastically simpler + allocator. SLOB is generally more space efficient but + does not perform as well on large systems. endchoice @@ -679,6 +676,16 @@ config MARKERS source "arch/Kconfig" +config PROC_PAGE_MONITOR + default y + depends on PROC_FS && MMU + bool "Enable /proc page monitoring" if EMBEDDED + help + Various /proc files exist to monitor process memory utilization: + /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap, + /proc/kpagecount, and /proc/kpageflags. Disabling these + interfaces will reduce the size of the kernel by approximately 4kb. + endmenu # General setup config SLABINFO diff --git a/kernel/Makefile b/kernel/Makefile index db9af707ff5b..135a1b943446 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -8,8 +8,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ - hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \ - utsname.o notifier.o ksysfs.o + hrtimer.o rwsem.o nsproxy.o srcu.o \ + utsname.o notifier.o ksysfs.o pm_qos_params.o obj-$(CONFIG_SYSCTL) += sysctl_check.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/kernel/capability.c b/kernel/capability.c index efbd9cdce132..39e8193b41ea 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -22,6 +22,37 @@ static DEFINE_SPINLOCK(task_capability_lock); /* + * Leveraged for setting/resetting capabilities + */ + +const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET; +const kernel_cap_t __cap_full_set = CAP_FULL_SET; +const kernel_cap_t __cap_init_eff_set = CAP_INIT_EFF_SET; + +EXPORT_SYMBOL(__cap_empty_set); +EXPORT_SYMBOL(__cap_full_set); +EXPORT_SYMBOL(__cap_init_eff_set); + +/* + * More recent versions of libcap are available from: + * + * http://www.kernel.org/pub/linux/libs/security/linux-privs/ + */ + +static void warn_legacy_capability_use(void) +{ + static int warned; + if (!warned) { + char name[sizeof(current->comm)]; + + printk(KERN_INFO "warning: `%s' uses 32-bit capabilities" + " (legacy support in use)\n", + get_task_comm(name, current)); + warned = 1; + } +} + +/* * For sys_getproccap() and sys_setproccap(), any of the three * capability set pointers may be NULL -- indicating that that set is * uninteresting and/or not to be changed. @@ -42,12 +73,21 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) pid_t pid; __u32 version; struct task_struct *target; - struct __user_cap_data_struct data; + unsigned tocopy; + kernel_cap_t pE, pI, pP; if (get_user(version, &header->version)) return -EFAULT; - if (version != _LINUX_CAPABILITY_VERSION) { + switch (version) { + case _LINUX_CAPABILITY_VERSION_1: + warn_legacy_capability_use(); + tocopy = _LINUX_CAPABILITY_U32S_1; + break; + case _LINUX_CAPABILITY_VERSION_2: + tocopy = _LINUX_CAPABILITY_U32S_2; + break; + default: if (put_user(_LINUX_CAPABILITY_VERSION, &header->version)) return -EFAULT; return -EINVAL; @@ -71,14 +111,47 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) } else target = current; - ret = security_capget(target, &data.effective, &data.inheritable, &data.permitted); + ret = security_capget(target, &pE, &pI, &pP); out: read_unlock(&tasklist_lock); spin_unlock(&task_capability_lock); - if (!ret && copy_to_user(dataptr, &data, sizeof data)) - return -EFAULT; + if (!ret) { + struct __user_cap_data_struct kdata[_LINUX_CAPABILITY_U32S]; + unsigned i; + + for (i = 0; i < tocopy; i++) { + kdata[i].effective = pE.cap[i]; + kdata[i].permitted = pP.cap[i]; + kdata[i].inheritable = pI.cap[i]; + } + + /* + * Note, in the case, tocopy < _LINUX_CAPABILITY_U32S, + * we silently drop the upper capabilities here. This + * has the effect of making older libcap + * implementations implicitly drop upper capability + * bits when they perform a: capget/modify/capset + * sequence. + * + * This behavior is considered fail-safe + * behavior. Upgrading the application to a newer + * version of libcap will enable access to the newer + * capabilities. + * + * An alternative would be to return an error here + * (-ERANGE), but that causes legacy applications to + * unexpectidly fail; the capget/modify/capset aborts + * before modification is attempted and the application + * fails. + */ + + if (copy_to_user(dataptr, kdata, tocopy + * sizeof(struct __user_cap_data_struct))) { + return -EFAULT; + } + } return ret; } @@ -167,6 +240,8 @@ static inline int cap_set_all(kernel_cap_t *effective, */ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) { + struct __user_cap_data_struct kdata[_LINUX_CAPABILITY_U32S]; + unsigned i, tocopy; kernel_cap_t inheritable, permitted, effective; __u32 version; struct task_struct *target; @@ -176,7 +251,15 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) if (get_user(version, &header->version)) return -EFAULT; - if (version != _LINUX_CAPABILITY_VERSION) { + switch (version) { + case _LINUX_CAPABILITY_VERSION_1: + warn_legacy_capability_use(); + tocopy = _LINUX_CAPABILITY_U32S_1; + break; + case _LINUX_CAPABILITY_VERSION_2: + tocopy = _LINUX_CAPABILITY_U32S_2; + break; + default: if (put_user(_LINUX_CAPABILITY_VERSION, &header->version)) return -EFAULT; return -EINVAL; @@ -188,10 +271,22 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) if (pid && pid != task_pid_vnr(current) && !capable(CAP_SETPCAP)) return -EPERM; - if (copy_from_user(&effective, &data->effective, sizeof(effective)) || - copy_from_user(&inheritable, &data->inheritable, sizeof(inheritable)) || - copy_from_user(&permitted, &data->permitted, sizeof(permitted))) + if (copy_from_user(&kdata, data, tocopy + * sizeof(struct __user_cap_data_struct))) { return -EFAULT; + } + + for (i = 0; i < tocopy; i++) { + effective.cap[i] = kdata[i].effective; + permitted.cap[i] = kdata[i].permitted; + inheritable.cap[i] = kdata[i].inheritable; + } + while (i < _LINUX_CAPABILITY_U32S) { + effective.cap[i] = 0; + permitted.cap[i] = 0; + inheritable.cap[i] = 0; + i++; + } spin_lock(&task_capability_lock); read_lock(&tasklist_lock); diff --git a/kernel/exit.c b/kernel/exit.c index 9e459fefda77..9d3d0f0b27d9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1083,11 +1083,12 @@ do_group_exit(int exit_code) struct signal_struct *const sig = current->signal; struct sighand_struct *const sighand = current->sighand; spin_lock_irq(&sighand->siglock); - if (sig->flags & SIGNAL_GROUP_EXIT) + if (signal_group_exit(sig)) /* Another thread got here before we took the lock. */ exit_code = sig->group_exit_code; else { sig->group_exit_code = exit_code; + sig->flags = SIGNAL_GROUP_EXIT; zap_other_threads(current); } spin_unlock_irq(&sighand->siglock); diff --git a/kernel/fork.c b/kernel/fork.c index 05e0b6f4365b..2b55b74cd999 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -325,7 +325,7 @@ static inline int mm_alloc_pgd(struct mm_struct * mm) static inline void mm_free_pgd(struct mm_struct * mm) { - pgd_free(mm->pgd); + pgd_free(mm, mm->pgd); } #else #define dup_mmap(mm, oldmm) (0) @@ -1118,6 +1118,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef CONFIG_SECURITY p->security = NULL; #endif + p->cap_bset = current->cap_bset; p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p); @@ -1450,6 +1451,23 @@ long do_fork(unsigned long clone_flags, int trace = 0; long nr; + /* + * We hope to recycle these flags after 2.6.26 + */ + if (unlikely(clone_flags & CLONE_STOPPED)) { + static int __read_mostly count = 100; + + if (count > 0 && printk_ratelimit()) { + char comm[TASK_COMM_LEN]; + + count--; + printk(KERN_INFO "fork(): process `%s' used deprecated " + "clone flags 0x%lx\n", + get_task_comm(comm, current), + clone_flags & CLONE_STOPPED); + } + } + if (unlikely(current->ptrace)) { trace = fork_traceflag (clone_flags); if (trace) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 1069998fe25f..668f3967eb39 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -306,7 +306,7 @@ EXPORT_SYMBOL_GPL(ktime_sub_ns); /* * Divide a ktime value by a nanosecond value */ -unsigned long ktime_divns(const ktime_t kt, s64 div) +u64 ktime_divns(const ktime_t kt, s64 div) { u64 dclc, inc, dns; int sft = 0; @@ -321,7 +321,7 @@ unsigned long ktime_divns(const ktime_t kt, s64 div) dclc >>= sft; do_div(dclc, (unsigned long) div); - return (unsigned long) dclc; + return dclc; } #endif /* BITS_PER_LONG >= 64 */ @@ -656,10 +656,9 @@ void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) * Forward the timer expiry so it will expire in the future. * Returns the number of overruns. */ -unsigned long -hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) +u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) { - unsigned long orun = 1; + u64 orun = 1; ktime_t delta; delta = ktime_sub(now, timer->expires); diff --git a/kernel/latency.c b/kernel/latency.c deleted file mode 100644 index e63fcacb61a7..000000000000 --- a/kernel/latency.c +++ /dev/null @@ -1,280 +0,0 @@ -/* - * latency.c: Explicit system-wide latency-expectation infrastructure - * - * The purpose of this infrastructure is to allow device drivers to set - * latency constraint they have and to collect and summarize these - * expectations globally. The cummulated result can then be used by - * power management and similar users to make decisions that have - * tradoffs with a latency component. - * - * An example user of this are the x86 C-states; each higher C state saves - * more power, but has a higher exit latency. For the idle loop power - * code to make a good decision which C-state to use, information about - * acceptable latencies is required. - * - * An example announcer of latency is an audio driver that knowns it - * will get an interrupt when the hardware has 200 usec of samples - * left in the DMA buffer; in that case the driver can set a latency - * constraint of, say, 150 usec. - * - * Multiple drivers can each announce their maximum accepted latency, - * to keep these appart, a string based identifier is used. - * - * - * (C) Copyright 2006 Intel Corporation - * Author: Arjan van de Ven <arjan@linux.intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -#include <linux/latency.h> -#include <linux/list.h> -#include <linux/spinlock.h> -#include <linux/slab.h> -#include <linux/module.h> -#include <linux/notifier.h> -#include <linux/jiffies.h> -#include <asm/atomic.h> - -struct latency_info { - struct list_head list; - int usecs; - char *identifier; -}; - -/* - * locking rule: all modifications to current_max_latency and - * latency_list need to be done while holding the latency_lock. - * latency_lock needs to be taken _irqsave. - */ -static atomic_t current_max_latency; -static DEFINE_SPINLOCK(latency_lock); - -static LIST_HEAD(latency_list); -static BLOCKING_NOTIFIER_HEAD(latency_notifier); - -/* - * This function returns the maximum latency allowed, which - * happens to be the minimum of all maximum latencies on the - * list. - */ -static int __find_max_latency(void) -{ - int min = INFINITE_LATENCY; - struct latency_info *info; - - list_for_each_entry(info, &latency_list, list) { - if (info->usecs < min) - min = info->usecs; - } - return min; -} - -/** - * set_acceptable_latency - sets the maximum latency acceptable - * @identifier: string that identifies this driver - * @usecs: maximum acceptable latency for this driver - * - * This function informs the kernel that this device(driver) - * can accept at most usecs latency. This setting is used for - * power management and similar tradeoffs. - * - * This function sleeps and can only be called from process - * context. - * Calling this function with an existing identifier is valid - * and will cause the existing latency setting to be changed. - */ -void set_acceptable_latency(char *identifier, int usecs) -{ - struct latency_info *info, *iter; - unsigned long flags; - int found_old = 0; - - info = kzalloc(sizeof(struct latency_info), GFP_KERNEL); - if (!info) - return; - info->usecs = usecs; - info->identifier = kstrdup(identifier, GFP_KERNEL); - if (!info->identifier) - goto free_info; - - spin_lock_irqsave(&latency_lock, flags); - list_for_each_entry(iter, &latency_list, list) { - if (strcmp(iter->identifier, identifier)==0) { - found_old = 1; - iter->usecs = usecs; - break; - } - } - if (!found_old) - list_add(&info->list, &latency_list); - - if (usecs < atomic_read(¤t_max_latency)) - atomic_set(¤t_max_latency, usecs); - - spin_unlock_irqrestore(&latency_lock, flags); - - blocking_notifier_call_chain(&latency_notifier, - atomic_read(¤t_max_latency), NULL); - - /* - * if we inserted the new one, we're done; otherwise there was - * an existing one so we need to free the redundant data - */ - if (!found_old) - return; - - kfree(info->identifier); -free_info: - kfree(info); -} -EXPORT_SYMBOL_GPL(set_acceptable_latency); - -/** - * modify_acceptable_latency - changes the maximum latency acceptable - * @identifier: string that identifies this driver - * @usecs: maximum acceptable latency for this driver - * - * This function informs the kernel that this device(driver) - * can accept at most usecs latency. This setting is used for - * power management and similar tradeoffs. - * - * This function does not sleep and can be called in any context. - * Trying to use a non-existing identifier silently gets ignored. - * - * Due to the atomic nature of this function, the modified latency - * value will only be used for future decisions; past decisions - * can still lead to longer latencies in the near future. - */ -void modify_acceptable_latency(char *identifier, int usecs) -{ - struct latency_info *iter; - unsigned long flags; - - spin_lock_irqsave(&latency_lock, flags); - list_for_each_entry(iter, &latency_list, list) { - if (strcmp(iter->identifier, identifier) == 0) { - iter->usecs = usecs; - break; - } - } - if (usecs < atomic_read(¤t_max_latency)) - atomic_set(¤t_max_latency, usecs); - spin_unlock_irqrestore(&latency_lock, flags); -} -EXPORT_SYMBOL_GPL(modify_acceptable_latency); - -/** - * remove_acceptable_latency - removes the maximum latency acceptable - * @identifier: string that identifies this driver - * - * This function removes a previously set maximum latency setting - * for the driver and frees up any resources associated with the - * bookkeeping needed for this. - * - * This function does not sleep and can be called in any context. - * Trying to use a non-existing identifier silently gets ignored. - */ -void remove_acceptable_latency(char *identifier) -{ - unsigned long flags; - int newmax = 0; - struct latency_info *iter, *temp; - - spin_lock_irqsave(&latency_lock, flags); - - list_for_each_entry_safe(iter, temp, &latency_list, list) { - if (strcmp(iter->identifier, identifier) == 0) { - list_del(&iter->list); - newmax = iter->usecs; - kfree(iter->identifier); - kfree(iter); - break; - } - } - - /* If we just deleted the system wide value, we need to - * recalculate with a full search - */ - if (newmax == atomic_read(¤t_max_latency)) { - newmax = __find_max_latency(); - atomic_set(¤t_max_latency, newmax); - } - spin_unlock_irqrestore(&latency_lock, flags); -} -EXPORT_SYMBOL_GPL(remove_acceptable_latency); - -/** - * system_latency_constraint - queries the system wide latency maximum - * - * This function returns the system wide maximum latency in - * microseconds. - * - * This function does not sleep and can be called in any context. - */ -int system_latency_constraint(void) -{ - return atomic_read(¤t_max_latency); -} -EXPORT_SYMBOL_GPL(system_latency_constraint); - -/** - * synchronize_acceptable_latency - recalculates all latency decisions - * - * This function will cause a callback to various kernel pieces that - * will make those pieces rethink their latency decisions. This implies - * that if there are overlong latencies in hardware state already, those - * latencies get taken right now. When this call completes no overlong - * latency decisions should be active anymore. - * - * Typical usecase of this is after a modify_acceptable_latency() call, - * which in itself is non-blocking and non-synchronizing. - * - * This function blocks and should not be called with locks held. - */ - -void synchronize_acceptable_latency(void) -{ - blocking_notifier_call_chain(&latency_notifier, - atomic_read(¤t_max_latency), NULL); -} -EXPORT_SYMBOL_GPL(synchronize_acceptable_latency); - -/* - * Latency notifier: this notifier gets called when a non-atomic new - * latency value gets set. The expectation nof the caller of the - * non-atomic set is that when the call returns, future latencies - * are within bounds, so the functions on the notifier list are - * expected to take the overlong latencies immediately, inside the - * callback, and not make a overlong latency decision anymore. - * - * The callback gets called when the new latency value is made - * active so system_latency_constraint() returns the new latency. - */ -int register_latency_notifier(struct notifier_block * nb) -{ - return blocking_notifier_chain_register(&latency_notifier, nb); -} -EXPORT_SYMBOL_GPL(register_latency_notifier); - -int unregister_latency_notifier(struct notifier_block * nb) -{ - return blocking_notifier_chain_unregister(&latency_notifier, nb); -} -EXPORT_SYMBOL_GPL(unregister_latency_notifier); - -static __init int latency_init(void) -{ - atomic_set(¤t_max_latency, INFINITE_LATENCY); - /* - * we don't want by default to have longer latencies than 2 ticks, - * since that would cause lost ticks - */ - set_acceptable_latency("kernel", 2*1000000/HZ); - return 0; -} - -module_init(latency_init); diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c new file mode 100644 index 000000000000..0afe32be4c85 --- /dev/null +++ b/kernel/pm_qos_params.c @@ -0,0 +1,425 @@ +/* + * This module exposes the interface to kernel space for specifying + * QoS dependencies. It provides infrastructure for registration of: + * + * Dependents on a QoS value : register requirements + * Watchers of QoS value : get notified when target QoS value changes + * + * This QoS design is best effort based. Dependents register their QoS needs. + * Watchers register to keep track of the current QoS needs of the system. + * + * There are 3 basic classes of QoS parameter: latency, timeout, throughput + * each have defined units: + * latency: usec + * timeout: usec <-- currently not used. + * throughput: kbs (kilo byte / sec) + * + * There are lists of pm_qos_objects each one wrapping requirements, notifiers + * + * User mode requirements on a QOS parameter register themselves to the + * subsystem by opening the device node /dev/... and writing there request to + * the node. As long as the process holds a file handle open to the node the + * client continues to be accounted for. Upon file release the usermode + * requirement is removed and a new qos target is computed. This way when the + * requirement that the application has is cleaned up when closes the file + * pointer or exits the pm_qos_object will get an opportunity to clean up. + * + * mark gross mgross@linux.intel.com + */ + +#include <linux/pm_qos_params.h> +#include <linux/sched.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/device.h> +#include <linux/miscdevice.h> +#include <linux/string.h> +#include <linux/platform_device.h> +#include <linux/init.h> + +#include <linux/uaccess.h> + +/* + * locking rule: all changes to target_value or requirements or notifiers lists + * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock + * held, taken with _irqsave. One lock to rule them all + */ +struct requirement_list { + struct list_head list; + union { + s32 value; + s32 usec; + s32 kbps; + }; + char *name; +}; + +static s32 max_compare(s32 v1, s32 v2); +static s32 min_compare(s32 v1, s32 v2); + +struct pm_qos_object { + struct requirement_list requirements; + struct blocking_notifier_head *notifiers; + struct miscdevice pm_qos_power_miscdev; + char *name; + s32 default_value; + s32 target_value; + s32 (*comparitor)(s32, s32); +}; + +static struct pm_qos_object null_pm_qos; +static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); +static struct pm_qos_object cpu_dma_pm_qos = { + .requirements = {LIST_HEAD_INIT(cpu_dma_pm_qos.requirements.list)}, + .notifiers = &cpu_dma_lat_notifier, + .name = "cpu_dma_latency", + .default_value = 2000 * USEC_PER_SEC, + .target_value = 2000 * USEC_PER_SEC, + .comparitor = min_compare +}; + +static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); +static struct pm_qos_object network_lat_pm_qos = { + .requirements = {LIST_HEAD_INIT(network_lat_pm_qos.requirements.list)}, + .notifiers = &network_lat_notifier, + .name = "network_latency", + .default_value = 2000 * USEC_PER_SEC, + .target_value = 2000 * USEC_PER_SEC, + .comparitor = min_compare +}; + + +static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); +static struct pm_qos_object network_throughput_pm_qos = { + .requirements = + {LIST_HEAD_INIT(network_throughput_pm_qos.requirements.list)}, + .notifiers = &network_throughput_notifier, + .name = "network_throughput", + .default_value = 0, + .target_value = 0, + .comparitor = max_compare +}; + + +static struct pm_qos_object *pm_qos_array[] = { + &null_pm_qos, + &cpu_dma_pm_qos, + &network_lat_pm_qos, + &network_throughput_pm_qos +}; + +static DEFINE_SPINLOCK(pm_qos_lock); + +static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, + size_t count, loff_t *f_pos); +static int pm_qos_power_open(struct inode *inode, struct file *filp); +static int pm_qos_power_release(struct inode *inode, struct file *filp); + +static const struct file_operations pm_qos_power_fops = { + .write = pm_qos_power_write, + .open = pm_qos_power_open, + .release = pm_qos_power_release, +}; + +/* static helper functions */ +static s32 max_compare(s32 v1, s32 v2) +{ + return max(v1, v2); +} + +static s32 min_compare(s32 v1, s32 v2) +{ + return min(v1, v2); +} + + +static void update_target(int target) +{ + s32 extreme_value; + struct requirement_list *node; + unsigned long flags; + int call_notifier = 0; + + spin_lock_irqsave(&pm_qos_lock, flags); + extreme_value = pm_qos_array[target]->default_value; + list_for_each_entry(node, + &pm_qos_array[target]->requirements.list, list) { + extreme_value = pm_qos_array[target]->comparitor( + extreme_value, node->value); + } + if (pm_qos_array[target]->target_value != extreme_value) { + call_notifier = 1; + pm_qos_array[target]->target_value = extreme_value; + pr_debug(KERN_ERR "new target for qos %d is %d\n", target, + pm_qos_array[target]->target_value); + } + spin_unlock_irqrestore(&pm_qos_lock, flags); + + if (call_notifier) + blocking_notifier_call_chain(pm_qos_array[target]->notifiers, + (unsigned long) extreme_value, NULL); +} + +static int register_pm_qos_misc(struct pm_qos_object *qos) +{ + qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR; + qos->pm_qos_power_miscdev.name = qos->name; + qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops; + + return misc_register(&qos->pm_qos_power_miscdev); +} + +static int find_pm_qos_object_by_minor(int minor) +{ + int pm_qos_class; + + for (pm_qos_class = 0; + pm_qos_class < PM_QOS_NUM_CLASSES; pm_qos_class++) { + if (minor == + pm_qos_array[pm_qos_class]->pm_qos_power_miscdev.minor) + return pm_qos_class; + } + return -1; +} + +/** + * pm_qos_requirement - returns current system wide qos expectation + * @pm_qos_class: identification of which qos value is requested + * + * This function returns the current target value in an atomic manner. + */ +int pm_qos_requirement(int pm_qos_class) +{ + int ret_val; + unsigned long flags; + + spin_lock_irqsave(&pm_qos_lock, flags); + ret_val = pm_qos_array[pm_qos_class]->target_value; + spin_unlock_irqrestore(&pm_qos_lock, flags); + + return ret_val; +} +EXPORT_SYMBOL_GPL(pm_qos_requirement); + +/** + * pm_qos_add_requirement - inserts new qos request into the list + * @pm_qos_class: identifies which list of qos request to us + * @name: identifies the request + * @value: defines the qos request + * + * This function inserts a new entry in the pm_qos_class list of requested qos + * performance charactoistics. It recomputes the agregate QoS expectations for + * the pm_qos_class of parrameters. + */ +int pm_qos_add_requirement(int pm_qos_class, char *name, s32 value) +{ + struct requirement_list *dep; + unsigned long flags; + + dep = kzalloc(sizeof(struct requirement_list), GFP_KERNEL); + if (dep) { + if (value == PM_QOS_DEFAULT_VALUE) + dep->value = pm_qos_array[pm_qos_class]->default_value; + else + dep->value = value; + dep->name = kstrdup(name, GFP_KERNEL); + if (!dep->name) + goto cleanup; + + spin_lock_irqsave(&pm_qos_lock, flags); + list_add(&dep->list, + &pm_qos_array[pm_qos_class]->requirements.list); + spin_unlock_irqrestore(&pm_qos_lock, flags); + update_target(pm_qos_class); + + return 0; + } + +cleanup: + kfree(dep); + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(pm_qos_add_requirement); + +/** + * pm_qos_update_requirement - modifies an existing qos request + * @pm_qos_class: identifies which list of qos request to us + * @name: identifies the request + * @value: defines the qos request + * + * Updates an existing qos requierement for the pm_qos_class of parameters along + * with updating the target pm_qos_class value. + * + * If the named request isn't in the lest then no change is made. + */ +int pm_qos_update_requirement(int pm_qos_class, char *name, s32 new_value) +{ + unsigned long flags; + struct requirement_list *node; + int pending_update = 0; + + spin_lock_irqsave(&pm_qos_lock, flags); + list_for_each_entry(node, + &pm_qos_array[pm_qos_class]->requirements.list, list) { + if (strcmp(node->name, name) == 0) { + if (new_value == PM_QOS_DEFAULT_VALUE) + node->value = + pm_qos_array[pm_qos_class]->default_value; + else + node->value = new_value; + pending_update = 1; + break; + } + } + spin_unlock_irqrestore(&pm_qos_lock, flags); + if (pending_update) + update_target(pm_qos_class); + + return 0; +} +EXPORT_SYMBOL_GPL(pm_qos_update_requirement); + +/** + * pm_qos_remove_requirement - modifies an existing qos request + * @pm_qos_class: identifies which list of qos request to us + * @name: identifies the request + * + * Will remove named qos request from pm_qos_class list of parrameters and + * recompute the current target value for the pm_qos_class. + */ +void pm_qos_remove_requirement(int pm_qos_class, char *name) +{ + unsigned long flags; + struct requirement_list *node; + int pending_update = 0; + + spin_lock_irqsave(&pm_qos_lock, flags); + list_for_each_entry(node, + &pm_qos_array[pm_qos_class]->requirements.list, list) { + if (strcmp(node->name, name) == 0) { + kfree(node->name); + list_del(&node->list); + kfree(node); + pending_update = 1; + break; + } + } + spin_unlock_irqrestore(&pm_qos_lock, flags); + if (pending_update) + update_target(pm_qos_class); +} +EXPORT_SYMBOL_GPL(pm_qos_remove_requirement); + +/** + * pm_qos_add_notifier - sets notification entry for changes to target value + * @pm_qos_class: identifies which qos target changes should be notified. + * @notifier: notifier block managed by caller. + * + * will register the notifier into a notification chain that gets called + * uppon changes to the pm_qos_class target value. + */ + int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) +{ + int retval; + + retval = blocking_notifier_chain_register( + pm_qos_array[pm_qos_class]->notifiers, notifier); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_qos_add_notifier); + +/** + * pm_qos_remove_notifier - deletes notification entry from chain. + * @pm_qos_class: identifies which qos target changes are notified. + * @notifier: notifier block to be removed. + * + * will remove the notifier from the notification chain that gets called + * uppon changes to the pm_qos_class target value. + */ +int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) +{ + int retval; + + retval = blocking_notifier_chain_unregister( + pm_qos_array[pm_qos_class]->notifiers, notifier); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); + +#define PID_NAME_LEN sizeof("process_1234567890") +static char name[PID_NAME_LEN]; + +static int pm_qos_power_open(struct inode *inode, struct file *filp) +{ + int ret; + long pm_qos_class; + + pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); + if (pm_qos_class >= 0) { + filp->private_data = (void *)pm_qos_class; + sprintf(name, "process_%d", current->pid); + ret = pm_qos_add_requirement(pm_qos_class, name, + PM_QOS_DEFAULT_VALUE); + if (ret >= 0) + return 0; + } + + return -EPERM; +} + +static int pm_qos_power_release(struct inode *inode, struct file *filp) +{ + int pm_qos_class; + + pm_qos_class = (long)filp->private_data; + sprintf(name, "process_%d", current->pid); + pm_qos_remove_requirement(pm_qos_class, name); + + return 0; +} + +static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, + size_t count, loff_t *f_pos) +{ + s32 value; + int pm_qos_class; + + pm_qos_class = (long)filp->private_data; + if (count != sizeof(s32)) + return -EINVAL; + if (copy_from_user(&value, buf, sizeof(s32))) + return -EFAULT; + sprintf(name, "process_%d", current->pid); + pm_qos_update_requirement(pm_qos_class, name, value); + + return sizeof(s32); +} + + +static int __init pm_qos_power_init(void) +{ + int ret = 0; + + ret = register_pm_qos_misc(&cpu_dma_pm_qos); + if (ret < 0) { + printk(KERN_ERR "pm_qos_param: cpu_dma_latency setup failed\n"); + return ret; + } + ret = register_pm_qos_misc(&network_lat_pm_qos); + if (ret < 0) { + printk(KERN_ERR "pm_qos_param: network_latency setup failed\n"); + return ret; + } + ret = register_pm_qos_misc(&network_throughput_pm_qos); + if (ret < 0) + printk(KERN_ERR + "pm_qos_param: network_throughput setup failed\n"); + + return ret; +} + +late_initcall(pm_qos_power_init); diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 36d563fd9e3b..122d5c787fe2 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -256,8 +256,9 @@ static void schedule_next_timer(struct k_itimer *timr) if (timr->it.real.interval.tv64 == 0) return; - timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(), - timr->it.real.interval); + timr->it_overrun += (unsigned int) hrtimer_forward(timer, + timer->base->get_time(), + timr->it.real.interval); timr->it_overrun_last = timr->it_overrun; timr->it_overrun = -1; @@ -386,7 +387,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) now = ktime_add(now, kj); } #endif - timr->it_overrun += + timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, timr->it.real.interval); ret = HRTIMER_RESTART; @@ -662,7 +663,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) */ if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING || (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) - timr->it_overrun += hrtimer_forward(timer, now, iv); + timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); remaining = ktime_sub(timer->expires, now); /* Return 0 only, when the timer is expired and not pending */ diff --git a/kernel/power/disk.c b/kernel/power/disk.c index d09da0895174..859a8e59773a 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -26,7 +26,7 @@ static int noresume = 0; -char resume_file[256] = CONFIG_PM_STD_PARTITION; +static char resume_file[256] = CONFIG_PM_STD_PARTITION; dev_t swsusp_resume_device; sector_t swsusp_resume_block; @@ -185,7 +185,7 @@ static void platform_restore_cleanup(int platform_mode) * reappears in this routine after a restore. */ -int create_image(int platform_mode) +static int create_image(int platform_mode) { int error; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index f6a5df934f8d..95250d7c8d91 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1203,7 +1203,7 @@ asmlinkage int swsusp_save(void) printk(KERN_INFO "PM: Creating hibernation image: \n"); - drain_local_pages(); + drain_local_pages(NULL); nr_pages = count_data_pages(); nr_highmem = count_highmem_pages(); printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem); @@ -1221,7 +1221,7 @@ asmlinkage int swsusp_save(void) /* During allocating of suspend pagedir, new cold pages may appear. * Kill them. */ - drain_local_pages(); + drain_local_pages(NULL); copy_data_pages(©_bm, &orig_bm); /* diff --git a/kernel/signal.c b/kernel/signal.c index 4333b6dbb424..6a5f97cd337a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -911,27 +911,6 @@ __group_complete_signal(int sig, struct task_struct *p) } while_each_thread(p, t); return; } - - /* - * There will be a core dump. We make all threads other - * than the chosen one go into a group stop so that nothing - * happens until it gets scheduled, takes the signal off - * the shared queue, and does the core dump. This is a - * little more complicated than strictly necessary, but it - * keeps the signal state that winds up in the core dump - * unchanged from the death state, e.g. which thread had - * the core-dump signal unblocked. - */ - rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); - rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending); - p->signal->group_stop_count = 0; - p->signal->group_exit_task = t; - p = t; - do { - p->signal->group_stop_count++; - signal_wake_up(t, t == p); - } while_each_thread(p, t); - return; } /* @@ -978,7 +957,6 @@ void zap_other_threads(struct task_struct *p) { struct task_struct *t; - p->signal->flags = SIGNAL_GROUP_EXIT; p->signal->group_stop_count = 0; for (t = next_thread(p); t != p; t = next_thread(t)) { @@ -1709,9 +1687,6 @@ static int do_signal_stop(int signr) struct signal_struct *sig = current->signal; int stop_count; - if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED)) - return 0; - if (sig->group_stop_count > 0) { /* * There is a group stop in progress. We don't need to @@ -1719,12 +1694,15 @@ static int do_signal_stop(int signr) */ stop_count = --sig->group_stop_count; } else { + struct task_struct *t; + + if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || + unlikely(sig->group_exit_task)) + return 0; /* * There is no group stop already in progress. * We must initiate one now. */ - struct task_struct *t; - sig->group_exit_code = signr; stop_count = 0; @@ -1752,47 +1730,6 @@ static int do_signal_stop(int signr) return 1; } -/* - * Do appropriate magic when group_stop_count > 0. - * We return nonzero if we stopped, after releasing the siglock. - * We return zero if we still hold the siglock and should look - * for another signal without checking group_stop_count again. - */ -static int handle_group_stop(void) -{ - int stop_count; - - if (current->signal->group_exit_task == current) { - /* - * Group stop is so we can do a core dump, - * We are the initiating thread, so get on with it. - */ - current->signal->group_exit_task = NULL; - return 0; - } - - if (current->signal->flags & SIGNAL_GROUP_EXIT) - /* - * Group stop is so another thread can do a core dump, - * or else we are racing against a death signal. - * Just punt the stop so we can get the next signal. - */ - return 0; - - /* - * There is a group stop in progress. We stop - * without any associated signal being in our queue. - */ - stop_count = --current->signal->group_stop_count; - if (stop_count == 0) - current->signal->flags = SIGNAL_STOP_STOPPED; - current->exit_code = current->signal->group_exit_code; - set_current_state(TASK_STOPPED); - spin_unlock_irq(¤t->sighand->siglock); - finish_stop(stop_count); - return 1; -} - int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie) { @@ -1807,7 +1744,7 @@ relock: struct k_sigaction *ka; if (unlikely(current->signal->group_stop_count > 0) && - handle_group_stop()) + do_signal_stop(0)) goto relock; signr = dequeue_signal(current, mask, info); diff --git a/kernel/sys.c b/kernel/sys.c index d1fe71eb4546..53de35fc8245 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -315,7 +315,7 @@ static void kernel_kexec(void) #endif } -void kernel_shutdown_prepare(enum system_states state) +static void kernel_shutdown_prepare(enum system_states state) { blocking_notifier_call_chain(&reboot_notifier_list, (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); @@ -1637,7 +1637,7 @@ asmlinkage long sys_umask(int mask) mask = xchg(¤t->fs->umask, mask & S_IRWXUGO); return mask; } - + asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { @@ -1742,6 +1742,17 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, error = prctl_set_seccomp(arg2); break; + case PR_CAPBSET_READ: + if (!cap_valid(arg2)) + return -EINVAL; + return !!cap_raised(current->cap_bset, arg2); + case PR_CAPBSET_DROP: +#ifdef CONFIG_SECURITY_FILE_CAPABILITIES + return cap_prctl_drop(arg2); +#else + return -EINVAL; +#endif + default: error = -EINVAL; break; diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index beee5b3b68a2..5b9b467de070 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -154,7 +154,10 @@ cond_syscall(sys_ioprio_get); /* New file descriptors */ cond_syscall(sys_signalfd); -cond_syscall(sys_timerfd); cond_syscall(compat_sys_signalfd); -cond_syscall(compat_sys_timerfd); +cond_syscall(sys_timerfd_create); +cond_syscall(sys_timerfd_settime); +cond_syscall(sys_timerfd_gettime); +cond_syscall(compat_sys_timerfd_settime); +cond_syscall(compat_sys_timerfd_gettime); cond_syscall(sys_eventfd); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7cb1ac3e6fff..5e2ad5bf88e2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -84,8 +84,11 @@ extern int sysctl_stat_interval; extern int latencytop_enabled; /* Constants used for minimum and maximum */ -#ifdef CONFIG_DETECT_SOFTLOCKUP +#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM) static int one = 1; +#endif + +#ifdef CONFIG_DETECT_SOFTLOCKUP static int sixty = 60; #endif @@ -416,15 +419,6 @@ static struct ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif -#ifdef CONFIG_SECURITY_CAPABILITIES - { - .procname = "cap-bound", - .data = &cap_bset, - .maxlen = sizeof(kernel_cap_t), - .mode = 0600, - .proc_handler = &proc_dointvec_bset, - }, -#endif /* def CONFIG_SECURITY_CAPABILITIES */ #ifdef CONFIG_BLK_DEV_INITRD { .ctl_name = KERN_REALROOTDEV, @@ -1150,6 +1144,19 @@ static struct ctl_table vm_table[] = { .extra1 = &zero, }, #endif +#ifdef CONFIG_HIGHMEM + { + .ctl_name = CTL_UNNUMBERED, + .procname = "highmem_is_dirtyable", + .data = &vm_highmem_is_dirtyable, + .maxlen = sizeof(vm_highmem_is_dirtyable), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one, + }, +#endif /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt @@ -2080,26 +2087,6 @@ static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp, return 0; } -#ifdef CONFIG_SECURITY_CAPABILITIES -/* - * init may raise the set. - */ - -int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - int op; - - if (write && !capable(CAP_SYS_MODULE)) { - return -EPERM; - } - - op = is_global_init(current) ? OP_SET : OP_AND; - return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, - do_proc_dointvec_bset_conv,&op); -} -#endif /* def CONFIG_SECURITY_CAPABILITIES */ - /* * Taint values can only be increased */ @@ -2513,12 +2500,6 @@ int proc_dointvec(struct ctl_table *table, int write, struct file *filp, return -ENOSYS; } -int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index c3206fa50048..006365b69eaf 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -37,10 +37,6 @@ static struct trans_ctl_table trans_kern_table[] = { { KERN_NODENAME, "hostname" }, { KERN_DOMAINNAME, "domainname" }, -#ifdef CONFIG_SECURITY_CAPABILITIES - { KERN_CAP_BSET, "cap-bound" }, -#endif /* def CONFIG_SECURITY_CAPABILITIES */ - { KERN_PANIC, "panic" }, { KERN_REALROOTDEV, "real-root-dev" }, @@ -1498,9 +1494,6 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) (table->strategy == sysctl_ms_jiffies) || (table->proc_handler == proc_dostring) || (table->proc_handler == proc_dointvec) || -#ifdef CONFIG_SECURITY_CAPABILITIES - (table->proc_handler == proc_dointvec_bset) || -#endif /* def CONFIG_SECURITY_CAPABILITIES */ (table->proc_handler == proc_dointvec_minmax) || (table->proc_handler == proc_dointvec_jiffies) || (table->proc_handler == proc_dointvec_userhz_jiffies) || diff --git a/lib/Makefile b/lib/Makefile index 543f2502b60a..a18062e4633f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_SMP) += pcounter.o obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o +obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o lib-$(CONFIG_GENERIC_BUG) += bug.o diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c new file mode 100644 index 000000000000..495575a59ca6 --- /dev/null +++ b/lib/iommu-helper.c @@ -0,0 +1,80 @@ +/* + * IOMMU helper functions for the free area management + */ + +#include <linux/module.h> +#include <linux/bitops.h> + +static unsigned long find_next_zero_area(unsigned long *map, + unsigned long size, + unsigned long start, + unsigned int nr, + unsigned long align_mask) +{ + unsigned long index, end, i; +again: + index = find_next_zero_bit(map, size, start); + + /* Align allocation */ + index = (index + align_mask) & ~align_mask; + + end = index + nr; + if (end >= size) + return -1; + for (i = index; i < end; i++) { + if (test_bit(i, map)) { + start = i+1; + goto again; + } + } + return index; +} + +static inline void set_bit_area(unsigned long *map, unsigned long i, + int len) +{ + unsigned long end = i + len; + while (i < end) { + __set_bit(i, map); + i++; + } +} + +static inline int is_span_boundary(unsigned int index, unsigned int nr, + unsigned long shift, + unsigned long boundary_size) +{ + shift = (shift + index) & (boundary_size - 1); + return shift + nr > boundary_size; +} + +unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, + unsigned long start, unsigned int nr, + unsigned long shift, unsigned long boundary_size, + unsigned long align_mask) +{ + unsigned long index; +again: + index = find_next_zero_area(map, size, start, nr, align_mask); + if (index != -1) { + if (is_span_boundary(index, nr, shift, boundary_size)) { + /* we could do more effectively */ + start = index + 1; + goto again; + } + set_bit_area(map, index, nr); + } + return index; +} +EXPORT_SYMBOL(iommu_area_alloc); + +void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr) +{ + unsigned long end = start + nr; + + while (start < end) { + __clear_bit(start, map); + start++; + } +} +EXPORT_SYMBOL(iommu_area_free); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 48c250fe2233..65f0e758ec38 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -95,14 +95,17 @@ static inline gfp_t root_gfp_mask(struct radix_tree_root *root) static struct radix_tree_node * radix_tree_node_alloc(struct radix_tree_root *root) { - struct radix_tree_node *ret; + struct radix_tree_node *ret = NULL; gfp_t gfp_mask = root_gfp_mask(root); - ret = kmem_cache_alloc(radix_tree_node_cachep, - set_migrateflags(gfp_mask, __GFP_RECLAIMABLE)); - if (ret == NULL && !(gfp_mask & __GFP_WAIT)) { + if (!(gfp_mask & __GFP_WAIT)) { struct radix_tree_preload *rtp; + /* + * Provided the caller has preloaded here, we will always + * succeed in getting a node here (and never reach + * kmem_cache_alloc) + */ rtp = &__get_cpu_var(radix_tree_preloads); if (rtp->nr) { ret = rtp->nodes[rtp->nr - 1]; @@ -110,6 +113,10 @@ radix_tree_node_alloc(struct radix_tree_root *root) rtp->nr--; } } + if (ret == NULL) + ret = kmem_cache_alloc(radix_tree_node_cachep, + set_migrateflags(gfp_mask, __GFP_RECLAIMABLE)); + BUG_ON(radix_tree_is_indirect_ptr(ret)); return ret; } diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 1a8050ade861..4bb5a11e18a2 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -282,6 +282,15 @@ address_needs_mapping(struct device *hwdev, dma_addr_t addr) return (addr & ~mask) != 0; } +static inline unsigned int is_span_boundary(unsigned int index, + unsigned int nslots, + unsigned long offset_slots, + unsigned long max_slots) +{ + unsigned long offset = (offset_slots + index) & (max_slots - 1); + return offset + nslots > max_slots; +} + /* * Allocates bounce buffer and returns its kernel virtual address. */ @@ -292,6 +301,16 @@ map_single(struct device *hwdev, char *buffer, size_t size, int dir) char *dma_addr; unsigned int nslots, stride, index, wrap; int i; + unsigned long start_dma_addr; + unsigned long mask; + unsigned long offset_slots; + unsigned long max_slots; + + mask = dma_get_seg_boundary(hwdev); + start_dma_addr = virt_to_bus(io_tlb_start) & mask; + + offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + max_slots = ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; /* * For mappings greater than a page, we limit the stride (and @@ -311,10 +330,17 @@ map_single(struct device *hwdev, char *buffer, size_t size, int dir) */ spin_lock_irqsave(&io_tlb_lock, flags); { - wrap = index = ALIGN(io_tlb_index, stride); - + index = ALIGN(io_tlb_index, stride); if (index >= io_tlb_nslabs) - wrap = index = 0; + index = 0; + + while (is_span_boundary(index, nslots, offset_slots, + max_slots)) { + index += stride; + if (index >= io_tlb_nslabs) + index = 0; + } + wrap = index; do { /* @@ -341,9 +367,12 @@ map_single(struct device *hwdev, char *buffer, size_t size, int dir) goto found; } - index += stride; - if (index >= io_tlb_nslabs) - index = 0; + do { + index += stride; + if (index >= io_tlb_nslabs) + index = 0; + } while (is_span_boundary(index, nslots, offset_slots, + max_slots)); } while (index != wrap); spin_unlock_irqrestore(&io_tlb_lock, flags); diff --git a/mm/Makefile b/mm/Makefile index 5c0b0ea7572d..44e2528af70c 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -13,6 +13,7 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ page_isolation.o $(mmu-y) +obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o obj-$(CONFIG_BOUNCE) += bounce.o obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o diff --git a/mm/fadvise.c b/mm/fadvise.c index 0df4c899e979..3c0f1e99f5e4 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -49,9 +49,21 @@ asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) goto out; } - if (mapping->a_ops->get_xip_page) - /* no bad return value, but ignore advice */ + if (mapping->a_ops->get_xip_page) { + switch (advice) { + case POSIX_FADV_NORMAL: + case POSIX_FADV_RANDOM: + case POSIX_FADV_SEQUENTIAL: + case POSIX_FADV_WILLNEED: + case POSIX_FADV_NOREUSE: + case POSIX_FADV_DONTNEED: + /* no bad return value, but ignore advice */ + break; + default: + ret = -EINVAL; + } goto out; + } /* Careful about overflows. Len == 0 means "as much as possible" */ endbyte = offset + len; diff --git a/mm/filemap.c b/mm/filemap.c index 76bea88cbebc..81fb9bff0d4f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -65,7 +65,6 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, * ->private_lock (__free_pte->__set_page_dirty_buffers) * ->swap_lock (exclusive_swap_page, others) * ->mapping->tree_lock - * ->zone.lock * * ->i_mutex * ->i_mmap_lock (truncate->unmap_mapping_range) @@ -528,7 +527,7 @@ static inline void wake_up_page(struct page *page, int bit) __wake_up_bit(page_waitqueue(page), &page->flags, bit); } -void fastcall wait_on_page_bit(struct page *page, int bit_nr) +void wait_on_page_bit(struct page *page, int bit_nr) { DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); @@ -552,7 +551,7 @@ EXPORT_SYMBOL(wait_on_page_bit); * the clear_bit and the read of the waitqueue (to avoid SMP races with a * parallel wait_on_page_locked()). */ -void fastcall unlock_page(struct page *page) +void unlock_page(struct page *page) { smp_mb__before_clear_bit(); if (!TestClearPageLocked(page)) @@ -586,7 +585,7 @@ EXPORT_SYMBOL(end_page_writeback); * chances are that on the second loop, the block layer's plug list is empty, * so sync_page() will then return in state TASK_UNINTERRUPTIBLE. */ -void fastcall __lock_page(struct page *page) +void __lock_page(struct page *page) { DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); @@ -607,7 +606,7 @@ int fastcall __lock_page_killable(struct page *page) * Variant of lock_page that does not require the caller to hold a reference * on the page's mapping. */ -void fastcall __lock_page_nosync(struct page *page) +void __lock_page_nosync(struct page *page) { DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock, @@ -1277,7 +1276,7 @@ asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count) * This adds the requested page to the page cache if it isn't already there, * and schedules an I/O to read in its contents from disk. */ -static int fastcall page_cache_read(struct file * file, pgoff_t offset) +static int page_cache_read(struct file *file, pgoff_t offset) { struct address_space *mapping = file->f_mapping; struct page *page; diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index f874ae818ad3..0420a0292b03 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -431,7 +431,7 @@ xip_truncate_page(struct address_space *mapping, loff_t from) else return PTR_ERR(page); } - zero_user_page(page, offset, length, KM_USER0); + zero_user(page, offset, length); return 0; } EXPORT_SYMBOL_GPL(xip_truncate_page); diff --git a/mm/fremap.c b/mm/fremap.c index 14bd3bf7826e..69a37c2bdf81 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -190,10 +190,13 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, */ if (mapping_cap_account_dirty(mapping)) { unsigned long addr; + struct file *file = vma->vm_file; flags &= MAP_NONBLOCK; - addr = mmap_region(vma->vm_file, start, size, + get_file(file); + addr = mmap_region(file, start, size, flags, vma->vm_flags, pgoff, 1); + fput(file); if (IS_ERR_VALUE(addr)) { err = addr; } else { diff --git a/mm/highmem.c b/mm/highmem.c index 7a967bc35152..35d47733cde4 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -163,7 +163,7 @@ start: return vaddr; } -void fastcall *kmap_high(struct page *page) +void *kmap_high(struct page *page) { unsigned long vaddr; @@ -185,7 +185,7 @@ void fastcall *kmap_high(struct page *page) EXPORT_SYMBOL(kmap_high); -void fastcall kunmap_high(struct page *page) +void kunmap_high(struct page *page) { unsigned long vaddr; unsigned long nr; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index db861d8b6c28..1a5642074e34 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -813,6 +813,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, spin_unlock(&mm->page_table_lock); copy_huge_page(new_page, old_page, address, vma); + __SetPageUptodate(new_page); spin_lock(&mm->page_table_lock); ptep = huge_pte_offset(mm, address & HPAGE_MASK); @@ -858,6 +859,7 @@ retry: goto out; } clear_huge_page(page, address); + __SetPageUptodate(page); if (vma->vm_flags & VM_SHARED) { int err; diff --git a/mm/internal.h b/mm/internal.h index 953f941ea867..5a9a6200e034 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -24,7 +24,7 @@ static inline void set_page_count(struct page *page, int v) */ static inline void set_page_refcounted(struct page *page) { - VM_BUG_ON(PageCompound(page) && PageTail(page)); + VM_BUG_ON(PageTail(page)); VM_BUG_ON(atomic_read(&page->_count)); set_page_count(page, 1); } @@ -34,7 +34,7 @@ static inline void __put_page(struct page *page) atomic_dec(&page->_count); } -extern void fastcall __init __free_pages_bootmem(struct page *page, +extern void __init __free_pages_bootmem(struct page *page, unsigned int order); /* diff --git a/mm/memory.c b/mm/memory.c index d902d0e25edc..7bb70728bb52 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -305,7 +305,7 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) spin_lock(&mm->page_table_lock); if (pmd_present(*pmd)) { /* Another has populated it */ pte_lock_deinit(new); - pte_free(new); + pte_free(mm, new); } else { mm->nr_ptes++; inc_zone_page_state(new, NR_PAGETABLE); @@ -323,7 +323,7 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address) spin_lock(&init_mm.page_table_lock); if (pmd_present(*pmd)) /* Another has populated it */ - pte_free_kernel(new); + pte_free_kernel(&init_mm, new); else pmd_populate_kernel(&init_mm, pmd, new); spin_unlock(&init_mm.page_table_lock); @@ -1109,7 +1109,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, } EXPORT_SYMBOL(get_user_pages); -pte_t * fastcall get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl) +pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, + spinlock_t **ptl) { pgd_t * pgd = pgd_offset(mm, addr); pud_t * pud = pud_alloc(mm, pgd, addr); @@ -1517,10 +1518,8 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo memset(kaddr, 0, PAGE_SIZE); kunmap_atomic(kaddr, KM_USER0); flush_dcache_page(dst); - return; - - } - copy_user_highpage(dst, src, va, vma); + } else + copy_user_highpage(dst, src, va, vma); } /* @@ -1629,6 +1628,7 @@ gotten: if (!new_page) goto oom; cow_user_page(new_page, old_page, address, vma); + __SetPageUptodate(new_page); /* * Re-check the pte - we dropped the lock @@ -1909,50 +1909,49 @@ EXPORT_SYMBOL(unmap_mapping_range); */ int vmtruncate(struct inode * inode, loff_t offset) { - struct address_space *mapping = inode->i_mapping; - unsigned long limit; + if (inode->i_size < offset) { + unsigned long limit; - if (inode->i_size < offset) - goto do_expand; - /* - * truncation of in-use swapfiles is disallowed - it would cause - * subsequent swapout to scribble on the now-freed blocks. - */ - if (IS_SWAPFILE(inode)) - goto out_busy; - i_size_write(inode, offset); + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + if (limit != RLIM_INFINITY && offset > limit) + goto out_sig; + if (offset > inode->i_sb->s_maxbytes) + goto out_big; + i_size_write(inode, offset); + } else { + struct address_space *mapping = inode->i_mapping; + + /* + * truncation of in-use swapfiles is disallowed - it would + * cause subsequent swapout to scribble on the now-freed + * blocks. + */ + if (IS_SWAPFILE(inode)) + return -ETXTBSY; + i_size_write(inode, offset); + + /* + * unmap_mapping_range is called twice, first simply for + * efficiency so that truncate_inode_pages does fewer + * single-page unmaps. However after this first call, and + * before truncate_inode_pages finishes, it is possible for + * private pages to be COWed, which remain after + * truncate_inode_pages finishes, hence the second + * unmap_mapping_range call must be made for correctness. + */ + unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); + truncate_inode_pages(mapping, offset); + unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); + } - /* - * unmap_mapping_range is called twice, first simply for efficiency - * so that truncate_inode_pages does fewer single-page unmaps. However - * after this first call, and before truncate_inode_pages finishes, - * it is possible for private pages to be COWed, which remain after - * truncate_inode_pages finishes, hence the second unmap_mapping_range - * call must be made for correctness. - */ - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(mapping, offset); - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - goto out_truncate; - -do_expand: - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) - goto out_sig; - if (offset > inode->i_sb->s_maxbytes) - goto out_big; - i_size_write(inode, offset); - -out_truncate: if (inode->i_op && inode->i_op->truncate) inode->i_op->truncate(inode); return 0; + out_sig: send_sig(SIGXFSZ, current, 0); out_big: return -EFBIG; -out_busy: - return -ETXTBSY; } EXPORT_SYMBOL(vmtruncate); @@ -1980,67 +1979,6 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) return 0; } -/** - * swapin_readahead - swap in pages in hope we need them soon - * @entry: swap entry of this memory - * @addr: address to start - * @vma: user vma this addresses belong to - * - * Primitive swap readahead code. We simply read an aligned block of - * (1 << page_cluster) entries in the swap area. This method is chosen - * because it doesn't cost us any seek time. We also make sure to queue - * the 'original' request together with the readahead ones... - * - * This has been extended to use the NUMA policies from the mm triggering - * the readahead. - * - * Caller must hold down_read on the vma->vm_mm if vma is not NULL. - */ -void swapin_readahead(swp_entry_t entry, unsigned long addr,struct vm_area_struct *vma) -{ -#ifdef CONFIG_NUMA - struct vm_area_struct *next_vma = vma ? vma->vm_next : NULL; -#endif - int i, num; - struct page *new_page; - unsigned long offset; - - /* - * Get the number of handles we should do readahead io to. - */ - num = valid_swaphandles(entry, &offset); - for (i = 0; i < num; offset++, i++) { - /* Ok, do the async read-ahead now */ - new_page = read_swap_cache_async(swp_entry(swp_type(entry), - offset), vma, addr); - if (!new_page) - break; - page_cache_release(new_page); -#ifdef CONFIG_NUMA - /* - * Find the next applicable VMA for the NUMA policy. - */ - addr += PAGE_SIZE; - if (addr == 0) - vma = NULL; - if (vma) { - if (addr >= vma->vm_end) { - vma = next_vma; - next_vma = vma ? vma->vm_next : NULL; - } - if (vma && addr < vma->vm_start) - vma = NULL; - } else { - if (next_vma && addr >= next_vma->vm_start) { - vma = next_vma; - next_vma = vma->vm_next; - } - } -#endif - } - lru_add_drain(); /* Push any new pages onto the LRU now */ -} - /* * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. @@ -2068,8 +2006,8 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, page = lookup_swap_cache(entry); if (!page) { grab_swap_token(); /* Contend for token _before_ read-in */ - swapin_readahead(entry, address, vma); - page = read_swap_cache_async(entry, vma, address); + page = swapin_readahead(entry, + GFP_HIGHUSER_MOVABLE, vma, address); if (!page) { /* * Back out if somebody else faulted in this pte @@ -2163,6 +2101,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, page = alloc_zeroed_user_highpage_movable(vma, address); if (!page) goto oom; + __SetPageUptodate(page); entry = mk_pte(page, vma->vm_page_prot); entry = maybe_mkwrite(pte_mkdirty(entry), vma); @@ -2263,6 +2202,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, goto out; } copy_user_highpage(page, vmf.page, address, vma); + __SetPageUptodate(page); } else { /* * If the page will be shareable, see if the backing @@ -2563,7 +2503,7 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) spin_lock(&mm->page_table_lock); if (pgd_present(*pgd)) /* Another has populated it */ - pud_free(new); + pud_free(mm, new); else pgd_populate(mm, pgd, new); spin_unlock(&mm->page_table_lock); @@ -2585,12 +2525,12 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) spin_lock(&mm->page_table_lock); #ifndef __ARCH_HAS_4LEVEL_HACK if (pud_present(*pud)) /* Another has populated it */ - pmd_free(new); + pmd_free(mm, new); else pud_populate(mm, pud, new); #else if (pgd_present(*pud)) /* Another has populated it */ - pmd_free(new); + pmd_free(mm, new); else pgd_populate(mm, pud, new); #endif /* __ARCH_HAS_4LEVEL_HACK */ @@ -2618,46 +2558,6 @@ int make_pages_present(unsigned long addr, unsigned long end) return ret == len ? 0 : -1; } -/* - * Map a vmalloc()-space virtual address to the physical page. - */ -struct page * vmalloc_to_page(void * vmalloc_addr) -{ - unsigned long addr = (unsigned long) vmalloc_addr; - struct page *page = NULL; - pgd_t *pgd = pgd_offset_k(addr); - pud_t *pud; - pmd_t *pmd; - pte_t *ptep, pte; - - if (!pgd_none(*pgd)) { - pud = pud_offset(pgd, addr); - if (!pud_none(*pud)) { - pmd = pmd_offset(pud, addr); - if (!pmd_none(*pmd)) { - ptep = pte_offset_map(pmd, addr); - pte = *ptep; - if (pte_present(pte)) - page = pte_page(pte); - pte_unmap(ptep); - } - } - } - return page; -} - -EXPORT_SYMBOL(vmalloc_to_page); - -/* - * Map a vmalloc()-space virtual address to the physical page frame number. - */ -unsigned long vmalloc_to_pfn(void * vmalloc_addr) -{ - return page_to_pfn(vmalloc_to_page(vmalloc_addr)); -} - -EXPORT_SYMBOL(vmalloc_to_pfn); - #if !defined(__HAVE_ARCH_GATE_AREA) #if defined(AT_SYSINFO_EHDR) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9512a544d044..7469c503580d 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -481,8 +481,6 @@ check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn) return offlined; } -extern void drain_all_local_pages(void); - int offline_pages(unsigned long start_pfn, unsigned long end_pfn, unsigned long timeout) { @@ -540,7 +538,7 @@ repeat: lru_add_drain_all(); flush_scheduled_work(); cond_resched(); - drain_all_local_pages(); + drain_all_pages(); } pfn = scan_lru_pages(start_pfn, end_pfn); @@ -563,7 +561,7 @@ repeat: flush_scheduled_work(); yield(); /* drain pcp pages , this is synchrouns. */ - drain_all_local_pages(); + drain_all_pages(); /* check again */ offlined_pages = check_pages_isolated(start_pfn, end_pfn); if (offlined_pages < 0) { diff --git a/mm/migrate.c b/mm/migrate.c index 6a207e8d17ea..857a987e3690 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -115,11 +115,6 @@ int putback_lru_pages(struct list_head *l) return count; } -static inline int is_swap_pte(pte_t pte) -{ - return !pte_none(pte) && !pte_present(pte) && !pte_file(pte); -} - /* * Restore a potential migration pte to a working pte entry */ @@ -645,15 +640,33 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, rcu_read_lock(); rcu_locked = 1; } + /* - * This is a corner case handling. - * When a new swap-cache is read into, it is linked to LRU - * and treated as swapcache but has no rmap yet. - * Calling try_to_unmap() against a page->mapping==NULL page is - * BUG. So handle it here. + * Corner case handling: + * 1. When a new swap-cache page is read into, it is added to the LRU + * and treated as swapcache but it has no rmap yet. + * Calling try_to_unmap() against a page->mapping==NULL page will + * trigger a BUG. So handle it here. + * 2. An orphaned page (see truncate_complete_page) might have + * fs-private metadata. The page can be picked up due to memory + * offlining. Everywhere else except page reclaim, the page is + * invisible to the vm, so the page can not be migrated. So try to + * free the metadata, so the page can be freed. */ - if (!page->mapping) + if (!page->mapping) { + if (!PageAnon(page) && PagePrivate(page)) { + /* + * Go direct to try_to_free_buffers() here because + * a) that's what try_to_release_page() would do anyway + * b) we may be under rcu_read_lock() here, so we can't + * use GFP_KERNEL which is what try_to_release_page() + * needs to be effective. + */ + try_to_free_buffers(page); + } goto rcu_unlock; + } + /* Establish migration ptes or remove ptes */ try_to_unmap(page, 1); diff --git a/mm/mmap.c b/mm/mmap.c index 8295577a83b2..bb4c963cc534 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -36,6 +36,10 @@ #define arch_mmap_check(addr, len, flags) (0) #endif +#ifndef arch_rebalance_pgtables +#define arch_rebalance_pgtables(addr, len) (addr) +#endif + static void unmap_region(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, unsigned long start, unsigned long end); @@ -1424,7 +1428,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, if (addr & ~PAGE_MASK) return -EINVAL; - return addr; + return arch_rebalance_pgtables(addr, len); } EXPORT_SYMBOL(get_unmapped_area); diff --git a/mm/nommu.c b/mm/nommu.c index b989cb928a7c..5d8ae086f74e 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -10,6 +10,7 @@ * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com> * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org> * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com> + * Copyright (c) 2007 Paul Mundt <lethal@linux-sh.org> */ #include <linux/module.h> @@ -167,7 +168,7 @@ EXPORT_SYMBOL(get_user_pages); DEFINE_RWLOCK(vmlist_lock); struct vm_struct *vmlist; -void vfree(void *addr) +void vfree(const void *addr) { kfree(addr); } @@ -183,13 +184,33 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) } EXPORT_SYMBOL(__vmalloc); -struct page * vmalloc_to_page(void *addr) +void *vmalloc_user(unsigned long size) +{ + void *ret; + + ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, + PAGE_KERNEL); + if (ret) { + struct vm_area_struct *vma; + + down_write(¤t->mm->mmap_sem); + vma = find_vma(current->mm, (unsigned long)ret); + if (vma) + vma->vm_flags |= VM_USERMAP; + up_write(¤t->mm->mmap_sem); + } + + return ret; +} +EXPORT_SYMBOL(vmalloc_user); + +struct page *vmalloc_to_page(const void *addr) { return virt_to_page(addr); } EXPORT_SYMBOL(vmalloc_to_page); -unsigned long vmalloc_to_pfn(void *addr) +unsigned long vmalloc_to_pfn(const void *addr) { return page_to_pfn(virt_to_page(addr)); } @@ -253,10 +274,17 @@ EXPORT_SYMBOL(vmalloc_32); * * The resulting memory area is 32bit addressable and zeroed so it can be * mapped to userspace without leaking data. + * + * VM_USERMAP is set on the corresponding VMA so that subsequent calls to + * remap_vmalloc_range() are permissible. */ void *vmalloc_32_user(unsigned long size) { - return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); + /* + * We'll have to sort out the ZONE_DMA bits for 64-bit, + * but for now this can simply use vmalloc_user() directly. + */ + return vmalloc_user(size); } EXPORT_SYMBOL(vmalloc_32_user); @@ -267,7 +295,7 @@ void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_ } EXPORT_SYMBOL(vmap); -void vunmap(void *addr) +void vunmap(const void *addr) { BUG(); } @@ -1216,6 +1244,21 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long from, } EXPORT_SYMBOL(remap_pfn_range); +int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, + unsigned long pgoff) +{ + unsigned int size = vma->vm_end - vma->vm_start; + + if (!(vma->vm_flags & VM_USERMAP)) + return -EINVAL; + + vma->vm_start = (unsigned long)(addr + (pgoff << PAGE_SHIFT)); + vma->vm_end = vma->vm_start + size; + + return 0; +} +EXPORT_SYMBOL(remap_vmalloc_range); + void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) { } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 96473b482099..c1850bf991cd 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -125,8 +125,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) * Superuser processes are usually more important, so we make it * less likely that we kill those. */ - if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_ADMIN) || - p->uid == 0 || p->euid == 0) + if (__capable(p, CAP_SYS_ADMIN) || __capable(p, CAP_SYS_RESOURCE)) points /= 4; /* @@ -135,7 +134,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) * tend to only have this flag set on applications they think * of as important. */ - if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) + if (__capable(p, CAP_SYS_RAWIO)) points /= 4; /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 3d3848fa6324..5e00f1772c20 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -69,6 +69,12 @@ static inline long sync_writeback_pages(void) int dirty_background_ratio = 5; /* + * free highmem will not be subtracted from the total free memory + * for calculating free ratios if vm_highmem_is_dirtyable is true + */ +int vm_highmem_is_dirtyable; + +/* * The generator of dirty data starts writeback at this percentage */ int vm_dirty_ratio = 10; @@ -219,7 +225,7 @@ static inline void task_dirties_fraction(struct task_struct *tsk, * * dirty -= (dirty/8) * p_{t} */ -void task_dirty_limit(struct task_struct *tsk, long *pdirty) +static void task_dirty_limit(struct task_struct *tsk, long *pdirty) { long numerator, denominator; long dirty = *pdirty; @@ -287,7 +293,10 @@ static unsigned long determine_dirtyable_memory(void) x = global_page_state(NR_FREE_PAGES) + global_page_state(NR_INACTIVE) + global_page_state(NR_ACTIVE); - x -= highmem_dirtyable_memory(x); + + if (!vm_highmem_is_dirtyable) + x -= highmem_dirtyable_memory(x); + return x + 1; /* Ensure that we never return 0 */ } @@ -558,6 +567,7 @@ static void background_writeout(unsigned long _min_pages) global_page_state(NR_UNSTABLE_NFS) < background_thresh && min_pages <= 0) break; + wbc.more_io = 0; wbc.encountered_congestion = 0; wbc.nr_to_write = MAX_WRITEBACK_PAGES; wbc.pages_skipped = 0; @@ -565,8 +575,9 @@ static void background_writeout(unsigned long _min_pages) min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { /* Wrote less than expected */ - congestion_wait(WRITE, HZ/10); - if (!wbc.encountered_congestion) + if (wbc.encountered_congestion || wbc.more_io) + congestion_wait(WRITE, HZ/10); + else break; } } @@ -631,11 +642,12 @@ static void wb_kupdate(unsigned long arg) global_page_state(NR_UNSTABLE_NFS) + (inodes_stat.nr_inodes - inodes_stat.nr_unused); while (nr_to_write > 0) { + wbc.more_io = 0; wbc.encountered_congestion = 0; wbc.nr_to_write = MAX_WRITEBACK_PAGES; writeback_inodes(&wbc); if (wbc.nr_to_write > 0) { - if (wbc.encountered_congestion) + if (wbc.encountered_congestion || wbc.more_io) congestion_wait(WRITE, HZ/10); else break; /* All the old data is written */ @@ -1064,7 +1076,7 @@ static int __set_page_dirty(struct page *page) return 0; } -int fastcall set_page_dirty(struct page *page) +int set_page_dirty(struct page *page) { int ret = __set_page_dirty(page); if (ret) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b2838c24e582..37576b822f06 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -537,7 +537,7 @@ static void __free_pages_ok(struct page *page, unsigned int order) /* * permit the bootmem allocator to evade page validation on high-order frees */ -void fastcall __init __free_pages_bootmem(struct page *page, unsigned int order) +void __init __free_pages_bootmem(struct page *page, unsigned int order) { if (order == 0) { __ClearPageReserved(page); @@ -890,31 +890,51 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) } #endif -static void __drain_pages(unsigned int cpu) +/* + * Drain pages of the indicated processor. + * + * The processor must either be the current processor and the + * thread pinned to the current processor or a processor that + * is not online. + */ +static void drain_pages(unsigned int cpu) { unsigned long flags; struct zone *zone; - int i; for_each_zone(zone) { struct per_cpu_pageset *pset; + struct per_cpu_pages *pcp; if (!populated_zone(zone)) continue; pset = zone_pcp(zone, cpu); - for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { - struct per_cpu_pages *pcp; - - pcp = &pset->pcp[i]; - local_irq_save(flags); - free_pages_bulk(zone, pcp->count, &pcp->list, 0); - pcp->count = 0; - local_irq_restore(flags); - } + + pcp = &pset->pcp; + local_irq_save(flags); + free_pages_bulk(zone, pcp->count, &pcp->list, 0); + pcp->count = 0; + local_irq_restore(flags); } } +/* + * Spill all of this CPU's per-cpu pages back into the buddy allocator. + */ +void drain_local_pages(void *arg) +{ + drain_pages(smp_processor_id()); +} + +/* + * Spill all the per-cpu pages from all CPUs back into the buddy allocator + */ +void drain_all_pages(void) +{ + on_each_cpu(drain_local_pages, NULL, 0, 1); +} + #ifdef CONFIG_HIBERNATION void mark_free_pages(struct zone *zone) @@ -952,40 +972,9 @@ void mark_free_pages(struct zone *zone) #endif /* CONFIG_PM */ /* - * Spill all of this CPU's per-cpu pages back into the buddy allocator. - */ -void drain_local_pages(void) -{ - unsigned long flags; - - local_irq_save(flags); - __drain_pages(smp_processor_id()); - local_irq_restore(flags); -} - -void smp_drain_local_pages(void *arg) -{ - drain_local_pages(); -} - -/* - * Spill all the per-cpu pages from all CPUs back into the buddy allocator - */ -void drain_all_local_pages(void) -{ - unsigned long flags; - - local_irq_save(flags); - __drain_pages(smp_processor_id()); - local_irq_restore(flags); - - smp_call_function(smp_drain_local_pages, NULL, 0, 1); -} - -/* * Free a 0-order page */ -static void fastcall free_hot_cold_page(struct page *page, int cold) +static void free_hot_cold_page(struct page *page, int cold) { struct zone *zone = page_zone(page); struct per_cpu_pages *pcp; @@ -1001,10 +990,13 @@ static void fastcall free_hot_cold_page(struct page *page, int cold) arch_free_page(page, 0); kernel_map_pages(page, 1, 0); - pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; + pcp = &zone_pcp(zone, get_cpu())->pcp; local_irq_save(flags); __count_vm_event(PGFREE); - list_add(&page->lru, &pcp->list); + if (cold) + list_add_tail(&page->lru, &pcp->list); + else + list_add(&page->lru, &pcp->list); set_page_private(page, get_pageblock_migratetype(page)); pcp->count++; if (pcp->count >= pcp->high) { @@ -1015,12 +1007,12 @@ static void fastcall free_hot_cold_page(struct page *page, int cold) put_cpu(); } -void fastcall free_hot_page(struct page *page) +void free_hot_page(struct page *page) { free_hot_cold_page(page, 0); } -void fastcall free_cold_page(struct page *page) +void free_cold_page(struct page *page) { free_hot_cold_page(page, 1); } @@ -1062,7 +1054,7 @@ again: if (likely(order == 0)) { struct per_cpu_pages *pcp; - pcp = &zone_pcp(zone, cpu)->pcp[cold]; + pcp = &zone_pcp(zone, cpu)->pcp; local_irq_save(flags); if (!pcp->count) { pcp->count = rmqueue_bulk(zone, 0, @@ -1072,9 +1064,15 @@ again: } /* Find a page of the appropriate migrate type */ - list_for_each_entry(page, &pcp->list, lru) - if (page_private(page) == migratetype) - break; + if (cold) { + list_for_each_entry_reverse(page, &pcp->list, lru) + if (page_private(page) == migratetype) + break; + } else { + list_for_each_entry(page, &pcp->list, lru) + if (page_private(page) == migratetype) + break; + } /* Allocate more to the pcp list if necessary */ if (unlikely(&page->lru == &pcp->list)) { @@ -1569,7 +1567,7 @@ nofail_alloc: cond_resched(); if (order != 0) - drain_all_local_pages(); + drain_all_pages(); if (likely(did_some_progress)) { page = get_page_from_freelist(gfp_mask, order, @@ -1643,7 +1641,7 @@ EXPORT_SYMBOL(__alloc_pages); /* * Common helper functions. */ -fastcall unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order) +unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order) { struct page * page; page = alloc_pages(gfp_mask, order); @@ -1654,7 +1652,7 @@ fastcall unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order) EXPORT_SYMBOL(__get_free_pages); -fastcall unsigned long get_zeroed_page(gfp_t gfp_mask) +unsigned long get_zeroed_page(gfp_t gfp_mask) { struct page * page; @@ -1680,7 +1678,7 @@ void __pagevec_free(struct pagevec *pvec) free_hot_cold_page(pvec->pages[i], pvec->cold); } -fastcall void __free_pages(struct page *page, unsigned int order) +void __free_pages(struct page *page, unsigned int order) { if (put_page_testzero(page)) { if (order == 0) @@ -1692,7 +1690,7 @@ fastcall void __free_pages(struct page *page, unsigned int order) EXPORT_SYMBOL(__free_pages); -fastcall void free_pages(unsigned long addr, unsigned int order) +void free_pages(unsigned long addr, unsigned int order) { if (addr != 0) { VM_BUG_ON(!virt_addr_valid((void *)addr)); @@ -1801,12 +1799,9 @@ void show_free_areas(void) pageset = zone_pcp(zone, cpu); - printk("CPU %4d: Hot: hi:%5d, btch:%4d usd:%4d " - "Cold: hi:%5d, btch:%4d usd:%4d\n", - cpu, pageset->pcp[0].high, - pageset->pcp[0].batch, pageset->pcp[0].count, - pageset->pcp[1].high, pageset->pcp[1].batch, - pageset->pcp[1].count); + printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n", + cpu, pageset->pcp.high, + pageset->pcp.batch, pageset->pcp.count); } } @@ -1879,6 +1874,8 @@ void show_free_areas(void) printk("= %lukB\n", K(total)); } + printk("%ld total pagecache pages\n", global_page_state(NR_FILE_PAGES)); + show_swap_cache_info(); } @@ -2551,8 +2548,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, } } -static void __meminit zone_init_free_lists(struct pglist_data *pgdat, - struct zone *zone, unsigned long size) +static void __meminit zone_init_free_lists(struct zone *zone) { int order, t; for_each_migratetype_order(order, t) { @@ -2604,17 +2600,11 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) memset(p, 0, sizeof(*p)); - pcp = &p->pcp[0]; /* hot */ + pcp = &p->pcp; pcp->count = 0; pcp->high = 6 * batch; pcp->batch = max(1UL, 1 * batch); INIT_LIST_HEAD(&pcp->list); - - pcp = &p->pcp[1]; /* cold*/ - pcp->count = 0; - pcp->high = 2 * batch; - pcp->batch = max(1UL, batch/2); - INIT_LIST_HEAD(&pcp->list); } /* @@ -2627,7 +2617,7 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p, { struct per_cpu_pages *pcp; - pcp = &p->pcp[0]; /* hot list */ + pcp = &p->pcp; pcp->high = high; pcp->batch = max(1UL, high/4); if ((high/4) > (PAGE_SHIFT * 8)) @@ -2831,7 +2821,7 @@ __meminit int init_currently_empty_zone(struct zone *zone, memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn); - zone_init_free_lists(pgdat, zone, zone->spanned_pages); + zone_init_free_lists(zone); return 0; } @@ -3978,10 +3968,23 @@ static int page_alloc_cpu_notify(struct notifier_block *self, int cpu = (unsigned long)hcpu; if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { - local_irq_disable(); - __drain_pages(cpu); + drain_pages(cpu); + + /* + * Spill the event counters of the dead processor + * into the current processors event counters. + * This artificially elevates the count of the current + * processor. + */ vm_events_fold_cpu(cpu); - local_irq_enable(); + + /* + * Zero the differential counters of the dead processor + * so that the vm statistics are consistent. + * + * This is only okay since the processor is dead and cannot + * race with what we are doing. + */ refresh_cpu_vm_stats(cpu); } return NOTIFY_OK; @@ -4480,7 +4483,7 @@ int set_migratetype_isolate(struct page *page) out: spin_unlock_irqrestore(&zone->lock, flags); if (!ret) - drain_all_local_pages(); + drain_all_pages(); return ret; } diff --git a/mm/page_io.c b/mm/page_io.c index 3b97f6850273..065c4480eaf0 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -126,7 +126,7 @@ int swap_readpage(struct file *file, struct page *page) int ret = 0; BUG_ON(!PageLocked(page)); - ClearPageUptodate(page); + BUG_ON(PageUptodate(page)); bio = get_swap_bio(GFP_KERNEL, page_private(page), page, end_swap_bio_read); if (bio == NULL) { diff --git a/mm/pagewalk.c b/mm/pagewalk.c new file mode 100644 index 000000000000..b4f27d22da91 --- /dev/null +++ b/mm/pagewalk.c @@ -0,0 +1,131 @@ +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/sched.h> + +static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, + const struct mm_walk *walk, void *private) +{ + pte_t *pte; + int err = 0; + + pte = pte_offset_map(pmd, addr); + do { + err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, private); + if (err) + break; + } while (pte++, addr += PAGE_SIZE, addr != end); + + pte_unmap(pte); + return err; +} + +static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, + const struct mm_walk *walk, void *private) +{ + pmd_t *pmd; + unsigned long next; + int err = 0; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + if (pmd_none_or_clear_bad(pmd)) { + if (walk->pte_hole) + err = walk->pte_hole(addr, next, private); + if (err) + break; + continue; + } + if (walk->pmd_entry) + err = walk->pmd_entry(pmd, addr, next, private); + if (!err && walk->pte_entry) + err = walk_pte_range(pmd, addr, next, walk, private); + if (err) + break; + } while (pmd++, addr = next, addr != end); + + return err; +} + +static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, + const struct mm_walk *walk, void *private) +{ + pud_t *pud; + unsigned long next; + int err = 0; + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none_or_clear_bad(pud)) { + if (walk->pte_hole) + err = walk->pte_hole(addr, next, private); + if (err) + break; + continue; + } + if (walk->pud_entry) + err = walk->pud_entry(pud, addr, next, private); + if (!err && (walk->pmd_entry || walk->pte_entry)) + err = walk_pmd_range(pud, addr, next, walk, private); + if (err) + break; + } while (pud++, addr = next, addr != end); + + return err; +} + +/** + * walk_page_range - walk a memory map's page tables with a callback + * @mm - memory map to walk + * @addr - starting address + * @end - ending address + * @walk - set of callbacks to invoke for each level of the tree + * @private - private data passed to the callback function + * + * Recursively walk the page table for the memory area in a VMA, + * calling supplied callbacks. Callbacks are called in-order (first + * PGD, first PUD, first PMD, first PTE, second PTE... second PMD, + * etc.). If lower-level callbacks are omitted, walking depth is reduced. + * + * Each callback receives an entry pointer, the start and end of the + * associated range, and a caller-supplied private data pointer. + * + * No locks are taken, but the bottom level iterator will map PTE + * directories from highmem if necessary. + * + * If any callback returns a non-zero value, the walk is aborted and + * the return value is propagated back to the caller. Otherwise 0 is returned. + */ +int walk_page_range(const struct mm_struct *mm, + unsigned long addr, unsigned long end, + const struct mm_walk *walk, void *private) +{ + pgd_t *pgd; + unsigned long next; + int err = 0; + + if (addr >= end) + return err; + + pgd = pgd_offset(mm, addr); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) { + if (walk->pte_hole) + err = walk->pte_hole(addr, next, private); + if (err) + break; + continue; + } + if (walk->pgd_entry) + err = walk->pgd_entry(pgd, addr, next, private); + if (!err && + (walk->pud_entry || walk->pmd_entry || walk->pte_entry)) + err = walk_pud_range(pgd, addr, next, walk, private); + if (err) + break; + } while (pgd++, addr = next, addr != end); + + return err; +} diff --git a/mm/rmap.c b/mm/rmap.c index dbc2ca2057a5..57ad276900c9 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -36,7 +36,6 @@ * mapping->tree_lock (widely used, in set_page_dirty, * in arch-dependent flush_dcache_mmap_lock, * within inode_lock in __sync_single_inode) - * zone->lock (within radix tree node alloc) */ #include <linux/mm.h> @@ -284,7 +283,10 @@ static int page_referenced_one(struct page *page, if (!pte) goto out; - if (ptep_clear_flush_young(vma, address, pte)) + if (vma->vm_flags & VM_LOCKED) { + referenced++; + *mapcount = 1; /* break early from loop */ + } else if (ptep_clear_flush_young(vma, address, pte)) referenced++; /* Pretend the page is referenced if the task has the diff --git a/mm/shmem.c b/mm/shmem.c index 51b3d6ccddab..0f246c44a574 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -78,11 +78,10 @@ /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ enum sgp_type { - SGP_QUICK, /* don't try more than file page cache lookup */ SGP_READ, /* don't exceed i_size, don't allocate page */ SGP_CACHE, /* don't exceed i_size, may allocate page */ + SGP_DIRTY, /* like SGP_CACHE, but set new page dirty */ SGP_WRITE, /* may exceed i_size, may allocate page */ - SGP_FAULT, /* same as SGP_CACHE, return with page locked */ }; static int shmem_getpage(struct inode *inode, unsigned long idx, @@ -194,7 +193,7 @@ static struct backing_dev_info shmem_backing_dev_info __read_mostly = { }; static LIST_HEAD(shmem_swaplist); -static DEFINE_SPINLOCK(shmem_swaplist_lock); +static DEFINE_MUTEX(shmem_swaplist_mutex); static void shmem_free_blocks(struct inode *inode, long pages) { @@ -207,6 +206,31 @@ static void shmem_free_blocks(struct inode *inode, long pages) } } +static int shmem_reserve_inode(struct super_block *sb) +{ + struct shmem_sb_info *sbinfo = SHMEM_SB(sb); + if (sbinfo->max_inodes) { + spin_lock(&sbinfo->stat_lock); + if (!sbinfo->free_inodes) { + spin_unlock(&sbinfo->stat_lock); + return -ENOSPC; + } + sbinfo->free_inodes--; + spin_unlock(&sbinfo->stat_lock); + } + return 0; +} + +static void shmem_free_inode(struct super_block *sb) +{ + struct shmem_sb_info *sbinfo = SHMEM_SB(sb); + if (sbinfo->max_inodes) { + spin_lock(&sbinfo->stat_lock); + sbinfo->free_inodes++; + spin_unlock(&sbinfo->stat_lock); + } +} + /* * shmem_recalc_inode - recalculate the size of an inode * @@ -731,6 +755,8 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) (void) shmem_getpage(inode, attr->ia_size>>PAGE_CACHE_SHIFT, &page, SGP_READ, NULL); + if (page) + unlock_page(page); } /* * Reset SHMEM_PAGEIN flag so that shmem_truncate can @@ -762,7 +788,6 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) static void shmem_delete_inode(struct inode *inode) { - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); struct shmem_inode_info *info = SHMEM_I(inode); if (inode->i_op->truncate == shmem_truncate) { @@ -771,17 +796,13 @@ static void shmem_delete_inode(struct inode *inode) inode->i_size = 0; shmem_truncate(inode); if (!list_empty(&info->swaplist)) { - spin_lock(&shmem_swaplist_lock); + mutex_lock(&shmem_swaplist_mutex); list_del_init(&info->swaplist); - spin_unlock(&shmem_swaplist_lock); + mutex_unlock(&shmem_swaplist_mutex); } } BUG_ON(inode->i_blocks); - if (sbinfo->max_inodes) { - spin_lock(&sbinfo->stat_lock); - sbinfo->free_inodes++; - spin_unlock(&sbinfo->stat_lock); - } + shmem_free_inode(inode->i_sb); clear_inode(inode); } @@ -807,19 +828,22 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s struct page *subdir; swp_entry_t *ptr; int offset; + int error; idx = 0; ptr = info->i_direct; spin_lock(&info->lock); + if (!info->swapped) { + list_del_init(&info->swaplist); + goto lost2; + } limit = info->next_index; size = limit; if (size > SHMEM_NR_DIRECT) size = SHMEM_NR_DIRECT; offset = shmem_find_swp(entry, ptr, ptr+size); - if (offset >= 0) { - shmem_swp_balance_unmap(); + if (offset >= 0) goto found; - } if (!info->i_indirect) goto lost2; @@ -829,6 +853,14 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) { if (unlikely(idx == stage)) { shmem_dir_unmap(dir-1); + if (cond_resched_lock(&info->lock)) { + /* check it has not been truncated */ + if (limit > info->next_index) { + limit = info->next_index; + if (idx >= limit) + goto lost2; + } + } dir = shmem_dir_map(info->i_indirect) + ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; while (!*dir) { @@ -849,11 +881,11 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s if (size > ENTRIES_PER_PAGE) size = ENTRIES_PER_PAGE; offset = shmem_find_swp(entry, ptr, ptr+size); + shmem_swp_unmap(ptr); if (offset >= 0) { shmem_dir_unmap(dir); goto found; } - shmem_swp_unmap(ptr); } } lost1: @@ -863,19 +895,63 @@ lost2: return 0; found: idx += offset; - inode = &info->vfs_inode; - if (move_from_swap_cache(page, idx, inode->i_mapping) == 0) { - info->flags |= SHMEM_PAGEIN; - shmem_swp_set(info, ptr + offset, 0); - } - shmem_swp_unmap(ptr); + inode = igrab(&info->vfs_inode); spin_unlock(&info->lock); + /* - * Decrement swap count even when the entry is left behind: - * try_to_unuse will skip over mms, then reincrement count. + * Move _head_ to start search for next from here. + * But be careful: shmem_delete_inode checks list_empty without taking + * mutex, and there's an instant in list_move_tail when info->swaplist + * would appear empty, if it were the only one on shmem_swaplist. We + * could avoid doing it if inode NULL; or use this minor optimization. */ - swap_free(entry); - return 1; + if (shmem_swaplist.next != &info->swaplist) + list_move_tail(&shmem_swaplist, &info->swaplist); + mutex_unlock(&shmem_swaplist_mutex); + + error = 1; + if (!inode) + goto out; + error = radix_tree_preload(GFP_KERNEL); + if (error) + goto out; + error = 1; + + spin_lock(&info->lock); + ptr = shmem_swp_entry(info, idx, NULL); + if (ptr && ptr->val == entry.val) + error = add_to_page_cache(page, inode->i_mapping, + idx, GFP_NOWAIT); + if (error == -EEXIST) { + struct page *filepage = find_get_page(inode->i_mapping, idx); + error = 1; + if (filepage) { + /* + * There might be a more uptodate page coming down + * from a stacked writepage: forget our swappage if so. + */ + if (PageUptodate(filepage)) + error = 0; + page_cache_release(filepage); + } + } + if (!error) { + delete_from_swap_cache(page); + set_page_dirty(page); + info->flags |= SHMEM_PAGEIN; + shmem_swp_set(info, ptr, 0); + swap_free(entry); + error = 1; /* not an error, but entry was found */ + } + if (ptr) + shmem_swp_unmap(ptr); + spin_unlock(&info->lock); + radix_tree_preload_end(); +out: + unlock_page(page); + page_cache_release(page); + iput(inode); /* allows for NULL */ + return error; } /* @@ -887,20 +963,16 @@ int shmem_unuse(swp_entry_t entry, struct page *page) struct shmem_inode_info *info; int found = 0; - spin_lock(&shmem_swaplist_lock); + mutex_lock(&shmem_swaplist_mutex); list_for_each_safe(p, next, &shmem_swaplist) { info = list_entry(p, struct shmem_inode_info, swaplist); - if (!info->swapped) - list_del_init(&info->swaplist); - else if (shmem_unuse_inode(info, entry, page)) { - /* move head to start search for next from here */ - list_move_tail(&shmem_swaplist, &info->swaplist); - found = 1; - break; - } + found = shmem_unuse_inode(info, entry, page); + cond_resched(); + if (found) + goto out; } - spin_unlock(&shmem_swaplist_lock); - return found; + mutex_unlock(&shmem_swaplist_mutex); +out: return found; /* 0 or 1 or -ENOMEM */ } /* @@ -915,54 +987,65 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) struct inode *inode; BUG_ON(!PageLocked(page)); - /* - * shmem_backing_dev_info's capabilities prevent regular writeback or - * sync from ever calling shmem_writepage; but a stacking filesystem - * may use the ->writepage of its underlying filesystem, in which case - * we want to do nothing when that underlying filesystem is tmpfs - * (writing out to swap is useful as a response to memory pressure, but - * of no use to stabilize the data) - just redirty the page, unlock it - * and claim success in this case. AOP_WRITEPAGE_ACTIVATE, and the - * page_mapped check below, must be avoided unless we're in reclaim. - */ - if (!wbc->for_reclaim) { - set_page_dirty(page); - unlock_page(page); - return 0; - } - BUG_ON(page_mapped(page)); - mapping = page->mapping; index = page->index; inode = mapping->host; info = SHMEM_I(inode); if (info->flags & VM_LOCKED) goto redirty; - swap = get_swap_page(); - if (!swap.val) + if (!total_swap_pages) goto redirty; + /* + * shmem_backing_dev_info's capabilities prevent regular writeback or + * sync from ever calling shmem_writepage; but a stacking filesystem + * may use the ->writepage of its underlying filesystem, in which case + * tmpfs should write out to swap only in response to memory pressure, + * and not for pdflush or sync. However, in those cases, we do still + * want to check if there's a redundant swappage to be discarded. + */ + if (wbc->for_reclaim) + swap = get_swap_page(); + else + swap.val = 0; + spin_lock(&info->lock); - shmem_recalc_inode(inode); if (index >= info->next_index) { BUG_ON(!(info->flags & SHMEM_TRUNCATE)); goto unlock; } entry = shmem_swp_entry(info, index, NULL); - BUG_ON(!entry); - BUG_ON(entry->val); + if (entry->val) { + /* + * The more uptodate page coming down from a stacked + * writepage should replace our old swappage. + */ + free_swap_and_cache(*entry); + shmem_swp_set(info, entry, 0); + } + shmem_recalc_inode(inode); - if (move_to_swap_cache(page, swap) == 0) { + if (swap.val && add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { + remove_from_page_cache(page); shmem_swp_set(info, entry, swap.val); shmem_swp_unmap(entry); + if (list_empty(&info->swaplist)) + inode = igrab(inode); + else + inode = NULL; spin_unlock(&info->lock); - if (list_empty(&info->swaplist)) { - spin_lock(&shmem_swaplist_lock); + swap_duplicate(swap); + BUG_ON(page_mapped(page)); + page_cache_release(page); /* pagecache ref */ + set_page_dirty(page); + unlock_page(page); + if (inode) { + mutex_lock(&shmem_swaplist_mutex); /* move instead of add in case we're racing */ list_move_tail(&info->swaplist, &shmem_swaplist); - spin_unlock(&shmem_swaplist_lock); + mutex_unlock(&shmem_swaplist_mutex); + iput(inode); } - unlock_page(page); return 0; } @@ -972,7 +1055,10 @@ unlock: swap_free(swap); redirty: set_page_dirty(page); - return AOP_WRITEPAGE_ACTIVATE; /* Return with the page locked */ + if (wbc->for_reclaim) + return AOP_WRITEPAGE_ACTIVATE; /* Return with page locked */ + unlock_page(page); + return 0; } #ifdef CONFIG_NUMA @@ -1025,53 +1111,33 @@ out: return err; } -static struct page *shmem_swapin_async(struct shared_policy *p, - swp_entry_t entry, unsigned long idx) +static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, + struct shmem_inode_info *info, unsigned long idx) { - struct page *page; struct vm_area_struct pvma; + struct page *page; /* Create a pseudo vma that just contains the policy */ - memset(&pvma, 0, sizeof(struct vm_area_struct)); - pvma.vm_end = PAGE_SIZE; + pvma.vm_start = 0; pvma.vm_pgoff = idx; - pvma.vm_policy = mpol_shared_policy_lookup(p, idx); - page = read_swap_cache_async(entry, &pvma, 0); + pvma.vm_ops = NULL; + pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); + page = swapin_readahead(entry, gfp, &pvma, 0); mpol_free(pvma.vm_policy); return page; } -static struct page *shmem_swapin(struct shmem_inode_info *info, - swp_entry_t entry, unsigned long idx) -{ - struct shared_policy *p = &info->policy; - int i, num; - struct page *page; - unsigned long offset; - - num = valid_swaphandles(entry, &offset); - for (i = 0; i < num; offset++, i++) { - page = shmem_swapin_async(p, - swp_entry(swp_type(entry), offset), idx); - if (!page) - break; - page_cache_release(page); - } - lru_add_drain(); /* Push any new pages onto the LRU now */ - return shmem_swapin_async(p, entry, idx); -} - -static struct page * -shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info, - unsigned long idx) +static struct page *shmem_alloc_page(gfp_t gfp, + struct shmem_inode_info *info, unsigned long idx) { struct vm_area_struct pvma; struct page *page; - memset(&pvma, 0, sizeof(struct vm_area_struct)); - pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); + /* Create a pseudo vma that just contains the policy */ + pvma.vm_start = 0; pvma.vm_pgoff = idx; - pvma.vm_end = PAGE_SIZE; + pvma.vm_ops = NULL; + pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); page = alloc_page_vma(gfp, &pvma, 0); mpol_free(pvma.vm_policy); return page; @@ -1083,15 +1149,14 @@ static inline int shmem_parse_mpol(char *value, int *policy, return 1; } -static inline struct page * -shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx) +static inline struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, + struct shmem_inode_info *info, unsigned long idx) { - swapin_readahead(entry, 0, NULL); - return read_swap_cache_async(entry, NULL, 0); + return swapin_readahead(entry, gfp, NULL, 0); } -static inline struct page * -shmem_alloc_page(gfp_t gfp,struct shmem_inode_info *info, unsigned long idx) +static inline struct page *shmem_alloc_page(gfp_t gfp, + struct shmem_inode_info *info, unsigned long idx) { return alloc_page(gfp); } @@ -1114,6 +1179,7 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, struct page *swappage; swp_entry_t *entry; swp_entry_t swap; + gfp_t gfp; int error; if (idx >= SHMEM_MAX_INDEX) @@ -1126,7 +1192,7 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, * Normally, filepage is NULL on entry, and either found * uptodate immediately, or allocated and zeroed, or read * in under swappage, which is then assigned to filepage. - * But shmem_readpage and shmem_write_begin pass in a locked + * But shmem_readpage (required for splice) passes in a locked * filepage, which may be found not uptodate by other callers * too, and may need to be copied from the swappage read in. */ @@ -1136,8 +1202,17 @@ repeat: if (filepage && PageUptodate(filepage)) goto done; error = 0; - if (sgp == SGP_QUICK) - goto failed; + gfp = mapping_gfp_mask(mapping); + if (!filepage) { + /* + * Try to preload while we can wait, to not make a habit of + * draining atomic reserves; but don't latch on to this cpu. + */ + error = radix_tree_preload(gfp & ~__GFP_HIGHMEM); + if (error) + goto failed; + radix_tree_preload_end(); + } spin_lock(&info->lock); shmem_recalc_inode(inode); @@ -1160,7 +1235,7 @@ repeat: *type |= VM_FAULT_MAJOR; } spin_unlock(&info->lock); - swappage = shmem_swapin(info, swap, idx); + swappage = shmem_swapin(swap, gfp, info, idx); if (!swappage) { spin_lock(&info->lock); entry = shmem_swp_alloc(info, idx, sgp); @@ -1218,23 +1293,21 @@ repeat: SetPageUptodate(filepage); set_page_dirty(filepage); swap_free(swap); - } else if (!(error = move_from_swap_cache( - swappage, idx, mapping))) { + } else if (!(error = add_to_page_cache( + swappage, mapping, idx, GFP_NOWAIT))) { info->flags |= SHMEM_PAGEIN; shmem_swp_set(info, entry, 0); shmem_swp_unmap(entry); + delete_from_swap_cache(swappage); spin_unlock(&info->lock); filepage = swappage; + set_page_dirty(filepage); swap_free(swap); } else { shmem_swp_unmap(entry); spin_unlock(&info->lock); unlock_page(swappage); page_cache_release(swappage); - if (error == -ENOMEM) { - /* let kswapd refresh zone for GFP_ATOMICs */ - congestion_wait(WRITE, HZ/50); - } goto repeat; } } else if (sgp == SGP_READ && !filepage) { @@ -1272,9 +1345,7 @@ repeat: if (!filepage) { spin_unlock(&info->lock); - filepage = shmem_alloc_page(mapping_gfp_mask(mapping), - info, - idx); + filepage = shmem_alloc_page(gfp, info, idx); if (!filepage) { shmem_unacct_blocks(info->flags, 1); shmem_free_blocks(inode, 1); @@ -1291,7 +1362,7 @@ repeat: shmem_swp_unmap(entry); } if (error || swap.val || 0 != add_to_page_cache_lru( - filepage, mapping, idx, GFP_ATOMIC)) { + filepage, mapping, idx, GFP_NOWAIT)) { spin_unlock(&info->lock); page_cache_release(filepage); shmem_unacct_blocks(info->flags, 1); @@ -1309,14 +1380,11 @@ repeat: clear_highpage(filepage); flush_dcache_page(filepage); SetPageUptodate(filepage); + if (sgp == SGP_DIRTY) + set_page_dirty(filepage); } done: - if (*pagep != filepage) { - *pagep = filepage; - if (sgp != SGP_FAULT) - unlock_page(filepage); - - } + *pagep = filepage; return 0; failed: @@ -1336,7 +1404,7 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) return VM_FAULT_SIGBUS; - error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_FAULT, &ret); + error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); if (error) return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); @@ -1399,15 +1467,8 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) struct shmem_inode_info *info; struct shmem_sb_info *sbinfo = SHMEM_SB(sb); - if (sbinfo->max_inodes) { - spin_lock(&sbinfo->stat_lock); - if (!sbinfo->free_inodes) { - spin_unlock(&sbinfo->stat_lock); - return NULL; - } - sbinfo->free_inodes--; - spin_unlock(&sbinfo->stat_lock); - } + if (shmem_reserve_inode(sb)) + return NULL; inode = new_inode(sb); if (inode) { @@ -1451,11 +1512,8 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) NULL); break; } - } else if (sbinfo->max_inodes) { - spin_lock(&sbinfo->stat_lock); - sbinfo->free_inodes++; - spin_unlock(&sbinfo->stat_lock); - } + } else + shmem_free_inode(sb); return inode; } @@ -1494,123 +1552,30 @@ shmem_write_end(struct file *file, struct address_space *mapping, { struct inode *inode = mapping->host; + if (pos + copied > inode->i_size) + i_size_write(inode, pos + copied); + + unlock_page(page); set_page_dirty(page); page_cache_release(page); - if (pos+copied > inode->i_size) - i_size_write(inode, pos+copied); - return copied; } -static ssize_t -shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) -{ - struct inode *inode = file->f_path.dentry->d_inode; - loff_t pos; - unsigned long written; - ssize_t err; - - if ((ssize_t) count < 0) - return -EINVAL; - - if (!access_ok(VERIFY_READ, buf, count)) - return -EFAULT; - - mutex_lock(&inode->i_mutex); - - pos = *ppos; - written = 0; - - err = generic_write_checks(file, &pos, &count, 0); - if (err || !count) - goto out; - - err = remove_suid(file->f_path.dentry); - if (err) - goto out; - - inode->i_ctime = inode->i_mtime = CURRENT_TIME; - - do { - struct page *page = NULL; - unsigned long bytes, index, offset; - char *kaddr; - int left; - - offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ - index = pos >> PAGE_CACHE_SHIFT; - bytes = PAGE_CACHE_SIZE - offset; - if (bytes > count) - bytes = count; - - /* - * We don't hold page lock across copy from user - - * what would it guard against? - so no deadlock here. - * But it still may be a good idea to prefault below. - */ - - err = shmem_getpage(inode, index, &page, SGP_WRITE, NULL); - if (err) - break; - - left = bytes; - if (PageHighMem(page)) { - volatile unsigned char dummy; - __get_user(dummy, buf); - __get_user(dummy, buf + bytes - 1); - - kaddr = kmap_atomic(page, KM_USER0); - left = __copy_from_user_inatomic(kaddr + offset, - buf, bytes); - kunmap_atomic(kaddr, KM_USER0); - } - if (left) { - kaddr = kmap(page); - left = __copy_from_user(kaddr + offset, buf, bytes); - kunmap(page); - } - - written += bytes; - count -= bytes; - pos += bytes; - buf += bytes; - if (pos > inode->i_size) - i_size_write(inode, pos); - - flush_dcache_page(page); - set_page_dirty(page); - mark_page_accessed(page); - page_cache_release(page); - - if (left) { - pos -= left; - written -= left; - err = -EFAULT; - break; - } - - /* - * Our dirty pages are not counted in nr_dirty, - * and we do not attempt to balance dirty pages. - */ - - cond_resched(); - } while (count); - - *ppos = pos; - if (written) - err = written; -out: - mutex_unlock(&inode->i_mutex); - return err; -} - static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor) { struct inode *inode = filp->f_path.dentry->d_inode; struct address_space *mapping = inode->i_mapping; unsigned long index, offset; + enum sgp_type sgp = SGP_READ; + + /* + * Might this read be for a stacking filesystem? Then when reading + * holes of a sparse file, we actually need to allocate those pages, + * and even mark them dirty, so it cannot exceed the max_blocks limit. + */ + if (segment_eq(get_fs(), KERNEL_DS)) + sgp = SGP_DIRTY; index = *ppos >> PAGE_CACHE_SHIFT; offset = *ppos & ~PAGE_CACHE_MASK; @@ -1629,12 +1594,14 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ break; } - desc->error = shmem_getpage(inode, index, &page, SGP_READ, NULL); + desc->error = shmem_getpage(inode, index, &page, sgp, NULL); if (desc->error) { if (desc->error == -EINVAL) desc->error = 0; break; } + if (page) + unlock_page(page); /* * We must evaluate after, since reads (unlike writes) @@ -1798,22 +1765,16 @@ static int shmem_create(struct inode *dir, struct dentry *dentry, int mode, static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = old_dentry->d_inode; - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + int ret; /* * No ordinary (disk based) filesystem counts links as inodes; * but each new link needs a new dentry, pinning lowmem, and * tmpfs dentries cannot be pruned until they are unlinked. */ - if (sbinfo->max_inodes) { - spin_lock(&sbinfo->stat_lock); - if (!sbinfo->free_inodes) { - spin_unlock(&sbinfo->stat_lock); - return -ENOSPC; - } - sbinfo->free_inodes--; - spin_unlock(&sbinfo->stat_lock); - } + ret = shmem_reserve_inode(inode->i_sb); + if (ret) + goto out; dir->i_size += BOGO_DIRENT_SIZE; inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; @@ -1821,21 +1782,16 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr atomic_inc(&inode->i_count); /* New dentry reference */ dget(dentry); /* Extra pinning count for the created dentry */ d_instantiate(dentry, inode); - return 0; +out: + return ret; } static int shmem_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; - if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) { - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - if (sbinfo->max_inodes) { - spin_lock(&sbinfo->stat_lock); - sbinfo->free_inodes++; - spin_unlock(&sbinfo->stat_lock); - } - } + if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) + shmem_free_inode(inode->i_sb); dir->i_size -= BOGO_DIRENT_SIZE; inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; @@ -1924,6 +1880,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s iput(inode); return error; } + unlock_page(page); inode->i_op = &shmem_symlink_inode_operations; kaddr = kmap_atomic(page, KM_USER0); memcpy(kaddr, symname, len); @@ -1951,6 +1908,8 @@ static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd) struct page *page = NULL; int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); nd_set_link(nd, res ? ERR_PTR(res) : kmap(page)); + if (page) + unlock_page(page); return page; } @@ -1996,8 +1955,7 @@ static int shmem_xattr_security_get(struct inode *inode, const char *name, { if (strcmp(name, "") == 0) return -EINVAL; - return security_inode_getsecurity(inode, name, buffer, size, - -EOPNOTSUPP); + return xattr_getsecurity(inode, name, buffer, size); } static int shmem_xattr_security_set(struct inode *inode, const char *name, @@ -2138,7 +2096,7 @@ static int shmem_parse_options(char *options, int *mode, uid_t *uid, } if (*rest) goto bad_val; - *blocks = size >> PAGE_CACHE_SHIFT; + *blocks = DIV_ROUND_UP(size, PAGE_CACHE_SIZE); } else if (!strcmp(this_char,"nr_blocks")) { *blocks = memparse(value,&rest); if (*rest) @@ -2375,7 +2333,8 @@ static const struct file_operations shmem_file_operations = { #ifdef CONFIG_TMPFS .llseek = generic_file_llseek, .read = shmem_file_read, - .write = shmem_file_write, + .write = do_sync_write, + .aio_write = generic_file_aio_write, .fsync = simple_sync_file, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, diff --git a/mm/slob.c b/mm/slob.c index 773a7aa80ab5..e2c3c0ec5463 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -12,10 +12,17 @@ * allocator is as little as 2 bytes, however typically most architectures * will require 4 bytes on 32-bit and 8 bytes on 64-bit. * - * The slob heap is a linked list of pages from alloc_pages(), and - * within each page, there is a singly-linked list of free blocks (slob_t). - * The heap is grown on demand and allocation from the heap is currently - * first-fit. + * The slob heap is a set of linked list of pages from alloc_pages(), + * and within each page, there is a singly-linked list of free blocks + * (slob_t). The heap is grown on demand. To reduce fragmentation, + * heap pages are segregated into three lists, with objects less than + * 256 bytes, objects less than 1024 bytes, and all other objects. + * + * Allocation from heap involves first searching for a page with + * sufficient free blocks (using a next-fit-like approach) followed by + * a first-fit scan of the page. Deallocation inserts objects back + * into the free list in address order, so this is effectively an + * address-ordered first fit. * * Above this is an implementation of kmalloc/kfree. Blocks returned * from kmalloc are prepended with a 4-byte header with the kmalloc size. @@ -110,9 +117,13 @@ static inline void free_slob_page(struct slob_page *sp) } /* - * All (partially) free slob pages go on this list. + * All partially free slob pages go on these lists. */ -static LIST_HEAD(free_slob_pages); +#define SLOB_BREAK1 256 +#define SLOB_BREAK2 1024 +static LIST_HEAD(free_slob_small); +static LIST_HEAD(free_slob_medium); +static LIST_HEAD(free_slob_large); /* * slob_page: True for all slob pages (false for bigblock pages) @@ -140,9 +151,9 @@ static inline int slob_page_free(struct slob_page *sp) return test_bit(PG_private, &sp->flags); } -static inline void set_slob_page_free(struct slob_page *sp) +static void set_slob_page_free(struct slob_page *sp, struct list_head *list) { - list_add(&sp->list, &free_slob_pages); + list_add(&sp->list, list); __set_bit(PG_private, &sp->flags); } @@ -294,12 +305,20 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node) { struct slob_page *sp; struct list_head *prev; + struct list_head *slob_list; slob_t *b = NULL; unsigned long flags; + if (size < SLOB_BREAK1) + slob_list = &free_slob_small; + else if (size < SLOB_BREAK2) + slob_list = &free_slob_medium; + else + slob_list = &free_slob_large; + spin_lock_irqsave(&slob_lock, flags); /* Iterate through each partially free page, try to find room */ - list_for_each_entry(sp, &free_slob_pages, list) { + list_for_each_entry(sp, slob_list, list) { #ifdef CONFIG_NUMA /* * If there's a node specification, search for a partial @@ -321,9 +340,9 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node) /* Improve fragment distribution and reduce our average * search time by starting our next search here. (see * Knuth vol 1, sec 2.5, pg 449) */ - if (prev != free_slob_pages.prev && - free_slob_pages.next != prev->next) - list_move_tail(&free_slob_pages, prev->next); + if (prev != slob_list->prev && + slob_list->next != prev->next) + list_move_tail(slob_list, prev->next); break; } spin_unlock_irqrestore(&slob_lock, flags); @@ -341,7 +360,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node) sp->free = b; INIT_LIST_HEAD(&sp->list); set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE)); - set_slob_page_free(sp); + set_slob_page_free(sp, slob_list); b = slob_page_alloc(sp, size, align); BUG_ON(!b); spin_unlock_irqrestore(&slob_lock, flags); @@ -387,7 +406,7 @@ static void slob_free(void *block, int size) set_slob(b, units, (void *)((unsigned long)(b + SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK)); - set_slob_page_free(sp); + set_slob_page_free(sp, &free_slob_small); goto out; } @@ -398,6 +417,10 @@ static void slob_free(void *block, int size) sp->units += units; if (b < sp->free) { + if (b + units == sp->free) { + units += slob_units(sp->free); + sp->free = slob_next(sp->free); + } set_slob(b, units, sp->free); sp->free = b; } else { diff --git a/mm/sparse.c b/mm/sparse.c index a2183cb5d524..f6a43c09c322 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -237,7 +237,7 @@ static unsigned long *__kmalloc_section_usemap(void) } #endif /* CONFIG_MEMORY_HOTPLUG */ -static unsigned long *sparse_early_usemap_alloc(unsigned long pnum) +static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum) { unsigned long *usemap; struct mem_section *ms = __nr_to_section(pnum); @@ -353,17 +353,9 @@ static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, return __kmalloc_section_memmap(nr_pages); } -static int vaddr_in_vmalloc_area(void *addr) -{ - if (addr >= (void *)VMALLOC_START && - addr < (void *)VMALLOC_END) - return 1; - return 0; -} - static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) { - if (vaddr_in_vmalloc_area(memmap)) + if (is_vmalloc_addr(memmap)) vfree(memmap); else free_pages((unsigned long)memmap, diff --git a/mm/swap.c b/mm/swap.c index 9ac88323d237..57b7e25a939c 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -41,7 +41,7 @@ static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs) = { 0, }; * This path almost never happens for VM activity - pages are normally * freed via pagevecs. But it gets used by networking. */ -static void fastcall __page_cache_release(struct page *page) +static void __page_cache_release(struct page *page) { if (PageLRU(page)) { unsigned long flags; @@ -165,7 +165,7 @@ int rotate_reclaimable_page(struct page *page) /* * FIXME: speed this up? */ -void fastcall activate_page(struct page *page) +void activate_page(struct page *page) { struct zone *zone = page_zone(page); @@ -186,7 +186,7 @@ void fastcall activate_page(struct page *page) * inactive,referenced -> active,unreferenced * active,unreferenced -> active,referenced */ -void fastcall mark_page_accessed(struct page *page) +void mark_page_accessed(struct page *page) { if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) { activate_page(page); @@ -202,7 +202,7 @@ EXPORT_SYMBOL(mark_page_accessed); * lru_cache_add: add a page to the page lists * @page: the page to add */ -void fastcall lru_cache_add(struct page *page) +void lru_cache_add(struct page *page) { struct pagevec *pvec = &get_cpu_var(lru_add_pvecs); @@ -212,7 +212,7 @@ void fastcall lru_cache_add(struct page *page) put_cpu_var(lru_add_pvecs); } -void fastcall lru_cache_add_active(struct page *page) +void lru_cache_add_active(struct page *page) { struct pagevec *pvec = &get_cpu_var(lru_add_active_pvecs); diff --git a/mm/swap_state.c b/mm/swap_state.c index b52635601dfe..ec42f01a8d02 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -10,6 +10,7 @@ #include <linux/mm.h> #include <linux/kernel_stat.h> #include <linux/swap.h> +#include <linux/swapops.h> #include <linux/init.h> #include <linux/pagemap.h> #include <linux/buffer_head.h> @@ -51,26 +52,22 @@ static struct { unsigned long del_total; unsigned long find_success; unsigned long find_total; - unsigned long noent_race; - unsigned long exist_race; } swap_cache_info; void show_swap_cache_info(void) { - printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n", + printk("Swap cache: add %lu, delete %lu, find %lu/%lu\n", swap_cache_info.add_total, swap_cache_info.del_total, - swap_cache_info.find_success, swap_cache_info.find_total, - swap_cache_info.noent_race, swap_cache_info.exist_race); + swap_cache_info.find_success, swap_cache_info.find_total); printk("Free swap = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10)); printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10)); } /* - * __add_to_swap_cache resembles add_to_page_cache on swapper_space, + * add_to_swap_cache resembles add_to_page_cache on swapper_space, * but sets SwapCache flag and private instead of mapping and index. */ -static int __add_to_swap_cache(struct page *page, swp_entry_t entry, - gfp_t gfp_mask) +int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) { int error; @@ -88,6 +85,7 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry, set_page_private(page, entry.val); total_swapcache_pages++; __inc_zone_page_state(page, NR_FILE_PAGES); + INC_CACHE_INFO(add_total); } write_unlock_irq(&swapper_space.tree_lock); radix_tree_preload_end(); @@ -95,31 +93,6 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry, return error; } -static int add_to_swap_cache(struct page *page, swp_entry_t entry) -{ - int error; - - BUG_ON(PageLocked(page)); - if (!swap_duplicate(entry)) { - INC_CACHE_INFO(noent_race); - return -ENOENT; - } - SetPageLocked(page); - error = __add_to_swap_cache(page, entry, GFP_KERNEL); - /* - * Anon pages are already on the LRU, we don't run lru_cache_add here. - */ - if (error) { - ClearPageLocked(page); - swap_free(entry); - if (error == -EEXIST) - INC_CACHE_INFO(exist_race); - return error; - } - INC_CACHE_INFO(add_total); - return 0; -} - /* * This must be called only on pages that have * been verified to be in the swap cache. @@ -152,6 +125,7 @@ int add_to_swap(struct page * page, gfp_t gfp_mask) int err; BUG_ON(!PageLocked(page)); + BUG_ON(!PageUptodate(page)); for (;;) { entry = get_swap_page(); @@ -169,18 +143,15 @@ int add_to_swap(struct page * page, gfp_t gfp_mask) /* * Add it to the swap cache and mark it dirty */ - err = __add_to_swap_cache(page, entry, + err = add_to_swap_cache(page, entry, gfp_mask|__GFP_NOMEMALLOC|__GFP_NOWARN); switch (err) { case 0: /* Success */ - SetPageUptodate(page); SetPageDirty(page); - INC_CACHE_INFO(add_total); return 1; case -EEXIST: /* Raced with "speculative" read_swap_cache_async */ - INC_CACHE_INFO(exist_race); swap_free(entry); continue; default: @@ -211,40 +182,6 @@ void delete_from_swap_cache(struct page *page) page_cache_release(page); } -/* - * Strange swizzling function only for use by shmem_writepage - */ -int move_to_swap_cache(struct page *page, swp_entry_t entry) -{ - int err = __add_to_swap_cache(page, entry, GFP_ATOMIC); - if (!err) { - remove_from_page_cache(page); - page_cache_release(page); /* pagecache ref */ - if (!swap_duplicate(entry)) - BUG(); - SetPageDirty(page); - INC_CACHE_INFO(add_total); - } else if (err == -EEXIST) - INC_CACHE_INFO(exist_race); - return err; -} - -/* - * Strange swizzling function for shmem_getpage (and shmem_unuse) - */ -int move_from_swap_cache(struct page *page, unsigned long index, - struct address_space *mapping) -{ - int err = add_to_page_cache(page, mapping, index, GFP_ATOMIC); - if (!err) { - delete_from_swap_cache(page); - /* shift page from clean_pages to dirty_pages list */ - ClearPageDirty(page); - set_page_dirty(page); - } - return err; -} - /* * If we are the only user, then try to free up the swap cache. * @@ -317,7 +254,7 @@ struct page * lookup_swap_cache(swp_entry_t entry) * A failure return means that either the page allocation failed or that * the swap entry is no longer in use. */ -struct page *read_swap_cache_async(swp_entry_t entry, +struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, struct vm_area_struct *vma, unsigned long addr) { struct page *found_page, *new_page = NULL; @@ -337,23 +274,27 @@ struct page *read_swap_cache_async(swp_entry_t entry, * Get a new page to read into from swap. */ if (!new_page) { - new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, - vma, addr); + new_page = alloc_page_vma(gfp_mask, vma, addr); if (!new_page) break; /* Out of memory */ } /* + * Swap entry may have been freed since our caller observed it. + */ + if (!swap_duplicate(entry)) + break; + + /* * Associate the page with swap entry in the swap cache. - * May fail (-ENOENT) if swap entry has been freed since - * our caller observed it. May fail (-EEXIST) if there - * is already a page associated with this entry in the - * swap cache: added by a racing read_swap_cache_async, - * or by try_to_swap_out (or shmem_writepage) re-using - * the just freed swap entry for an existing page. + * May fail (-EEXIST) if there is already a page associated + * with this entry in the swap cache: added by a racing + * read_swap_cache_async, or add_to_swap or shmem_writepage + * re-using the just freed swap entry for an existing page. * May fail (-ENOMEM) if radix-tree node allocation failed. */ - err = add_to_swap_cache(new_page, entry); + SetPageLocked(new_page); + err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL); if (!err) { /* * Initiate read into locked page and return. @@ -362,9 +303,57 @@ struct page *read_swap_cache_async(swp_entry_t entry, swap_readpage(NULL, new_page); return new_page; } - } while (err != -ENOENT && err != -ENOMEM); + ClearPageLocked(new_page); + swap_free(entry); + } while (err != -ENOMEM); if (new_page) page_cache_release(new_page); return found_page; } + +/** + * swapin_readahead - swap in pages in hope we need them soon + * @entry: swap entry of this memory + * @vma: user vma this address belongs to + * @addr: target address for mempolicy + * + * Returns the struct page for entry and addr, after queueing swapin. + * + * Primitive swap readahead code. We simply read an aligned block of + * (1 << page_cluster) entries in the swap area. This method is chosen + * because it doesn't cost us any seek time. We also make sure to queue + * the 'original' request together with the readahead ones... + * + * This has been extended to use the NUMA policies from the mm triggering + * the readahead. + * + * Caller must hold down_read on the vma->vm_mm if vma is not NULL. + */ +struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, + struct vm_area_struct *vma, unsigned long addr) +{ + int nr_pages; + struct page *page; + unsigned long offset; + unsigned long end_offset; + + /* + * Get starting offset for readaround, and number of pages to read. + * Adjust starting address by readbehind (for NUMA interleave case)? + * No, it's very unlikely that swap layout would follow vma layout, + * more likely that neighbouring swap pages came from the same node: + * so use the same "addr" to choose the same node for each swap read. + */ + nr_pages = valid_swaphandles(entry, &offset); + for (end_offset = offset + nr_pages; offset < end_offset; offset++) { + /* Ok, do the async read-ahead now */ + page = read_swap_cache_async(swp_entry(swp_type(entry), offset), + gfp_mask, vma, addr); + if (!page) + break; + page_cache_release(page); + } + lru_add_drain(); /* Push any new pages onto the LRU now */ + return read_swap_cache_async(entry, gfp_mask, vma, addr); +} diff --git a/mm/swapfile.c b/mm/swapfile.c index f071648e1360..eade24da9310 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -506,9 +506,19 @@ unsigned int count_swap_pages(int type, int free) * just let do_wp_page work it out if a write is requested later - to * force COW, vm_page_prot omits write permission from any private vma. */ -static void unuse_pte(struct vm_area_struct *vma, pte_t *pte, +static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, swp_entry_t entry, struct page *page) { + spinlock_t *ptl; + pte_t *pte; + int found = 1; + + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) { + found = 0; + goto out; + } + inc_mm_counter(vma->vm_mm, anon_rss); get_page(page); set_pte_at(vma->vm_mm, addr, pte, @@ -520,6 +530,9 @@ static void unuse_pte(struct vm_area_struct *vma, pte_t *pte, * immediately swapped out again after swapon. */ activate_page(page); +out: + pte_unmap_unlock(pte, ptl); + return found; } static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, @@ -528,22 +541,33 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, { pte_t swp_pte = swp_entry_to_pte(entry); pte_t *pte; - spinlock_t *ptl; int found = 0; - pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + /* + * We don't actually need pte lock while scanning for swp_pte: since + * we hold page lock and mmap_sem, swp_pte cannot be inserted into the + * page table while we're scanning; though it could get zapped, and on + * some architectures (e.g. x86_32 with PAE) we might catch a glimpse + * of unmatched parts which look like swp_pte, so unuse_pte must + * recheck under pte lock. Scanning without pte lock lets it be + * preemptible whenever CONFIG_PREEMPT but not CONFIG_HIGHPTE. + */ + pte = pte_offset_map(pmd, addr); do { /* * swapoff spends a _lot_ of time in this loop! * Test inline before going to call unuse_pte. */ if (unlikely(pte_same(*pte, swp_pte))) { - unuse_pte(vma, pte++, addr, entry, page); - found = 1; - break; + pte_unmap(pte); + found = unuse_pte(vma, pmd, addr, entry, page); + if (found) + goto out; + pte = pte_offset_map(pmd, addr); } } while (pte++, addr += PAGE_SIZE, addr != end); - pte_unmap_unlock(pte - 1, ptl); + pte_unmap(pte - 1); +out: return found; } @@ -730,7 +754,8 @@ static int try_to_unuse(unsigned int type) */ swap_map = &si->swap_map[i]; entry = swp_entry(type, i); - page = read_swap_cache_async(entry, NULL, 0); + page = read_swap_cache_async(entry, + GFP_HIGHUSER_MOVABLE, NULL, 0); if (!page) { /* * Either swap_duplicate() failed because entry @@ -789,7 +814,7 @@ static int try_to_unuse(unsigned int type) atomic_inc(&new_start_mm->mm_users); atomic_inc(&prev_mm->mm_users); spin_lock(&mmlist_lock); - while (*swap_map > 1 && !retval && + while (*swap_map > 1 && !retval && !shmem && (p = p->next) != &start_mm->mmlist) { mm = list_entry(p, struct mm_struct, mmlist); if (!atomic_inc_not_zero(&mm->mm_users)) @@ -821,6 +846,13 @@ static int try_to_unuse(unsigned int type) mmput(start_mm); start_mm = new_start_mm; } + if (shmem) { + /* page has already been unlocked and released */ + if (shmem > 0) + continue; + retval = shmem; + break; + } if (retval) { unlock_page(page); page_cache_release(page); @@ -859,12 +891,6 @@ static int try_to_unuse(unsigned int type) * read from disk into another page. Splitting into two * pages would be incorrect if swap supported "shared * private" pages, but they are handled by tmpfs files. - * - * Note shmem_unuse already deleted a swappage from - * the swap cache, unless the move to filepage failed: - * in which case it left swappage in cache, lowered its - * swap count to pass quickly through the loops above, - * and now we must reincrement count to try again later. */ if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) { struct writeback_control wbc = { @@ -875,12 +901,8 @@ static int try_to_unuse(unsigned int type) lock_page(page); wait_on_page_writeback(page); } - if (PageSwapCache(page)) { - if (shmem) - swap_duplicate(entry); - else - delete_from_swap_cache(page); - } + if (PageSwapCache(page)) + delete_from_swap_cache(page); /* * So we could skip searching mms once swap count went @@ -1768,31 +1790,48 @@ get_swap_info_struct(unsigned type) */ int valid_swaphandles(swp_entry_t entry, unsigned long *offset) { + struct swap_info_struct *si; int our_page_cluster = page_cluster; - int ret = 0, i = 1 << our_page_cluster; - unsigned long toff; - struct swap_info_struct *swapdev = swp_type(entry) + swap_info; + pgoff_t target, toff; + pgoff_t base, end; + int nr_pages = 0; if (!our_page_cluster) /* no readahead */ return 0; - toff = (swp_offset(entry) >> our_page_cluster) << our_page_cluster; - if (!toff) /* first page is swap header */ - toff++, i--; - *offset = toff; + + si = &swap_info[swp_type(entry)]; + target = swp_offset(entry); + base = (target >> our_page_cluster) << our_page_cluster; + end = base + (1 << our_page_cluster); + if (!base) /* first page is swap header */ + base++; spin_lock(&swap_lock); - do { - /* Don't read-ahead past the end of the swap area */ - if (toff >= swapdev->max) + if (end > si->max) /* don't go beyond end of map */ + end = si->max; + + /* Count contiguous allocated slots above our target */ + for (toff = target; ++toff < end; nr_pages++) { + /* Don't read in free or bad pages */ + if (!si->swap_map[toff]) + break; + if (si->swap_map[toff] == SWAP_MAP_BAD) break; + } + /* Count contiguous allocated slots below our target */ + for (toff = target; --toff >= base; nr_pages++) { /* Don't read in free or bad pages */ - if (!swapdev->swap_map[toff]) + if (!si->swap_map[toff]) break; - if (swapdev->swap_map[toff] == SWAP_MAP_BAD) + if (si->swap_map[toff] == SWAP_MAP_BAD) break; - toff++; - ret++; - } while (--i); + } spin_unlock(&swap_lock); - return ret; + + /* + * Indicate starting offset, and return number of pages to get: + * if only 1, say 0, since there's then no readahead to be done. + */ + *offset = ++toff; + return nr_pages? ++nr_pages: 0; } diff --git a/mm/tiny-shmem.c b/mm/tiny-shmem.c index d436a9c82db7..702083638c16 100644 --- a/mm/tiny-shmem.c +++ b/mm/tiny-shmem.c @@ -121,18 +121,6 @@ int shmem_unuse(swp_entry_t entry, struct page *page) return 0; } -#if 0 -int shmem_mmap(struct file *file, struct vm_area_struct *vma) -{ - file_accessed(file); -#ifndef CONFIG_MMU - return ramfs_nommu_mmap(file, vma); -#else - return 0; -#endif -} -#endif /* 0 */ - #ifndef CONFIG_MMU unsigned long shmem_get_unmapped_area(struct file *file, unsigned long addr, diff --git a/mm/truncate.c b/mm/truncate.c index c3123b08ff6d..c35c49e54fb6 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -48,7 +48,7 @@ void do_invalidatepage(struct page *page, unsigned long offset) static inline void truncate_partial_page(struct page *page, unsigned partial) { - zero_user_page(page, partial, PAGE_CACHE_SIZE - partial, KM_USER0); + zero_user_segment(page, partial, PAGE_CACHE_SIZE); if (PagePrivate(page)) do_invalidatepage(page, partial); } @@ -84,7 +84,7 @@ EXPORT_SYMBOL(cancel_dirty_page); /* * If truncate cannot remove the fs-private metadata from the page, the page - * becomes anonymous. It will be left on the LRU and may even be mapped into + * becomes orphaned. It will be left on the LRU and may even be mapped into * user pagetables if we're racing with filemap_fault(). * * We need to bale out if page->mapping is no longer equal to the original @@ -98,11 +98,11 @@ truncate_complete_page(struct address_space *mapping, struct page *page) if (page->mapping != mapping) return; - cancel_dirty_page(page, PAGE_CACHE_SIZE); - if (PagePrivate(page)) do_invalidatepage(page, 0); + cancel_dirty_page(page, PAGE_CACHE_SIZE); + remove_from_page_cache(page); ClearPageUptodate(page); ClearPageMappedToDisk(page); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index af77e171e339..0536dde139d1 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -166,6 +166,44 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) } EXPORT_SYMBOL_GPL(map_vm_area); +/* + * Map a vmalloc()-space virtual address to the physical page. + */ +struct page *vmalloc_to_page(const void *vmalloc_addr) +{ + unsigned long addr = (unsigned long) vmalloc_addr; + struct page *page = NULL; + pgd_t *pgd = pgd_offset_k(addr); + pud_t *pud; + pmd_t *pmd; + pte_t *ptep, pte; + + if (!pgd_none(*pgd)) { + pud = pud_offset(pgd, addr); + if (!pud_none(*pud)) { + pmd = pmd_offset(pud, addr); + if (!pmd_none(*pmd)) { + ptep = pte_offset_map(pmd, addr); + pte = *ptep; + if (pte_present(pte)) + page = pte_page(pte); + pte_unmap(ptep); + } + } + } + return page; +} +EXPORT_SYMBOL(vmalloc_to_page); + +/* + * Map a vmalloc()-space virtual address to the physical page frame number. + */ +unsigned long vmalloc_to_pfn(const void *vmalloc_addr) +{ + return page_to_pfn(vmalloc_to_page(vmalloc_addr)); +} +EXPORT_SYMBOL(vmalloc_to_pfn); + static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, int node, gfp_t gfp_mask) @@ -216,6 +254,10 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long fl if (addr > end - size) goto out; } + if ((size + addr) < addr) + goto out; + if (addr > end - size) + goto out; found: area->next = *p; @@ -268,7 +310,7 @@ struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, } /* Caller must hold vmlist_lock */ -static struct vm_struct *__find_vm_area(void *addr) +static struct vm_struct *__find_vm_area(const void *addr) { struct vm_struct *tmp; @@ -281,7 +323,7 @@ static struct vm_struct *__find_vm_area(void *addr) } /* Caller must hold vmlist_lock */ -static struct vm_struct *__remove_vm_area(void *addr) +static struct vm_struct *__remove_vm_area(const void *addr) { struct vm_struct **p, *tmp; @@ -310,7 +352,7 @@ found: * This function returns the found VM area, but using it is NOT safe * on SMP machines, except for its size or flags. */ -struct vm_struct *remove_vm_area(void *addr) +struct vm_struct *remove_vm_area(const void *addr) { struct vm_struct *v; write_lock(&vmlist_lock); @@ -319,7 +361,7 @@ struct vm_struct *remove_vm_area(void *addr) return v; } -static void __vunmap(void *addr, int deallocate_pages) +static void __vunmap(const void *addr, int deallocate_pages) { struct vm_struct *area; @@ -346,8 +388,10 @@ static void __vunmap(void *addr, int deallocate_pages) int i; for (i = 0; i < area->nr_pages; i++) { - BUG_ON(!area->pages[i]); - __free_page(area->pages[i]); + struct page *page = area->pages[i]; + + BUG_ON(!page); + __free_page(page); } if (area->flags & VM_VPAGES) @@ -370,7 +414,7 @@ static void __vunmap(void *addr, int deallocate_pages) * * Must not be called in interrupt context. */ -void vfree(void *addr) +void vfree(const void *addr) { BUG_ON(in_interrupt()); __vunmap(addr, 1); @@ -386,7 +430,7 @@ EXPORT_SYMBOL(vfree); * * Must not be called in interrupt context. */ -void vunmap(void *addr) +void vunmap(const void *addr) { BUG_ON(in_interrupt()); __vunmap(addr, 0); @@ -423,8 +467,8 @@ void *vmap(struct page **pages, unsigned int count, } EXPORT_SYMBOL(vmap); -void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, - pgprot_t prot, int node) +static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, + pgprot_t prot, int node) { struct page **pages; unsigned int nr_pages, array_size, i; @@ -451,15 +495,19 @@ void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, } for (i = 0; i < area->nr_pages; i++) { + struct page *page; + if (node < 0) - area->pages[i] = alloc_page(gfp_mask); + page = alloc_page(gfp_mask); else - area->pages[i] = alloc_pages_node(node, gfp_mask, 0); - if (unlikely(!area->pages[i])) { + page = alloc_pages_node(node, gfp_mask, 0); + + if (unlikely(!page)) { /* Successfully allocated i pages, free them in __vunmap() */ area->nr_pages = i; goto fail; } + area->pages[i] = page; } if (map_vm_area(area, prot, &pages)) diff --git a/mm/vmstat.c b/mm/vmstat.c index e8d846f57774..422d960ffcd8 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -21,21 +21,14 @@ EXPORT_PER_CPU_SYMBOL(vm_event_states); static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask) { - int cpu = 0; + int cpu; int i; memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long)); - cpu = first_cpu(*cpumask); - while (cpu < NR_CPUS) { + for_each_cpu_mask(cpu, *cpumask) { struct vm_event_state *this = &per_cpu(vm_event_states, cpu); - cpu = next_cpu(cpu, *cpumask); - - if (cpu < NR_CPUS) - prefetch(&per_cpu(vm_event_states, cpu)); - - for (i = 0; i < NR_VM_EVENT_ITEMS; i++) ret[i] += this->event[i]; } @@ -284,6 +277,10 @@ EXPORT_SYMBOL(dec_zone_page_state); /* * Update the zone counters for one cpu. * + * The cpu specified must be either the current cpu or a processor that + * is not online. If it is the current cpu then the execution thread must + * be pinned to the current cpu. + * * Note that refresh_cpu_vm_stats strives to only access * node local memory. The per cpu pagesets on remote zones are placed * in the memory local to the processor using that pageset. So the @@ -299,7 +296,7 @@ void refresh_cpu_vm_stats(int cpu) { struct zone *zone; int i; - unsigned long flags; + int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; for_each_zone(zone) { struct per_cpu_pageset *p; @@ -311,15 +308,19 @@ void refresh_cpu_vm_stats(int cpu) for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) if (p->vm_stat_diff[i]) { + unsigned long flags; + int v; + local_irq_save(flags); - zone_page_state_add(p->vm_stat_diff[i], - zone, i); + v = p->vm_stat_diff[i]; p->vm_stat_diff[i] = 0; + local_irq_restore(flags); + atomic_long_add(v, &zone->vm_stat[i]); + global_diff[i] += v; #ifdef CONFIG_NUMA /* 3 seconds idle till flush */ p->expire = 3; #endif - local_irq_restore(flags); } #ifdef CONFIG_NUMA /* @@ -329,7 +330,7 @@ void refresh_cpu_vm_stats(int cpu) * Check if there are pages remaining in this pageset * if not then there is nothing to expire. */ - if (!p->expire || (!p->pcp[0].count && !p->pcp[1].count)) + if (!p->expire || !p->pcp.count) continue; /* @@ -344,13 +345,14 @@ void refresh_cpu_vm_stats(int cpu) if (p->expire) continue; - if (p->pcp[0].count) - drain_zone_pages(zone, p->pcp + 0); - - if (p->pcp[1].count) - drain_zone_pages(zone, p->pcp + 1); + if (p->pcp.count) + drain_zone_pages(zone, &p->pcp); #endif } + + for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) + if (global_diff[i]) + atomic_long_add(global_diff[i], &vm_stat[i]); } #endif @@ -681,20 +683,17 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, "\n pagesets"); for_each_online_cpu(i) { struct per_cpu_pageset *pageset; - int j; pageset = zone_pcp(zone, i); - for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) { - seq_printf(m, - "\n cpu: %i pcp: %i" - "\n count: %i" - "\n high: %i" - "\n batch: %i", - i, j, - pageset->pcp[j].count, - pageset->pcp[j].high, - pageset->pcp[j].batch); - } + seq_printf(m, + "\n cpu: %i" + "\n count: %i" + "\n high: %i" + "\n batch: %i", + i, + pageset->pcp.count, + pageset->pcp.high, + pageset->pcp.batch); #ifdef CONFIG_SMP seq_printf(m, "\n vm stats threshold: %d", pageset->stat_threshold); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index a2241060113b..8cd357f41283 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -547,8 +547,8 @@ int cipso_v4_doi_remove(u32 doi, rcu_read_lock(); list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) if (dom_iter->valid) - netlbl_domhsh_remove(dom_iter->domain, - audit_info); + netlbl_cfg_map_del(dom_iter->domain, + audit_info); rcu_read_unlock(); cipso_v4_cache_invalidate(); call_rcu(&doi_def->rcu, callback); diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index becf91a952ae..c7ad64d664ad 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -90,7 +90,7 @@ static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1 * safely. * */ -static void netlbl_cipsov4_doi_free(struct rcu_head *entry) +void netlbl_cipsov4_doi_free(struct rcu_head *entry) { struct cipso_v4_doi *ptr; diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h index f03cf9b78286..220cb9d06b49 100644 --- a/net/netlabel/netlabel_cipso_v4.h +++ b/net/netlabel/netlabel_cipso_v4.h @@ -163,4 +163,7 @@ enum { /* NetLabel protocol functions */ int netlbl_cipsov4_genl_init(void); +/* Free the memory associated with a CIPSOv4 DOI definition */ +void netlbl_cipsov4_doi_free(struct rcu_head *entry); + #endif diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 3689956c3436..8220990ceb96 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -61,6 +61,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, struct netlbl_audit *audit_info); int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, struct netlbl_audit *audit_info); +int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); int netlbl_domhsh_walk(u32 *skip_bkt, diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index c69e3e1f05c3..39793a1a93aa 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -30,6 +30,7 @@ #include <linux/init.h> #include <linux/types.h> +#include <linux/audit.h> #include <net/ip.h> #include <net/netlabel.h> #include <net/cipso_ipv4.h> @@ -38,10 +39,186 @@ #include "netlabel_domainhash.h" #include "netlabel_unlabeled.h" +#include "netlabel_cipso_v4.h" #include "netlabel_user.h" #include "netlabel_mgmt.h" /* + * Configuration Functions + */ + +/** + * netlbl_cfg_map_del - Remove a NetLabel/LSM domain mapping + * @domain: the domain mapping to remove + * @audit_info: NetLabel audit information + * + * Description: + * Removes a NetLabel/LSM domain mapping. A @domain value of NULL causes the + * default domain mapping to be removed. Returns zero on success, negative + * values on failure. + * + */ +int netlbl_cfg_map_del(const char *domain, struct netlbl_audit *audit_info) +{ + return netlbl_domhsh_remove(domain, audit_info); +} + +/** + * netlbl_cfg_unlbl_add_map - Add an unlabeled NetLabel/LSM domain mapping + * @domain: the domain mapping to add + * @audit_info: NetLabel audit information + * + * Description: + * Adds a new unlabeled NetLabel/LSM domain mapping. A @domain value of NULL + * causes a new default domain mapping to be added. Returns zero on success, + * negative values on failure. + * + */ +int netlbl_cfg_unlbl_add_map(const char *domain, + struct netlbl_audit *audit_info) +{ + int ret_val = -ENOMEM; + struct netlbl_dom_map *entry; + + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + if (entry == NULL) + goto cfg_unlbl_add_map_failure; + if (domain != NULL) { + entry->domain = kstrdup(domain, GFP_ATOMIC); + if (entry->domain == NULL) + goto cfg_unlbl_add_map_failure; + } + entry->type = NETLBL_NLTYPE_UNLABELED; + + ret_val = netlbl_domhsh_add(entry, audit_info); + if (ret_val != 0) + goto cfg_unlbl_add_map_failure; + + return 0; + +cfg_unlbl_add_map_failure: + if (entry != NULL) + kfree(entry->domain); + kfree(entry); + return ret_val; +} + +/** + * netlbl_cfg_cipsov4_add - Add a new CIPSOv4 DOI definition + * @doi_def: the DOI definition + * @audit_info: NetLabel audit information + * + * Description: + * Add a new CIPSOv4 DOI definition to the NetLabel subsystem. Returns zero on + * success, negative values on failure. + * + */ +int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def, + struct netlbl_audit *audit_info) +{ + int ret_val; + const char *type_str; + struct audit_buffer *audit_buf; + + ret_val = cipso_v4_doi_add(doi_def); + + audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, + audit_info); + if (audit_buf != NULL) { + switch (doi_def->type) { + case CIPSO_V4_MAP_STD: + type_str = "std"; + break; + case CIPSO_V4_MAP_PASS: + type_str = "pass"; + break; + default: + type_str = "(unknown)"; + } + audit_log_format(audit_buf, + " cipso_doi=%u cipso_type=%s res=%u", + doi_def->doi, + type_str, + ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + } + + return ret_val; +} + +/** + * netlbl_cfg_cipsov4_add_map - Add a new CIPSOv4 DOI definition and mapping + * @doi_def: the DOI definition + * @domain: the domain mapping to add + * @audit_info: NetLabel audit information + * + * Description: + * Add a new CIPSOv4 DOI definition and NetLabel/LSM domain mapping for this + * new DOI definition to the NetLabel subsystem. A @domain value of NULL adds + * a new default domain mapping. Returns zero on success, negative values on + * failure. + * + */ +int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def, + const char *domain, + struct netlbl_audit *audit_info) +{ + int ret_val = -ENOMEM; + struct netlbl_dom_map *entry; + + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + if (entry == NULL) + goto cfg_cipsov4_add_map_failure; + if (domain != NULL) { + entry->domain = kstrdup(domain, GFP_ATOMIC); + if (entry->domain == NULL) + goto cfg_cipsov4_add_map_failure; + } + entry->type = NETLBL_NLTYPE_CIPSOV4; + entry->type_def.cipsov4 = doi_def; + + /* Grab a RCU read lock here so nothing happens to the doi_def variable + * between adding it to the CIPSOv4 protocol engine and adding a + * domain mapping for it. */ + + rcu_read_lock(); + ret_val = netlbl_cfg_cipsov4_add(doi_def, audit_info); + if (ret_val != 0) + goto cfg_cipsov4_add_map_failure_unlock; + ret_val = netlbl_domhsh_add(entry, audit_info); + if (ret_val != 0) + goto cfg_cipsov4_add_map_failure_remove_doi; + rcu_read_unlock(); + + return 0; + +cfg_cipsov4_add_map_failure_remove_doi: + cipso_v4_doi_remove(doi_def->doi, audit_info, netlbl_cipsov4_doi_free); +cfg_cipsov4_add_map_failure_unlock: + rcu_read_unlock(); +cfg_cipsov4_add_map_failure: + if (entry != NULL) + kfree(entry->domain); + kfree(entry); + return ret_val; +} + +/** + * netlbl_cfg_cipsov4_del - Removean existing CIPSOv4 DOI definition + * @doi: the CIPSO DOI value + * @audit_info: NetLabel audit information + * + * Description: + * Removes an existing CIPSOv4 DOI definition from the NetLabel subsystem. + * Returns zero on success, negative values on failure. + * + */ +int netlbl_cfg_cipsov4_del(u32 doi, struct netlbl_audit *audit_info) +{ + return cipso_v4_doi_remove(doi, audit_info, netlbl_cipsov4_doi_free); +} + +/* * Security Attribute Functions */ diff --git a/security/Kconfig b/security/Kconfig index 389e151e3b68..25ffe1b9dc98 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -105,6 +105,7 @@ config SECURITY_ROOTPLUG If you are unsure how to answer this question, answer N. source security/selinux/Kconfig +source security/smack/Kconfig endmenu diff --git a/security/Makefile b/security/Makefile index ef87df2f50a4..9e8b02525014 100644 --- a/security/Makefile +++ b/security/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_KEYS) += keys/ subdir-$(CONFIG_SECURITY_SELINUX) += selinux +subdir-$(CONFIG_SECURITY_SMACK) += smack # if we don't select a security model, use the default capabilities ifneq ($(CONFIG_SECURITY),y) @@ -14,5 +15,6 @@ endif obj-$(CONFIG_SECURITY) += security.o dummy.o inode.o # Must precede capability.o in order to stack properly. obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o +obj-$(CONFIG_SECURITY_SMACK) += commoncap.o smack/built-in.o obj-$(CONFIG_SECURITY_CAPABILITIES) += commoncap.o capability.o obj-$(CONFIG_SECURITY_ROOTPLUG) += commoncap.o root_plug.o diff --git a/security/commoncap.c b/security/commoncap.c index ea61bc73f6d3..5aba82679a0b 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -1,4 +1,4 @@ -/* Common capabilities, needed by capability.o and root_plug.o +/* Common capabilities, needed by capability.o and root_plug.o * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,20 +25,6 @@ #include <linux/mount.h> #include <linux/sched.h> -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES -/* - * Because of the reduced scope of CAP_SETPCAP when filesystem - * capabilities are in effect, it is safe to allow this capability to - * be available in the default configuration. - */ -# define CAP_INIT_BSET CAP_FULL_SET -#else /* ie. ndef CONFIG_SECURITY_FILE_CAPABILITIES */ -# define CAP_INIT_BSET CAP_INIT_EFF_SET -#endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ - -kernel_cap_t cap_bset = CAP_INIT_BSET; /* systemwide capability bound */ -EXPORT_SYMBOL(cap_bset); - /* Global security state */ unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */ @@ -93,9 +79,9 @@ int cap_capget (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { /* Derived from kernel/capability.c:sys_capget. */ - *effective = cap_t (target->cap_effective); - *inheritable = cap_t (target->cap_inheritable); - *permitted = cap_t (target->cap_permitted); + *effective = target->cap_effective; + *inheritable = target->cap_inheritable; + *permitted = target->cap_permitted; return 0; } @@ -140,6 +126,12 @@ int cap_capset_check (struct task_struct *target, kernel_cap_t *effective, /* incapable of using this inheritable set */ return -EPERM; } + if (!cap_issubset(*inheritable, + cap_combine(target->cap_inheritable, + current->cap_bset))) { + /* no new pI capabilities outside bounding set */ + return -EPERM; + } /* verify restrictions on target's new Permitted set */ if (!cap_issubset (*permitted, @@ -198,28 +190,50 @@ int cap_inode_killpriv(struct dentry *dentry) } static inline int cap_from_disk(struct vfs_cap_data *caps, - struct linux_binprm *bprm, - int size) + struct linux_binprm *bprm, unsigned size) { __u32 magic_etc; + unsigned tocopy, i; - if (size != XATTR_CAPS_SZ) + if (size < sizeof(magic_etc)) return -EINVAL; magic_etc = le32_to_cpu(caps->magic_etc); switch ((magic_etc & VFS_CAP_REVISION_MASK)) { - case VFS_CAP_REVISION: - if (magic_etc & VFS_CAP_FLAGS_EFFECTIVE) - bprm->cap_effective = true; - else - bprm->cap_effective = false; - bprm->cap_permitted = to_cap_t(le32_to_cpu(caps->permitted)); - bprm->cap_inheritable = to_cap_t(le32_to_cpu(caps->inheritable)); - return 0; + case VFS_CAP_REVISION_1: + if (size != XATTR_CAPS_SZ_1) + return -EINVAL; + tocopy = VFS_CAP_U32_1; + break; + case VFS_CAP_REVISION_2: + if (size != XATTR_CAPS_SZ_2) + return -EINVAL; + tocopy = VFS_CAP_U32_2; + break; default: return -EINVAL; } + + if (magic_etc & VFS_CAP_FLAGS_EFFECTIVE) { + bprm->cap_effective = true; + } else { + bprm->cap_effective = false; + } + + for (i = 0; i < tocopy; ++i) { + bprm->cap_permitted.cap[i] = + le32_to_cpu(caps->data[i].permitted); + bprm->cap_inheritable.cap[i] = + le32_to_cpu(caps->data[i].inheritable); + } + while (i < VFS_CAP_U32) { + bprm->cap_permitted.cap[i] = 0; + bprm->cap_inheritable.cap[i] = 0; + i++; + } + + return 0; } /* Locate any VFS capabilities: */ @@ -227,7 +241,7 @@ static int get_file_caps(struct linux_binprm *bprm) { struct dentry *dentry; int rc = 0; - struct vfs_cap_data incaps; + struct vfs_cap_data vcaps; struct inode *inode; if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) { @@ -240,14 +254,8 @@ static int get_file_caps(struct linux_binprm *bprm) if (!inode->i_op || !inode->i_op->getxattr) goto out; - rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0); - if (rc > 0) { - if (rc == XATTR_CAPS_SZ) - rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, - &incaps, XATTR_CAPS_SZ); - else - rc = -EINVAL; - } + rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, &vcaps, + XATTR_CAPS_SZ); if (rc == -ENODATA || rc == -EOPNOTSUPP) { /* no data, that's ok */ rc = 0; @@ -256,7 +264,7 @@ static int get_file_caps(struct linux_binprm *bprm) if (rc < 0) goto out; - rc = cap_from_disk(&incaps, bprm, rc); + rc = cap_from_disk(&vcaps, bprm, rc); if (rc) printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n", __FUNCTION__, rc, bprm->filename); @@ -321,10 +329,11 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) /* Derived from fs/exec.c:compute_creds. */ kernel_cap_t new_permitted, working; - new_permitted = cap_intersect (bprm->cap_permitted, cap_bset); - working = cap_intersect (bprm->cap_inheritable, + new_permitted = cap_intersect(bprm->cap_permitted, + current->cap_bset); + working = cap_intersect(bprm->cap_inheritable, current->cap_inheritable); - new_permitted = cap_combine (new_permitted, working); + new_permitted = cap_combine(new_permitted, working); if (bprm->e_uid != current->uid || bprm->e_gid != current->gid || !cap_issubset (new_permitted, current->cap_permitted)) { @@ -351,8 +360,10 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) * capability rules */ if (!is_global_init(current)) { current->cap_permitted = new_permitted; - current->cap_effective = bprm->cap_effective ? - new_permitted : 0; + if (bprm->cap_effective) + current->cap_effective = new_permitted; + else + cap_clear(current->cap_effective); } /* AUD: Audit candidate if current->cap_effective is set */ @@ -474,13 +485,15 @@ int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, if (!issecure (SECURE_NO_SETUID_FIXUP)) { if (old_fsuid == 0 && current->fsuid != 0) { - cap_t (current->cap_effective) &= - ~CAP_FS_MASK; + current->cap_effective = + cap_drop_fs_set( + current->cap_effective); } if (old_fsuid != 0 && current->fsuid == 0) { - cap_t (current->cap_effective) |= - (cap_t (current->cap_permitted) & - CAP_FS_MASK); + current->cap_effective = + cap_raise_fs_set( + current->cap_effective, + current->cap_permitted); } } break; @@ -561,6 +574,23 @@ int cap_task_kill(struct task_struct *p, struct siginfo *info, return -EPERM; } + +/* + * called from kernel/sys.c for prctl(PR_CABSET_DROP) + * done without task_capability_lock() because it introduces + * no new races - i.e. only another task doing capget() on + * this task could get inconsistent info. There can be no + * racing writer bc a task can only change its own caps. + */ +long cap_prctl_drop(unsigned long cap) +{ + if (!capable(CAP_SETPCAP)) + return -EPERM; + if (!cap_valid(cap)) + return -EINVAL; + cap_lower(current->cap_bset, cap); + return 0; +} #else int cap_task_setscheduler (struct task_struct *p, int policy, struct sched_param *lp) @@ -584,9 +614,9 @@ int cap_task_kill(struct task_struct *p, struct siginfo *info, void cap_task_reparent_to_init (struct task_struct *p) { - p->cap_effective = CAP_INIT_EFF_SET; - p->cap_inheritable = CAP_INIT_INH_SET; - p->cap_permitted = CAP_FULL_SET; + cap_set_init_eff(p->cap_effective); + cap_clear(p->cap_inheritable); + cap_set_full(p->cap_permitted); p->keep_capabilities = 0; return; } diff --git a/security/dummy.c b/security/dummy.c index 48d4b0a52737..649326bf64ea 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -36,14 +36,19 @@ static int dummy_ptrace (struct task_struct *parent, struct task_struct *child) static int dummy_capget (struct task_struct *target, kernel_cap_t * effective, kernel_cap_t * inheritable, kernel_cap_t * permitted) { - *effective = *inheritable = *permitted = 0; if (target->euid == 0) { - *permitted |= (~0 & ~CAP_FS_MASK); - *effective |= (~0 & ~CAP_TO_MASK(CAP_SETPCAP) & ~CAP_FS_MASK); + cap_set_full(*permitted); + cap_set_init_eff(*effective); + } else { + cap_clear(*permitted); + cap_clear(*effective); } - if (target->fsuid == 0) { - *permitted |= CAP_FS_MASK; - *effective |= CAP_FS_MASK; + + cap_clear(*inheritable); + + if (target->fsuid != 0) { + *permitted = cap_drop_fs_set(*permitted); + *effective = cap_drop_fs_set(*effective); } return 0; } @@ -402,7 +407,7 @@ static int dummy_inode_killpriv(struct dentry *dentry) return 0; } -static int dummy_inode_getsecurity(const struct inode *inode, const char *name, void *buffer, size_t size, int err) +static int dummy_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc) { return -EOPNOTSUPP; } diff --git a/security/security.c b/security/security.c index ca475ca206e4..b6c57a6b2ff5 100644 --- a/security/security.c +++ b/security/security.c @@ -493,11 +493,11 @@ int security_inode_killpriv(struct dentry *dentry) return security_ops->inode_killpriv(dentry); } -int security_inode_getsecurity(const struct inode *inode, const char *name, void *buffer, size_t size, int err) +int security_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc) { if (unlikely(IS_PRIVATE(inode))) return 0; - return security_ops->inode_getsecurity(inode, name, buffer, size, err); + return security_ops->inode_getsecurity(inode, name, buffer, alloc); } int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index be6de0b8734f..e5ed07510309 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -136,32 +136,6 @@ static DEFINE_SPINLOCK(sb_security_lock); static struct kmem_cache *sel_inode_cache; -/* Return security context for a given sid or just the context - length if the buffer is null or length is 0 */ -static int selinux_getsecurity(u32 sid, void *buffer, size_t size) -{ - char *context; - unsigned len; - int rc; - - rc = security_sid_to_context(sid, &context, &len); - if (rc) - return rc; - - if (!buffer || !size) - goto getsecurity_exit; - - if (size < len) { - len = -ERANGE; - goto getsecurity_exit; - } - memcpy(buffer, context, len); - -getsecurity_exit: - kfree(context); - return len; -} - /** * selinux_secmark_enabled - Check to see if SECMARK is currently enabled * @@ -2675,14 +2649,27 @@ static int selinux_inode_removexattr (struct dentry *dentry, char *name) * * Permission check is handled by selinux_inode_getxattr hook. */ -static int selinux_inode_getsecurity(const struct inode *inode, const char *name, void *buffer, size_t size, int err) +static int selinux_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc) { + u32 size; + int error; + char *context = NULL; struct inode_security_struct *isec = inode->i_security; if (strcmp(name, XATTR_SELINUX_SUFFIX)) return -EOPNOTSUPP; - return selinux_getsecurity(isec->sid, buffer, size); + error = security_sid_to_context(isec->sid, &context, &size); + if (error) + return error; + error = size; + if (alloc) { + *buffer = context; + goto out_nofree; + } + kfree(context); +out_nofree: + return error; } static int selinux_inode_setsecurity(struct inode *inode, const char *name, diff --git a/security/smack/Kconfig b/security/smack/Kconfig new file mode 100644 index 000000000000..603b08784341 --- /dev/null +++ b/security/smack/Kconfig @@ -0,0 +1,10 @@ +config SECURITY_SMACK + bool "Simplified Mandatory Access Control Kernel Support" + depends on NETLABEL && SECURITY_NETWORK + default n + help + This selects the Simplified Mandatory Access Control Kernel. + Smack is useful for sensitivity, integrity, and a variety + of other mandatory security schemes. + If you are unsure how to answer this question, answer N. + diff --git a/security/smack/Makefile b/security/smack/Makefile new file mode 100644 index 000000000000..67a63aaec827 --- /dev/null +++ b/security/smack/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the SMACK LSM +# + +obj-$(CONFIG_SECURITY_SMACK) := smack.o + +smack-y := smack_lsm.o smack_access.o smackfs.o diff --git a/security/smack/smack.h b/security/smack/smack.h new file mode 100644 index 000000000000..a21a0e907ab3 --- /dev/null +++ b/security/smack/smack.h @@ -0,0 +1,220 @@ +/* + * Copyright (C) 2007 Casey Schaufler <casey@schaufler-ca.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2. + * + * Author: + * Casey Schaufler <casey@schaufler-ca.com> + * + */ + +#ifndef _SECURITY_SMACK_H +#define _SECURITY_SMACK_H + +#include <linux/capability.h> +#include <linux/spinlock.h> +#include <net/netlabel.h> + +/* + * Why 23? CIPSO is constrained to 30, so a 32 byte buffer is + * bigger than can be used, and 24 is the next lower multiple + * of 8, and there are too many issues if there isn't space set + * aside for the terminating null byte. + */ +#define SMK_MAXLEN 23 +#define SMK_LABELLEN (SMK_MAXLEN+1) + +/* + * How many kinds of access are there? + * Here's your answer. + */ +#define SMK_ACCESSDASH '-' +#define SMK_ACCESSLOW "rwxa" +#define SMK_ACCESSKINDS (sizeof(SMK_ACCESSLOW) - 1) + +struct superblock_smack { + char *smk_root; + char *smk_floor; + char *smk_hat; + char *smk_default; + int smk_initialized; + spinlock_t smk_sblock; /* for initialization */ +}; + +struct socket_smack { + char *smk_out; /* outbound label */ + char *smk_in; /* inbound label */ + char smk_packet[SMK_LABELLEN]; /* TCP peer label */ +}; + +/* + * Inode smack data + */ +struct inode_smack { + char *smk_inode; /* label of the fso */ + struct mutex smk_lock; /* initialization lock */ + int smk_flags; /* smack inode flags */ +}; + +#define SMK_INODE_INSTANT 0x01 /* inode is instantiated */ + +/* + * A label access rule. + */ +struct smack_rule { + char *smk_subject; + char *smk_object; + int smk_access; +}; + +/* + * An entry in the table of permitted label accesses. + */ +struct smk_list_entry { + struct smk_list_entry *smk_next; + struct smack_rule smk_rule; +}; + +/* + * An entry in the table mapping smack values to + * CIPSO level/category-set values. + */ +struct smack_cipso { + int smk_level; + char smk_catset[SMK_LABELLEN]; +}; + +/* + * This is the repository for labels seen so that it is + * not necessary to keep allocating tiny chuncks of memory + * and so that they can be shared. + * + * Labels are never modified in place. Anytime a label + * is imported (e.g. xattrset on a file) the list is checked + * for it and it is added if it doesn't exist. The address + * is passed out in either case. Entries are added, but + * never deleted. + * + * Since labels are hanging around anyway it doesn't + * hurt to maintain a secid for those awkward situations + * where kernel components that ought to use LSM independent + * interfaces don't. The secid should go away when all of + * these components have been repaired. + * + * If there is a cipso value associated with the label it + * gets stored here, too. This will most likely be rare as + * the cipso direct mapping in used internally. + */ +struct smack_known { + struct smack_known *smk_next; + char smk_known[SMK_LABELLEN]; + u32 smk_secid; + struct smack_cipso *smk_cipso; + spinlock_t smk_cipsolock; /* for changing cipso map */ +}; + +/* + * Mount options + */ +#define SMK_FSDEFAULT "smackfsdef=" +#define SMK_FSFLOOR "smackfsfloor=" +#define SMK_FSHAT "smackfshat=" +#define SMK_FSROOT "smackfsroot=" + +/* + * xattr names + */ +#define XATTR_SMACK_SUFFIX "SMACK64" +#define XATTR_SMACK_IPIN "SMACK64IPIN" +#define XATTR_SMACK_IPOUT "SMACK64IPOUT" +#define XATTR_NAME_SMACK XATTR_SECURITY_PREFIX XATTR_SMACK_SUFFIX +#define XATTR_NAME_SMACKIPIN XATTR_SECURITY_PREFIX XATTR_SMACK_IPIN +#define XATTR_NAME_SMACKIPOUT XATTR_SECURITY_PREFIX XATTR_SMACK_IPOUT + +/* + * smackfs macic number + */ +#define SMACK_MAGIC 0x43415d53 /* "SMAC" */ + +/* + * A limit on the number of entries in the lists + * makes some of the list administration easier. + */ +#define SMACK_LIST_MAX 10000 + +/* + * CIPSO defaults. + */ +#define SMACK_CIPSO_DOI_DEFAULT 3 /* Historical */ +#define SMACK_CIPSO_DIRECT_DEFAULT 250 /* Arbitrary */ +#define SMACK_CIPSO_MAXCATVAL 63 /* Bigger gets harder */ +#define SMACK_CIPSO_MAXLEVEL 255 /* CIPSO 2.2 standard */ +#define SMACK_CIPSO_MAXCATNUM 239 /* CIPSO 2.2 standard */ + +/* + * Just to make the common cases easier to deal with + */ +#define MAY_ANY (MAY_READ | MAY_WRITE | MAY_APPEND | MAY_EXEC) +#define MAY_ANYREAD (MAY_READ | MAY_EXEC) +#define MAY_ANYWRITE (MAY_WRITE | MAY_APPEND) +#define MAY_READWRITE (MAY_READ | MAY_WRITE) +#define MAY_NOT 0 + +/* + * These functions are in smack_lsm.c + */ +struct inode_smack *new_inode_smack(char *); + +/* + * These functions are in smack_access.c + */ +int smk_access(char *, char *, int); +int smk_curacc(char *, u32); +int smack_to_cipso(const char *, struct smack_cipso *); +void smack_from_cipso(u32, char *, char *); +char *smack_from_secid(const u32); +char *smk_import(const char *, int); +struct smack_known *smk_import_entry(const char *, int); +u32 smack_to_secid(const char *); + +/* + * Shared data. + */ +extern int smack_cipso_direct; +extern int smack_net_nltype; +extern char *smack_net_ambient; + +extern struct smack_known *smack_known; +extern struct smack_known smack_known_floor; +extern struct smack_known smack_known_hat; +extern struct smack_known smack_known_huh; +extern struct smack_known smack_known_invalid; +extern struct smack_known smack_known_star; +extern struct smack_known smack_known_unset; + +extern struct smk_list_entry *smack_list; + +/* + * Stricly for CIPSO level manipulation. + * Set the category bit number in a smack label sized buffer. + */ +static inline void smack_catset_bit(int cat, char *catsetp) +{ + if (cat > SMK_LABELLEN * 8) + return; + + catsetp[(cat - 1) / 8] |= 0x80 >> ((cat - 1) % 8); +} + +/* + * Present a pointer to the smack label in an inode blob. + */ +static inline char *smk_of_inode(const struct inode *isp) +{ + struct inode_smack *sip = isp->i_security; + return sip->smk_inode; +} + +#endif /* _SECURITY_SMACK_H */ diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c new file mode 100644 index 000000000000..f6b5f6eed6dd --- /dev/null +++ b/security/smack/smack_access.c @@ -0,0 +1,356 @@ +/* + * Copyright (C) 2007 Casey Schaufler <casey@schaufler-ca.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2. + * + * Author: + * Casey Schaufler <casey@schaufler-ca.com> + * + */ + +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/sched.h> +#include "smack.h" + +struct smack_known smack_known_unset = { + .smk_next = NULL, + .smk_known = "UNSET", + .smk_secid = 1, + .smk_cipso = NULL, +}; + +struct smack_known smack_known_huh = { + .smk_next = &smack_known_unset, + .smk_known = "?", + .smk_secid = 2, + .smk_cipso = NULL, +}; + +struct smack_known smack_known_hat = { + .smk_next = &smack_known_huh, + .smk_known = "^", + .smk_secid = 3, + .smk_cipso = NULL, +}; + +struct smack_known smack_known_star = { + .smk_next = &smack_known_hat, + .smk_known = "*", + .smk_secid = 4, + .smk_cipso = NULL, +}; + +struct smack_known smack_known_floor = { + .smk_next = &smack_known_star, + .smk_known = "_", + .smk_secid = 5, + .smk_cipso = NULL, +}; + +struct smack_known smack_known_invalid = { + .smk_next = &smack_known_floor, + .smk_known = "", + .smk_secid = 6, + .smk_cipso = NULL, +}; + +struct smack_known *smack_known = &smack_known_invalid; + +/* + * The initial value needs to be bigger than any of the + * known values above. + */ +static u32 smack_next_secid = 10; + +/** + * smk_access - determine if a subject has a specific access to an object + * @subject_label: a pointer to the subject's Smack label + * @object_label: a pointer to the object's Smack label + * @request: the access requested, in "MAY" format + * + * This function looks up the subject/object pair in the + * access rule list and returns 0 if the access is permitted, + * non zero otherwise. + * + * Even though Smack labels are usually shared on smack_list + * labels that come in off the network can't be imported + * and added to the list for locking reasons. + * + * Therefore, it is necessary to check the contents of the labels, + * not just the pointer values. Of course, in most cases the labels + * will be on the list, so checking the pointers may be a worthwhile + * optimization. + */ +int smk_access(char *subject_label, char *object_label, int request) +{ + u32 may = MAY_NOT; + struct smk_list_entry *sp; + struct smack_rule *srp; + + /* + * Hardcoded comparisons. + * + * A star subject can't access any object. + */ + if (subject_label == smack_known_star.smk_known || + strcmp(subject_label, smack_known_star.smk_known) == 0) + return -EACCES; + /* + * A star object can be accessed by any subject. + */ + if (object_label == smack_known_star.smk_known || + strcmp(object_label, smack_known_star.smk_known) == 0) + return 0; + /* + * An object can be accessed in any way by a subject + * with the same label. + */ + if (subject_label == object_label || + strcmp(subject_label, object_label) == 0) + return 0; + /* + * A hat subject can read any object. + * A floor object can be read by any subject. + */ + if ((request & MAY_ANYREAD) == request) { + if (object_label == smack_known_floor.smk_known || + strcmp(object_label, smack_known_floor.smk_known) == 0) + return 0; + if (subject_label == smack_known_hat.smk_known || + strcmp(subject_label, smack_known_hat.smk_known) == 0) + return 0; + } + /* + * Beyond here an explicit relationship is required. + * If the requested access is contained in the available + * access (e.g. read is included in readwrite) it's + * good. + */ + for (sp = smack_list; sp != NULL; sp = sp->smk_next) { + srp = &sp->smk_rule; + + if (srp->smk_subject == subject_label || + strcmp(srp->smk_subject, subject_label) == 0) { + if (srp->smk_object == object_label || + strcmp(srp->smk_object, object_label) == 0) { + may = srp->smk_access; + break; + } + } + } + /* + * This is a bit map operation. + */ + if ((request & may) == request) + return 0; + + return -EACCES; +} + +/** + * smk_curacc - determine if current has a specific access to an object + * @object_label: a pointer to the object's Smack label + * @request: the access requested, in "MAY" format + * + * This function checks the current subject label/object label pair + * in the access rule list and returns 0 if the access is permitted, + * non zero otherwise. It allows that current my have the capability + * to override the rules. + */ +int smk_curacc(char *obj_label, u32 mode) +{ + int rc; + + rc = smk_access(current->security, obj_label, mode); + if (rc == 0) + return 0; + + if (capable(CAP_MAC_OVERRIDE)) + return 0; + + return rc; +} + +static DEFINE_MUTEX(smack_known_lock); + +/** + * smk_import_entry - import a label, return the list entry + * @string: a text string that might be a Smack label + * @len: the maximum size, or zero if it is NULL terminated. + * + * Returns a pointer to the entry in the label list that + * matches the passed string, adding it if necessary. + */ +struct smack_known *smk_import_entry(const char *string, int len) +{ + struct smack_known *skp; + char smack[SMK_LABELLEN]; + int found; + int i; + + if (len <= 0 || len > SMK_MAXLEN) + len = SMK_MAXLEN; + + for (i = 0, found = 0; i < SMK_LABELLEN; i++) { + if (found) + smack[i] = '\0'; + else if (i >= len || string[i] > '~' || string[i] <= ' ' || + string[i] == '/') { + smack[i] = '\0'; + found = 1; + } else + smack[i] = string[i]; + } + + if (smack[0] == '\0') + return NULL; + + mutex_lock(&smack_known_lock); + + for (skp = smack_known; skp != NULL; skp = skp->smk_next) + if (strncmp(skp->smk_known, smack, SMK_MAXLEN) == 0) + break; + + if (skp == NULL) { + skp = kzalloc(sizeof(struct smack_known), GFP_KERNEL); + if (skp != NULL) { + skp->smk_next = smack_known; + strncpy(skp->smk_known, smack, SMK_MAXLEN); + skp->smk_secid = smack_next_secid++; + skp->smk_cipso = NULL; + spin_lock_init(&skp->smk_cipsolock); + /* + * Make sure that the entry is actually + * filled before putting it on the list. + */ + smp_mb(); + smack_known = skp; + } + } + + mutex_unlock(&smack_known_lock); + + return skp; +} + +/** + * smk_import - import a smack label + * @string: a text string that might be a Smack label + * @len: the maximum size, or zero if it is NULL terminated. + * + * Returns a pointer to the label in the label list that + * matches the passed string, adding it if necessary. + */ +char *smk_import(const char *string, int len) +{ + struct smack_known *skp; + + skp = smk_import_entry(string, len); + if (skp == NULL) + return NULL; + return skp->smk_known; +} + +/** + * smack_from_secid - find the Smack label associated with a secid + * @secid: an integer that might be associated with a Smack label + * + * Returns a pointer to the appropraite Smack label if there is one, + * otherwise a pointer to the invalid Smack label. + */ +char *smack_from_secid(const u32 secid) +{ + struct smack_known *skp; + + for (skp = smack_known; skp != NULL; skp = skp->smk_next) + if (skp->smk_secid == secid) + return skp->smk_known; + + /* + * If we got this far someone asked for the translation + * of a secid that is not on the list. + */ + return smack_known_invalid.smk_known; +} + +/** + * smack_to_secid - find the secid associated with a Smack label + * @smack: the Smack label + * + * Returns the appropriate secid if there is one, + * otherwise 0 + */ +u32 smack_to_secid(const char *smack) +{ + struct smack_known *skp; + + for (skp = smack_known; skp != NULL; skp = skp->smk_next) + if (strncmp(skp->smk_known, smack, SMK_MAXLEN) == 0) + return skp->smk_secid; + return 0; +} + +/** + * smack_from_cipso - find the Smack label associated with a CIPSO option + * @level: Bell & LaPadula level from the network + * @cp: Bell & LaPadula categories from the network + * @result: where to put the Smack value + * + * This is a simple lookup in the label table. + * + * This is an odd duck as far as smack handling goes in that + * it sends back a copy of the smack label rather than a pointer + * to the master list. This is done because it is possible for + * a foreign host to send a smack label that is new to this + * machine and hence not on the list. That would not be an + * issue except that adding an entry to the master list can't + * be done at that point. + */ +void smack_from_cipso(u32 level, char *cp, char *result) +{ + struct smack_known *kp; + char *final = NULL; + + for (kp = smack_known; final == NULL && kp != NULL; kp = kp->smk_next) { + if (kp->smk_cipso == NULL) + continue; + + spin_lock_bh(&kp->smk_cipsolock); + + if (kp->smk_cipso->smk_level == level && + memcmp(kp->smk_cipso->smk_catset, cp, SMK_LABELLEN) == 0) + final = kp->smk_known; + + spin_unlock_bh(&kp->smk_cipsolock); + } + if (final == NULL) + final = smack_known_huh.smk_known; + strncpy(result, final, SMK_MAXLEN); + return; +} + +/** + * smack_to_cipso - find the CIPSO option to go with a Smack label + * @smack: a pointer to the smack label in question + * @cp: where to put the result + * + * Returns zero if a value is available, non-zero otherwise. + */ +int smack_to_cipso(const char *smack, struct smack_cipso *cp) +{ + struct smack_known *kp; + + for (kp = smack_known; kp != NULL; kp = kp->smk_next) + if (kp->smk_known == smack || + strcmp(kp->smk_known, smack) == 0) + break; + + if (kp == NULL || kp->smk_cipso == NULL) + return -ENOENT; + + memcpy(cp, kp->smk_cipso, sizeof(struct smack_cipso)); + return 0; +} diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c new file mode 100644 index 000000000000..1c11e4245859 --- /dev/null +++ b/security/smack/smack_lsm.c @@ -0,0 +1,2518 @@ +/* + * Simplified MAC Kernel (smack) security module + * + * This file contains the smack hook function implementations. + * + * Author: + * Casey Schaufler <casey@schaufler-ca.com> + * + * Copyright (C) 2007 Casey Schaufler <casey@schaufler-ca.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, + * as published by the Free Software Foundation. + */ + +#include <linux/xattr.h> +#include <linux/pagemap.h> +#include <linux/mount.h> +#include <linux/stat.h> +#include <linux/ext2_fs.h> +#include <linux/kd.h> +#include <asm/ioctls.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/mutex.h> +#include <linux/pipe_fs_i.h> +#include <net/netlabel.h> +#include <net/cipso_ipv4.h> + +#include "smack.h" + +/* + * I hope these are the hokeyist lines of code in the module. Casey. + */ +#define DEVPTS_SUPER_MAGIC 0x1cd1 +#define SOCKFS_MAGIC 0x534F434B +#define TMPFS_MAGIC 0x01021994 + +/** + * smk_fetch - Fetch the smack label from a file. + * @ip: a pointer to the inode + * @dp: a pointer to the dentry + * + * Returns a pointer to the master list entry for the Smack label + * or NULL if there was no label to fetch. + */ +static char *smk_fetch(struct inode *ip, struct dentry *dp) +{ + int rc; + char in[SMK_LABELLEN]; + + if (ip->i_op->getxattr == NULL) + return NULL; + + rc = ip->i_op->getxattr(dp, XATTR_NAME_SMACK, in, SMK_LABELLEN); + if (rc < 0) + return NULL; + + return smk_import(in, rc); +} + +/** + * new_inode_smack - allocate an inode security blob + * @smack: a pointer to the Smack label to use in the blob + * + * Returns the new blob or NULL if there's no memory available + */ +struct inode_smack *new_inode_smack(char *smack) +{ + struct inode_smack *isp; + + isp = kzalloc(sizeof(struct inode_smack), GFP_KERNEL); + if (isp == NULL) + return NULL; + + isp->smk_inode = smack; + isp->smk_flags = 0; + mutex_init(&isp->smk_lock); + + return isp; +} + +/* + * LSM hooks. + * We he, that is fun! + */ + +/** + * smack_ptrace - Smack approval on ptrace + * @ptp: parent task pointer + * @ctp: child task pointer + * + * Returns 0 if access is OK, an error code otherwise + * + * Do the capability checks, and require read and write. + */ +static int smack_ptrace(struct task_struct *ptp, struct task_struct *ctp) +{ + int rc; + + rc = cap_ptrace(ptp, ctp); + if (rc != 0) + return rc; + + rc = smk_access(ptp->security, ctp->security, MAY_READWRITE); + if (rc != 0 && __capable(ptp, CAP_MAC_OVERRIDE)) + return 0; + + return rc; +} + +/** + * smack_syslog - Smack approval on syslog + * @type: message type + * + * Require that the task has the floor label + * + * Returns 0 on success, error code otherwise. + */ +static int smack_syslog(int type) +{ + int rc; + char *sp = current->security; + + rc = cap_syslog(type); + if (rc != 0) + return rc; + + if (capable(CAP_MAC_OVERRIDE)) + return 0; + + if (sp != smack_known_floor.smk_known) + rc = -EACCES; + + return rc; +} + + +/* + * Superblock Hooks. + */ + +/** + * smack_sb_alloc_security - allocate a superblock blob + * @sb: the superblock getting the blob + * + * Returns 0 on success or -ENOMEM on error. + */ +static int smack_sb_alloc_security(struct super_block *sb) +{ + struct superblock_smack *sbsp; + + sbsp = kzalloc(sizeof(struct superblock_smack), GFP_KERNEL); + + if (sbsp == NULL) + return -ENOMEM; + + sbsp->smk_root = smack_known_floor.smk_known; + sbsp->smk_default = smack_known_floor.smk_known; + sbsp->smk_floor = smack_known_floor.smk_known; + sbsp->smk_hat = smack_known_hat.smk_known; + sbsp->smk_initialized = 0; + spin_lock_init(&sbsp->smk_sblock); + + sb->s_security = sbsp; + + return 0; +} + +/** + * smack_sb_free_security - free a superblock blob + * @sb: the superblock getting the blob + * + */ +static void smack_sb_free_security(struct super_block *sb) +{ + kfree(sb->s_security); + sb->s_security = NULL; +} + +/** + * smack_sb_copy_data - copy mount options data for processing + * @type: file system type + * @orig: where to start + * @smackopts + * + * Returns 0 on success or -ENOMEM on error. + * + * Copy the Smack specific mount options out of the mount + * options list. + */ +static int smack_sb_copy_data(struct file_system_type *type, void *orig, + void *smackopts) +{ + char *cp, *commap, *otheropts, *dp; + + /* Binary mount data: just copy */ + if (type->fs_flags & FS_BINARY_MOUNTDATA) { + copy_page(smackopts, orig); + return 0; + } + + otheropts = (char *)get_zeroed_page(GFP_KERNEL); + if (otheropts == NULL) + return -ENOMEM; + + for (cp = orig, commap = orig; commap != NULL; cp = commap + 1) { + if (strstr(cp, SMK_FSDEFAULT) == cp) + dp = smackopts; + else if (strstr(cp, SMK_FSFLOOR) == cp) + dp = smackopts; + else if (strstr(cp, SMK_FSHAT) == cp) + dp = smackopts; + else if (strstr(cp, SMK_FSROOT) == cp) + dp = smackopts; + else + dp = otheropts; + + commap = strchr(cp, ','); + if (commap != NULL) + *commap = '\0'; + + if (*dp != '\0') + strcat(dp, ","); + strcat(dp, cp); + } + + strcpy(orig, otheropts); + free_page((unsigned long)otheropts); + + return 0; +} + +/** + * smack_sb_kern_mount - Smack specific mount processing + * @sb: the file system superblock + * @data: the smack mount options + * + * Returns 0 on success, an error code on failure + */ +static int smack_sb_kern_mount(struct super_block *sb, void *data) +{ + struct dentry *root = sb->s_root; + struct inode *inode = root->d_inode; + struct superblock_smack *sp = sb->s_security; + struct inode_smack *isp; + char *op; + char *commap; + char *nsp; + + spin_lock(&sp->smk_sblock); + if (sp->smk_initialized != 0) { + spin_unlock(&sp->smk_sblock); + return 0; + } + sp->smk_initialized = 1; + spin_unlock(&sp->smk_sblock); + + for (op = data; op != NULL; op = commap) { + commap = strchr(op, ','); + if (commap != NULL) + *commap++ = '\0'; + + if (strncmp(op, SMK_FSHAT, strlen(SMK_FSHAT)) == 0) { + op += strlen(SMK_FSHAT); + nsp = smk_import(op, 0); + if (nsp != NULL) + sp->smk_hat = nsp; + } else if (strncmp(op, SMK_FSFLOOR, strlen(SMK_FSFLOOR)) == 0) { + op += strlen(SMK_FSFLOOR); + nsp = smk_import(op, 0); + if (nsp != NULL) + sp->smk_floor = nsp; + } else if (strncmp(op, SMK_FSDEFAULT, + strlen(SMK_FSDEFAULT)) == 0) { + op += strlen(SMK_FSDEFAULT); + nsp = smk_import(op, 0); + if (nsp != NULL) + sp->smk_default = nsp; + } else if (strncmp(op, SMK_FSROOT, strlen(SMK_FSROOT)) == 0) { + op += strlen(SMK_FSROOT); + nsp = smk_import(op, 0); + if (nsp != NULL) + sp->smk_root = nsp; + } + } + + /* + * Initialize the root inode. + */ + isp = inode->i_security; + if (isp == NULL) + inode->i_security = new_inode_smack(sp->smk_root); + else + isp->smk_inode = sp->smk_root; + + return 0; +} + +/** + * smack_sb_statfs - Smack check on statfs + * @dentry: identifies the file system in question + * + * Returns 0 if current can read the floor of the filesystem, + * and error code otherwise + */ +static int smack_sb_statfs(struct dentry *dentry) +{ + struct superblock_smack *sbp = dentry->d_sb->s_security; + + return smk_curacc(sbp->smk_floor, MAY_READ); +} + +/** + * smack_sb_mount - Smack check for mounting + * @dev_name: unused + * @nd: mount point + * @type: unused + * @flags: unused + * @data: unused + * + * Returns 0 if current can write the floor of the filesystem + * being mounted on, an error code otherwise. + */ +static int smack_sb_mount(char *dev_name, struct nameidata *nd, + char *type, unsigned long flags, void *data) +{ + struct superblock_smack *sbp = nd->mnt->mnt_sb->s_security; + + return smk_curacc(sbp->smk_floor, MAY_WRITE); +} + +/** + * smack_sb_umount - Smack check for unmounting + * @mnt: file system to unmount + * @flags: unused + * + * Returns 0 if current can write the floor of the filesystem + * being unmounted, an error code otherwise. + */ +static int smack_sb_umount(struct vfsmount *mnt, int flags) +{ + struct superblock_smack *sbp; + + sbp = mnt->mnt_sb->s_security; + + return smk_curacc(sbp->smk_floor, MAY_WRITE); +} + +/* + * Inode hooks + */ + +/** + * smack_inode_alloc_security - allocate an inode blob + * @inode - the inode in need of a blob + * + * Returns 0 if it gets a blob, -ENOMEM otherwise + */ +static int smack_inode_alloc_security(struct inode *inode) +{ + inode->i_security = new_inode_smack(current->security); + if (inode->i_security == NULL) + return -ENOMEM; + return 0; +} + +/** + * smack_inode_free_security - free an inode blob + * @inode - the inode with a blob + * + * Clears the blob pointer in inode + */ +static void smack_inode_free_security(struct inode *inode) +{ + kfree(inode->i_security); + inode->i_security = NULL; +} + +/** + * smack_inode_init_security - copy out the smack from an inode + * @inode: the inode + * @dir: unused + * @name: where to put the attribute name + * @value: where to put the attribute value + * @len: where to put the length of the attribute + * + * Returns 0 if it all works out, -ENOMEM if there's no memory + */ +static int smack_inode_init_security(struct inode *inode, struct inode *dir, + char **name, void **value, size_t *len) +{ + char *isp = smk_of_inode(inode); + + if (name) { + *name = kstrdup(XATTR_SMACK_SUFFIX, GFP_KERNEL); + if (*name == NULL) + return -ENOMEM; + } + + if (value) { + *value = kstrdup(isp, GFP_KERNEL); + if (*value == NULL) + return -ENOMEM; + } + + if (len) + *len = strlen(isp) + 1; + + return 0; +} + +/** + * smack_inode_link - Smack check on link + * @old_dentry: the existing object + * @dir: unused + * @new_dentry: the new object + * + * Returns 0 if access is permitted, an error code otherwise + */ +static int smack_inode_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry) +{ + int rc; + char *isp; + + isp = smk_of_inode(old_dentry->d_inode); + rc = smk_curacc(isp, MAY_WRITE); + + if (rc == 0 && new_dentry->d_inode != NULL) { + isp = smk_of_inode(new_dentry->d_inode); + rc = smk_curacc(isp, MAY_WRITE); + } + + return rc; +} + +/** + * smack_inode_unlink - Smack check on inode deletion + * @dir: containing directory object + * @dentry: file to unlink + * + * Returns 0 if current can write the containing directory + * and the object, error code otherwise + */ +static int smack_inode_unlink(struct inode *dir, struct dentry *dentry) +{ + struct inode *ip = dentry->d_inode; + int rc; + + /* + * You need write access to the thing you're unlinking + */ + rc = smk_curacc(smk_of_inode(ip), MAY_WRITE); + if (rc == 0) + /* + * You also need write access to the containing directory + */ + rc = smk_curacc(smk_of_inode(dir), MAY_WRITE); + + return rc; +} + +/** + * smack_inode_rmdir - Smack check on directory deletion + * @dir: containing directory object + * @dentry: directory to unlink + * + * Returns 0 if current can write the containing directory + * and the directory, error code otherwise + */ +static int smack_inode_rmdir(struct inode *dir, struct dentry *dentry) +{ + int rc; + + /* + * You need write access to the thing you're removing + */ + rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); + if (rc == 0) + /* + * You also need write access to the containing directory + */ + rc = smk_curacc(smk_of_inode(dir), MAY_WRITE); + + return rc; +} + +/** + * smack_inode_rename - Smack check on rename + * @old_inode: the old directory + * @old_dentry: unused + * @new_inode: the new directory + * @new_dentry: unused + * + * Read and write access is required on both the old and + * new directories. + * + * Returns 0 if access is permitted, an error code otherwise + */ +static int smack_inode_rename(struct inode *old_inode, + struct dentry *old_dentry, + struct inode *new_inode, + struct dentry *new_dentry) +{ + int rc; + char *isp; + + isp = smk_of_inode(old_dentry->d_inode); + rc = smk_curacc(isp, MAY_READWRITE); + + if (rc == 0 && new_dentry->d_inode != NULL) { + isp = smk_of_inode(new_dentry->d_inode); + rc = smk_curacc(isp, MAY_READWRITE); + } + + return rc; +} + +/** + * smack_inode_permission - Smack version of permission() + * @inode: the inode in question + * @mask: the access requested + * @nd: unused + * + * This is the important Smack hook. + * + * Returns 0 if access is permitted, -EACCES otherwise + */ +static int smack_inode_permission(struct inode *inode, int mask, + struct nameidata *nd) +{ + /* + * No permission to check. Existence test. Yup, it's there. + */ + if (mask == 0) + return 0; + + return smk_curacc(smk_of_inode(inode), mask); +} + +/** + * smack_inode_setattr - Smack check for setting attributes + * @dentry: the object + * @iattr: for the force flag + * + * Returns 0 if access is permitted, an error code otherwise + */ +static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr) +{ + /* + * Need to allow for clearing the setuid bit. + */ + if (iattr->ia_valid & ATTR_FORCE) + return 0; + + return smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); +} + +/** + * smack_inode_getattr - Smack check for getting attributes + * @mnt: unused + * @dentry: the object + * + * Returns 0 if access is permitted, an error code otherwise + */ +static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) +{ + return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ); +} + +/** + * smack_inode_setxattr - Smack check for setting xattrs + * @dentry: the object + * @name: name of the attribute + * @value: unused + * @size: unused + * @flags: unused + * + * This protects the Smack attribute explicitly. + * + * Returns 0 if access is permitted, an error code otherwise + */ +static int smack_inode_setxattr(struct dentry *dentry, char *name, + void *value, size_t size, int flags) +{ + if (!capable(CAP_MAC_ADMIN)) { + if (strcmp(name, XATTR_NAME_SMACK) == 0 || + strcmp(name, XATTR_NAME_SMACKIPIN) == 0 || + strcmp(name, XATTR_NAME_SMACKIPOUT) == 0) + return -EPERM; + } + + return smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); +} + +/** + * smack_inode_post_setxattr - Apply the Smack update approved above + * @dentry: object + * @name: attribute name + * @value: attribute value + * @size: attribute size + * @flags: unused + * + * Set the pointer in the inode blob to the entry found + * in the master label list. + */ +static void smack_inode_post_setxattr(struct dentry *dentry, char *name, + void *value, size_t size, int flags) +{ + struct inode_smack *isp; + char *nsp; + + /* + * Not SMACK + */ + if (strcmp(name, XATTR_NAME_SMACK)) + return; + + if (size >= SMK_LABELLEN) + return; + + isp = dentry->d_inode->i_security; + + /* + * No locking is done here. This is a pointer + * assignment. + */ + nsp = smk_import(value, size); + if (nsp != NULL) + isp->smk_inode = nsp; + else + isp->smk_inode = smack_known_invalid.smk_known; + + return; +} + +/* + * smack_inode_getxattr - Smack check on getxattr + * @dentry: the object + * @name: unused + * + * Returns 0 if access is permitted, an error code otherwise + */ +static int smack_inode_getxattr(struct dentry *dentry, char *name) +{ + return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ); +} + +/* + * smack_inode_removexattr - Smack check on removexattr + * @dentry: the object + * @name: name of the attribute + * + * Removing the Smack attribute requires CAP_MAC_ADMIN + * + * Returns 0 if access is permitted, an error code otherwise + */ +static int smack_inode_removexattr(struct dentry *dentry, char *name) +{ + if (strcmp(name, XATTR_NAME_SMACK) == 0 && !capable(CAP_MAC_ADMIN)) + return -EPERM; + + return smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); +} + +/** + * smack_inode_getsecurity - get smack xattrs + * @inode: the object + * @name: attribute name + * @buffer: where to put the result + * @size: size of the buffer + * @err: unused + * + * Returns the size of the attribute or an error code + */ +static int smack_inode_getsecurity(const struct inode *inode, + const char *name, void **buffer, + bool alloc) +{ + struct socket_smack *ssp; + struct socket *sock; + struct super_block *sbp; + struct inode *ip = (struct inode *)inode; + char *isp; + int ilen; + int rc = 0; + + if (strcmp(name, XATTR_SMACK_SUFFIX) == 0) { + isp = smk_of_inode(inode); + ilen = strlen(isp) + 1; + *buffer = isp; + return ilen; + } + + /* + * The rest of the Smack xattrs are only on sockets. + */ + sbp = ip->i_sb; + if (sbp->s_magic != SOCKFS_MAGIC) + return -EOPNOTSUPP; + + sock = SOCKET_I(ip); + if (sock == NULL) + return -EOPNOTSUPP; + + ssp = sock->sk->sk_security; + + if (strcmp(name, XATTR_SMACK_IPIN) == 0) + isp = ssp->smk_in; + else if (strcmp(name, XATTR_SMACK_IPOUT) == 0) + isp = ssp->smk_out; + else + return -EOPNOTSUPP; + + ilen = strlen(isp) + 1; + if (rc == 0) { + *buffer = isp; + rc = ilen; + } + + return rc; +} + + +/** + * smack_inode_listsecurity - list the Smack attributes + * @inode: the object + * @buffer: where they go + * @buffer_size: size of buffer + * + * Returns 0 on success, -EINVAL otherwise + */ +static int smack_inode_listsecurity(struct inode *inode, char *buffer, + size_t buffer_size) +{ + int len = strlen(XATTR_NAME_SMACK); + + if (buffer != NULL && len <= buffer_size) { + memcpy(buffer, XATTR_NAME_SMACK, len); + return len; + } + return -EINVAL; +} + +/* + * File Hooks + */ + +/** + * smack_file_permission - Smack check on file operations + * @file: unused + * @mask: unused + * + * Returns 0 + * + * Should access checks be done on each read or write? + * UNICOS and SELinux say yes. + * Trusted Solaris, Trusted Irix, and just about everyone else says no. + * + * I'll say no for now. Smack does not do the frequent + * label changing that SELinux does. + */ +static int smack_file_permission(struct file *file, int mask) +{ + return 0; +} + +/** + * smack_file_alloc_security - assign a file security blob + * @file: the object + * + * The security blob for a file is a pointer to the master + * label list, so no allocation is done. + * + * Returns 0 + */ +static int smack_file_alloc_security(struct file *file) +{ + file->f_security = current->security; + return 0; +} + +/** + * smack_file_free_security - clear a file security blob + * @file: the object + * + * The security blob for a file is a pointer to the master + * label list, so no memory is freed. + */ +static void smack_file_free_security(struct file *file) +{ + file->f_security = NULL; +} + +/** + * smack_file_ioctl - Smack check on ioctls + * @file: the object + * @cmd: what to do + * @arg: unused + * + * Relies heavily on the correct use of the ioctl command conventions. + * + * Returns 0 if allowed, error code otherwise + */ +static int smack_file_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int rc = 0; + + if (_IOC_DIR(cmd) & _IOC_WRITE) + rc = smk_curacc(file->f_security, MAY_WRITE); + + if (rc == 0 && (_IOC_DIR(cmd) & _IOC_READ)) + rc = smk_curacc(file->f_security, MAY_READ); + + return rc; +} + +/** + * smack_file_lock - Smack check on file locking + * @file: the object + * @cmd unused + * + * Returns 0 if current has write access, error code otherwise + */ +static int smack_file_lock(struct file *file, unsigned int cmd) +{ + return smk_curacc(file->f_security, MAY_WRITE); +} + +/** + * smack_file_fcntl - Smack check on fcntl + * @file: the object + * @cmd: what action to check + * @arg: unused + * + * Returns 0 if current has access, error code otherwise + */ +static int smack_file_fcntl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int rc; + + switch (cmd) { + case F_DUPFD: + case F_GETFD: + case F_GETFL: + case F_GETLK: + case F_GETOWN: + case F_GETSIG: + rc = smk_curacc(file->f_security, MAY_READ); + break; + case F_SETFD: + case F_SETFL: + case F_SETLK: + case F_SETLKW: + case F_SETOWN: + case F_SETSIG: + rc = smk_curacc(file->f_security, MAY_WRITE); + break; + default: + rc = smk_curacc(file->f_security, MAY_READWRITE); + } + + return rc; +} + +/** + * smack_file_set_fowner - set the file security blob value + * @file: object in question + * + * Returns 0 + * Further research may be required on this one. + */ +static int smack_file_set_fowner(struct file *file) +{ + file->f_security = current->security; + return 0; +} + +/** + * smack_file_send_sigiotask - Smack on sigio + * @tsk: The target task + * @fown: the object the signal come from + * @signum: unused + * + * Allow a privileged task to get signals even if it shouldn't + * + * Returns 0 if a subject with the object's smack could + * write to the task, an error code otherwise. + */ +static int smack_file_send_sigiotask(struct task_struct *tsk, + struct fown_struct *fown, int signum) +{ + struct file *file; + int rc; + + /* + * struct fown_struct is never outside the context of a struct file + */ + file = container_of(fown, struct file, f_owner); + rc = smk_access(file->f_security, tsk->security, MAY_WRITE); + if (rc != 0 && __capable(tsk, CAP_MAC_OVERRIDE)) + return 0; + return rc; +} + +/** + * smack_file_receive - Smack file receive check + * @file: the object + * + * Returns 0 if current has access, error code otherwise + */ +static int smack_file_receive(struct file *file) +{ + int may = 0; + + /* + * This code relies on bitmasks. + */ + if (file->f_mode & FMODE_READ) + may = MAY_READ; + if (file->f_mode & FMODE_WRITE) + may |= MAY_WRITE; + + return smk_curacc(file->f_security, may); +} + +/* + * Task hooks + */ + +/** + * smack_task_alloc_security - "allocate" a task blob + * @tsk: the task in need of a blob + * + * Smack isn't using copies of blobs. Everyone + * points to an immutable list. No alloc required. + * No data copy required. + * + * Always returns 0 + */ +static int smack_task_alloc_security(struct task_struct *tsk) +{ + tsk->security = current->security; + + return 0; +} + +/** + * smack_task_free_security - "free" a task blob + * @task: the task with the blob + * + * Smack isn't using copies of blobs. Everyone + * points to an immutable list. The blobs never go away. + * There is no leak here. + */ +static void smack_task_free_security(struct task_struct *task) +{ + task->security = NULL; +} + +/** + * smack_task_setpgid - Smack check on setting pgid + * @p: the task object + * @pgid: unused + * + * Return 0 if write access is permitted + */ +static int smack_task_setpgid(struct task_struct *p, pid_t pgid) +{ + return smk_curacc(p->security, MAY_WRITE); +} + +/** + * smack_task_getpgid - Smack access check for getpgid + * @p: the object task + * + * Returns 0 if current can read the object task, error code otherwise + */ +static int smack_task_getpgid(struct task_struct *p) +{ + return smk_curacc(p->security, MAY_READ); +} + +/** + * smack_task_getsid - Smack access check for getsid + * @p: the object task + * + * Returns 0 if current can read the object task, error code otherwise + */ +static int smack_task_getsid(struct task_struct *p) +{ + return smk_curacc(p->security, MAY_READ); +} + +/** + * smack_task_getsecid - get the secid of the task + * @p: the object task + * @secid: where to put the result + * + * Sets the secid to contain a u32 version of the smack label. + */ +static void smack_task_getsecid(struct task_struct *p, u32 *secid) +{ + *secid = smack_to_secid(p->security); +} + +/** + * smack_task_setnice - Smack check on setting nice + * @p: the task object + * @nice: unused + * + * Return 0 if write access is permitted + */ +static int smack_task_setnice(struct task_struct *p, int nice) +{ + return smk_curacc(p->security, MAY_WRITE); +} + +/** + * smack_task_setioprio - Smack check on setting ioprio + * @p: the task object + * @ioprio: unused + * + * Return 0 if write access is permitted + */ +static int smack_task_setioprio(struct task_struct *p, int ioprio) +{ + return smk_curacc(p->security, MAY_WRITE); +} + +/** + * smack_task_getioprio - Smack check on reading ioprio + * @p: the task object + * + * Return 0 if read access is permitted + */ +static int smack_task_getioprio(struct task_struct *p) +{ + return smk_curacc(p->security, MAY_READ); +} + +/** + * smack_task_setscheduler - Smack check on setting scheduler + * @p: the task object + * @policy: unused + * @lp: unused + * + * Return 0 if read access is permitted + */ +static int smack_task_setscheduler(struct task_struct *p, int policy, + struct sched_param *lp) +{ + return smk_curacc(p->security, MAY_WRITE); +} + +/** + * smack_task_getscheduler - Smack check on reading scheduler + * @p: the task object + * + * Return 0 if read access is permitted + */ +static int smack_task_getscheduler(struct task_struct *p) +{ + return smk_curacc(p->security, MAY_READ); +} + +/** + * smack_task_movememory - Smack check on moving memory + * @p: the task object + * + * Return 0 if write access is permitted + */ +static int smack_task_movememory(struct task_struct *p) +{ + return smk_curacc(p->security, MAY_WRITE); +} + +/** + * smack_task_kill - Smack check on signal delivery + * @p: the task object + * @info: unused + * @sig: unused + * @secid: identifies the smack to use in lieu of current's + * + * Return 0 if write access is permitted + * + * The secid behavior is an artifact of an SELinux hack + * in the USB code. Someday it may go away. + */ +static int smack_task_kill(struct task_struct *p, struct siginfo *info, + int sig, u32 secid) +{ + /* + * Special cases where signals really ought to go through + * in spite of policy. Stephen Smalley suggests it may + * make sense to change the caller so that it doesn't + * bother with the LSM hook in these cases. + */ + if (info != SEND_SIG_NOINFO && + (is_si_special(info) || SI_FROMKERNEL(info))) + return 0; + /* + * Sending a signal requires that the sender + * can write the receiver. + */ + if (secid == 0) + return smk_curacc(p->security, MAY_WRITE); + /* + * If the secid isn't 0 we're dealing with some USB IO + * specific behavior. This is not clean. For one thing + * we can't take privilege into account. + */ + return smk_access(smack_from_secid(secid), p->security, MAY_WRITE); +} + +/** + * smack_task_wait - Smack access check for waiting + * @p: task to wait for + * + * Returns 0 if current can wait for p, error code otherwise + */ +static int smack_task_wait(struct task_struct *p) +{ + int rc; + + rc = smk_access(current->security, p->security, MAY_WRITE); + if (rc == 0) + return 0; + + /* + * Allow the operation to succeed if either task + * has privilege to perform operations that might + * account for the smack labels having gotten to + * be different in the first place. + * + * This breaks the strict subjet/object access + * control ideal, taking the object's privilege + * state into account in the decision as well as + * the smack value. + */ + if (capable(CAP_MAC_OVERRIDE) || __capable(p, CAP_MAC_OVERRIDE)) + return 0; + + return rc; +} + +/** + * smack_task_to_inode - copy task smack into the inode blob + * @p: task to copy from + * inode: inode to copy to + * + * Sets the smack pointer in the inode security blob + */ +static void smack_task_to_inode(struct task_struct *p, struct inode *inode) +{ + struct inode_smack *isp = inode->i_security; + isp->smk_inode = p->security; +} + +/* + * Socket hooks. + */ + +/** + * smack_sk_alloc_security - Allocate a socket blob + * @sk: the socket + * @family: unused + * @priority: memory allocation priority + * + * Assign Smack pointers to current + * + * Returns 0 on success, -ENOMEM is there's no memory + */ +static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags) +{ + char *csp = current->security; + struct socket_smack *ssp; + + ssp = kzalloc(sizeof(struct socket_smack), gfp_flags); + if (ssp == NULL) + return -ENOMEM; + + ssp->smk_in = csp; + ssp->smk_out = csp; + ssp->smk_packet[0] = '\0'; + + sk->sk_security = ssp; + + return 0; +} + +/** + * smack_sk_free_security - Free a socket blob + * @sk: the socket + * + * Clears the blob pointer + */ +static void smack_sk_free_security(struct sock *sk) +{ + kfree(sk->sk_security); +} + +/** + * smack_set_catset - convert a capset to netlabel mls categories + * @catset: the Smack categories + * @sap: where to put the netlabel categories + * + * Allocates and fills attr.mls.cat + */ +static void smack_set_catset(char *catset, struct netlbl_lsm_secattr *sap) +{ + unsigned char *cp; + unsigned char m; + int cat; + int rc; + int byte; + + if (catset == 0) + return; + + sap->flags |= NETLBL_SECATTR_MLS_CAT; + sap->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); + sap->attr.mls.cat->startbit = 0; + + for (cat = 1, cp = catset, byte = 0; byte < SMK_LABELLEN; cp++, byte++) + for (m = 0x80; m != 0; m >>= 1, cat++) { + if ((m & *cp) == 0) + continue; + rc = netlbl_secattr_catmap_setbit(sap->attr.mls.cat, + cat, GFP_ATOMIC); + } +} + +/** + * smack_to_secattr - fill a secattr from a smack value + * @smack: the smack value + * @nlsp: where the result goes + * + * Casey says that CIPSO is good enough for now. + * It can be used to effect. + * It can also be abused to effect when necessary. + * Appologies to the TSIG group in general and GW in particular. + */ +static void smack_to_secattr(char *smack, struct netlbl_lsm_secattr *nlsp) +{ + struct smack_cipso cipso; + int rc; + + switch (smack_net_nltype) { + case NETLBL_NLTYPE_CIPSOV4: + nlsp->domain = NULL; + nlsp->flags = NETLBL_SECATTR_DOMAIN; + nlsp->flags |= NETLBL_SECATTR_MLS_LVL; + + rc = smack_to_cipso(smack, &cipso); + if (rc == 0) { + nlsp->attr.mls.lvl = cipso.smk_level; + smack_set_catset(cipso.smk_catset, nlsp); + } else { + nlsp->attr.mls.lvl = smack_cipso_direct; + smack_set_catset(smack, nlsp); + } + break; + default: + break; + } +} + +/** + * smack_netlabel - Set the secattr on a socket + * @sk: the socket + * + * Convert the outbound smack value (smk_out) to a + * secattr and attach it to the socket. + * + * Returns 0 on success or an error code + */ +static int smack_netlabel(struct sock *sk) +{ + struct socket_smack *ssp = sk->sk_security; + struct netlbl_lsm_secattr secattr; + int rc = 0; + + netlbl_secattr_init(&secattr); + smack_to_secattr(ssp->smk_out, &secattr); + if (secattr.flags != NETLBL_SECATTR_NONE) + rc = netlbl_sock_setattr(sk, &secattr); + + netlbl_secattr_destroy(&secattr); + return rc; +} + +/** + * smack_inode_setsecurity - set smack xattrs + * @inode: the object + * @name: attribute name + * @value: attribute value + * @size: size of the attribute + * @flags: unused + * + * Sets the named attribute in the appropriate blob + * + * Returns 0 on success, or an error code + */ +static int smack_inode_setsecurity(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + char *sp; + struct inode_smack *nsp = inode->i_security; + struct socket_smack *ssp; + struct socket *sock; + + if (value == NULL || size > SMK_LABELLEN) + return -EACCES; + + sp = smk_import(value, size); + if (sp == NULL) + return -EINVAL; + + if (strcmp(name, XATTR_SMACK_SUFFIX) == 0) { + nsp->smk_inode = sp; + return 0; + } + /* + * The rest of the Smack xattrs are only on sockets. + */ + if (inode->i_sb->s_magic != SOCKFS_MAGIC) + return -EOPNOTSUPP; + + sock = SOCKET_I(inode); + if (sock == NULL) + return -EOPNOTSUPP; + + ssp = sock->sk->sk_security; + + if (strcmp(name, XATTR_SMACK_IPIN) == 0) + ssp->smk_in = sp; + else if (strcmp(name, XATTR_SMACK_IPOUT) == 0) { + ssp->smk_out = sp; + return smack_netlabel(sock->sk); + } else + return -EOPNOTSUPP; + + return 0; +} + +/** + * smack_socket_post_create - finish socket setup + * @sock: the socket + * @family: protocol family + * @type: unused + * @protocol: unused + * @kern: unused + * + * Sets the netlabel information on the socket + * + * Returns 0 on success, and error code otherwise + */ +static int smack_socket_post_create(struct socket *sock, int family, + int type, int protocol, int kern) +{ + if (family != PF_INET) + return 0; + /* + * Set the outbound netlbl. + */ + return smack_netlabel(sock->sk); +} + +/** + * smack_flags_to_may - convert S_ to MAY_ values + * @flags: the S_ value + * + * Returns the equivalent MAY_ value + */ +static int smack_flags_to_may(int flags) +{ + int may = 0; + + if (flags & S_IRUGO) + may |= MAY_READ; + if (flags & S_IWUGO) + may |= MAY_WRITE; + if (flags & S_IXUGO) + may |= MAY_EXEC; + + return may; +} + +/** + * smack_msg_msg_alloc_security - Set the security blob for msg_msg + * @msg: the object + * + * Returns 0 + */ +static int smack_msg_msg_alloc_security(struct msg_msg *msg) +{ + msg->security = current->security; + return 0; +} + +/** + * smack_msg_msg_free_security - Clear the security blob for msg_msg + * @msg: the object + * + * Clears the blob pointer + */ +static void smack_msg_msg_free_security(struct msg_msg *msg) +{ + msg->security = NULL; +} + +/** + * smack_of_shm - the smack pointer for the shm + * @shp: the object + * + * Returns a pointer to the smack value + */ +static char *smack_of_shm(struct shmid_kernel *shp) +{ + return (char *)shp->shm_perm.security; +} + +/** + * smack_shm_alloc_security - Set the security blob for shm + * @shp: the object + * + * Returns 0 + */ +static int smack_shm_alloc_security(struct shmid_kernel *shp) +{ + struct kern_ipc_perm *isp = &shp->shm_perm; + + isp->security = current->security; + return 0; +} + +/** + * smack_shm_free_security - Clear the security blob for shm + * @shp: the object + * + * Clears the blob pointer + */ +static void smack_shm_free_security(struct shmid_kernel *shp) +{ + struct kern_ipc_perm *isp = &shp->shm_perm; + + isp->security = NULL; +} + +/** + * smack_shm_associate - Smack access check for shm + * @shp: the object + * @shmflg: access requested + * + * Returns 0 if current has the requested access, error code otherwise + */ +static int smack_shm_associate(struct shmid_kernel *shp, int shmflg) +{ + char *ssp = smack_of_shm(shp); + int may; + + may = smack_flags_to_may(shmflg); + return smk_curacc(ssp, may); +} + +/** + * smack_shm_shmctl - Smack access check for shm + * @shp: the object + * @cmd: what it wants to do + * + * Returns 0 if current has the requested access, error code otherwise + */ +static int smack_shm_shmctl(struct shmid_kernel *shp, int cmd) +{ + char *ssp = smack_of_shm(shp); + int may; + + switch (cmd) { + case IPC_STAT: + case SHM_STAT: + may = MAY_READ; + break; + case IPC_SET: + case SHM_LOCK: + case SHM_UNLOCK: + case IPC_RMID: + may = MAY_READWRITE; + break; + case IPC_INFO: + case SHM_INFO: + /* + * System level information. + */ + return 0; + default: + return -EINVAL; + } + + return smk_curacc(ssp, may); +} + +/** + * smack_shm_shmat - Smack access for shmat + * @shp: the object + * @shmaddr: unused + * @shmflg: access requested + * + * Returns 0 if current has the requested access, error code otherwise + */ +static int smack_shm_shmat(struct shmid_kernel *shp, char __user *shmaddr, + int shmflg) +{ + char *ssp = smack_of_shm(shp); + int may; + + may = smack_flags_to_may(shmflg); + return smk_curacc(ssp, may); +} + +/** + * smack_of_sem - the smack pointer for the sem + * @sma: the object + * + * Returns a pointer to the smack value + */ +static char *smack_of_sem(struct sem_array *sma) +{ + return (char *)sma->sem_perm.security; +} + +/** + * smack_sem_alloc_security - Set the security blob for sem + * @sma: the object + * + * Returns 0 + */ +static int smack_sem_alloc_security(struct sem_array *sma) +{ + struct kern_ipc_perm *isp = &sma->sem_perm; + + isp->security = current->security; + return 0; +} + +/** + * smack_sem_free_security - Clear the security blob for sem + * @sma: the object + * + * Clears the blob pointer + */ +static void smack_sem_free_security(struct sem_array *sma) +{ + struct kern_ipc_perm *isp = &sma->sem_perm; + + isp->security = NULL; +} + +/** + * smack_sem_associate - Smack access check for sem + * @sma: the object + * @semflg: access requested + * + * Returns 0 if current has the requested access, error code otherwise + */ +static int smack_sem_associate(struct sem_array *sma, int semflg) +{ + char *ssp = smack_of_sem(sma); + int may; + + may = smack_flags_to_may(semflg); + return smk_curacc(ssp, may); +} + +/** + * smack_sem_shmctl - Smack access check for sem + * @sma: the object + * @cmd: what it wants to do + * + * Returns 0 if current has the requested access, error code otherwise + */ +static int smack_sem_semctl(struct sem_array *sma, int cmd) +{ + char *ssp = smack_of_sem(sma); + int may; + + switch (cmd) { + case GETPID: + case GETNCNT: + case GETZCNT: + case GETVAL: + case GETALL: + case IPC_STAT: + case SEM_STAT: + may = MAY_READ; + break; + case SETVAL: + case SETALL: + case IPC_RMID: + case IPC_SET: + may = MAY_READWRITE; + break; + case IPC_INFO: + case SEM_INFO: + /* + * System level information + */ + return 0; + default: + return -EINVAL; + } + + return smk_curacc(ssp, may); +} + +/** + * smack_sem_semop - Smack checks of semaphore operations + * @sma: the object + * @sops: unused + * @nsops: unused + * @alter: unused + * + * Treated as read and write in all cases. + * + * Returns 0 if access is allowed, error code otherwise + */ +static int smack_sem_semop(struct sem_array *sma, struct sembuf *sops, + unsigned nsops, int alter) +{ + char *ssp = smack_of_sem(sma); + + return smk_curacc(ssp, MAY_READWRITE); +} + +/** + * smack_msg_alloc_security - Set the security blob for msg + * @msq: the object + * + * Returns 0 + */ +static int smack_msg_queue_alloc_security(struct msg_queue *msq) +{ + struct kern_ipc_perm *kisp = &msq->q_perm; + + kisp->security = current->security; + return 0; +} + +/** + * smack_msg_free_security - Clear the security blob for msg + * @msq: the object + * + * Clears the blob pointer + */ +static void smack_msg_queue_free_security(struct msg_queue *msq) +{ + struct kern_ipc_perm *kisp = &msq->q_perm; + + kisp->security = NULL; +} + +/** + * smack_of_msq - the smack pointer for the msq + * @msq: the object + * + * Returns a pointer to the smack value + */ +static char *smack_of_msq(struct msg_queue *msq) +{ + return (char *)msq->q_perm.security; +} + +/** + * smack_msg_queue_associate - Smack access check for msg_queue + * @msq: the object + * @msqflg: access requested + * + * Returns 0 if current has the requested access, error code otherwise + */ +static int smack_msg_queue_associate(struct msg_queue *msq, int msqflg) +{ + char *msp = smack_of_msq(msq); + int may; + + may = smack_flags_to_may(msqflg); + return smk_curacc(msp, may); +} + +/** + * smack_msg_queue_msgctl - Smack access check for msg_queue + * @msq: the object + * @cmd: what it wants to do + * + * Returns 0 if current has the requested access, error code otherwise + */ +static int smack_msg_queue_msgctl(struct msg_queue *msq, int cmd) +{ + char *msp = smack_of_msq(msq); + int may; + + switch (cmd) { + case IPC_STAT: + case MSG_STAT: + may = MAY_READ; + break; + case IPC_SET: + case IPC_RMID: + may = MAY_READWRITE; + break; + case IPC_INFO: + case MSG_INFO: + /* + * System level information + */ + return 0; + default: + return -EINVAL; + } + + return smk_curacc(msp, may); +} + +/** + * smack_msg_queue_msgsnd - Smack access check for msg_queue + * @msq: the object + * @msg: unused + * @msqflg: access requested + * + * Returns 0 if current has the requested access, error code otherwise + */ +static int smack_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, + int msqflg) +{ + char *msp = smack_of_msq(msq); + int rc; + + rc = smack_flags_to_may(msqflg); + return smk_curacc(msp, rc); +} + +/** + * smack_msg_queue_msgsnd - Smack access check for msg_queue + * @msq: the object + * @msg: unused + * @target: unused + * @type: unused + * @mode: unused + * + * Returns 0 if current has read and write access, error code otherwise + */ +static int smack_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, + struct task_struct *target, long type, int mode) +{ + char *msp = smack_of_msq(msq); + + return smk_curacc(msp, MAY_READWRITE); +} + +/** + * smack_ipc_permission - Smack access for ipc_permission() + * @ipp: the object permissions + * @flag: access requested + * + * Returns 0 if current has read and write access, error code otherwise + */ +static int smack_ipc_permission(struct kern_ipc_perm *ipp, short flag) +{ + char *isp = ipp->security; + int may; + + may = smack_flags_to_may(flag); + return smk_curacc(isp, may); +} + +/** + * smack_d_instantiate - Make sure the blob is correct on an inode + * @opt_dentry: unused + * @inode: the object + * + * Set the inode's security blob if it hasn't been done already. + */ +static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode) +{ + struct super_block *sbp; + struct superblock_smack *sbsp; + struct inode_smack *isp; + char *csp = current->security; + char *fetched; + char *final; + struct dentry *dp; + + if (inode == NULL) + return; + + isp = inode->i_security; + + mutex_lock(&isp->smk_lock); + /* + * If the inode is already instantiated + * take the quick way out + */ + if (isp->smk_flags & SMK_INODE_INSTANT) + goto unlockandout; + + sbp = inode->i_sb; + sbsp = sbp->s_security; + /* + * We're going to use the superblock default label + * if there's no label on the file. + */ + final = sbsp->smk_default; + + /* + * This is pretty hackish. + * Casey says that we shouldn't have to do + * file system specific code, but it does help + * with keeping it simple. + */ + switch (sbp->s_magic) { + case SMACK_MAGIC: + /* + * Casey says that it's a little embarassing + * that the smack file system doesn't do + * extended attributes. + */ + final = smack_known_star.smk_known; + break; + case PIPEFS_MAGIC: + /* + * Casey says pipes are easy (?) + */ + final = smack_known_star.smk_known; + break; + case DEVPTS_SUPER_MAGIC: + /* + * devpts seems content with the label of the task. + * Programs that change smack have to treat the + * pty with respect. + */ + final = csp; + break; + case SOCKFS_MAGIC: + /* + * Casey says sockets get the smack of the task. + */ + final = csp; + break; + case PROC_SUPER_MAGIC: + /* + * Casey says procfs appears not to care. + * The superblock default suffices. + */ + break; + case TMPFS_MAGIC: + /* + * Device labels should come from the filesystem, + * but watch out, because they're volitile, + * getting recreated on every reboot. + */ + final = smack_known_star.smk_known; + /* + * No break. + * + * If a smack value has been set we want to use it, + * but since tmpfs isn't giving us the opportunity + * to set mount options simulate setting the + * superblock default. + */ + default: + /* + * This isn't an understood special case. + * Get the value from the xattr. + * + * No xattr support means, alas, no SMACK label. + * Use the aforeapplied default. + * It would be curious if the label of the task + * does not match that assigned. + */ + if (inode->i_op->getxattr == NULL) + break; + /* + * Get the dentry for xattr. + */ + if (opt_dentry == NULL) { + dp = d_find_alias(inode); + if (dp == NULL) + break; + } else { + dp = dget(opt_dentry); + if (dp == NULL) + break; + } + + fetched = smk_fetch(inode, dp); + if (fetched != NULL) + final = fetched; + + dput(dp); + break; + } + + if (final == NULL) + isp->smk_inode = csp; + else + isp->smk_inode = final; + + isp->smk_flags |= SMK_INODE_INSTANT; + +unlockandout: + mutex_unlock(&isp->smk_lock); + return; +} + +/** + * smack_getprocattr - Smack process attribute access + * @p: the object task + * @name: the name of the attribute in /proc/.../attr + * @value: where to put the result + * + * Places a copy of the task Smack into value + * + * Returns the length of the smack label or an error code + */ +static int smack_getprocattr(struct task_struct *p, char *name, char **value) +{ + char *cp; + int slen; + + if (strcmp(name, "current") != 0) + return -EINVAL; + + cp = kstrdup(p->security, GFP_KERNEL); + if (cp == NULL) + return -ENOMEM; + + slen = strlen(cp); + *value = cp; + return slen; +} + +/** + * smack_setprocattr - Smack process attribute setting + * @p: the object task + * @name: the name of the attribute in /proc/.../attr + * @value: the value to set + * @size: the size of the value + * + * Sets the Smack value of the task. Only setting self + * is permitted and only with privilege + * + * Returns the length of the smack label or an error code + */ +static int smack_setprocattr(struct task_struct *p, char *name, + void *value, size_t size) +{ + char *newsmack; + + if (!__capable(p, CAP_MAC_ADMIN)) + return -EPERM; + + /* + * Changing another process' Smack value is too dangerous + * and supports no sane use case. + */ + if (p != current) + return -EPERM; + + if (value == NULL || size == 0 || size >= SMK_LABELLEN) + return -EINVAL; + + if (strcmp(name, "current") != 0) + return -EINVAL; + + newsmack = smk_import(value, size); + if (newsmack == NULL) + return -EINVAL; + + p->security = newsmack; + return size; +} + +/** + * smack_unix_stream_connect - Smack access on UDS + * @sock: one socket + * @other: the other socket + * @newsk: unused + * + * Return 0 if a subject with the smack of sock could access + * an object with the smack of other, otherwise an error code + */ +static int smack_unix_stream_connect(struct socket *sock, + struct socket *other, struct sock *newsk) +{ + struct inode *sp = SOCK_INODE(sock); + struct inode *op = SOCK_INODE(other); + + return smk_access(smk_of_inode(sp), smk_of_inode(op), MAY_READWRITE); +} + +/** + * smack_unix_may_send - Smack access on UDS + * @sock: one socket + * @other: the other socket + * + * Return 0 if a subject with the smack of sock could access + * an object with the smack of other, otherwise an error code + */ +static int smack_unix_may_send(struct socket *sock, struct socket *other) +{ + struct inode *sp = SOCK_INODE(sock); + struct inode *op = SOCK_INODE(other); + + return smk_access(smk_of_inode(sp), smk_of_inode(op), MAY_WRITE); +} + +/** + * smack_from_secattr - Convert a netlabel attr.mls.lvl/attr.mls.cat + * pair to smack + * @sap: netlabel secattr + * @sip: where to put the result + * + * Copies a smack label into sip + */ +static void smack_from_secattr(struct netlbl_lsm_secattr *sap, char *sip) +{ + char smack[SMK_LABELLEN]; + int pcat; + + if ((sap->flags & NETLBL_SECATTR_MLS_LVL) == 0) { + /* + * If there are flags but no level netlabel isn't + * behaving the way we expect it to. + * + * Without guidance regarding the smack value + * for the packet fall back on the network + * ambient value. + */ + strncpy(sip, smack_net_ambient, SMK_MAXLEN); + return; + } + /* + * Get the categories, if any + */ + memset(smack, '\0', SMK_LABELLEN); + if ((sap->flags & NETLBL_SECATTR_MLS_CAT) != 0) + for (pcat = -1;;) { + pcat = netlbl_secattr_catmap_walk(sap->attr.mls.cat, + pcat + 1); + if (pcat < 0) + break; + smack_catset_bit(pcat, smack); + } + /* + * If it is CIPSO using smack direct mapping + * we are already done. WeeHee. + */ + if (sap->attr.mls.lvl == smack_cipso_direct) { + memcpy(sip, smack, SMK_MAXLEN); + return; + } + /* + * Look it up in the supplied table if it is not a direct mapping. + */ + smack_from_cipso(sap->attr.mls.lvl, smack, sip); + return; +} + +/** + * smack_socket_sock_rcv_skb - Smack packet delivery access check + * @sk: socket + * @skb: packet + * + * Returns 0 if the packet should be delivered, an error code otherwise + */ +static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + struct netlbl_lsm_secattr secattr; + struct socket_smack *ssp = sk->sk_security; + char smack[SMK_LABELLEN]; + int rc; + + if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6) + return 0; + + /* + * Translate what netlabel gave us. + */ + memset(smack, '\0', SMK_LABELLEN); + netlbl_secattr_init(&secattr); + rc = netlbl_skbuff_getattr(skb, sk->sk_family, &secattr); + if (rc == 0) + smack_from_secattr(&secattr, smack); + else + strncpy(smack, smack_net_ambient, SMK_MAXLEN); + netlbl_secattr_destroy(&secattr); + /* + * Receiving a packet requires that the other end + * be able to write here. Read access is not required. + * This is the simplist possible security model + * for networking. + */ + return smk_access(smack, ssp->smk_in, MAY_WRITE); +} + +/** + * smack_socket_getpeersec_stream - pull in packet label + * @sock: the socket + * @optval: user's destination + * @optlen: size thereof + * @len: max thereoe + * + * returns zero on success, an error code otherwise + */ +static int smack_socket_getpeersec_stream(struct socket *sock, + char __user *optval, + int __user *optlen, unsigned len) +{ + struct socket_smack *ssp; + int slen; + int rc = 0; + + ssp = sock->sk->sk_security; + slen = strlen(ssp->smk_packet) + 1; + + if (slen > len) + rc = -ERANGE; + else if (copy_to_user(optval, ssp->smk_packet, slen) != 0) + rc = -EFAULT; + + if (put_user(slen, optlen) != 0) + rc = -EFAULT; + + return rc; +} + + +/** + * smack_socket_getpeersec_dgram - pull in packet label + * @sock: the socket + * @skb: packet data + * @secid: pointer to where to put the secid of the packet + * + * Sets the netlabel socket state on sk from parent + */ +static int smack_socket_getpeersec_dgram(struct socket *sock, + struct sk_buff *skb, u32 *secid) + +{ + struct netlbl_lsm_secattr secattr; + struct sock *sk; + char smack[SMK_LABELLEN]; + int family = PF_INET; + u32 s; + int rc; + + /* + * Only works for families with packets. + */ + if (sock != NULL) { + sk = sock->sk; + if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6) + return 0; + family = sk->sk_family; + } + /* + * Translate what netlabel gave us. + */ + memset(smack, '\0', SMK_LABELLEN); + netlbl_secattr_init(&secattr); + rc = netlbl_skbuff_getattr(skb, family, &secattr); + if (rc == 0) + smack_from_secattr(&secattr, smack); + netlbl_secattr_destroy(&secattr); + + /* + * Give up if we couldn't get anything + */ + if (rc != 0) + return rc; + + s = smack_to_secid(smack); + if (s == 0) + return -EINVAL; + + *secid = s; + return 0; +} + +/** + * smack_sock_graft - graft access state between two sockets + * @sk: fresh sock + * @parent: donor socket + * + * Sets the netlabel socket state on sk from parent + */ +static void smack_sock_graft(struct sock *sk, struct socket *parent) +{ + struct socket_smack *ssp; + int rc; + + if (sk == NULL) + return; + + if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6) + return; + + ssp = sk->sk_security; + ssp->smk_in = current->security; + ssp->smk_out = current->security; + ssp->smk_packet[0] = '\0'; + + rc = smack_netlabel(sk); +} + +/** + * smack_inet_conn_request - Smack access check on connect + * @sk: socket involved + * @skb: packet + * @req: unused + * + * Returns 0 if a task with the packet label could write to + * the socket, otherwise an error code + */ +static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb, + struct request_sock *req) +{ + struct netlbl_lsm_secattr skb_secattr; + struct socket_smack *ssp = sk->sk_security; + char smack[SMK_LABELLEN]; + int rc; + + if (skb == NULL) + return -EACCES; + + memset(smack, '\0', SMK_LABELLEN); + netlbl_secattr_init(&skb_secattr); + rc = netlbl_skbuff_getattr(skb, sk->sk_family, &skb_secattr); + if (rc == 0) + smack_from_secattr(&skb_secattr, smack); + else + strncpy(smack, smack_known_huh.smk_known, SMK_MAXLEN); + netlbl_secattr_destroy(&skb_secattr); + /* + * Receiving a packet requires that the other end + * be able to write here. Read access is not required. + * + * If the request is successful save the peer's label + * so that SO_PEERCRED can report it. + */ + rc = smk_access(smack, ssp->smk_in, MAY_WRITE); + if (rc == 0) + strncpy(ssp->smk_packet, smack, SMK_MAXLEN); + + return rc; +} + +/* + * Key management security hooks + * + * Casey has not tested key support very heavily. + * The permission check is most likely too restrictive. + * If you care about keys please have a look. + */ +#ifdef CONFIG_KEYS + +/** + * smack_key_alloc - Set the key security blob + * @key: object + * @tsk: the task associated with the key + * @flags: unused + * + * No allocation required + * + * Returns 0 + */ +static int smack_key_alloc(struct key *key, struct task_struct *tsk, + unsigned long flags) +{ + key->security = tsk->security; + return 0; +} + +/** + * smack_key_free - Clear the key security blob + * @key: the object + * + * Clear the blob pointer + */ +static void smack_key_free(struct key *key) +{ + key->security = NULL; +} + +/* + * smack_key_permission - Smack access on a key + * @key_ref: gets to the object + * @context: task involved + * @perm: unused + * + * Return 0 if the task has read and write to the object, + * an error code otherwise + */ +static int smack_key_permission(key_ref_t key_ref, + struct task_struct *context, key_perm_t perm) +{ + struct key *keyp; + + keyp = key_ref_to_ptr(key_ref); + if (keyp == NULL) + return -EINVAL; + /* + * If the key hasn't been initialized give it access so that + * it may do so. + */ + if (keyp->security == NULL) + return 0; + /* + * This should not occur + */ + if (context->security == NULL) + return -EACCES; + + return smk_access(context->security, keyp->security, MAY_READWRITE); +} +#endif /* CONFIG_KEYS */ + +/* + * smack_secid_to_secctx - return the smack label for a secid + * @secid: incoming integer + * @secdata: destination + * @seclen: how long it is + * + * Exists for networking code. + */ +static int smack_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) +{ + char *sp = smack_from_secid(secid); + + *secdata = sp; + *seclen = strlen(sp); + return 0; +} + +/* + * smack_release_secctx - don't do anything. + * @key_ref: unused + * @context: unused + * @perm: unused + * + * Exists to make sure nothing gets done, and properly + */ +static void smack_release_secctx(char *secdata, u32 seclen) +{ +} + +static struct security_operations smack_ops = { + .ptrace = smack_ptrace, + .capget = cap_capget, + .capset_check = cap_capset_check, + .capset_set = cap_capset_set, + .capable = cap_capable, + .syslog = smack_syslog, + .settime = cap_settime, + .vm_enough_memory = cap_vm_enough_memory, + + .bprm_apply_creds = cap_bprm_apply_creds, + .bprm_set_security = cap_bprm_set_security, + .bprm_secureexec = cap_bprm_secureexec, + + .sb_alloc_security = smack_sb_alloc_security, + .sb_free_security = smack_sb_free_security, + .sb_copy_data = smack_sb_copy_data, + .sb_kern_mount = smack_sb_kern_mount, + .sb_statfs = smack_sb_statfs, + .sb_mount = smack_sb_mount, + .sb_umount = smack_sb_umount, + + .inode_alloc_security = smack_inode_alloc_security, + .inode_free_security = smack_inode_free_security, + .inode_init_security = smack_inode_init_security, + .inode_link = smack_inode_link, + .inode_unlink = smack_inode_unlink, + .inode_rmdir = smack_inode_rmdir, + .inode_rename = smack_inode_rename, + .inode_permission = smack_inode_permission, + .inode_setattr = smack_inode_setattr, + .inode_getattr = smack_inode_getattr, + .inode_setxattr = smack_inode_setxattr, + .inode_post_setxattr = smack_inode_post_setxattr, + .inode_getxattr = smack_inode_getxattr, + .inode_removexattr = smack_inode_removexattr, + .inode_getsecurity = smack_inode_getsecurity, + .inode_setsecurity = smack_inode_setsecurity, + .inode_listsecurity = smack_inode_listsecurity, + + .file_permission = smack_file_permission, + .file_alloc_security = smack_file_alloc_security, + .file_free_security = smack_file_free_security, + .file_ioctl = smack_file_ioctl, + .file_lock = smack_file_lock, + .file_fcntl = smack_file_fcntl, + .file_set_fowner = smack_file_set_fowner, + .file_send_sigiotask = smack_file_send_sigiotask, + .file_receive = smack_file_receive, + + .task_alloc_security = smack_task_alloc_security, + .task_free_security = smack_task_free_security, + .task_post_setuid = cap_task_post_setuid, + .task_setpgid = smack_task_setpgid, + .task_getpgid = smack_task_getpgid, + .task_getsid = smack_task_getsid, + .task_getsecid = smack_task_getsecid, + .task_setnice = smack_task_setnice, + .task_setioprio = smack_task_setioprio, + .task_getioprio = smack_task_getioprio, + .task_setscheduler = smack_task_setscheduler, + .task_getscheduler = smack_task_getscheduler, + .task_movememory = smack_task_movememory, + .task_kill = smack_task_kill, + .task_wait = smack_task_wait, + .task_reparent_to_init = cap_task_reparent_to_init, + .task_to_inode = smack_task_to_inode, + + .ipc_permission = smack_ipc_permission, + + .msg_msg_alloc_security = smack_msg_msg_alloc_security, + .msg_msg_free_security = smack_msg_msg_free_security, + + .msg_queue_alloc_security = smack_msg_queue_alloc_security, + .msg_queue_free_security = smack_msg_queue_free_security, + .msg_queue_associate = smack_msg_queue_associate, + .msg_queue_msgctl = smack_msg_queue_msgctl, + .msg_queue_msgsnd = smack_msg_queue_msgsnd, + .msg_queue_msgrcv = smack_msg_queue_msgrcv, + + .shm_alloc_security = smack_shm_alloc_security, + .shm_free_security = smack_shm_free_security, + .shm_associate = smack_shm_associate, + .shm_shmctl = smack_shm_shmctl, + .shm_shmat = smack_shm_shmat, + + .sem_alloc_security = smack_sem_alloc_security, + .sem_free_security = smack_sem_free_security, + .sem_associate = smack_sem_associate, + .sem_semctl = smack_sem_semctl, + .sem_semop = smack_sem_semop, + + .netlink_send = cap_netlink_send, + .netlink_recv = cap_netlink_recv, + + .d_instantiate = smack_d_instantiate, + + .getprocattr = smack_getprocattr, + .setprocattr = smack_setprocattr, + + .unix_stream_connect = smack_unix_stream_connect, + .unix_may_send = smack_unix_may_send, + + .socket_post_create = smack_socket_post_create, + .socket_sock_rcv_skb = smack_socket_sock_rcv_skb, + .socket_getpeersec_stream = smack_socket_getpeersec_stream, + .socket_getpeersec_dgram = smack_socket_getpeersec_dgram, + .sk_alloc_security = smack_sk_alloc_security, + .sk_free_security = smack_sk_free_security, + .sock_graft = smack_sock_graft, + .inet_conn_request = smack_inet_conn_request, + /* key management security hooks */ +#ifdef CONFIG_KEYS + .key_alloc = smack_key_alloc, + .key_free = smack_key_free, + .key_permission = smack_key_permission, +#endif /* CONFIG_KEYS */ + .secid_to_secctx = smack_secid_to_secctx, + .release_secctx = smack_release_secctx, +}; + +/** + * smack_init - initialize the smack system + * + * Returns 0 + */ +static __init int smack_init(void) +{ + printk(KERN_INFO "Smack: Initializing.\n"); + + /* + * Set the security state for the initial task. + */ + current->security = &smack_known_floor.smk_known; + + /* + * Initialize locks + */ + spin_lock_init(&smack_known_unset.smk_cipsolock); + spin_lock_init(&smack_known_huh.smk_cipsolock); + spin_lock_init(&smack_known_hat.smk_cipsolock); + spin_lock_init(&smack_known_star.smk_cipsolock); + spin_lock_init(&smack_known_floor.smk_cipsolock); + spin_lock_init(&smack_known_invalid.smk_cipsolock); + + /* + * Register with LSM + */ + if (register_security(&smack_ops)) + panic("smack: Unable to register with kernel.\n"); + + return 0; +} + +/* + * Smack requires early initialization in order to label + * all processes and objects when they are created. + */ +security_initcall(smack_init); + diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c new file mode 100644 index 000000000000..15aa37f65b39 --- /dev/null +++ b/security/smack/smackfs.c @@ -0,0 +1,981 @@ +/* + * Copyright (C) 2007 Casey Schaufler <casey@schaufler-ca.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2. + * + * Authors: + * Casey Schaufler <casey@schaufler-ca.com> + * Ahmed S. Darwish <darwish.07@gmail.com> + * + * Special thanks to the authors of selinuxfs. + * + * Karl MacMillan <kmacmillan@tresys.com> + * James Morris <jmorris@redhat.com> + * + */ + +#include <linux/kernel.h> +#include <linux/vmalloc.h> +#include <linux/security.h> +#include <linux/mutex.h> +#include <net/netlabel.h> +#include <net/cipso_ipv4.h> +#include <linux/seq_file.h> +#include <linux/ctype.h> +#include "smack.h" + +/* + * smackfs pseudo filesystem. + */ + +enum smk_inos { + SMK_ROOT_INO = 2, + SMK_LOAD = 3, /* load policy */ + SMK_CIPSO = 4, /* load label -> CIPSO mapping */ + SMK_DOI = 5, /* CIPSO DOI */ + SMK_DIRECT = 6, /* CIPSO level indicating direct label */ + SMK_AMBIENT = 7, /* internet ambient label */ + SMK_NLTYPE = 8, /* label scheme to use by default */ +}; + +/* + * List locks + */ +static DEFINE_MUTEX(smack_list_lock); +static DEFINE_MUTEX(smack_cipso_lock); + +/* + * This is the "ambient" label for network traffic. + * If it isn't somehow marked, use this. + * It can be reset via smackfs/ambient + */ +char *smack_net_ambient = smack_known_floor.smk_known; + +/* + * This is the default packet marking scheme for network traffic. + * It can be reset via smackfs/nltype + */ +int smack_net_nltype = NETLBL_NLTYPE_CIPSOV4; + +/* + * This is the level in a CIPSO header that indicates a + * smack label is contained directly in the category set. + * It can be reset via smackfs/direct + */ +int smack_cipso_direct = SMACK_CIPSO_DIRECT_DEFAULT; + +static int smk_cipso_doi_value = SMACK_CIPSO_DOI_DEFAULT; +struct smk_list_entry *smack_list; + +#define SEQ_READ_FINISHED 1 + +/* + * Disable concurrent writing open() operations + */ +static struct semaphore smack_write_sem; + +/* + * Values for parsing cipso rules + * SMK_DIGITLEN: Length of a digit field in a rule. + * SMK_CIPSOMEN: Minimum possible cipso rule length. + */ +#define SMK_DIGITLEN 4 +#define SMK_CIPSOMIN (SMK_MAXLEN + 2 * SMK_DIGITLEN) + +/* + * Seq_file read operations for /smack/load + */ + +static void *load_seq_start(struct seq_file *s, loff_t *pos) +{ + if (*pos == SEQ_READ_FINISHED) + return NULL; + + return smack_list; +} + +static void *load_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct smk_list_entry *skp = ((struct smk_list_entry *) v)->smk_next; + + if (skp == NULL) + *pos = SEQ_READ_FINISHED; + + return skp; +} + +static int load_seq_show(struct seq_file *s, void *v) +{ + struct smk_list_entry *slp = (struct smk_list_entry *) v; + struct smack_rule *srp = &slp->smk_rule; + + seq_printf(s, "%s %s", (char *)srp->smk_subject, + (char *)srp->smk_object); + + seq_putc(s, ' '); + + if (srp->smk_access & MAY_READ) + seq_putc(s, 'r'); + if (srp->smk_access & MAY_WRITE) + seq_putc(s, 'w'); + if (srp->smk_access & MAY_EXEC) + seq_putc(s, 'x'); + if (srp->smk_access & MAY_APPEND) + seq_putc(s, 'a'); + if (srp->smk_access == 0) + seq_putc(s, '-'); + + seq_putc(s, '\n'); + + return 0; +} + +static void load_seq_stop(struct seq_file *s, void *v) +{ + /* No-op */ +} + +static struct seq_operations load_seq_ops = { + .start = load_seq_start, + .next = load_seq_next, + .show = load_seq_show, + .stop = load_seq_stop, +}; + +/** + * smk_open_load - open() for /smack/load + * @inode: inode structure representing file + * @file: "load" file pointer + * + * For reading, use load_seq_* seq_file reading operations. + */ +static int smk_open_load(struct inode *inode, struct file *file) +{ + if ((file->f_flags & O_ACCMODE) == O_RDONLY) + return seq_open(file, &load_seq_ops); + + if (down_interruptible(&smack_write_sem)) + return -ERESTARTSYS; + + return 0; +} + +/** + * smk_release_load - release() for /smack/load + * @inode: inode structure representing file + * @file: "load" file pointer + * + * For a reading session, use the seq_file release + * implementation. + * Otherwise, we are at the end of a writing session so + * clean everything up. + */ +static int smk_release_load(struct inode *inode, struct file *file) +{ + if ((file->f_flags & O_ACCMODE) == O_RDONLY) + return seq_release(inode, file); + + up(&smack_write_sem); + return 0; +} + +/** + * smk_set_access - add a rule to the rule list + * @srp: the new rule to add + * + * Looks through the current subject/object/access list for + * the subject/object pair and replaces the access that was + * there. If the pair isn't found add it with the specified + * access. + */ +static void smk_set_access(struct smack_rule *srp) +{ + struct smk_list_entry *sp; + struct smk_list_entry *newp; + + mutex_lock(&smack_list_lock); + + for (sp = smack_list; sp != NULL; sp = sp->smk_next) + if (sp->smk_rule.smk_subject == srp->smk_subject && + sp->smk_rule.smk_object == srp->smk_object) { + sp->smk_rule.smk_access = srp->smk_access; + break; + } + + if (sp == NULL) { + newp = kzalloc(sizeof(struct smk_list_entry), GFP_KERNEL); + newp->smk_rule = *srp; + newp->smk_next = smack_list; + smack_list = newp; + } + + mutex_unlock(&smack_list_lock); + + return; +} + +/** + * smk_write_load - write() for /smack/load + * @filp: file pointer, not actually used + * @buf: where to get the data from + * @count: bytes sent + * @ppos: where to start - must be 0 + * + * Get one smack access rule from above. + * The format is exactly: + * char subject[SMK_LABELLEN] + * char object[SMK_LABELLEN] + * char access[SMK_ACCESSKINDS] + * + * Anything following is commentary and ignored. + * + * writes must be SMK_LABELLEN+SMK_LABELLEN+4 bytes. + */ +#define MINIMUM_LOAD (SMK_LABELLEN + SMK_LABELLEN + SMK_ACCESSKINDS) + +static ssize_t smk_write_load(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct smack_rule rule; + char *data; + int rc = -EINVAL; + + /* + * Must have privilege. + * No partial writes. + * Enough data must be present. + */ + if (!capable(CAP_MAC_ADMIN)) + return -EPERM; + if (*ppos != 0) + return -EINVAL; + if (count < MINIMUM_LOAD) + return -EINVAL; + + data = kzalloc(count, GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + + if (copy_from_user(data, buf, count) != 0) { + rc = -EFAULT; + goto out; + } + + rule.smk_subject = smk_import(data, 0); + if (rule.smk_subject == NULL) + goto out; + + rule.smk_object = smk_import(data + SMK_LABELLEN, 0); + if (rule.smk_object == NULL) + goto out; + + rule.smk_access = 0; + + switch (data[SMK_LABELLEN + SMK_LABELLEN]) { + case '-': + break; + case 'r': + case 'R': + rule.smk_access |= MAY_READ; + break; + default: + goto out; + } + + switch (data[SMK_LABELLEN + SMK_LABELLEN + 1]) { + case '-': + break; + case 'w': + case 'W': + rule.smk_access |= MAY_WRITE; + break; + default: + goto out; + } + + switch (data[SMK_LABELLEN + SMK_LABELLEN + 2]) { + case '-': + break; + case 'x': + case 'X': + rule.smk_access |= MAY_EXEC; + break; + default: + goto out; + } + + switch (data[SMK_LABELLEN + SMK_LABELLEN + 3]) { + case '-': + break; + case 'a': + case 'A': + rule.smk_access |= MAY_READ; + break; + default: + goto out; + } + + smk_set_access(&rule); + rc = count; + +out: + kfree(data); + return rc; +} + +static const struct file_operations smk_load_ops = { + .open = smk_open_load, + .read = seq_read, + .llseek = seq_lseek, + .write = smk_write_load, + .release = smk_release_load, +}; + +/** + * smk_cipso_doi - initialize the CIPSO domain + */ +void smk_cipso_doi(void) +{ + int rc; + struct cipso_v4_doi *doip; + struct netlbl_audit audit_info; + + rc = netlbl_cfg_map_del(NULL, &audit_info); + if (rc != 0) + printk(KERN_WARNING "%s:%d remove rc = %d\n", + __func__, __LINE__, rc); + + doip = kmalloc(sizeof(struct cipso_v4_doi), GFP_KERNEL); + if (doip == NULL) + panic("smack: Failed to initialize cipso DOI.\n"); + doip->map.std = NULL; + doip->doi = smk_cipso_doi_value; + doip->type = CIPSO_V4_MAP_PASS; + doip->tags[0] = CIPSO_V4_TAG_RBITMAP; + for (rc = 1; rc < CIPSO_V4_TAG_MAXCNT; rc++) + doip->tags[rc] = CIPSO_V4_TAG_INVALID; + + rc = netlbl_cfg_cipsov4_add_map(doip, NULL, &audit_info); + if (rc != 0) + printk(KERN_WARNING "%s:%d add rc = %d\n", + __func__, __LINE__, rc); +} + +/* + * Seq_file read operations for /smack/cipso + */ + +static void *cipso_seq_start(struct seq_file *s, loff_t *pos) +{ + if (*pos == SEQ_READ_FINISHED) + return NULL; + + return smack_known; +} + +static void *cipso_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct smack_known *skp = ((struct smack_known *) v)->smk_next; + + /* + * Omit labels with no associated cipso value + */ + while (skp != NULL && !skp->smk_cipso) + skp = skp->smk_next; + + if (skp == NULL) + *pos = SEQ_READ_FINISHED; + + return skp; +} + +/* + * Print cipso labels in format: + * label level[/cat[,cat]] + */ +static int cipso_seq_show(struct seq_file *s, void *v) +{ + struct smack_known *skp = (struct smack_known *) v; + struct smack_cipso *scp = skp->smk_cipso; + char *cbp; + char sep = '/'; + int cat = 1; + int i; + unsigned char m; + + if (scp == NULL) + return 0; + + seq_printf(s, "%s %3d", (char *)&skp->smk_known, scp->smk_level); + + cbp = scp->smk_catset; + for (i = 0; i < SMK_LABELLEN; i++) + for (m = 0x80; m != 0; m >>= 1) { + if (m & cbp[i]) { + seq_printf(s, "%c%d", sep, cat); + sep = ','; + } + cat++; + } + + seq_putc(s, '\n'); + + return 0; +} + +static void cipso_seq_stop(struct seq_file *s, void *v) +{ + /* No-op */ +} + +static struct seq_operations cipso_seq_ops = { + .start = cipso_seq_start, + .stop = cipso_seq_stop, + .next = cipso_seq_next, + .show = cipso_seq_show, +}; + +/** + * smk_open_cipso - open() for /smack/cipso + * @inode: inode structure representing file + * @file: "cipso" file pointer + * + * Connect our cipso_seq_* operations with /smack/cipso + * file_operations + */ +static int smk_open_cipso(struct inode *inode, struct file *file) +{ + return seq_open(file, &cipso_seq_ops); +} + +/** + * smk_write_cipso - write() for /smack/cipso + * @filp: file pointer, not actually used + * @buf: where to get the data from + * @count: bytes sent + * @ppos: where to start + * + * Accepts only one cipso rule per write call. + * Returns number of bytes written or error code, as appropriate + */ +static ssize_t smk_write_cipso(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct smack_known *skp; + struct smack_cipso *scp = NULL; + char mapcatset[SMK_LABELLEN]; + int maplevel; + int cat; + int catlen; + ssize_t rc = -EINVAL; + char *data = NULL; + char *rule; + int ret; + int i; + + /* + * Must have privilege. + * No partial writes. + * Enough data must be present. + */ + if (!capable(CAP_MAC_ADMIN)) + return -EPERM; + if (*ppos != 0) + return -EINVAL; + if (count <= SMK_CIPSOMIN) + return -EINVAL; + + data = kzalloc(count + 1, GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + + if (copy_from_user(data, buf, count) != 0) { + rc = -EFAULT; + goto unlockedout; + } + + data[count] = '\0'; + rule = data; + /* + * Only allow one writer at a time. Writes should be + * quite rare and small in any case. + */ + mutex_lock(&smack_cipso_lock); + + skp = smk_import_entry(rule, 0); + if (skp == NULL) + goto out; + + rule += SMK_LABELLEN;; + ret = sscanf(rule, "%d", &maplevel); + if (ret != 1 || maplevel > SMACK_CIPSO_MAXLEVEL) + goto out; + + rule += SMK_DIGITLEN; + ret = sscanf(rule, "%d", &catlen); + if (ret != 1 || catlen > SMACK_CIPSO_MAXCATNUM) + goto out; + + if (count <= (SMK_CIPSOMIN + catlen * SMK_DIGITLEN)) + goto out; + + memset(mapcatset, 0, sizeof(mapcatset)); + + for (i = 0; i < catlen; i++) { + rule += SMK_DIGITLEN; + ret = sscanf(rule, "%d", &cat); + if (ret != 1 || cat > SMACK_CIPSO_MAXCATVAL) + goto out; + + smack_catset_bit(cat, mapcatset); + } + + if (skp->smk_cipso == NULL) { + scp = kzalloc(sizeof(struct smack_cipso), GFP_KERNEL); + if (scp == NULL) { + rc = -ENOMEM; + goto out; + } + } + + spin_lock_bh(&skp->smk_cipsolock); + + if (scp == NULL) + scp = skp->smk_cipso; + else + skp->smk_cipso = scp; + + scp->smk_level = maplevel; + memcpy(scp->smk_catset, mapcatset, sizeof(mapcatset)); + + spin_unlock_bh(&skp->smk_cipsolock); + + rc = count; +out: + mutex_unlock(&smack_cipso_lock); +unlockedout: + kfree(data); + return rc; +} + +static const struct file_operations smk_cipso_ops = { + .open = smk_open_cipso, + .read = seq_read, + .llseek = seq_lseek, + .write = smk_write_cipso, + .release = seq_release, +}; + +/** + * smk_read_doi - read() for /smack/doi + * @filp: file pointer, not actually used + * @buf: where to put the result + * @count: maximum to send along + * @ppos: where to start + * + * Returns number of bytes read or error code, as appropriate + */ +static ssize_t smk_read_doi(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + char temp[80]; + ssize_t rc; + + if (*ppos != 0) + return 0; + + sprintf(temp, "%d", smk_cipso_doi_value); + rc = simple_read_from_buffer(buf, count, ppos, temp, strlen(temp)); + + return rc; +} + +/** + * smk_write_doi - write() for /smack/doi + * @filp: file pointer, not actually used + * @buf: where to get the data from + * @count: bytes sent + * @ppos: where to start + * + * Returns number of bytes written or error code, as appropriate + */ +static ssize_t smk_write_doi(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + char temp[80]; + int i; + + if (!capable(CAP_MAC_ADMIN)) + return -EPERM; + + if (count >= sizeof(temp) || count == 0) + return -EINVAL; + + if (copy_from_user(temp, buf, count) != 0) + return -EFAULT; + + temp[count] = '\0'; + + if (sscanf(temp, "%d", &i) != 1) + return -EINVAL; + + smk_cipso_doi_value = i; + + smk_cipso_doi(); + + return count; +} + +static const struct file_operations smk_doi_ops = { + .read = smk_read_doi, + .write = smk_write_doi, +}; + +/** + * smk_read_direct - read() for /smack/direct + * @filp: file pointer, not actually used + * @buf: where to put the result + * @count: maximum to send along + * @ppos: where to start + * + * Returns number of bytes read or error code, as appropriate + */ +static ssize_t smk_read_direct(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + char temp[80]; + ssize_t rc; + + if (*ppos != 0) + return 0; + + sprintf(temp, "%d", smack_cipso_direct); + rc = simple_read_from_buffer(buf, count, ppos, temp, strlen(temp)); + + return rc; +} + +/** + * smk_write_direct - write() for /smack/direct + * @filp: file pointer, not actually used + * @buf: where to get the data from + * @count: bytes sent + * @ppos: where to start + * + * Returns number of bytes written or error code, as appropriate + */ +static ssize_t smk_write_direct(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + char temp[80]; + int i; + + if (!capable(CAP_MAC_ADMIN)) + return -EPERM; + + if (count >= sizeof(temp) || count == 0) + return -EINVAL; + + if (copy_from_user(temp, buf, count) != 0) + return -EFAULT; + + temp[count] = '\0'; + + if (sscanf(temp, "%d", &i) != 1) + return -EINVAL; + + smack_cipso_direct = i; + + return count; +} + +static const struct file_operations smk_direct_ops = { + .read = smk_read_direct, + .write = smk_write_direct, +}; + +/** + * smk_read_ambient - read() for /smack/ambient + * @filp: file pointer, not actually used + * @buf: where to put the result + * @cn: maximum to send along + * @ppos: where to start + * + * Returns number of bytes read or error code, as appropriate + */ +static ssize_t smk_read_ambient(struct file *filp, char __user *buf, + size_t cn, loff_t *ppos) +{ + ssize_t rc; + char out[SMK_LABELLEN]; + int asize; + + if (*ppos != 0) + return 0; + /* + * Being careful to avoid a problem in the case where + * smack_net_ambient gets changed in midstream. + * Since smack_net_ambient is always set with a value + * from the label list, including initially, and those + * never get freed, the worst case is that the pointer + * gets changed just after this strncpy, in which case + * the value passed up is incorrect. Locking around + * smack_net_ambient wouldn't be any better than this + * copy scheme as by the time the caller got to look + * at the ambient value it would have cleared the lock + * and been changed. + */ + strncpy(out, smack_net_ambient, SMK_LABELLEN); + asize = strlen(out) + 1; + + if (cn < asize) + return -EINVAL; + + rc = simple_read_from_buffer(buf, cn, ppos, out, asize); + + return rc; +} + +/** + * smk_write_ambient - write() for /smack/ambient + * @filp: file pointer, not actually used + * @buf: where to get the data from + * @count: bytes sent + * @ppos: where to start + * + * Returns number of bytes written or error code, as appropriate + */ +static ssize_t smk_write_ambient(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + char in[SMK_LABELLEN]; + char *smack; + + if (!capable(CAP_MAC_ADMIN)) + return -EPERM; + + if (count >= SMK_LABELLEN) + return -EINVAL; + + if (copy_from_user(in, buf, count) != 0) + return -EFAULT; + + smack = smk_import(in, count); + if (smack == NULL) + return -EINVAL; + + smack_net_ambient = smack; + + return count; +} + +static const struct file_operations smk_ambient_ops = { + .read = smk_read_ambient, + .write = smk_write_ambient, +}; + +struct option_names { + int o_number; + char *o_name; + char *o_alias; +}; + +static struct option_names netlbl_choices[] = { + { NETLBL_NLTYPE_RIPSO, + NETLBL_NLTYPE_RIPSO_NAME, "ripso" }, + { NETLBL_NLTYPE_CIPSOV4, + NETLBL_NLTYPE_CIPSOV4_NAME, "cipsov4" }, + { NETLBL_NLTYPE_CIPSOV4, + NETLBL_NLTYPE_CIPSOV4_NAME, "cipso" }, + { NETLBL_NLTYPE_CIPSOV6, + NETLBL_NLTYPE_CIPSOV6_NAME, "cipsov6" }, + { NETLBL_NLTYPE_UNLABELED, + NETLBL_NLTYPE_UNLABELED_NAME, "unlabeled" }, +}; + +/** + * smk_read_nltype - read() for /smack/nltype + * @filp: file pointer, not actually used + * @buf: where to put the result + * @count: maximum to send along + * @ppos: where to start + * + * Returns number of bytes read or error code, as appropriate + */ +static ssize_t smk_read_nltype(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + char bound[40]; + ssize_t rc; + int i; + + if (count < SMK_LABELLEN) + return -EINVAL; + + if (*ppos != 0) + return 0; + + sprintf(bound, "unknown"); + + for (i = 0; i < ARRAY_SIZE(netlbl_choices); i++) + if (smack_net_nltype == netlbl_choices[i].o_number) { + sprintf(bound, "%s", netlbl_choices[i].o_name); + break; + } + + rc = simple_read_from_buffer(buf, count, ppos, bound, strlen(bound)); + + return rc; +} + +/** + * smk_write_nltype - write() for /smack/nltype + * @filp: file pointer, not actually used + * @buf: where to get the data from + * @count: bytes sent + * @ppos: where to start + * + * Returns number of bytes written or error code, as appropriate + */ +static ssize_t smk_write_nltype(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + char bound[40]; + char *cp; + int i; + + if (!capable(CAP_MAC_ADMIN)) + return -EPERM; + + if (count >= 40) + return -EINVAL; + + if (copy_from_user(bound, buf, count) != 0) + return -EFAULT; + + bound[count] = '\0'; + cp = strchr(bound, ' '); + if (cp != NULL) + *cp = '\0'; + cp = strchr(bound, '\n'); + if (cp != NULL) + *cp = '\0'; + + for (i = 0; i < ARRAY_SIZE(netlbl_choices); i++) + if (strcmp(bound, netlbl_choices[i].o_name) == 0 || + strcmp(bound, netlbl_choices[i].o_alias) == 0) { + smack_net_nltype = netlbl_choices[i].o_number; + return count; + } + /* + * Not a valid choice. + */ + return -EINVAL; +} + +static const struct file_operations smk_nltype_ops = { + .read = smk_read_nltype, + .write = smk_write_nltype, +}; + +/** + * smk_fill_super - fill the /smackfs superblock + * @sb: the empty superblock + * @data: unused + * @silent: unused + * + * Fill in the well known entries for /smack + * + * Returns 0 on success, an error code on failure + */ +static int smk_fill_super(struct super_block *sb, void *data, int silent) +{ + int rc; + struct inode *root_inode; + + static struct tree_descr smack_files[] = { + [SMK_LOAD] = + {"load", &smk_load_ops, S_IRUGO|S_IWUSR}, + [SMK_CIPSO] = + {"cipso", &smk_cipso_ops, S_IRUGO|S_IWUSR}, + [SMK_DOI] = + {"doi", &smk_doi_ops, S_IRUGO|S_IWUSR}, + [SMK_DIRECT] = + {"direct", &smk_direct_ops, S_IRUGO|S_IWUSR}, + [SMK_AMBIENT] = + {"ambient", &smk_ambient_ops, S_IRUGO|S_IWUSR}, + [SMK_NLTYPE] = + {"nltype", &smk_nltype_ops, S_IRUGO|S_IWUSR}, + /* last one */ {""} + }; + + rc = simple_fill_super(sb, SMACK_MAGIC, smack_files); + if (rc != 0) { + printk(KERN_ERR "%s failed %d while creating inodes\n", + __func__, rc); + return rc; + } + + root_inode = sb->s_root->d_inode; + root_inode->i_security = new_inode_smack(smack_known_floor.smk_known); + + return 0; +} + +/** + * smk_get_sb - get the smackfs superblock + * @fs_type: passed along without comment + * @flags: passed along without comment + * @dev_name: passed along without comment + * @data: passed along without comment + * @mnt: passed along without comment + * + * Just passes everything along. + * + * Returns what the lower level code does. + */ +static int smk_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + struct vfsmount *mnt) +{ + return get_sb_single(fs_type, flags, data, smk_fill_super, mnt); +} + +static struct file_system_type smk_fs_type = { + .name = "smackfs", + .get_sb = smk_get_sb, + .kill_sb = kill_litter_super, +}; + +static struct vfsmount *smackfs_mount; + +/** + * init_smk_fs - get the smackfs superblock + * + * register the smackfs + * + * Returns 0 unless the registration fails. + */ +static int __init init_smk_fs(void) +{ + int err; + + err = register_filesystem(&smk_fs_type); + if (!err) { + smackfs_mount = kern_mount(&smk_fs_type); + if (IS_ERR(smackfs_mount)) { + printk(KERN_ERR "smackfs: could not mount!\n"); + err = PTR_ERR(smackfs_mount); + smackfs_mount = NULL; + } + } + + sema_init(&smack_write_sem, 1); + smk_cipso_doi(); + + return err; +} + +__initcall(init_smk_fs); diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 62449117ee14..61f5d425b630 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -23,7 +23,7 @@ #include <linux/file.h> #include <linux/slab.h> #include <linux/time.h> -#include <linux/latency.h> +#include <linux/pm_qos_params.h> #include <linux/uio.h> #include <sound/core.h> #include <sound/control.h> @@ -443,9 +443,11 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, snd_pcm_timer_resolution_change(substream); runtime->status->state = SNDRV_PCM_STATE_SETUP; - remove_acceptable_latency(substream->latency_id); + pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, + substream->latency_id); if ((usecs = period_to_usecs(runtime)) >= 0) - set_acceptable_latency(substream->latency_id, usecs); + pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY, + substream->latency_id, usecs); return 0; _error: /* hardware might be unuseable from this time, @@ -505,7 +507,8 @@ static int snd_pcm_hw_free(struct snd_pcm_substream *substream) if (substream->ops->hw_free) result = substream->ops->hw_free(substream); runtime->status->state = SNDRV_PCM_STATE_OPEN; - remove_acceptable_latency(substream->latency_id); + pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, + substream->latency_id); return result; } |