From 0172d9e322035bf7bb66a7dfdd795c38d71dbba9 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 15 Nov 2018 11:19:44 -0500 Subject: tools/memory-model: Rename some RCU relations In preparation for adding support for SRCU, rename "crit" to "rcu-rscs", rename "rscs" to "rcu-rscsi", and remove the restriction to only the outermost level of nesting. The name change is needed for disambiguating RCU read-side critical sections from SRCU read-side critical sections. Adding the "i" at the end of "rcu-rscsi" emphasizes that the relation is inverted; it links rcu_read_unlock() events to their corresponding preceding rcu_read_lock() events. The restriction to outermost nesting levels was never essential; it was included mostly to show that it could be done. Rather than add equivalent unnecessary code for SRCU lock nesting, it seemed better to remove the existing code. Signed-off-by: Alan Stern Signed-off-by: Paul E. McKenney Tested-by: Andrea Parri --- tools/memory-model/linux-kernel.bell | 9 +++------ tools/memory-model/linux-kernel.cat | 10 +++++----- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell index 796513362c05..353c8d68e030 100644 --- a/tools/memory-model/linux-kernel.bell +++ b/tools/memory-model/linux-kernel.bell @@ -34,7 +34,7 @@ enum Barriers = 'wmb (*smp_wmb*) || instructions F[Barriers] (* Compute matching pairs of nested Rcu-lock and Rcu-unlock *) -let matched = let rec +let rcu-rscs = let rec unmatched-locks = Rcu-lock \ domain(matched) and unmatched-unlocks = Rcu-unlock \ range(matched) and unmatched = unmatched-locks | unmatched-unlocks @@ -46,8 +46,5 @@ let matched = let rec in matched (* Validate nesting *) -flag ~empty Rcu-lock \ domain(matched) as unbalanced-rcu-locking -flag ~empty Rcu-unlock \ range(matched) as unbalanced-rcu-locking - -(* Outermost level of nesting only *) -let crit = matched \ (po^-1 ; matched ; po^-1) +flag ~empty Rcu-lock \ domain(rcu-rscs) as unbalanced-rcu-locking +flag ~empty Rcu-unlock \ range(rcu-rscs) as unbalanced-rcu-locking diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index 8f23c74a96fd..ab9de9c1234b 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -95,7 +95,7 @@ acyclic pb as propagation * onward on the one hand and from the rcu_read_unlock() backwards on the * other hand. *) -let rscs = po ; crit^-1 ; po? +let rcu-rscsi = po ; rcu-rscs^-1 ; po? (* * The synchronize_rcu() strong fence is special in that it can order not @@ -109,10 +109,10 @@ let rcu-link = hb* ; pb* ; prop * critical sections (joined by rcu-link) acts as a generalized strong fence. *) let rec rcu-fence = gp | - (gp ; rcu-link ; rscs) | - (rscs ; rcu-link ; gp) | - (gp ; rcu-link ; rcu-fence ; rcu-link ; rscs) | - (rscs ; rcu-link ; rcu-fence ; rcu-link ; gp) | + (gp ; rcu-link ; rcu-rscsi) | + (rcu-rscsi ; rcu-link ; gp) | + (gp ; rcu-link ; rcu-fence ; rcu-link ; rcu-rscsi) | + (rcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; gp) | (rcu-fence ; rcu-link ; rcu-fence) (* rb orders instructions just as pb does *) -- cgit v1.2.3 From 284749b0aebbf3ab26ff92198545aea36165f6bf Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 15 Nov 2018 11:19:58 -0500 Subject: tools/memory-model: Refactor some RCU relations In preparation for adding support for SRCU, refactor the definitions of rcu-fence, rcu-rscsi, rcu-link, and rb by moving the po and po? terms from the first two to the second two. An rcu-gp relation is added; it is equivalent to gp with the po and po? terms removed. This is necessary because for SRCU, we will have to use the loc relation to check that the terms at the start and end of each disjunct in the definition of rcu-fence refer to the same srcu_struct location. If these terms are hidden behind po and po?, there's no way to carry out this check. Signed-off-by: Alan Stern Signed-off-by: Paul E. McKenney Tested-by: Andrea Parri --- tools/memory-model/linux-kernel.cat | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index ab9de9c1234b..b8e6197f05af 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -91,32 +91,37 @@ acyclic pb as propagation (*******) (* - * Effect of read-side critical section proceeds from the rcu_read_lock() - * onward on the one hand and from the rcu_read_unlock() backwards on the + * Effects of read-side critical sections proceed from the rcu_read_unlock() + * backwards on the one hand, and from the rcu_read_lock() forwards on the * other hand. + * + * In the definition of rcu-fence below, the po term at the left-hand side + * of each disjunct and the po? term at the right-hand end have been factored + * out. They have been moved into the definitions of rcu-link and rb. *) -let rcu-rscsi = po ; rcu-rscs^-1 ; po? +let rcu-gp = [Sync-rcu] (* Compare with gp *) +let rcu-rscsi = rcu-rscs^-1 (* * The synchronize_rcu() strong fence is special in that it can order not * one but two non-rf relations, but only in conjunction with an RCU * read-side critical section. *) -let rcu-link = hb* ; pb* ; prop +let rcu-link = po? ; hb* ; pb* ; prop ; po (* * Any sequence containing at least as many grace periods as RCU read-side * critical sections (joined by rcu-link) acts as a generalized strong fence. *) -let rec rcu-fence = gp | - (gp ; rcu-link ; rcu-rscsi) | - (rcu-rscsi ; rcu-link ; gp) | - (gp ; rcu-link ; rcu-fence ; rcu-link ; rcu-rscsi) | - (rcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; gp) | +let rec rcu-fence = rcu-gp | + (rcu-gp ; rcu-link ; rcu-rscsi) | + (rcu-rscsi ; rcu-link ; rcu-gp) | + (rcu-gp ; rcu-link ; rcu-fence ; rcu-link ; rcu-rscsi) | + (rcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; rcu-gp) | (rcu-fence ; rcu-link ; rcu-fence) (* rb orders instructions just as pb does *) -let rb = prop ; rcu-fence ; hb* ; pb* +let rb = prop ; po ; rcu-fence ; po? ; hb* ; pb* irreflexive rb as rcu -- cgit v1.2.3 From a3f600d92da564ad35f237c8aeab268ca49377cc Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 15 Nov 2018 11:20:37 -0500 Subject: tools/memory-model: Add SRCU support Add support for SRCU. Herd creates srcu events and linux-kernel.def associates them with three possible annotations (srcu-lock, srcu-unlock, and sync-srcu) corresponding to the API routines srcu_read_lock(), srcu_read_unlock(), and synchronize_srcu(). The linux-kernel.bell file now declares the annotations and determines matching lock/unlock pairs delimiting SRCU read-side critical sections, and it also checks for synchronize_srcu() calls inside an RCU critical section (which would generate a "sleeping in atomic context" error in real kernel code). The linux-kernel.cat file now adds SRCU-induced ordering, analogous to the existing RCU-induced ordering, to the gp and rcu-fence relations. Curiously enough, these small changes to the model's .cat code are all that is needed to describe SRCU. Portions of this patch (linux-kernel.def and the first hunk in linux-kernel.bell) were written by Luc Maranget. Signed-off-by: Alan Stern CC: Luc Maranget Signed-off-by: Paul E. McKenney Tested-by: Andrea Parri --- tools/memory-model/linux-kernel.bell | 25 +++++++++++++++++++++++++ tools/memory-model/linux-kernel.cat | 18 ++++++++++++++---- tools/memory-model/linux-kernel.def | 5 +++++ 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell index 353c8d68e030..9c42cd9ddcb4 100644 --- a/tools/memory-model/linux-kernel.bell +++ b/tools/memory-model/linux-kernel.bell @@ -33,6 +33,12 @@ enum Barriers = 'wmb (*smp_wmb*) || 'after-unlock-lock (*smp_mb__after_unlock_lock*) instructions F[Barriers] +(* SRCU *) +enum SRCU = 'srcu-lock || 'srcu-unlock || 'sync-srcu +instructions SRCU[SRCU] +(* All srcu events *) +let Srcu = Srcu-lock | Srcu-unlock | Sync-srcu + (* Compute matching pairs of nested Rcu-lock and Rcu-unlock *) let rcu-rscs = let rec unmatched-locks = Rcu-lock \ domain(matched) @@ -48,3 +54,22 @@ let rcu-rscs = let rec (* Validate nesting *) flag ~empty Rcu-lock \ domain(rcu-rscs) as unbalanced-rcu-locking flag ~empty Rcu-unlock \ range(rcu-rscs) as unbalanced-rcu-locking + +(* Compute matching pairs of nested Srcu-lock and Srcu-unlock *) +let srcu-rscs = let rec + unmatched-locks = Srcu-lock \ domain(matched) + and unmatched-unlocks = Srcu-unlock \ range(matched) + and unmatched = unmatched-locks | unmatched-unlocks + and unmatched-po = ([unmatched] ; po ; [unmatched]) & loc + and unmatched-locks-to-unlocks = + ([unmatched-locks] ; po ; [unmatched-unlocks]) & loc + and matched = matched | (unmatched-locks-to-unlocks \ + (unmatched-po ; unmatched-po)) + in matched + +(* Validate nesting *) +flag ~empty Srcu-lock \ domain(srcu-rscs) as unbalanced-srcu-locking +flag ~empty Srcu-unlock \ range(srcu-rscs) as unbalanced-srcu-locking + +(* Check for use of synchronize_srcu() inside an RCU critical section *) +flag ~empty rcu-rscs & (po ; [Sync-srcu] ; po) as invalid-sleep diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index b8e6197f05af..8dcb37835b61 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -33,7 +33,7 @@ let mb = ([M] ; fencerel(Mb) ; [M]) | ([M] ; po? ; [LKW] ; fencerel(After-spinlock) ; [M]) | ([M] ; po ; [UL] ; (co | po) ; [LKW] ; fencerel(After-unlock-lock) ; [M]) -let gp = po ; [Sync-rcu] ; po? +let gp = po ; [Sync-rcu | Sync-srcu] ; po? let strong-fence = mb | gp @@ -92,15 +92,18 @@ acyclic pb as propagation (* * Effects of read-side critical sections proceed from the rcu_read_unlock() - * backwards on the one hand, and from the rcu_read_lock() forwards on the - * other hand. + * or srcu_read_unlock() backwards on the one hand, and from the + * rcu_read_lock() or srcu_read_lock() forwards on the other hand. * * In the definition of rcu-fence below, the po term at the left-hand side * of each disjunct and the po? term at the right-hand end have been factored * out. They have been moved into the definitions of rcu-link and rb. + * This was necessary in order to apply the "& loc" tests correctly. *) let rcu-gp = [Sync-rcu] (* Compare with gp *) +let srcu-gp = [Sync-srcu] let rcu-rscsi = rcu-rscs^-1 +let srcu-rscsi = srcu-rscs^-1 (* * The synchronize_rcu() strong fence is special in that it can order not @@ -112,12 +115,19 @@ let rcu-link = po? ; hb* ; pb* ; prop ; po (* * Any sequence containing at least as many grace periods as RCU read-side * critical sections (joined by rcu-link) acts as a generalized strong fence. + * Likewise for SRCU grace periods and read-side critical sections, provided + * the synchronize_srcu() and srcu_read_[un]lock() calls refer to the same + * struct srcu_struct location. *) -let rec rcu-fence = rcu-gp | +let rec rcu-fence = rcu-gp | srcu-gp | (rcu-gp ; rcu-link ; rcu-rscsi) | + ((srcu-gp ; rcu-link ; srcu-rscsi) & loc) | (rcu-rscsi ; rcu-link ; rcu-gp) | + ((srcu-rscsi ; rcu-link ; srcu-gp) & loc) | (rcu-gp ; rcu-link ; rcu-fence ; rcu-link ; rcu-rscsi) | + ((srcu-gp ; rcu-link ; rcu-fence ; rcu-link ; srcu-rscsi) & loc) | (rcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; rcu-gp) | + ((srcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; srcu-gp) & loc) | (rcu-fence ; rcu-link ; rcu-fence) (* rb orders instructions just as pb does *) diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def index b27911cc087d..1d6a120cde14 100644 --- a/tools/memory-model/linux-kernel.def +++ b/tools/memory-model/linux-kernel.def @@ -47,6 +47,11 @@ rcu_read_unlock() { __fence{rcu-unlock}; } synchronize_rcu() { __fence{sync-rcu}; } synchronize_rcu_expedited() { __fence{sync-rcu}; } +// SRCU +srcu_read_lock(X) __srcu{srcu-lock}(X) +srcu_read_unlock(X,Y) { __srcu{srcu-unlock}(X); } +synchronize_srcu(X) { __srcu{sync-srcu}(X); } + // Atomic atomic_read(X) READ_ONCE(*X) atomic_set(X,V) { WRITE_ONCE(*X,V); } -- cgit v1.2.3 From ad9fd20b6dadb0cb14551477fcebe0fdf2e697dd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 26 Nov 2018 14:26:43 -0800 Subject: tools/memory-model: Update README for addition of SRCU This commit updates the section on LKMM limitations to no longer say that SRCU is not modeled, but instead describe how LKMM's modeling of SRCU departs from the Linux-kernel implementation. TL;DR: There is no known valid use case that cares about the Linux kernel's ability to have partially overlapping SRCU read-side critical sections. Signed-off-by: Paul E. McKenney Acked-by: Andrea Parri --- tools/memory-model/README | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/tools/memory-model/README b/tools/memory-model/README index 0f2c366518c6..9d7d4f23503f 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README @@ -221,8 +221,29 @@ The Linux-kernel memory model has the following limitations: additional call_rcu() process to the site of the emulated rcu-barrier(). - e. Sleepable RCU (SRCU) is not modeled. It can be - emulated, but perhaps not simply. + e. Although sleepable RCU (SRCU) is now modeled, there + are some subtle differences between its semantics and + those in the Linux kernel. For example, the kernel + might interpret the following sequence as two partially + overlapping SRCU read-side critical sections: + + 1 r1 = srcu_read_lock(&my_srcu); + 2 do_something_1(); + 3 r2 = srcu_read_lock(&my_srcu); + 4 do_something_2(); + 5 srcu_read_unlock(&my_srcu, r1); + 6 do_something_3(); + 7 srcu_read_unlock(&my_srcu, r2); + + In contrast, LKMM will interpret this as a nested pair of + SRCU read-side critical sections, with the outer critical + section spanning lines 1-7 and the inner critical section + spanning lines 3-5. + + This difference would be more of a concern had anyone + identified a reasonable use case for partially overlapping + SRCU read-side critical sections. For more information, + please see: https://paulmck.livejournal.com/40593.html f. Reader-writer locking is not modeled. It can be emulated in litmus tests using atomic read-modify-write -- cgit v1.2.3 From 648e717586f2a832687fe44e2e0afb7a6fdea232 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 11 Dec 2018 11:38:53 -0500 Subject: tools/memory-model: Update Documentation/explanation.txt to include SRCU support The recent commit adding support for SRCU to the Linux Kernel Memory Model ended up changing the names and meanings of several relations. This patch updates the explanation.txt documentation file to reflect those changes. It also revises the statement of the RCU Guarantee to a more accurate form, and it adds a short paragraph mentioning the new support for SRCU. Signed-off-by: Alan Stern Cc: Akira Yokosawa Cc: Andrea Parri Cc: Boqun Feng Cc: Daniel Lustig Cc: David Howells Cc: Jade Alglave Cc: Luc Maranget Cc: Nicholas Piggin Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Will Deacon Signed-off-by: Paul E. McKenney Acked-by: Andrea Parri --- tools/memory-model/Documentation/explanation.txt | 289 ++++++++++++----------- 1 file changed, 152 insertions(+), 137 deletions(-) diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt index 35bff92cc773..68caa9a976d0 100644 --- a/tools/memory-model/Documentation/explanation.txt +++ b/tools/memory-model/Documentation/explanation.txt @@ -27,7 +27,7 @@ Explanation of the Linux-Kernel Memory Consistency Model 19. AND THEN THERE WAS ALPHA 20. THE HAPPENS-BEFORE RELATION: hb 21. THE PROPAGATES-BEFORE RELATION: pb - 22. RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb + 22. RCU RELATIONS: rcu-link, rcu-gp, rcu-rscsi, rcu-fence, and rb 23. LOCKING 24. ODDS AND ENDS @@ -1430,8 +1430,8 @@ they execute means that it cannot have cycles. This requirement is the content of the LKMM's "propagation" axiom. -RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb ----------------------------------------------------- +RCU RELATIONS: rcu-link, rcu-gp, rcu-rscsi, rcu-fence, and rb +------------------------------------------------------------- RCU (Read-Copy-Update) is a powerful synchronization mechanism. It rests on two concepts: grace periods and read-side critical sections. @@ -1446,17 +1446,19 @@ As far as memory models are concerned, RCU's main feature is its Grace-Period Guarantee, which states that a critical section can never span a full grace period. In more detail, the Guarantee says: - If a critical section starts before a grace period then it - must end before the grace period does. In addition, every - store that propagates to the critical section's CPU before the - end of the critical section must propagate to every CPU before - the end of the grace period. + For any critical section C and any grace period G, at least + one of the following statements must hold: - If a critical section ends after a grace period ends then it - must start after the grace period does. In addition, every - store that propagates to the grace period's CPU before the - start of the grace period must propagate to every CPU before - the start of the critical section. +(1) C ends before G does, and in addition, every store that + propagates to C's CPU before the end of C must propagate to + every CPU before G ends. + +(2) G starts before C does, and in addition, every store that + propagates to G's CPU before the start of G must propagate + to every CPU before C starts. + +In particular, it is not possible for a critical section to both start +before and end after a grace period. Here is a simple example of RCU in action: @@ -1483,10 +1485,11 @@ The Grace Period Guarantee tells us that when this code runs, it will never end with r1 = 1 and r2 = 0. The reasoning is as follows. r1 = 1 means that P0's store to x propagated to P1 before P1 called synchronize_rcu(), so P0's critical section must have started before -P1's grace period. On the other hand, r2 = 0 means that P0's store to -y, which occurs before the end of the critical section, did not -propagate to P1 before the end of the grace period, violating the -Guarantee. +P1's grace period, contrary to part (2) of the Guarantee. On the +other hand, r2 = 0 means that P0's store to y, which occurs before the +end of the critical section, did not propagate to P1 before the end of +the grace period, contrary to part (1). Together the results violate +the Guarantee. In the kernel's implementations of RCU, the requirements for stores to propagate to every CPU are fulfilled by placing strong fences at @@ -1504,11 +1507,11 @@ before" or "ends after" a grace period? Some aspects of the meaning are pretty obvious, as in the example above, but the details aren't entirely clear. The LKMM formalizes this notion by means of the rcu-link relation. rcu-link encompasses a very general notion of -"before": Among other things, X ->rcu-link Z includes cases where X -happens-before or is equal to some event Y which is equal to or comes -before Z in the coherence order. When Y = Z this says that X ->rfe Z -implies X ->rcu-link Z. In addition, when Y = X it says that X ->fr Z -and X ->co Z each imply X ->rcu-link Z. +"before": If E and F are RCU fence events (i.e., rcu_read_lock(), +rcu_read_unlock(), or synchronize_rcu()) then among other things, +E ->rcu-link F includes cases where E is po-before some memory-access +event X, F is po-after some memory-access event Y, and we have any of +X ->rfe Y, X ->co Y, or X ->fr Y. The formal definition of the rcu-link relation is more than a little obscure, and we won't give it here. It is closely related to the pb @@ -1516,171 +1519,173 @@ relation, and the details don't matter unless you want to comb through a somewhat lengthy formal proof. Pretty much all you need to know about rcu-link is the information in the preceding paragraph. -The LKMM also defines the gp and rscs relations. They bring grace -periods and read-side critical sections into the picture, in the +The LKMM also defines the rcu-gp and rcu-rscsi relations. They bring +grace periods and read-side critical sections into the picture, in the following way: - E ->gp F means there is a synchronize_rcu() fence event S such - that E ->po S and either S ->po F or S = F. In simple terms, - there is a grace period po-between E and F. + E ->rcu-gp F means that E and F are in fact the same event, + and that event is a synchronize_rcu() fence (i.e., a grace + period). - E ->rscs F means there is a critical section delimited by an - rcu_read_lock() fence L and an rcu_read_unlock() fence U, such - that E ->po U and either L ->po F or L = F. You can think of - this as saying that E and F are in the same critical section - (in fact, it also allows E to be po-before the start of the - critical section and F to be po-after the end). + E ->rcu-rscsi F means that E and F are the rcu_read_unlock() + and rcu_read_lock() fence events delimiting some read-side + critical section. (The 'i' at the end of the name emphasizes + that this relation is "inverted": It links the end of the + critical section to the start.) If we think of the rcu-link relation as standing for an extended -"before", then X ->gp Y ->rcu-link Z says that X executes before a -grace period which ends before Z executes. (In fact it covers more -than this, because it also includes cases where X executes before a -grace period and some store propagates to Z's CPU before Z executes -but doesn't propagate to some other CPU until after the grace period -ends.) Similarly, X ->rscs Y ->rcu-link Z says that X is part of (or -before the start of) a critical section which starts before Z -executes. - -The LKMM goes on to define the rcu-fence relation as a sequence of gp -and rscs links separated by rcu-link links, in which the number of gp -links is >= the number of rscs links. For example: +"before", then X ->rcu-gp Y ->rcu-link Z roughly says that X is a +grace period which ends before Z begins. (In fact it covers more than +this, because it also includes cases where some store propagates to +Z's CPU before Z begins but doesn't propagate to some other CPU until +after X ends.) Similarly, X ->rcu-rscsi Y ->rcu-link Z says that X is +the end of a critical section which starts before Z begins. + +The LKMM goes on to define the rcu-fence relation as a sequence of +rcu-gp and rcu-rscsi links separated by rcu-link links, in which the +number of rcu-gp links is >= the number of rcu-rscsi links. For +example: - X ->gp Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V + X ->rcu-gp Y ->rcu-link Z ->rcu-rscsi T ->rcu-link U ->rcu-gp V would imply that X ->rcu-fence V, because this sequence contains two -gp links and only one rscs link. (It also implies that X ->rcu-fence T -and Z ->rcu-fence V.) On the other hand: +rcu-gp links and one rcu-rscsi link. (It also implies that +X ->rcu-fence T and Z ->rcu-fence V.) On the other hand: - X ->rscs Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V + X ->rcu-rscsi Y ->rcu-link Z ->rcu-rscsi T ->rcu-link U ->rcu-gp V does not imply X ->rcu-fence V, because the sequence contains only -one gp link but two rscs links. +one rcu-gp link but two rcu-rscsi links. The rcu-fence relation is important because the Grace Period Guarantee means that rcu-fence acts kind of like a strong fence. In particular, -if W is a write and we have W ->rcu-fence Z, the Guarantee says that W -will propagate to every CPU before Z executes. +E ->rcu-fence F implies not only that E begins before F ends, but also +that any write po-before E will propagate to every CPU before any +instruction po-after F can execute. (However, it does not imply that +E must execute before F; in fact, each synchronize_rcu() fence event +is linked to itself by rcu-fence as a degenerate case.) To prove this in full generality requires some intellectual effort. We'll consider just a very simple case: - W ->gp X ->rcu-link Y ->rscs Z. + G ->rcu-gp W ->rcu-link Z ->rcu-rscsi F. -This formula means that there is a grace period G and a critical -section C such that: +This formula means that G and W are the same event (a grace period), +and there are events X, Y and a read-side critical section C such that: - 1. W is po-before G; + 1. G = W is po-before or equal to X; - 2. X is equal to or po-after G; + 2. X comes "before" Y in some sense (including rfe, co and fr); - 3. X comes "before" Y in some sense; + 2. Y is po-before Z; - 4. Y is po-before the end of C; + 4. Z is the rcu_read_unlock() event marking the end of C; - 5. Z is equal to or po-after the start of C. + 5. F is the rcu_read_lock() event marking the start of C. -From 2 - 4 we deduce that the grace period G ends before the critical -section C. Then the second part of the Grace Period Guarantee says -not only that G starts before C does, but also that W (which executes -on G's CPU before G starts) must propagate to every CPU before C -starts. In particular, W propagates to every CPU before Z executes -(or finishes executing, in the case where Z is equal to the -rcu_read_lock() fence event which starts C.) This sort of reasoning -can be expanded to handle all the situations covered by rcu-fence. +From 1 - 4 we deduce that the grace period G ends before the critical +section C. Then part (2) of the Grace Period Guarantee says not only +that G starts before C does, but also that any write which executes on +G's CPU before G starts must propagate to every CPU before C starts. +In particular, the write propagates to every CPU before F finishes +executing and hence before any instruction po-after F can execute. +This sort of reasoning can be extended to handle all the situations +covered by rcu-fence. Finally, the LKMM defines the RCU-before (rb) relation in terms of rcu-fence. This is done in essentially the same way as the pb relation was defined in terms of strong-fence. We will omit the -details; the end result is that E ->rb F implies E must execute before -F, just as E ->pb F does (and for much the same reasons). +details; the end result is that E ->rb F implies E must execute +before F, just as E ->pb F does (and for much the same reasons). Putting this all together, the LKMM expresses the Grace Period Guarantee by requiring that the rb relation does not contain a cycle. -Equivalently, this "rcu" axiom requires that there are no events E and -F with E ->rcu-link F ->rcu-fence E. Or to put it a third way, the -axiom requires that there are no cycles consisting of gp and rscs -alternating with rcu-link, where the number of gp links is >= the -number of rscs links. +Equivalently, this "rcu" axiom requires that there are no events E +and F with E ->rcu-link F ->rcu-fence E. Or to put it a third way, +the axiom requires that there are no cycles consisting of rcu-gp and +rcu-rscsi alternating with rcu-link, where the number of rcu-gp links +is >= the number of rcu-rscsi links. Justifying the axiom isn't easy, but it is in fact a valid formalization of the Grace Period Guarantee. We won't attempt to go through the detailed argument, but the following analysis gives a -taste of what is involved. Suppose we have a violation of the first -part of the Guarantee: A critical section starts before a grace -period, and some store propagates to the critical section's CPU before -the end of the critical section but doesn't propagate to some other -CPU until after the end of the grace period. +taste of what is involved. Suppose both parts of the Guarantee are +violated: A critical section starts before a grace period, and some +store propagates to the critical section's CPU before the end of the +critical section but doesn't propagate to some other CPU until after +the end of the grace period. Putting symbols to these ideas, let L and U be the rcu_read_lock() and rcu_read_unlock() fence events delimiting the critical section in question, and let S be the synchronize_rcu() fence event for the grace period. Saying that the critical section starts before S means there -are events E and F where E is po-after L (which marks the start of the -critical section), E is "before" F in the sense of the rcu-link -relation, and F is po-before the grace period S: +are events Q and R where Q is po-after L (which marks the start of the +critical section), Q is "before" R in the sense used by the rcu-link +relation, and R is po-before the grace period S. Thus we have: - L ->po E ->rcu-link F ->po S. + L ->rcu-link S. -Let W be the store mentioned above, let Z come before the end of the +Let W be the store mentioned above, let Y come before the end of the critical section and witness that W propagates to the critical -section's CPU by reading from W, and let Y on some arbitrary CPU be a -witness that W has not propagated to that CPU, where Y happens after +section's CPU by reading from W, and let Z on some arbitrary CPU be a +witness that W has not propagated to that CPU, where Z happens after some event X which is po-after S. Symbolically, this amounts to: - S ->po X ->hb* Y ->fr W ->rf Z ->po U. + S ->po X ->hb* Z ->fr W ->rf Y ->po U. -The fr link from Y to W indicates that W has not propagated to Y's CPU -at the time that Y executes. From this, it can be shown (see the -discussion of the rcu-link relation earlier) that X and Z are related -by rcu-link, yielding: +The fr link from Z to W indicates that W has not propagated to Z's CPU +at the time that Z executes. From this, it can be shown (see the +discussion of the rcu-link relation earlier) that S and U are related +by rcu-link: - S ->po X ->rcu-link Z ->po U. + S ->rcu-link U. -The formulas say that S is po-between F and X, hence F ->gp X. They -also say that Z comes before the end of the critical section and E -comes after its start, hence Z ->rscs E. From all this we obtain: +Since S is a grace period we have S ->rcu-gp S, and since L and U are +the start and end of the critical section C we have U ->rcu-rscsi L. +From this we obtain: - F ->gp X ->rcu-link Z ->rscs E ->rcu-link F, + S ->rcu-gp S ->rcu-link U ->rcu-rscsi L ->rcu-link S, a forbidden cycle. Thus the "rcu" axiom rules out this violation of the Grace Period Guarantee. For something a little more down-to-earth, let's see how the axiom works out in practice. Consider the RCU code example from above, this -time with statement labels added to the memory access instructions: +time with statement labels added: int x, y; P0() { - rcu_read_lock(); - W: WRITE_ONCE(x, 1); - X: WRITE_ONCE(y, 1); - rcu_read_unlock(); + L: rcu_read_lock(); + X: WRITE_ONCE(x, 1); + Y: WRITE_ONCE(y, 1); + U: rcu_read_unlock(); } P1() { int r1, r2; - Y: r1 = READ_ONCE(x); - synchronize_rcu(); - Z: r2 = READ_ONCE(y); + Z: r1 = READ_ONCE(x); + S: synchronize_rcu(); + W: r2 = READ_ONCE(y); } -If r2 = 0 at the end then P0's store at X overwrites the value that -P1's load at Z reads from, so we have Z ->fre X and thus Z ->rcu-link X. -In addition, there is a synchronize_rcu() between Y and Z, so therefore -we have Y ->gp Z. +If r2 = 0 at the end then P0's store at Y overwrites the value that +P1's load at W reads from, so we have W ->fre Y. Since S ->po W and +also Y ->po U, we get S ->rcu-link U. In addition, S ->rcu-gp S +because S is a grace period. -If r1 = 1 at the end then P1's load at Y reads from P0's store at W, -so we have W ->rcu-link Y. In addition, W and X are in the same critical -section, so therefore we have X ->rscs W. +If r1 = 1 at the end then P1's load at Z reads from P0's store at X, +so we have X ->rfe Z. Together with L ->po X and Z ->po S, this +yields L ->rcu-link S. And since L and U are the start and end of a +critical section, we have U ->rcu-rscsi L. -Then X ->rscs W ->rcu-link Y ->gp Z ->rcu-link X is a forbidden cycle, -violating the "rcu" axiom. Hence the outcome is not allowed by the -LKMM, as we would expect. +Then U ->rcu-rscsi L ->rcu-link S ->rcu-gp S ->rcu-link U is a +forbidden cycle, violating the "rcu" axiom. Hence the outcome is not +allowed by the LKMM, as we would expect. For contrast, let's see what can happen in a more complicated example: @@ -1690,51 +1695,52 @@ For contrast, let's see what can happen in a more complicated example: { int r0; - rcu_read_lock(); - W: r0 = READ_ONCE(x); - X: WRITE_ONCE(y, 1); - rcu_read_unlock(); + L0: rcu_read_lock(); + r0 = READ_ONCE(x); + WRITE_ONCE(y, 1); + U0: rcu_read_unlock(); } P1() { int r1; - Y: r1 = READ_ONCE(y); - synchronize_rcu(); - Z: WRITE_ONCE(z, 1); + r1 = READ_ONCE(y); + S1: synchronize_rcu(); + WRITE_ONCE(z, 1); } P2() { int r2; - rcu_read_lock(); - U: r2 = READ_ONCE(z); - V: WRITE_ONCE(x, 1); - rcu_read_unlock(); + L2: rcu_read_lock(); + r2 = READ_ONCE(z); + WRITE_ONCE(x, 1); + U2: rcu_read_unlock(); } If r0 = r1 = r2 = 1 at the end, then similar reasoning to before shows -that W ->rscs X ->rcu-link Y ->gp Z ->rcu-link U ->rscs V ->rcu-link W. -However this cycle is not forbidden, because the sequence of relations -contains fewer instances of gp (one) than of rscs (two). Consequently -the outcome is allowed by the LKMM. The following instruction timing -diagram shows how it might actually occur: +that U0 ->rcu-rscsi L0 ->rcu-link S1 ->rcu-gp S1 ->rcu-link U2 ->rcu-rscsi +L2 ->rcu-link U0. However this cycle is not forbidden, because the +sequence of relations contains fewer instances of rcu-gp (one) than of +rcu-rscsi (two). Consequently the outcome is allowed by the LKMM. +The following instruction timing diagram shows how it might actually +occur: P0 P1 P2 -------------------- -------------------- -------------------- rcu_read_lock() -X: WRITE_ONCE(y, 1) - Y: r1 = READ_ONCE(y) +WRITE_ONCE(y, 1) + r1 = READ_ONCE(y) synchronize_rcu() starts . rcu_read_lock() - . V: WRITE_ONCE(x, 1) -W: r0 = READ_ONCE(x) . + . WRITE_ONCE(x, 1) +r0 = READ_ONCE(x) . rcu_read_unlock() . synchronize_rcu() ends - Z: WRITE_ONCE(z, 1) - U: r2 = READ_ONCE(z) + WRITE_ONCE(z, 1) + r2 = READ_ONCE(z) rcu_read_unlock() This requires P0 and P2 to execute their loads and stores out of @@ -1744,6 +1750,15 @@ section in P0 both starts before P1's grace period does and ends before it does, and the critical section in P2 both starts after P1's grace period does and ends after it does. +Addendum: The LKMM now supports SRCU (Sleepable Read-Copy-Update) in +addition to normal RCU. The ideas involved are much the same as +above, with new relations srcu-gp and srcu-rscsi added to represent +SRCU grace periods and read-side critical sections. There is a +restriction on the srcu-gp and srcu-rscsi links that can appear in an +rcu-fence sequence (the srcu-rscsi links must be paired with srcu-gp +links having the same SRCU domain with proper nesting); the details +are relatively unimportant. + LOCKING ------- -- cgit v1.2.3 From 9393998e9ee094f99d18783cc85c489e20f0e0e7 Mon Sep 17 00:00:00 2001 From: Luc Maranget Date: Thu, 27 Dec 2018 16:27:12 +0100 Subject: tools/memory-model: Dynamically check SRCU lock-to-unlock matching This commit checks that the return value of srcu_read_lock() is passed to the matching srcu_read_unlock(), where "matching" is determined by nesting. This check operates as follows: 1. srcu_read_lock() creates an integer token, which is stored into the generated events. 2. srcu_read_unlock() records its second (token) argument into the generated event. 3. A new herd primitive 'different-values' filters out pairs of events with identical values from the relation passed as its argument. 4. The bell file applies the above primitive to the (srcu) read-side-critical-section relation 'srcu-rscs' and flags non-empty results. BEWARE: Works only with herd version 7.51+6 and onwards. Signed-off-by: Luc Maranget Signed-off-by: Paul E. McKenney [ paulmck: Apply Andrea Parri's off-list feedback. ] Acked-by: Alan Stern --- tools/memory-model/linux-kernel.bell | 3 +++ tools/memory-model/linux-kernel.cat | 2 ++ tools/memory-model/linux-kernel.def | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell index 9c42cd9ddcb4..def9131d3d8e 100644 --- a/tools/memory-model/linux-kernel.bell +++ b/tools/memory-model/linux-kernel.bell @@ -73,3 +73,6 @@ flag ~empty Srcu-unlock \ range(srcu-rscs) as unbalanced-srcu-locking (* Check for use of synchronize_srcu() inside an RCU critical section *) flag ~empty rcu-rscs & (po ; [Sync-srcu] ; po) as invalid-sleep + +(* Validate SRCU dynamic match *) +flag ~empty different-values(srcu-rscs) as srcu-bad-nesting diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index 8dcb37835b61..95bf45f1215f 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0+ (* + * Requires herd version 7.51+6 or higher. + * * Copyright (C) 2015 Jade Alglave , * Copyright (C) 2016 Luc Maranget for Inria * Copyright (C) 2017 Alan Stern , diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def index 1d6a120cde14..0c3f0ef486f4 100644 --- a/tools/memory-model/linux-kernel.def +++ b/tools/memory-model/linux-kernel.def @@ -49,7 +49,7 @@ synchronize_rcu_expedited() { __fence{sync-rcu}; } // SRCU srcu_read_lock(X) __srcu{srcu-lock}(X) -srcu_read_unlock(X,Y) { __srcu{srcu-unlock}(X); } +srcu_read_unlock(X,Y) { __srcu{srcu-unlock}(X,Y); } synchronize_srcu(X) { __srcu{sync-srcu}(X); } // Atomic -- cgit v1.2.3 From 034fb712a620c84efa78e2889845d5dea95f688f Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Thu, 31 Jan 2019 08:08:40 -0800 Subject: tools/memory-model: Avoid duplicating herdtools versions Currently, herdtools version information appears no fewer than three times in the LKMM source, which is difficult to maintain. This commit therefore places the required version in one place, namely the tools/memory-model/README file. Signed-off-by: Andrea Parri Signed-off-by: Paul E. McKenney Acked-by: Alan Stern --- tools/memory-model/README | 8 ++++++-- tools/memory-model/linux-kernel.cat | 2 -- tools/memory-model/lock.cat | 3 --- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/memory-model/README b/tools/memory-model/README index 9d7d4f23503f..2b87f3971548 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README @@ -20,13 +20,17 @@ that litmus test to be exercised within the Linux kernel. REQUIREMENTS ============ -Version 7.49 of the "herd7" and "klitmus7" tools must be downloaded -separately: +Version 7.52 or higher of the "herd7" and "klitmus7" tools must be +downloaded separately: https://github.com/herd/herdtools7 See "herdtools7/INSTALL.md" for installation instructions. +Note that although these tools usually provide backwards compatibility, +this is not absolutely guaranteed. Therefore, if a later version does +not work, please try using the exact version called out above. + ================== BASIC USAGE: HERD7 diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index 95bf45f1215f..8dcb37835b61 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0+ (* - * Requires herd version 7.51+6 or higher. - * * Copyright (C) 2015 Jade Alglave , * Copyright (C) 2016 Luc Maranget for Inria * Copyright (C) 2017 Alan Stern , diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat index 305ded17e741..a059d1a6d8a2 100644 --- a/tools/memory-model/lock.cat +++ b/tools/memory-model/lock.cat @@ -6,9 +6,6 @@ (* * Generate coherence orders and handle lock operations - * - * Warning: spin_is_locked() crashes herd7 versions strictly before 7.48. - * spin_is_locked() is functional from herd7 version 7.49. *) include "cross.cat" -- cgit v1.2.3 From 487ecc460732cab178897e81a7f5ef6183eff143 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 25 Jan 2019 06:55:47 +0900 Subject: sched/Documentation/kokr: Update Korean translation to update wake_up() & co. memory-barrier guarantees Translate this commit to Korean: 7696f9910a9a ("sched/Documentation: Update wake_up() & co. memory-barrier guarantees") Signed-off-by: SeongJae Park Reviewed-by: Yunjae Lee Signed-off-by: Paul E. McKenney --- .../translations/ko_KR/memory-barriers.txt | 43 +++++++++++++--------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt index 7f01fb1c1084..4a6cf4d58cbf 100644 --- a/Documentation/translations/ko_KR/memory-barriers.txt +++ b/Documentation/translations/ko_KR/memory-barriers.txt @@ -2146,33 +2146,40 @@ set_current_state() 는 다음의 것들로 감싸질 수도 있습니다: event_indicated = 1; wake_up_process(event_daemon); -wake_up() 류에 의해 쓰기 메모리 배리어가 내포됩니다. 만약 그것들이 뭔가를 -깨운다면요. 이 배리어는 태스크 상태가 지워지기 전에 수행되므로, 이벤트를 -알리기 위한 STORE 와 태스크 상태를 TASK_RUNNING 으로 설정하는 STORE 사이에 -위치하게 됩니다. +wake_up() 이 무언가를 깨우게 되면, 이 함수는 범용 메모리 배리어를 수행합니다. +이 함수가 아무것도 깨우지 않는다면 메모리 배리어는 수행될 수도, 수행되지 않을 +수도 있습니다; 이 경우에 메모리 배리어를 수행할 거라 오해해선 안됩니다. 이 +배리어는 태스크 상태가 접근되기 전에 수행되는데, 자세히 말하면 이 이벤트를 +알리기 위한 STORE 와 TASK_RUNNING 으로 상태를 쓰는 STORE 사이에 수행됩니다: - CPU 1 CPU 2 + CPU 1 (Sleeper) CPU 2 (Waker) =============================== =============================== set_current_state(); STORE event_indicated smp_store_mb(); wake_up(); - STORE current->state <쓰기 배리어> - <범용 배리어> STORE current->state - LOAD event_indicated + STORE current->state ... + <범용 배리어> <범용 배리어> + LOAD event_indicated if ((LOAD task->state) & TASK_NORMAL) + STORE task->state -한번더 말합니다만, 이 쓰기 메모리 배리어는 이 코드가 정말로 뭔가를 깨울 때에만 -실행됩니다. 이걸 설명하기 위해, X 와 Y 는 모두 0 으로 초기화 되어 있다는 가정 -하에 아래의 이벤트 시퀀스를 생각해 봅시다: +여기서 "task" 는 깨어나지는 쓰레드이고 CPU 1 의 "current" 와 같습니다. + +반복하지만, wake_up() 이 무언가를 정말 깨운다면 범용 메모리 배리어가 수행될 +것이 보장되지만, 그렇지 않다면 그런 보장이 없습니다. 이걸 이해하기 위해, X 와 +Y 는 모두 0 으로 초기화 되어 있다는 가정 하에 아래의 이벤트 시퀀스를 생각해 +봅시다: CPU 1 CPU 2 =============================== =============================== - X = 1; STORE event_indicated + X = 1; Y = 1; smp_mb(); wake_up(); - Y = 1; wait_event(wq, Y == 1); - wake_up(); load from Y sees 1, no memory barrier - load from X might see 0 + LOAD Y LOAD X + +정말로 깨우기가 행해졌다면, 두 로드 중 (최소한) 하나는 1 을 보게 됩니다. +반면에, 실제 깨우기가 행해지지 않았다면, 두 로드 모두 0을 볼 수도 있습니다. -위 예제에서의 경우와 달리 깨우기가 정말로 행해졌다면, CPU 2 의 X 로드는 1 을 -본다고 보장될 수 있을 겁니다. +wake_up_process() 는 항상 범용 메모리 배리어를 수행합니다. 이 배리어 역시 +태스크 상태가 접근되기 전에 수행됩니다. 특히, 앞의 예제 코드에서 wake_up() 이 +wake_up_process() 로 대체된다면 두 로드 중 하나는 1을 볼 것이 보장됩니다. 사용 가능한 깨우기류 함수들로 다음과 같은 것들이 있습니다: @@ -2192,6 +2199,8 @@ wake_up() 류에 의해 쓰기 메모리 배리어가 내포됩니다. 만약 wake_up_poll(); wake_up_process(); +메모리 순서규칙 관점에서, 이 함수들은 모두 wake_up() 과 같거나 보다 강한 순서 +보장을 제공합니다. [!] 잠재우는 코드와 깨우는 코드에 내포되는 메모리 배리어들은 깨우기 전에 이루어진 스토어를 잠재우는 코드가 set_current_state() 를 호출한 후에 행하는 -- cgit v1.2.3 From db467147f131b8ab799066b9779addb8603d3baf Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 25 Jan 2019 06:55:48 +0900 Subject: locking/memory-barriers/kokr: Update Korean translation to replace smp_cond_acquire() with smp_cond_load_acquire() Transalte this commit to Korean: 2f359c7ea554 ("locking/memory-barriers: Replace smp_cond_acquire() with smp_cond_load_acquire()") Signed-off-by: SeongJae Park Reviewed-by: Yunjae Lee Signed-off-by: Paul E. McKenney --- Documentation/translations/ko_KR/memory-barriers.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt index 4a6cf4d58cbf..db0b9d8619f1 100644 --- a/Documentation/translations/ko_KR/memory-barriers.txt +++ b/Documentation/translations/ko_KR/memory-barriers.txt @@ -493,10 +493,8 @@ CPU 에게 기대할 수 있는 최소한의 보장사항 몇가지가 있습니 이 타입의 오퍼레이션은 단방향의 투과성 배리어처럼 동작합니다. ACQUIRE 오퍼레이션 뒤의 모든 메모리 오퍼레이션들이 ACQUIRE 오퍼레이션 후에 일어난 것으로 시스템의 나머지 컴포넌트들에 보이게 될 것이 보장됩니다. - LOCK 오퍼레이션과 smp_load_acquire(), smp_cond_acquire() 오퍼레이션도 - ACQUIRE 오퍼레이션에 포함됩니다. smp_cond_acquire() 오퍼레이션은 컨트롤 - 의존성과 smp_rmb() 를 사용해서 ACQUIRE 의 의미적 요구사항(semantic)을 - 충족시킵니다. + LOCK 오퍼레이션과 smp_load_acquire(), smp_cond_load_acquire() 오퍼레이션도 + ACQUIRE 오퍼레이션에 포함됩니다. ACQUIRE 오퍼레이션 앞의 메모리 오퍼레이션들은 ACQUIRE 오퍼레이션 완료 후에 수행된 것처럼 보일 수 있습니다. -- cgit v1.2.3 From f1887143f5984f23d2360f2efed6ef481bb41117 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 11 Feb 2019 18:09:43 +0100 Subject: Documentation/atomic_t: Clarify signed vs unsigned Clarify the whole signed vs unsigned issue for atomic_t. There has been enough confusion on this topic to warrant a few explicit words I feel. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Acked-by: Boqun Feng Signed-off-by: Paul E. McKenney --- Documentation/atomic_t.txt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt index 913396ac5824..dca3fb0554db 100644 --- a/Documentation/atomic_t.txt +++ b/Documentation/atomic_t.txt @@ -56,6 +56,23 @@ Barriers: smp_mb__{before,after}_atomic() +TYPES (signed vs unsigned) +----- + +While atomic_t, atomic_long_t and atomic64_t use int, long and s64 +respectively (for hysterical raisins), the kernel uses -fno-strict-overflow +(which implies -fwrapv) and defines signed overflow to behave like +2s-complement. + +Therefore, an explicitly unsigned variant of the atomic ops is strictly +unnecessary and we can simply cast, there is no UB. + +There was a bug in UBSAN prior to GCC-8 that would generate UB warnings for +signed types. + +With this we also conform to the C/C++ _Atomic behaviour and things like +P1236R1. + SEMANTICS --------- -- cgit v1.2.3