author	Raghavendra K T <raghavendra.kt@amd.com>	2023-03-01 17:49:01 +0530
committer	Andrew Morton <akpm@linux-foundation.org>	2023-04-05 20:03:03 -0700
commit	fc137c0ddab29b591db6a091dc6d7ce20ccb73f2 (patch)
tree	582d8a0e080aaf6b31d3cec59f65c88d8cb74e85 /kernel/sched
parent	ef6a22b70f6d90449a5c797b8968a682824e2011 (diff)
sched/numa: enhance vma scanning logic
During NUMA scanning, make sure only the relevant VMAs of a task are scanned.

Before: all tasks of a process participate in scanning a VMA, even if they never access it in their lifespan.

Now: except for the first few unconditional scans, tasks no longer scan every VMA; a VMA is skipped if the process has not touched it (excluding false positives caused by PID collisions).

Logic used:
1) During a fault, 6 bits of the PID are used to set an active bit in the VMA's numab state, to remember which PIDs accessed the VMA. (Thanks Mel)
2) Subsequently, in the scan path, VMA scanning is skipped if the current PID has not accessed the VMA.
3) The first two scans are allowed unconditionally, to preserve the earlier scanning behaviour.

Acknowledgement to Bharata B Rao <bharata@amd.com> for the initial patch to store PID information, and to Peter Zijlstra <peterz@infradead.org> for the use of test-and-set bit.

Link: https://lkml.kernel.org/r/092f03105c7c1d3450f4636b1ea350407f07640e.1677672277.git.raghavendra.kt@amd.com
Signed-off-by: Raghavendra K T <raghavendra.kt@amd.com>
Suggested-by: Mel Gorman <mgorman@techsingularity.net>
Cc: David Hildenbrand <david@redhat.com>
Cc: Disha Talreja <dishaa.talreja@amd.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Mike Rapoport <rppt@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
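Point (1) above, the fault-side marking of the accessing PID, is not part of the hunk shown below; it lands in the mm side of the series. A minimal sketch of what that marking could look like, with the helper name and placement assumed purely for illustration:

/*
 * Illustrative sketch only: record the low 6 bits of the faulting
 * task's PID in the per-VMA access_pids word. The helper name and
 * call site are assumptions; the actual update is made in the mm
 * part of this series, not in kernel/sched/fair.c.
 */
static inline void vma_mark_access_pid(struct vm_area_struct *vma)
{
	unsigned int pid_bit;

	if (!vma->numab_state)
		return;

	/* BITS_PER_LONG is 64 on 64-bit, so this keeps 6 bits of the PID. */
	pid_bit = current->pid % BITS_PER_LONG;

	/* Test before set to avoid dirtying the cacheline on repeat faults. */
	if (!test_bit(pid_bit, &vma->numab_state->access_pids))
		__set_bit(pid_bit, &vma->numab_state->access_pids);
}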
Diffstat (limited to 'kernel/sched')
-rw-r--r--	kernel/sched/fair.c	19
1 file changed, 19 insertions(+), 0 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7072de1686d5..ef27b5931480 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2928,6 +2928,21 @@ static void reset_ptenuma_scan(struct task_struct *p)
p->mm->numa_scan_offset = 0;
}
+static bool vma_is_accessed(struct vm_area_struct *vma)
+{
+ /*
+ * Allow unconditional access for the first two scan rounds, so that all
+ * pages of the VMAs get a prot_none fault introduced irrespective of
+ * actual accesses. This is also done to avoid any side effect of task
+ * scanning amplifying the unfairness of a disjoint set of VMAs' accesses.
+ */
+ if (READ_ONCE(current->mm->numa_scan_seq) < 2)
+ return true;
+
+ return test_bit(current->pid % BITS_PER_LONG,
+ &vma->numab_state->access_pids);
+}
+
/*
* The expensive part of numa migration is done from task_work context.
* Triggered from task_tick_numa().
@@ -3046,6 +3061,10 @@ static void task_numa_work(struct callback_head *work)
vma->numab_state->next_scan))
continue;
+ /* Do not scan the VMA if the task has not accessed it */
+ if (!vma_is_accessed(vma))
+ continue;
+
do {
start = max(start, vma->vm_start);
end = ALIGN(start + (pages << PAGE_SHIFT), HPAGE_SIZE);
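
The modulo in vma_is_accessed() keeps only the low 6 bits of a PID, so PIDs that differ by a multiple of BITS_PER_LONG share a bit; this is the source of the false-positive scans mentioned in the message above. A small standalone illustration (userspace C, assuming a 64-bit BITS_PER_LONG):

#include <stdio.h>

#define BITS_PER_LONG 64	/* assumption: 64-bit build */

int main(void)
{
	unsigned int pids[] = { 100, 164, 4196 };

	/* All three PIDs differ by multiples of 64 and map to bit 36. */
	for (int i = 0; i < 3; i++)
		printf("pid %u -> bit %u\n", pids[i], pids[i] % BITS_PER_LONG);

	return 0;
}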