capabilities: introduce per-process capability bounding set

The capability bounding set is a set beyond which capabilities cannot grow. Currently cap_bset is per-system. It can be manipulated through sysctl, but only init can add capabilities. Root can remove capabilities. By default it includes all caps except CAP_SETPCAP. This patch makes the bounding set per-process when file capabilities are enabled. It is inherited at fork from parent. Noone can add elements, CAP_SETPCAP is required to remove them. One example use of this is to start a safer container. For instance, until device namespaces or per-container device whitelists are introduced, it is best to take CAP_MKNOD away from a container. The bounding set will not affect pP and pE immediately. It will only affect pP' and pE' after subsequent exec()s. It also does not affect pI, and exec() does not constrain pI'. So to really start a shell with no way of regain CAP_MKNOD, you would do prctl(PR_CAPBSET_DROP, CAP_MKNOD); cap_t cap = cap_get_proc(); cap_value_t caparray[1]; caparray[0] = CAP_MKNOD; cap_set_flag(cap, CAP_INHERITABLE, 1, caparray, CAP_DROP); cap_set_proc(cap); cap_free(cap); The following test program will get and set the bounding set (but not pI). For instance ./bset get (lists capabilities in bset) ./bset drop cap_net_raw (starts shell with new bset) (use capset, setuid binary, or binary with file capabilities to try to increase caps) ************************************************************ cap_bound.c ************************************************************ #include <sys/prctl.h> #include <linux/capability.h> #include <sys/types.h> #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #ifndef PR_CAPBSET_READ #define PR_CAPBSET_READ 23 #endif #ifndef PR_CAPBSET_DROP #define PR_CAPBSET_DROP 24 #endif int usage(char *me) { printf("Usage: %s get\n", me); printf(" %s drop <capability>\n", me); return 1; } #define numcaps 32 char *captable[numcaps] = { "cap_chown", "cap_dac_override", "cap_dac_read_search", "cap_fowner", "cap_fsetid", "cap_kill", "cap_setgid", "cap_setuid", "cap_setpcap", "cap_linux_immutable", "cap_net_bind_service", "cap_net_broadcast", "cap_net_admin", "cap_net_raw", "cap_ipc_lock", "cap_ipc_owner", "cap_sys_module", "cap_sys_rawio", "cap_sys_chroot", "cap_sys_ptrace", "cap_sys_pacct", "cap_sys_admin", "cap_sys_boot", "cap_sys_nice", "cap_sys_resource", "cap_sys_time", "cap_sys_tty_config", "cap_mknod", "cap_lease", "cap_audit_write", "cap_audit_control", "cap_setfcap" }; int getbcap(void) { int comma=0; unsigned long i; int ret; printf("i know of %d capabilities\n", numcaps); printf("capability bounding set:"); for (i=0; i<numcaps; i++) { ret = prctl(PR_CAPBSET_READ, i); if (ret < 0) perror("prctl"); else if (ret==1) printf("%s%s", (comma++) ? ", " : " ", captable[i]); } printf("\n"); return 0; } int capdrop(char *str) { unsigned long i; int found=0; for (i=0; i<numcaps; i++) { if (strcmp(captable[i], str) == 0) { found=1; break; } } if (!found) return 1; if (prctl(PR_CAPBSET_DROP, i)) { perror("prctl"); return 1; } return 0; } int main(int argc, char *argv[]) { if (argc<2) return usage(argv[0]); if (strcmp(argv[1], "get")==0) return getbcap(); if (strcmp(argv[1], "drop")!=0 || argc<3) return usage(argv[0]); if (capdrop(argv[2])) { printf("unknown capability\n"); return 1; } return execl("/bin/bash", "/bin/bash", NULL); } ************************************************************ [serue@us.ibm.com: fix typo] Signed-off-by: Serge E. Hallyn <serue@us.ibm.com> Signed-off-by: Andrew G. Morgan <morgan@kernel.org> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: James Morris <jmorris@namei.org> Cc: Chris Wright <chrisw@sous-sol.org> Cc: Casey Schaufler <casey@schaufler-ca.com>a Signed-off-by: "Serge E. Hallyn" <serue@us.ibm.com> Tested-by: Jiri Slaby <jirislaby@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Serge E. Hallyn <serue@us.ibm.com> 2008-02-04 22:29:45 -0800
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-02-05 09:44:20 -0800
commit: 3b7391de67da515c91f48aa371de77cb6cc5c07e (patch)
tree: 22b9f5d9d1c36b374eb5765219aca3c7e1f23486 /security
parent: 46c383cc4530ccc438cb325e92e11eb21dd3d4fc (diff)
download: linux-3b7391de67da515c91f48aa371de77cb6cc5c07e.tar.gz
linux-3b7391de67da515c91f48aa371de77cb6cc5c07e.tar.bz2
linux-3b7391de67da515c91f48aa371de77cb6cc5c07e.zip
1 files changed, 27 insertions, 17 deletions
diff --git a/security/commoncap.c b/security/commoncap.c
index 01ab47845dcf..5aba82679a0b 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -25,20 +25,6 @@
 #include <linux/mount.h>
 #include <linux/sched.h>
 
-#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
-/*
- * Because of the reduced scope of CAP_SETPCAP when filesystem
- * capabilities are in effect, it is safe to allow this capability to
- * be available in the default configuration.
- */
-# define CAP_INIT_BSET  CAP_FULL_SET
-#else /* ie. ndef CONFIG_SECURITY_FILE_CAPABILITIES */
-# define CAP_INIT_BSET  CAP_INIT_EFF_SET
-#endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */
-
-kernel_cap_t cap_bset = CAP_INIT_BSET;    /* systemwide capability bound */
-EXPORT_SYMBOL(cap_bset);
-
 /* Global security state */
 
 unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
@@ -140,6 +126,12 @@ int cap_capset_check (struct task_struct *target, kernel_cap_t *effective,
 		/* incapable of using this inheritable set */
 		return -EPERM;
 	}
+	if (!cap_issubset(*inheritable,
+			   cap_combine(target->cap_inheritable,
+				       current->cap_bset))) {
+		/* no new pI capabilities outside bounding set */
+		return -EPERM;
+	}
 
 	/* verify restrictions on target's new Permitted set */
 	if (!cap_issubset (*permitted,
@@ -337,10 +329,11 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 	/* Derived from fs/exec.c:compute_creds. */
 	kernel_cap_t new_permitted, working;
 
-	new_permitted = cap_intersect (bprm->cap_permitted, cap_bset);
-	working = cap_intersect (bprm->cap_inheritable,
+	new_permitted = cap_intersect(bprm->cap_permitted,
+				 current->cap_bset);
+	working = cap_intersect(bprm->cap_inheritable,
 				 current->cap_inheritable);
-	new_permitted = cap_combine (new_permitted, working);
+	new_permitted = cap_combine(new_permitted, working);
 
 	if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
 	    !cap_issubset (new_permitted, current->cap_permitted)) {
@@ -581,6 +574,23 @@ int cap_task_kill(struct task_struct *p, struct siginfo *info,
 
 	return -EPERM;
 }
+
+/*
+ * called from kernel/sys.c for prctl(PR_CABSET_DROP)
+ * done without task_capability_lock() because it introduces
+ * no new races - i.e. only another task doing capget() on
+ * this task could get inconsistent info.  There can be no
+ * racing writer bc a task can only change its own caps.
+ */
+long cap_prctl_drop(unsigned long cap)
+{
+	if (!capable(CAP_SETPCAP))
+		return -EPERM;
+	if (!cap_valid(cap))
+		return -EINVAL;
+	cap_lower(current->cap_bset, cap);
+	return 0;
+}
 #else
 int cap_task_setscheduler (struct task_struct *p, int policy,
 			   struct sched_param *lp)
author	Serge E. Hallyn <serue@us.ibm.com>	2008-02-04 22:29:45 -0800
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2008-02-05 09:44:20 -0800
commit	3b7391de67da515c91f48aa371de77cb6cc5c07e (patch)
tree	22b9f5d9d1c36b374eb5765219aca3c7e1f23486 /security
parent	46c383cc4530ccc438cb325e92e11eb21dd3d4fc (diff)
download	linux-3b7391de67da515c91f48aa371de77cb6cc5c07e.tar.gz linux-3b7391de67da515c91f48aa371de77cb6cc5c07e.tar.bz2 linux-3b7391de67da515c91f48aa371de77cb6cc5c07e.zip