diff options
author | Serge Hallyn <serge.hallyn@canonical.com> | 2012-01-23 12:07:44 -0600 |
---|---|---|
committer | Daniel Lezcano <daniel.lezcano@free.fr> | 2012-02-26 10:44:40 +0100 |
commit | d08ba6ec05510e95eca791b19731f4a241d7f675 (patch) | |
tree | 2be5dddc4f699f617f954d0e3c2ac9f4eae69e98 | |
parent | lxc-ubuntu: use release-updates and release-security (diff) | |
download | lxc-d08ba6ec05510e95eca791b19731f4a241d7f675.tar.gz lxc-d08ba6ec05510e95eca791b19731f4a241d7f675.tar.bz2 lxc-d08ba6ec05510e95eca791b19731f4a241d7f675.zip |
Support nested cgroups
With this patch, I can start a container 'o1' inside another container 'o1'.
(Of course, the containers must be on different subnets.)
Detail:
1. Create cgroups for containers under /lxc.
2. Support nested lxc: respect init's cgroup:
Create cgroups under init's cgroup. So if we start a container c2
inside a container 'c1', we'll use /sys/fs/cgroup/freezer/lxc/c1/lxc/c2
instead of /sys/fs/cgroup/freezer/c2. This allows a container c1
to be created inside container c1. It also allows a container's limits
to be enforced on all of a container's children (which a MAC policy could
already enforce, in which case current lxc code would be unable to nest
altogether).
3. Finally, if a container's cgroup already exists, rename it rather than
failing to start the container. Try to WARN the user so they might go
clean the old cgroup up.
Whereas without this patch, container o1's cgroup would be
/sys/fs/cgroup/<subsys>/o1,
it now becomes
/sys/fs/cgroup/<subsys>/<initcgroup>/lxc/o1
so if init is in cgroup '/' then o1's freezer cgroup would be:
/sys/fs/cgroup/freezer/lxc/o1
Changelog:
. make lxc-ps work with separate mtab. If cgroups were mounted with -n,
and mtab is not linked to /proc/self/mounts, then 'mount -t cgroup' won't
show these mounts. So make lxc-ps not use it, but rather use
/proc/self/mounts directly.
. lxc-ls in the past assumed that a container's cgroup was just '/<name>'.
Now it is '/<host-init-cgroup>/lxc/<name>'. Handle that.
. first version of this patch was setting clone_children on
<path-to-cpusets-cgroup>/<init-cgroup>/lxc, not the parent of that dir.
That failed to initialize that cgroup, so tasks could not enter it.
Signed-off-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
-rw-r--r-- | src/lxc/cgroup.c | 171 | ||||
-rw-r--r-- | src/lxc/lxc-ls.in | 5 | ||||
-rwxr-xr-x | src/lxc/lxc-ps.in | 43 |
3 files changed, 184 insertions, 35 deletions
diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c index a8e6c27..8077a8d 100644 --- a/src/lxc/cgroup.c +++ b/src/lxc/cgroup.c @@ -81,9 +81,65 @@ static char *hasmntopt_multiple(struct mntent *mntent, const char *options) return hasmntopt(mntent, ptr); } +/* + * get_init_cgroup: get the cgroup init is in. + * dsg: preallocated buffer to put the output in + * subsystem: the exact cgroup subsystem to look up + * mntent: a mntent (from getmntent) whose mntopts contains the + * subsystem to look up. + * + * subsystem and mntent can both be NULL, in which case we return + * the first entry in /proc/1/cgroup. + * + * Returns a pointer to the answer, which may be "". + */ +static char *get_init_cgroup(const char *subsystem, struct mntent *mntent, + char *dsg) +{ + FILE *f; + char *c, *c2; + char line[MAXPATHLEN]; + + *dsg = '\0'; + f = fopen("/proc/1/cgroup", "r"); + if (!f) + return dsg; + + while (fgets(line, MAXPATHLEN, f)) { + c = index(line, ':'); + if (!c) + continue; + c++; + c2 = index(c, ':'); + if (!c2) + continue; + *c2 = '\0'; + c2++; + if (!subsystem && !mntent) + goto good; + if (subsystem && strcmp(c, subsystem) != 0) + continue; + if (mntent && !hasmntopt(mntent, c)) + continue; +good: + DEBUG("get_init_cgroup: found init cgroup for subsys %s at %s\n", + subsystem, c2); + strncpy(dsg, c2, MAXPATHLEN); + c = &dsg[strlen(dsg)-1]; + if (*c == '\n') + *c = '\0'; + goto found; + } + +found: + fclose(f); + return dsg; +} + static int get_cgroup_mount(const char *subsystem, char *mnt) { struct mntent *mntent; + char initcgroup[MAXPATHLEN]; FILE *file = NULL; file = setmntent(MTAB, "r"); @@ -97,14 +153,22 @@ static int get_cgroup_mount(const char *subsystem, char *mnt) if (strcmp(mntent->mnt_type, "cgroup")) continue; if (!subsystem || hasmntopt_multiple(mntent, subsystem)) { - strcpy(mnt, mntent->mnt_dir); + int ret; + ret = snprintf(mnt, MAXPATHLEN, "%s%s/lxc", + mntent->mnt_dir, + get_init_cgroup(subsystem, NULL, + initcgroup)); + if (ret < 0 || ret >= 
MAXPATHLEN) + goto fail; fclose(file); DEBUG("using cgroup mounted at '%s'", mnt); return 0; } }; - DEBUG("Failed to find cgroup for %s\n", subsystem ? subsystem : "(NULL)"); +fail: + DEBUG("Failed to find cgroup for %s\n", + subsystem ? subsystem : "(NULL)"); fclose(file); @@ -195,38 +259,76 @@ int lxc_cgroup_attach(const char *path, pid_t pid) } /* + * rename cgname, which is under cgparent, to a new name starting + * with 'cgparent/dead'. That way cgname can be reused. Return + * 0 on success, -1 on failure. + */ +int try_to_move_cgname(char *cgparent, char *cgname) +{ + char *newdir; + + /* tempnam problems don't matter here - cgroupfs will prevent + * duplicates if we race, and we'll just fail at that (unlikely) + * point + */ + + newdir = tempnam(cgparent, "dead"); + if (!newdir) + return -1; + if (rename(cgname, newdir)) + return -1; + WARN("non-empty cgroup %s renamed to %s, please manually inspect it\n", + cgname, newdir); + + return 0; +} + +/* * create a cgroup for the container in a particular subsystem. - * XXX TODO we will of course want to use cgroup_path{subsystem}/lxc/name, - * not just cgroup_path{subsystem}/name. 
*/ static int lxc_one_cgroup_create(const char *name, struct mntent *mntent, pid_t pid) { - char cgname[MAXPATHLEN]; + char cginit[MAXPATHLEN], cgname[MAXPATHLEN], cgparent[MAXPATHLEN]; char clonechild[MAXPATHLEN]; - int flags; - - snprintf(cgname, MAXPATHLEN, "%s/%s", mntent->mnt_dir, name); + char initcgroup[MAXPATHLEN]; + int flags, ret; + + /* cgparent is the parent dir, /sys/fs/cgroup/<cgroup>/<init-cgroup>/lxc */ + /* (remember get_init_cgroup() returns a path starting with '/') */ + /* cgname is the full name, /sys/fs/cgroup/</cgroup>/<init-cgroup>/lxc/name */ + ret = snprintf(cginit, MAXPATHLEN, "%s%s", mntent->mnt_dir, + get_init_cgroup(NULL, mntent, initcgroup)); + if (ret < 0 || ret >= MAXPATHLEN) { + SYSERROR("Failed creating pathname for init's cgroup (%d)\n", ret); + return -1; + } - /* - * There is a previous cgroup, assume it is empty, - * otherwise that fails - */ - if (!access(cgname, F_OK) && rmdir(cgname)) { - SYSERROR("failed to remove previous cgroup '%s'", cgname); + ret = snprintf(cgparent, MAXPATHLEN, "%s/lxc", cginit); + if (ret < 0 || ret >= MAXPATHLEN) { + SYSERROR("Failed creating pathname for cgroup parent (%d)\n", ret); + return -1; + } + ret = snprintf(cgname, MAXPATHLEN, "%s/%s", cgparent, name); + if (ret < 0 || ret >= MAXPATHLEN) { + SYSERROR("Failed creating pathname for cgroup (%d)\n", ret); return -1; } flags = get_cgroup_flags(mntent); - /* We have the deprecated ns_cgroup subsystem */ + /* Do we have the deprecated ns_cgroup subsystem? 
*/ if (flags & CGROUP_NS_CGROUP) { WARN("using deprecated ns_cgroup"); - return cgroup_rename_nsgroup(mntent->mnt_dir, cgname, pid); + return cgroup_rename_nsgroup(cgparent, cgname, pid); } - snprintf(clonechild, MAXPATHLEN, "%s/cgroup.clone_children", - mntent->mnt_dir); + ret = snprintf(clonechild, MAXPATHLEN, "%s/cgroup.clone_children", + cginit); + if (ret < 0 || ret >= MAXPATHLEN) { + SYSERROR("Failed creating pathname for clone_children (%d)\n", ret); + return -1; + } /* we check if the kernel has clone_children, at this point if there * no clone_children neither ns_cgroup, that means the cgroup is mounted @@ -237,14 +339,31 @@ static int lxc_one_cgroup_create(const char *name, return -1; } - /* we enable the clone_children flag of the cgroup */ + /* enable the clone_children flag of the cgroup */ if (cgroup_enable_clone_children(clonechild)) { SYSERROR("failed to enable 'clone_children flag"); return -1; } + /* if /sys/fs/cgroup/<cgroup>/<init-cgroup>/lxc does not exist, create it */ + if (access(cgparent, F_OK) && mkdir(cgparent, 0755)) { + SYSERROR("failed to create '%s' directory", cgparent); + return -1; + } + + /* + * There is a previous cgroup. Try to delete it. If that fails + * (i.e. it is not empty) try to move it out of the way. 
+ */ + if (!access(cgname, F_OK) && rmdir(cgname)) { + if (try_to_move_cgname(cgparent, cgname)) { + SYSERROR("failed to remove previous cgroup '%s'", cgname); + return -1; + } + } + /* Let's create the cgroup */ - if (mkdir(cgname, 0700)) { + if (mkdir(cgname, 0755)) { SYSERROR("failed to create '%s' directory", cgname); return -1; } @@ -301,11 +420,14 @@ out: } -int lxc_one_cgroup_destroy(const char *cgmnt, const char *name) +int lxc_one_cgroup_destroy(struct mntent *mntent, const char *name) { - char cgname[MAXPATHLEN]; + char cgname[MAXPATHLEN], initcgroup[MAXPATHLEN]; + char *cgmnt = mntent->mnt_dir; - snprintf(cgname, MAXPATHLEN, "%s/%s", cgmnt, name); + snprintf(cgname, MAXPATHLEN, "%s%s/lxc/%s", cgmnt, + get_init_cgroup(NULL, mntent, initcgroup), name); + DEBUG("destroying %s\n", cgname); if (rmdir(cgname)) { SYSERROR("failed to remove cgroup '%s'", cgname); return -1; @@ -333,8 +455,7 @@ int lxc_cgroup_destroy(const char *name) while ((mntent = getmntent(file))) { if (!strcmp(mntent->mnt_type, "cgroup")) { - DEBUG("destroying %s %s\n", mntent->mnt_dir, name); - ret = lxc_one_cgroup_destroy(mntent->mnt_dir, name); + ret = lxc_one_cgroup_destroy(mntent, name); if (ret) { fclose(file); return ret; diff --git a/src/lxc/lxc-ls.in b/src/lxc/lxc-ls.in index 2f9075c..792ea0e 100644 --- a/src/lxc/lxc-ls.in +++ b/src/lxc/lxc-ls.in @@ -31,6 +31,9 @@ active=$(netstat -xl | grep $lxcpath | \ if test -n "$active"; then get_cgroup if test -n "$mount_point"; then - cd $mount_point; ls "$@" -d $active + # get cgroup for init + init_cgroup=`cat /proc/1/cgroup | awk -F: '{ print $3 }' | head -1` + cd $mount_point/$init_cgroup/lxc + ls "$@" -d $active fi fi diff --git a/src/lxc/lxc-ps.in b/src/lxc/lxc-ps.in index 2f1d537..2fa7b8b 100755 --- a/src/lxc/lxc-ps.in +++ b/src/lxc/lxc-ps.in @@ -53,25 +53,32 @@ sub get_cgroup { my $mount_string; $mount_string=`mount -t cgroup |grep -E -e '^lxc '`; - unless ($mount_string) { - $mount_string=`mount |grep -m1 'type cgroup'`; - } - 
chomp($mount_string); if ($mount_string) { + # use the one 'lxc' cgroup mount if it exists + chomp($mount_string); $$ref_cgroup=`echo "$mount_string" |cut -d' ' -f3`; chomp($$ref_cgroup); } - die "unable to find mounted cgroup" unless $$ref_cgroup; + # Otherwise (i.e. cgroup-bin) use the first cgroup mount + $mount_string=`grep -m1 -E '^[^ \t]+[ \t]+[^ \t]+[ \t]+cgroup' /proc/self/mounts`; + unless ($mount_string) { + die "unable to find mounted cgroup" unless $$ref_cgroup; + } + chomp($mount_string); + $$ref_cgroup=`echo "$mount_string" |cut -d' ' -f2`; + chomp($$ref_cgroup); + return; } sub get_pids_in_containers { my $ref_names = shift; my $ref_cgroup = shift; my $ref_pids = shift; + my $init_cgroup = shift; my @pidlist; for (@{$ref_names}) { - my $task_file = "$$ref_cgroup/$_/tasks"; + my $task_file = "$$ref_cgroup/$init_cgroup/lxc/$_/tasks"; $LXC_NAMES{$_} = 1; open(tasks, "cat $task_file 2>/dev/null |") or next; @@ -108,6 +115,20 @@ sub execute_ps { close ps; } +sub get_init_cgroup { + my $filename = "/proc/1/cgroup"; + open(LXC, "$filename"); + my @cgroup = <LXC>; + close LXC; + my $container = ''; + foreach ( @cgroup ) { + chomp; + # find the container name after :/ + s/.*:\///o; + } + return $container; +} + sub get_container { my $pid = shift; my $filename = "/proc/$pid/cgroup"; @@ -119,8 +140,10 @@ sub get_container { foreach ( @cgroup ) { chomp; # find the container name after :/ - s/.*:\///o; - $container = $_; + s/.*:\///o; + # chop off everything up to 'lxc/' + s/lxc\///o; + $container = $_; } return $container; } @@ -160,6 +183,7 @@ my $arg_help = ''; my $arg_usage = ''; my $arg_lxc = ''; my @arg_name; +my $init_cgroup = '/'; GetOptions('help' => \$arg_help, 'usage' => \$arg_usage, @@ -186,8 +210,9 @@ if (@arg_name > 0) { my $pid_list; $LXC_DISPLAY = 2; + $init_cgroup = get_init_cgroup(); get_cgroup \$cgroup; - get_pids_in_containers(\@arg_name, \$cgroup, \$pid_list); + get_pids_in_containers(\@arg_name, \$cgroup, \$pid_list, $init_cgroup); if 
($pid_list) { @ARGV = ("-p $pid_list",@ARGV); } |