Commit 434bc3a5 authored by Martin Peres's avatar Martin Peres
Browse files

Initial commit

parents
out/
*.kate-swp
# Builder image: compiles podman on Alpine, then (when the resulting
# container is run) packs a u-root initramfs containing it — see ENTRYPOINT.
FROM alpine
# Install build + runtime deps, build podman from source with a reduced
# feature set, shrink the binary with upx, then remove the build-only
# packages to keep the image small.
# NOTE(review): the podman clone is not pinned to a tag/commit, so builds
# are not reproducible — consider pinning a release.
RUN set -ex \
&& apk add --no-cache bash go git upx make linux-headers crun busybox ca-certificates e2fsprogs parted gpgme gpgme-dev util-linux pigz \
&& apk -X http://dl-cdn.alpinelinux.org/alpine/edge/testing --no-cache add libseccomp-dev conmon \
&& git clone https://github.com/containers/podman.git /src/podman/ \
&& cd /src/podman \
&& make EXTRA_LDFLAGS="-w -s" BUILD_ARG="exclude_graphdriver_btrfs btrfs_noversion exclude_graphdriver_devicemapper seccomp" podman \
&& upx --best bin/podman \
&& cp bin/podman /bin/podman \
&& rm -rf /src/podman /root/.cache \
&& apk del upx make linux-headers gpgme-dev bash libseccomp-dev
# Fetch u-root (the initramfs generator) into /root/go/bin/u-root.
# Don't ask me why it cannot be folded on the layer above!
RUN set -ex \
&& go get github.com/u-root/u-root
# Runtime configuration for podman plus the default DHCP hook script.
# NOTE(review): "uhdcp" looks like a typo of "udhcp", but the name matches
# the -files /etc/uhdcp-default.sh line below — renaming would break it.
COPY config/containers uhdcp-default.sh /etc/
COPY crun-no-pivot initscript.sh /bin/
# TODO: By calling the initscript directly, we could save quite a bit of
# size, but we first need to study what the init program does for us :)
#-initcmd="/bin/initscript.sh" -nocmd
# Running the image generates the initramfs: u-root bundles the listed host
# binaries, libs and config files together with its core init command into
# /tmp/initramfs.linux_amd64.cpio (copied out by the Makefile).
ENTRYPOINT /root/go/bin/u-root -files /bin/sh -defaultsh="" \
-files /bin/crun-no-pivot -files /usr/bin/conmon \
-files /usr/lib/libgpgme.so.11 \
-files /bin/podman \
-files /etc/containers \
-files /usr/bin/unpigz -files /usr/bin/crun:bin/crun \
-files /sbin/mkfs.ext4 -files /usr/sbin/parted:sbin/parted -files /bin/lsblk \
-files /etc/ssl/certs/ca-certificates.crt \
-files /usr_mods -files /etc/uhdcp-default.sh \
-files /bin/initscript.sh \
-uinitcmd="/bin/initscript.sh" github.com/u-root/u-root/cmds/core/init
SHELL := /bin/bash
DOCKER ?= docker

# TODO: Replace with a public registry address
IMAGE_LABEL ?= boot2container
CONTAINER_LABEL ?= boot2container

# Command-style targets are not files; declare them so a stray file with the
# same name cannot shadow them.
.PHONY: rebuild_container test clean

# TODO: Collect all wanted modules and load them
# Build the initramfs: run the builder container (its ENTRYPOINT generates
# the cpio archive in /tmp), then copy the archive out of the stopped
# container and remove it.
out/initramfs.linux_amd64.cpio:
	@mkdir -p out
	@rm -f out/initramfs.linux_amd64.cpio
	@-$(DOCKER) rm $(CONTAINER_LABEL) > /dev/null 2> /dev/null
	$(DOCKER) run --privileged -v $(PWD)/usr_mods/:/usr_mods -v $(PWD)/config/containers/:/etc/containers/ --name $(CONTAINER_LABEL) $(IMAGE_LABEL)
	@$(DOCKER) cp $(CONTAINER_LABEL):/tmp/initramfs.linux_amd64.cpio out/ > /dev/null
	@$(DOCKER) rm $(CONTAINER_LABEL) > /dev/null

# Compress the initramfs (crc32 is the check the kernel decompressor
# supports) and pad it to a 512-byte boundary with dd conv=sync, as some
# boot environments require.
out/initramfs.linux_amd64.cpio.xz: out/initramfs.linux_amd64.cpio
	xz --check=crc32 -9 --lzma2=dict=1MiB --stdout out/initramfs.linux_amd64.cpio | dd conv=sync bs=512 of=out/initramfs.linux_amd64.cpio.xz

# Rebuild the builder container image from the local Dockerfile.
rebuild_container:
	$(DOCKER) build -t $(IMAGE_LABEL) .

# Boot the initramfs in QEMU. KERNEL must point to a bzImage with the needed
# drivers built in (see the README's Linux configuration section).
test: out/initramfs.linux_amd64.cpio
	[ -f "$(KERNEL)" ] || (echo "ERROR: Set the KERNEL parameter, pointing to linux kernel with modules compiled in"; exit 1)
	[ -f out/disk.img ] || fallocate -l 128M out/disk.img
	qemu-system-x86_64 -drive file=out/disk.img,format=raw,if=virtio -nic user,model=virtio-net-pci -kernel $(KERNEL) -initrd out/initramfs.linux_amd64.cpio -nographic -m 256M -enable-kvm -append "console=ttyS0 b2c.container=docker://registry.hub.docker.com/library/hello-world"

# Remove build products; the leading '-' ignores "no such container" errors.
clean:
	-$(RM) out/initramfs.linux_amd64.cpio
	-$(DOCKER) rm $(CONTAINER_LABEL) > /dev/null
# Boot2Container
**__WARNING__**: using this initramfs will wipe your drive, on top of not being
production-ready just yet. Use with caution!
Shipping containers have revolutionized the goods industry by standardizing the
way goods are packaged down to their physical dimensions, load capacity, ...
which made it easy to stack them up on big container ships then offload them to
trains and trucks, and so on and so forth.
Just like physical containers, IT containers have brought this level of
standardization to allow reproducing a work/testing environment anywhere, without
affecting the running host. This explains why containers are now so ubiquitous,
and why they are the basis of the vast majority of automated test systems found
on Github, Gitlab, and other forges!
This project aims to create a generic initramfs that initializes the HW
just enough to download and run containers (docker, OCI). This enables
running on your HW the same containers you may already run in your
HW-independent cycles, thus simplifying HW testing!
## Features
* Small size: Under 20MB, with a goal of achieving <10MB
* Fast boot: Under 10s cold boot to docker's hello-world, 5s on later boots
* Simple: no daemons, under 1kLOC of code, easy to generate
* Maintainable: All the heavy lifting done by Red Hat's [podman](https://podman.io/)
## Options
The initramfs reads its parameters from the kernel command line. The only
required argument there is `b2c.container` which is the address to the container
that needs to be executed. In `grub.cfg`, this could look like this:
menuentry 'Boot 2 container' --class arch --class gnu-linux --class gnu --class os $menuentry_id_option {
load_video
set gfxpayload=keep
insmod gzio
insmod part_gpt
insmod ext2
search --set=root --file /vmlinuz
echo 'Loading Linux ...'
linux /vmlinuz b2c.container=docker://hello-world
echo 'Loading ramdisk ...'
initrd /initramfs.linux_amd64.cpio.xz
Here is a list of options:
* b2c.container: Name of the container that should be executed at boot. Format: See [podman's image format](https://docs.podman.io/en/latest/markdown/podman-run.1.html#image)
## How to?
Generating an initramfs is pretty simple, provided you have already setup
docker/podman. Just run the following command to generate the initramfs (
located at `out/initramfs.linux_amd64.cpio`):
$ make out/initramfs.linux_amd64.cpio
docker run --privileged -v /home/.../boot2container/usr_mods/:/usr_mods --name boot2container boot2container
2021/01/27 08:28:40 Disabling CGO for u-root...
2021/01/27 08:28:40 Build environment: GOARCH=amd64 GOOS=linux GOROOT=/usr/lib/go GOPATH=/root/go CGO_ENABLED=0 GO111MODULE=off
2021/01/27 08:28:45 Successfully built "/tmp/initramfs.linux_amd64.cpio" (size 24570272).
If you want to test your initramfs, you may test it using QEMU, provided you
have compiled a kernel according to the [Linux Configuration](#linux-configuration) section):
$ make test KERNEL=/path/to/your/bzImage
2021/01/27 11:07:51 Welcome to u-root!
_
_ _ _ __ ___ ___ | |_
| | | |____| '__/ _ \ / _ \| __|
| |_| |____| | | (_) | (_) | |_
\__,_| |_| \___/ \___/ \__|
init: 2021/01/27 11:07:51 no modules found matching '/lib/modules/*.ko'
ln: /bin/sh: File exists
[0.97]: Busybox setup: DONE
[1.05]: Mounting the partition /dev/vda1 to /container: DONE
[1.05]: Container runtime setup: DONE
[1.05]: Loading requested modules: DONE
udhcpc: started, v1.31.1
udhcpc: sending discover
udhcpc: sending select for 10.0.2.15
udhcpc: lease of 10.0.2.15 obtained, lease time 86400
route: ioctl 0x890c failed: No such process
[1.07]: Getting IP: DONE
[1.07]: podman run --privileged --network=host --runtime /bin/crun-no-pivot docker://hello-world
[ 3.185548] cgroup: podman (423) created nested cgroup for controller "memory" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.
[ 3.188235] cgroup: "memory" requires setting use_hierarchy to 1 on the root
[ 3.196972] kmem.limit_in_bytes is deprecated and will be removed. Please report your usecase to linux-mm@kvack.org if you depend on this functionality.
Hello from Docker!
This message shows that your installation appears to be working correctly.
To generate this message, Docker took the following steps:
1. The Docker client contacted the Docker daemon.
2. The Docker daemon pulled the "hello-world" image from the Docker Hub.
(amd64)
3. The Docker daemon created a new container from that image which runs the
executable that produces the output you are currently reading.
4. The Docker daemon streamed that output to the Docker client, which sent it
to your terminal.
To try something more ambitious, you can run an Ubuntu container with:
$ docker run -it ubuntu bash
Share images, automate workflows, and more with a free Docker ID:
https://hub.docker.com/
For more examples and ideas, visit:
https://docs.docker.com/get-started/
/ #
If all goes well, you should now have a shell for you to do whatever you want.
## Linux configuration
This initramfs does not contain any module, or firmwares by default. It is thus
important to compile a kernel with everything needed built-in. Luckily, Linux
is pretty good at that!
Here are the following options you may want to set, on top of `x86_64_defconfig`,
in your `.config`:
# For QEMU machines
CONFIG_VIRTIO=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_NET=y
CONFIG_VIRTIO_BLK=y
# Enable CGROUPS, for podman
CONFIG_CGROUPS=y
CONFIG_BLK_CGROUP=y
CONFIG_CGROUP_WRITEBACK=y
CONFIG_CGROUP_SCHED=y
CONFIG_CGROUP_PIDS=y
CONFIG_CGROUP_FREEZER=y
CONFIG_HUGETLB_PAGE=y
CONFIG_CGROUP_HUGETLB=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_PERF=y
CONFIG_CGROUP_DEBUG=y
CONFIG_SOCK_CGROUP_DATA=y
CONFIG_MEMCG=y
CONFIG_NET=y
CONFIG_NET_SCHED=y
CONFIG_NET_CLS_CGROUP=y
CONFIG_CGROUP_NET_CLASSID=y
CONFIG_CGROUP_NET_PRIO=y
# Enable user namespace, for podman
CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
# Enable OVERLAYFS, for podman
CONFIG_OVERLAY_FS=y
# To embed the necessary firmwares in the kernel
CONFIG_FW_LOADER=y
CONFIG_EXTRA_FIRMWARE_DIR="/lib/firmware"
CONFIG_EXTRA_FIRMWARE="rtl_nic/rtl8125b-2.fw e100/d102e_ucode.bin ..."
# Now, add and remove whatever you need!
$ make oldconfig
## TODO
* Initramfs
* Allow downloading and mounting a tarball containing modules
* Allow downloading/mounting a tarball containing firmwares
* Remove the dependency in bbin (will save a couple of MB)
* Store the container that generates the initramfs in the project's registry
* Generate a generic initramfs that would be downloadable directly
* Add a swap file
# The containers configuration file specifies all of the available configuration
# command-line options/flags for container engine tools like Podman & Buildah,
# but in a TOML format that can be easily modified and versioned.
# Please refer to containers.conf(5) for details of all configuration options.
# Not all container engines implement all of the options.
# All of the options have hard coded defaults and these options will override
# the built in defaults. Users can then override these options via the command
# line. Container engines will read containers.conf files in up to three
# locations in the following order:
# 1. /usr/share/containers/containers.conf
# 2. /etc/containers/containers.conf
# 3. $HOME/.config/containers/containers.conf (Rootless containers ONLY)
# Items specified in the latter containers.conf, if they exist, override the
# previous containers.conf settings, or the default settings.
[containers]
# List of devices. Specified as
# "<device-on-host>:<device-on-container>:<permissions>", for example:
# "/dev/sdc:/dev/xvdc:rwm".
# If it is empty or commented out, only the default devices will be used
#
# devices = []
# List of volumes. Specified as
# "<directory-on-host>:<directory-in-container>:<options>", for example:
# "/db:/var/lib/db:ro".
# If it is empty or commented out, no volumes will be added
#
# volumes = []
# Used to change the name of the default AppArmor profile of container engine.
#
# apparmor_profile = "container-default"
# List of annotation. Specified as
# "key=value"
# If it is empty or commented out, no annotations will be added
#
# annotations = []
# Default way to create a cgroup namespace for the container
# Options are:
# `private` Create private Cgroup Namespace for the container.
# `host` Share host Cgroup Namespace with the container.
#
# cgroupns = "private"
# Control container cgroup configuration
# Determines whether the container will create CGroups.
# Options are:
# `enabled` Enable cgroup support within container
# `disabled` Disable cgroup support, will inherit cgroups from parent
# `no-conmon` Do not create a cgroup dedicated to conmon.
#
# cgroups = "enabled"
# List of default capabilities for containers. If it is empty or commented out,
# the default capabilities defined in the container engine will be added.
#
default_capabilities = [
"CHOWN",
"DAC_OVERRIDE",
"FOWNER",
"FSETID",
"KILL",
"NET_BIND_SERVICE",
"SETFCAP",
"SETGID",
"SETPCAP",
"SETUID",
"SYS_CHROOT"
]
# A list of sysctls to be set in containers by default,
# specified as "name=value",
# for example:"net.ipv4.ping_group_range = 0 0".
#
default_sysctls = [
"net.ipv4.ping_group_range=0 0",
]
# A list of ulimits to be set in containers by default, specified as
# "<ulimit name>=<soft limit>:<hard limit>", for example:
# "nofile=1024:2048"
# See setrlimit(2) for a list of resource names.
# Any limit not specified here will be inherited from the process launching the
# container engine.
# Ulimits has limits for non privileged container engines.
#
# default_ulimits = [
# "nofile=1280:2560",
# ]
# List of default DNS options to be added to /etc/resolv.conf inside of the container.
#
# dns_options = []
# List of default DNS search domains to be added to /etc/resolv.conf inside of the container.
#
# dns_searches = []
# Set default DNS servers.
# This option can be used to override the DNS configuration passed to the
# container. The special value "none" can be specified to disable creation of
# /etc/resolv.conf in the container.
# The /etc/resolv.conf file in the image will be used without changes.
#
# dns_servers = []
# Environment variable list for the conmon process; used for passing necessary
# environment variables to conmon or the runtime.
#
# env = [
# "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
# "TERM=xterm",
# ]
# Pass all host environment variables into the container.
#
# env_host = false
# Default proxy environment variables passed into the container.
# The environment variables passed in include:
# http_proxy, https_proxy, ftp_proxy, no_proxy, and the upper case versions of
# these. This option is needed when host system uses a proxy but container
# should not use proxy. Proxy environment variables specified for the container
# in any other way will override the values passed from the host.
#
# http_proxy = true
# Run an init inside the container that forwards signals and reaps processes.
#
# init = false
# Container init binary, if init=true, this is the init binary to be used for containers.
#
# init_path = "/usr/libexec/podman/catatonit"
# Default way to create an IPC namespace (POSIX SysV IPC) for the container
# Options are:
# `private` Create private IPC Namespace for the container.
# `host` Share host IPC Namespace with the container.
#
# ipcns = "private"
# keyring tells the container engine whether to create
# a kernel keyring for use within the container.
# keyring = true
# label tells the container engine whether to use container separation using
# MAC(SELinux) labeling or not.
# The label flag is ignored on label disabled systems.
#
# label = true
# Logging driver for the container. Available options: k8s-file and journald.
#
# log_driver = "k8s-file"
# Maximum size allowed for the container log file. Negative numbers indicate
# that no size limit is imposed. If positive, it must be >= 8192 to match or
# exceed conmon's read buffer. The file is truncated and re-opened so the
# limit is never exceeded.
#
# log_size_max = -1
# Default way to create a Network namespace for the container
# Options are:
# `private` Create private Network Namespace for the container.
# `host` Share host Network Namespace with the container.
# `none` Containers do not use the network
#
# netns = "private"
# Create /etc/hosts for the container. By default, the container engine manages
# /etc/hosts, automatically adding the container's own IP address.
#
# no_hosts = false
# Maximum number of processes allowed in a container.
#
# pids_limit = 2048
# Default way to create a PID namespace for the container
# Options are:
# `private` Create private PID Namespace for the container.
# `host` Share host PID Namespace with the container.
#
# pidns = "private"
# Path to the seccomp.json profile which is used as the default seccomp profile
# for the runtime.
#
# seccomp_profile = "/usr/share/containers/seccomp.json"
# Size of /dev/shm. Specified as <number><unit>.
# Unit is optional, values:
# b (bytes), k (kilobytes), m (megabytes), or g (gigabytes).
# If the unit is omitted, the system uses bytes.
#
# shm_size = "65536k"
# Set timezone in container. Takes IANA timezones as well as "local",
# which sets the timezone in the container to match the host machine.
#
# tz = ""
# Set umask inside the container
#
# umask="0022"
# Default way to create a UTS namespace for the container
# Options are:
# `private` Create private UTS Namespace for the container.
# `host` Share host UTS Namespace with the container.
#
# utsns = "private"
# Default way to create a User namespace for the container
# Options are:
# `auto` Create unique User Namespace for the container.
# `host` Share host User Namespace with the container.
#
# userns = "host"
# Number of UIDs to allocate for the automatic container creation.
# UIDs are allocated from the "container" UIDs listed in
# /etc/subuid & /etc/subgid
#
# userns_size=65536
# The network table contains settings pertaining to the management of
# CNI plugins.
[network]
# Path to directory where CNI plugin binaries are located.
#
# cni_plugin_dirs = ["/usr/libexec/cni"]
# Path to the directory where CNI configuration files are located.
#
# network_config_dir = "/etc/cni/net.d/"
[engine]
# ImageBuildFormat indicates the default image format for building
# container images. Valid values are "oci" (default) or "docker".
# image_build_format = "oci"
# Cgroup management implementation used for the runtime.
# Valid options "systemd" or "cgroupfs"
#
# cgroup_manager = "systemd"
# Environment variables to pass into conmon
#
# conmon_env_vars = [
# "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
# ]
# Paths to look for the conmon container manager binary
#
# conmon_path = [
# "/usr/libexec/podman/conmon",
# "/usr/local/libexec/podman/conmon",
# "/usr/local/lib/podman/conmon",
# "/usr/bin/conmon",
# "/usr/sbin/conmon",
# "/usr/local/bin/conmon",
# "/usr/local/sbin/conmon"
# ]
# Specify the keys sequence used to detach a container.
# Format is a single character [a-Z] or a comma separated sequence of
# `ctrl-<value>`, where `<value>` is one of:
# `a-z`, `@`, `^`, `[`, `\`, `]`, `^` or `_`
#
# detach_keys = "ctrl-p,ctrl-q"
# Determines whether engine will reserve ports on the host when they are
# forwarded to containers. When enabled, when ports are forwarded to containers,
# ports are held open by as long as the container is running, ensuring that
# they cannot be reused by other programs on the host. However, this can cause
# significant memory usage if a container has many ports forwarded to it.
# Disabling this can save memory.
#
# enable_port_reservation = true
# Environment variables to be used when running the container engine (e.g., Podman, Buildah).
# For example "http_proxy=internal.proxy.company.com".
# Note these environment variables will not be used within the container.
# Set the env section under [containers] table, if you want to set environment variables for the container.
# env = []
# Selects which logging mechanism to use for container engine events.
# Valid values are `journald`, `file` and `none`.
#
# events_logger = "journald"
# Path to OCI hooks directories for automatically executed hooks.
#
# hooks_dir = [
# "/usr/share/containers/oci/hooks.d",
# ]
# Default transport method for pulling and pushing for images
#
# image_default_transport = "docker://"
# Default command to run the infra container
#
# infra_command = "/pause"
# Infra (pause) container image name for pod infra containers. When running a
# pod, we start a `pause` process in a container to hold open the namespaces
# associated with the pod. This container does nothing other than sleep,
# reserving the pods resources for the lifetime of the pod.
#
# infra_image = "k8s.gcr.io/pause:3.2"
# Specify the locking mechanism to use; valid values are "shm" and "file".
# Change the default only if you are sure of what you are doing, in general
# "file" is useful only on platforms where cgo is not available for using the
# faster "shm" lock type. You may need to run "podman system renumber" after
# you change the lock type.
#
# lock_type = "shm"
# MultiImageArchive - if true, the container engine allows for storing archives
# (e.g., of the docker-archive transport) with multiple images. By default,
# Podman creates single-image archives.
#
# multi_image_archive = "false"
# Default engine namespace
# If engine is joined to a namespace, it will see only containers and pods
# that were created in the same namespace, and will create new containers and
# pods in that namespace.
# The default namespace is "", which corresponds to no namespace. When no
# namespace is set, all containers and pods are visible.
#
# namespace = ""
# Path to the slirp4netns binary
#
# network_cmd_path=""
# Default options to pass to the slirp4netns binary.
# For example "allow_host_loopback=true"
#
# network_cmd_options=[]
# Whether to use chroot instead of pivot_root in the runtime
#
# no_pivot_root = false
# Number of locks available for containers and pods.
# If this is changed, a lock renumber must be performed (e.g. with the
# 'podman system renumber' command).
#
# num_locks = 2048
# Whether to pull new image before running a container
# pull_policy = "missing"
# Indicates whether the application should be running in remote mode. This flag modifies the
# --remote option on container engines. Setting the flag to true will default
# `podman --remote=true` for access to the remote Podman service.
# remote = false
# Directory for persistent engine files (database, etc)
# By default, this will be configured relative to where the containers/storage
# stores containers
# Uncomment to change location from this default
#
# static_dir = "/var/lib/containers/storage/libpod"
# Directory for temporary files. Must be tmpfs (wiped after reboot)
#
tmp_dir = "/tmp/libpod"