Commit cb1dda1e authored by Benjamin Tissoires's avatar Benjamin Tissoires
Browse files

ci-fairy: add MinIO mc like functions



`mc` lacks support for the STS tokens the server can issue.

Implement a simple `mc`-like command to transfer files using those
STS tokens.
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@gmail.com>
parent 9a98f164
...@@ -27,6 +27,7 @@ setup(name='ci-fairy', ...@@ -27,6 +27,7 @@ setup(name='ci-fairy',
include_package_data=True, include_package_data=True,
install_requires=[ install_requires=[
'python-gitlab', 'python-gitlab',
'boto3',
'click', 'click',
'colored', 'colored',
'GitPython', 'GitPython',
......
#!/usr/bin/env python3 #!/usr/bin/env python3
import boto3
import click import click
import colored import colored
import functools
import git import git
import gitlab import gitlab
from gitlab import Gitlab from gitlab import Gitlab
import jinja2 import jinja2
import json
import fnmatch import fnmatch
import logging import logging
import os import os
import shutil
import sys import sys
import urllib.parse import urllib.parse
import yaml import yaml
from botocore.client import Config
from pathlib import Path from pathlib import Path
...@@ -211,6 +216,302 @@ class GitCommitValidator(object): ...@@ -211,6 +216,302 @@ class GitCommitValidator(object):
msg) msg)
class S3(object):
    '''
    Abstract entry point providing one common API for remote AWS S3
    servers and the local file system.
    '''
    def __init__(self):
        pass

    @classmethod
    def s3(cls, full_path, credentials=None):
        '''
        Factory method to get an S3 object based on
        its path.

        :param full_path: the full path of the object (local posix path or "minio://bucket/key")
        :type full_path: str
        :param credentials: a file path with the appropriate credentials (access key, secret key and session token)
        :type credentials: str
        :returns: An S3Object
        '''
        prefix = 'minio://'
        if full_path.startswith(prefix):
            # remote path: strip the scheme, leaving "bucket/key",
            # and let the remote resolve it
            remote = S3Remote(credentials)
            return remote.get(full_path[len(prefix):])
        # anything else is a plain local path
        return S3Object(full_path)
class S3Object(object):
    '''
    Wrapper around the S3 API for local or remote objects.

    A plain S3Object represents a path on the local file system;
    subclasses wrap the remote API.
    '''
    def __init__(self, key):
        self.key = key
        self.path = Path(key)

    @property
    def exists(self):
        '''True when the path is present on the local file system.'''
        return self.path.exists()

    @property
    def is_dir(self):
        '''
        True when the path is a directory. A nonexistent path counts
        as a directory when its key carries a trailing slash.
        '''
        if self.exists:
            return self.path.is_dir()
        return self.key.endswith('/')

    @property
    def is_local(self):
        return True

    @property
    def name(self):
        '''The final path component.'''
        return self.path.name

    def copy_from(self, other):
        '''Copy the content of `other` (local or remote) to this path.'''
        if not other.is_local:
            # remote source: delegate to the remote download API
            other.download_file(self)
            return
        try:
            shutil.copy(other.key, self.path)
        except IsADirectoryError:
            raise IsADirectoryError(f"Error: cannot do recursive cp of directory '{other.key}'")

    @property
    def children(self):
        '''The immediate entries below this directory, as S3Objects.'''
        entries = self.path.glob('*')
        return [S3Object(entry) for entry in entries]
class S3Remote(S3Object):
    '''
    A remote S3 server; holds the list of available buckets.
    '''
    def __init__(self, credentials):
        super().__init__('/')
        # credentials is the JSON file written by `ci-fairy minio login`
        with open(credentials) as credfile:
            creds = json.load(credfile)
        s3 = boto3.resource('s3',
                            endpoint_url=creds['endpoint_url'],
                            aws_access_key_id=creds['AccessKeyId'],
                            aws_secret_access_key=creds['SecretAccessKey'],
                            aws_session_token=creds['SessionToken'],
                            config=Config(signature_version='s3v4'),
                            region_name='us-east-1')
        self._name = creds['endpoint_url']
        self.bucket_names = [b.name for b in s3.buckets.all()]
        self.buckets = {name: s3.Bucket(name) for name in self.bucket_names}

    @property
    def name(self):
        # the server is identified by its endpoint URL
        return self._name

    @property
    def is_local(self):
        return False

    @property
    def exists(self):
        return True

    @property
    def is_dir(self):
        # the server root always behaves like a directory
        return True

    def copy_from(self, other):
        raise ValueError('No destination bucket provided')

    @property
    def children(self):
        '''The buckets available on this server.'''
        return [S3Bucket(bucket) for bucket in self.buckets.values()]

    def get(self, path):
        '''Resolve "bucket/key" (or "" for the server root) to an S3 object.'''
        if not path:
            # minio://
            return self
        bucket_name, _, key = path.partition('/')
        try:
            bucket = self.buckets[bucket_name]
        except KeyError:
            # minio://bucket_that_doesn_t_exist
            raise FileNotFoundError(f"bucket '{bucket_name}' doesn't exist on {self.name}")
        return S3Bucket(bucket).get(key)
class S3Bucket(S3Object):
    '''
    A remote S3 bucket.
    '''
    def __init__(self, bucket):
        super().__init__('/')
        self._bucket = bucket

    @property
    def exists(self):
        return True

    @property
    def is_dir(self):
        # a bucket always behaves like a directory
        return True

    @property
    def is_local(self):
        # FIX: S3Object.is_local defaults to True, but a bucket is remote.
        # Without this override (which both S3Remote and S3RemoteObject
        # have), S3Object.copy_from() on a local destination would take
        # the shutil.copy('/') branch instead of the remote download path.
        return False

    @property
    def name(self):
        return self._bucket.name

    @property
    def children(self):
        '''
        The entries immediately below the bucket root: plain files,
        plus each top-level directory prefix exactly once.
        '''
        objs = [o.key for o in self._bucket.objects.all()]
        children = []
        for o in objs:
            if '/' in o:
                # minio://bucket/some/path/some/file
                # o is now: some/path/some/file
                root, _ = o.split('/', 1)
                if root not in children:
                    # root is "some" and is not in the children list
                    children.append(root)
            else:
                # minio://bucket/some_file
                children.append(o)
        return [S3RemoteObject(self, c) for c in children]

    def copy_from(self, other):
        '''Upload the local object `other` into the bucket root.'''
        dst = other.name
        self.upload_file(other, dst)

    def get(self, key):
        '''Resolve a key within the bucket ("" means the bucket itself).'''
        if not key:
            # - minio://bucket
            # - minio://bucket/
            return self
        return S3RemoteObject(self, key)

    @property
    def objects(self):
        # the raw boto3 object collection of this bucket
        return self._bucket.objects.all()

    def upload_file(self, local_obj, remote_obj):
        '''
        Upload `local_obj` (a local S3Object) under the remote key
        `remote_obj`.

        :raises ValueError: if `local_obj` is not local
        '''
        if not local_obj.is_local:
            raise ValueError('at least one argument must be a local path')
        return self._bucket.upload_file(str(local_obj.path), str(remote_obj))

    def _download_file(self, remote_obj, local_obj):
        '''
        Download `remote_obj` into `local_obj`. When the local target is
        an existing directory, the remote file name is appended to it.

        :raises IsADirectoryError: for directory sources or a missing
            local destination directory
        '''
        if remote_obj.is_dir:
            raise IsADirectoryError('cannot do recursive cp')
        local_path = local_obj.path
        if local_obj.is_dir:
            if not local_obj.exists:
                raise IsADirectoryError(f"directory '{local_path}' does not exist")
            local_path = local_path / remote_obj.name
        return self._bucket.download_file(str(remote_obj.key), str(local_path))

    def download_file(self, local_dest):
        return self._download_file(self, local_dest)
class S3RemoteObject(S3Object):
    '''
    A remote S3 object.

    The key is the full path of the file or directory within
    its bucket.
    '''
    def __init__(self, bucket, key):
        super().__init__(key)
        self._bucket = bucket
        self._is_dir = False
        self._children = []
        # all remote keys sharing our key as a prefix
        objs = [o.key for o in bucket.objects if o.key.startswith(key)]
        # find if the object exists already (full key matches)
        self._exists = key in objs
        # special case for directories
        if not objs:
            # nothing in the remote matches, check if we
            # have a terminating '/'
            self._is_dir = key.endswith('/')
        elif not self._exists:
            # at least one remote object starts with our key,
            # check if we have a parent of a remote object, or
            # just if the path and name starts with the same key
            # build the list of all parents of all objects
            parents = [p for o in objs for p in Path(o).parents]
            self._is_dir = self._exists = self.path in parents
        # compute the list of files or dir immediately below the
        # current dir
        if self._is_dir:
            for o in objs:
                path = Path(o)
                # walk up o's parent chain until we hit our own path;
                # `path` then holds the entry immediately below us
                for parent in path.parents:
                    if parent == self.path:
                        # FIX: several keys can share the same
                        # intermediate directory (e.g. a/b/1 and a/b/2
                        # both resolve to child a/b); record it once
                        # so `ls` doesn't print duplicates
                        if path not in self._children:
                            self._children.append(path)
                        break
                    path = parent

    @property
    def is_dir(self):
        return self._is_dir

    @property
    def is_local(self):
        return False

    @property
    def exists(self):
        return self._exists

    @property
    def children(self):
        # entries (as Paths) immediately below this directory
        return self._children

    def copy_from(self, other):
        '''Upload the local object `other` to this remote path.'''
        dst = self.path
        if self.is_dir:
            # copying into a directory: keep the source file name
            dst = self.path / other.name
        self._bucket.upload_file(other, dst)

    def download_file(self, local_dest):
        return self._bucket._download_file(self, local_dest)
@click.group() @click.group()
@click.option('-v', '--verbose', count=True, help='increase verbosity') @click.option('-v', '--verbose', count=True, help='increase verbosity')
@click.option('--gitlab-url', help='GitLab URL with transport protocol, e.g. http://gitlab.freedesktop.org') @click.option('--gitlab-url', help='GitLab URL with transport protocol, e.g. http://gitlab.freedesktop.org')
...@@ -261,6 +562,104 @@ def ci_fairy(ctx, verbose, gitlab_url, authfile): ...@@ -261,6 +562,104 @@ def ci_fairy(ctx, verbose, gitlab_url, authfile):
ctx.obj.job_token = token ctx.obj.job_token = token
def credentials_option(required=True):
    '''
    Decorator adding the --credentials option to a click command.

    With required=True the credentials file must already exist
    (ls/cp case); with required=False it may be created (login case).
    '''
    def decorator(func):
        option = click.option('--credentials',
                              default='.minio_credentials',
                              help='the file to store the credentials (default to $PWD/.minio_credentials)',
                              type=click.Path(exists=required,
                                              file_okay=True,
                                              dir_okay=False,
                                              writable=not required,  # we write the file if required is false (login case)
                                              readable=True,
                                              allow_dash=False))

        @option
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            return func(*args, **kwargs)

        return wrapper
    return decorator
@ci_fairy.group()
def minio():
    # command group for the `mc`-like minio subcommands (login, ls, cp);
    # no docstring on purpose: click would turn it into help text
    pass
@minio.command()
@credentials_option(required=False)
@click.option('--endpoint-url',
              default='https://minio-packet.freedesktop.org',
              help='The minio instance to contact')
@click.argument('token')
def login(credentials, endpoint_url, token):
    '''Login to the minio server'''
    # ask the server's STS endpoint to exchange the token for
    # temporary S3 credentials
    sts = boto3.Session().client('sts',
                                 endpoint_url=endpoint_url,
                                 config=Config(signature_version='s3v4'),
                                 region_name='us-east-1')
    role_arn = 'arn:aws:iam::123456789012:role/FederatedWebIdentityRole'
    response = sts.assume_role_with_web_identity(DurationSeconds=900,
                                                 WebIdentityToken=token,
                                                 RoleArn=role_arn,
                                                 RoleSessionName='session_name')
    creds = response['Credentials']
    creds['endpoint_url'] = endpoint_url
    # datetime objects are not JSON-serializable; store ISO 8601
    creds['Expiration'] = creds['Expiration'].isoformat()
    with open(credentials, 'w') as outfile:
        json.dump(creds, outfile)
@minio.command()
@credentials_option()
@click.argument('path', default='.')
@click.pass_context
def ls(ctx, credentials, path):
    '''List a local or remote (minio://...) file or directory'''
    # FIX: the command had no docstring, so `ci-fairy minio ls --help`
    # showed no description (login already documents itself this way)
    try:
        s3_obj = S3.s3(path, credentials)
    except FileNotFoundError as e:
        # the bucket does not exist on the remote
        ctx.fail(e)
    if not s3_obj.exists:
        ctx.fail(f"file '{path}' does not exist")
    if s3_obj.is_dir:
        for o in s3_obj.children:
            print(o.name)
    else:
        print(s3_obj.name)
@minio.command()
@credentials_option()
@click.argument('src')
@click.argument('dst')
@click.pass_context
def cp(ctx, credentials, src, dst):
    '''Copy a file from/to a remote minio:// path'''
    # FIX: the command had no docstring, so `ci-fairy minio cp --help`
    # showed no description (login already documents itself this way)
    try:
        src = S3.s3(src, credentials)
    except FileNotFoundError as e:
        ctx.fail(e)
    # src doesn't exist
    if not src.exists:
        ctx.fail(f"source file '{src.path}' does not exist")
    try:
        dst = S3.s3(dst, credentials)
    except FileNotFoundError as e:
        ctx.fail(e)
    try:
        dst.copy_from(src)
    except (ValueError, IsADirectoryError) as e:
        # merged the two identical except clauses: either no destination
        # bucket was given, or a recursive copy was attempted
        ctx.fail(e)
@ci_fairy.command() @ci_fairy.command()
@click.option('--repository', help='The registry repository to work on, e.g. fedora/latest') @click.option('--repository', help='The registry repository to work on, e.g. fedora/latest')
@click.option('--project', help='Project name, e.g. freedesktop/ci-templates') @click.option('--project', help='Project name, e.g. freedesktop/ci-templates')
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
from click.testing import CliRunner from click.testing import CliRunner
from unittest.mock import patch, MagicMock from unittest.mock import patch, MagicMock
import git import git
import json
import pytest import pytest
from pathlib import Path from pathlib import Path
...@@ -12,6 +13,8 @@ GITLAB_TEST_URL = 'https://test.gitlab.url' ...@@ -12,6 +13,8 @@ GITLAB_TEST_URL = 'https://test.gitlab.url'
GITLAB_TEST_PROJECT_ID = '11' GITLAB_TEST_PROJECT_ID = '11'
GITLAB_TEST_PROJECT_PATH = 'project12/path34' GITLAB_TEST_PROJECT_PATH = 'project12/path34'
MINIO_TEST_URL = 'http://min.io.url:9000'
# A note on @patch('ci_fairy.Gitlab') # A note on @patch('ci_fairy.Gitlab')
# because we use from gitlab import Gitlab, the actual instance sits in # because we use from gitlab import Gitlab, the actual instance sits in
# ci_fairy.Gitlab and we need to patch that instance. # ci_fairy.Gitlab and we need to patch that instance.
...@@ -775,3 +778,316 @@ def test_merge_request_already_merged(gitlab, caplog, gitlab_default_env): ...@@ -775,3 +778,316 @@ def test_merge_request_already_merged(gitlab, caplog, gitlab_default_env):
result = runner.invoke(ci_fairy.ci_fairy, args) result = runner.invoke(ci_fairy.ci_fairy, args)
assert result.exit_code == 0 assert result.exit_code == 0
assert 'Merge request !3 is already merged, skipping checks' in caplog.text assert 'Merge request !3 is already merged, skipping checks' in caplog.text
def mock_s3_session(session):
    '''
    Attach a named mock `client` to the mocked boto3 Session and
    return the session's context (its return_value).
    '''
    ctx = session()
    ctx.client = MagicMock(name='client')
    return ctx
@patch('json.dump')
@patch('ci_fairy.boto3.Session')
def test_minio_login(session, json_dump, caplog):
    runner = CliRunner()
    ctx = mock_s3_session(session)

    # check that the endpoint we provide is actually used by the botocore client
    args = ['-vv', 'minio', 'login', '--endpoint-url', MINIO_TEST_URL, '1234']
    with runner.isolated_filesystem():
        result = runner.invoke(ci_fairy.ci_fairy, args, catch_exceptions=False)
    assert result.exit_code == 0
    # python 3.8 allows to have `mock.call_args.kwargs`, but python 3.7
    # doesn't. So use the 3.7 version (tuple access) until 3.8 gets
    # more widespread.
    assert ctx.client.call_args[1]['endpoint_url'] == MINIO_TEST_URL

    # ensure that if the caller forgets about the token ('1234' above), we fail
    args = ['-vv', 'minio', 'login', '--endpoint-url', MINIO_TEST_URL]
    with runner.isolated_filesystem():
        result = runner.invoke(ci_fairy.ci_fairy, args)
    assert result.exit_code == 2
def mock_minio(minio):
    '''
    Configure the patched ci_fairy.boto3.resource mock (`minio`) to
    emulate a server with 3 buckets, each holding `root_file.txt` plus
    5 directories of 4 files.

    :returns: the mocked s3 resource and the list of mocked buckets
    '''
    buckets = []
    for i in range(3):
        bucket = MagicMock()
        bucket.name = 'bucket{}'.format(i)
        buckets.append(bucket)
        # each bucket gets its own copy of the same layout:
        #   root_file.txt, dir-0/file-0 .. dir-4/file-3
        # NOTE(review): this whole section must sit inside the loop so
        # every bucket's objects.all() is configured — the parametrized
        # ls test lists bucket0 and bucket1, not only the last bucket
        files = []
        f = MagicMock()
        f.key = 'root_file.txt'
        files.append(f)
        for _dir in range(5):
            for _f in range(4):
                f = MagicMock()
                f.key = 'dir-{}/file-{}'.format(_dir, _f)
                files.append(f)

        def upload_file(src, dst):
            # record the uploaded key in this bucket's file list
            f = MagicMock()
            f.key = dst
            files.append(f)

        def download_file(src, dst):
            # write a fixed payload instead of fetching anything
            with open(dst, 'w') as f:
                f.write('Hello World!')

        def list_objects():
            return files

        bucket.objects.all.side_effect = list_objects
        bucket.upload_file.side_effect = upload_file
        bucket.download_file.side_effect = download_file

    ctx = minio(endpoint_url=MINIO_TEST_URL)
    ctx.buckets.all = MagicMock(return_value=buckets)

    def bucket_side_effect(arg):
        # return the matching pre-built bucket, or a fresh empty one
        for b in buckets:
            if b.name == arg:
                return b
        bucket = MagicMock()
        bucket.name = arg
        bucket.objects.all = MagicMock(return_value=[])
        return bucket

    ctx.Bucket.side_effect = bucket_side_effect

    return ctx, buckets
def write_minio_credentials():
    '''Drop a fake credentials file in the current directory.'''
    creds = {
        'endpoint_url': MINIO_TEST_URL,
        'AccessKeyId': '1234',
        'SecretAccessKey': '5678',
        'SessionToken': '9101112'
    }
    with open('.minio_credentials', 'w') as f:
        json.dump(creds, f)
@patch('ci_fairy.boto3.resource')
@pytest.mark.parametrize("input_path,result_files", [
(None, ['hello.txt', '.minio_credentials']),
('.', ['hello.txt', '.minio_credentials']),
('minio:', None),
('minio:/', None),
('minio://', ['bucket{}'.format(i) for i in range(3)]),
('minio://bucket1', ['root_file.txt'] + ['dir-{}'.format(i) for i in range(5)]),
('minio://bucket0/', ['root_file.txt'] + ['dir-{}'.format(i) for i in range(5)]),
('minio://non_existant_bucket', None),
('minio://bucket0/non_existent_dir_or_file', None),
('minio://bucket2/dir-2', ['file-{}'.format(i) for i in range(4)]),
('minio://bucket0/dir-1/', ['file-{}'.format(i) for i in range(4)]),
('minio://bucket2/dir-0/file-3', ['file-3']),
])
def test_minio_ls(minio, input_path, result_files, caplog):
runner = CliRunner()
args = ['minio', 'ls']
if input_path is not None:
args.append(input_path)
ctx, buckets = mock_minio(minio)
with runner.isolated_filesystem():
write_minio_credentials()
with open('hello.txt', 'w') as f:
f.write('Hello World!')
result = runner.invoke(ci_fairy.ci_fairy, args, catch_exceptions=False)