From b6414c3ef49b58b63f402d6608d496722cec2b1d Mon Sep 17 00:00:00 2001 From: Mukundan Sundararajan Date: Fri, 22 Dec 2017 16:17:32 -0800 Subject: [PATCH] Added a regex based patter definition for Image name --- repo2docker/app.py | 4 +- repo2docker/utils.py | 164 ++++++++++++++++++++++++++++++++++++ tests/argumentvalidation.py | 55 +++++++++++- 3 files changed, 220 insertions(+), 3 deletions(-) diff --git a/repo2docker/app.py b/repo2docker/app.py index 16b62969..ee01dc00 100644 --- a/repo2docker/app.py +++ b/repo2docker/app.py @@ -29,7 +29,7 @@ from .buildpacks import ( PythonBuildPack, DockerBuildPack, LegacyBinderDockerBuildPack, CondaBuildPack, JuliaBuildPack, Python2BuildPack, BaseImage ) -from .utils import execute_cmd, ByteSpecification, maybe_cleanup +from .utils import execute_cmd, ByteSpecification, maybe_cleanup, ImageNameValidator from . import __version__ @@ -136,7 +136,7 @@ class Repo2Docker(Application): ArgumentTypeError: if image_name contains characters that are not lowercase """ - if not image_name.islower(): + if not ImageNameValidator.is_valid_image_name(image_name): msg = "%r is not a valid docker image name. Image name can contain only lowercase characters." % image_name raise argparse.ArgumentTypeError(msg) return image_name diff --git a/repo2docker/utils.py b/repo2docker/utils.py index 334b2399..8b680c15 100644 --- a/repo2docker/utils.py +++ b/repo2docker/utils.py @@ -2,6 +2,7 @@ from contextlib import contextmanager from functools import partial import shutil import subprocess +import re from traitlets import Integer @@ -99,3 +100,166 @@ class ByteSpecification(Integer): raise TraitError('{val} is not a valid memory specification. Must be an int or a string with suffix K, M, G, T'.format(val=value)) else: return int(float(num) * self.UNIT_SUFFIXES[suffix]) + + +class ImageNameValidator: + """ + Given a docker image_name, check if the image_name conforms to the restrictions placed by docker. 
+ + Class defines the regex patterns based off of the definitions in + https://github.com/docker/distribution/blob/master/reference/regexp.go. There are some modifications as noted below. + """ + + def __init__(self): + alpha_numeric_regex = r'[a-z0-9]+' + """str: raw pattern denoting the part of a name made up only of lowercase characters and numbers""" + + separator_regex = r'(?:[\._]|__|[-]*)' + """str: raw pattern denoting separators allowed to be embedded in component names""" + + domain_component_regex_lowercase = r'(?:[a-z0-9]|[a-z0-9][a-z0-9-]*[a-z0-9])' + """str: raw pattern restricts the domain component of the tag to lowercase alphabets or numbers, with hyphens allowed only in the interior (a single character is also valid) + Differs from https://github.com/docker/distribution/blob/master/reference/regexp.go in that it only allows + lowercase characters + """ + + domain_component_regex = r'(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])' + """str: raw pattern restricts the domain component of the tag to alphabets or numbers, with hyphens allowed only in the interior (a single character is also valid)""" + + numbers = r'[0-9]+' + """str: raw pattern restricts to only one or more numbers""" + + tag_regex = r'[\w][\w.-]{0,127}' + """str: raw pattern matching valid tag names that can at most contain 128 characters""" + + digest_regex = r'[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}' + """str: raw pattern representing an image digest""" + + name_component_regex = self.expression(alpha_numeric_regex, + self.optional(self.repeated(separator_regex, + alpha_numeric_regex) + ) + ) + """str: restricts registry path component to start with alpha_numeric_regex followed by optional parts that can + have separators""" + + domain_regex = self.expression(domain_component_regex_lowercase, + self.optional(self.repeated(r'\.', domain_component_regex)), + self.optional(r':', numbers)) + """str: representing a registry domain starting with domain_component_regex_lowercase followed by optional period separated + domain_component_regex followed by optional : separated port + + Example: + + 
'test.Com/name:latest' is still a valid tag + but + 'Test/name:latest' is not a valid tag + + Note: + + This gives a stricter pattern, in that the first part of a '.' separated registry domain must always be lowercase + + This pattern will not allow cases like + 'TEST.com/name:latest' though docker considers it a valid tag + """ + + name_regex = self.expression(self.optional(domain_regex, r'/'), + name_component_regex, + self.optional(self.repeated(r'/', name_component_regex))) + """str: defines a pattern representing an optional registry domain followed by one or more component names + separated by /""" + + self.reference_regex = self.anchored(self.capture(name_regex), + self.optional(r':', self.capture(tag_regex)), + self.optional(r'@', digest_regex)) + """str: defines a pattern representing a reference. The pattern is anchored and has capturing groups for + name and tag; the digest is matched but not captured""" + + @staticmethod + def is_valid_image_name(image_name): + """ + Static method that tests whether image_name conforms to a reference pattern + + Args: + image_name: string representing the image name + + Returns: + True if it is a valid docker image name + """ + + validator = ImageNameValidator() + result = re.match(validator.reference_regex, image_name) + + return result is not None + + def expression(self, *args): + """ + Defines a full expression where each regex must follow the other + Args: + *args: Argument list representing regex + + Returns: + an expression which is a concatenation of the regexes in the *args + """ + s = r''.join(list(args)) + return s + + def optional(self, *args): + """ + Wraps the expression in a non-capturing group and makes it optional + + Args: + *args: Argument list representing regex + + Returns: + a string representing the regex wrapped in non-capturing group with optional production + """ + return self.group(self.expression(*args)) + r'?' 
+ + def repeated(self, *args): + """ + Wraps the expression in a non-capturing group to get one or more matches + + Args: + *args: Argument list representing regex + + Returns: + a string representing the regex wrapped in non-capturing group with one or more matches + """ + return self.group(self.expression(*args)) + r'+' + + def group(self, *args): + """ + Wraps the expression in a non-capturing group + + Args: + *args: Argument list representing regex + + Returns: + the expression represented by args wrapped in a non-capturing group + """ + return r'(?:' + self.expression(*args) + r')' + + def capture(self, *args): + """ + Wraps the expression in a capturing group + + Args: + *args: Argument list representing regex + + Returns: + the expression represented by args wrapped in a capturing group + """ + return r'(' + self.expression(*args) + r')' + + def anchored(self, *args): + """ + Anchors the regular expression by adding start and end delimiters + + Args: + *args: Argument list representing regex + + Returns: + anchored regex + """ + return r'^' + self.expression(*args) + r'$' \ No newline at end of file diff --git a/tests/argumentvalidation.py b/tests/argumentvalidation.py index ea40c5c1..eb895081 100644 --- a/tests/argumentvalidation.py +++ b/tests/argumentvalidation.py @@ -11,6 +11,7 @@ def does_validate_image_name(builddir, image_name): [ 'repo2docker', '--no-run', + '--no-build', '--image-name', str(image_name), builddir @@ -26,12 +27,64 @@ def does_validate_image_name(builddir, image_name): else: raise + def test_image_name_fail(): """ - Test to check if repo2docker throws image_name validation error on --image-name argument containing uppercase characters. + Test to check if repo2docker throws image_name validation error on --image-name argument containing + uppercase characters and _ characters in incorrect positions. 
""" builddir = os.path.dirname(__file__) assert not does_validate_image_name(builddir, 'Test/Invalid_name:1.0.0') + +def test_image_name_underscore_fail(): + """ + Test to check if repo2docker throws image_name validation error on --image-name argument starting with _. + """ + + builddir = os.path.dirname(__file__) + + assert not does_validate_image_name(builddir, '_test/invalid_name:1.0.0') + + +def test_image_name_double_dot_fail(): + """ + Test to check if repo2docker throws image_name validation error on --image-name argument containing consecutive dots. + """ + + builddir = os.path.dirname(__file__) + + assert not does_validate_image_name(builddir, 'test..com/invalid_name:1.0.0') + + +def test_image_name_valid_restircted_registry_domain_name_fail(): + """ + Test to check if repo2docker throws image_name validation error on --image-name argument being invalid. Based on the + regex definitions, the first part of the registry domain cannot contain uppercase characters + """ + + builddir = os.path.dirname(__file__) + + assert not does_validate_image_name(builddir, 'Test.com/valid_name:1.0.0') + + +def test_image_name_valid_registry_domain_name_success(): + """ + Test to check if repo2docker runs with a valid --image-name argument. + """ + + builddir = os.path.dirname(__file__) + '/dockerfile/simple/' + + assert does_validate_image_name(builddir, 'test.COM/valid_name:1.0.0') + + +def test_image_name_valid_name_success(): + """ + Test to check if repo2docker runs with a valid --image-name argument. + """ + + builddir = os.path.dirname(__file__) + '/dockerfile/simple/' + + assert does_validate_image_name(builddir, 'test.com/valid_name:1.0.0') \ No newline at end of file