diff --git a/.changelog/503eda9ec20e470cafb7904500dd22cb.md b/.changelog/503eda9ec20e470cafb7904500dd22cb.md new file mode 100644 index 0000000..6f7da1d --- /dev/null +++ b/.changelog/503eda9ec20e470cafb7904500dd22cb.md @@ -0,0 +1,4 @@ +--- +type: minor +--- +Add glob and regex support to dynamic zone config \ No newline at end of file diff --git a/docs/configuration.rst b/docs/configuration.rst index 08b3647..4a9a07b 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -13,10 +13,45 @@ YamlProvider :py:mod:`octodns.provider.yaml` lays out the options for configuring the most commonly used source of record data. +Dynamic Zone Config +------------------- + +In many cases octoDNS's dynamic zone configuration is the best option for +configuring octoDNS to manage your zones. In its simplest form that would look +something like:: + + --- + providers: + config: + class: octodns.provider.yaml.YamlProvider + directory: ./config + default_ttl: 3600 + enforce_order: True + ns1: + class: octodns_ns1.Ns1Provider + api_key: env/NS1_API_KEY + route53: + class: octodns_route53.Route53Provider + access_key_id: env/AWS_ACCESS_KEY_ID + secret_access_key: env/AWS_SECRET_ACCESS_KEY + + zones: + '*': + sources: + - config + targets: + - ns1 + - route53 + +This configuration will query both ns1 and route53 for the list of zones they +are managing and dynamically add them to the list being managed using the +sources and targets corresponding to the '*' section. See +:ref:`dynamic-zone-config` for details. + Static Zone Config ------------------ -In cases where finer grained control is desired and the configuration of +In cases where fine grained control is desired and the configuration of individual zones varies ``zones`` can be an explicit list with each configured zone listed along with its specific setup. 
As exemplified below ``alias`` zones can be useful when two zones are exact copies of each other, with the same diff --git a/docs/dynamic_zone_config.rst b/docs/dynamic_zone_config.rst new file mode 100644 index 0000000..c6cd36b --- /dev/null +++ b/docs/dynamic_zone_config.rst @@ -0,0 +1,181 @@ +.. _dynamic-zone-config: + +Dynamic Zone Config +=================== + +Dynamic zone configuration is a powerful tool for reducing the +configuration required to run octoDNS, specifically the *zones* section. Rather +than an exhaustive list of every zone and its corresponding sources and targets +it's possible to define the pattern once with a wildcard. + +This is most commonly done with a `YamlProvider`_ which will result in building +the list of zones managed at runtime from the YAML zone files in its +directory, but any provider that supports the +:py:meth:`octodns.provider.yaml.YamlProvider.list_zones` method can be used. + +Any zone name configured in the *zones* section with a leading * is considered +dynamic and the information in this document applies. It is possible to include +multiple dynamic zone configurations in advanced setups utilizing +distinct sources and/or carefully crafted matching as described below. + +Matching +-------- + +There are three types of matching supported: legacy, file-glob, and regular +expression. This ultimately results in very flexible and powerful options, but +makes it pretty easy to build a foot-gun. The matching process has thorough +info and debug logging that can be enabled with **--debug** and should be the +first step in debugging a dynamic zone configuration. + +Legacy +...... + +This is the default mode and the only one supported in versions prior to +1.14.0. It is in effect a catch-all in that it claims any zones returned by the sources' +:py:meth:`octodns.provider.yaml.YamlProvider.list_zones` that are not already configured. 
+ +This generally means that it only makes sense to have multiple legacy matchers +when they have distinct sources, otherwise the first one configured will claim +all the zones leaving nothing available. + +.. _file-glob: + +File-glob +......... + +This mode uses Unix shell style matching using the `fnmatch`_ module and is +generally the place to start when trying to apply configs to zones in a single +source or set of sources as its behavior is relatively easy to understand and +predict. + +A public and private setup where the public zones are also pushed internally is +a good starting example. If the following zone YAML files are in the *config* +provider's directory:: + + company.com. + foundation.org. + internal.net. + jobs.company.com. + other.com. + support.company.com. + us-east-1.internal.net. + us-west-2.internal.net. + +The following octoDNS configuration would match them as described in comments:: + + --- + ... + + zones: + + # the names here do not really matter beyond starting with a *; it is a + # recommended best practice to match the glob, but not required. It will be + # used in logging to aid in debugging. + + # they are applied in the order defined and once claimed a zone is no + # longer available for matching + + # everything is available for matching + '*internal.net': + # we only want the private zones here and they are all under + # internal.net. so this glob will claim them. + glob: '*internal.net.' + sources: + - config + targets: + # only push it to the private provider + - private + + # legacy style match everything that's left, all our various public zones + '*': + # legacy style match everything that's left, all our various public zones + sources: + - config + targets: + # push it to the public dns + - public + # and private + - private + +This does mean that things are public by default so care would need to be taken +if a new internal zone naming pattern is added. + +.. 
_fnmatch: https://docs.python.org/3/library/fnmatch.html + +.. versionadded:: 1.14.0 + File-glob matching support was added in 1.14.0 + +.. _regular-expression: + +Regular Expression +.................. + +Regular expression mode works similarly to :ref:`file-glob` with the matching +performed by the Python regular expression engine `re`_. It enables much more +complex and powerful matching logic with the trade-off of having to work with +regular expressions. + +Continuing on with the public/private split, we add in the wrinkles of multiple +internal domain names and the desire to split the regions, pushing only to the +co-located DNS servers. All of our internal zones end in .net.; anything else +is public:: + + company.com. + foundation.org. + jobs.company.com. + other.com. + support.company.com. + us-east-1.hosts.net. + us-east-1.network.net. + us-east-1.services.net. + us-west-2.hosts.net. + us-west-2.network.net. + us-west-2.services.net. + +The following octoDNS configuration would match them as described in comments:: + + --- + ... + + zones: + + # regexes are too ugly to use as names, so these have useful info for + # logging/debugging + + # everything is available for matching + '*us-east-1': + # we only want the us-east-1 private zones here and they all end in + # .net. So this regex will claim them, yes this could be done + # with a glob, but ... + regex: '^.*us-east-1.*\.net\.$' + sources: + - config + targets: + # only push it to the us-east-1 provider + - us-east-1 + + # everything with the exception of the us-east-1 .net zones is available + '*us-west-2': + regex: '^.*us-west-2.*\.net\.$' + sources: + - config + targets: + # only push it to the us-west-2 provider + - us-west-2 + + # legacy style match everything that's left, all our various public zones + '*': + sources: + - config + targets: + # push it to the public dns + - public + # and private + - private + +.. _re: https://docs.python.org/3/library/re.html + +.. 
versionadded:: 1.14.0 + Regular expression matching support was added in 1.14.0 + +.. _YamlProvider: /octodns/provider/yaml.py diff --git a/docs/getting-started.rst b/docs/getting-started.rst index 681e413..6bffa79 100644 --- a/docs/getting-started.rst +++ b/docs/getting-started.rst @@ -41,9 +41,7 @@ separate accounts and each manage a distinct set of zones. A good example of this this might be ``./config/staging.yaml`` & ``./config/production.yaml``. We'll focus on a ``config/production.yaml``. -.. _dynamic-zone-config: - -Dynamic Zone Config +Zone Config ................... octoDNS supports dynamically building the list of zones it will work with when diff --git a/docs/index.rst b/docs/index.rst index 372d80d..983a5ca 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -28,6 +28,7 @@ Documentation getting-started.rst records.md configuration.rst + dynamic_zone_config.rst dynamic_records.rst auto_arpa.rst examples/README.rst diff --git a/octodns/manager.py b/octodns/manager.py index d710e91..bcdb849 100644 --- a/octodns/manager.py +++ b/octodns/manager.py @@ -4,12 +4,14 @@ from collections import deque from concurrent.futures import ThreadPoolExecutor +from fnmatch import filter as fnmatch_filter from hashlib import sha256 from importlib import import_module from importlib.metadata import PackageNotFoundError from importlib.metadata import version as module_version from json import dumps from logging import getLogger +from re import compile as re_compile from sys import stdout from . import __version__ @@ -591,30 +593,77 @@ class Manager(object): the call and the zones returned from this function should be used instead. 
''' + + source_zones = {} + + # list since we'll be modifying zones in the loop for name, config in list(zones.items()): - if not name.startswith('*'): + if name[0] != '*': + # this isn't a dynamic zone config, move along continue - # we've found a dynamic config element - # find its sources + # it's dynamic, get a list of zone names from the configured sources found_sources = sources or self._get_sources( name, config, eligible_sources ) - self.log.info('sync: dynamic zone=%s, sources=%s', name, sources) + self.log.info( + '_preprocess_zones: dynamic zone=%s, sources=%s', + name, + (s.id for s in found_sources), + ) + candidates = set() for source in found_sources: - if not hasattr(source, 'list_zones'): - raise ManagerException( - f'dynamic zone={name} includes a source, {source.id}, that does not support `list_zones`' - ) - for zone_name in source.list_zones(): - if zone_name in zones: - self.log.info( - 'sync: zone=%s already in config, ignoring', - zone_name, + if source.id not in source_zones: + if not hasattr(source, 'list_zones'): + raise ManagerException( + f'dynamic zone={name} includes a source, {source.id}, that does not support `list_zones`' ) - continue - self.log.info('sync: adding dynamic zone=%s', zone_name) - zones[zone_name] = config + # get this source's zones + listed_zones = set(source.list_zones()) + # cache them + source_zones[source.id] = listed_zones + self.log.debug( + '_preprocess_zones: source=%s, list_zones=%s', + source.id, + listed_zones, + ) + # add this source's zones to the candidates + candidates |= source_zones[source.id] + + self.log.debug( + '_preprocess_zones: name=%s, candidates=%s', name, candidates + ) + + # remove any zones that are already configured, either explicitly or + # from a previous dyanmic config + candidates -= set(zones.keys()) + + if glob := config.pop('glob', None): + self.log.debug( + '_preprocess_zones: name=%s, glob=%s', name, glob + ) + candidates = set(fnmatch_filter(candidates, glob)) + elif regex := 
config.pop('regex', None): + self.log.debug( + '_preprocess_zones: name=%s, regex=%s', name, regex + ) + regex = re_compile(regex) + self.log.debug( + '_preprocess_zones: name=%s, compiled=%s', name, regex + ) + candidates = set(z for z in candidates if regex.search(z)) + else: + # old-style wildcard that uses everything + self.log.debug( + '_preprocess_zones: name=%s, old semantics, catch all', name + ) + + self.log.debug( + '_preprocess_zones: name=%s, matches=%s', name, candidates + ) + + for match in candidates: + zones[match] = config # remove the dynamic config element so we don't try and populate it del zones[name] diff --git a/tests/test_octodns_manager.py b/tests/test_octodns_manager.py index 76e5027..6ef1cf5 100644 --- a/tests/test_octodns_manager.py +++ b/tests/test_octodns_manager.py @@ -1341,6 +1341,307 @@ class TestManager(TestCase): self.assertIsNone(zone_with_defaults.update_pcent_threshold) self.assertIsNone(zone_with_defaults.delete_pcent_threshold) + def test_preprocess_zones_original(self): + # these will be unused + environ['YAML_TMP_DIR'] = '/tmp' + environ['YAML_TMP_DIR2'] = '/tmp' + manager = Manager(get_config_filename('simple.yaml')) + + # nothing returns nothing + mock_source = MagicMock() + got = manager._preprocess_zones({}, sources=[mock_source]) + self.assertEqual({}, got) + mock_source.list_zones.assert_not_called() + + # non-dynamic returns as-is, no calls to sources + mock_source.reset_mock() + zones = {'unit.tests.': {}} + got = manager._preprocess_zones(zones, sources=[mock_source]) + self.assertEqual(zones, got) + mock_source.list_zones.assert_not_called() + + # source that doesn't support list_zones + class SimpleSource: + id = 'simple-source' + + # dynamic with a source that doesn't support it + mock_source.reset_mock() + zones = {'*': {}} + with self.assertRaises(ManagerException) as ctx: + manager._preprocess_zones(zones, sources=[SimpleSource()]) + self.assertEqual( + 'dynamic zone=* includes a source, simple-source, that 
does not support `list_zones`', + str(ctx.exception), + ) + mock_source.list_zones.assert_not_called() + + # same, but w/a source supports it + mock_source.reset_mock() + config = {'foo': 42} + zones = {'*': config} + mock_source.list_zones.return_value = ['one', 'two', 'three'] + got = manager._preprocess_zones(zones, sources=[mock_source]) + self.assertEqual({'one': config, 'two': config, 'three': config}, got) + mock_source.list_zones.assert_called_once() + + # same, but one of the zones is expliticly configured, so left alone + mock_source.reset_mock() + config = {'foo': 42} + zones = {'*': config, 'two': {'bar': 43}} + mock_source.list_zones.return_value = ['one', 'two', 'three'] + got = manager._preprocess_zones(zones, sources=[mock_source]) + self.assertEqual( + {'one': config, 'two': {'bar': 43}, 'three': config}, got + ) + mock_source.list_zones.assert_called_once() + + # doesn't matter what the actual name is, just that it starts with a *, + mock_source.reset_mock() + config = {'foo': 42} + zones = {'*SDFLKJSDFL': config, 'two': {'bar': 43}} + mock_source.list_zones.return_value = ['one', 'two', 'three'] + got = manager._preprocess_zones(zones, sources=[mock_source]) + self.assertEqual( + {'one': config, 'two': {'bar': 43}, 'three': config}, got + ) + mock_source.list_zones.assert_called_once() + + def test_preprocess_zones_multiple_single_source(self): + # these will be unused + environ['YAML_TMP_DIR'] = '/tmp' + environ['YAML_TMP_DIR2'] = '/tmp' + manager = Manager(get_config_filename('simple.yaml')) + + manager._get_sources = MagicMock() + mock_source = MagicMock() + mock_source.id = 'mm' + manager._get_sources = MagicMock() + manager._get_sources.return_value = [mock_source] + + config_a = {'foo': 42} + config_b = {'bar': 43} + zones = {'*.a.com.': config_a, '*.b.com.': config_b} + mock_source.list_zones.side_effect = [ + ['one.a.com.', 'two.a.com.', 'one.b.com.', 'two.b.com.'] + ] + got = manager._preprocess_zones(zones, sources=[]) + # each zone 
will have it's sources looked up + self.assertEqual(2, manager._get_sources.call_count) + # but there's only one source so it's zones will be cached + self.assertEqual(1, mock_source.list_zones.call_count) + # everything will have been matched by the first old style wildcard and + # thus have its config, nothing will have b's + self.assertEqual( + { + 'one.a.com.': config_a, + 'two.a.com.': config_a, + 'one.b.com.': config_a, + 'two.b.com.': config_a, + }, + got, + ) + + def test_preprocess_zones_multiple_seperate_sources(self): + # these will be unused + environ['YAML_TMP_DIR'] = '/tmp' + environ['YAML_TMP_DIR2'] = '/tmp' + manager = Manager(get_config_filename('simple.yaml')) + + manager._get_sources = MagicMock() + mock_source_a = MagicMock() + mock_source_a.id = 'mm_a' + mock_source_b = MagicMock() + mock_source_b.id = 'mm_b' + manager._get_sources = MagicMock() + manager._get_sources.side_effect = [[mock_source_a], [mock_source_b]] + + config_a = {'foo': 42} + config_b = {'bar': 43} + zones = {'*.a.com.': config_a, '*.b.com.': config_b} + mock_source_a.list_zones.side_effect = [['one.a.com.', 'two.a.com.']] + mock_source_b.list_zones.side_effect = [['one.b.com.', 'two.b.com.']] + got = manager._preprocess_zones(zones, sources=[]) + # each zone will have it's sources looked up + self.assertEqual(2, manager._get_sources.call_count) + # so each mock will be called once + self.assertEqual(1, mock_source_a.list_zones.call_count) + self.assertEqual(1, mock_source_b.list_zones.call_count) + # the souces from each source will be matched with the coresponding config + self.assertEqual( + { + 'one.a.com.': config_a, + 'two.a.com.': config_a, + 'one.b.com.': config_b, + 'two.b.com.': config_b, + }, + got, + ) + + def test_preprocess_zones_glob(self): + # these will be unused + environ['YAML_TMP_DIR'] = '/tmp' + environ['YAML_TMP_DIR2'] = '/tmp' + manager = Manager(get_config_filename('simple.yaml')) + + manager._get_sources = MagicMock() + mock_source = MagicMock() + 
mock_source.id = 'mm' + manager._get_sources = MagicMock() + manager._get_sources.return_value = [mock_source] + + # won't match anything + config_n = {'foo': 42, 'glob': r'*.nope.com.'} + # match things with .a. + config_a = {'foo': 42, 'glob': r'*.a.com.'} + # match things with .b. + config_b = {'bar': 43, 'glob': r'*.b.com.'} + # will match anything + config_c = {'bar': 43, 'glob': r'*'} + zones = { + '*.nope.com.': config_n, + '*.a.com.': config_a, + '*.b.com.': config_b, + '*': config_c, + } + mock_source.list_zones.return_value = [ + # matched by a + 'one.a.com.', + # matched by a + 'two.a.com.', + # matched by b + 'one.b.com.', + # matched by b + 'two.b.com.', + # matched by c, catch all + 'ignored.com.', + ] + got = manager._preprocess_zones(zones, sources=[]) + # 4 configs + self.assertEqual(4, manager._get_sources.call_count) + # 1 shared source + self.assertEqual(1, mock_source.list_zones.call_count) + self.assertEqual( + { + 'one.a.com.': config_a, + 'two.a.com.': config_a, + 'one.b.com.': config_b, + 'two.b.com.': config_b, + 'ignored.com.': config_c, + }, + got, + ) + + # if we define the catch all first it'll take everything and leave + # nothing for the others + zones = { + '*': config_c, + '*.nope.com.': config_n, + '*.a.com.': config_a, + '*.b.com.': config_b, + } + got = manager._preprocess_zones(zones, sources=[]) + self.assertEqual( + { + 'one.a.com.': config_c, + 'two.a.com.': config_c, + 'one.b.com.': config_c, + 'two.b.com.': config_c, + 'ignored.com.': config_c, + }, + got, + ) + + def test_preprocess_zones_regex(self): + # these will be unused + environ['YAML_TMP_DIR'] = '/tmp' + environ['YAML_TMP_DIR2'] = '/tmp' + manager = Manager(get_config_filename('simple.yaml')) + + manager._get_sources = MagicMock() + mock_source = MagicMock() + mock_source.id = 'mm' + manager._get_sources = MagicMock() + manager._get_sources.return_value = [mock_source] + + # match things with .a. 
+ config_a = {'foo': 42, 'regex': r'\.a\.'} + # match things with .b. + config_b = {'bar': 43, 'regex': r'\.b\.'} + zones = {'*.a.com.': config_a, '*.b.com.': config_b} + mock_source.list_zones.side_effect = [ + [ + 'one.a.com.', + 'two.a.com.', + 'one.b.com.', + 'two.b.com.', + 'ignored.com.', + ] + ] + got = manager._preprocess_zones(zones, sources=[]) + self.assertEqual(2, manager._get_sources.call_count) + self.assertEqual(1, mock_source.list_zones.call_count) + # a will regex match .a.com., b will .b.com., ignored.com. won't match + # anything + self.assertEqual( + { + 'one.a.com.': config_a, + 'two.a.com.': config_a, + 'one.b.com.': config_b, + 'two.b.com.': config_b, + }, + got, + ) + + def test_preprocess_zones_regex_claimed(self): + # these will be unused + environ['YAML_TMP_DIR'] = '/tmp' + environ['YAML_TMP_DIR2'] = '/tmp' + manager = Manager(get_config_filename('simple.yaml')) + + manager._get_sources = MagicMock() + mock_source = MagicMock() + mock_source.id = 'mm' + manager._get_sources = MagicMock() + manager._get_sources.return_value = [mock_source] + + # match things with .a. + config_a = {'foo': 42, 'regex': r'\.a\.'} + # match everything + config_b = {'bar': 43, 'regex': r'.*'} + zones = {'*.a.com.': config_a, '*.b.com.': config_b} + mock_source.list_zones.side_effect = [ + [ + # won't match a b/c no . before the a, will match b + 'a.com.', + # will match a, and be claimed + 'one.a.com.', + # will match a, and be claimed + 'two.a.com.', + # will match b + 'one.b.com.', + # will match b + 'two.b.com.', + # will match b + 'ignored.com.', + ] + ] + got = manager._preprocess_zones(zones, sources=[]) + self.assertEqual(2, manager._get_sources.call_count) + self.assertEqual(1, mock_source.list_zones.call_count) + # a will regex match .a.com., b will .b.com., ignored.com. 
won't match + # anything + self.assertEqual( + { + 'a.com.': config_b, + 'one.a.com.': config_a, + 'two.a.com.': config_a, + 'one.b.com.': config_b, + 'two.b.com.': config_b, + 'ignored.com.': config_b, + }, + got, + ) + class TestMainThreadExecutor(TestCase): def test_success(self):