Extending JSON Schema

In this example, we define a custom keyword – "enumRef" – that lets us validate JSON string instances against enumerations (which may consist of many thousands of terms) obtained and cached from remote terminology services.

First, we create a meta-schema that describes the syntax of the vocabulary containing our new keyword. We’ll save this JSON meta-schema to data/enumRef-vocabulary.json:

{
    "title": "A meta-schema describing the syntax of a vocabulary that supports remote enumerations",
    "$id": "https://example.com/enumRef/enumRef-vocabulary",
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "$vocabulary": {
        "https://example.com/enumRef": true
    },
    "$dynamicAnchor": "meta",
    "type": ["object", "boolean"],
    "properties": {
        "enumRef": {
            "type": "string",
            "format": "uri-reference"
        }
    }
}

Next, we create an extension to the JSON Schema 2020-12 meta-schema that includes our new vocabulary. We’ll save this to data/enumRef-metaschema.json:

{
    "title": "An extension to the JSON Schema 2020-12 meta-schema, incorporating a vocabulary that supports remote enumerations",
    "$id": "https://example.com/enumRef/enumRef-metaschema",
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "$vocabulary": {
        "https://json-schema.org/draft/2020-12/vocab/core": true,
        "https://json-schema.org/draft/2020-12/vocab/applicator": true,
        "https://json-schema.org/draft/2020-12/vocab/unevaluated": true,
        "https://json-schema.org/draft/2020-12/vocab/validation": true,
        "https://json-schema.org/draft/2020-12/vocab/meta-data": true,
        "https://json-schema.org/draft/2020-12/vocab/format-annotation": true,
        "https://json-schema.org/draft/2020-12/vocab/content": true,
        "https://example.com/enumRef": true
    },
    "$dynamicAnchor": "meta",
    "type": ["object", "boolean"],
    "allOf": [
        {"$ref": "https://json-schema.org/draft/2020-12/meta/core"},
        {"$ref": "https://json-schema.org/draft/2020-12/meta/applicator"},
        {"$ref": "https://json-schema.org/draft/2020-12/meta/unevaluated"},
        {"$ref": "https://json-schema.org/draft/2020-12/meta/validation"},
        {"$ref": "https://json-schema.org/draft/2020-12/meta/meta-data"},
        {"$ref": "https://json-schema.org/draft/2020-12/meta/format-annotation"},
        {"$ref": "https://json-schema.org/draft/2020-12/meta/content"},
        {"$ref": "https://example.com/enumRef/enumRef-vocabulary"}
    ]
}

Finally, we implement the "enumRef" keyword by defining an EnumRefKeyword class. The following script includes an example implementation using a simple local cache, a few lines of boilerplate code to compile the meta-schema and vocabulary definition files, and an example schema that we use to evaluate both valid and invalid sample JSON instances:

import pathlib
import pprint

from jschon import create_catalog, URI, JSON, JSONSchema, JSONSchemaError, LocalSource
from jschon.jsonschema import Result
from jschon.vocabulary import Keyword

# directory (next to this script) from which the enumRef meta-schema and
# vocabulary definition files are loaded
data_dir = pathlib.Path(__file__).parent / 'data'

# cache of enumeration values obtained from remote terminology services;
# each enumeration is held as a frozenset so that membership tests remain
# constant-time even when an enumeration consists of many thousands of terms,
# and so that cached enumerations cannot be modified by accident
remote_enum_cache = {
    "https://example.com/remote-enum-colours": frozenset({
        "red",
        "orange",
        "yellow",
        "green",
        "blue",
        "indigo",
        "violet",
    }),
}


# define a class that implements the "enumRef" keyword
class EnumRefKeyword(Keyword):
    """Implementation of the "enumRef" keyword.

    The keyword's value identifies an enumeration held in
    ``remote_enum_cache``; an instance passes if its value is a member of
    that enumeration.
    """

    key = "enumRef"

    # ignore non-string instances
    # (written as an explicit one-element tuple; a bare trailing comma is
    # easy to overlook)
    instance_types = ("string",)

    def evaluate(self, instance: JSON, result: Result) -> None:
        """Test ``instance`` for membership of the referenced enumeration.

        On success the result is annotated with the enumeration's
        identifier; on failure the result is marked as failed with an
        explanatory message.

        :param instance: the JSON instance node being evaluated
        :param result: the result object to annotate or fail
        :raises JSONSchemaError: if the keyword's value is not a key of
            ``remote_enum_cache``
        """
        # get the keyword's value as it appears in the JSON schema
        enum_id = self.json.data
        try:
            # retrieve the enumeration from the remote enumeration cache
            enum = remote_enum_cache[enum_id]
        except KeyError as exc:
            # chain explicitly so the traceback presents the failed lookup as
            # the direct cause, not as an error raised while handling another
            raise JSONSchemaError(f"Unknown remote enumeration {enum_id}") from exc

        # test the value of the current JSON instance node against the enumeration
        if instance.data in enum:
            # (optionally) on success, annotate the result
            result.annotate(enum_id)
        else:
            # on failure, mark the result as failed, with an (optional) error message
            result.fail(f"The instance is not a member of the {enum_id} enumeration")


# set up a catalog that supports the JSON Schema 2020-12 vocabularies
catalog = create_catalog('2020-12')

# register the data directory as a local source, from which the enumRef
# meta-schema and vocabulary definition files are loaded
catalog.add_uri_source(
    URI("https://example.com/enumRef/"), LocalSource(data_dir, suffix='.json')
)

# associate the EnumRefKeyword class with the enumRef vocabulary
catalog.create_vocabulary(URI("https://example.com/enumRef"), EnumRefKeyword)

# compile the enumRef metaschema, so that any schema referencing it can
# use the keyword implementations provided by its vocabularies; the first
# vocabulary URI passed is the core vocabulary, followed by the others
catalog.create_metaschema(
    URI("https://example.com/enumRef/enumRef-metaschema"),
    *(
        URI(vocabulary_uri)
        for vocabulary_uri in (
            "https://json-schema.org/draft/2020-12/vocab/core",
            "https://json-schema.org/draft/2020-12/vocab/applicator",
            "https://json-schema.org/draft/2020-12/vocab/unevaluated",
            "https://json-schema.org/draft/2020-12/vocab/validation",
            "https://json-schema.org/draft/2020-12/vocab/format-annotation",
            "https://json-schema.org/draft/2020-12/vocab/meta-data",
            "https://json-schema.org/draft/2020-12/vocab/content",
            "https://example.com/enumRef",
        )
    ),
)

# a schema that accepts only strings belonging to the remote colours
# enumeration; "$schema" selects the enumRef metaschema so that the
# "enumRef" keyword is recognised
schema = JSONSchema({
    "$schema": "https://example.com/enumRef/enumRef-metaschema",
    "$id": "https://example.com/remote-enum-test-schema",
    "type": "string",
    "enumRef": "https://example.com/remote-enum-colours",
})

# check the schema itself against its metaschema and report the outcome
schema_validity = schema.validate()
print(f'Schema validity check: {schema_validity.valid}')

# sample instances: "green" is in the cached enumeration, "purple" is not
valid_json, invalid_json = JSON("green"), JSON("purple")

# evaluate both instances against the schema (valid one first)
valid_result, invalid_result = map(schema.evaluate, (valid_json, invalid_json))

# report on the valid instance
print(f'Valid JSON result: {valid_result.valid}')
print('Valid JSON detailed output:')
pprint.pp(valid_result.output('detailed'))

# report on the invalid instance
print(f'Invalid JSON result: {invalid_result.valid}')
print('Invalid JSON detailed output:')
pprint.pp(invalid_result.output('detailed'))

The script produces the following output:

Schema validity check: True
Valid JSON result: True
Valid JSON detailed output:
{'valid': True,
 'instanceLocation': '',
 'keywordLocation': '',
 'absoluteKeywordLocation': 'https://example.com/remote-enum-test-schema#',
 'annotations': [{'instanceLocation': '',
                  'keywordLocation': '/type',
                  'absoluteKeywordLocation': 'https://example.com/remote-enum-test-schema#/type'},
                 {'instanceLocation': '',
                  'keywordLocation': '/enumRef',
                  'absoluteKeywordLocation': 'https://example.com/remote-enum-test-schema#/enumRef',
                  'annotation': 'https://example.com/remote-enum-colours'}]}
Invalid JSON result: False
Invalid JSON detailed output:
{'valid': False,
 'instanceLocation': '',
 'keywordLocation': '',
 'absoluteKeywordLocation': 'https://example.com/remote-enum-test-schema#',
 'errors': [{'instanceLocation': '',
             'keywordLocation': '/enumRef',
             'absoluteKeywordLocation': 'https://example.com/remote-enum-test-schema#/enumRef',
             'error': 'The instance is not a member of the '
                      'https://example.com/remote-enum-colours enumeration'}]}