Extending JSON Schema

In this example, we define a custom keyword – "enumRef" – that lets us validate JSON string instances against enumerations (which may consist of many thousands of terms) that have been obtained from remote terminology services and cached locally.

First, we create a vocabulary that describes the syntax of our new keyword. This is a JSON meta-schema that we’ll save to data/enumRef-vocabulary.json:

{
    "title": "A meta-schema describing the syntax of a vocabulary that supports remote enumerations",
    "$id": "https://example.com/enumRef/enumRef-vocabulary",
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "$vocabulary": {
        "https://example.com/enumRef": true
    },
    "$dynamicAnchor": "meta",
    "type": ["object", "boolean"],
    "properties": {
        "enumRef": {
            "type": "string",
            "format": "uri-reference"
        }
    }
}

Next, we create an extension to the JSON Schema 2020-12 meta-schema that includes our new vocabulary. We’ll save this to data/enumRef-metaschema.json:

{
    "title": "An extension to the JSON Schema 2020-12 meta-schema, incorporating a vocabulary that supports remote enumerations",
    "$id": "https://example.com/enumRef/enumRef-metaschema",
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "$vocabulary": {
        "https://json-schema.org/draft/2020-12/vocab/core": true,
        "https://json-schema.org/draft/2020-12/vocab/applicator": true,
        "https://json-schema.org/draft/2020-12/vocab/unevaluated": true,
        "https://json-schema.org/draft/2020-12/vocab/validation": true,
        "https://json-schema.org/draft/2020-12/vocab/meta-data": true,
        "https://json-schema.org/draft/2020-12/vocab/format-annotation": true,
        "https://json-schema.org/draft/2020-12/vocab/content": true,
        "https://example.com/enumRef": true
    },
    "$dynamicAnchor": "meta",
    "type": ["object", "boolean"],
    "allOf": [
        {"$ref": "https://json-schema.org/draft/2020-12/meta/core"},
        {"$ref": "https://json-schema.org/draft/2020-12/meta/applicator"},
        {"$ref": "https://json-schema.org/draft/2020-12/meta/unevaluated"},
        {"$ref": "https://json-schema.org/draft/2020-12/meta/validation"},
        {"$ref": "https://json-schema.org/draft/2020-12/meta/meta-data"},
        {"$ref": "https://json-schema.org/draft/2020-12/meta/format-annotation"},
        {"$ref": "https://json-schema.org/draft/2020-12/meta/content"},
        {"$ref": "https://example.com/enumRef/enumRef-vocabulary"}
    ]
}

Finally, we implement the "enumRef" keyword by defining an EnumRefKeyword class. The following script contains three parts: an example implementation backed by a simple local cache; a few lines of boilerplate code that register the meta-schema and vocabulary definition files with the catalog; and an example schema against which we evaluate one valid and one invalid sample JSON instance:

import pathlib
import pprint

from jschon import create_catalog, URI, JSON, JSONSchema, JSONSchemaError, LocalSource
from jschon.jsonschema import Result
from jschon.vocabulary import Keyword

# the 'data' directory that sits alongside this script
data_dir = pathlib.Path(__file__).parent.joinpath('data')

# locally cached enumeration values, keyed by the URI of the remote
# enumeration from which they were obtained
remote_enum_cache = {
    "https://example.com/remote-enum-colours": [
        "red", "orange", "yellow", "green", "blue", "indigo", "violet",
    ],
}


# define a class that implements the "enumRef" keyword
class EnumRefKeyword(Keyword):
    """Implementation of the "enumRef" keyword.

    The keyword's value is a reference to an enumeration held in
    ``remote_enum_cache``. A string instance passes if it is a member of
    the referenced enumeration; non-string instances are ignored.
    """

    key = "enumRef"

    # ignore non-string instances
    instance_types = ("string",)

    def __init__(self, parentschema: JSONSchema, value: str):
        """Construct the keyword, checking that its reference is known.

        :param parentschema: the schema in which the keyword appears
        :param value: the reference to a cached remote enumeration
        :raise JSONSchemaError: if `value` is not a key of
            ``remote_enum_cache``
        """
        super().__init__(parentschema, value)

        # raise an exception during schema construction if a reference is invalid
        if value not in remote_enum_cache:
            raise JSONSchemaError(f"Unknown remote enumeration {value}")

    def evaluate(self, instance: JSON, result: Result) -> None:
        """Evaluate `instance` against the referenced enumeration.

        On success the result is annotated with the enumeration reference;
        on failure the result is marked as failed with an error message.

        :param instance: the JSON instance node being evaluated
        :param result: the result object to annotate or fail
        """
        # the keyword's value is a reference to a remote enumeration
        enum_ref = self.json.value

        # The reference was present in the cache at construction time, but
        # the cache is a mutable module-level dict; if the entry has since
        # been removed, fall back to an empty enumeration so that the
        # instance fails validation rather than raising a TypeError from
        # a membership test against None.
        enum_values = remote_enum_cache.get(enum_ref, ())

        # evaluate the current JSON instance node against the enumeration
        if instance.data in enum_values:
            # (optionally) on success, annotate the result
            result.annotate(enum_ref)
        else:
            # on failure, mark the result as failed, with an (optional) error message
            result.fail(f"The instance is not a member of the {enum_ref} enumeration")


# set up a catalog that supports the JSON Schema 2020-12 vocabularies
catalog = create_catalog('2020-12')

# register the data directory as the source for URIs under the enumRef
# base URI, so that the enumRef meta-schema and vocabulary definition
# files are read from local .json files
enum_ref_base_uri = URI("https://example.com/enumRef/")
enum_ref_source = LocalSource(data_dir, suffix='.json')
catalog.add_uri_source(enum_ref_base_uri, enum_ref_source)

# bind the enumRef vocabulary URI to its keyword implementation
catalog.create_vocabulary(URI("https://example.com/enumRef"), EnumRefKeyword)

# a schema that requires a string to be a member of a remote enumeration
schema_document = {
    "$schema": "https://example.com/enumRef/enumRef-metaschema",
    "$id": "https://example.com/remote-enum-test-schema",
    "type": "string",
    "enumRef": "https://example.com/remote-enum-colours",
}
schema = JSONSchema(schema_document)

# check the schema itself against its meta-schema
schema_validity = schema.validate()
print(f'Schema validity check: {schema_validity.valid}')

# one instance that is in the enumeration, and one that is not
valid_json = JSON("green")
invalid_json = JSON("purple")

# evaluate both instances against the schema
valid_result = schema.evaluate(valid_json)
invalid_result = schema.evaluate(invalid_json)

# report the outcome for the valid instance
print(f'Valid JSON result: {valid_result.valid}')
print('Valid JSON detailed output:')
pprint.pp(valid_result.output('detailed'))

# report the outcome for the invalid instance
print(f'Invalid JSON result: {invalid_result.valid}')
print('Invalid JSON detailed output:')
pprint.pp(invalid_result.output('detailed'))

The script produces the following output:

Schema validity check: True
Valid JSON result: True
Valid JSON detailed output:
{'valid': True,
 'instanceLocation': '',
 'keywordLocation': '',
 'absoluteKeywordLocation': 'https://example.com/remote-enum-test-schema#',
 'annotations': [{'instanceLocation': '',
                  'keywordLocation': '/type',
                  'absoluteKeywordLocation': 'https://example.com/remote-enum-test-schema#/type'},
                 {'instanceLocation': '',
                  'keywordLocation': '/enumRef',
                  'absoluteKeywordLocation': 'https://example.com/remote-enum-test-schema#/enumRef',
                  'annotation': 'https://example.com/remote-enum-colours'}]}
Invalid JSON result: False
Invalid JSON detailed output:
{'valid': False,
 'instanceLocation': '',
 'keywordLocation': '',
 'absoluteKeywordLocation': 'https://example.com/remote-enum-test-schema#',
 'errors': [{'instanceLocation': '',
             'keywordLocation': '/enumRef',
             'absoluteKeywordLocation': 'https://example.com/remote-enum-test-schema#/enumRef',
             'error': 'The instance is not a member of the '
                      'https://example.com/remote-enum-colours enumeration'}]}