Extending JSON Schema
In this example, we define a custom keyword – "enumRef" – that provides
us with the capability to validate JSON string instances against enumerations
(which may consist of many thousands of terms) that we have obtained and cached
from remote terminology services.
First, we create a vocabulary that describes the syntax of our new keyword.
This is a JSON meta-schema that we’ll save to data/enumRef-vocabulary.json:
{
"title": "A meta-schema describing the syntax of a vocabulary that supports remote enumerations",
"$id": "https://example.com/enumRef/enumRef-vocabulary",
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$vocabulary": {
"https://example.com/enumRef": true
},
"$dynamicAnchor": "meta",
"type": ["object", "boolean"],
"properties": {
"enumRef": {
"type": "string",
"format": "uri-reference"
}
}
}
Next, we create an extension to the JSON Schema 2020-12 meta-schema that
includes our new vocabulary. We’ll save this to data/enumRef-metaschema.json:
{
"title": "An extension to the JSON Schema 2020-12 meta-schema, incorporating a vocabulary that supports remote enumerations",
"$id": "https://example.com/enumRef/enumRef-metaschema",
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/core": true,
"https://json-schema.org/draft/2020-12/vocab/applicator": true,
"https://json-schema.org/draft/2020-12/vocab/unevaluated": true,
"https://json-schema.org/draft/2020-12/vocab/validation": true,
"https://json-schema.org/draft/2020-12/vocab/meta-data": true,
"https://json-schema.org/draft/2020-12/vocab/format-annotation": true,
"https://json-schema.org/draft/2020-12/vocab/content": true,
"https://example.com/enumRef": true
},
"$dynamicAnchor": "meta",
"type": ["object", "boolean"],
"allOf": [
{"$ref": "https://json-schema.org/draft/2020-12/meta/core"},
{"$ref": "https://json-schema.org/draft/2020-12/meta/applicator"},
{"$ref": "https://json-schema.org/draft/2020-12/meta/unevaluated"},
{"$ref": "https://json-schema.org/draft/2020-12/meta/validation"},
{"$ref": "https://json-schema.org/draft/2020-12/meta/meta-data"},
{"$ref": "https://json-schema.org/draft/2020-12/meta/format-annotation"},
{"$ref": "https://json-schema.org/draft/2020-12/meta/content"},
{"$ref": "https://example.com/enumRef/enumRef-vocabulary"}
]
}
Finally, we implement the "enumRef" keyword by defining an EnumRefKeyword
class. The following script includes an example
implementation using a simple local cache, a few lines of boilerplate code
to compile the meta-schema and vocabulary definition files, and an example
schema that we use to evaluate both valid and invalid sample JSON instances:
import pathlib
import pprint
from jschon import create_catalog, URI, JSON, JSONSchema, JSONSchemaError, LocalSource
from jschon.jsonschema import Result
from jschon.vocabulary import Keyword
# directory (next to this script) holding the meta-schema and vocabulary files
data_dir = pathlib.Path(__file__).parent.joinpath('data')

# enumeration values previously obtained from remote terminology services,
# keyed by the URI that identifies each enumeration
remote_enum_cache = {
    "https://example.com/remote-enum-colours": [
        "red", "orange", "yellow", "green",
        "blue", "indigo", "violet",
    ],
}
# define a class that implements the "enumRef" keyword
class EnumRefKeyword(Keyword):
    """Implementation of the custom "enumRef" keyword.

    The keyword's value identifies an enumeration held in
    ``remote_enum_cache``; a string instance is valid when it is a member
    of that enumeration.
    """

    key = "enumRef"

    # ignore non-string instances
    instance_types = "string",

    def evaluate(self, instance: JSON, result: Result) -> None:
        """Check ``instance`` against the enumeration named by the keyword.

        On success the result is annotated with the enumeration identifier;
        on failure the result is marked as failed with an error message.

        :param instance: the JSON instance node being evaluated
        :param result: the result object to annotate or fail
        :raises JSONSchemaError: if the keyword's value does not identify
            an enumeration present in ``remote_enum_cache``
        """
        # get the keyword's value as it appears in the JSON schema
        enum_id = self.json.data

        try:
            # retrieve the enumeration from the remote enumeration cache
            enum = remote_enum_cache[enum_id]
        except KeyError:
            # the KeyError carries nothing beyond what the message already
            # says, so suppress implicit chaining to keep the traceback
            # focused on the schema error
            raise JSONSchemaError(f"Unknown remote enumeration {enum_id}") from None

        # test the value of the current JSON instance node against the enumeration
        if instance.data in enum:
            # (optionally) on success, annotate the result
            result.annotate(enum_id)
        else:
            # on failure, mark the result as failed, with an (optional) error message
            result.fail(f"The instance is not a member of the {enum_id} enumeration")
# set up the catalog with support for the JSON Schema 2020-12 vocabularies
catalog = create_catalog('2020-12')

# register the local data directory as the source from which the enumRef
# meta-schema and vocabulary definition files are loaded
enumref_source = LocalSource(data_dir, suffix='.json')
catalog.add_uri_source(URI("https://example.com/enumRef/"), enumref_source)

# back the enumRef vocabulary with the EnumRefKeyword implementation
catalog.create_vocabulary(URI("https://example.com/enumRef"), EnumRefKeyword)

# vocabularies made available by the enumRef metaschema: the core vocabulary
# first, followed by the remaining standard ones and our custom vocabulary
metaschema_vocabulary_uris = (
    URI("https://json-schema.org/draft/2020-12/vocab/core"),
    URI("https://json-schema.org/draft/2020-12/vocab/applicator"),
    URI("https://json-schema.org/draft/2020-12/vocab/unevaluated"),
    URI("https://json-schema.org/draft/2020-12/vocab/validation"),
    URI("https://json-schema.org/draft/2020-12/vocab/format-annotation"),
    URI("https://json-schema.org/draft/2020-12/vocab/meta-data"),
    URI("https://json-schema.org/draft/2020-12/vocab/content"),
    URI("https://example.com/enumRef"),
)

# compile the enumRef metaschema, which enables any referencing schema
# to use the keyword implementations provided by its vocabularies
catalog.create_metaschema(
    URI("https://example.com/enumRef/enumRef-metaschema"),
    *metaschema_vocabulary_uris,
)
# schema document requiring a string that belongs to the remote colours
# enumeration; it declares the enumRef metaschema as its "$schema"
schema_document = {
    "$schema": "https://example.com/enumRef/enumRef-metaschema",
    "$id": "https://example.com/remote-enum-test-schema",
    "type": "string",
    "enumRef": "https://example.com/remote-enum-colours",
}
schema = JSONSchema(schema_document)

# check the schema itself against its metaschema and report the outcome
schema_validity = schema.validate()
print(f'Schema validity check: {schema_validity.valid}')
# sample instances: one that is a member of the colours enumeration,
# and one that is not
valid_json = JSON("green")
invalid_json = JSON("purple")

# evaluate each instance against the schema
valid_result = schema.evaluate(valid_json)
invalid_result = schema.evaluate(invalid_json)

# report the valid case
valid_output = valid_result.output('detailed')
print(f'Valid JSON result: {valid_result.valid}')
print('Valid JSON detailed output:')
pprint.pp(valid_output)

# report the invalid case
invalid_output = invalid_result.output('detailed')
print(f'Invalid JSON result: {invalid_result.valid}')
print('Invalid JSON detailed output:')
pprint.pp(invalid_output)
The script produces the following output:
Schema validity check: True
Valid JSON result: True
Valid JSON detailed output:
{'valid': True,
'instanceLocation': '',
'keywordLocation': '',
'absoluteKeywordLocation': 'https://example.com/remote-enum-test-schema#',
'annotations': [{'instanceLocation': '',
'keywordLocation': '/type',
'absoluteKeywordLocation': 'https://example.com/remote-enum-test-schema#/type'},
{'instanceLocation': '',
'keywordLocation': '/enumRef',
'absoluteKeywordLocation': 'https://example.com/remote-enum-test-schema#/enumRef',
'annotation': 'https://example.com/remote-enum-colours'}]}
Invalid JSON result: False
Invalid JSON detailed output:
{'valid': False,
'instanceLocation': '',
'keywordLocation': '',
'absoluteKeywordLocation': 'https://example.com/remote-enum-test-schema#',
'errors': [{'instanceLocation': '',
'keywordLocation': '/enumRef',
'absoluteKeywordLocation': 'https://example.com/remote-enum-test-schema#/enumRef',
'error': 'The instance is not a member of the '
'https://example.com/remote-enum-colours enumeration'}]}