From 1c931e5cf47b9c24f7cbe9d184f38d12753eacf5 Mon Sep 17 00:00:00 2001 From: pradhyum6144 Date: Sat, 28 Feb 2026 16:18:57 +0530 Subject: [PATCH] feat: add Python SDK for ModelPack spec types and validation Signed-off-by: pradhyum6144 --- .gitignore | 7 + specs-python/modelpack/__init__.py | 0 specs-python/modelpack/v1/__init__.py | 91 +++ specs-python/modelpack/v1/annotations.py | 77 ++ specs-python/modelpack/v1/config-schema.json | 168 +++++ specs-python/modelpack/v1/config.py | 285 ++++++++ specs-python/modelpack/v1/mediatype.py | 55 ++ specs-python/modelpack/v1/validator.py | 54 ++ specs-python/setup.py | 33 + specs-python/tests/__init__.py | 0 specs-python/tests/test_annotations.py | 72 ++ specs-python/tests/test_config.py | 268 +++++++ specs-python/tests/test_mediatype.py | 119 ++++ specs-python/tests/test_validator.py | 705 +++++++++++++++++++ 14 files changed, 1934 insertions(+) create mode 100644 specs-python/modelpack/__init__.py create mode 100644 specs-python/modelpack/v1/__init__.py create mode 100644 specs-python/modelpack/v1/annotations.py create mode 100644 specs-python/modelpack/v1/config-schema.json create mode 100644 specs-python/modelpack/v1/config.py create mode 100644 specs-python/modelpack/v1/mediatype.py create mode 100644 specs-python/modelpack/v1/validator.py create mode 100644 specs-python/setup.py create mode 100644 specs-python/tests/__init__.py create mode 100644 specs-python/tests/test_annotations.py create mode 100644 specs-python/tests/test_config.py create mode 100644 specs-python/tests/test_mediatype.py create mode 100644 specs-python/tests/test_validator.py diff --git a/.gitignore b/.gitignore index 71cce7a..5bb9fb7 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,13 @@ # Dependency directories (remove the comment below to include it) vendor/ + +# Python +__pycache__/ +*.pyc +*.egg-info/ +.venv/ +.pytest_cache/ .idea .vscode .cache diff --git a/specs-python/modelpack/__init__.py b/specs-python/modelpack/__init__.py new file 
mode 100644 index 0000000..e69de29 diff --git a/specs-python/modelpack/v1/__init__.py b/specs-python/modelpack/v1/__init__.py new file mode 100644 index 0000000..6a5c8cf --- /dev/null +++ b/specs-python/modelpack/v1/__init__.py @@ -0,0 +1,91 @@ +# Copyright 2025 The CNCF ModelPack Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""ModelPack Python SDK - CNCF standard for packaging and distributing AI models.""" + +from modelpack.v1.config import ( + Model, + ModelCapabilities, + ModelConfig, + ModelDescriptor, + ModelFS, + Modality, +) +from modelpack.v1.annotations import ( + ANNOTATION_FILEPATH, + ANNOTATION_FILE_METADATA, + ANNOTATION_MEDIA_TYPE_UNTESTED, + FileMetadata, +) +from modelpack.v1.mediatype import ( + ARTIFACT_TYPE_MODEL_MANIFEST, + MEDIA_TYPE_MODEL_CONFIG, + MEDIA_TYPE_MODEL_WEIGHT_RAW, + MEDIA_TYPE_MODEL_WEIGHT, + MEDIA_TYPE_MODEL_WEIGHT_GZIP, + MEDIA_TYPE_MODEL_WEIGHT_ZSTD, + MEDIA_TYPE_MODEL_WEIGHT_CONFIG_RAW, + MEDIA_TYPE_MODEL_WEIGHT_CONFIG, + MEDIA_TYPE_MODEL_WEIGHT_CONFIG_GZIP, + MEDIA_TYPE_MODEL_WEIGHT_CONFIG_ZSTD, + MEDIA_TYPE_MODEL_DOC_RAW, + MEDIA_TYPE_MODEL_DOC, + MEDIA_TYPE_MODEL_DOC_GZIP, + MEDIA_TYPE_MODEL_DOC_ZSTD, + MEDIA_TYPE_MODEL_CODE_RAW, + MEDIA_TYPE_MODEL_CODE, + MEDIA_TYPE_MODEL_CODE_GZIP, + MEDIA_TYPE_MODEL_CODE_ZSTD, + MEDIA_TYPE_MODEL_DATASET_RAW, + MEDIA_TYPE_MODEL_DATASET, + MEDIA_TYPE_MODEL_DATASET_GZIP, + MEDIA_TYPE_MODEL_DATASET_ZSTD, +) +from modelpack.v1.validator import validate_config + +__all__ = [ + 
"Model", + "ModelCapabilities", + "ModelConfig", + "ModelDescriptor", + "ModelFS", + "Modality", + "FileMetadata", + "ANNOTATION_FILEPATH", + "ANNOTATION_FILE_METADATA", + "ANNOTATION_MEDIA_TYPE_UNTESTED", + "ARTIFACT_TYPE_MODEL_MANIFEST", + "MEDIA_TYPE_MODEL_CONFIG", + "MEDIA_TYPE_MODEL_WEIGHT_RAW", + "MEDIA_TYPE_MODEL_WEIGHT", + "MEDIA_TYPE_MODEL_WEIGHT_GZIP", + "MEDIA_TYPE_MODEL_WEIGHT_ZSTD", + "MEDIA_TYPE_MODEL_WEIGHT_CONFIG_RAW", + "MEDIA_TYPE_MODEL_WEIGHT_CONFIG", + "MEDIA_TYPE_MODEL_WEIGHT_CONFIG_GZIP", + "MEDIA_TYPE_MODEL_WEIGHT_CONFIG_ZSTD", + "MEDIA_TYPE_MODEL_DOC_RAW", + "MEDIA_TYPE_MODEL_DOC", + "MEDIA_TYPE_MODEL_DOC_GZIP", + "MEDIA_TYPE_MODEL_DOC_ZSTD", + "MEDIA_TYPE_MODEL_CODE_RAW", + "MEDIA_TYPE_MODEL_CODE", + "MEDIA_TYPE_MODEL_CODE_GZIP", + "MEDIA_TYPE_MODEL_CODE_ZSTD", + "MEDIA_TYPE_MODEL_DATASET_RAW", + "MEDIA_TYPE_MODEL_DATASET", + "MEDIA_TYPE_MODEL_DATASET_GZIP", + "MEDIA_TYPE_MODEL_DATASET_ZSTD", + "validate_config", +] diff --git a/specs-python/modelpack/v1/annotations.py b/specs-python/modelpack/v1/annotations.py new file mode 100644 index 0000000..e958f7a --- /dev/null +++ b/specs-python/modelpack/v1/annotations.py @@ -0,0 +1,77 @@ +# Copyright 2025 The CNCF ModelPack Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# Annotation key for the file path of the layer.
ANNOTATION_FILEPATH = "org.cncf.model.filepath"

# Annotation key for the file metadata of the layer.
ANNOTATION_FILE_METADATA = "org.cncf.model.file.metadata+json"

# Annotation key for file media type untested flag of the layer.
ANNOTATION_MEDIA_TYPE_UNTESTED = "org.cncf.model.file.mediatype.untested"


def _parse_rfc3339(value: str) -> datetime:
    """Parse an RFC 3339 / ISO 8601 timestamp string.

    ``datetime.fromisoformat`` on Python 3.10 only understands what
    ``datetime.isoformat`` emits, so the text is normalized first: a trailing
    ``Z`` becomes ``+00:00`` and fractional seconds are padded or truncated
    to exactly six digits (Go marshals ``time.Time`` with up to nanosecond
    precision and trims trailing zeros).

    Args:
        value: Timestamp text such as ``2025-01-01T12:00:00.123456789Z``.

    Returns:
        The parsed datetime; timezone-aware when the text carries an offset.

    Raises:
        ValueError: If the text is not a valid timestamp.
    """
    text = value.strip()
    if text[-1:] in ("Z", "z"):
        text = f"{text[:-1]}+00:00"
    head, dot, tail = text.partition(".")
    if dot:
        # Split the run of fractional digits from whatever follows (offset).
        digit_count = len(tail) - len(tail.lstrip("0123456789"))
        if digit_count:
            fraction = tail[:digit_count][:6].ljust(6, "0")
            text = f"{head}.{fraction}{tail[digit_count:]}"
    return datetime.fromisoformat(text)


@dataclass
class FileMetadata:
    """Represents the metadata of a file.

    Mirrors the Go FileMetadata struct in specs-go/v1/annotations.go.
    """

    name: str = ""
    mode: int = 0
    uid: int = 0
    gid: int = 0
    size: int = 0
    mod_time: datetime | None = None
    typeflag: int = 0

    def to_dict(self) -> dict:
        """Serialize to a dict matching the JSON field names.

        Returns:
            A new dict; ``mtime`` is only present when ``mod_time`` is set and
            holds ``mod_time.isoformat()``.
        """
        d: dict = {
            "name": self.name,
            "mode": self.mode,
            "uid": self.uid,
            "gid": self.gid,
            "size": self.size,
            "typeflag": self.typeflag,
        }
        if self.mod_time is not None:
            d["mtime"] = self.mod_time.isoformat()
        return d

    @classmethod
    def from_dict(cls, data: dict) -> "FileMetadata":
        """Deserialize from a dict with JSON field names.

        Args:
            data: Mapping using the JSON keys (``name``, ``mode``, ``uid``,
                ``gid``, ``size``, ``mtime``, ``typeflag``). Missing keys fall
                back to the field defaults.

        Returns:
            The populated FileMetadata. A missing, ``None`` or empty ``mtime``
            yields ``mod_time=None`` instead of raising.

        Raises:
            ValueError: If ``mtime`` is present but not a valid timestamp.
        """
        raw_mtime = data.get("mtime")
        return cls(
            name=data.get("name", ""),
            mode=data.get("mode", 0),
            uid=data.get("uid", 0),
            gid=data.get("gid", 0),
            size=data.get("size", 0),
            mod_time=_parse_rfc3339(raw_mtime) if raw_mtime else None,
            typeflag=data.get("typeflag", 0),
        )
"https://github.com/modelpack/model-spec/config", + "type": "object", + "properties": { + "descriptor": { + "$ref": "#/$defs/ModelDescriptor" + }, + "modelfs": { + "$ref": "#/$defs/ModelFS" + }, + "config": { + "$ref": "#/$defs/ModelConfig" + } + }, + "additionalProperties": false, + "required": [ + "descriptor", + "config", + "modelfs" + ], + "$defs": { + "ModelConfig": { + "type": "object", + "properties": { + "architecture": { + "type": "string" + }, + "format": { + "type": "string" + }, + "paramSize": { + "type": "string" + }, + "precision": { + "type": "string" + }, + "quantization": { + "type": "string" + }, + "capabilities": { + "$ref": "#/$defs/ModelCapabilities" + } + }, + "additionalProperties": false + }, + "ModelDescriptor": { + "type": "object", + "properties": { + "createdAt": { + "type": "string", + "format": "date-time" + }, + "authors": { + "type": "array", + "items": { + "type": "string" + } + }, + "family": { + "type": "string" + }, + "name": { + "type": "string", + "minLength": 1 + }, + "docURL": { + "type": "string" + }, + "sourceURL": { + "type": "string" + }, + "datasetsURL": { + "type": "array", + "items": { + "type": "string" + } + }, + "version": { + "type": "string" + }, + "revision": { + "type": "string" + }, + "vendor": { + "type": "string" + }, + "licenses": { + "type": "array", + "items": { + "type": "string" + } + }, + "title": { + "type": "string" + }, + "description": { + "type": "string" + } + }, + "additionalProperties": false + }, + "ModelFS": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["layers"] + }, + "diffIds": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1 + } + }, + "additionalProperties": false, + "required": [ + "type", + "diffIds" + ] + }, + "ModelCapabilities": { + "type": "object", + "properties": { + "inputTypes": { + "type": "array", + "items": { + "$ref": "#/$defs/Modality" + } + }, + "outputTypes": { + "type": "array", + "items": { + "$ref": 
"#/$defs/Modality" + } + }, + "knowledgeCutoff": { + "type": "string", + "format": "date-time" + }, + "reasoning": { + "type": "boolean" + }, + "toolUsage": { + "type": "boolean" + }, + "reward": { + "type": "boolean" + }, + "languages": { + "type": "array", + "items": { + "type": "string", + "pattern": "^[a-z]{2}$" + } + } + }, + "additionalProperties": false + }, + "Modality": { + "type": "string", + "enum": ["text", "image", "audio", "video", "embedding", "other"] + } + } +} diff --git a/specs-python/modelpack/v1/config.py b/specs-python/modelpack/v1/config.py new file mode 100644 index 0000000..cdeaf77 --- /dev/null +++ b/specs-python/modelpack/v1/config.py @@ -0,0 +1,285 @@ +# Copyright 2025 The CNCF ModelPack Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Model configuration types matching the Go structs in specs-go/v1/config.go.""" + +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from datetime import datetime +from enum import Enum +from typing import Optional + + +class Modality(str, Enum): + """Defines the input and output types of the model. + + Mirrors the Go Modality type in specs-go/v1/config.go. + """ + + TEXT = "text" + IMAGE = "image" + AUDIO = "audio" + VIDEO = "video" + EMBEDDING = "embedding" + OTHER = "other" + + +@dataclass +class ModelCapabilities: + """Defines the special capabilities that the model supports. + + Mirrors the Go ModelCapabilities struct in specs-go/v1/config.go. 
def _parse_rfc3339(value: str) -> datetime:
    """Parse an RFC 3339 / ISO 8601 timestamp string.

    ``datetime.fromisoformat`` on Python 3.10 only understands what
    ``datetime.isoformat`` emits, so the text is normalized first: a trailing
    ``Z`` becomes ``+00:00`` and fractional seconds are padded or truncated
    to exactly six digits (Go marshals ``time.Time`` with up to nanosecond
    precision and trims trailing zeros).

    Args:
        value: Timestamp text such as ``2025-01-01T12:00:00.123456789Z``.

    Returns:
        The parsed datetime; timezone-aware when the text carries an offset.

    Raises:
        ValueError: If the text is not a valid timestamp.
    """
    text = value.strip()
    if text[-1:] in ("Z", "z"):
        text = f"{text[:-1]}+00:00"
    head, dot, tail = text.partition(".")
    if dot:
        # Split the run of fractional digits from whatever follows (offset).
        digit_count = len(tail) - len(tail.lstrip("0123456789"))
        if digit_count:
            fraction = tail[:digit_count][:6].ljust(6, "0")
            text = f"{head}.{fraction}{tail[digit_count:]}"
    return datetime.fromisoformat(text)


@dataclass
class ModelCapabilities:
    """Defines the special capabilities that the model supports.

    Mirrors the Go ModelCapabilities struct in specs-go/v1/config.go.
    Every field is optional; ``None`` means "not specified" and is omitted
    from the serialized form.
    """

    input_types: Optional[list[Modality]] = None
    output_types: Optional[list[Modality]] = None
    knowledge_cutoff: Optional[datetime] = None
    reasoning: Optional[bool] = None
    tool_usage: Optional[bool] = None
    reward: Optional[bool] = None
    languages: Optional[list[str]] = None

    def to_dict(self) -> dict:
        """Serialize to a dict matching the JSON schema field names.

        Modalities may be given as ``Modality`` members or as their string
        values; both serialize to the plain string.

        Raises:
            ValueError: If a modality is not a valid ``Modality`` value.
        """
        d: dict = {}
        if self.input_types is not None:
            d["inputTypes"] = [Modality(m).value for m in self.input_types]
        if self.output_types is not None:
            d["outputTypes"] = [Modality(m).value for m in self.output_types]
        if self.knowledge_cutoff is not None:
            d["knowledgeCutoff"] = self.knowledge_cutoff.isoformat()
        if self.reasoning is not None:
            d["reasoning"] = self.reasoning
        if self.tool_usage is not None:
            d["toolUsage"] = self.tool_usage
        if self.reward is not None:
            d["reward"] = self.reward
        if self.languages is not None:
            # Copy so callers mutating the result cannot alter this object.
            d["languages"] = list(self.languages)
        return d

    @classmethod
    def from_dict(cls, data: dict) -> "ModelCapabilities":
        """Deserialize from a dict with JSON schema field names.

        Keys that are missing or explicitly ``None`` leave the corresponding
        field unset.

        Raises:
            ValueError: If a modality or ``knowledgeCutoff`` value is invalid.
        """
        kwargs: dict = {}
        if data.get("inputTypes") is not None:
            kwargs["input_types"] = [Modality(v) for v in data["inputTypes"]]
        if data.get("outputTypes") is not None:
            kwargs["output_types"] = [Modality(v) for v in data["outputTypes"]]
        if data.get("knowledgeCutoff"):
            kwargs["knowledge_cutoff"] = _parse_rfc3339(data["knowledgeCutoff"])
        if "reasoning" in data:
            kwargs["reasoning"] = data["reasoning"]
        if "toolUsage" in data:
            kwargs["tool_usage"] = data["toolUsage"]
        if "reward" in data:
            kwargs["reward"] = data["reward"]
        if data.get("languages") is not None:
            kwargs["languages"] = list(data["languages"])
        return cls(**kwargs)


@dataclass
class ModelConfig:
    """Defines the execution parameters for running a model.

    Mirrors the Go ModelConfig struct in specs-go/v1/config.go.
    Empty strings are treated as "not specified" and omitted on output.
    """

    architecture: str = ""
    format: str = ""
    param_size: str = ""
    precision: str = ""
    quantization: str = ""
    capabilities: Optional[ModelCapabilities] = None

    def to_dict(self) -> dict:
        """Serialize to a dict matching the JSON schema field names."""
        d: dict = {}
        if self.architecture:
            d["architecture"] = self.architecture
        if self.format:
            d["format"] = self.format
        if self.param_size:
            d["paramSize"] = self.param_size
        if self.precision:
            d["precision"] = self.precision
        if self.quantization:
            d["quantization"] = self.quantization
        if self.capabilities is not None:
            d["capabilities"] = self.capabilities.to_dict()
        return d

    @classmethod
    def from_dict(cls, data: dict) -> "ModelConfig":
        """Deserialize from a dict with JSON schema field names.

        A missing or ``None`` ``capabilities`` entry yields
        ``capabilities=None``; an empty object yields an empty
        ``ModelCapabilities``.
        """
        raw_caps = data.get("capabilities")
        return cls(
            architecture=data.get("architecture") or "",
            format=data.get("format") or "",
            param_size=data.get("paramSize") or "",
            precision=data.get("precision") or "",
            quantization=data.get("quantization") or "",
            capabilities=(
                ModelCapabilities.from_dict(raw_caps) if raw_caps is not None else None
            ),
        )


@dataclass
class ModelFS:
    """Describes layer content addresses.

    Mirrors the Go ModelFS struct in specs-go/v1/config.go.
    """

    type: str = ""
    diff_ids: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialize to a dict matching the JSON schema field names.

        Both keys are always emitted; ``diffIds`` is a copy of ``diff_ids``.
        """
        return {
            "type": self.type,
            "diffIds": list(self.diff_ids or []),
        }

    @classmethod
    def from_dict(cls, data: dict) -> "ModelFS":
        """Deserialize from a dict with JSON schema field names.

        Missing or ``None`` values fall back to ``""`` / ``[]``. The id list
        is copied so later changes to ``data`` do not leak into the result.
        """
        return cls(
            type=data.get("type") or "",
            diff_ids=list(data.get("diffIds") or []),
        )


@dataclass
class ModelDescriptor:
    """Defines the general information of a model.

    Mirrors the Go ModelDescriptor struct in specs-go/v1/config.go.
    Empty strings and ``None`` values are omitted from the serialized form.
    """

    created_at: Optional[datetime] = None
    authors: Optional[list[str]] = None
    family: str = ""
    name: str = ""
    doc_url: str = ""
    source_url: str = ""
    datasets_url: Optional[list[str]] = None
    version: str = ""
    revision: str = ""
    vendor: str = ""
    licenses: Optional[list[str]] = None
    title: str = ""
    description: str = ""

    def to_dict(self) -> dict:
        """Serialize to a dict matching the JSON schema field names.

        Keys are inserted in a fixed order so the JSON output is stable.
        """
        d: dict = {}
        if self.created_at is not None:
            d["createdAt"] = self.created_at.isoformat()
        if self.authors is not None:
            d["authors"] = list(self.authors)
        if self.family:
            d["family"] = self.family
        if self.name:
            d["name"] = self.name
        if self.doc_url:
            d["docURL"] = self.doc_url
        if self.source_url:
            d["sourceURL"] = self.source_url
        if self.datasets_url is not None:
            d["datasetsURL"] = list(self.datasets_url)
        if self.version:
            d["version"] = self.version
        if self.revision:
            d["revision"] = self.revision
        if self.vendor:
            d["vendor"] = self.vendor
        if self.licenses is not None:
            d["licenses"] = list(self.licenses)
        if self.title:
            d["title"] = self.title
        if self.description:
            d["description"] = self.description
        return d

    @classmethod
    def from_dict(cls, data: dict) -> "ModelDescriptor":
        """Deserialize from a dict with JSON schema field names.

        A missing, ``None`` or empty ``createdAt`` yields ``created_at=None``
        instead of raising; list values are copied.

        Raises:
            ValueError: If ``createdAt`` is present but not a valid timestamp.
        """

        def _copied(key: str) -> Optional[list]:
            # Preserve the None-vs-list distinction while detaching from data.
            values = data.get(key)
            return None if values is None else list(values)

        raw_created = data.get("createdAt")
        return cls(
            created_at=_parse_rfc3339(raw_created) if raw_created else None,
            authors=_copied("authors"),
            family=data.get("family") or "",
            name=data.get("name") or "",
            doc_url=data.get("docURL") or "",
            source_url=data.get("sourceURL") or "",
            datasets_url=_copied("datasetsURL"),
            version=data.get("version") or "",
            revision=data.get("revision") or "",
            vendor=data.get("vendor") or "",
            licenses=_copied("licenses"),
            title=data.get("title") or "",
            description=data.get("description") or "",
        )


@dataclass
class Model:
    """Defines the basic information of a model.

    Provides the application/vnd.cncf.model.config.v1+json mediatype
    when marshalled to JSON.

    Mirrors the Go Model struct in specs-go/v1/config.go.
    """

    descriptor: ModelDescriptor = field(default_factory=ModelDescriptor)
    modelfs: ModelFS = field(default_factory=ModelFS)
    config: ModelConfig = field(default_factory=ModelConfig)

    def to_dict(self) -> dict:
        """Serialize to a dict matching the JSON schema field names.

        All three top-level sections are always present.
        """
        return {
            "descriptor": self.descriptor.to_dict(),
            "modelfs": self.modelfs.to_dict(),
            "config": self.config.to_dict(),
        }

    def to_json(self, indent: Optional[int] = 2) -> str:
        """Serialize to a JSON string.

        Args:
            indent: Passed to ``json.dumps``; ``None`` gives compact output.
        """
        return json.dumps(self.to_dict(), indent=indent)

    @classmethod
    def from_dict(cls, data: dict) -> "Model":
        """Deserialize from a dict with JSON schema field names.

        Sections that are missing or ``None`` are treated as empty objects.
        No schema validation is performed here.
        """
        return cls(
            descriptor=ModelDescriptor.from_dict(data.get("descriptor") or {}),
            modelfs=ModelFS.from_dict(data.get("modelfs") or {}),
            config=ModelConfig.from_dict(data.get("config") or {}),
        )

    @classmethod
    def from_json(cls, json_str: str) -> "Model":
        """Deserialize from a JSON string.

        Raises:
            json.JSONDecodeError: If ``json_str`` is not valid JSON.
        """
        return cls.from_dict(json.loads(json_str))
+ +"""Media type constants matching specs-go/v1/mediatype.go.""" + +# Artifact type for a model manifest. +ARTIFACT_TYPE_MODEL_MANIFEST = "application/vnd.cncf.model.manifest.v1+json" + +# Media type for a model configuration. +MEDIA_TYPE_MODEL_CONFIG = "application/vnd.cncf.model.config.v1+json" + +# Model weight media types. +MEDIA_TYPE_MODEL_WEIGHT_RAW = "application/vnd.cncf.model.weight.v1.raw" +MEDIA_TYPE_MODEL_WEIGHT = "application/vnd.cncf.model.weight.v1.tar" +MEDIA_TYPE_MODEL_WEIGHT_GZIP = "application/vnd.cncf.model.weight.v1.tar+gzip" +MEDIA_TYPE_MODEL_WEIGHT_ZSTD = "application/vnd.cncf.model.weight.v1.tar+zstd" + +# Model weight config media types. +MEDIA_TYPE_MODEL_WEIGHT_CONFIG_RAW = "application/vnd.cncf.model.weight.config.v1.raw" +MEDIA_TYPE_MODEL_WEIGHT_CONFIG = "application/vnd.cncf.model.weight.config.v1.tar" +MEDIA_TYPE_MODEL_WEIGHT_CONFIG_GZIP = ( + "application/vnd.cncf.model.weight.config.v1.tar+gzip" +) +MEDIA_TYPE_MODEL_WEIGHT_CONFIG_ZSTD = ( + "application/vnd.cncf.model.weight.config.v1.tar+zstd" +) + +# Model documentation media types. +MEDIA_TYPE_MODEL_DOC_RAW = "application/vnd.cncf.model.doc.v1.raw" +MEDIA_TYPE_MODEL_DOC = "application/vnd.cncf.model.doc.v1.tar" +MEDIA_TYPE_MODEL_DOC_GZIP = "application/vnd.cncf.model.doc.v1.tar+gzip" +MEDIA_TYPE_MODEL_DOC_ZSTD = "application/vnd.cncf.model.doc.v1.tar+zstd" + +# Model code media types. +MEDIA_TYPE_MODEL_CODE_RAW = "application/vnd.cncf.model.code.v1.raw" +MEDIA_TYPE_MODEL_CODE = "application/vnd.cncf.model.code.v1.tar" +MEDIA_TYPE_MODEL_CODE_GZIP = "application/vnd.cncf.model.code.v1.tar+gzip" +MEDIA_TYPE_MODEL_CODE_ZSTD = "application/vnd.cncf.model.code.v1.tar+zstd" + +# Model dataset media types. 
# Lazily built validator shared by all validate_config() calls, so the schema
# file is read and parsed once per process instead of once per call.
_VALIDATOR = None


def _load_schema() -> dict:
    """Load and return the config JSON schema.

    Reads ``config-schema.json`` from the ``modelpack.v1`` package data.
    """
    schema_file = importlib.resources.files("modelpack.v1").joinpath(
        "config-schema.json"
    )
    with schema_file.open(encoding="utf-8") as f:
        return json.load(f)


def _get_validator():
    """Return the shared validator, building it on first use."""
    global _VALIDATOR
    if _VALIDATOR is None:
        _VALIDATOR = Draft202012Validator(
            _load_schema(), format_checker=FormatChecker()
        )
    return _VALIDATOR


def validate_config(data: dict | str | bytes) -> None:
    """Validate a model config against the JSON schema.

    Args:
        data: Either a dict, or a JSON document (``str``, ``bytes`` or
            ``bytearray``) representing the model config.

    Raises:
        jsonschema.ValidationError: If the config is invalid.
        json.JSONDecodeError: If data is JSON text that cannot be parsed.

    Note:
        The bundled schema is not re-checked against its metaschema here;
        only ``data`` is validated.
    """
    if isinstance(data, (str, bytes, bytearray)):
        data = json.loads(data)

    _get_validator().validate(data)
class TestAnnotationConstants:
    """Verify annotation constants match Go definitions exactly."""

    def test_filepath(self):
        assert ANNOTATION_FILEPATH == "org.cncf.model.filepath"

    def test_file_metadata(self):
        assert ANNOTATION_FILE_METADATA == "org.cncf.model.file.metadata+json"

    def test_media_type_untested(self):
        expected = "org.cncf.model.file.mediatype.untested"
        assert ANNOTATION_MEDIA_TYPE_UNTESTED == expected


class TestFileMetadata:
    """Tests for FileMetadata serialization."""

    def test_round_trip(self):
        stamp = datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
        meta = FileMetadata(
            name="model.bin",
            mode=0o644,
            uid=1000,
            gid=1000,
            size=1024,
            mod_time=stamp,
            typeflag=0,
        )

        payload = meta.to_dict()
        # Pin every serialized key, not just a sample of them.
        assert payload == {
            "name": "model.bin",
            "mode": 0o644,
            "uid": 1000,
            "gid": 1000,
            "size": 1024,
            "typeflag": 0,
            "mtime": stamp.isoformat(),
        }

        # Dataclass equality covers all fields, including mod_time.
        assert FileMetadata.from_dict(payload) == meta

    def test_empty(self):
        payload = FileMetadata().to_dict()
        assert payload == {
            "name": "",
            "mode": 0,
            "uid": 0,
            "gid": 0,
            "size": 0,
            "typeflag": 0,
        }
        assert "mtime" not in payload
class TestModality:
    """Tests for the Modality enum."""

    def test_all_values(self):
        # Comparing the full mapping also catches unexpected extra members.
        assert {m.name: m.value for m in Modality} == {
            "TEXT": "text",
            "IMAGE": "image",
            "AUDIO": "audio",
            "VIDEO": "video",
            "EMBEDDING": "embedding",
            "OTHER": "other",
        }

    def test_from_string(self):
        assert Modality("text") == Modality.TEXT
        assert Modality("image") == Modality.IMAGE

    def test_invalid_value(self):
        with pytest.raises(ValueError):
            Modality("invalid")


class TestModelCapabilities:
    """Tests for ModelCapabilities serialization."""

    def test_empty(self):
        assert ModelCapabilities().to_dict() == {}

    def test_round_trip(self):
        caps = ModelCapabilities(
            input_types=[Modality.TEXT, Modality.IMAGE],
            output_types=[Modality.TEXT],
            reasoning=True,
            tool_usage=False,
            reward=True,
            languages=["en", "fr"],
        )
        payload = caps.to_dict()
        assert payload == {
            "inputTypes": ["text", "image"],
            "outputTypes": ["text"],
            "reasoning": True,
            "toolUsage": False,
            "reward": True,
            "languages": ["en", "fr"],
        }
        # False must survive as False rather than being dropped as "unset".
        assert payload["toolUsage"] is False

        assert ModelCapabilities.from_dict(payload) == caps

    def test_knowledge_cutoff(self):
        cutoff = datetime(2025, 1, 1, tzinfo=timezone.utc)
        payload = ModelCapabilities(knowledge_cutoff=cutoff).to_dict()
        assert payload == {"knowledgeCutoff": cutoff.isoformat()}

        restored = ModelCapabilities.from_dict(payload)
        assert restored.knowledge_cutoff == cutoff


class TestModelConfig:
    """Tests for ModelConfig serialization."""

    def test_empty(self):
        assert ModelConfig().to_dict() == {}

    def test_round_trip(self):
        cfg = ModelConfig(
            architecture="transformer",
            format="safetensors",
            param_size="8b",
            precision="float16",
            quantization="awq",
        )
        payload = cfg.to_dict()
        assert payload == {
            "architecture": "transformer",
            "format": "safetensors",
            "paramSize": "8b",
            "precision": "float16",
            "quantization": "awq",
        }

        assert ModelConfig.from_dict(payload) == cfg

    def test_with_capabilities(self):
        cfg = ModelConfig(
            param_size="8b",
            capabilities=ModelCapabilities(
                input_types=[Modality.TEXT],
                output_types=[Modality.TEXT],
            ),
        )
        payload = cfg.to_dict()
        assert "capabilities" in payload
        assert payload["capabilities"]["inputTypes"] == ["text"]

        # Nested capabilities must deserialize back to the same object.
        assert ModelConfig.from_dict(payload) == cfg


class TestModelFS:
    """Tests for ModelFS serialization."""

    def test_round_trip(self):
        fs = ModelFS(type="layers", diff_ids=["sha256:abc123"])
        payload = fs.to_dict()
        assert payload == {"type": "layers", "diffIds": ["sha256:abc123"]}

        assert ModelFS.from_dict(payload) == fs


class TestModelDescriptor:
    """Tests for ModelDescriptor serialization."""

    def test_empty(self):
        assert ModelDescriptor().to_dict() == {}

    def test_round_trip(self):
        desc = ModelDescriptor(
            name="llama3-8b-instruct",
            version="3.1",
            family="llama3",
            authors=["Meta"],
            licenses=["Apache-2.0"],
        )
        payload = desc.to_dict()
        assert payload == {
            "authors": ["Meta"],
            "family": "llama3",
            "name": "llama3-8b-instruct",
            "version": "3.1",
            "licenses": ["Apache-2.0"],
        }

        assert ModelDescriptor.from_dict(payload) == desc

    def test_created_at(self):
        created = datetime(2025, 6, 15, 10, 30, 0, tzinfo=timezone.utc)
        desc = ModelDescriptor(name="test", created_at=created)
        payload = desc.to_dict()
        assert payload["createdAt"] == created.isoformat()

        restored = ModelDescriptor.from_dict(payload)
        assert restored.created_at == created
        assert restored == desc


class TestModel:
    """Tests for Model serialization."""

    def test_minimal(self):
        model = Model(
            descriptor=ModelDescriptor(name="test-model"),
            modelfs=ModelFS(type="layers", diff_ids=["sha256:abc"]),
            config=ModelConfig(param_size="8b"),
        )
        assert model.to_dict() == {
            "descriptor": {"name": "test-model"},
            "modelfs": {"type": "layers", "diffIds": ["sha256:abc"]},
            "config": {"paramSize": "8b"},
        }

    def test_json_round_trip(self):
        digest = (
            "sha256:1234567890abcdef1234567890abcdef"
            "1234567890abcdef1234567890abcdef"
        )
        model = Model(
            descriptor=ModelDescriptor(name="llama3-8b-instruct", version="3.1"),
            modelfs=ModelFS(type="layers", diff_ids=[digest]),
            config=ModelConfig(param_size="8b"),
        )

        restored = Model.from_json(model.to_json())
        assert restored.descriptor.name == "llama3-8b-instruct"
        assert restored.modelfs.type == "layers"
        assert restored.config.param_size == "8b"
        assert restored == model

    def test_from_json_string(self):
        document = json.dumps(
            {
                "descriptor": {"name": "test"},
                "modelfs": {"type": "layers", "diffIds": ["sha256:abc"]},
                "config": {"paramSize": "1b"},
            }
        )
        model = Model.from_json(document)
        assert model.descriptor.name == "test"
        assert model.modelfs.diff_ids == ["sha256:abc"]
        assert model.config.param_size == "1b"

    def test_full_model(self):
        model = Model(
            descriptor=ModelDescriptor(
                name="qwen2-vl-72b-instruct",
                version="2.0",
                family="qwen2",
                vendor="Alibaba",
                authors=["Qwen Team"],
                licenses=["Apache-2.0"],
                title="Qwen2 VL 72B Instruct",
                description="A vision-language model",
                doc_url="https://example.com/docs",
                source_url="https://github.com/example/qwen2",
                datasets_url=["https://example.com/dataset"],
            ),
            modelfs=ModelFS(
                type="layers",
                diff_ids=["sha256:aabbcc", "sha256:ddeeff"],
            ),
            config=ModelConfig(
                architecture="transformer",
                format="safetensors",
                param_size="72b",
                precision="bfloat16",
                capabilities=ModelCapabilities(
                    input_types=[Modality.TEXT, Modality.IMAGE],
                    output_types=[Modality.TEXT],
                    reasoning=True,
                    tool_usage=True,
                    languages=["en", "zh"],
                ),
            ),
        )
        payload = model.to_dict()
        assert payload["descriptor"]["vendor"] == "Alibaba"
        assert payload["config"]["capabilities"]["inputTypes"] == ["text", "image"]
        assert payload["config"]["capabilities"]["languages"] == ["en", "zh"]

        restored = Model.from_json(model.to_json())
        assert restored.config.capabilities.input_types == [
            Modality.TEXT,
            Modality.IMAGE,
        ]
        assert restored.config.capabilities.languages == ["en", "zh"]
        # Whole-object equality covers every descriptor/config field at once.
        assert restored == model
+ +"""Tests for media type constants matching specs-go/v1/mediatype.go.""" + +from modelpack.v1 import mediatype + + +class TestMediaTypes: + """Verify media type constants match Go definitions exactly.""" + + def test_artifact_type(self): + assert ( + mediatype.ARTIFACT_TYPE_MODEL_MANIFEST + == "application/vnd.cncf.model.manifest.v1+json" + ) + + def test_config(self): + assert ( + mediatype.MEDIA_TYPE_MODEL_CONFIG + == "application/vnd.cncf.model.config.v1+json" + ) + + def test_weight_types(self): + assert ( + mediatype.MEDIA_TYPE_MODEL_WEIGHT_RAW + == "application/vnd.cncf.model.weight.v1.raw" + ) + assert ( + mediatype.MEDIA_TYPE_MODEL_WEIGHT + == "application/vnd.cncf.model.weight.v1.tar" + ) + assert ( + mediatype.MEDIA_TYPE_MODEL_WEIGHT_GZIP + == "application/vnd.cncf.model.weight.v1.tar+gzip" + ) + assert ( + mediatype.MEDIA_TYPE_MODEL_WEIGHT_ZSTD + == "application/vnd.cncf.model.weight.v1.tar+zstd" + ) + + def test_weight_config_types(self): + assert ( + mediatype.MEDIA_TYPE_MODEL_WEIGHT_CONFIG_RAW + == "application/vnd.cncf.model.weight.config.v1.raw" + ) + assert ( + mediatype.MEDIA_TYPE_MODEL_WEIGHT_CONFIG + == "application/vnd.cncf.model.weight.config.v1.tar" + ) + assert ( + mediatype.MEDIA_TYPE_MODEL_WEIGHT_CONFIG_GZIP + == "application/vnd.cncf.model.weight.config.v1.tar+gzip" + ) + assert ( + mediatype.MEDIA_TYPE_MODEL_WEIGHT_CONFIG_ZSTD + == "application/vnd.cncf.model.weight.config.v1.tar+zstd" + ) + + def test_doc_types(self): + assert ( + mediatype.MEDIA_TYPE_MODEL_DOC_RAW + == "application/vnd.cncf.model.doc.v1.raw" + ) + assert mediatype.MEDIA_TYPE_MODEL_DOC == "application/vnd.cncf.model.doc.v1.tar" + assert ( + mediatype.MEDIA_TYPE_MODEL_DOC_GZIP + == "application/vnd.cncf.model.doc.v1.tar+gzip" + ) + assert ( + mediatype.MEDIA_TYPE_MODEL_DOC_ZSTD + == "application/vnd.cncf.model.doc.v1.tar+zstd" + ) + + def test_code_types(self): + assert ( + mediatype.MEDIA_TYPE_MODEL_CODE_RAW + == "application/vnd.cncf.model.code.v1.raw" + ) + assert ( 
+ mediatype.MEDIA_TYPE_MODEL_CODE == "application/vnd.cncf.model.code.v1.tar" + ) + assert ( + mediatype.MEDIA_TYPE_MODEL_CODE_GZIP + == "application/vnd.cncf.model.code.v1.tar+gzip" + ) + assert ( + mediatype.MEDIA_TYPE_MODEL_CODE_ZSTD + == "application/vnd.cncf.model.code.v1.tar+zstd" + ) + + def test_dataset_types(self): + assert ( + mediatype.MEDIA_TYPE_MODEL_DATASET_RAW + == "application/vnd.cncf.model.dataset.v1.raw" + ) + assert ( + mediatype.MEDIA_TYPE_MODEL_DATASET + == "application/vnd.cncf.model.dataset.v1.tar" + ) + assert ( + mediatype.MEDIA_TYPE_MODEL_DATASET_GZIP + == "application/vnd.cncf.model.dataset.v1.tar+gzip" + ) + assert ( + mediatype.MEDIA_TYPE_MODEL_DATASET_ZSTD + == "application/vnd.cncf.model.dataset.v1.tar+zstd" + ) diff --git a/specs-python/tests/test_validator.py b/specs-python/tests/test_validator.py new file mode 100644 index 0000000..5060797 --- /dev/null +++ b/specs-python/tests/test_validator.py @@ -0,0 +1,705 @@ +# Copyright 2025 The CNCF ModelPack Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Validation tests mirroring the Go test cases in schema/config_test.go. + +Each test case matches the corresponding Go test to ensure +consistent validation behavior between the Go and Python SDKs. +""" + +import json + +import pytest +from jsonschema import ValidationError + +from modelpack.v1.validator import validate_config + +# A valid base config used across tests. 
VALID_CONFIG = {
    "descriptor": {
        "name": "xyz-3-8B-Instruct",
        "version": "3.1",
    },
    "config": {
        "paramSize": "8b",
    },
    "modelfs": {
        "type": "layers",
        "diffIds": [
            "sha256:1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"
        ],
    },
}


def _base_config():
    """Return an independent deep copy of ``VALID_CONFIG``.

    ``VALID_CONFIG`` contains only JSON-native values (dicts, lists and
    strings), so a JSON round trip produces an equal structure that shares
    no objects with the original. Tests mutate the copy, never the shared
    module-level dict, so one test cannot leak changes into another.
    """
    return json.loads(json.dumps(VALID_CONFIG))


def _small_config():
    """Return a fresh copy of the short config used by the edge-case tests."""
    return {
        "descriptor": {"name": "test", "version": "1.0"},
        "config": {"paramSize": "8b"},
        "modelfs": {"type": "layers", "diffIds": ["sha256:abc"]},
    }


def _config_with_capabilities(capabilities):
    """Return a base config whose ``config.capabilities`` is *capabilities*."""
    config = _base_config()
    config["config"]["capabilities"] = capabilities
    return config


def _assert_rejected(config):
    """Assert that ``validate_config`` raises ``ValidationError`` for *config*."""
    with pytest.raises(ValidationError):
        validate_config(config)


class TestValidConfigCases:
    """Tests that valid configs pass validation."""

    def test_minimal_valid(self):
        """The shared base config is accepted when passed as a dict."""
        validate_config(VALID_CONFIG)

    def test_valid_as_json_string(self):
        """The shared base config is accepted when passed as a JSON string."""
        validate_config(json.dumps(VALID_CONFIG))

    def test_valid_with_all_fields(self):
        """A config that sets every descriptor/config/capability field is accepted."""
        config = {
            "descriptor": {
                "name": "llama3-8b-instruct",
                "version": "3.1",
                "family": "llama3",
                "authors": ["Meta"],
                "vendor": "Meta",
                "licenses": ["Apache-2.0"],
                "title": "Llama 3 8B Instruct",
                "description": "An instruction-tuned LLM",
                "createdAt": "2025-01-01T00:00:00Z",
                "docURL": "https://example.com/docs",
                "sourceURL": "https://github.com/meta/llama3",
                "datasetsURL": ["https://example.com/data"],
                "revision": "abc123",
            },
            "config": {
                "architecture": "transformer",
                "format": "safetensors",
                "paramSize": "8b",
                "precision": "float16",
                "quantization": "awq",
                "capabilities": {
                    "inputTypes": ["text"],
                    "outputTypes": ["text"],
                    "knowledgeCutoff": "2025-01-01T00:00:00Z",
                    "reasoning": True,
                    "toolUsage": True,
                    "reward": False,
                    "languages": ["en", "fr"],
                },
            },
            "modelfs": {
                "type": "layers",
                "diffIds": [
                    "sha256:abcdef1234567890abcdef1234567890"
                    "abcdef1234567890abcdef1234567890"
                ],
            },
        }
        validate_config(config)


class TestFailureConfigCases:
    """Tests mirroring Go config_test.go failure cases.

    Each test corresponds to a numbered test case in the Go file. Every
    test starts from a fresh copy of ``VALID_CONFIG`` and applies only the
    single deviation it is named after, so the invalid field is the only
    difference from a config that validates.
    """

    def test_config_missing(self):
        """Go test 0: config is missing."""
        config = _base_config()
        del config["config"]
        _assert_rejected(config)

    def test_version_is_number(self):
        """Go test 1: version is a number."""
        config = _base_config()
        config["descriptor"]["version"] = 3.1
        _assert_rejected(config)

    def test_revision_is_number(self):
        """Go test 2: revision is a number."""
        config = _base_config()
        config["descriptor"]["revision"] = 1234567890
        _assert_rejected(config)

    def test_created_at_not_rfc3339(self):
        """Go test 3: createdAt is not RFC3339 format."""
        config = _base_config()
        config["descriptor"]["createdAt"] = "2025/01/01T00:00:00Z"
        _assert_rejected(config)

    def test_authors_not_array(self):
        """Go test 4: authors is not an array."""
        config = _base_config()
        config["descriptor"]["authors"] = "John Doe"
        _assert_rejected(config)

    def test_licenses_not_array(self):
        """Go test 5: licenses is not an array."""
        config = _base_config()
        config["descriptor"]["licenses"] = "Apache-2.0"
        _assert_rejected(config)

    def test_doc_url_is_array(self):
        """Go test 6: docURL is an array."""
        config = _base_config()
        config["descriptor"]["docURL"] = ["https://example.com/doc"]
        _assert_rejected(config)

    def test_source_url_is_array(self):
        """Go test 7: sourceURL is an array."""
        config = _base_config()
        config["descriptor"]["sourceURL"] = ["https://github.com/xyz/xyz3"]
        _assert_rejected(config)

    def test_datasets_url_not_array(self):
        """Go test 8: datasetsURL is not an array."""
        config = _base_config()
        # sourceURL is a well-formed string here; only datasetsURL is wrong.
        config["descriptor"]["sourceURL"] = "https://github.com/xyz/xyz3"
        config["descriptor"]["datasetsURL"] = "https://example.com/dataset"
        _assert_rejected(config)

    def test_param_size_is_number(self):
        """Go test 9: paramSize is a number."""
        config = _base_config()
        config["config"] = {"paramSize": 8000000}
        _assert_rejected(config)

    def test_precision_is_number(self):
        """Go test 10: precision is a number."""
        config = _base_config()
        # The whole section is replaced: this case carries no paramSize.
        config["config"] = {"precision": 16}
        _assert_rejected(config)

    def test_type_not_layers(self):
        """Go test 11: type is not 'layers'."""
        config = _base_config()
        config["modelfs"]["type"] = "layer"
        _assert_rejected(config)

    def test_diff_ids_not_array(self):
        """Go test 12: diffIds is not an array."""
        config = _base_config()
        # Swap the one-element list for the bare digest string it contains.
        config["modelfs"]["diffIds"] = config["modelfs"]["diffIds"][0]
        _assert_rejected(config)

    def test_diff_ids_empty(self):
        """Go test 13: diffIds is empty."""
        config = _base_config()
        config["modelfs"]["diffIds"] = []
        _assert_rejected(config)

    def test_input_types_not_array(self):
        """Go test 14: inputTypes is not an array."""
        _assert_rejected(_config_with_capabilities({"inputTypes": "text"}))

    def test_output_types_not_array(self):
        """Go test 15: outputTypes is not an array."""
        _assert_rejected(_config_with_capabilities({"outputTypes": "text"}))

    def test_invalid_modality(self):
        """Go test 16: invalid modality value."""
        _assert_rejected(_config_with_capabilities({"inputTypes": ["img"]}))

    def test_knowledge_cutoff_not_rfc3339(self):
        """Go test 17: knowledgeCutoff is not RFC3339 format."""
        capabilities = {
            "inputTypes": ["text"],
            "outputTypes": ["text"],
            "knowledgeCutoff": "2025-01-01",
        }
        _assert_rejected(_config_with_capabilities(capabilities))

    def test_reasoning_not_boolean(self):
        """Go test 18: reasoning is not boolean."""
        capabilities = {
            "inputTypes": ["text"],
            "outputTypes": ["text"],
            "reasoning": "true",
        }
        _assert_rejected(_config_with_capabilities(capabilities))

    def test_tool_usage_not_boolean(self):
        """Go test 19: toolUsage is not boolean."""
        capabilities = {
            "inputTypes": ["text"],
            "outputTypes": ["text"],
            "toolUsage": "true",
        }
        _assert_rejected(_config_with_capabilities(capabilities))

    def test_reward_not_boolean(self):
        """Go test 20: reward is not boolean."""
        capabilities = {
            "inputTypes": ["text"],
            "outputTypes": ["text"],
            "reward": "true",
        }
        _assert_rejected(_config_with_capabilities(capabilities))

    def test_languages_not_array(self):
        """Go test 21: languages is not an array."""
        capabilities = {
            "inputTypes": ["text"],
            "outputTypes": ["text"],
            "languages": "en",
        }
        _assert_rejected(_config_with_capabilities(capabilities))

    def test_language_code_not_iso639(self):
        """Go test 22: language code is not a two-letter ISO 639 code."""
        capabilities = {
            "inputTypes": ["text"],
            "outputTypes": ["text"],
            "languages": ["fra"],
        }
        _assert_rejected(_config_with_capabilities(capabilities))

    def test_unknown_field_in_capabilities(self):
        """Go test 23: unknown field in capabilities."""
        capabilities = {
            "inputTypes": ["text"],
            "unknownField": True,
        }
        _assert_rejected(_config_with_capabilities(capabilities))


class TestEdgeCases:
    """Additional edge case tests."""

    def test_empty_dict(self):
        """An empty dict is rejected."""
        _assert_rejected({})

    def test_invalid_json_string(self):
        """A string that is not valid JSON makes ``validate_config`` raise."""
        # NOTE(review): ``Exception`` is deliberately kept as broad as the
        # original; narrow it once the error type validate_config raises for
        # unparseable input is confirmed against the validator module.
        with pytest.raises(Exception):
            validate_config("{invalid json")

    def test_empty_name(self):
        """Name with minLength: 1 should reject empty string."""
        config = _small_config()
        config["descriptor"]["name"] = ""
        _assert_rejected(config)

    def test_unknown_field_at_root(self):
        """An extra top-level key is rejected."""
        config = _small_config()
        config["extraField"] = "should fail"
        _assert_rejected(config)

    def test_unknown_field_in_descriptor(self):
        """An extra key inside ``descriptor`` is rejected."""
        config = _small_config()
        config["descriptor"]["unknownField"] = "value"
        _assert_rejected(config)

    def test_unknown_field_in_config(self):
        """An extra key inside ``config`` is rejected."""
        config = _small_config()
        config["config"]["unknownField"] = "value"
        _assert_rejected(config)

    def test_modelfs_missing(self):
        """A config without the ``modelfs`` section is rejected."""
        config = _small_config()
        del config["modelfs"]
        _assert_rejected(config)

    def test_descriptor_missing(self):
        """A config without the ``descriptor`` section is rejected."""
        config = _small_config()
        del config["descriptor"]
        _assert_rejected(config)