// -----------------------------------------------------------------------------
// json/_types.ts (new file)
// -----------------------------------------------------------------------------
// Copyright 2018-2026 the Deno authors. MIT license.

/**
 * Proxy type that resolves to `Uint8Array<ArrayBuffer>` on TypeScript 5.7 and
 * above, and to plain `Uint8Array` on earlier versions (where `Uint8Array` is
 * not generic).
 *
 * This is an internal utility type and should not be used directly.
 *
 * NOTE(review): the type argument of `ReturnType` was lost when this listing
 * was extracted; `Uint8Array["slice"]` is a reconstruction that matches the
 * description above. Confirm against the repository.
 *
 * @internal @private
 */
export type Uint8Array_ = ReturnType<Uint8Array["slice"]>;

// -----------------------------------------------------------------------------
// json/deno.json (exports map)
// -----------------------------------------------------------------------------
// Adds one entry after the existing "./stringify-stream" export:
//   "./unstable-canonicalize": "./unstable_canonicalize.ts"

// -----------------------------------------------------------------------------
// json/types.ts
// -----------------------------------------------------------------------------
// Copyright 2018-2026 the Deno authors. MIT license.
// This module is browser compatible.

/** A primitive JSON value. */
export type JsonPrimitive = string | number | boolean | null;

/** The type of the result of parsing JSON. */
export type JsonValue =
  | { [key: string]: JsonValue | undefined }
  | JsonValue[]
  | JsonPrimitive;

// -----------------------------------------------------------------------------
// json/unstable_canonicalize.ts (new file)
// -----------------------------------------------------------------------------
// Copyright 2018-2026 the Deno authors. MIT license.
// This module is browser compatible.

// In the repository layout the types used below are imported:
//   import type { JsonPrimitive, JsonValue } from "./types.ts";
//   import type { Uint8Array_ } from "./_types.ts";
// In this consolidated listing they are declared above instead.

/**
 * Matches a UTF-16 surrogate code unit that is not part of a valid
 * high/low surrogate pair.
 *
 * The pattern has no `u` flag so the character classes match individual
 * UTF-16 code units, and no `g`/`y` flag so `test()` keeps no state between
 * calls.
 */
const LONE_SURROGATE_REGEXP =
  /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]/;

/**
 * Serializes a string (a value or a property name) to its canonical JSON
 * representation per RFC 8785 Section 3.2.2.2.
 *
 * `JSON.stringify` already produces the required escaping, but it silently
 * turns lone surrogates into `\udXXX` escapes. RFC 8785 requires such invalid
 * Unicode to abort canonicalization, so it is rejected here first.
 *
 * @param value The string to serialize.
 * @returns The quoted and escaped JSON string.
 * @throws {TypeError} If the string contains a lone surrogate.
 */
function serializeString(value: string): string {
  if (LONE_SURROGATE_REGEXP.test(value)) {
    throw new TypeError(
      "Cannot canonicalize string containing a lone surrogate: invalid Unicode is not allowed in I-JSON",
    );
  }
  return JSON.stringify(value);
}

/**
 * Serializes a primitive JSON value (null, boolean, number, string) to its
 * canonical string representation per RFC 8785.
 *
 * @param value The primitive to serialize.
 * @returns The canonical JSON text for `value`.
 * @throws {TypeError} If `value` is a non-finite number, a string containing a
 * lone surrogate, or (at runtime, despite the declared type) not a JSON
 * primitive at all.
 */
function serializePrimitive(value: JsonPrimitive): string {
  switch (typeof value) {
    case "string":
      return serializeString(value);
    case "boolean":
      return value ? "true" : "false";
    case "number":
      // RFC 8785 Section 3.2.2.3: Numbers must conform to I-JSON (RFC 7493)
      if (!Number.isFinite(value)) {
        throw new TypeError(
          `Cannot canonicalize non-finite number: ${value} is not allowed in I-JSON`,
        );
      }
      // Handle -0 as "0" (RFC 8785 Section 3.2.2.3)
      if (Object.is(value, -0)) return "0";
      // ECMAScript Number-to-String for all other numbers
      return value.toString();
    default:
      // `typeof null` is "object", so null lands here.
      if (value === null) return "null";
      // undefined, bigint, symbol and function are not JSON values. Passing
      // them to JSON.stringify would either throw or return `undefined`,
      // which would corrupt the output, so fail loudly instead.
      throw new TypeError(
        `Cannot canonicalize value of type ${typeof value}: only JSON values are supported`,
      );
  }
}

/**
 * Serializes an array to its canonical string representation.
 * Undefined elements become null (standard JSON behavior).
 *
 * @param value The array to serialize.
 * @param ancestors Objects currently being serialized, for cycle detection.
 * @returns The canonical JSON text for the array.
 */
function serializeArray(value: JsonValue[], ancestors: Set<object>): string {
  const parts: string[] = [];
  // for...of goes through the array iterator, which yields `undefined` for
  // holes in sparse arrays, so holes serialize as null as well.
  for (const elem of value) {
    parts.push(elem === undefined ? "null" : serializeValue(elem, ancestors));
  }
  return "[" + parts.join(",") + "]";
}

/**
 * Serializes an object to its canonical string representation.
 * Keys are sorted by UTF-16 code units (RFC 8785 Section 3.2.3).
 * Undefined values are skipped (standard JSON behavior, RFC 8785 Section 3.1).
 *
 * @param value The object to serialize.
 * @param ancestors Objects currently being serialized, for cycle detection.
 * @returns The canonical JSON text for the object.
 * @throws {TypeError} If a property name contains a lone surrogate.
 */
function serializeObject(
  value: { [key: string]: JsonValue | undefined },
  ancestors: Set<object>,
): string {
  // Default sort uses UTF-16 code unit comparison per RFC 8785
  const keys = Object.keys(value).sort();

  const parts: string[] = [];
  for (const key of keys) {
    const propValue = value[key];
    if (propValue === undefined) continue;
    parts.push(serializeString(key) + ":" + serializeValue(propValue, ancestors));
  }

  return "{" + parts.join(",") + "}";
}

/**
 * Recursively serializes a JSON value to its canonical string representation.
 *
 * @param value The JSON value to serialize
 * @param ancestors Objects on the path from the root to `value`, used for
 * cycle detection
 * @returns The canonical JSON text for `value`.
 * @throws {TypeError} If `value` is one of its own ancestors.
 */
function serializeValue(
  value: JsonValue,
  ancestors: Set<object> = new Set<object>(),
): string {
  if (value === null) return "null";
  if (typeof value !== "object") return serializePrimitive(value);

  // Circular reference detection: check if this object is an ancestor
  if (ancestors.has(value)) {
    throw new TypeError("Converting circular structure to JSON");
  }
  ancestors.add(value);

  const result = Array.isArray(value)
    ? serializeArray(value, ancestors)
    : serializeObject(value, ancestors);

  // Only ancestors are tracked, so the same object may legitimately appear
  // again in a sibling position.
  ancestors.delete(value);
  return result;
}

/**
 * Serializes a JSON value to a canonical string per
 * {@link https://www.rfc-editor.org/rfc/rfc8785 | RFC 8785} JSON
 * Canonicalization Scheme (JCS).
 *
 * This produces a deterministic JSON string suitable for hashing or signing,
 * with object keys sorted lexicographically by UTF-16 code units and no
 * whitespace between tokens.
 *
 * Note: The input must be JSON-compatible data. Objects with `toJSON()` methods
 * (like `Date`) should be converted to their JSON representation first.
 *
 * @experimental **UNSTABLE**: New API, yet to be vetted.
 *
 * @param value The JSON value to canonicalize.
 * @returns The canonical JSON string.
 *
 * @example Basic usage (RFC 8785 Appendix E inspired)
 * ```ts
 * import { canonicalize } from "@std/json/unstable-canonicalize";
 * import { assertEquals } from "@std/assert";
 *
 * // Keys are sorted lexicographically, no whitespace between tokens
 * const data = {
 *   time: "2019-01-28T07:45:10Z",
 *   big: "055",
 *   val: 3.5,
 * };
 * assertEquals(canonicalize(data), '{"big":"055","time":"2019-01-28T07:45:10Z","val":3.5}');
 * ```
 *
 * @example Number serialization (RFC 8785 Section 3.2.2.3)
 * ```ts
 * import { canonicalize } from "@std/json/unstable-canonicalize";
 * import { assertEquals } from "@std/assert";
 *
 * // Numbers follow ECMAScript serialization rules
 * assertEquals(canonicalize(10.0), "10");        // No unnecessary decimals
 * assertEquals(canonicalize(1e21), "1e+21");     // Scientific notation for large
 * assertEquals(canonicalize(0.0000001), "1e-7"); // Scientific notation for small
 * assertEquals(canonicalize(-0), "0");           // Negative zero becomes "0"
 * ```
 *
 * @example Key sorting by UTF-16 code units (RFC 8785 Section 3.2.3)
 * ```ts
 * import { canonicalize } from "@std/json/unstable-canonicalize";
 * import { assertEquals } from "@std/assert";
 *
 * // Keys sorted by UTF-16 code units: digits < uppercase < lowercase
 * const data = { a: 1, A: 2, "1": 3 };
 * assertEquals(canonicalize(data), '{"1":3,"A":2,"a":1}');
 * ```
 *
 * @throws {TypeError} If the value contains non-finite numbers (Infinity, -Infinity, NaN).
 * @throws {TypeError} If the value contains circular references.
 * @throws {TypeError} If a string or property name contains a lone surrogate
 * (RFC 8785 Section 3.2.2.2).
 * @throws {TypeError} If the value contains something that is not a JSON value
 * (for example a function, symbol or bigint).
 *
 * @see {@link https://www.rfc-editor.org/rfc/rfc8785 | RFC 8785}
 */
export function canonicalize(value: JsonValue): string {
  return serializeValue(value);
}

/**
 * Serializes a JSON value to canonical UTF-8 bytes per
 * {@link https://www.rfc-editor.org/rfc/rfc8785 | RFC 8785} JSON
 * Canonicalization Scheme (JCS).
 *
 * This is equivalent to `new TextEncoder().encode(canonicalize(value))` and
 * is provided as a convenience for cryptographic operations that require
 * byte input.
 *
 * @experimental **UNSTABLE**: New API, yet to be vetted.
 *
 * @param value The JSON value to canonicalize.
 * @returns The canonical JSON as UTF-8 bytes.
 *
 * @example Creating a verifiable hash
 * ```ts
 * import { canonicalizeToBytes } from "@std/json/unstable-canonicalize";
 * import { encodeHex } from "@std/encoding/hex";
 * import { assertEquals } from "@std/assert";
 *
 * async function sha256Hex(data: Uint8Array): Promise<string> {
 *   const hash = await crypto.subtle.digest("SHA-256", data.buffer as ArrayBuffer);
 *   return encodeHex(new Uint8Array(hash));
 * }
 *
 * // Create a deterministic hash of JSON data for verification
 * const payload = { action: "transfer", amount: 100, to: "alice" };
 * const hash = await sha256Hex(canonicalizeToBytes(payload));
 *
 * // Same hash regardless of original key order
 * const reordered = { to: "alice", action: "transfer", amount: 100 };
 * const reorderedHash = await sha256Hex(canonicalizeToBytes(reordered));
 *
 * assertEquals(hash, reorderedHash);
 * ```
 *
 * @throws {TypeError} If the value contains non-finite numbers (Infinity, -Infinity, NaN).
 * @throws {TypeError} If the value contains circular references.
 * @throws {TypeError} If a string or property name contains a lone surrogate
 * (RFC 8785 Section 3.2.2.2).
 * @throws {TypeError} If the value contains something that is not a JSON value
 * (for example a function, symbol or bigint).
 *
 * @see {@link https://www.rfc-editor.org/rfc/rfc8785 | RFC 8785}
 */
export function canonicalizeToBytes(value: JsonValue): Uint8Array_ {
  return new TextEncoder().encode(canonicalize(value));
}

// -----------------------------------------------------------------------------
// json/unstable_canonicalize_test.ts (new file)
// -----------------------------------------------------------------------------
// Copyright 2018-2026 the Deno authors. MIT license.
import { assertEquals, assertThrows } from "@std/assert";
import { canonicalize, canonicalizeToBytes } from "./unstable_canonicalize.ts";

/** Any value accepted by `canonicalize()`. */
type CanonicalInput = Parameters<typeof canonicalize>[0];

/**
 * Asserts, in order, that every `[input, expected]` pair canonicalizes to the
 * expected string.
 */
function expectCanonical(
  cases: ReadonlyArray<readonly [input: CanonicalInput, expected: string]>,
): void {
  for (const [input, expected] of cases) {
    assertEquals(canonicalize(input), expected);
  }
}

/** Asserts that `run` throws a `TypeError` whose message includes `message`. */
function expectTypeError(run: () => unknown, message: string): void {
  assertThrows(run, TypeError, message);
}

/** Encodes `text` as UTF-8, for comparison with `canonicalizeToBytes()`. */
function utf8(text: string): Uint8Array {
  return new TextEncoder().encode(text);
}

const NON_FINITE_MESSAGE = "Cannot canonicalize non-finite number";
const CIRCULAR_MESSAGE = "Converting circular structure to JSON";

// RFC 8785 §3.2.2.1: Literals

Deno.test("canonicalize() serializes literals", () => {
  expectCanonical([
    [null, "null"],
    [true, "true"],
    [false, "false"],
  ]);
});

// RFC 8785 §3.2.2.2: Strings

Deno.test("canonicalize() serializes strings", () => {
  expectCanonical([
    ["", '""'],
    ["hello", '"hello"'],
  ]);
});

Deno.test("canonicalize() escapes mandatory characters in strings", () => {
  expectCanonical([
    ['a"b', '"a\\"b"'],
    ["a\\b", '"a\\\\b"'],
  ]);
});

Deno.test("canonicalize() uses two-character escape sequences", () => {
  expectCanonical([
    ["a\bb", '"a\\bb"'],
    ["a\fb", '"a\\fb"'],
    ["a\nb", '"a\\nb"'],
    ["a\rb", '"a\\rb"'],
    ["a\tb", '"a\\tb"'],
  ]);
});

Deno.test("canonicalize() escapes other control characters as \\uXXXX", () => {
  expectCanonical([
    ["\x00", '"\\u0000"'],
    ["\x0f", '"\\u000f"'],
    ["\x1f", '"\\u001f"'],
  ]);
});

Deno.test("canonicalize() does not escape forward slash", () => {
  expectCanonical([
    ["a/b", '"a/b"'],
    ["https://example.com/path", '"https://example.com/path"'],
  ]);
});

Deno.test("canonicalize() preserves unicode characters without escaping", () => {
  expectCanonical([
    ["こんにちは", '"こんにちは"'],
    ["€", '"€"'],
    ["🦕", '"🦕"'],
    ["emoji: 👪", '"emoji: 👪"'],
  ]);
});

// RFC 8785 §3.2.2.3: Numbers

Deno.test("canonicalize() serializes zero and negative zero", () => {
  expectCanonical([
    [0, "0"],
    [-0, "0"], // RFC 8785: -0 → "0"
  ]);
});

Deno.test("canonicalize() serializes integers", () => {
  expectCanonical([
    [1, "1"],
    [-1, "-1"],
    [123, "123"],
    [-123, "-123"],
    [9007199254740991, "9007199254740991"],
    [-9007199254740991, "-9007199254740991"],
  ]);
});

Deno.test("canonicalize() serializes decimals without trailing zeros", () => {
  expectCanonical([
    [1.5, "1.5"],
    [-1.5, "-1.5"],
    [0.5, "0.5"],
    [4.50, "4.5"],
    [10.0, "10"],
  ]);
});

Deno.test("canonicalize() uses ECMAScript number serialization (Appendix B)", () => {
  // RFC 8785 Appendix B test vectors
  expectCanonical([
    [333333333.33333329, "333333333.3333333"],
    [1E30, "1e+30"],
    [4.50, "4.5"],
    [2e-3, "0.002"],
    [0.000000000000000000000000001, "1e-27"],
  ]);
});

Deno.test("canonicalize() uses exponential notation boundaries", () => {
  expectCanonical([
    [1e20, "100000000000000000000"],
    [1e21, "1e+21"],
    [1e22, "1e+22"],
    [0.000001, "0.000001"],
    [0.0000001, "1e-7"],
    [1e-10, "1e-10"],
  ]);
});

Deno.test("canonicalize() throws on non-finite numbers", () => {
  for (const bad of [NaN, Infinity, -Infinity]) {
    expectTypeError(() => canonicalize(bad), NON_FINITE_MESSAGE);
  }
});

// RFC 8785 §3.2.3: Object Property Sorting

Deno.test("canonicalize() handles empty object", () => {
  expectCanonical([[{}, "{}"]]);
});

Deno.test("canonicalize() sorts object keys by UTF-16 code units", () => {
  expectCanonical([
    [{ z: 1, a: 2 }, '{"a":2,"z":1}'],
    [{ c: 3, b: 2, a: 1 }, '{"a":1,"b":2,"c":3}'],
    [{ a: 1, A: 2, "1": 3 }, '{"1":3,"A":2,"a":1}'],
  ]);
});

Deno.test("canonicalize() sorts unicode keys by UTF-16 code units", () => {
  expectCanonical([
    [{ "€": 1, "$": 2 }, '{"$":2,"€":1}'],
    [{ "ö": 1, "o": 2 }, '{"o":2,"ö":1}'],
  ]);
});

Deno.test("canonicalize() handles keys requiring escaping", () => {
  expectCanonical([
    [{ 'key"with"quotes': 1 }, '{"key\\"with\\"quotes":1}'],
    [{ "key\nwith\nnewlines": 1 }, '{"key\\nwith\\nnewlines":1}'],
    [{ "": 1 }, '{"":1}'],
  ]);
});

Deno.test("canonicalize() omits undefined properties", () => {
  expectCanonical([[{ a: 1, b: undefined, c: 3 }, '{"a":1,"c":3}']]);
});

Deno.test("canonicalize() handles object with all undefined properties", () => {
  expectCanonical([[{ a: undefined, b: undefined, c: undefined }, "{}"]]);
});

Deno.test("canonicalize() recursively sorts nested objects", () => {
  const nestedInput = { b: { d: 4, c: 3 }, a: 1 };
  expectCanonical([[nestedInput, '{"a":1,"b":{"c":3,"d":4}}']]);
});

// Arrays

Deno.test("canonicalize() handles empty array", () => {
  expectCanonical([[[], "[]"]]);
});

Deno.test("canonicalize() preserves array element order", () => {
  expectCanonical([
    [[1, 2, 3], "[1,2,3]"],
    [[3, 1, 2], "[3,1,2]"],
    [["a", "b", "c"], '["a","b","c"]'],
  ]);
});

Deno.test("canonicalize() handles mixed type arrays", () => {
  expectCanonical([[[1, "two", true, null], '[1,"two",true,null]']]);
});

Deno.test("canonicalize() handles nested arrays", () => {
  expectCanonical([[[[1, 2], [3, 4]], "[[1,2],[3,4]]"]]);
});

Deno.test("canonicalize() sorts objects within arrays", () => {
  expectCanonical([
    [[{ z: 1, a: 2 }, { y: 3, b: 4 }], '[{"a":2,"z":1},{"b":4,"y":3}]'],
  ]);
});

Deno.test("canonicalize() converts undefined array elements to null", () => {
  // deno-lint-ignore no-explicit-any
  const oneGap = [1, undefined, 3] as any;
  // deno-lint-ignore no-explicit-any
  const onlyUndefined = [undefined] as any;
  // deno-lint-ignore no-explicit-any
  const twoUndefined = [undefined, undefined] as any;

  assertEquals(canonicalize(oneGap), "[1,null,3]");
  assertEquals(canonicalize(onlyUndefined), "[null]");
  assertEquals(canonicalize(twoUndefined), "[null,null]");
});

Deno.test("canonicalize() handles sparse arrays", () => {
  // deno-lint-ignore no-explicit-any
  const sparse: any[] = [];
  sparse.length = 3; // three holes
  sparse[0] = 1;
  sparse[2] = 3; // index 1 stays a hole
  assertEquals(canonicalize(sparse), "[1,null,3]");
});

Deno.test("canonicalize() handles nested undefined in arrays", () => {
  // deno-lint-ignore no-explicit-any
  const nested: any = [{ a: 1 }, undefined, [undefined, 2]];
  assertEquals(canonicalize(nested), '[{"a":1},null,[null,2]]');
});

// RFC 8785 §3.2.1: No Whitespace

Deno.test("canonicalize() produces output with no whitespace", () => {
  const output = canonicalize({ array: [1, 2, 3], nested: { a: 1, b: 2 } });
  for (const whitespace of [" ", "\n", "\t", "\r"]) {
    assertEquals(output.includes(whitespace), false);
  }
});

// Circular Reference Detection

Deno.test("canonicalize() throws on direct circular reference", () => {
  // deno-lint-ignore no-explicit-any
  const selfRef: any = { a: 1 };
  selfRef.self = selfRef;
  expectTypeError(() => canonicalize(selfRef), CIRCULAR_MESSAGE);
});

Deno.test("canonicalize() throws on indirect circular reference", () => {
  // deno-lint-ignore no-explicit-any
  const first: any = { name: "a" };
  // deno-lint-ignore no-explicit-any
  const second: any = { name: "b" };
  first.ref = second;
  second.ref = first;
  expectTypeError(() => canonicalize(first), CIRCULAR_MESSAGE);
});

Deno.test("canonicalize() throws on deeply nested circular reference", () => {
  // deno-lint-ignore no-explicit-any
  const root: any = { level1: { level2: { level3: {} } } };
  root.level1.level2.level3.backToRoot = root;
  expectTypeError(() => canonicalize(root), CIRCULAR_MESSAGE);
});

Deno.test("canonicalize() throws on circular reference in array", () => {
  // deno-lint-ignore no-explicit-any
  const list: any = [1, 2];
  list.push(list);
  expectTypeError(() => canonicalize(list), CIRCULAR_MESSAGE);
});

Deno.test("canonicalize() allows sibling references (non-circular)", () => {
  const shared = { x: 1 };
  expectCanonical([[{ a: shared, b: shared }, '{"a":{"x":1},"b":{"x":1}}']]);
});

Deno.test("canonicalize() allows same object in array siblings", () => {
  const shared = { value: 42 };
  expectCanonical([
    [[shared, shared, shared], '[{"value":42},{"value":42},{"value":42}]'],
  ]);
});

// Complex Nested Structures

Deno.test("canonicalize() handles deeply nested structures", () => {
  const tree = {
    z: { y: { x: [1, { w: 2, v: 3 }] } },
    a: "first",
  };
  expectCanonical([[tree, '{"a":"first","z":{"y":{"x":[1,{"v":3,"w":2}]}}}']]);
});

Deno.test("canonicalize() handles very deep nesting", () => {
  type DeepObj = { value?: string; nested?: DeepObj };
  const depth = 100;
  let deep: DeepObj = { value: "leaf" };
  let remaining = depth;
  while (remaining-- > 0) {
    deep = { nested: deep };
  }
  const output = canonicalize(deep);
  assertEquals(output.startsWith('{"nested":'), true);
  assertEquals(output.endsWith('{"value":"leaf"}' + "}".repeat(100)), true);
});

// RFC 8785 Appendix E: Examples

Deno.test("canonicalize() produces RFC 8785 Appendix E example output", () => {
  expectCanonical([
    [
      { time: "2019-01-28T07:45:10Z", big: "055", val: 3.5 },
      '{"big":"055","time":"2019-01-28T07:45:10Z","val":3.5}',
    ],
  ]);
});

Deno.test("canonicalize() handles complex string with escaping", () => {
  const tricky = "\u20ac$\u000F\u000aA''\u0042\"\\/";
  expectCanonical([[tricky, '"€$\\u000f\\nA\'\'B\\"\\\\/"']]);
});

// RFC 8785 §3.2.4: UTF-8 Generation

Deno.test("canonicalizeToBytes() returns UTF-8 encoded bytes", () => {
  assertEquals(canonicalizeToBytes({ a: 1 }), utf8('{"a":1}'));
});

Deno.test("canonicalizeToBytes() handles unicode correctly", () => {
  assertEquals(canonicalizeToBytes({ emoji: "🦕" }), utf8('{"emoji":"🦕"}'));
});

Deno.test("canonicalizeToBytes() handles multi-byte characters", () => {
  assertEquals(
    canonicalizeToBytes({ euro: "€", cjk: "日本語" }),
    utf8('{"cjk":"日本語","euro":"€"}'),
  );
});

Deno.test("canonicalizeToBytes() throws on invalid input", () => {
  expectTypeError(() => canonicalizeToBytes(NaN), NON_FINITE_MESSAGE);
});

// Determinism

Deno.test("canonicalize() produces identical output regardless of key order", () => {
  const orderings = [
    { action: "transfer", amount: 100, to: "alice" },
    { to: "alice", action: "transfer", amount: 100 },
    { amount: 100, to: "alice", action: "transfer" },
  ];
  const [first, second, third] = orderings.map((variant) =>
    canonicalize(variant)
  );

  assertEquals(first, second);
  assertEquals(second, third);
  assertEquals(first, '{"action":"transfer","amount":100,"to":"alice"}');
});