diff --git a/.gitignore b/.gitignore index e004ae30..721680c2 100644 --- a/.gitignore +++ b/.gitignore @@ -141,4 +141,5 @@ compile_commands.json # quaddtype /quaddtype/subprojects/qblas/ /quaddtype/subprojects/sleef/ +/quaddtype/subprojects/pythoncapi-compat/ .wraplock diff --git a/quaddtype/meson.build b/quaddtype/meson.build index 6ad8841a..40783cf3 100644 --- a/quaddtype/meson.build +++ b/quaddtype/meson.build @@ -79,6 +79,10 @@ incdir_numpy = run_command(py, check : true ).stdout().strip() +# pythoncapi-compat for portable C API usage across Python versions +pythoncapi_compat_subproj = subproject('pythoncapi-compat') +pythoncapi_compat_inc = pythoncapi_compat_subproj.get_variable('incdir') + # print numpy version used numpy_version = run_command(py, ['-c', 'import numpy; print(numpy.__version__)'], @@ -154,6 +158,7 @@ includes = include_directories( 'numpy_quaddtype/src', ] ) +pythoncapi_includes = pythoncapi_compat_inc srcs = [ 'numpy_quaddtype/src/quad_common.h', @@ -208,5 +213,5 @@ py.extension_module('_quaddtype_main', dependencies: dependencies, install: true, subdir: 'numpy_quaddtype', - include_directories: [includes, build_includes], + include_directories: [includes, build_includes, pythoncapi_includes], ) diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c index 8c257dbc..8a63fd00 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.c +++ b/quaddtype/numpy_quaddtype/src/scalar.c @@ -17,6 +17,8 @@ #include "dtype.h" #include "lock.h" #include "utilities.h" +#include "constants.hpp" +#include "pythoncapi_compat.h" QuadPrecisionObject * @@ -624,6 +626,112 @@ static PyGetSetDef QuadPrecision_getset[] = { {NULL} /* Sentinel */ }; +/* + * Hash function for QuadPrecision scalars. + * + * This implements the same algorithm as CPython's _Py_HashDouble, adapted for + * 128-bit floating point. The algorithm computes a hash based + * on the reduction of the value modulo the prime P = 2**PYHASH_BITS - 1. + * https://github.com/python/cpython/blob/20b69aac0d19a5e5358362410d9710887762f0e7/Python/pyhash.c#L87 + * + * Key invariant: hash(x) == hash(y) whenever x and y are numerically equal, + * even if x and y have different types. This ensures that: + * hash(QuadPrecision(1.0)) == hash(1.0) == hash(1) + * + * The algorithm: + * 1. Handle special cases: inf returns PyHASH_INF, nan uses pointer hash + * 2. Extract mantissa m in [0.5, 1.0) and exponent e via frexp(v) = m * 2^e + * 3. Process mantissa 28 bits at a time, accumulating into hash value x + * 4. Adjust for exponent using bit rotation (since 2^PyHASH_BITS ≡ 1 mod P) + * 5. Apply sign and handle the special case of -1 -> -2 + */ + +static Py_hash_t +QuadPrecision_hash(QuadPrecisionObject *self) +{ + Sleef_quad value; + int sign = 1; + + if (self->backend == BACKEND_SLEEF) { + value = self->value.sleef_value; + } + else { + value = Sleef_cast_from_doubleq1((double)self->value.longdouble_value); + } + + // Check for NaN - use pointer hash (each NaN instance gets unique hash) + // This prevents hash table catastrophic pileups from NaN instances + if (Sleef_iunordq1(value, value)) { + return Py_HashPointer((void *)self); + } + + if (Sleef_icmpeqq1(value, QUAD_PRECISION_INF)) { + return PyHASH_INF; + } + if (Sleef_icmpeqq1(value, QUAD_PRECISION_NINF)) { + return -PyHASH_INF; + } + + // Handle sign + Sleef_quad zero = Sleef_cast_from_int64q1(0); + if (Sleef_icmpltq1(value, zero)) { + sign = -1; + value = Sleef_negq1(value); + } + + // Get mantissa and exponent: value = m * 2^e, where 0.5 <= m < 1.0 + int exponent; + Sleef_quad mantissa = Sleef_frexpq1(value, &exponent); + + // Process 28 bits at a time (same as CPython's _Py_HashDouble) + // This works well for both binary and hexadecimal floating point + Py_uhash_t x = 0; + // 2^28 = 268435456 - exactly representable in double, so cast is safe + Sleef_quad multiplier = Sleef_cast_from_int64q1(1LL << 28); + + // Continue until mantissa becomes zero (all bits processed) + while (Sleef_icmpneq1(mantissa, zero)) { + // Rotate x left by 28 bits within PyHASH_MODULUS + x = ((x << 28) & PyHASH_MODULUS) | (x >> (PyHASH_BITS - 28)); + + // Scale mantissa by 2^28 + mantissa = Sleef_mulq1_u05(mantissa, multiplier); + exponent -= 28; + + // Extract integer part + Sleef_quad int_part = Sleef_truncq1(mantissa); + Py_uhash_t y = (Py_uhash_t)Sleef_cast_to_int64q1(int_part); + + // Remove integer part from mantissa (keep fractional part) + mantissa = Sleef_subq1_u05(mantissa, int_part); + + // Accumulate + x += y; + if (x >= PyHASH_MODULUS) { + x -= PyHASH_MODULUS; + } + } + + // Adjust for exponent: reduce e modulo PyHASH_BITS + // For negative exponents: PyHASH_BITS - 1 - ((-1 - e) % PyHASH_BITS) + int e = exponent >= 0 + ? exponent % PyHASH_BITS + : PyHASH_BITS - 1 - ((-1 - exponent) % PyHASH_BITS); + + // Rotate x left by e bits + x = ((x << e) & PyHASH_MODULUS) | (x >> (PyHASH_BITS - e)); + + // Apply sign + x = x * sign; + + // -1 is reserved for errors, so use -2 instead + if (x == (Py_uhash_t)-1) { + x = (Py_uhash_t)-2; + } + + return (Py_hash_t)x; +} + PyTypeObject QuadPrecision_Type = { PyVarObject_HEAD_INIT(NULL, 0).tp_name = "numpy_quaddtype.QuadPrecision", .tp_basicsize = sizeof(QuadPrecisionObject), @@ -632,6 +740,7 @@ PyTypeObject QuadPrecision_Type = { .tp_dealloc = (destructor)QuadPrecision_dealloc, .tp_repr = (reprfunc)QuadPrecision_repr_dragon4, .tp_str = (reprfunc)QuadPrecision_str_dragon4, + .tp_hash = (hashfunc)QuadPrecision_hash, .tp_as_number = &quad_as_scalar, .tp_as_buffer = &QuadPrecision_as_buffer, .tp_richcompare = (richcmpfunc)quad_richcompare, diff --git a/quaddtype/pyproject.toml b/quaddtype/pyproject.toml index e532bf54..c72b0c62 100644 --- a/quaddtype/pyproject.toml +++ b/quaddtype/pyproject.toml @@ -55,3 +55,7 @@ strict_equality_for_none = true exclude = ["build", "numpy_quaddtype/src", "subprojects", "tests"] enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] warn_unreachable = false + +[tool.pytest.ini_options] +testpaths = ["tests"] +norecursedirs = ["subprojects", "build", ".mesonpy*"] diff --git a/quaddtype/reinstall.sh b/quaddtype/reinstall.sh index b3d3b922..4274b6e9 100755 --- a/quaddtype/reinstall.sh +++ b/quaddtype/reinstall.sh @@ -5,6 +5,7 @@ rm -rf build/ rm -rf dist/ rm -rf subprojects/qblas rm -rf subprojects/sleef +rm -rf subprojects/pythoncapi-compat rm -rf .mesonpy-* python -m pip uninstall -y numpy_quaddtype diff --git a/quaddtype/subprojects/pythoncapi-compat.wrap b/quaddtype/subprojects/pythoncapi-compat.wrap new file mode 100644 index 00000000..23036868 --- /dev/null +++ b/quaddtype/subprojects/pythoncapi-compat.wrap @@ -0,0 +1,6 @@ +[wrap-git] +directory=pythoncapi-compat +url=https://github.com/python/pythoncapi-compat.git +revision=main +[provide] +pythoncapi_compat = pythoncapi_compat_dep diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py index 0ef1fd39..dabe9744 100644 --- a/quaddtype/tests/test_quaddtype.py +++ b/quaddtype/tests/test_quaddtype.py @@ -5229,3 +5229,142 @@ def test_add_regression_zero_plus_small(self): assert result_yx == result_xy, f"0 + x = {result_yx}, but x + 0 = {result_xy}" assert result_yx == x, f"0 + x = {result_yx}, expected {x}" + + +class TestQuadPrecisionHash: + """Test suite for QuadPrecision hash function. + + The hash implementation follows CPython's _Py_HashDouble algorithm to ensure + the invariant: hash(x) == hash(y) when x and y are numerically equal, + even across different types. + """ + + @pytest.mark.parametrize("value", [ + # Values that are exactly representable in binary floating point + "0.0", "1.0", "-1.0", "2.0", "-2.0", + "0.5", "0.25", "1.5", "-0.5", + "100.0", "-100.0", + # Powers of 2 are exactly representable + "0.125", "0.0625", "4.0", "8.0", + ]) + def test_hash_matches_float(self, value): + """Test that hash(QuadPrecision) == hash(float) for exactly representable values. + + Note: Only values that are exactly representable in both float64 and float128 + should match. Values like 0.1, 0.3 will have different hashes because they + have different binary representations at different precisions. + """ + quad_val = QuadPrecision(value) + float_val = float(value) + assert hash(quad_val) == hash(float_val) + + @pytest.mark.parametrize("value", [0.1, 0.3, 0.7, 1.1, 2.3, 1e300, 1e-300]) + def test_hash_matches_float_from_float(self, value): + """Test that QuadPrecision created from float has same hash as that float. + + When creating QuadPrecision from a Python float, the value is converted + from the float's double precision representation, so they should be + numerically equal and have the same hash. + """ + quad_val = QuadPrecision(value) # Created from float, not string + assert hash(quad_val) == hash(value) + + @pytest.mark.parametrize("value", [0, 1, -1, 2, -2, 100, -100, 1000, -1000]) + def test_hash_matches_int(self, value): + """Test that hash(QuadPrecision) == hash(int) for integer values.""" + quad_val = QuadPrecision(value) + assert hash(quad_val) == hash(value) + + def test_hash_matches_large_int(self): + """Test that hash(QuadPrecision) == hash(int) for large integers.""" + big_int = 10**20 + quad_val = QuadPrecision(str(big_int)) + assert hash(quad_val) == hash(big_int) + + def test_hash_infinity(self): + """Test that infinity hash matches Python's float infinity hash.""" + assert hash(QuadPrecision("inf")) == hash(float("inf")) + assert hash(QuadPrecision("-inf")) == hash(float("-inf")) + # Standard PyHASH_INF values + assert hash(QuadPrecision("inf")) == 314159 + assert hash(QuadPrecision("-inf")) == -314159 + + def test_hash_nan_unique(self): + """Test that each NaN instance gets a unique hash (pointer-based).""" + nan1 = QuadPrecision("nan") + nan2 = QuadPrecision("nan") + # NaN instances should have different hashes (based on object identity) + assert hash(nan1) != hash(nan2) + + def test_hash_nan_same_instance(self): + """Test that the same NaN instance has consistent hash.""" + nan = QuadPrecision("nan") + assert hash(nan) == hash(nan) + + def test_hash_negative_one(self): + """Test that hash(-1) returns -2 (Python's hash convention).""" + # In Python, hash(-1) returns -2 because -1 is reserved for errors + assert hash(QuadPrecision(-1.0)) == -2 + assert hash(QuadPrecision("-1.0")) == -2 + + def test_hash_set_membership(self): + """Test that QuadPrecision values work correctly in sets.""" + vals = [QuadPrecision(1.0), QuadPrecision(2.0), QuadPrecision(1.0)] + unique_set = set(vals) + assert len(unique_set) == 2 + + def test_hash_set_cross_type(self): + """Test that QuadPrecision and float with same value are in same set bucket.""" + s = {QuadPrecision(1.0)} + s.add(1.0) + assert len(s) == 1 + + def test_hash_dict_key(self): + """Test that QuadPrecision values work as dict keys.""" + d = {QuadPrecision(1.0): "one", QuadPrecision(2.0): "two"} + assert d[QuadPrecision(1.0)] == "one" + assert d[QuadPrecision(2.0)] == "two" + + def test_hash_dict_cross_type_lookup(self): + """Test that dict lookup works with float keys when hash matches.""" + d = {QuadPrecision(1.0): "one"} + # Float lookup should work if hash and eq both work + assert d.get(1.0) == "one" + + @pytest.mark.parametrize("value", [ + # Powers of 2 outside double range but within quad range + # Double max exponent is ~1024, quad max is ~16384 + 2**1100, 2**2000, 2**5000, 2**10000, + -(2**1100), -(2**2000), + # Small powers of 2 (subnormal in double, normal in quad) + 2**(-1100), 2**(-2000), + ]) + def test_hash_extreme_integers_outside_double_range(self, value): + """Test hash matches Python int for values outside double range. + + We use powers of 2 which are exactly representable in quad precision. + Since these integers are exact, hash(QuadPrecision(x)) must equal hash(x). + """ + quad_val = QuadPrecision(value) + assert hash(quad_val) == hash(value) + + @pytest.mark.parametrize("value", [ + "1e500", "-1e500", "1e1000", "-1e1000", "1e-500", "-1e-500", + "1.23456789e500", "-9.87654321e-600", + ]) + def test_hash_matches_mpmath(self, value): + """Test hash matches mpmath at quad precision (113 bits). + + mpmath with 113-bit precision represents the same value as QuadPrecision, + so their hashes must match. + """ + mp.prec = 113 + quad_val = QuadPrecision(value) + mpf_val = mp.mpf(value) + assert hash(quad_val) == hash(mpf_val) + + @pytest.mark.parametrize("backend", ["sleef", "longdouble"]) + def test_hash_backends(self, backend): + """Test hash works for both backends.""" + quad_val = QuadPrecision(1.5, backend=backend) + assert hash(quad_val) == hash(1.5)