Skip to content

Commit 78a070f

Browse files
kshyattlkdvosKatharine Hyatt
authored
Start on CUDA extension (#325)
* Start on CUDA extension * More small fixes * move collect in * More test via CPU * Update example * Comments * Fixup diagonal * More comments * Update test/cuda/tensors.jl Co-authored-by: Lukas Devos <[email protected]> * Update test/cuda/tensors.jl Co-authored-by: Lukas Devos <[email protected]> * Update test/cuda/tensors.jl Co-authored-by: Lukas Devos <[email protected]> * Update test/cuda/tensors.jl Co-authored-by: Lukas Devos <[email protected]> * Fix bad end * Make format happy * Small updates * Update Project.toml * Try to fix spacing * one more format attempt * two more format attempt --------- Co-authored-by: Lukas Devos <[email protected]> Co-authored-by: Katharine Hyatt <[email protected]>
1 parent 59c5d97 commit 78a070f

File tree

8 files changed

+873
-61
lines changed

8 files changed

+873
-61
lines changed

.buildkite/pipeline.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,12 @@ steps:
1515
queue: "juliagpu"
1616
cuda: "*"
1717
if: build.message !~ /\[skip tests\]/
18-
timeout_in_minutes: 30
18+
timeout_in_minutes: 60
1919
matrix:
2020
setup:
2121
julia:
2222
- "1.10"
23-
- "1.11"
23+
- "1.12"
2424

2525
- label: "Julia {{matrix.julia}} -- AMDGPU"
2626
plugins:
@@ -36,9 +36,9 @@ steps:
3636
rocm: "*"
3737
rocmgpu: "*"
3838
if: build.message !~ /\[skip tests\]/
39-
timeout_in_minutes: 30
39+
timeout_in_minutes: 60
4040
matrix:
4141
setup:
4242
julia:
4343
- "1.10"
44-
- "1.11"
44+
- "1.12"

Project.toml

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,23 +18,29 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6"
1818
VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"
1919

2020
[weakdeps]
21+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
2122
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
2223
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
24+
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
2325

2426
[extensions]
27+
TensorKitCUDAExt = ["CUDA", "cuTENSOR"]
2528
TensorKitChainRulesCoreExt = "ChainRulesCore"
2629
TensorKitFiniteDifferencesExt = "FiniteDifferences"
2730

2831
[compat]
32+
Adapt = "4"
2933
Aqua = "0.6, 0.7, 0.8"
3034
ArgParse = "1.2.0"
35+
CUDA = "5.9"
3136
ChainRulesCore = "1"
3237
ChainRulesTestUtils = "1"
3338
Combinatorics = "1"
3439
FiniteDifferences = "0.12"
40+
GPUArrays = "11.3.1"
3541
LRUCache = "1.0.2"
3642
LinearAlgebra = "1"
37-
MatrixAlgebraKit = "0.6.0"
43+
MatrixAlgebraKit = "0.6.1"
3844
OhMyThreads = "0.8.0"
3945
Printf = "1"
4046
Random = "1"
@@ -48,21 +54,26 @@ TestExtras = "0.2,0.3"
4854
TupleTools = "1.1"
4955
VectorInterface = "0.4.8, 0.5"
5056
Zygote = "0.7"
57+
cuTENSOR = "2"
5158
julia = "1.10"
5259

5360
[extras]
54-
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
61+
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
5562
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
63+
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
64+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
5665
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
5766
ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
5867
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
5968
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
69+
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
6070
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
6171
SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
6272
TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2"
6373
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
6474
TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a"
6575
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
76+
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
6677

6778
[targets]
68-
test = ["ArgParse", "Aqua", "Combinatorics", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "SafeTestsets", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
79+
test = ["ArgParse", "Adapt", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
module TensorKitCUDAExt
2+
3+
using CUDA, CUDA.CUBLAS, CUDA.CUSOLVER, LinearAlgebra
4+
using CUDA: @allowscalar
5+
using cuTENSOR: cuTENSOR
6+
import CUDA: rand as curand, rand! as curand!, randn as curandn, randn! as curandn!
7+
8+
using TensorKit
9+
using TensorKit.Factorizations
10+
using TensorKit.Strided
11+
using TensorKit.Factorizations: AbstractAlgorithm
12+
using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap, scalartype, project_symmetric_and_check
13+
import TensorKit: randisometry, rand, randn
14+
15+
using TensorKit: MatrixAlgebraKit
16+
17+
using Random
18+
19+
include("cutensormap.jl")
20+
21+
end
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
const CuTensorMap{T, S, N₁, N₂} = TensorMap{T, S, N₁, N₂, CuVector{T, CUDA.DeviceMemory}}
2+
const CuTensor{T, S, N} = CuTensorMap{T, S, N, 0}
3+
4+
const AdjointCuTensorMap{T, S, N₁, N₂} = AdjointTensorMap{T, S, N₁, N₂, CuTensorMap{T, S, N₁, N₂}}
5+
6+
function CuTensorMap(t::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂, A}
7+
return CuTensorMap{T, S, N₁, N₂}(CuArray{T}(t.data), space(t))
8+
end
9+
10+
# project_symmetric! doesn't yet work for GPU types, so do this on the host, then copy
11+
function TensorKit.project_symmetric_and_check(::Type{T}, ::Type{A}, data::AbstractArray, V::TensorMapSpace; tol = sqrt(eps(real(float(eltype(data)))))) where {T, A <: CuVector{T}}
12+
h_t = TensorKit.TensorMapWithStorage{T, Vector{T}}(undef, V)
13+
h_t = TensorKit.project_symmetric!(h_t, Array(data))
14+
# verify result
15+
isapprox(Array(reshape(data, dims(h_t))), convert(Array, h_t); atol = tol) ||
16+
throw(ArgumentError("Data has non-zero elements at incompatible positions"))
17+
return TensorKit.TensorMapWithStorage{T, A}(A(h_t.data), V)
18+
end
19+
20+
for (fname, felt) in ((:zeros, :zero), (:ones, :one))
21+
@eval begin
22+
function CUDA.$fname(
23+
codomain::TensorSpace{S},
24+
domain::TensorSpace{S} = one(codomain)
25+
) where {S <: IndexSpace}
26+
return CUDA.$fname(codomain domain)
27+
end
28+
function CUDA.$fname(
29+
::Type{T}, codomain::TensorSpace{S},
30+
domain::TensorSpace{S} = one(codomain)
31+
) where {T, S <: IndexSpace}
32+
return CUDA.$fname(T, codomain domain)
33+
end
34+
CUDA.$fname(V::TensorMapSpace) = CUDA.$fname(Float64, V)
35+
function CUDA.$fname(::Type{T}, V::TensorMapSpace) where {T}
36+
t = CuTensorMap{T}(undef, V)
37+
fill!(t, $felt(T))
38+
return t
39+
end
40+
end
41+
end
42+
43+
for randfun in (:curand, :curandn)
44+
randfun! = Symbol(randfun, :!)
45+
@eval begin
46+
# converting `codomain` and `domain` into `HomSpace`
47+
function $randfun(
48+
codomain::TensorSpace{S},
49+
domain::TensorSpace{S} = one(codomain),
50+
) where {S <: IndexSpace}
51+
return $randfun(codomain domain)
52+
end
53+
function $randfun(
54+
::Type{T}, codomain::TensorSpace{S},
55+
domain::TensorSpace{S} = one(codomain),
56+
) where {T, S <: IndexSpace}
57+
return $randfun(T, codomain domain)
58+
end
59+
function $randfun(
60+
rng::Random.AbstractRNG, ::Type{T},
61+
codomain::TensorSpace{S},
62+
domain::TensorSpace{S} = one(codomain),
63+
) where {T, S <: IndexSpace}
64+
return $randfun(rng, T, codomain domain)
65+
end
66+
67+
# filling in default eltype
68+
$randfun(V::TensorMapSpace) = $randfun(Float64, V)
69+
function $randfun(rng::Random.AbstractRNG, V::TensorMapSpace)
70+
return $randfun(rng, Float64, V)
71+
end
72+
73+
# filling in default rng
74+
function $randfun(::Type{T}, V::TensorMapSpace) where {T}
75+
return $randfun(Random.default_rng(), T, V)
76+
end
77+
78+
# implementation
79+
function $randfun(
80+
rng::Random.AbstractRNG, ::Type{T},
81+
V::TensorMapSpace
82+
) where {T}
83+
t = CuTensorMap{T}(undef, V)
84+
$randfun!(rng, t)
85+
return t
86+
end
87+
88+
function $randfun!(rng::Random.AbstractRNG, t::CuTensorMap)
89+
for (_, b) in blocks(t)
90+
$randfun!(rng, b)
91+
end
92+
return t
93+
end
94+
end
95+
end
96+
97+
# Scalar implementation
98+
#-----------------------
99+
function TensorKit.scalar(t::CuTensorMap{T, S, 0, 0}) where {T, S}
100+
inds = findall(!iszero, t.data)
101+
return isempty(inds) ? zero(scalartype(t)) : @allowscalar @inbounds t.data[only(inds)]
102+
end
103+
104+
function Base.convert(
105+
TT::Type{CuTensorMap{T, S, N₁, N₂}},
106+
t::AbstractTensorMap{<:Any, S, N₁, N₂}
107+
) where {T, S, N₁, N₂}
108+
if typeof(t) === TT
109+
return t
110+
else
111+
tnew = TT(undef, space(t))
112+
return copy!(tnew, t)
113+
end
114+
end
115+
116+
function LinearAlgebra.isposdef(t::CuTensorMap)
117+
domain(t) == codomain(t) ||
118+
throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same"))
119+
InnerProductStyle(spacetype(t)) === EuclideanInnerProduct() || return false
120+
for (c, b) in blocks(t)
121+
# do our own hermitian check
122+
isherm = MatrixAlgebraKit.ishermitian(b; atol = eps(real(eltype(b))), rtol = eps(real(eltype(b))))
123+
isherm || return false
124+
isposdef(Hermitian(b)) || return false
125+
end
126+
return true
127+
end
128+
129+
function Base.promote_rule(
130+
::Type{<:TT₁},
131+
::Type{<:TT₂}
132+
) where {
133+
S, N₁, N₂, TTT₁, TTT₂,
134+
TT₁ <: CuTensorMap{TTT₁, S, N₁, N₂},
135+
TT₂ <: CuTensorMap{TTT₂, S, N₁, N₂},
136+
}
137+
T = TensorKit.VectorInterface.promote_add(TTT₁, TTT₂)
138+
return CuTensorMap{T, S, N₁, N₂}
139+
end
140+
141+
# CuTensorMap exponentation:
142+
function TensorKit.exp!(t::CuTensorMap)
143+
domain(t) == codomain(t) ||
144+
error("Exponential of a tensor only exist when domain == codomain.")
145+
!MatrixAlgebraKit.ishermitian(t) && throw(ArgumentError("`exp!` is currently only supported on hermitian CUDA tensors"))
146+
for (c, b) in blocks(t)
147+
copy!(b, parent(Base.exp(Hermitian(b))))
148+
end
149+
return t
150+
end
151+
152+
# functions that don't map ℝ to (a subset of) ℝ
153+
for f in (:sqrt, :log, :asin, :acos, :acosh, :atanh, :acoth)
154+
sf = string(f)
155+
@eval function Base.$f(t::CuTensorMap)
156+
domain(t) == codomain(t) ||
157+
throw(SpaceMismatch("`$($sf)` of a tensor only exists when domain == codomain"))
158+
!MatrixAlgebraKit.ishermitian(t) && throw(ArgumentError("`$($sf)` is currently only supported on hermitian CUDA tensors"))
159+
T = complex(float(scalartype(t)))
160+
tf = similar(t, T)
161+
for (c, b) in blocks(t)
162+
copy!(block(tf, c), parent($f(Hermitian(b))))
163+
end
164+
return tf
165+
end
166+
end

src/tensors/linalg.jl

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -270,20 +270,11 @@ function _norm(blockiter, p::Real, init::Real)
270270
return mapreduce(max, blockiter; init = init) do (c, b)
271271
return isempty(b) ? init : oftype(init, LinearAlgebra.normInf(b))
272272
end
273-
elseif p == 2
274-
= mapreduce(+, blockiter; init = init) do (c, b)
275-
return isempty(b) ? init : oftype(init, dim(c) * LinearAlgebra.norm2(b)^2)
273+
elseif p > 0 # finite positive p
274+
np = sum(blockiter; init) do (c, b)
275+
return oftype(init, dim(c) * norm(b, p)^p)
276276
end
277-
return sqrt(n²)
278-
elseif p == 1
279-
return mapreduce(+, blockiter; init = init) do (c, b)
280-
return isempty(b) ? init : oftype(init, dim(c) * sum(abs, b))
281-
end
282-
elseif p > 0
283-
nᵖ = mapreduce(+, blockiter; init = init) do (c, b)
284-
return isempty(b) ? init : oftype(init, dim(c) * LinearAlgebra.normp(b, p)^p)
285-
end
286-
return (nᵖ)^inv(oftype(nᵖ, p))
277+
return np^(inv(oftype(np, p)))
287278
else
288279
msg = "Norm with non-positive p is not defined for `AbstractTensorMap`"
289280
throw(ArgumentError(msg))
@@ -298,8 +289,8 @@ function LinearAlgebra.rank(
298289
)
299290
r = 0 * dim(first(allunits(sectortype(t))))
300291
dim(t) == 0 && return r
301-
S = LinearAlgebra.svdvals(t)
302-
tol = max(atol, rtol * maximum(first, values(S)))
292+
S = MatrixAlgebraKit.svd_vals(t)
293+
tol = max(atol, rtol * maximum(parent(S)))
303294
for (c, b) in pairs(S)
304295
if !isempty(b)
305296
r += dim(c) * count(>(tol), b)
@@ -316,9 +307,9 @@ function LinearAlgebra.cond(t::AbstractTensorMap, p::Real = 2)
316307
throw(SpaceMismatch("`cond` requires domain and codomain to be the same"))
317308
return zero(real(float(scalartype(t))))
318309
end
319-
S = LinearAlgebra.svdvals(t)
320-
maxS = maximum(first, values(S))
321-
minS = minimum(last, values(S))
310+
S = MatrixAlgebraKit.svd_vals(t)
311+
maxS = maximum(parent(S))
312+
minS = minimum(parent(S))
322313
return iszero(maxS) ? oftype(maxS, Inf) : (maxS / minS)
323314
else
324315
throw(ArgumentError("cond currently only defined for p=2"))

0 commit comments

Comments
 (0)