Skip to content

Commit 090c173

Browse files
committed
Set max tile-unroll to 4 as a heuristic.
1 parent fb7bf0e commit 090c173

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

src/determinestrategy.jl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -242,8 +242,8 @@ function solve_tilesize(
242242
cost_vec::AbstractVector{Float64} = @view(ls.cost_vec[:,1]),
243243
reg_pressure::AbstractVector{Int} = @view(ls.reg_pres[:,1])
244244
)
245-
maxT = isstaticloop(ls, tiled) ? looprangehint(ls, tiled) : 8#REGISTER_COUNT
246-
maxU = isstaticloop(ls, unrolled) ? looprangehint(ls, unrolled) : 4#REGISTER_COUNT
245+
maxT = isstaticloop(ls, tiled) ? looprangehint(ls, tiled) : 4#REGISTER_COUNT
246+
maxU = isstaticloop(ls, unrolled) ? looprangehint(ls, unrolled) : 8#REGISTER_COUNT
247247
solve_tilesize(cost_vec, reg_pressure, maxU, maxT)
248248
end
249249

test/runtests.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ gemmq = :(for i ∈ 1:size(A,1), j ∈ 1:size(B,2)
4343
end)
4444

4545
lsgemm = LoopVectorization.LoopSet(gemmq);
46-
U, T = LoopVectorization.VectorizationBase.REGISTER_COUNT == 16 ? (3,4) : (4, 6)
46+
U, T = LoopVectorization.VectorizationBase.REGISTER_COUNT == 16 ? (3,4) : (6, 4)
4747
@test LoopVectorization.choose_order(lsgemm) == (Symbol[:j,:i,:k], U, T)
4848

4949
function mygemm!(C, A, B)

0 commit comments

Comments
 (0)