@@ -728,43 +728,46 @@ static const std::string gpu_pipeline =
728728 " linalg-fuse-elementwise-ops,"
729729 " arith-expand,"
730730 " memref-expand,"
731- " arith-bufferize,"
732- " func-bufferize,"
733731 " func.func(empty-tensor-to-alloc-tensor),"
734- " func.func(scf-bufferize),"
735- " func.func(tensor-bufferize),"
736- " func.func(bufferization-bufferize),"
737- " func.func(linalg-bufferize),"
738- " func.func(linalg-detensorize),"
739- " func.func(tensor-bufferize),"
732+ " func.func(tile-for-gpu{tile-sizes=32 in-regions}),"
733+ " func.func(tile-for-gpu{tile-sizes=1 in-regions}),"
740734 " region-bufferize,"
741735 " canonicalize,"
742- " func.func(finalizing-bufferize),"
736+ " one-shot-bufferize,"
737+ " cse,"
738+ " canonicalize,"
739+ " scf-forall-to-parallel,"
740+ " cse,"
741+ " canonicalize,"
743742 " imex-remove-temporaries,"
744- " func.func(convert-linalg-to-parallel-loops),"
745- " func.func(scf-parallel-loop-fusion),"
746- // is add-outer-parallel-loop needed?
747- " func.func(imex-add-outer-parallel-loop),"
743+ " buffer-deallocation-pipeline,"
744+ " func.func(convert-linalg-to-loops),"
748745 " func.func(gpu-map-parallel-loops),"
749- " func.func(convert-parallel-loops-to-gpu),"
746+ " convert-parallel-loops-to-gpu,"
747+ " canonicalize,"
748+ " cse,"
750749 " func.func(insert-gpu-allocs{in-regions=1}),"
751750 " func.func(insert-gpu-copy),"
752751 " drop-regions,"
753752 " canonicalize,"
754- " func.func(lower-affine),"
755753 " gpu-kernel-outlining,"
754+ " convert-scf-to-cf,"
755+ " convert-cf-to-llvm,"
756756 " canonicalize,"
757757 " cse,"
758- " gpu.module(strip-debuginfo,convert-gpu-to-nvvm),"
759- " nvvm-attach-target,"
758+ " gpu.module(strip-debuginfo,"
759+ " convert-gpu-to-nvvm),"
760+ " nvvm-attach-target{chip=sm_80 O=3},"
760761 " func.func(gpu-async-region),"
761762 " expand-strided-metadata,"
762763 " lower-affine,"
763764 " gpu-to-llvm,"
764- " gpu-module-to-binary{format=fatbin},"
765765 " convert-func-to-llvm,"
766766 " convert-math-to-llvm,"
767767 " finalize-memref-to-llvm,"
768+ " canonicalize,"
769+ " cse,"
770+ " gpu-module-to-binary{format=fatbin},"
768771 " reconcile-unrealized-casts" ;
769772
770773const std::string _passes (get_text_env (" SHARPY_PASSES" ));
0 commit comments