From 9bfd83322771c464fcd69c77d904c823913577c6 Mon Sep 17 00:00:00 2001 From: Mark Hammons Date: Sun, 28 May 2023 14:41:18 +0200 Subject: [PATCH 1/7] chore: update benchmarks --- .github/workflows/benchmarks.yml | 4 +- .../slinc/TransferBenchmarkShape.scala | 51 +++++++++++++++---- ...NoJIT.scala => BindingsBenchmarks17.scala} | 2 +- .../slinc/BindingsBenchmarksStandard.scala | 16 ------ ...NoJIT.scala => TransferBenchmarks17.scala} | 2 +- .../slinc/TransferBenchmarksStandard.scala | 16 ------ j17/src/fr/hammons/slinc/Slinc17.scala | 2 - ...NoJIT.scala => BindingsBenchmarks19.scala} | 2 +- .../slinc/BindingsBenchmarksStandard.scala | 16 ------ ...NoJIT.scala => TransferBenchmarks19.scala} | 2 +- .../slinc/TransferBenchmarksStandard.scala | 16 ------ 11 files changed, 47 insertions(+), 82 deletions(-) rename j17/benchmarks/test/src/fr/hammons/slinc/{BindingsBenchmarksNoJIT.scala => BindingsBenchmarks17.scala} (84%) delete mode 100644 j17/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarksStandard.scala rename j17/benchmarks/test/src/fr/hammons/slinc/{TransferBenchmarksNoJIT.scala => TransferBenchmarks17.scala} (84%) delete mode 100644 j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarksStandard.scala rename j19/benchmarks/test/src/fr/hammons/slinc/{TransferBenchmarksNoJIT.scala => BindingsBenchmarks19.scala} (83%) delete mode 100644 j19/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarksStandard.scala rename j19/benchmarks/test/src/fr/hammons/slinc/{BindingsBenchmarksNoJIT.scala => TransferBenchmarks19.scala} (83%) delete mode 100644 j19/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarksStandard.scala diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index dd2d2911..e20ec605 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -12,7 +12,7 @@ jobs: os: [ubuntu-latest] jvm: [17,19] benchmark: [BindingsBenchmarks, TransferBenchmarks] - jit: [NoJIT, Standard] + jit: [disabled, standard] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 @@ -21,7 +21,7 @@ jobs: with: jvm: temurin:1.${{ matrix.jvm }} apps: mill - - run: mill j${{ matrix.jvm }}.benchmarks.test -f1 -wi 2 -i 2 -o j${{ matrix.jvm }}-${{ matrix.os }}.bench -rff j${{ matrix.jvm }}-${{ matrix.os }}.json -rf json .*${{ matrix.benchmark }}${{ matrix.jit }}.* + - run: mill j${{ matrix.jvm }}.benchmarks.test -jvmArgsAppend "-Dslinc.jitc.mode=${{ matrix.jit }}" -f1 -wi 2 -i 2 -o j${{ matrix.jvm }}-${{ matrix.os }}.bench -rff j${{ matrix.jvm }}-${{ matrix.os }}.json -rf json .*${{ matrix.benchmark }}${{ matrix.jvm }}.* - run: scala-cli run scripts/PublishBenchmarkReport.sc -- "Java ${{ matrix.jvm}}" ${{ matrix.os }} out/j${{ matrix.jvm }}/benchmarks/test/jmhRun.dest/j${{ matrix.jvm }}-${{ matrix.os }}.json ${{ matrix.benchmark }} ${{ matrix.jit }} >> $GITHUB_STEP_SUMMARY - uses: actions/upload-artifact@v3 with: diff --git a/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala b/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala index ec431f4b..67f4f0a6 100644 --- a/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala +++ b/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala @@ -1,19 +1,20 @@ package fr.hammons.slinc +import fr.hammons.slinc.types.CLong import org.openjdk.jmh.annotations.{Scope as _, *} -case class A(a: Int, b: B, c: Int) -case class B(a: Int, b: Int) +case class A(a: Int, b: B, c: Int) derives Struct +case class B(a: Int, b: Int) derives Struct +@Warmup(iterations = 5) +@Measurement(iterations = 5) trait TransferBenchmarkShape(val s: Slinc): import s.{given, *} - case class C(a: Int, b: D, c: Int) - case class D(a: Int, b: Int) - given Struct[A] = Struct.derived - given Struct[B] = Struct.derived - given Struct[C] = Struct.derived - given Struct[D] = Struct.derived + case class C(a: Int, b: D, c: Int) derives Struct + case class D(a: CLong, b: Int) derives Struct + case class E(a: Int, b: Int) derives Struct + case class F(a: Int, e: E, c: Int) derives Struct val aPtr = Scope.global { Ptr.blank[A] @@ -25,7 +26,7 @@ trait TransferBenchmarkShape(val s: Slinc): Ptr.blank[C] } - val c = C(1, D(2, 3), 4) + val c = C(1, D(CLong(2), 3), 4) @Benchmark def topLevelRead = @@ -50,7 +51,37 @@ trait TransferBenchmarkShape(val s: Slinc): ) @Benchmark - def allocateIntPointer = + def allocatePrimitivePointer = Scope.confined( Ptr.copy(3) ) + + @Benchmark + def allocateAliasPointer = + Scope.confined( + Ptr.copy(CLong(3)) + ) + + @Benchmark + def allocateComplexWAliasInnerStructPointer = + Scope.confined( + Ptr.copy(C(1, D(CLong(2), 3), 4)) + ) + + @Benchmark + def allocateSimpleWAliasInnerStructPointer = + Scope.confined( + Ptr.copy(D(CLong(2), 3)) + ) + + @Benchmark + def allocatePtrFromArray = + Scope.confined( + Ptr.copy(Array(1, 2, 3)) + ) + + @Benchmark + def allocatePtrFromCLongArray = + Scope.confined( + Ptr.copy(Array(CLong(1), CLong(2), CLong(3))) + ) diff --git a/j17/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarksNoJIT.scala b/j17/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarks17.scala similarity index 84% rename from j17/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarksNoJIT.scala rename to j17/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarks17.scala index 483e3a55..dc113fd0 100644 --- a/j17/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarksNoJIT.scala +++ b/j17/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarks17.scala @@ -13,4 +13,4 @@ import java.util.concurrent.TimeUnit ) ) @OutputTimeUnit(TimeUnit.MICROSECONDS) -class BindingsBenchmarksNoJIT extends BindingsBenchmarkShape(Slinc17.noJit) +class BindingsBenchmarks17 extends BindingsBenchmarkShape(Slinc17.noJit) diff --git a/j17/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarksStandard.scala b/j17/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarksStandard.scala deleted file mode 100644 index 9ca1c1c9..00000000 --- a/j17/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarksStandard.scala +++ /dev/null @@ -1,16 +0,0 @@ -package fr.hammons.slinc - -import org.openjdk.jmh.annotations.*, Mode.{SingleShotTime, Throughput} -import java.util.concurrent.TimeUnit - -@State(Scope.Thread) -@BenchmarkMode(Array(Throughput, SingleShotTime)) -@Fork( - jvmArgsAppend = Array( - "--add-modules=jdk.incubator.foreign", - "--enable-native-access=ALL-UNNAMED" - // "-XX:ActiveProcessorCount=1", - ) -) -@OutputTimeUnit(TimeUnit.MICROSECONDS) -class BindingsBenchmarksStandard extends BindingsBenchmarkShape(Slinc17.default) diff --git a/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarksNoJIT.scala b/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarks17.scala similarity index 84% rename from j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarksNoJIT.scala rename to j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarks17.scala index 8f0e0102..e09200fc 100644 --- a/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarksNoJIT.scala +++ b/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarks17.scala @@ -13,4 +13,4 @@ import java.util.concurrent.TimeUnit ) ) @OutputTimeUnit(TimeUnit.MICROSECONDS) -class TransferBenchmarksNoJIT extends TransferBenchmarkShape(Slinc17.noJit) +class TransferBenchmarks17 extends TransferBenchmarkShape(Slinc17.noJit) diff --git a/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarksStandard.scala b/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarksStandard.scala deleted file mode 100644 index deecb9f1..00000000 --- a/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarksStandard.scala +++ /dev/null @@ -1,16 +0,0 @@ -package fr.hammons.slinc - -import org.openjdk.jmh.annotations.*, Mode.{SingleShotTime, Throughput} -import java.util.concurrent.TimeUnit - -@State(Scope.Thread) -@BenchmarkMode(Array(Throughput, SingleShotTime)) -@Fork( - jvmArgsAppend = Array( - "--add-modules=jdk.incubator.foreign", - "--enable-native-access=ALL-UNNAMED" - // "-XX:ActiveProcessorCount=1", - ) -) -@OutputTimeUnit(TimeUnit.MICROSECONDS) -class TransferBenchmarksStandard extends TransferBenchmarkShape(Slinc17.default) diff --git a/j17/src/fr/hammons/slinc/Slinc17.scala b/j17/src/fr/hammons/slinc/Slinc17.scala index 7b27b641..995f40d6 100644 --- a/j17/src/fr/hammons/slinc/Slinc17.scala +++ b/j17/src/fr/hammons/slinc/Slinc17.scala @@ -20,8 +20,6 @@ class Slinc17(using @SlincImpl(17) object Slinc17: - private lazy val compiler = - scala.quoted.staging.Compiler.make(getClass().getClassLoader().nn) private[slinc] lazy val linker = CLinker.getInstance().nn val default = Slinc17() val noJit = Slinc17() diff --git a/j19/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarksNoJIT.scala b/j19/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarks19.scala similarity index 83% rename from j19/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarksNoJIT.scala rename to j19/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarks19.scala index 9c6f83b5..cb2b43c9 100644 --- a/j19/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarksNoJIT.scala +++ b/j19/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarks19.scala @@ -13,4 +13,4 @@ import java.util.concurrent.TimeUnit ) ) @OutputTimeUnit(TimeUnit.MICROSECONDS) -class TransferBenchmarksNoJIT extends TransferBenchmarkShape(Slinc19.noJit) +class BindingsBenchmarks19 extends BindingsBenchmarkShape(Slinc19.noJit) diff --git a/j19/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarksStandard.scala b/j19/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarksStandard.scala deleted file mode 100644 index 1772a014..00000000 --- a/j19/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarksStandard.scala +++ /dev/null @@ -1,16 +0,0 @@ -package fr.hammons.slinc - -import org.openjdk.jmh.annotations.*, Mode.{SingleShotTime, Throughput} -import java.util.concurrent.TimeUnit - -@State(Scope.Thread) -@BenchmarkMode(Array(Throughput, SingleShotTime)) -@Fork( - jvmArgsAppend = Array( - "--enable-preview", - "--enable-native-access=ALL-UNNAMED" - // "-XX:ActiveProcessorCount=1", - ) -) -@OutputTimeUnit(TimeUnit.MICROSECONDS) -class BindingsBenchmarksStandard extends BindingsBenchmarkShape(Slinc19.default) diff --git a/j19/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarksNoJIT.scala b/j19/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarks19.scala similarity index 83% rename from j19/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarksNoJIT.scala rename to j19/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarks19.scala index 2eb6beac..37ca86ad 100644 --- a/j19/benchmarks/test/src/fr/hammons/slinc/BindingsBenchmarksNoJIT.scala +++ b/j19/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarks19.scala @@ -13,4 +13,4 @@ import java.util.concurrent.TimeUnit ) ) @OutputTimeUnit(TimeUnit.MICROSECONDS) -class BindingsBenchmarksNoJIT extends BindingsBenchmarkShape(Slinc19.noJit) +class TransferBenchmarks19 extends TransferBenchmarkShape(Slinc19.noJit) diff --git a/j19/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarksStandard.scala b/j19/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarksStandard.scala deleted file mode 100644 index 7c5ce7b9..00000000 --- a/j19/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarksStandard.scala +++ /dev/null @@ -1,16 +0,0 @@ -package fr.hammons.slinc - -import org.openjdk.jmh.annotations.*, Mode.{SingleShotTime, Throughput} -import java.util.concurrent.TimeUnit - -@State(Scope.Thread) -@BenchmarkMode(Array(Throughput, SingleShotTime)) -@Fork( - jvmArgsAppend = Array( - "--enable-preview", - "--enable-native-access=ALL-UNNAMED" - // "-XX:ActiveProcessorCount=1", - ) -) -@OutputTimeUnit(TimeUnit.MICROSECONDS) -class TransferBenchmarksStandard extends TransferBenchmarkShape(Slinc19.default) From e1d20adbf3ef6038913f1755058e4fc654239c86 Mon Sep 17 00:00:00 2001 From: Mark Hammons Date: Mon, 29 May 2023 14:17:21 +0200 Subject: [PATCH 2/7] chore: laying the groundwork for JITC writers --- core/src/fr/hammons/slinc/Bytes.scala | 6 + core/src/fr/hammons/slinc/Ptr.scala | 21 ++- core/src/fr/hammons/slinc/Struct.scala | 4 +- .../src/fr/hammons/slinc/TypeDescriptor.scala | 17 ++- .../fr/hammons/slinc/descriptors/Writer.scala | 8 + .../slinc/descriptors/WriterContext.scala | 6 + .../hammons/slinc/jitc/Intrumentation.scala | 8 +- .../fr/hammons/slinc/jitc/JitCService.scala | 5 + .../fr/hammons/slinc/jitc/OptimizableFn.scala | 49 +++--- .../slinc/modules/ReadWriteModule.scala | 27 ++-- .../hammons/slinc/jitc/JitSpecification.scala | 49 +++--- .../slinc/modules/ReadWriteModule17.scala | 142 ++++++++++++++++-- .../slinc/modules/ReadWriteModule19.scala | 15 +- 13 files changed, 273 insertions(+), 84 deletions(-) create mode 100644 core/src/fr/hammons/slinc/descriptors/Writer.scala create mode 100644 core/src/fr/hammons/slinc/descriptors/WriterContext.scala diff --git a/core/src/fr/hammons/slinc/Bytes.scala b/core/src/fr/hammons/slinc/Bytes.scala index 481420cc..78ec7bc8 100644 --- a/core/src/fr/hammons/slinc/Bytes.scala +++ b/core/src/fr/hammons/slinc/Bytes.scala @@ -2,6 +2,8 @@ package fr.hammons.slinc import scala.quoted.{ToExpr, Quotes} import fr.hammons.slinc.types.SizeT +import scala.quoted.FromExpr +import scala.quoted.Expr opaque type Bytes = Long @@ -29,3 +31,7 @@ object Bytes: given Numeric[Bytes] = Numeric.LongIsIntegral given ToExpr[Bytes] with def apply(t: Bytes)(using Quotes) = ToExpr.LongToExpr[Long].apply(t) + + given FromExpr[Bytes] with + def unapply(x: Expr[Bytes])(using Quotes) = + FromExpr.LongFromExpr[Long].unapply(x) diff --git a/core/src/fr/hammons/slinc/Ptr.scala b/core/src/fr/hammons/slinc/Ptr.scala index aab0bb26..cf25c88e 100644 --- a/core/src/fr/hammons/slinc/Ptr.scala +++ b/core/src/fr/hammons/slinc/Ptr.scala @@ -4,6 +4,7 @@ import scala.reflect.ClassTag import fr.hammons.slinc.modules.DescriptorModule import fr.hammons.slinc.modules.ReadWriteModule import fr.hammons.slinc.fnutils.{Fn, toNativeCompatible} +import fr.hammons.slinc.descriptors.WriterContext class Ptr[A](private[slinc] val mem: Mem, private[slinc] val offset: Bytes): inline def `unary_!`(using rwm: ReadWriteModule): A = @@ -29,7 +30,11 @@ class Ptr[A](private[slinc] val mem: Mem, private[slinc] val offset: Bytes): r: ReadWriteModule )(using ClassTag[A]): IArray[A] = IArray.unsafeFromArray( - r.readArray(mem.resize(DescriptorOf[A].size * size), offset, size) + r.readArray[A]( + mem.resize(DescriptorOf[A].size * size), + offset, + size + ) ) def `unary_!_=`(value: A)(using rwM: ReadWriteModule, desc: DescriptorOf[A]) = @@ -63,7 +68,12 @@ object Ptr: def copy[A]( a: Array[A] - )(using alloc: Allocator, descriptor: DescriptorOf[A], rwm: ReadWriteModule) = + )(using + alloc: Allocator, + descriptor: DescriptorOf[A], + rwm: ReadWriteModule, + dm: DescriptorModule + ) = val mem = alloc.allocate(DescriptorOf[A], a.size) rwm.writeArray(mem, Bytes(0), a) Ptr[A](mem, Bytes(0)) @@ -82,7 +92,12 @@ object Ptr: def copy( string: String - )(using Allocator, DescriptorOf[Byte], ReadWriteModule): Ptr[Byte] = copy( + )(using + Allocator, + DescriptorOf[Byte], + ReadWriteModule, + DescriptorModule + ): Ptr[Byte] = copy( string.getBytes("ASCII").nn :+ 0.toByte ) diff --git a/core/src/fr/hammons/slinc/Struct.scala b/core/src/fr/hammons/slinc/Struct.scala index 89c32a88..b2d47d4a 100644 --- a/core/src/fr/hammons/slinc/Struct.scala +++ b/core/src/fr/hammons/slinc/Struct.scala @@ -7,7 +7,7 @@ import modules.DescriptorModule import fr.hammons.slinc.modules.TransitionModule import fr.hammons.slinc.modules.ReadWriteModule import fr.hammons.slinc.modules.Reader -import fr.hammons.slinc.modules.Writer +import fr.hammons.slinc.modules.MemWriter trait Struct[A <: Product] extends DescriptorOf[A] @@ -35,7 +35,7 @@ object Struct: m: Mirror.ProductOf[A], rwm: ReadWriteModule, dm: DescriptorModule - ): Writer[A] = + ): MemWriter[A] = val offsets = dm.memberOffsets(memberDescriptors[A]) (mem, offset, value) => writeGenHelper( diff --git a/core/src/fr/hammons/slinc/TypeDescriptor.scala b/core/src/fr/hammons/slinc/TypeDescriptor.scala index 38ab3e13..c6326e6e 100644 --- a/core/src/fr/hammons/slinc/TypeDescriptor.scala +++ b/core/src/fr/hammons/slinc/TypeDescriptor.scala @@ -4,7 +4,6 @@ import modules.DescriptorModule import fr.hammons.slinc.modules.{ ReadWriteModule, Reader, - Writer, ArrayReader, readWriteModule } @@ -14,6 +13,7 @@ import fr.hammons.slinc.modules.TransitionModule import fr.hammons.slinc.modules.{ArgumentTransition, ReturnTransition} import scala.NonEmptyTuple import scala.language.implicitConversions +import fr.hammons.slinc.modules.MemWriter /** Describes types used by C interop */ @@ -28,7 +28,7 @@ sealed trait TypeDescriptor: dm.toCarrierType(this) val reader: (ReadWriteModule, DescriptorModule) ?=> Reader[Inner] - val writer: (ReadWriteModule, DescriptorModule) ?=> Writer[Inner] + val writer: (ReadWriteModule, DescriptorModule) ?=> MemWriter[Inner] val argumentTransition: ( TransitionModule, ReadWriteModule, @@ -55,7 +55,7 @@ sealed trait TypeDescriptor: } val arrayWriter - : (ReadWriteModule, DescriptorModule) ?=> Writer[Array[Inner]] = + : (ReadWriteModule, DescriptorModule) ?=> MemWriter[Array[Inner]] = val writer = this.writer val size = this.size (mem, offset, a) => @@ -167,7 +167,7 @@ case class AliasDescriptor[A](val real: TypeDescriptor) extends TypeDescriptor: val reader: (ReadWriteModule, DescriptorModule) ?=> Reader[Inner] = (rwm, _) ?=> (mem, bytes) => rwm.read(mem, bytes, real) - val writer: (ReadWriteModule, DescriptorModule) ?=> Writer[Inner] = + val writer: (ReadWriteModule, DescriptorModule) ?=> MemWriter[Inner] = (rwm, _) ?=> (mem, bytes, a) => rwm.write(mem, bytes, real, a) override val argumentTransition = @@ -191,7 +191,8 @@ case object VaListDescriptor extends TypeDescriptor: Inner ] = _.mem.asAddress - override val writer: (ReadWriteModule, DescriptorModule) ?=> Writer[Inner] = + override val writer + : (ReadWriteModule, DescriptorModule) ?=> MemWriter[Inner] = (mem, offset, value) => summon[ReadWriteModule].memWriter(mem, offset, value.mem) @@ -216,7 +217,8 @@ case class CUnionDescriptor(possibleTypes: Set[TypeDescriptor]) Inner ] = (i: Inner) => i.mem.asBase - override val writer: (ReadWriteModule, DescriptorModule) ?=> Writer[Inner] = + override val writer + : (ReadWriteModule, DescriptorModule) ?=> MemWriter[Inner] = summon[ReadWriteModule].unionWriter(this) case class SetSizeArrayDescriptor( @@ -231,7 +233,8 @@ case class SetSizeArrayDescriptor( summon[ReadWriteModule].readArray[contained.Inner](mem, offset, number) ) - override val writer: (ReadWriteModule, DescriptorModule) ?=> Writer[Inner] = + override val writer + : (ReadWriteModule, DescriptorModule) ?=> MemWriter[Inner] = (mem, offset, value) => summon[ReadWriteModule] .writeArray[contained.Inner](mem, offset, value.toArray) diff --git a/core/src/fr/hammons/slinc/descriptors/Writer.scala b/core/src/fr/hammons/slinc/descriptors/Writer.scala new file mode 100644 index 00000000..a459ab9b --- /dev/null +++ b/core/src/fr/hammons/slinc/descriptors/Writer.scala @@ -0,0 +1,8 @@ +package fr.hammons.slinc.descriptors + +import fr.hammons.slinc.jitc.OptimizableFn +import fr.hammons.slinc.Mem +import fr.hammons.slinc.Bytes +import fr.hammons.slinc.modules.MemWriter + +type Writer[A] = OptimizableFn[MemWriter[A], WriterContext] diff --git a/core/src/fr/hammons/slinc/descriptors/WriterContext.scala b/core/src/fr/hammons/slinc/descriptors/WriterContext.scala new file mode 100644 index 00000000..a9a983a9 --- /dev/null +++ b/core/src/fr/hammons/slinc/descriptors/WriterContext.scala @@ -0,0 +1,6 @@ +package fr.hammons.slinc.descriptors + +import fr.hammons.slinc.modules.DescriptorModule +import fr.hammons.slinc.modules.ReadWriteModule + +final case class WriterContext(dm: DescriptorModule, rwm: ReadWriteModule) diff --git a/core/src/fr/hammons/slinc/jitc/Intrumentation.scala b/core/src/fr/hammons/slinc/jitc/Intrumentation.scala index 7f5be5ab..01f84c29 100644 --- a/core/src/fr/hammons/slinc/jitc/Intrumentation.scala +++ b/core/src/fr/hammons/slinc/jitc/Intrumentation.scala @@ -2,6 +2,7 @@ package fr.hammons.slinc.jitc import java.util.concurrent.atomic.AtomicInteger import fr.hammons.slinc.fnutils.Fn +import scala.annotation.implicitNotFound trait Instrumentation: def getCount(): Int @@ -12,9 +13,10 @@ trait Instrumentation: def instrument[A](a: A): Instrumented[A] def apply[A, B <: Tuple, C, D, E](fn: A)(using - Fn[A, B, C], - C =:= Instrumented[D], - Fn[E, B, D] + @implicitNotFound( + "Could not find Fn[${A}, ${B}, Instrumented[${C}]" + ) ev1: Fn[A, B, Instrumented[C]], + ev2: Fn[E, B, C] ): InstrumentedFn[E] = fn.asInstanceOf[E] diff --git a/core/src/fr/hammons/slinc/jitc/JitCService.scala b/core/src/fr/hammons/slinc/jitc/JitCService.scala index 1f37f3b6..34606941 100644 --- a/core/src/fr/hammons/slinc/jitc/JitCService.scala +++ b/core/src/fr/hammons/slinc/jitc/JitCService.scala @@ -17,6 +17,7 @@ type JitCompiler = [A] => ( trait JitCService: def jitC(tag: UUID, c: JitCompiler => Unit): Unit def processedRecently(tag: UUID): Boolean + def async: Boolean object JitCService: lazy val standard = new JitCService: @@ -90,6 +91,8 @@ object JitCService: override def processedRecently(tag: ju.UUID): Boolean = workDone.getOpaque().nn.contains(tag) + override def async: Boolean = true + lazy val synchronous = new JitCService: private val wdoneCache = 32 given compiler: scala.quoted.staging.Compiler = @@ -111,3 +114,5 @@ object JitCService: override def processedRecently(tag: ju.UUID): Boolean = workDone.getOpaque().nn.contains(tag) + + override def async: Boolean = false diff --git a/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala b/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala index f40fdaeb..66616afc 100644 --- a/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala +++ b/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala @@ -3,28 +3,26 @@ package fr.hammons.slinc.jitc import java.util.concurrent.atomic.AtomicReference import java.util.UUID -class OptimizableFn[F]( +class OptimizableFn[F, G]( optimizer: JitCService, inst: Instrumentation = new CountbasedInstrumentation )( - f: (i: Instrumentation) => i.InstrumentedFn[F], + f: G ?=> (i: Instrumentation) => i.InstrumentedFn[F], limit: Int -)(optimized: JitCompiler => F): - private val _fn: AtomicReference[F] = AtomicReference(f(inst)) +)(optimized: G ?=> JitCompiler => F): + private val _fn: AtomicReference[F] = AtomicReference() val uuid = UUID.randomUUID().nn - private val _optFn: AtomicReference[F] = AtomicReference( - if inst.getCount() >= limit then - var opt: F | Null = null - optimizer.jitC(uuid, jitCompiler => opt = optimized(jitCompiler)) - opt - else null - ) + private val _optFn: AtomicReference[F] = AtomicReference() - def get: F = + def get(using G): F = val optFn = _optFn.getOpaque() + var fn = _fn.getOpaque() + if fn == null then + fn = f(inst) + _fn.set(fn) + if optFn != null then optFn - else - if inst.getCount() >= limit then + else if inst.getCount() >= limit then optimizer.jitC( uuid, jitCompiler => @@ -33,13 +31,17 @@ class OptimizableFn[F]( opt ) ) - _fn.getOpaque().nn - + if optimizer.async then fn.nn + else + while _optFn.getOpaque() == null do {} + _optFn.getOpaque().nn + else fn.nn + object OptimizableFn: val modeSetting = "slinc.jitc.mode" val limitSetting = "slinc.jitc.jit-limit" - def apply[F](optimized: JitCompiler => F)( - unoptimizedFn: (i: Instrumentation) => i.InstrumentedFn[F] + def apply[F, G](optimized: G ?=> JitCompiler => F)( + unoptimizedFn: G ?=> (i: Instrumentation) => i.InstrumentedFn[F] ) = val mode = sys.props.getOrElseUpdate("slinc.jitc.mode", "standard") mode match @@ -49,24 +51,19 @@ object OptimizableFn: limit match case None => throw Error("slinc.jitc.jit-limit should be an integer") case Some(value) => - new OptimizableFn[F]( + new OptimizableFn[F, G]( JitCService.standard, CountbasedInstrumentation() )(unoptimizedFn, value)(optimized) case "never" | "disabled" => - new OptimizableFn[F](JitCService.synchronous, IgnoreInstrumentation)( + new OptimizableFn[F, G](JitCService.synchronous, IgnoreInstrumentation)( unoptimizedFn, 1 )(optimized) case "immediate" => - new OptimizableFn[F](JitCService.synchronous, IgnoreInstrumentation)( + new OptimizableFn[F, G](JitCService.synchronous, IgnoreInstrumentation)( unoptimizedFn, 0 )(optimized) - - def standard[F]( - optimized: JitCompiler => F - )(unoptimizedFn: (i: Instrumentation) => i.InstrumentedFn[F], limit: Int) = - new OptimizableFn[F](JitCService.standard)(unoptimizedFn, limit)(optimized) diff --git a/core/src/fr/hammons/slinc/modules/ReadWriteModule.scala b/core/src/fr/hammons/slinc/modules/ReadWriteModule.scala index 2da55e90..a84ec9ae 100644 --- a/core/src/fr/hammons/slinc/modules/ReadWriteModule.scala +++ b/core/src/fr/hammons/slinc/modules/ReadWriteModule.scala @@ -5,32 +5,36 @@ import java.lang.invoke.MethodHandle import scala.reflect.ClassTag import scala.NonEmptyTuple import fr.hammons.slinc.fnutils.Fn +import fr.hammons.slinc.jitc.OptimizableFn +import scala.quoted.Expr +import scala.quoted.Quotes type Reader[A] = (Mem, Bytes) => A -type Writer[A] = (Mem, Bytes, A) => Unit +type MemWriter[A] = (Mem, Bytes, A) => Unit type ArrayReader[A] = (Mem, Bytes, Int) => Array[A] val readWriteModule = (rwm: ReadWriteModule) ?=> rwm trait ReadWriteModule: val byteReader: Reader[Byte] - val byteWriter: Writer[Byte] + val byteWriter: MemWriter[Byte] val shortReader: Reader[Short] - val shortWriter: Writer[Short] + val shortWriter: MemWriter[Short] val intReader: Reader[Int] - val intWriter: Writer[Int] + val intWriter: MemWriter[Int] + val intWritingExpr: Quotes ?=> Expr[MemWriter[Int]] val longReader: Reader[Long] - val longWriter: Writer[Long] + val longWriter: MemWriter[Long] val floatReader: Reader[Float] - val floatWriter: Writer[Float] + val floatWriter: MemWriter[Float] val doubleReader: Reader[Double] - val doubleWriter: Writer[Double] + val doubleWriter: MemWriter[Double] val memReader: Reader[Mem] - val memWriter: Writer[Mem] + val memWriter: MemWriter[Mem] def unionReader(td: TypeDescriptor): Reader[CUnion[? <: NonEmptyTuple]] - def unionWriter(td: TypeDescriptor): Writer[CUnion[? <: NonEmptyTuple]] + def unionWriter(td: TypeDescriptor): MemWriter[CUnion[? <: NonEmptyTuple]] def write( memory: Mem, @@ -56,3 +60,8 @@ trait ReadWriteModule: descriptor: CFunctionDescriptor, fn: => MethodHandle => Mem => A )(using Fn[A, ?, ?]): A + + def writeExpr(td: TypeDescriptor)(using Quotes): Expr[MemWriter[Any]] + def writeArrayExpr(td: TypeDescriptor)(using + Quotes + ): Expr[MemWriter[Array[Any]]] diff --git a/core/test/src/fr/hammons/slinc/jitc/JitSpecification.scala b/core/test/src/fr/hammons/slinc/jitc/JitSpecification.scala index 48c03e65..88d30620 100644 --- a/core/test/src/fr/hammons/slinc/jitc/JitSpecification.scala +++ b/core/test/src/fr/hammons/slinc/jitc/JitSpecification.scala @@ -7,15 +7,16 @@ import scala.compiletime.codeOf class JitSpecification extends munit.FunSuite: test("jit-compilation works"): var optimized = false - var fn = OptimizableFn.standard(jitCompiler => - jitCompiler('{ (optimizedFn: Boolean => Unit) => (i: Int) => - optimizedFn(true) - i - })(optimized = _) - )( - i => i((a: Int) => i.instrument(a)), - 10 - ) + var fn = + new OptimizableFn[Int => Int, DummyImplicit](JitCService.standard)( + i => i((a: Int) => i.instrument(a)), + 10 + )(jitCompiler => + jitCompiler('{ (optimizedFn: Boolean => Unit) => (i: Int) => + optimizedFn(true) + i + })(optimized = _) + ) for _ <- 0 to 10 yield fn.get(3) @@ -26,17 +27,18 @@ class JitSpecification extends munit.FunSuite: test("jit-compilation in multithreaded env works"): var optimized = false - val fn = new OptimizableFn(JitCService.standard)( - i => i((a: Int) => i.instrument(a)), - 10 - )(jitCompiler => - jitCompiler('{ (optimizedFn: Boolean => Unit) => (i: Int) => - optimizedFn(true) - i - })( - optimized = _ + val fn = + new OptimizableFn[Int => Int, DummyImplicit](JitCService.standard)( + i => i((a: Int) => i.instrument(a)), + 10 + )(jitCompiler => + jitCompiler('{ (optimizedFn: Boolean => Unit) => (i: Int) => + optimizedFn(true) + i + })( + optimized = _ + ) ) - ) for _ <- 0 to 5 yield Future { for _ <- 0 until 2 @@ -53,7 +55,10 @@ class JitSpecification extends munit.FunSuite: test("instant compilation works"): var optimized = false val fn = - new OptimizableFn(JitCService.synchronous, IgnoreInstrumentation)( + new OptimizableFn[Int => Int, DummyImplicit]( + JitCService.synchronous, + IgnoreInstrumentation + )( i => i((a: Int) => i.instrument(a)), 0 )(jitCompiler => @@ -69,7 +74,7 @@ class JitSpecification extends munit.FunSuite: test("instant compilation from properties"): System.setProperty(OptimizableFn.modeSetting, "immediate") var optimized = false - val fn = OptimizableFn(jitCompiler => + val fn = OptimizableFn[Int => Int, DummyImplicit](jitCompiler => jitCompiler('{ (optimizedFn: Boolean => Unit) => (i: Int) => optimizedFn(true) i @@ -82,7 +87,7 @@ class JitSpecification extends munit.FunSuite: test("jitc disable from properties"): System.setProperty(OptimizableFn.modeSetting, "disabled") var optimized = false - val fn = OptimizableFn(jitCompiler => + val fn = OptimizableFn[Int => Int, DummyImplicit](jitCompiler => jitCompiler('{ (optimizedFn: Boolean => Unit) => (i: Int) => optimizedFn(true) i diff --git a/j17/src/fr/hammons/slinc/modules/ReadWriteModule17.scala b/j17/src/fr/hammons/slinc/modules/ReadWriteModule17.scala index 6b9c81f2..80ac31c3 100644 --- a/j17/src/fr/hammons/slinc/modules/ReadWriteModule17.scala +++ b/j17/src/fr/hammons/slinc/modules/ReadWriteModule17.scala @@ -5,9 +5,18 @@ import scala.collection.concurrent.TrieMap import java.lang.invoke.MethodHandle import scala.reflect.ClassTag import fr.hammons.slinc.fnutils.Fn +import scala.quoted.* +import java.lang.reflect.Modifier given readWriteModule17: ReadWriteModule with // todo: eliminate this + + def writeFn( + typeDescriptor: TypeDescriptor + ): MemWriter[typeDescriptor.Inner] = ??? + + override val intWritingExpr: (Quotes) ?=> Expr[MemWriter[Int]] = ??? + val fnCache: TrieMap[CFunctionDescriptor, Mem => ?] = TrieMap.empty @@ -15,9 +24,9 @@ given readWriteModule17: ReadWriteModule with val arrayReaderCache = DependentTrieMap[ArrayReader] - val writerCache = DependentTrieMap[Writer] + val writerCache = DependentTrieMap[MemWriter] - val arrayWriterCache = DependentTrieMap[[I] =>> Writer[Array[I]]] + val arrayWriterCache = DependentTrieMap[[I] =>> MemWriter[Array[I]]] val byteWriter = (mem, offset, value) => mem.writeByte(value, offset) val shortWriter = (mem, offset, value) => mem.writeShort(value, offset) @@ -54,7 +63,7 @@ given readWriteModule17: ReadWriteModule with new CUnion(newMem) ) - def unionWriter(td: TypeDescriptor): Writer[CUnion[? <: NonEmptyTuple]] = + def unionWriter(td: TypeDescriptor): MemWriter[CUnion[? <: NonEmptyTuple]] = val size = descriptorModule17.sizeOf(td) (mem, offset, value) => mem.offset(offset).resize(size).copyFrom(value.mem) @@ -106,12 +115,127 @@ given readWriteModule17: ReadWriteModule with offset: Bytes, typeDescriptor: TypeDescriptor, value: typeDescriptor.Inner - ): Unit = writerCache.getOrElseUpdate( - typeDescriptor, - typeDescriptor.writer - )(memory, offset, value) - - override def writeArray[A](memory: Mem, offset: Bytes, value: Array[A])(using + ): Unit = ??? + + def asExprOf[A](expr: Expr[Any])(using Quotes, Type[A]) = + if expr.isExprOf[A] then expr.asExprOf[A] + else '{ $expr.asInstanceOf[A] }.asExprOf[A] + + def canBeUsedDirectly(clazz: Class[?]): Boolean = + val enclosingClass = clazz.getEnclosingClass() + if clazz.getCanonicalName() == null then false + else if enclosingClass == null && clazz + .getEnclosingConstructor() == null && clazz.getEnclosingMethod() == null + then true + else if canBeUsedDirectly(enclosingClass.nn) && Modifier.isStatic( + clazz.getModifiers() + ) && Modifier.isPublic(clazz.getModifiers()) + then true + else false + + def writeExprHelper( + typeDescriptor: TypeDescriptor, + mem: Expr[Mem], + offset: Expr[Bytes], + value: Expr[Any] + )(using Quotes): Expr[Unit] = + import quotes.reflect.* + typeDescriptor match + case ByteDescriptor => ??? + case ShortDescriptor => ??? + case IntDescriptor => + '{ + $mem.writeInt(${ asExprOf[Int](value) }, $offset) + } + case LongDescriptor => ??? + case FloatDescriptor => ??? + case DoubleDescriptor => ??? + case PtrDescriptor => ??? + case sd: StructDescriptor if canBeUsedDirectly(sd.clazz) => + val fields = + Symbol.classSymbol(sd.clazz.getCanonicalName().nn).caseFields + + val offsets = + descriptorModule17.memberOffsets(sd.members.map(_.descriptor)) + + val fns = sd.members.zip(offsets).zipWithIndex.map { + case ( + (StructMemberDescriptor(childDescriptor, name), childOffset), + index + ) => + (nv: Expr[Product]) => + val childField = Select(nv.asTerm, fields(index)).asExpr + val totalOffset = offset.value + .map(_ + childOffset) + .map(Expr(_)) + .getOrElse('{ $offset + ${ Expr(childOffset) } }) + + writeExprHelper(childDescriptor, mem, totalOffset, childField) + } + case sd: StructDescriptor => + val offsets = + descriptorModule17.memberOffsets(sd.members.map(_.descriptor)) + val fns = sd.members + .zip(offsets) + .zipWithIndex + .map { + case ((StructMemberDescriptor(td, name), childOffset), index) => + (nv: Expr[Product]) => + val childField = '{ $nv.productElement(${ Expr(index) }) } + val totalOffset = offset.value + .map(_ + childOffset) + .map(Expr(_)) + .getOrElse('{ $offset + ${ Expr(childOffset) } }) + + writeExprHelper(td, mem, totalOffset, childField) + } + .toList + + '{ + val a: Product = ${ asExprOf[Product](value) } + + ${ + Expr.block(fns.map(_('a)), '{}) + } + } + + case AliasDescriptor(real) => ??? + case VaListDescriptor => ??? + case CUnionDescriptor(possibleTypes) => ??? + case SetSizeArrayDescriptor(td, x) => ??? + + ??? + + def writeExpr( + typeDescriptor: TypeDescriptor + )(using Quotes): Expr[MemWriter[Any]] = + '{ (mem: Mem, offset: Bytes, value: Any) => + ${ + writeExprHelper(typeDescriptor, 'mem, 'offset, 'value) + } + } + + def writeArrayExpr(typeDescriptor: TypeDescriptor)(using + Quotes + ): Expr[MemWriter[Array[Any]]] = + val elemLength = Expr(typeDescriptor.size) + '{ (mem: Mem, offset: Bytes, value: Array[Any]) => + var x = 0 + while x < value.length do + ${ + writeExprHelper( + typeDescriptor, + 'mem, + '{ + ($elemLength * x) + offset + }, + '{ value(x) } + ) + } + x += 1 + } + + def writeArray[A](memory: Mem, offset: Bytes, value: Array[A])(using DescriptorOf[A] ): Unit = val desc = DescriptorOf[A] diff --git a/j19/src/fr/hammons/slinc/modules/ReadWriteModule19.scala b/j19/src/fr/hammons/slinc/modules/ReadWriteModule19.scala index 916edb7e..bcc78ba9 100644 --- a/j19/src/fr/hammons/slinc/modules/ReadWriteModule19.scala +++ b/j19/src/fr/hammons/slinc/modules/ReadWriteModule19.scala @@ -7,10 +7,19 @@ import scala.reflect.ClassTag import fr.hammons.slinc.fnutils.Fn private[slinc] given readWriteModule19: ReadWriteModule with + val intWritingExpr: ( + quoted.Quotes + ) ?=> quoted.Expr[fr.hammons.slinc.modules.MemWriter[Int]] = ??? + def writeArrayExpr(td: fr.hammons.slinc.TypeDescriptor)(using + x$2: quoted.Quotes + ): quoted.Expr[fr.hammons.slinc.modules.MemWriter[Array[Any]]] = ??? + def writeExpr(td: fr.hammons.slinc.TypeDescriptor)(using + x$2: quoted.Quotes + ): quoted.Expr[fr.hammons.slinc.modules.MemWriter[Any]] = ??? override def unionWriter( td: TypeDescriptor - ): Writer[CUnion[? <: NonEmptyTuple]] = + ): MemWriter[CUnion[? <: NonEmptyTuple]] = val size = descriptorModule19.sizeOf(td) (mem, offset, value) => mem.offset(offset).resize(size).copyFrom(value.mem) @@ -25,9 +34,9 @@ private[slinc] given readWriteModule19: ReadWriteModule with new CUnion(newMem) ) - val writerCache = DependentTrieMap[Writer] + val writerCache = DependentTrieMap[MemWriter] - val arrayWriterCache = DependentTrieMap[[I] =>> Writer[Array[I]]] + val arrayWriterCache = DependentTrieMap[[I] =>> MemWriter[Array[I]]] val readerCache = DependentTrieMap[Reader] From bc5a22ca4835431621fc081f5032206fe307c3e0 Mon Sep 17 00:00:00 2001 From: Mark Hammons Date: Mon, 29 May 2023 17:50:31 +0200 Subject: [PATCH 3/7] feat: JITC works now on java 17 --- core/src/fr/hammons/slinc/Alias.scala | 22 ++-- core/src/fr/hammons/slinc/DescriptorOf.scala | 87 ++++++++----- core/src/fr/hammons/slinc/Ptr.scala | 9 +- core/src/fr/hammons/slinc/Struct.scala | 49 ++++---- .../src/fr/hammons/slinc/TypeDescriptor.scala | 7 +- .../fr/hammons/slinc/jitc/OptimizableFn.scala | 28 ++--- .../slinc/modules/ReadWriteModule.scala | 4 +- core/src/fr/hammons/slinc/types/CLong.scala | 7 +- core/src/fr/hammons/slinc/types/SizeT.scala | 10 +- core/src/fr/hammons/slinc/types/TimeT.scala | 10 +- .../src/fr/hammons/slinc/TransferSpec.scala | 1 + .../slinc/modules/ReadWriteModule17.scala | 117 +++++++++++++----- 12 files changed, 221 insertions(+), 130 deletions(-) diff --git a/core/src/fr/hammons/slinc/Alias.scala b/core/src/fr/hammons/slinc/Alias.scala index 50bfcae1..318915a9 100644 --- a/core/src/fr/hammons/slinc/Alias.scala +++ b/core/src/fr/hammons/slinc/Alias.scala @@ -1,17 +1,17 @@ package fr.hammons.slinc import types.{OS, Arch, os, arch} +import scala.reflect.ClassTag -trait Alias[T] extends DescriptorOf[T]: - lazy val name: String - lazy val aliases: PartialFunction[(OS, Arch), TypeDescriptor] - val descriptor: TypeDescriptor { type Inner >: T <: T } = - AliasDescriptor[T]( - aliases.applyOrElse( - os -> arch, - _ => - throw new Error( - s"Alias for $name is not defined on platform $os - $arch" - ) +abstract class Alias[T]( + val name: String, + val aliases: PartialFunction[(OS, Arch), TypeDescriptor] +)(using ClassTag[T]) + extends DescriptorOf[T]( + AliasDescriptor[T]( + aliases.applyOrElse( + os -> arch, + _ => throw new Error("") + ) ) ) diff --git a/core/src/fr/hammons/slinc/DescriptorOf.scala b/core/src/fr/hammons/slinc/DescriptorOf.scala index e94c7cec..054e5b3d 100644 --- a/core/src/fr/hammons/slinc/DescriptorOf.scala +++ b/core/src/fr/hammons/slinc/DescriptorOf.scala @@ -5,11 +5,50 @@ import scala.quoted.* import scala.compiletime.{summonInline, erasedValue, constValue} import scala.NonEmptyTuple import scala.reflect.ClassTag +import fr.hammons.slinc.descriptors.Writer +import fr.hammons.slinc.jitc.OptimizableFn +import fr.hammons.slinc.descriptors.WriterContext +import fr.hammons.slinc.modules.MemWriter /** Typeclass that summons TypeDescriptors */ -trait DescriptorOf[A] extends MethodCompatible[A]: - val descriptor: TypeDescriptor { type Inner = A } +trait DescriptorOf[A](val descriptor: TypeDescriptor { type Inner = A })(using + ClassTag[A] +) extends MethodCompatible[A]: + // val descriptor: TypeDescriptor { type Inner = A } + val writer: Writer[A] = OptimizableFn((writerContext: WriterContext) ?=> + _ { + val expr = writerContext.rwm.writeExpr(descriptor) + println(s"jitc: ${expr.show}") + expr + }.asInstanceOf[MemWriter[A]] + )(instrumentation => + instrumentation((mem: Mem, offset: Bytes, value: A) => + instrumentation.instrument( + descriptor.writer(using + summon[WriterContext].rwm, + summon[WriterContext].dm + )(mem, offset, value) + ) + ) + ) + + val arrayWriter: Writer[Array[A]] = OptimizableFn( + _( + summon[WriterContext].rwm.writeArrayExpr(descriptor) + ).asInstanceOf[MemWriter[Array[A]]] + ) { instrumentation => + instrumentation { + val size = descriptor.size(using summon[WriterContext].dm) + (mem: Mem, offset: Bytes, value: Array[A]) => + instrumentation.instrument { + var i = 0 + while i < value.length do + writer.get(mem, size * i + offset, value(i)) + i += 1 + } + } + } object DescriptorOf: /** Convenience method for summoning the TypeDescriptor attached to @@ -29,34 +68,24 @@ object DescriptorOf: c: ContextProof[DescriptorOf *::: End, A] ): DescriptorOf[A] = c.tup.head - given DescriptorOf[Byte] with - val descriptor: TypeDescriptor { type Inner = Byte } = ByteDescriptor - - given DescriptorOf[Short] with - val descriptor: TypeDescriptor { type Inner = Short } = ShortDescriptor + given DescriptorOf[Byte](ByteDescriptor) with {} + given DescriptorOf[Short](ShortDescriptor) with {} - given DescriptorOf[Int] with - val descriptor: TypeDescriptor { type Inner = Int } = IntDescriptor + given DescriptorOf[Int](IntDescriptor) with {} - given DescriptorOf[Long] with - val descriptor: TypeDescriptor { type Inner = Long } = LongDescriptor + given DescriptorOf[Long](LongDescriptor) with {} - given DescriptorOf[Float] with - val descriptor: TypeDescriptor { type Inner = Float } = FloatDescriptor + given DescriptorOf[Float](FloatDescriptor) with {} - given DescriptorOf[Double] with - val descriptor: TypeDescriptor { type Inner = Double } = - DoubleDescriptor + given DescriptorOf[Double](DoubleDescriptor) with {} // this is the general DescriptorOf for all [[Ptr[A]]] - private val ptrDescriptor = new DescriptorOf[Ptr[?]]: - val descriptor: TypeDescriptor { type Inner = Ptr[?] } = PtrDescriptor + private val ptrDescriptor = new DescriptorOf[Ptr[?]](PtrDescriptor) {} given [A]: DescriptorOf[Ptr[A]] = ptrDescriptor.asInstanceOf[DescriptorOf[Ptr[A]]] - given DescriptorOf[VarArgs] with - val descriptor: TypeDescriptor { type Inner = VarArgs } = VaListDescriptor + given DescriptorOf[VarArgs](VaListDescriptor) def getDescriptorFor[A](using Quotes, Type[A]) = import quotes.reflect.* @@ -74,15 +103,15 @@ object DescriptorOf: case _: EmptyTuple => Set.empty[TypeDescriptor] inline given [A <: NonEmptyTuple]: DescriptorOf[CUnion[A]] = - new DescriptorOf[CUnion[A]]: - val descriptor: CUnionDescriptor { type Inner = CUnion[A] } = - CUnionDescriptor(helper[A]) - .asInstanceOf[CUnionDescriptor { type Inner = CUnion[A] }] + new DescriptorOf[CUnion[A]]( + CUnionDescriptor(helper[A]) + .asInstanceOf[CUnionDescriptor { type Inner = CUnion[A] }] + ) {} inline given [A, B <: Int](using innerDesc: DescriptorOf[A])(using classTag: ClassTag[innerDesc.descriptor.Inner] - ): DescriptorOf[SetSizeArray[A, B]] = new DescriptorOf[SetSizeArray[A, B]]: - val descriptor: TypeDescriptor { type Inner = SetSizeArray[A, B] } = - SetSizeArrayDescriptor(innerDesc.descriptor, constValue[B]).asInstanceOf[ - SetSizeArrayDescriptor { type Inner = SetSizeArray[A, B] } - ] + ): DescriptorOf[SetSizeArray[A, B]] = new DescriptorOf[SetSizeArray[A, B]]( + SetSizeArrayDescriptor(innerDesc.descriptor, constValue[B]).asInstanceOf[ + SetSizeArrayDescriptor { type Inner = SetSizeArray[A, B] } + ] + ) {} diff --git a/core/src/fr/hammons/slinc/Ptr.scala b/core/src/fr/hammons/slinc/Ptr.scala index cf25c88e..856bd931 100644 --- a/core/src/fr/hammons/slinc/Ptr.scala +++ b/core/src/fr/hammons/slinc/Ptr.scala @@ -37,8 +37,10 @@ class Ptr[A](private[slinc] val mem: Mem, private[slinc] val offset: Bytes): ) ) - def `unary_!_=`(value: A)(using rwM: ReadWriteModule, desc: DescriptorOf[A]) = - rwM.write(mem, offset, desc.descriptor, value) + def `unary_!_=`( + value: A + )(using rwM: ReadWriteModule, desc: DescriptorOf[A], dm: DescriptorModule) = + desc.writer.get(using WriterContext(dm, rwM))(mem, offset, value) def apply(bytes: Bytes): Ptr[A] = Ptr[A](mem, offset + bytes) def apply(index: Int)(using DescriptorOf[A], DescriptorModule): Ptr[A] = Ptr[A](mem, offset + (DescriptorOf[A].size * index)) @@ -82,12 +84,13 @@ object Ptr: a: A )(using rwm: ReadWriteModule, + dm: DescriptorModule, descriptor: DescriptorOf[A] { val descriptor: TypeDescriptor { type Inner = A } } ) = val mem = alloc.allocate(DescriptorOf[A], 1) - rwm.write(mem, Bytes(0), descriptor.descriptor, a) + descriptor.writer.get(using WriterContext(dm, rwm))(mem, Bytes(0), a) Ptr[A](mem, Bytes(0)) def copy( diff --git a/core/src/fr/hammons/slinc/Struct.scala b/core/src/fr/hammons/slinc/Struct.scala index b2d47d4a..f7804c70 100644 --- a/core/src/fr/hammons/slinc/Struct.scala +++ b/core/src/fr/hammons/slinc/Struct.scala @@ -1,7 +1,7 @@ package fr.hammons.slinc import scala.deriving.Mirror -import scala.compiletime.{erasedValue, summonInline, constValueTuple} +import scala.compiletime.{erasedValue, summonInline, constValueTuple, codeOf} import scala.reflect.ClassTag import modules.DescriptorModule import fr.hammons.slinc.modules.TransitionModule @@ -9,7 +9,9 @@ import fr.hammons.slinc.modules.ReadWriteModule import fr.hammons.slinc.modules.Reader import fr.hammons.slinc.modules.MemWriter -trait Struct[A <: Product] extends DescriptorOf[A] +abstract class Struct[A <: Product](td: TypeDescriptor { type Inner = A })(using + ClassTag[A] +) extends DescriptorOf[A](td) object Struct: private inline def memberDescriptors[A](using @@ -94,26 +96,27 @@ object Struct: inline def derived[A <: Product](using m: Mirror.ProductOf[A], ct: ClassTag[A] - ) = new Struct[A]: - type Inner = A - val descriptor: StructDescriptor { type Inner = A } = - new StructDescriptor( - memberDescriptors[A].view - .zip(memberNames[A]) - .map(StructMemberDescriptor.apply) - .toList, - ct.runtimeClass, - m.fromProduct(_) - ): - type Inner = A - val reader = readGen[A] - val writer = writeGen[A] + ) = new Struct[A]( + new StructDescriptor( + memberDescriptors[A].view + .zip(memberNames[A]) + .map(StructMemberDescriptor.apply) + .toList, + ct.runtimeClass, + m.fromProduct(_) + ): + type Inner = A + val reader = readGen[A] + val writer = + // println(codeOf(writeGen[A])) + writeGen[A] - override val returnTransition = returnValue => - val mem = summon[TransitionModule].memReturn(returnValue) - summon[ReadWriteModule].read(mem, Bytes(0), this) + override val returnTransition = returnValue => + val mem = summon[TransitionModule].memReturn(returnValue) + summon[ReadWriteModule].read(mem, Bytes(0), this) - override val argumentTransition = argument => - val mem = summon[Allocator].allocate(this, 1) - summon[ReadWriteModule].write(mem, Bytes(0), this, argument) - summon[TransitionModule].methodArgument(mem).asInstanceOf[Object] + override val argumentTransition = argument => + val mem = summon[Allocator].allocate(this, 1) + summon[ReadWriteModule].write(mem, Bytes(0), this, argument) + summon[TransitionModule].methodArgument(mem).asInstanceOf[Object] + ) {} diff --git a/core/src/fr/hammons/slinc/TypeDescriptor.scala b/core/src/fr/hammons/slinc/TypeDescriptor.scala index c6326e6e..81a0dd16 100644 --- a/core/src/fr/hammons/slinc/TypeDescriptor.scala +++ b/core/src/fr/hammons/slinc/TypeDescriptor.scala @@ -14,15 +14,18 @@ import fr.hammons.slinc.modules.{ArgumentTransition, ReturnTransition} import scala.NonEmptyTuple import scala.language.implicitConversions import fr.hammons.slinc.modules.MemWriter +import fr.hammons.slinc.descriptors.WriterContext /** Describes types used by C interop */ sealed trait TypeDescriptor: self => type Inner - given DescriptorOf[Inner] with - val descriptor = self + given ct: ClassTag[Inner] = ??? + given DescriptorOf[Inner](self) with {} def size(using dm: DescriptorModule): Bytes = dm.sizeOf(this) + def writeArrayExpr(using wc: WriterContext)(using Quotes) = + wc.rwm.writeArrayExpr(this) def alignment(using dm: DescriptorModule): Bytes = dm.alignmentOf(this) def toCarrierType(using dm: DescriptorModule): Class[?] = dm.toCarrierType(this) diff --git a/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala b/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala index 66616afc..cc604c4e 100644 --- a/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala +++ b/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala @@ -20,23 +20,23 @@ class OptimizableFn[F, G]( if fn == null then fn = f(inst) _fn.set(fn) - + if optFn != null then optFn else if inst.getCount() >= limit then - optimizer.jitC( - uuid, - jitCompiler => - val opt = optimized(jitCompiler) - _optFn.setOpaque( - opt - ) - ) - if optimizer.async then fn.nn - else - while _optFn.getOpaque() == null do {} - _optFn.getOpaque().nn + optimizer.jitC( + uuid, + jitCompiler => + val opt = optimized(jitCompiler) + _optFn.setOpaque( + opt + ) + ) + if optimizer.async then fn.nn + else + while _optFn.getOpaque() == null do {} + _optFn.getOpaque().nn else fn.nn - + object OptimizableFn: val modeSetting = "slinc.jitc.mode" val limitSetting = "slinc.jitc.jit-limit" diff --git a/core/src/fr/hammons/slinc/modules/ReadWriteModule.scala b/core/src/fr/hammons/slinc/modules/ReadWriteModule.scala index a84ec9ae..f4ed3862 100644 --- a/core/src/fr/hammons/slinc/modules/ReadWriteModule.scala +++ b/core/src/fr/hammons/slinc/modules/ReadWriteModule.scala @@ -61,7 +61,9 @@ trait ReadWriteModule: fn: => MethodHandle => Mem => A )(using Fn[A, ?, ?]): A - def writeExpr(td: TypeDescriptor)(using Quotes): Expr[MemWriter[Any]] + def writeExpr( + td: TypeDescriptor + )(using Quotes, ClassTag[td.Inner]): Expr[MemWriter[Any]] def writeArrayExpr(td: TypeDescriptor)(using Quotes ): Expr[MemWriter[Array[Any]]] diff --git a/core/src/fr/hammons/slinc/types/CLong.scala b/core/src/fr/hammons/slinc/types/CLong.scala index 99b618ec..d50e2b75 100644 --- a/core/src/fr/hammons/slinc/types/CLong.scala +++ b/core/src/fr/hammons/slinc/types/CLong.scala @@ -19,9 +19,10 @@ object CLong: then Some(IntegralAlias.transform[CLong](maybeFits)) else None - given Alias[CLong] with - lazy val name = "CLong" - lazy val aliases = { + given Alias[CLong]( + "CLong", + { case (OS.Linux | OS.Darwin, Arch.X64 | Arch.AArch64) => LongDescriptor case (OS.Windows, Arch.X64) => IntDescriptor } + ) with {} diff --git a/core/src/fr/hammons/slinc/types/SizeT.scala b/core/src/fr/hammons/slinc/types/SizeT.scala index 119aabd2..cf73d5d7 100644 --- a/core/src/fr/hammons/slinc/types/SizeT.scala +++ b/core/src/fr/hammons/slinc/types/SizeT.scala @@ -19,9 +19,9 @@ object SizeT: Some(IntegralAlias.transform[SizeT](value)) else None - given Alias[SizeT] with - lazy val name = "SizeT" - lazy val aliases = { - case (OS.Linux | OS.Darwin | OS.Windows, Arch.X64 | Arch.AArch64) => - LongDescriptor + given Alias[SizeT]( + "SizeT", + { case (OS.Linux | OS.Darwin | OS.Windows, Arch.X64 | Arch.AArch64) => + LongDescriptor } + ) with {} diff --git a/core/src/fr/hammons/slinc/types/TimeT.scala b/core/src/fr/hammons/slinc/types/TimeT.scala index 20f14d91..70c64018 100644 --- a/core/src/fr/hammons/slinc/types/TimeT.scala +++ b/core/src/fr/hammons/slinc/types/TimeT.scala @@ -17,9 +17,9 @@ object TimeT: Some(IntegralAlias.transform[TimeT](upcast)) else None - given Alias[TimeT] with - lazy val name: String = "TimeT" - lazy val aliases = { - case (OS.Windows | OS.Linux | OS.Darwin, Arch.X64 | Arch.AArch64) => - LongDescriptor + given Alias[TimeT]( + "TimeT", + { case (OS.Windows | OS.Linux | OS.Darwin, Arch.X64 | Arch.AArch64) => + LongDescriptor } + ) with {} diff --git a/core/test/src/fr/hammons/slinc/TransferSpec.scala b/core/test/src/fr/hammons/slinc/TransferSpec.scala index 1bf855a2..0f6a152e 100644 --- a/core/test/src/fr/hammons/slinc/TransferSpec.scala +++ b/core/test/src/fr/hammons/slinc/TransferSpec.scala @@ -16,6 +16,7 @@ trait TransferSpec[ThreadException <: Throwable](val slinc: Slinc)(using ClassTag[ThreadException] ) extends ScalaCheckSuite: import slinc.{*, given} + System.setProperty("slinc.jitc.mode", "disabled") val numVarArgs = if slinc.version < 19 then 7 else 200 diff --git a/j17/src/fr/hammons/slinc/modules/ReadWriteModule17.scala b/j17/src/fr/hammons/slinc/modules/ReadWriteModule17.scala index 80ac31c3..f448b995 100644 --- a/j17/src/fr/hammons/slinc/modules/ReadWriteModule17.scala +++ b/j17/src/fr/hammons/slinc/modules/ReadWriteModule17.scala @@ -115,7 +115,10 @@ given readWriteModule17: ReadWriteModule with offset: Bytes, typeDescriptor: TypeDescriptor, value: typeDescriptor.Inner - ): Unit = ??? + ): Unit = writerCache.getOrElseUpdate( + typeDescriptor, + typeDescriptor.writer + )(memory, offset, value) def asExprOf[A](expr: Expr[Any])(using Quotes, Type[A]) = if expr.isExprOf[A] then expr.asExprOf[A] @@ -133,10 +136,17 @@ given readWriteModule17: ReadWriteModule with then true else false + def foldOffsets(offsets: Seq[Expr[Bytes]])(using Quotes): Expr[Bytes] = + val (constants, references) = offsets.partition(_.value.isDefined) + val constantOffset = Expr(constants.map(_.valueOrAbort).sum) + val referenceOffset = references.reduceLeftOption((a, b) => '{ $a + $b }) + referenceOffset + .map(refOff => '{ $refOff + $constantOffset }) + .getOrElse(constantOffset) def writeExprHelper( typeDescriptor: TypeDescriptor, mem: Expr[Mem], - offset: Expr[Bytes], + offsetExprs: Seq[Expr[Bytes]], value: Expr[Any] )(using Quotes): Expr[Unit] = import quotes.reflect.* @@ -145,33 +155,61 @@ given readWriteModule17: ReadWriteModule with case ShortDescriptor => ??? case IntDescriptor => '{ - $mem.writeInt(${ asExprOf[Int](value) }, $offset) + $mem.writeInt( + ${ asExprOf[Int](value) }, + ${ foldOffsets(offsetExprs) } + ) + } + case LongDescriptor => + '{ + $mem.writeLong( + ${ asExprOf[Long](value) }, + ${ foldOffsets(offsetExprs) } + ) } - case LongDescriptor => ??? case FloatDescriptor => ??? case DoubleDescriptor => ??? case PtrDescriptor => ??? case sd: StructDescriptor if canBeUsedDirectly(sd.clazz) => + println(s"compiling $sd") val fields = Symbol.classSymbol(sd.clazz.getCanonicalName().nn).caseFields + println("calculated fields") val offsets = descriptorModule17.memberOffsets(sd.members.map(_.descriptor)) - val fns = sd.members.zip(offsets).zipWithIndex.map { - case ( - (StructMemberDescriptor(childDescriptor, name), childOffset), - index - ) => - (nv: Expr[Product]) => - val childField = Select(nv.asTerm, fields(index)).asExpr - val totalOffset = offset.value - .map(_ + childOffset) - .map(Expr(_)) - .getOrElse('{ $offset + ${ Expr(childOffset) } }) - - writeExprHelper(childDescriptor, mem, totalOffset, childField) - } + println("calculated offsets") + + val fns = sd.members + .zip(offsets) + .zipWithIndex + .map { + case ( + (StructMemberDescriptor(childDescriptor, name), childOffset), + index + ) => + (nv: Expr[Product]) => + val childField = Select(nv.asTerm, fields(index)).asExpr + val totalOffset = offsetExprs :+ Expr(childOffset) + + writeExprHelper(childDescriptor, mem, totalOffset, childField) + } + .toList + + println("fns complete") + + val code = TypeRepr.typeConstructorOf(sd.clazz).asType match + case '[a & Product] => + '{ + val a: a & Product = ${ asExprOf[a & Product](value) } + + ${ + Expr.block(fns.map(_('a)), '{}) + } + } + println(code.show) + code case sd: StructDescriptor => val offsets = descriptorModule17.memberOffsets(sd.members.map(_.descriptor)) @@ -182,10 +220,7 @@ given readWriteModule17: ReadWriteModule with case ((StructMemberDescriptor(td, name), childOffset), index) => (nv: Expr[Product]) => val childField = '{ $nv.productElement(${ Expr(index) }) } - val totalOffset = offset.value - .map(_ + childOffset) - .map(Expr(_)) - .getOrElse('{ $offset + ${ Expr(childOffset) } }) + val totalOffset = offsetExprs :+ Expr(childOffset) writeExprHelper(td, mem, totalOffset, childField) } @@ -199,21 +234,32 @@ given readWriteModule17: ReadWriteModule with } } - case AliasDescriptor(real) => ??? + case AliasDescriptor(real) => + writeExprHelper(real, mem, offsetExprs, value) case VaListDescriptor => ??? case CUnionDescriptor(possibleTypes) => ??? case SetSizeArrayDescriptor(td, x) => ??? - ??? - def writeExpr( typeDescriptor: TypeDescriptor - )(using Quotes): Expr[MemWriter[Any]] = - '{ (mem: Mem, offset: Bytes, value: Any) => - ${ - writeExprHelper(typeDescriptor, 'mem, 'offset, 'value) - } - } + )(using Quotes, ClassTag[typeDescriptor.Inner]): Expr[MemWriter[Any]] = + import quotes.reflect.* + val output = TypeRepr + .typeConstructorOf(summon[ClassTag[typeDescriptor.Inner]].runtimeClass) + .asType match + case '[a] => + '{ (mem: Mem, offset: Bytes, value: Any) => + ${ + writeExprHelper( + typeDescriptor, + 'mem, + Seq('offset), + '{ value }.asExprOf[Any] + ) + } + } + + output def writeArrayExpr(typeDescriptor: TypeDescriptor)(using Quotes @@ -226,9 +272,12 @@ given readWriteModule17: ReadWriteModule with writeExprHelper( typeDescriptor, 'mem, - '{ - ($elemLength * x) + offset - }, + Seq( + '{ + ($elemLength * x) + }, + '{ offset } + ), '{ value(x) } ) } From cb216df8a27a7c5c19070d79883c8584a65f8e30 Mon Sep 17 00:00:00 2001 From: Mark Hammons Date: Thu, 1 Jun 2023 12:34:46 +0200 Subject: [PATCH 4/7] opt: Various modifications to optimize JITC --- .../slinc/TransferBenchmarkShape.scala | 33 +++++++++- core/src/fr/hammons/slinc/DescriptorOf.scala | 2 +- core/src/fr/hammons/slinc/Ptr.scala | 4 +- core/src/fr/hammons/slinc/Struct.scala | 1 - .../slinc/descriptors/WriterContext.scala | 3 + .../fr/hammons/slinc/jitc/JitCService.scala | 7 +- .../fr/hammons/slinc/jitc/OptimizableFn.scala | 60 +++++++++++------ .../slinc/modules/ReadWriteModule.scala | 4 +- .../src/fr/hammons/slinc/TransferSpec.scala | 5 ++ .../hammons/slinc/TransferBenchmarks17.scala | 34 ++++++++-- .../slinc/modules/ReadWriteModule17.scala | 64 ++++++++++--------- 11 files changed, 155 insertions(+), 62 deletions(-) diff --git a/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala b/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala index 67f4f0a6..e681a066 100644 --- a/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala +++ b/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala @@ -2,15 +2,21 @@ package fr.hammons.slinc import fr.hammons.slinc.types.CLong import org.openjdk.jmh.annotations.{Scope as _, *} +import org.openjdk.jmh.infra.Blackhole +import fr.hammons.slinc.descriptors.WriterContext case class A(a: Int, b: B, c: Int) derives Struct case class B(a: Int, b: Int) derives Struct +case class G(a: Int, b: Float, c: CLong) derives Struct +case class I(a: Int, b: Float, c: CLong) derives Struct -@Warmup(iterations = 5) -@Measurement(iterations = 5) +//@Warmup(iterations = 5) +//@Measurement(iterations = 5) trait TransferBenchmarkShape(val s: Slinc): import s.{given, *} + given WriterContext = WriterContext(dm, rwm) + case class C(a: Int, b: D, c: Int) derives Struct case class D(a: CLong, b: Int) derives Struct case class E(a: Int, b: Int) derives Struct @@ -28,6 +34,19 @@ trait TransferBenchmarkShape(val s: Slinc): val c = C(1, D(CLong(2), 3), 4) + val g = G(1, 2f, CLong(3)) + + val gPtr = Scope.global { + Ptr.blank[G] + } + + val i = I(1, 2f, CLong(3)) + val iPtr = Scope.global: + Ptr.blank[I] + + val optimizedIWriter = + summon[DescriptorOf[I]].writer.forceOptimize + @Benchmark def topLevelRead = !aPtr @@ -36,6 +55,16 @@ trait TransferBenchmarkShape(val s: Slinc): def topLevelWrite = !aPtr = a + + @Benchmark + def topLevelWriteG(blackhole: Blackhole) = blackhole.consume: + !gPtr = g + + @Benchmark + def topLevelWriteI(blackhole: Blackhole) = blackhole.consume: + optimizedIWriter(iPtr.mem, Bytes(0), i) + + @Benchmark def innerRead = !cPtr diff --git a/core/src/fr/hammons/slinc/DescriptorOf.scala b/core/src/fr/hammons/slinc/DescriptorOf.scala index 054e5b3d..f639760f 100644 --- a/core/src/fr/hammons/slinc/DescriptorOf.scala +++ b/core/src/fr/hammons/slinc/DescriptorOf.scala @@ -18,7 +18,7 @@ trait DescriptorOf[A](val descriptor: TypeDescriptor { type Inner = A })(using // val descriptor: TypeDescriptor { type Inner = A } val writer: Writer[A] = OptimizableFn((writerContext: WriterContext) ?=> _ { - val expr = writerContext.rwm.writeExpr(descriptor) + val expr = writerContext.rwm.writeExpr[A](descriptor) println(s"jitc: ${expr.show}") expr }.asInstanceOf[MemWriter[A]] diff --git a/core/src/fr/hammons/slinc/Ptr.scala b/core/src/fr/hammons/slinc/Ptr.scala index 856bd931..51fbc85d 100644 --- a/core/src/fr/hammons/slinc/Ptr.scala +++ b/core/src/fr/hammons/slinc/Ptr.scala @@ -39,8 +39,8 @@ class Ptr[A](private[slinc] val mem: Mem, private[slinc] val offset: Bytes): def `unary_!_=`( value: A - )(using rwM: ReadWriteModule, desc: DescriptorOf[A], dm: DescriptorModule) = - desc.writer.get(using WriterContext(dm, rwM))(mem, offset, value) + )(using wc: WriterContext, desc: DescriptorOf[A]) = + desc.writer.get(mem, offset, value) def apply(bytes: Bytes): Ptr[A] = Ptr[A](mem, offset + bytes) def apply(index: Int)(using DescriptorOf[A], DescriptorModule): Ptr[A] = Ptr[A](mem, offset + (DescriptorOf[A].size * index)) diff --git a/core/src/fr/hammons/slinc/Struct.scala b/core/src/fr/hammons/slinc/Struct.scala index f7804c70..1b53f5ff 100644 --- a/core/src/fr/hammons/slinc/Struct.scala +++ b/core/src/fr/hammons/slinc/Struct.scala @@ -108,7 +108,6 @@ object Struct: type Inner = A val reader = readGen[A] val writer = - // println(codeOf(writeGen[A])) writeGen[A] override val returnTransition = returnValue => diff --git a/core/src/fr/hammons/slinc/descriptors/WriterContext.scala b/core/src/fr/hammons/slinc/descriptors/WriterContext.scala index a9a983a9..fdf707a6 100644 --- a/core/src/fr/hammons/slinc/descriptors/WriterContext.scala +++ b/core/src/fr/hammons/slinc/descriptors/WriterContext.scala @@ -4,3 +4,6 @@ import fr.hammons.slinc.modules.DescriptorModule import fr.hammons.slinc.modules.ReadWriteModule final case class WriterContext(dm: DescriptorModule, rwm: ReadWriteModule) + +object WriterContext: + given (using dm: DescriptorModule, rwm: ReadWriteModule): WriterContext = WriterContext(dm, rwm) diff --git a/core/src/fr/hammons/slinc/jitc/JitCService.scala b/core/src/fr/hammons/slinc/jitc/JitCService.scala index 34606941..9777847a 100644 --- a/core/src/fr/hammons/slinc/jitc/JitCService.scala +++ b/core/src/fr/hammons/slinc/jitc/JitCService.scala @@ -10,6 +10,7 @@ import scala.concurrent.Future import scala.quoted.staging.run import java.util.UUID import java.{util as ju} +import scala.util.Try type JitCompiler = [A] => ( Quotes ?=> Expr[A] @@ -49,7 +50,11 @@ object JitCService: for (_, work) <- workToDo pfn: JitCompiler = [A] => (fn: Quotes ?=> Expr[A]) => run[A](fn) - do work(pfn) + do Try( + work(pfn) + ).recover{ + case t => t.printStackTrace() + } val done = workToDo.map(_._1) var succeeded = false diff --git a/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala b/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala index cc604c4e..3ad52586 100644 --- a/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala +++ b/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala @@ -13,29 +13,51 @@ class OptimizableFn[F, G]( private val _fn: AtomicReference[F] = AtomicReference() val uuid = UUID.randomUUID().nn private val _optFn: AtomicReference[F] = AtomicReference() + private var _permOptFn: F | Null = null + + def forceOptimize(using G) = + optimizer.jitC( + uuid, + jitCompiler => + val opt = optimized(jitCompiler) + _optFn.setOpaque( + opt + ) + ) + while _optFn.getOpaque() == null do {} + _permOptFn = _optFn.getOpaque().nn + _permOptFn.nn + def get(using G): F = - val optFn = _optFn.getOpaque() - var fn = _fn.getOpaque() - if fn == null then - fn = f(inst) - _fn.set(fn) + if _permOptFn != null then + _permOptFn.nn + else + val optFn = _optFn.getOpaque() + + if optFn != null then + _permOptFn = optFn + optFn + else + var fn = _fn.getOpaque() + if fn == null then + fn = f(inst) + _fn.set(fn) - if optFn != null then optFn - else if inst.getCount() >= limit then - optimizer.jitC( - uuid, - jitCompiler => - val opt = optimized(jitCompiler) - _optFn.setOpaque( - opt + if inst.getCount() >= limit then + optimizer.jitC( + uuid, + jitCompiler => + val opt = optimized(jitCompiler) + _optFn.setOpaque( + opt + ) ) - ) - if optimizer.async then fn.nn - else - while _optFn.getOpaque() == null do {} - _optFn.getOpaque().nn - else fn.nn + if optimizer.async then fn.nn + else + while _optFn.getOpaque() == null do {} + _optFn.getOpaque().nn + else fn.nn object OptimizableFn: val modeSetting = "slinc.jitc.mode" diff --git a/core/src/fr/hammons/slinc/modules/ReadWriteModule.scala b/core/src/fr/hammons/slinc/modules/ReadWriteModule.scala index f4ed3862..0298b94b 100644 --- a/core/src/fr/hammons/slinc/modules/ReadWriteModule.scala +++ b/core/src/fr/hammons/slinc/modules/ReadWriteModule.scala @@ -61,9 +61,9 @@ trait ReadWriteModule: fn: => MethodHandle => Mem => A )(using Fn[A, ?, ?]): A - def writeExpr( + def writeExpr[A]( td: TypeDescriptor - )(using Quotes, ClassTag[td.Inner]): Expr[MemWriter[Any]] + )(using Quotes, ClassTag[A], A =:= td.Inner): Expr[MemWriter[A]] def writeArrayExpr(td: TypeDescriptor)(using Quotes ): Expr[MemWriter[Array[Any]]] diff --git a/core/test/src/fr/hammons/slinc/TransferSpec.scala b/core/test/src/fr/hammons/slinc/TransferSpec.scala index 0f6a152e..d9851a50 100644 --- a/core/test/src/fr/hammons/slinc/TransferSpec.scala +++ b/core/test/src/fr/hammons/slinc/TransferSpec.scala @@ -32,6 +32,11 @@ trait TransferSpec[ThreadException <: Throwable](val slinc: Slinc)(using case class G(long: CLong, arr: SetSizeArray[CLong, 2]) derives Struct + case class H(a: Int, b: Float, c: CLong) derives Struct + + Scope.confined: + Ptr.copy(H(1,2,CLong(3))) + test("can read and write jvm ints") { Scope.global { val mem = Ptr.blank[Int] diff --git a/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarks17.scala b/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarks17.scala index e09200fc..1350d99d 100644 --- a/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarks17.scala +++ b/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarks17.scala @@ -1,10 +1,14 @@ package fr.hammons.slinc -import org.openjdk.jmh.annotations.*, Mode.{SingleShotTime, Throughput} +import org.openjdk.jmh.annotations.*, Mode.{SingleShotTime, AverageTime} import java.util.concurrent.TimeUnit +import jdk.incubator.foreign.* +import jdk.incubator.foreign.CLinker.* +import org.openjdk.jmh.infra.Blackhole +import fr.hammons.slinc.types.* @State(Scope.Thread) -@BenchmarkMode(Array(Throughput, SingleShotTime)) +@BenchmarkMode(Array(AverageTime, SingleShotTime)) @Fork( jvmArgsAppend = Array( "--add-modules=jdk.incubator.foreign", @@ -12,5 +16,27 @@ import java.util.concurrent.TimeUnit // "-XX:ActiveProcessorCount=1", ) ) -@OutputTimeUnit(TimeUnit.MICROSECONDS) -class TransferBenchmarks17 extends TransferBenchmarkShape(Slinc17.noJit) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +class TransferBenchmarks17 extends TransferBenchmarkShape(Slinc17.noJit) { + case class H(a: Int, b: Float, c: CLong) + + val rs = ResourceScope.globalScope().nn + val segAlloc = SegmentAllocator.arenaAllocator(rs).nn + val ml = MemoryLayout.structLayout(C_INT, C_FLOAT, C_LONG) + + val ms = segAlloc.allocate(ml) + val h = H(1,2f,CLong(3)) + + val writerFn = (ms: MemorySegment | Null, offset: Bytes, value: H) => + MemoryAccess.setIntAtOffset(ms, offset.toLong + 0, h.a) + MemoryAccess.setFloatAtOffset(ms, offset.toLong + 4, h.b) + MemoryAccess.setLongAtOffset(ms, offset.toLong + 8, a.c.asInstanceOf[Long]) + + + + @Benchmark + def writeManual(blackhole: Blackhole) = blackhole.consume( + writerFn(ms, Bytes(0), h) + ) + +} diff --git a/j17/src/fr/hammons/slinc/modules/ReadWriteModule17.scala b/j17/src/fr/hammons/slinc/modules/ReadWriteModule17.scala index f448b995..556ed3b3 100644 --- a/j17/src/fr/hammons/slinc/modules/ReadWriteModule17.scala +++ b/j17/src/fr/hammons/slinc/modules/ReadWriteModule17.scala @@ -7,6 +7,7 @@ import scala.reflect.ClassTag import fr.hammons.slinc.fnutils.Fn import scala.quoted.* import java.lang.reflect.Modifier +import jdk.incubator.foreign.* given readWriteModule17: ReadWriteModule with // todo: eliminate this @@ -120,7 +121,7 @@ given readWriteModule17: ReadWriteModule with typeDescriptor.writer )(memory, offset, value) - def asExprOf[A](expr: Expr[Any])(using Quotes, Type[A]) = + def asExprOf[A](expr: Expr[?])(using Quotes, Type[A]) = if expr.isExprOf[A] then expr.asExprOf[A] else '{ $expr.asInstanceOf[A] }.asExprOf[A] @@ -143,31 +144,36 @@ given readWriteModule17: ReadWriteModule with referenceOffset .map(refOff => '{ $refOff + $constantOffset }) .getOrElse(constantOffset) - def writeExprHelper( + + def writeExprHelper[A]( typeDescriptor: TypeDescriptor, - mem: Expr[Mem], + mem: Expr[MemorySegment], offsetExprs: Seq[Expr[Bytes]], - value: Expr[Any] - )(using Quotes): Expr[Unit] = + value: Expr[A] + )(using Quotes, Type[A]): Expr[Unit] = import quotes.reflect.* typeDescriptor match case ByteDescriptor => ??? case ShortDescriptor => ??? case IntDescriptor => '{ - $mem.writeInt( - ${ asExprOf[Int](value) }, - ${ foldOffsets(offsetExprs) } - ) + MemoryAccess.setIntAtOffset($mem, ${foldOffsets(offsetExprs)}.toLong, ${asExprOf[Int](value)}) } case LongDescriptor => '{ - $mem.writeLong( - ${ asExprOf[Long](value) }, - ${ foldOffsets(offsetExprs) } - ) + MemoryAccess.setLongAtOffset($mem, ${foldOffsets(offsetExprs)}.toLong, ${asExprOf[Long](value)}) + // $mem.writeLong( + // ${ asExprOf[Long](value) }, + // ${ foldOffsets(offsetExprs) } + // ) } - case FloatDescriptor => ??? + case FloatDescriptor => '{ + MemoryAccess.setFloatAtOffset($mem, ${foldOffsets(offsetExprs)}.toLong, ${asExprOf[Float](value)}) + // $mem.writeFloat( + // ${ asExprOf[Float](value) }, + // ${ foldOffsets(offsetExprs)} + // ) + } case DoubleDescriptor => ??? case PtrDescriptor => ??? case sd: StructDescriptor if canBeUsedDirectly(sd.clazz) => @@ -189,7 +195,7 @@ given readWriteModule17: ReadWriteModule with (StructMemberDescriptor(childDescriptor, name), childOffset), index ) => - (nv: Expr[Product]) => + (nv: Expr[A]) => val childField = Select(nv.asTerm, fields(index)).asExpr val totalOffset = offsetExprs :+ Expr(childOffset) @@ -201,13 +207,8 @@ given readWriteModule17: ReadWriteModule with val code = TypeRepr.typeConstructorOf(sd.clazz).asType match case '[a & Product] => - '{ - val a: a & Product = ${ asExprOf[a & Product](value) } - - ${ - Expr.block(fns.map(_('a)), '{}) - } - } + val writes = fns.map(_(value)) + Expr.block(writes.init, writes.last) println(code.show) code case sd: StructDescriptor => @@ -240,26 +241,28 @@ given readWriteModule17: ReadWriteModule with case CUnionDescriptor(possibleTypes) => ??? case SetSizeArrayDescriptor(td, x) => ??? - def writeExpr( + def writeExpr[A]( typeDescriptor: TypeDescriptor - )(using Quotes, ClassTag[typeDescriptor.Inner]): Expr[MemWriter[Any]] = + )(using Quotes, ClassTag[A], A =:= typeDescriptor.Inner): Expr[MemWriter[A]] = import quotes.reflect.* val output = TypeRepr - .typeConstructorOf(summon[ClassTag[typeDescriptor.Inner]].runtimeClass) + .typeConstructorOf(summon[ClassTag[A]].runtimeClass) .asType match case '[a] => - '{ (mem: Mem, offset: Bytes, value: Any) => + '{ (mem: Mem, offset: Bytes, value: a) => + val memsegment = mem.asBase.asInstanceOf[MemorySegment] ${ writeExprHelper( typeDescriptor, - 'mem, + 'memsegment, Seq('offset), - '{ value }.asExprOf[Any] + '{ value } ) } } - output + given Type[A] = TypeRepr.typeConstructorOf(summon[ClassTag[A]].runtimeClass).asType.asInstanceOf[Type[A]] + output.asExprOf[MemWriter[A]] def writeArrayExpr(typeDescriptor: TypeDescriptor)(using Quotes @@ -267,11 +270,12 @@ given readWriteModule17: ReadWriteModule with val elemLength = Expr(typeDescriptor.size) '{ (mem: Mem, offset: Bytes, value: Array[Any]) => var x = 0 + val ms = mem.asBase.asInstanceOf[MemorySegment] while x < value.length do ${ writeExprHelper( typeDescriptor, - 'mem, + 'ms, Seq( '{ ($elemLength * x) From a265eb347623fe28b8ee75909f6906b62c049510 Mon Sep 17 00:00:00 2001 From: Mark Hammons Date: Thu, 1 Jun 2023 14:15:09 +0200 Subject: [PATCH 5/7] chore: Increase number of benchmarks and strengthen unit tests --- .../slinc/TransferBenchmarkShape.scala | 35 ++++++++++++++++-- .../slinc/descriptors/WriterContext.scala | 3 +- .../fr/hammons/slinc/jitc/JitCService.scala | 11 +++--- .../fr/hammons/slinc/jitc/OptimizableFn.scala | 16 ++++----- .../src/fr/hammons/slinc/TransferSpec.scala | 2 +- .../hammons/slinc/jitc/JitSpecification.scala | 34 +++++++++--------- .../hammons/slinc/TransferBenchmarks17.scala | 20 +++++------ .../slinc/modules/ReadWriteModule17.scala | 36 +++++++++++++------ 8 files changed, 101 insertions(+), 56 deletions(-) diff --git a/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala b/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala index e681a066..f00a30e2 100644 --- a/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala +++ b/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala @@ -4,6 +4,7 @@ import fr.hammons.slinc.types.CLong import org.openjdk.jmh.annotations.{Scope as _, *} import org.openjdk.jmh.infra.Blackhole import fr.hammons.slinc.descriptors.WriterContext +import scala.util.Random case class A(a: Int, b: B, c: Int) derives Struct case class B(a: Int, b: Int) derives Struct @@ -55,15 +56,43 @@ trait TransferBenchmarkShape(val s: Slinc): def topLevelWrite = !aPtr = a + @Benchmark + @Fork( + jvmArgsAppend = Array( + "-Dslinc.jitc.mode=standard" + ) + ) + def topLevelWriteGJitted(blackhole: Blackhole) = blackhole.consume: + !gPtr = g - @Benchmark - def topLevelWriteG(blackhole: Blackhole) = blackhole.consume: + @Benchmark + @Fork( + jvmArgsAppend = Array( + "-Dslinc.jitc.mode=disabled" + ) + ) + def topLevelWriteGNoJit(blackhole: Blackhole) = blackhole.consume: !gPtr = g @Benchmark - def topLevelWriteI(blackhole: Blackhole) = blackhole.consume: + @Fork( + jvmArgsAppend = Array( + "-Dslinc.jitc.mode=immediate" + ) + ) + def topLevelWriteGImmediateJIT(blackhole: Blackhole) = blackhole.consume: + !gPtr = g + + @Benchmark + def cachedWriteI(blackhole: Blackhole) = blackhole.consume: optimizedIWriter(iPtr.mem, Bytes(0), i) + var x = Random.nextInt() + var y = Random.nextInt() + + @Benchmark + def addValues(blackhole: Blackhole) = blackhole.consume: + x + y @Benchmark def innerRead = diff --git a/core/src/fr/hammons/slinc/descriptors/WriterContext.scala b/core/src/fr/hammons/slinc/descriptors/WriterContext.scala index fdf707a6..d807d02e 100644 --- a/core/src/fr/hammons/slinc/descriptors/WriterContext.scala +++ b/core/src/fr/hammons/slinc/descriptors/WriterContext.scala @@ -6,4 +6,5 @@ import fr.hammons.slinc.modules.ReadWriteModule final case class WriterContext(dm: DescriptorModule, rwm: ReadWriteModule) object WriterContext: - given (using dm: DescriptorModule, rwm: ReadWriteModule): WriterContext = WriterContext(dm, rwm) + given (using dm: DescriptorModule, rwm: ReadWriteModule): WriterContext = + WriterContext(dm, rwm) diff --git a/core/src/fr/hammons/slinc/jitc/JitCService.scala b/core/src/fr/hammons/slinc/jitc/JitCService.scala index 9777847a..db91067d 100644 --- a/core/src/fr/hammons/slinc/jitc/JitCService.scala +++ b/core/src/fr/hammons/slinc/jitc/JitCService.scala @@ -50,11 +50,12 @@ object JitCService: for (_, work) <- workToDo pfn: JitCompiler = [A] => (fn: Quotes ?=> Expr[A]) => run[A](fn) - do Try( - work(pfn) - ).recover{ - case t => t.printStackTrace() - } + do + Try( + work(pfn) + ).recover { case t => + t.printStackTrace() + } val done = workToDo.map(_._1) var succeeded = false diff --git a/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala b/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala index 3ad52586..b0b6d966 100644 --- a/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala +++ b/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala @@ -15,10 +15,10 @@ class OptimizableFn[F, G]( private val _optFn: AtomicReference[F] = AtomicReference() private var _permOptFn: F | Null = null - def forceOptimize(using G) = + def forceOptimize(using G) = optimizer.jitC( - uuid, - jitCompiler => + uuid, + jitCompiler => val opt = optimized(jitCompiler) _optFn.setOpaque( opt @@ -28,17 +28,15 @@ class OptimizableFn[F, G]( _permOptFn = _optFn.getOpaque().nn _permOptFn.nn - def get(using G): F = - if _permOptFn != null then - _permOptFn.nn - else + if _permOptFn != null then _permOptFn.nn + else val optFn = _optFn.getOpaque() - if optFn != null then + if optFn != null then _permOptFn = optFn optFn - else + else var fn = _fn.getOpaque() if fn == null then fn = f(inst) diff --git a/core/test/src/fr/hammons/slinc/TransferSpec.scala b/core/test/src/fr/hammons/slinc/TransferSpec.scala index d9851a50..fd1f2213 100644 --- a/core/test/src/fr/hammons/slinc/TransferSpec.scala +++ b/core/test/src/fr/hammons/slinc/TransferSpec.scala @@ -35,7 +35,7 @@ trait TransferSpec[ThreadException <: Throwable](val slinc: Slinc)(using case class H(a: Int, b: Float, c: CLong) derives Struct Scope.confined: - Ptr.copy(H(1,2,CLong(3))) + Ptr.copy(H(1, 2, CLong(3))) test("can read and write jvm ints") { Scope.global { diff --git a/core/test/src/fr/hammons/slinc/jitc/JitSpecification.scala b/core/test/src/fr/hammons/slinc/jitc/JitSpecification.scala index 88d30620..958bfd7c 100644 --- a/core/test/src/fr/hammons/slinc/jitc/JitSpecification.scala +++ b/core/test/src/fr/hammons/slinc/jitc/JitSpecification.scala @@ -3,6 +3,8 @@ package fr.hammons.slinc.jitc import scala.concurrent.Future import scala.concurrent.ExecutionContext.Implicits.global import scala.compiletime.codeOf +import scala.concurrent.duration.Duration +import scala.concurrent.Await class JitSpecification extends munit.FunSuite: test("jit-compilation works"): @@ -26,31 +28,31 @@ class JitSpecification extends munit.FunSuite: assertEquals(optimized, true) test("jit-compilation in multithreaded env works"): - var optimized = false + val optimized = Array.fill(10)(false) val fn = new OptimizableFn[Int => Int, DummyImplicit](JitCService.standard)( i => i((a: Int) => i.instrument(a)), 10 )(jitCompiler => - jitCompiler('{ (optimizedFn: Boolean => Unit) => (i: Int) => - optimizedFn(true) + jitCompiler('{ (optimizedFn: Int => Unit) => (i: Int) => + optimizedFn(i) i - })( - optimized = _ - ) + })(i => optimized(i) = true) ) - for _ <- 0 to 5 - yield Future { - for _ <- 0 until 2 - yield fn.get(3) - } + val futures = + for i <- 0 until 10 + yield Future { + for _ <- 0 until 100000 + yield fn.get(i) + while !JitCService.standard.processedRecently(fn.uuid) do + Thread.sleep(100) - while !JitCService.standard.processedRecently(fn.uuid) do - println("waiting") - Thread.sleep(100) + fn.get(i) + } - fn.get(6) - assertEquals(optimized, true) + futures.foreach(Await.result(_, Duration.Inf)) + + assertEquals(optimized.toSeq, Seq.fill(10)(true)) test("instant compilation works"): var optimized = false diff --git a/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarks17.scala b/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarks17.scala index 1350d99d..357b9385 100644 --- a/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarks17.scala +++ b/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarks17.scala @@ -1,6 +1,6 @@ package fr.hammons.slinc -import org.openjdk.jmh.annotations.*, Mode.{SingleShotTime, AverageTime} +import org.openjdk.jmh.annotations.*, Mode.{SingleShotTime, Throughput} import java.util.concurrent.TimeUnit import jdk.incubator.foreign.* import jdk.incubator.foreign.CLinker.* @@ -8,7 +8,7 @@ import org.openjdk.jmh.infra.Blackhole import fr.hammons.slinc.types.* @State(Scope.Thread) -@BenchmarkMode(Array(AverageTime, SingleShotTime)) +@BenchmarkMode(Array(Throughput, SingleShotTime)) @Fork( jvmArgsAppend = Array( "--add-modules=jdk.incubator.foreign", @@ -16,23 +16,21 @@ import fr.hammons.slinc.types.* // "-XX:ActiveProcessorCount=1", ) ) -@OutputTimeUnit(TimeUnit.NANOSECONDS) +@OutputTimeUnit(TimeUnit.MICROSECONDS) class TransferBenchmarks17 extends TransferBenchmarkShape(Slinc17.noJit) { case class H(a: Int, b: Float, c: CLong) - val rs = ResourceScope.globalScope().nn + val rs = ResourceScope.globalScope().nn val segAlloc = SegmentAllocator.arenaAllocator(rs).nn val ml = MemoryLayout.structLayout(C_INT, C_FLOAT, C_LONG) val ms = segAlloc.allocate(ml) - val h = H(1,2f,CLong(3)) - - val writerFn = (ms: MemorySegment | Null, offset: Bytes, value: H) => - MemoryAccess.setIntAtOffset(ms, offset.toLong + 0, h.a) - MemoryAccess.setFloatAtOffset(ms, offset.toLong + 4, h.b) - MemoryAccess.setLongAtOffset(ms, offset.toLong + 8, a.c.asInstanceOf[Long]) - + val h = H(1, 2f, CLong(3)) + val writerFn = (ms: MemorySegment | Null, offset: Bytes, value: H) => + MemoryAccess.setIntAtOffset(ms, offset.toLong + 0, h.a) + MemoryAccess.setFloatAtOffset(ms, offset.toLong + 4, h.b) + MemoryAccess.setLongAtOffset(ms, offset.toLong + 8, a.c.asInstanceOf[Long]) @Benchmark def writeManual(blackhole: Blackhole) = blackhole.consume( diff --git a/j17/src/fr/hammons/slinc/modules/ReadWriteModule17.scala b/j17/src/fr/hammons/slinc/modules/ReadWriteModule17.scala index 556ed3b3..530fcd36 100644 --- a/j17/src/fr/hammons/slinc/modules/ReadWriteModule17.scala +++ b/j17/src/fr/hammons/slinc/modules/ReadWriteModule17.scala @@ -157,23 +157,36 @@ given readWriteModule17: ReadWriteModule with case ShortDescriptor => ??? case IntDescriptor => '{ - MemoryAccess.setIntAtOffset($mem, ${foldOffsets(offsetExprs)}.toLong, ${asExprOf[Int](value)}) + MemoryAccess.setIntAtOffset( + $mem, + ${ foldOffsets(offsetExprs) }.toLong, + ${ asExprOf[Int](value) } + ) } case LongDescriptor => '{ - MemoryAccess.setLongAtOffset($mem, ${foldOffsets(offsetExprs)}.toLong, ${asExprOf[Long](value)}) + MemoryAccess.setLongAtOffset( + $mem, + ${ foldOffsets(offsetExprs) }.toLong, + ${ asExprOf[Long](value) } + ) // $mem.writeLong( // ${ asExprOf[Long](value) }, // ${ foldOffsets(offsetExprs) } // ) } - case FloatDescriptor => '{ - MemoryAccess.setFloatAtOffset($mem, ${foldOffsets(offsetExprs)}.toLong, ${asExprOf[Float](value)}) - // $mem.writeFloat( - // ${ asExprOf[Float](value) }, - // ${ foldOffsets(offsetExprs)} - // ) - } + case FloatDescriptor => + '{ + MemoryAccess.setFloatAtOffset( + $mem, + ${ foldOffsets(offsetExprs) }.toLong, + ${ asExprOf[Float](value) } + ) + // $mem.writeFloat( + // ${ asExprOf[Float](value) }, + // ${ foldOffsets(offsetExprs)} + // ) + } case DoubleDescriptor => ??? case PtrDescriptor => ??? case sd: StructDescriptor if canBeUsedDirectly(sd.clazz) => @@ -261,7 +274,10 @@ given readWriteModule17: ReadWriteModule with } } - given Type[A] = TypeRepr.typeConstructorOf(summon[ClassTag[A]].runtimeClass).asType.asInstanceOf[Type[A]] + given Type[A] = TypeRepr + .typeConstructorOf(summon[ClassTag[A]].runtimeClass) + .asType + .asInstanceOf[Type[A]] output.asExprOf[MemWriter[A]] def writeArrayExpr(typeDescriptor: TypeDescriptor)(using From bdc65395e678a2d0d51ef2874b43072346989333 Mon Sep 17 00:00:00 2001 From: Mark Hammons Date: Thu, 1 Jun 2023 23:36:49 +0200 Subject: [PATCH 6/7] perf: optimized optimizablefn some --- .../slinc/TransferBenchmarkShape.scala | 8 +- .../hammons/slinc/jitc/Intrumentation.scala | 10 +- .../fr/hammons/slinc/jitc/OptimizableFn.scala | 194 +++++++++++++----- .../hammons/slinc/jitc/JitSpecification.scala | 60 +++--- 4 files changed, 185 insertions(+), 87 deletions(-) diff --git a/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala b/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala index f00a30e2..29402c40 100644 --- a/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala +++ b/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala @@ -62,7 +62,7 @@ trait TransferBenchmarkShape(val s: Slinc): "-Dslinc.jitc.mode=standard" ) ) - def topLevelWriteGJitted(blackhole: Blackhole) = blackhole.consume: + def jitted(blackhole: Blackhole) = blackhole.consume: !gPtr = g @Benchmark @@ -71,7 +71,7 @@ trait TransferBenchmarkShape(val s: Slinc): "-Dslinc.jitc.mode=disabled" ) ) - def topLevelWriteGNoJit(blackhole: Blackhole) = blackhole.consume: + def compiletime(blackhole: Blackhole) = blackhole.consume: !gPtr = g @Benchmark @@ -80,11 +80,11 @@ trait TransferBenchmarkShape(val s: Slinc): "-Dslinc.jitc.mode=immediate" ) ) - def topLevelWriteGImmediateJIT(blackhole: Blackhole) = blackhole.consume: + def immediatecompilation(blackhole: Blackhole) = blackhole.consume: !gPtr = g @Benchmark - def cachedWriteI(blackhole: Blackhole) = blackhole.consume: + def nakedfunction(blackhole: Blackhole) = blackhole.consume: optimizedIWriter(iPtr.mem, Bytes(0), i) var x = Random.nextInt() diff --git a/core/src/fr/hammons/slinc/jitc/Intrumentation.scala b/core/src/fr/hammons/slinc/jitc/Intrumentation.scala index 01f84c29..47834f4f 100644 --- a/core/src/fr/hammons/slinc/jitc/Intrumentation.scala +++ b/core/src/fr/hammons/slinc/jitc/Intrumentation.scala @@ -20,17 +20,19 @@ trait Instrumentation: ): InstrumentedFn[E] = fn.asInstanceOf[E] -class CountbasedInstrumentation extends Instrumentation: +class CountbasedInstrumentation(triggerFn: () => Unit, triggerLimit: Int) + extends Instrumentation: private val count = AtomicInteger(0) - def getCount() = count.get() - private def incrementCount(): Int = + final def getCount() = count.getAcquire() + private def incrementCount(): Unit = var succeeded = false var res = 0 while !succeeded do res = count.get() succeeded = count.compareAndSet(res, res + 1) - res + 1 + res += 1 + if res >= triggerLimit then triggerFn() def instrument[A](a: A): Instrumented[A] = incrementCount() diff --git a/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala b/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala index b0b6d966..84254061 100644 --- a/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala +++ b/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala @@ -3,19 +3,30 @@ package fr.hammons.slinc.jitc import java.util.concurrent.atomic.AtomicReference import java.util.UUID -class OptimizableFn[F, G]( +sealed trait OptimizableFn[F, G]: + final val uuid = UUID.randomUUID().nn + + def forceOptimize(using G): F + def triggerOptimization(using G): Unit + def get(using G): F + +final class FnToJit[F, G]( optimizer: JitCService, - inst: Instrumentation = new CountbasedInstrumentation -)( - f: G ?=> (i: Instrumentation) => i.InstrumentedFn[F], - limit: Int -)(optimized: G ?=> JitCompiler => F): + inst: (() => Unit) => Instrumentation, + optimized: G ?=> JitCompiler => F, + f: G ?=> (i: Instrumentation) => i.InstrumentedFn[F] +) extends OptimizableFn[F, G]: private val _fn: AtomicReference[F] = AtomicReference() - val uuid = UUID.randomUUID().nn private val _optFn: AtomicReference[F] = AtomicReference() private var _permOptFn: F | Null = null - def forceOptimize(using G) = + final def forceOptimize(using G): F = + triggerOptimization + while _optFn.getOpaque() == null do {} + _permOptFn = _optFn.getOpaque().nn + _permOptFn.nn + + def triggerOptimization(using G): Unit = optimizer.jitC( uuid, jitCompiler => @@ -24,39 +35,128 @@ class OptimizableFn[F, G]( opt ) ) - while _optFn.getOpaque() == null do {} - _permOptFn = _optFn.getOpaque().nn - _permOptFn.nn - def get(using G): F = + private def tryGetOptFn(using G) = + val optFn = _optFn.getOpaque() + + if optFn != null then + _permOptFn = optFn + optFn + else getFn + + private def getFn(using G) = + val fn = _fn.getOpaque() + if fn != null then fn + else + val nFn = f(inst(() => triggerOptimization)) + _fn.set(nFn) + nFn + + final def get(using G): F = if _permOptFn != null then _permOptFn.nn + else tryGetOptFn + +final class NeverJitFn[F, G]( + f: G ?=> (i: Instrumentation) => i.InstrumentedFn[F] +) extends OptimizableFn[F, G]: + private var _f: F | Null = null + final def forceOptimize(using G): F = + _f = f(IgnoreInstrumentation) + _f.nn + + final def triggerOptimization(using G): Unit = () + + final def get(using G): F = forceOptimize + +final class InstantJitFn[F, G]( + optimizer: JitCService, + f: G ?=> JitCompiler => F +) extends OptimizableFn[F, G]: + private val _fn: AtomicReference[F] = AtomicReference() + private var _permFn: F | Null = null + + final def forceOptimize(using G): F = + val fn = _fn.getOpaque() + if fn == null then + triggerOptimization + while _fn.getOpaque() == null do {} + _fn.getOpaque().nn + else fn + + final def get(using G): F = + if _permFn != null then _permFn.nn else - val optFn = _optFn.getOpaque() - - if optFn != null then - _permOptFn = optFn - optFn - else - var fn = _fn.getOpaque() - if fn == null then - fn = f(inst) - _fn.set(fn) - - if inst.getCount() >= limit then - optimizer.jitC( - uuid, - jitCompiler => - val opt = optimized(jitCompiler) - _optFn.setOpaque( - opt - ) - ) - if optimizer.async then fn.nn - else - while _optFn.getOpaque() == null do {} - _optFn.getOpaque().nn - else fn.nn + _permFn = forceOptimize + _permFn.nn + + final def triggerOptimization(using G): Unit = + optimizer.jitC( + uuid, + jitCompiler => + val optimized = f(jitCompiler) + _fn.setOpaque(optimized) + ) +// final class OptimizableFn[F, G]( +// optimizer: JitCService, +// inst: Instrumentation = new CountbasedInstrumentation +// )( +// f: G ?=> (i: Instrumentation) => i.InstrumentedFn[F], +// limit: Int +// )(optimized: G ?=> JitCompiler => F): +// private val _fn: AtomicReference[F] = AtomicReference() +// final val uuid = UUID.randomUUID().nn +// private val _optFn: AtomicReference[F] = AtomicReference() +// private var _permOptFn: F | Null = null + +// final def forceOptimize(using G) = +// optimizer.jitC( +// uuid, +// jitCompiler => +// val opt = optimized(jitCompiler) +// _optFn.setOpaque( +// opt +// ) +// ) +// while _optFn.getOpaque() == null do {} +// _permOptFn = _optFn.getOpaque().nn +// _permOptFn.nn + +// private def optimize(using G): F = +// optimizer.jitC( +// uuid, +// jitCompiler => +// val opt = optimized(jitCompiler) +// _optFn.setOpaque(opt) +// ) +// if optimizer.async then getFn +// else +// while _optFn.getOpaque() == null do {} +// _optFn.getOpaque().nn + +// private def checkOptTrigger(using G) = +// if inst.getCount() >= limit then optimize +// else getFn + +// private def tryGetOptFn(using G) = +// val optFn = _optFn.getOpaque() + +// if optFn != null then +// _permOptFn = optFn +// optFn +// else checkOptTrigger + +// private def getFn(using G) = +// val fn = _fn.getOpaque() +// if fn != null then fn +// else +// val nFn = f(inst) +// _fn.set(nFn) +// nFn + +// final def get(using G): F = +// if _permOptFn != null then _permOptFn.nn +// else tryGetOptFn object OptimizableFn: val modeSetting = "slinc.jitc.mode" val limitSetting = "slinc.jitc.jit-limit" @@ -71,19 +171,17 @@ object OptimizableFn: limit match case None => throw Error("slinc.jitc.jit-limit should be an integer") case Some(value) => - new OptimizableFn[F, G]( + new FnToJit[F, G]( JitCService.standard, - CountbasedInstrumentation() - )(unoptimizedFn, value)(optimized) + updateFn => CountbasedInstrumentation(updateFn, value), + optimized, + unoptimizedFn + ) case "never" | "disabled" => - new OptimizableFn[F, G](JitCService.synchronous, IgnoreInstrumentation)( - unoptimizedFn, - 1 - )(optimized) + new NeverJitFn[F, G]( + unoptimizedFn + ) case "immediate" => - new OptimizableFn[F, G](JitCService.synchronous, IgnoreInstrumentation)( - unoptimizedFn, - 0 - )(optimized) + new InstantJitFn[F, G](JitCService.synchronous, optimized) diff --git a/core/test/src/fr/hammons/slinc/jitc/JitSpecification.scala b/core/test/src/fr/hammons/slinc/jitc/JitSpecification.scala index 958bfd7c..6ba3d6d4 100644 --- a/core/test/src/fr/hammons/slinc/jitc/JitSpecification.scala +++ b/core/test/src/fr/hammons/slinc/jitc/JitSpecification.scala @@ -9,15 +9,16 @@ import scala.concurrent.Await class JitSpecification extends munit.FunSuite: test("jit-compilation works"): var optimized = false - var fn = - new OptimizableFn[Int => Int, DummyImplicit](JitCService.standard)( - i => i((a: Int) => i.instrument(a)), - 10 - )(jitCompiler => - jitCompiler('{ (optimizedFn: Boolean => Unit) => (i: Int) => - optimizedFn(true) - i - })(optimized = _) + var fn: OptimizableFn[Int => Int, DummyImplicit] = + new FnToJit( + JitCService.standard, + CountbasedInstrumentation(_, 10), + jitCompiler => + jitCompiler('{ (optimizedFn: Boolean => Unit) => (i: Int) => + optimizedFn(true) + i + })(optimized = _), + i => i((a: Int) => i.instrument(a)) ) for _ <- 0 to 10 yield fn.get(3) @@ -29,15 +30,16 @@ class JitSpecification extends munit.FunSuite: test("jit-compilation in multithreaded env works"): val optimized = Array.fill(10)(false) - val fn = - new OptimizableFn[Int => Int, DummyImplicit](JitCService.standard)( - i => i((a: Int) => i.instrument(a)), - 10 - )(jitCompiler => - jitCompiler('{ (optimizedFn: Int => Unit) => (i: Int) => - optimizedFn(i) - i - })(i => optimized(i) = true) + val fn: OptimizableFn[Int => Int, DummyImplicit] = + new FnToJit( + JitCService.standard, + CountbasedInstrumentation(_, 10), + jitCompiler => + jitCompiler('{ (optimizedFn: Int => Unit) => (i: Int) => + optimizedFn(i) + i + })(i => optimized(i) = true), + i => i((a: Int) => i.instrument(a)) ) val futures = for i <- 0 until 10 @@ -56,18 +58,14 @@ class JitSpecification extends munit.FunSuite: test("instant compilation works"): var optimized = false - val fn = - new OptimizableFn[Int => Int, DummyImplicit]( - JitCService.synchronous, - IgnoreInstrumentation - )( - i => i((a: Int) => i.instrument(a)), - 0 - )(jitCompiler => - jitCompiler('{ (optimizedFn: Boolean => Unit) => (i: Int) => - optimizedFn(true) - i - })(optimized = _) + val fn: OptimizableFn[Int => Int, DummyImplicit] = + new InstantJitFn[Int => Int, DummyImplicit]( + JitCService.standard, + jitCompiler => + jitCompiler('{ (optimizedFn: Boolean => Unit) => (i: Int) => + optimizedFn(true) + i + })(optimized = _) ) fn.get(6) @@ -111,7 +109,7 @@ class JitSpecification extends munit.FunSuite: assertEquals(ignoreInstrumentation.getCount(), 0) test("Count instrumentation records invokations"): - val countInstrumentation = CountbasedInstrumentation() + val countInstrumentation = CountbasedInstrumentation(() => (), 100) assertEquals(countInstrumentation.getCount(), 0) countInstrumentation.instrument(5) From 6a341e1a8181a20daf209513cf2cd21950a7cf9f Mon Sep 17 00:00:00 2001 From: Mark Hammons Date: Sat, 10 Jun 2023 13:16:27 +0200 Subject: [PATCH 7/7] Add more benchmarks to JitCompiler, more work on jit harness. --- build.sc | 8 +- .../src/fr/hammons/slinc/JitBenchmark.scala | 63 +++++++++ .../hammons/slinc/JitCompilerBenchmark.scala | 53 +++++++ .../slinc/TransferBenchmarkShape.scala | 34 ++++- .../hammons/slinc/jitc/Intrumentation.scala | 38 ++--- .../fr/hammons/slinc/jitc/JitCService.scala | 80 ++++------- .../fr/hammons/slinc/jitc/OptimizableFn.scala | 133 ++++++------------ .../hammons/slinc/jitc/JitSpecification.scala | 32 +++-- .../hammons/slinc/TransferBenchmarks17.scala | 5 +- 9 files changed, 274 insertions(+), 172 deletions(-) create mode 100644 core/benchmarks/test/src/fr/hammons/slinc/JitBenchmark.scala create mode 100644 core/benchmarks/test/src/fr/hammons/slinc/JitCompilerBenchmark.scala diff --git a/build.sc b/build.sc index 38dba93e..558d85b2 100644 --- a/build.sc +++ b/build.sc @@ -30,7 +30,6 @@ trait BaseModule extends ScoverageModule with ScalafmtModule { "-unchecked", "-Xcheck-macros", "-Xprint-suspension", - "-Xsemanticdb", "-Yexplicit-nulls", "-Ysafe-init", "-source:future", @@ -142,12 +141,9 @@ object core override def scalaVersion = core.scalaVersion() override def scalacOptions = core.scalacOptions - object test extends BenchmarkSources { + object test extends Benchmarks { def jmhVersion = jmhV - def forkArgs = super.forkArgs() ++ Seq( - "--add-modules=jdk.incubator.foreign", - "--enable-native-access=ALL-UNNAMED" - ) + def forkArgs = super.forkArgs() } } diff --git a/core/benchmarks/test/src/fr/hammons/slinc/JitBenchmark.scala b/core/benchmarks/test/src/fr/hammons/slinc/JitBenchmark.scala new file mode 100644 index 00000000..2be2719f --- /dev/null +++ b/core/benchmarks/test/src/fr/hammons/slinc/JitBenchmark.scala @@ -0,0 +1,63 @@ +package fr.hammons.slinc + +import org.openjdk.jmh.annotations.* +import java.util.concurrent.TimeUnit +import org.openjdk.jmh.infra.Blackhole +import fr.hammons.slinc.jitc.OptimizableFn +import fr.hammons.slinc.jitc.InstantJitFn +import fr.hammons.slinc.jitc.JitCService +import fr.hammons.slinc.jitc.FnToJit +import fr.hammons.slinc.jitc.CountbasedInstrumentation +import fr.hammons.slinc.jitc.IgnoreInstrumentation +import fr.hammons.slinc.jitc.Instrumentation + +@State(Scope.Thread) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@BenchmarkMode(Array(Mode.SampleTime)) +class JitBenchmark: + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + def value: Int = 5 + + val baseFn = (i: Int) => i + 5 + val iiFn = + val ignoreInstrumentation: Instrumentation = IgnoreInstrumentation(false) + ignoreInstrumentation((i: Int) => ignoreInstrumentation.instrument(i + 5)) + val ciFn = + val countInstrumentation: Instrumentation = CountbasedInstrumentation(10000) + countInstrumentation((i: Int) => countInstrumentation.instrument(i + 5)) + + val instantFn: OptimizableFn[Int => Int, DummyImplicit] = + InstantJitFn(JitCService.standard, jitc => jitc('{ (i: Int) => i + 5 })) + + val bareInstantFn = instantFn.get + + val jittedFn: FnToJit[Int => Int, DummyImplicit] = FnToJit( + JitCService.standard, + CountbasedInstrumentation(10000), + jitc => jitc('{ (i: Int) => i + 5 }), + inst => inst((i: Int) => inst.instrument(i + 5)) + ) + + @Benchmark + def base(b: Blackhole) = + b.consume(baseFn(value)) + + @Benchmark + def ignoreInstrumented(b: Blackhole) = + b.consume(iiFn(value)) + + @Benchmark + def countInstrumented(b: Blackhole) = + b.consume(ciFn(value)) + + @Benchmark + def instant(b: Blackhole) = + b.consume(instantFn.get(value)) + + // @Benchmark + // def bareInstant(b: Blackhole) = + // b.consume(bareInstantFn(value)) + + @Benchmark + def jitted(b: Blackhole) = + b.consume(jittedFn.get(value)) diff --git a/core/benchmarks/test/src/fr/hammons/slinc/JitCompilerBenchmark.scala b/core/benchmarks/test/src/fr/hammons/slinc/JitCompilerBenchmark.scala new file mode 100644 index 00000000..dd3fd1a1 --- /dev/null +++ b/core/benchmarks/test/src/fr/hammons/slinc/JitCompilerBenchmark.scala @@ -0,0 +1,53 @@ +package fr.hammons.slinc + +import org.openjdk.jmh.annotations.* +import java.util.concurrent.atomic.AtomicReference +import fr.hammons.slinc.jitc.JitCService +import fr.hammons.slinc.jitc.JitCompiler +import org.openjdk.jmh.infra.Blackhole +import scala.compiletime.uninitialized +import java.util.UUID +import java.util.concurrent.TimeUnit + +@State(Scope.Thread) +class JitCompilerState: + val fnRef: AtomicReference[Int => Int] = AtomicReference() + + val jitcAsync = JitCService.standard + var uuid: UUID = uninitialized + + val methodToCompile: JitCompiler => Unit = jitc => + fnRef.setOpaque(jitc('{ (i: Int) => i })) + + @Setup(Level.Invocation) + def setup(): Unit = + uuid = UUID.randomUUID().nn + fnRef.set(null) + +@State(Scope.Thread) +class DummyState: + val dummyFnToCompile: JitCompiler => Unit = jitc => jitc('{ (i: Int) => i }) + + val dummyIds: Array[UUID] = Array.ofDim(100) + + @Setup(Level.Invocation) + def setup(): Unit = + for i <- 0 until dummyIds.size do dummyIds(i) = UUID.randomUUID().nn + +@BenchmarkMode(Array(Mode.SampleTime)) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +class JitCompilerBenchmark: + + @Benchmark + def compilationSpeed(b: Blackhole, jcs: JitCompilerState) = + b.consume: + jcs.jitcAsync.jitC(jcs.uuid, jcs.methodToCompile) + while jcs.fnRef.get() == null do {} + + @Benchmark + @OperationsPerInvocation(101) + def compileStress(b: Blackhole, jcs: JitCompilerState, ds: DummyState) = + b.consume: + for id <- ds.dummyIds do jcs.jitcAsync.jitC(id, ds.dummyFnToCompile) + jcs.jitcAsync.jitC(jcs.uuid, jcs.methodToCompile) + while jcs.fnRef.get() == null do {} diff --git a/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala b/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala index 29402c40..7a5b9c1f 100644 --- a/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala +++ b/core/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarkShape.scala @@ -5,6 +5,9 @@ import org.openjdk.jmh.annotations.{Scope as _, *} import org.openjdk.jmh.infra.Blackhole import fr.hammons.slinc.descriptors.WriterContext import scala.util.Random +import fr.hammons.slinc.jitc.FnToJit +import fr.hammons.slinc.modules.MemWriter +import scala.compiletime.uninitialized case class A(a: Int, b: B, c: Int) derives Struct case class B(a: Int, b: Int) derives Struct @@ -35,19 +38,31 @@ trait TransferBenchmarkShape(val s: Slinc): val c = C(1, D(CLong(2), 3), 4) + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + def offset = Bytes(0) + val g = G(1, 2f, CLong(3)) + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + def getG = g + val gPtr = Scope.global { Ptr.blank[G] } val i = I(1, 2f, CLong(3)) + + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + def getI = i val iPtr = Scope.global: Ptr.blank[I] val optimizedIWriter = summon[DescriptorOf[I]].writer.forceOptimize + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + def getOptimizedIWriter = optimizedIWriter + @Benchmark def topLevelRead = !aPtr @@ -63,7 +78,7 @@ trait TransferBenchmarkShape(val s: Slinc): ) ) def jitted(blackhole: Blackhole) = blackhole.consume: - !gPtr = g + !gPtr = getG @Benchmark @Fork( @@ -72,7 +87,7 @@ trait TransferBenchmarkShape(val s: Slinc): ) ) def compiletime(blackhole: Blackhole) = blackhole.consume: - !gPtr = g + !gPtr = getG @Benchmark @Fork( @@ -81,15 +96,26 @@ trait TransferBenchmarkShape(val s: Slinc): ) ) def immediatecompilation(blackhole: Blackhole) = blackhole.consume: - !gPtr = g + !gPtr = getG @Benchmark def nakedfunction(blackhole: Blackhole) = blackhole.consume: - optimizedIWriter(iPtr.mem, Bytes(0), i) + getOptimizedIWriter(iPtr.mem, iPtr.offset, getI) + + import scala.language.unsafeNulls + val castGWriter: FnToJit[MemWriter[G], WriterContext] = + summon[DescriptorOf[G]].writer match + case a: FnToJit[MemWriter[G], WriterContext] => a + case _ => null var x = Random.nextInt() var y = Random.nextInt() + @Benchmark + def fntojit(blackhole: Blackhole) = blackhole.consume( + castGWriter.get(gPtr.mem, gPtr.offset, getG) + ) + @Benchmark def addValues(blackhole: Blackhole) = blackhole.consume: x + y diff --git a/core/src/fr/hammons/slinc/jitc/Intrumentation.scala b/core/src/fr/hammons/slinc/jitc/Intrumentation.scala index 47834f4f..7dc81b4f 100644 --- a/core/src/fr/hammons/slinc/jitc/Intrumentation.scala +++ b/core/src/fr/hammons/slinc/jitc/Intrumentation.scala @@ -3,14 +3,20 @@ package fr.hammons.slinc.jitc import java.util.concurrent.atomic.AtomicInteger import fr.hammons.slinc.fnutils.Fn import scala.annotation.implicitNotFound +import fr.hammons.slinc.jitc.OptimizableFn.limitSetting trait Instrumentation: def getCount(): Int - protected def toInstrumented[A](a: A): Instrumented[A] = a opaque type Instrumented[A] = A opaque type InstrumentedFn[A] <: A = A - def instrument[A](a: A): Instrumented[A] + protected def bootInstrumentation(): Unit + protected def finishInstrumentation(): Unit + inline def instrument[A](inline a: A): Instrumented[A] = + bootInstrumentation() + val ret = a + finishInstrumentation() + ret def apply[A, B <: Tuple, C, D, E](fn: A)(using @implicitNotFound( @@ -20,24 +26,22 @@ trait Instrumentation: ): InstrumentedFn[E] = fn.asInstanceOf[E] -class CountbasedInstrumentation(triggerFn: () => Unit, triggerLimit: Int) - extends Instrumentation: + def shouldOpt: Boolean + +class CountbasedInstrumentation(triggerLimit: Int) extends Instrumentation: private val count = AtomicInteger(0) - final def getCount() = count.getAcquire() + final def getCount() = count.get() private def incrementCount(): Unit = - var succeeded = false - var res = 0 - while !succeeded do - res = count.get() - succeeded = count.compareAndSet(res, res + 1) + count.incrementAndGet() - res += 1 - if res >= triggerLimit then triggerFn() + protected def bootInstrumentation(): Unit = incrementCount() + protected def finishInstrumentation(): Unit = () - def instrument[A](a: A): Instrumented[A] = - incrementCount() - toInstrumented(a) + def shouldOpt: Boolean = count.getOpaque() >= triggerLimit -object IgnoreInstrumentation extends Instrumentation: +case class IgnoreInstrumentation(optimize: Boolean) extends Instrumentation: def getCount() = 0 - def instrument[A](a: A): Instrumented[A] = toInstrumented(a) + protected def bootInstrumentation(): Unit = () + protected def finishInstrumentation(): Unit = () + + def shouldOpt: Boolean = optimize diff --git a/core/src/fr/hammons/slinc/jitc/JitCService.scala b/core/src/fr/hammons/slinc/jitc/JitCService.scala index db91067d..91b0bac3 100644 --- a/core/src/fr/hammons/slinc/jitc/JitCService.scala +++ b/core/src/fr/hammons/slinc/jitc/JitCService.scala @@ -17,7 +17,6 @@ type JitCompiler = [A] => ( ) => A trait JitCService: def jitC(tag: UUID, c: JitCompiler => Unit): Unit - def processedRecently(tag: UUID): Boolean def async: Boolean object JitCService: @@ -37,65 +36,51 @@ object JitCService: private val workQueue = AtomicReference( Vector.empty[(UUID, JitCompiler => Unit)] ) - private val workDone = AtomicReference( + private var workDone = Vector.empty[UUID] - ) + private val doneCache = 32 Future { while !shutdown.getOpaque() do - - val workToDo = workQueue.get().nn.distinctBy(_._1) - - for - (_, work) <- workToDo + val start = System.currentTimeMillis() + val workToDo = workQueue + .getAndSet(Vector.empty) + .nn + .distinctBy(_._1) + .filter((id, _) => !workDone.contains(id)) + + val done = for + (id, work) <- workToDo pfn: JitCompiler = [A] => (fn: Quotes ?=> Expr[A]) => run[A](fn) - do - Try( + workDone <- Try { work(pfn) - ).recover { case t => + Vector(id) + }.recover { case t => t.printStackTrace() - } - - val done = workToDo.map(_._1) - var succeeded = false - while !succeeded do - val wDone = workDone.get().nn - val toDrop = math.max((wDone.size + done.size) - doneCache, 0) - succeeded = - workDone.compareAndSet(wDone, done ++ wDone.dropRight(toDrop)) - - succeeded = false - val doneSet = done.toSet - while !succeeded do - val newWork = workQueue.getOpaque().nn - succeeded = workQueue.compareAndSet( - newWork, - newWork.filterNot((uuid, _) => doneSet.contains(uuid)) - ) - Thread.sleep(100) + Vector.empty[UUID] + }.getOrElse(Vector.empty) + yield workDone + + workDone = done.take(32) ++ workDone.dropRight( + math.max(done.take(32).size + workDone.size - doneCache, 0) + ) + + val end = System.currentTimeMillis() + val duration = end - start + println(s"sleeping ${Math.max(100 - duration, 0)} ms") + Thread.sleep(Math.max(100 - duration, 0)) } override def jitC(uuid: UUID, fn: JitCompiler => Unit) = import language.unsafeNulls var succeeded = false while !succeeded do - val wDone = workDone.get() - if wDone.contains(uuid) then - succeeded = workDone.compareAndSet( - wDone, - uuid +: wDone.filter(_ == uuid) - ) - else - while !succeeded do - val workToDo = workQueue.get() - succeeded = workQueue.compareAndSet( - workToDo, - (workToDo :+ (uuid, fn)) - ) - - override def processedRecently(tag: ju.UUID): Boolean = - workDone.getOpaque().nn.contains(tag) + val workToDo = workQueue.getOpaque() + succeeded = workQueue.compareAndSet( + workToDo, + (workToDo :+ (uuid, fn)) + ) override def async: Boolean = true @@ -118,7 +103,4 @@ object JitCService: succeeded = workDone.compareAndSet(wDone, tag +: wDone.dropRight(toDrop)) - override def processedRecently(tag: ju.UUID): Boolean = - workDone.getOpaque().nn.contains(tag) - override def async: Boolean = false diff --git a/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala b/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala index 84254061..6df799f2 100644 --- a/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala +++ b/core/src/fr/hammons/slinc/jitc/OptimizableFn.scala @@ -2,31 +2,42 @@ package fr.hammons.slinc.jitc import java.util.concurrent.atomic.AtomicReference import java.util.UUID +import java.util.concurrent.atomic.AtomicStampedReference +import java.util.concurrent.atomic.AtomicMarkableReference +import java.util.concurrent.atomic.AtomicBoolean +import scala.annotation.switch +import scala.compiletime.uninitialized sealed trait OptimizableFn[F, G]: final val uuid = UUID.randomUUID().nn + def isOptimized: Boolean + def forceOptimize(using G): F def triggerOptimization(using G): Unit def get(using G): F final class FnToJit[F, G]( optimizer: JitCService, - inst: (() => Unit) => Instrumentation, + inst: Instrumentation, optimized: G ?=> JitCompiler => F, f: G ?=> (i: Instrumentation) => i.InstrumentedFn[F] ) extends OptimizableFn[F, G]: - private val _fn: AtomicReference[F] = AtomicReference() + import scala.language.unsafeNulls + private var state: Int = 0 private val _optFn: AtomicReference[F] = AtomicReference() - private var _permOptFn: F | Null = null + private var fn: F = uninitialized + private var fastFn: F = uninitialized + + def isOptimized: Boolean = state == 3 final def forceOptimize(using G): F = triggerOptimization - while _optFn.getOpaque() == null do {} - _permOptFn = _optFn.getOpaque().nn - _permOptFn.nn + while fastFn == null do fastFn = _optFn.getOpaque() + state = 3 + fastFn - def triggerOptimization(using G): Unit = + final def triggerOptimization(using G): Unit = optimizer.jitC( uuid, jitCompiler => @@ -36,32 +47,39 @@ final class FnToJit[F, G]( ) ) - private def tryGetOptFn(using G) = - val optFn = _optFn.getOpaque() - - if optFn != null then - _permOptFn = optFn - optFn - else getFn - - private def getFn(using G) = - val fn = _fn.getOpaque() - if fn != null then fn - else - val nFn = f(inst(() => triggerOptimization)) - _fn.set(nFn) - nFn - final def get(using G): F = - if _permOptFn != null then _permOptFn.nn - else tryGetOptFn + (state: @switch) match + case 0 => + fn = f(inst) + state = 1 + get + case 1 => + if inst.shouldOpt then + state = 2 + get + fn + case 2 => + if _optFn.getOpaque() == null then triggerOptimization + + state = 3 + get + + case 3 => + fastFn = _optFn.getOpaque() + if fastFn != null then + state = 4 + get + else fn + case 4 => fastFn + case _ => fn final class NeverJitFn[F, G]( f: G ?=> (i: Instrumentation) => i.InstrumentedFn[F] ) extends OptimizableFn[F, G]: private var _f: F | Null = null + def isOptimized: Boolean = false final def forceOptimize(using G): F = - _f = f(IgnoreInstrumentation) + _f = f(IgnoreInstrumentation(false)) _f.nn final def triggerOptimization(using G): Unit = () @@ -72,6 +90,7 @@ final class InstantJitFn[F, G]( optimizer: JitCService, f: G ?=> JitCompiler => F ) extends OptimizableFn[F, G]: + def isOptimized: Boolean = _permFn != null private val _fn: AtomicReference[F] = AtomicReference() private var _permFn: F | Null = null @@ -97,66 +116,6 @@ final class InstantJitFn[F, G]( _fn.setOpaque(optimized) ) -// final class OptimizableFn[F, G]( -// optimizer: JitCService, -// inst: Instrumentation = new CountbasedInstrumentation -// )( -// f: G ?=> (i: Instrumentation) => i.InstrumentedFn[F], -// limit: Int -// )(optimized: G ?=> JitCompiler => F): -// private val _fn: AtomicReference[F] = AtomicReference() -// final val uuid = UUID.randomUUID().nn -// private val _optFn: AtomicReference[F] = AtomicReference() -// private var _permOptFn: F | Null = null - -// final def forceOptimize(using G) = -// optimizer.jitC( -// uuid, -// jitCompiler => -// val opt = optimized(jitCompiler) -// _optFn.setOpaque( -// opt -// ) -// ) -// while _optFn.getOpaque() == null do {} -// _permOptFn = _optFn.getOpaque().nn -// _permOptFn.nn - -// private def optimize(using G): F = -// optimizer.jitC( -// uuid, -// jitCompiler => -// val opt = optimized(jitCompiler) -// _optFn.setOpaque(opt) -// ) -// if optimizer.async then getFn -// else -// while _optFn.getOpaque() == null do {} -// _optFn.getOpaque().nn - -// private def checkOptTrigger(using G) = -// if inst.getCount() >= limit then optimize -// else getFn - -// private def tryGetOptFn(using G) = -// val optFn = _optFn.getOpaque() - -// if optFn != null then -// _permOptFn = optFn -// optFn -// else checkOptTrigger - -// private def getFn(using G) = -// val fn = _fn.getOpaque() -// if fn != null then fn -// else -// val nFn = f(inst) -// _fn.set(nFn) -// nFn - -// final def get(using G): F = -// if _permOptFn != null then _permOptFn.nn -// else tryGetOptFn object OptimizableFn: val modeSetting = "slinc.jitc.mode" val limitSetting = "slinc.jitc.jit-limit" @@ -173,7 +132,7 @@ object OptimizableFn: case Some(value) => new FnToJit[F, G]( JitCService.standard, - updateFn => CountbasedInstrumentation(updateFn, value), + CountbasedInstrumentation(value), optimized, unoptimizedFn ) diff --git a/core/test/src/fr/hammons/slinc/jitc/JitSpecification.scala b/core/test/src/fr/hammons/slinc/jitc/JitSpecification.scala index 6ba3d6d4..e8582d2e 100644 --- a/core/test/src/fr/hammons/slinc/jitc/JitSpecification.scala +++ b/core/test/src/fr/hammons/slinc/jitc/JitSpecification.scala @@ -3,7 +3,7 @@ package fr.hammons.slinc.jitc import scala.concurrent.Future import scala.concurrent.ExecutionContext.Implicits.global import scala.compiletime.codeOf -import scala.concurrent.duration.Duration +import scala.concurrent.duration.* import scala.concurrent.Await class JitSpecification extends munit.FunSuite: @@ -12,7 +12,7 @@ class JitSpecification extends munit.FunSuite: var fn: OptimizableFn[Int => Int, DummyImplicit] = new FnToJit( JitCService.standard, - CountbasedInstrumentation(_, 10), + CountbasedInstrumentation(10), jitCompiler => jitCompiler('{ (optimizedFn: Boolean => Unit) => (i: Int) => optimizedFn(true) @@ -23,7 +23,9 @@ class JitSpecification extends munit.FunSuite: for _ <- 0 to 10 yield fn.get(3) - while !JitCService.standard.processedRecently(fn.uuid) do + while !fn.isOptimized do + println("wait") + fn.get Thread.sleep(100) fn.get(4) assertEquals(optimized, true) @@ -33,7 +35,7 @@ class JitSpecification extends munit.FunSuite: val fn: OptimizableFn[Int => Int, DummyImplicit] = new FnToJit( JitCService.standard, - CountbasedInstrumentation(_, 10), + CountbasedInstrumentation(10), jitCompiler => jitCompiler('{ (optimizedFn: Int => Unit) => (i: Int) => optimizedFn(i) @@ -44,9 +46,10 @@ class JitSpecification extends munit.FunSuite: val futures = for i <- 0 until 10 yield Future { - for _ <- 0 until 100000 + for _ <- 0 until 1 yield fn.get(i) - while !JitCService.standard.processedRecently(fn.uuid) do + while !fn.isOptimized do + fn.get Thread.sleep(100) fn.get(i) @@ -102,15 +105,28 @@ class JitSpecification extends munit.FunSuite: assertEquals(optimized, false) test("Ignore instrumentation records no info"): - val ignoreInstrumentation = IgnoreInstrumentation + val ignoreInstrumentation = IgnoreInstrumentation(false) assertEquals(ignoreInstrumentation.getCount(), 0) ignoreInstrumentation.instrument(5) assertEquals(ignoreInstrumentation.getCount(), 0) test("Count instrumentation records invokations"): - val countInstrumentation = CountbasedInstrumentation(() => (), 100) + val countInstrumentation = CountbasedInstrumentation(100) assertEquals(countInstrumentation.getCount(), 0) countInstrumentation.instrument(5) assertEquals(countInstrumentation.getCount(), 1) + + test("Count instrumentation is accurate in multithreaded contexts"): + val countInstrumentation = CountbasedInstrumentation(10) + + val results = + for _ <- 0 until 10 + yield Future { + countInstrumentation.instrument(5) + } + + results.foreach(Await.result(_, 5.seconds)) + + assert(countInstrumentation.shouldOpt) diff --git a/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarks17.scala b/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarks17.scala index 357b9385..b08e92b7 100644 --- a/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarks17.scala +++ b/j17/benchmarks/test/src/fr/hammons/slinc/TransferBenchmarks17.scala @@ -27,6 +27,9 @@ class TransferBenchmarks17 extends TransferBenchmarkShape(Slinc17.noJit) { val ms = segAlloc.allocate(ml) val h = H(1, 2f, CLong(3)) + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + def getH = h + val writerFn = (ms: MemorySegment | Null, offset: Bytes, value: H) => MemoryAccess.setIntAtOffset(ms, offset.toLong + 0, h.a) MemoryAccess.setFloatAtOffset(ms, offset.toLong + 4, h.b) @@ -34,7 +37,7 @@ class TransferBenchmarks17 extends TransferBenchmarkShape(Slinc17.noJit) { @Benchmark def writeManual(blackhole: Blackhole) = blackhole.consume( - writerFn(ms, Bytes(0), h) + writerFn(ms, offset, getH) ) }