Refactor Weights: store wcls as HalfFloatArray, drop FloatArray helpers

mikepapadim · mikepapadim · commit 783883c11a97 · 2025-05-22T17:55:21.000+03:00
diff --git a/src/main/java/com/example/loader/weights/Weights.java b/src/main/java/com/example/loader/weights/Weights.java
@@ -1,14 +1,21 @@
 package com.example.loader.weights;
 
-import com.example.LlamaApp;
 import com.example.core.model.GGMLType;
 import com.example.core.model.tensor.FloatTensor;
+import com.example.core.model.tensor.GGMLTensorEntry;
+import com.example.core.types.Float16;
 import uk.ac.manchester.tornado.api.types.HalfFloat;
 import uk.ac.manchester.tornado.api.types.arrays.ByteArray;
 import uk.ac.manchester.tornado.api.types.arrays.FloatArray;
 import uk.ac.manchester.tornado.api.types.arrays.HalfFloatArray;
 
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteOrder;
 import java.nio.FloatBuffer;
+import java.util.function.IntFunction;
+
+import static com.example.core.model.tensor.FloatTensor.readByte;
+import static com.example.core.model.tensor.FloatTensor.readShort;
 
 public class Weights {
     // token embedding table
@@ -28,7 +35,7 @@ public class Weights {
     public final FloatTensor[] w3; // (layer, hidden_dim, dim)
     //
     public final FloatTensor wcls; // (vocab_size, dim)
-    public final ByteArray wclsByteArray;
+    public final HalfFloatArray wclsHalfFloat;
     // public final rmsnorm
     public final FloatBuffer rms_final_weight; // (dim,)
     // freq_cis for RoPE relatively positional embeddings
@@ -51,6 +58,7 @@ public class Weights {
     public FloatArray freq_cis_imagFlat; // (seq_len, head_size/2)
     // (optional) classifier weights for the logits, on the last layer
     public GGMLType weightType;
+
     /**
      * Constructor to initialize all weight tensors for the model. Automatically creates TornadoVM-compatible versions when needed.
      *
@@ -86,17 +94,19 @@ public class Weights {
     /**
      * Constructor for standard (non-TornadoVM) mode
      */
-    public Weights(FloatTensor token_embedding_table, FloatBuffer[] rms_att_weight,
-            FloatTensor[] wq, FloatTensor[] wk, FloatTensor[] wv, FloatTensor[] wo,
-            FloatBuffer[] rms_ffn_weight, FloatTensor[] w1, FloatTensor[] w2, FloatTensor[] w3,
-            FloatBuffer rms_final_weight, FloatBuffer freq_cis_real, FloatBuffer freq_cis_imag,
-            FloatTensor wcls, GGMLType weightType) {
+    public Weights(FloatTensor token_embedding_table, FloatBuffer[] rms_att_weight, FloatTensor[] wq, FloatTensor[] wk, FloatTensor[] wv, FloatTensor[] wo, FloatBuffer[] rms_ffn_weight,
+            FloatTensor[] w1, FloatTensor[] w2, FloatTensor[] w3, FloatBuffer rms_final_weight, FloatBuffer freq_cis_real, FloatBuffer freq_cis_imag, FloatTensor wcls, GGMLType weightType) {
         // Standard format
         this.token_embedding_table = token_embedding_table;
         this.rms_att_weight = rms_att_weight;
-        this.wq = wq; this.wk = wk; this.wv = wv; this.wo = wo;
+        this.wq = wq;
+        this.wk = wk;
+        this.wv = wv;
+        this.wo = wo;
         this.rms_ffn_weight = rms_ffn_weight;
-        this.w1 = w1; this.w2 = w2; this.w3 = w3;
+        this.w1 = w1;
+        this.w2 = w2;
+        this.w3 = w3;
         this.wcls = wcls;
         this.rms_final_weight = rms_final_weight;
         this.freq_cis_real = freq_cis_real;
@@ -106,110 +116,58 @@ public Weights(FloatTensor token_embedding_table, FloatBuffer[] rms_att_weight,
         // TornadoVM format (null when not using TornadoVM)
         this.tokenEmbeddingTable = null;
         this.rms_att_weightLayered = null;
-        this.wqLayered = null; this.wkLayered = null; this.wvLayered = null; this.woLayered = null;
+        this.wqLayered = null;
+        this.wkLayered = null;
+        this.wvLayered = null;
+        this.woLayered = null;
         this.rms_ffn_weightLayered = null;
-        this.w1Layered = null; this.w2Layered = null; this.w3Layered = null;
+        this.w1Layered = null;
+        this.w2Layered = null;
+        this.w3Layered = null;
         this.rms_final_weight_as_floatArray = null;
-        this.freq_cis_realFlat = null; this.freq_cis_imagFlat = null;
-        this.wclsByteArray = null;
+        this.freq_cis_realFlat = null;
+        this.freq_cis_imagFlat = null;
+        this.wclsHalfFloat = null;
     }
 
     /**
      * Constructor for TornadoVM mode
      */
-    public Weights(FloatArray tokenEmbeddingTable,
-            FloatArray[] rms_att_weightLayered,
-            HalfFloatArray[] wqLayered, HalfFloatArray[] wkLayered, HalfFloatArray[] wvLayered, HalfFloatArray[] woLayered,
-            FloatArray[] rms_ffn_weightLayered, HalfFloatArray[] w1Layered, HalfFloatArray[] w2Layered, HalfFloatArray[] w3Layered,
-            FloatArray rms_final_weight_as_floatArray, FloatArray freq_cis_realFlat, FloatArray freq_cis_imagFlat,
-            ByteArray wclsByteArray, GGMLType weightType) {
+    public Weights(FloatArray tokenEmbeddingTable, FloatArray[] rms_att_weightLayered, HalfFloatArray[] wqLayered, HalfFloatArray[] wkLayered, HalfFloatArray[] wvLayered, HalfFloatArray[] woLayered,
+            FloatArray[] rms_ffn_weightLayered, HalfFloatArray[] w1Layered, HalfFloatArray[] w2Layered, HalfFloatArray[] w3Layered, FloatArray rms_final_weight_as_floatArray,
+            FloatArray freq_cis_realFlat, FloatArray freq_cis_imagFlat, HalfFloatArray wclsHalfFloat, GGMLType weightType) {
         // Standard format (null when using TornadoVM)
         this.token_embedding_table = null;
         this.rms_att_weight = null;
-        this.wq = null; this.wk = null; this.wv = null; this.wo = null;
+        this.wq = null;
+        this.wk = null;
+        this.wv = null;
+        this.wo = null;
         this.rms_ffn_weight = null;
-        this.w1 = null; this.w2 = null; this.w3 = null;
+        this.w1 = null;
+        this.w2 = null;
+        this.w3 = null;
         this.wcls = null;
         this.rms_final_weight = null;
-        this.freq_cis_real = null; this.freq_cis_imag = null;
+        this.freq_cis_real = null;
+        this.freq_cis_imag = null;
 
         // TornadoVM format
         this.tokenEmbeddingTable = tokenEmbeddingTable;
         this.rms_att_weightLayered = rms_att_weightLayered;
-        this.wqLayered = wqLayered; this.wkLayered = wkLayered; this.wvLayered = wvLayered; this.woLayered = woLayered;
+        this.wqLayered = wqLayered;
+        this.wkLayered = wkLayered;
+        this.wvLayered = wvLayered;
+        this.woLayered = woLayered;
         this.rms_ffn_weightLayered = rms_ffn_weightLayered;
-        this.w1Layered = w1Layered; this.w2Layered = w2Layered; this.w3Layered = w3Layered;
+        this.w1Layered = w1Layered;
+        this.w2Layered = w2Layered;
+        this.w3Layered = w3Layered;
         this.rms_final_weight_as_floatArray = rms_final_weight_as_floatArray;
-        this.freq_cis_realFlat = freq_cis_realFlat; this.freq_cis_imagFlat = freq_cis_imagFlat;
-        this.wclsByteArray = wclsByteArray;
+        this.freq_cis_realFlat = freq_cis_realFlat;
+        this.freq_cis_imagFlat = freq_cis_imagFlat;
+        this.wclsHalfFloat = wclsHalfFloat;
         this.weightType = weightType;
     }
 
-    /**
-     * Converts an array of FloatBuffer objects to TornadoVM FloatArray format. Preserves the original buffer position after conversion.
-     *
-     * @param array
-     *         Array of FloatBuffers to convert
-     * @return Array of FloatArrays with the same data
-     */
-    private static FloatArray[] loadToFloatArray(FloatBuffer[] array) {
-        FloatArray[] result = new FloatArray[array.length];
-        for (int i = 0; i < array.length; i++) {
-            int size = array[i].remaining();
-            result[i] = new FloatArray(size);
-
-            // Save and restore buffer position to avoid side effects
-            int originalPosition = array[i].position();
-
-            for (int j = 0; j < size; j++) {
-                float value = array[i].get();
-                result[i].set(j, value);
-            }
-            // Reset buffer position
-            array[i].position(originalPosition);
-        }
-
-        return result;
-    }
-
-
-    /**
-     * Converts a single FloatBuffer to a TornadoVM FloatArray. Creates a duplicate buffer to avoid modifying the original.
-     *
-     * @param input
-     *         FloatBuffer to convert
-     * @return FloatArray with the same data
-     */
-    private static FloatArray loadToSingleFloatArray(FloatBuffer input) {
-        // Create a duplicate to prevent modifying the original buffer
-        FloatBuffer copy = input.duplicate();
-        int totalSize = copy.remaining();
-
-        FloatArray result = new FloatArray(totalSize);
-
-        int index = 0;
-        while (copy.hasRemaining()) {
-            result.set(index++, copy.get());
-        }
-
-        return result;
-    }
-
-    /**
-     * Converts a FloatTensor to a TornadoVM FloatArray.
-     *
-     * @param input
-     *         FloatTensor to convert
-     * @return FloatArray with the same data
-     */
-    public FloatArray loadToFloatArray(FloatTensor input) {
-        FloatArray floatArray = new FloatArray(input.size());
-
-        for (int i = 0; i < input.size(); i++) {
-            floatArray.set(i, input.getFloat(i));
-        }
-
-        return floatArray;
-    }
-
 }