@@ -339,6 +339,7 @@ struct hash_node {
339339};
340340
341341struct tensor_alloc { // placement of one tensor as recorded by ggml_gallocr_reserve_n
342+ int buffer_id ; // index into galloc->bufts / galloc->buffers; -1 when the tensor is pre-allocated, unused, or a view
342343 size_t offset ; // copied from the tensor's hash_node; SIZE_MAX when buffer_id is -1
343344 size_t size_max ; // 0 = pre-allocated, unused, or view
344345};
@@ -349,7 +350,6 @@ struct leaf_alloc {
349350};
350351
351352struct node_alloc { // allocation record for one graph node, stored as galloc->node_allocs[i]
352- int buffer_id ;
353353 struct tensor_alloc dst ; // placement of the node's own tensor
354354 struct tensor_alloc src [GGML_MAX_SRC ]; // placement of node->src[j]; NULL sources are recorded with buffer_id -1 and size_max 0
355355};
@@ -386,8 +386,19 @@ ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs
386386 for (int i = 0 ; i < n_bufs ; i ++ ) {
387387 galloc -> bufts [i ] = bufts [i ];
388388 galloc -> buffers [i ] = NULL ;
389- size_t alignment = ggml_backend_buft_get_alignment (bufts [i ]);
390- galloc -> buf_tallocs [i ] = ggml_dyn_tallocr_new (alignment );
389+
390+ // check if the same buffer type is used multiple times and reuse the same allocator
391+ for (int j = 0 ; j < i ; j ++ ) {
392+ if (bufts [i ] == bufts [j ]) {
393+ galloc -> buf_tallocs [i ] = galloc -> buf_tallocs [j ];
394+ break ;
395+ }
396+ }
397+
398+ if (galloc -> buf_tallocs [i ] == NULL ) {
399+ size_t alignment = ggml_backend_buft_get_alignment (bufts [i ]);
400+ galloc -> buf_tallocs [i ] = ggml_dyn_tallocr_new (alignment );
401+ }
391402 }
392403 galloc -> n_buffers = n_bufs ;
393404
@@ -405,10 +416,30 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
405416
406417 for (int i = 0 ; i < galloc -> n_buffers ; i ++ ) {
407418 if (galloc -> buffers != NULL ) {
408- ggml_backend_buffer_free (galloc -> buffers [i ]);
419+ // skip if already freed
420+ bool freed = false;
421+ for (int j = 0 ; j < i ; j ++ ) {
422+ if (galloc -> buffers [j ] == galloc -> buffers [i ]) {
423+ freed = true;
424+ break ;
425+ }
426+ }
427+ if (!freed ) {
428+ ggml_backend_buffer_free (galloc -> buffers [i ]);
429+ }
409430 }
410431 if (galloc -> buf_tallocs != NULL ) {
411- ggml_dyn_tallocr_free (galloc -> buf_tallocs [i ]);
432+ // skip if already freed
433+ bool freed = false;
434+ for (int j = 0 ; j < i ; j ++ ) {
435+ if (galloc -> buf_tallocs [j ] == galloc -> buf_tallocs [i ]) {
436+ freed = true;
437+ break ;
438+ }
439+ }
440+ if (!freed ) {
441+ ggml_dyn_tallocr_free (galloc -> buf_tallocs [i ]);
442+ }
412443 }
413444 }
414445
@@ -511,17 +542,18 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor
511542 }
512543}
513544
514- static void ggml_gallocr_free_node (ggml_gallocr_t galloc , struct ggml_tensor * node , int buffer_id ) {
545+ static void ggml_gallocr_free_node (ggml_gallocr_t galloc , struct ggml_tensor * node ) {
515546 // graph outputs are never freed
516547 if (node -> flags & GGML_TENSOR_FLAG_OUTPUT ) {
517548 AT_PRINTF ("not freeing output %s\n" , node -> name );
518549 return ;
519550 }
520551
521- struct ggml_dyn_tallocr * alloc = galloc -> buf_tallocs [buffer_id ];
522- ggml_backend_buffer_type_t buft = galloc -> bufts [buffer_id ];
523552 struct hash_node * hn = ggml_gallocr_hash_get (galloc , node );
524553 size_t offset = hn -> offset ;
554+ int buffer_id = hn -> buffer_id ;
555+ struct ggml_dyn_tallocr * alloc = galloc -> buf_tallocs [buffer_id ];
556+ ggml_backend_buffer_type_t buft = galloc -> bufts [buffer_id ];
525557 size_t size = ggml_backend_buft_get_alloc_size (buft , node );
526558 ggml_dyn_tallocr_free_tensor (alloc , offset , size , node );
527559 hn -> allocated = false;
@@ -626,11 +658,11 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
626658 AT_PRINTF ("view_src %s: %d children, %d views\n" ,
627659 view_src -> name , view_src_hn -> n_children , view_src_hn -> n_views );
628660 if (view_src_hn -> n_views == 0 && view_src_hn -> n_children == 0 && view_src_hn -> allocated ) {
629- ggml_gallocr_free_node (galloc , view_src , buffer_id );
661+ ggml_gallocr_free_node (galloc , view_src );
630662 }
631663 }
632664 else if (p_hn -> allocated ) {
633- ggml_gallocr_free_node (galloc , parent , buffer_id );
665+ ggml_gallocr_free_node (galloc , parent );
634666 }
635667 }
636668 AT_PRINTF ("\n" );
@@ -674,22 +706,25 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
674706 for (int i = 0 ; i < graph -> n_nodes ; i ++ ) {
675707 struct ggml_tensor * node = graph -> nodes [i ];
676708 struct node_alloc * node_alloc = & galloc -> node_allocs [i ];
677- node_alloc -> buffer_id = get_node_buffer_id (node_buffer_ids , i );
678709 if (node -> view_src || node -> data ) {
710+ node_alloc -> dst .buffer_id = -1 ;
679711 node_alloc -> dst .offset = SIZE_MAX ;
680712 node_alloc -> dst .size_max = 0 ;
681713 } else {
682714 struct hash_node * hn = ggml_gallocr_hash_get (galloc , node );
683- node_alloc -> dst .offset = hn -> offset ;
684- node_alloc -> dst .size_max = ggml_backend_buft_get_alloc_size (galloc -> bufts [hn -> buffer_id ], node );
715+ node_alloc -> dst .buffer_id = hn -> buffer_id ;
716+ node_alloc -> dst .offset = hn -> offset ;
717+ node_alloc -> dst .size_max = ggml_backend_buft_get_alloc_size (galloc -> bufts [hn -> buffer_id ], node );
685718 }
686719 for (int j = 0 ; j < GGML_MAX_SRC ; j ++ ) {
687720 struct ggml_tensor * src = node -> src [j ];
688721 if (!src || src -> view_src || src -> data ) {
722+ node_alloc -> src [j ].buffer_id = -1 ;
689723 node_alloc -> src [j ].offset = SIZE_MAX ;
690724 node_alloc -> src [j ].size_max = 0 ;
691725 } else {
692726 struct hash_node * hn = ggml_gallocr_hash_get (galloc , src );
727+ node_alloc -> src [j ].buffer_id = hn -> buffer_id ;
693728 node_alloc -> src [j ].offset = hn -> offset ;
694729 node_alloc -> src [j ].size_max = ggml_backend_buft_get_alloc_size (galloc -> bufts [hn -> buffer_id ], src );
695730 }
@@ -706,16 +741,26 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
706741 struct hash_node * hn = ggml_gallocr_hash_get (galloc , leaf );
707742 galloc -> leaf_allocs [i ].buffer_id = hn -> buffer_id ;
708743 if (leaf -> view_src || leaf -> data ) {
744+ galloc -> leaf_allocs [i ].leaf .buffer_id = -1 ;
709745 galloc -> leaf_allocs [i ].leaf .offset = SIZE_MAX ;
710746 galloc -> leaf_allocs [i ].leaf .size_max = 0 ;
711747 } else {
748+ galloc -> leaf_allocs [i ].leaf .buffer_id = hn -> buffer_id ;
712749 galloc -> leaf_allocs [i ].leaf .offset = hn -> offset ;
713750 galloc -> leaf_allocs [i ].leaf .size_max = ggml_backend_buft_get_alloc_size (galloc -> bufts [hn -> buffer_id ], leaf );
714751 }
715752 }
716753
717754 // reallocate buffers if needed
718755 for (int i = 0 ; i < galloc -> n_buffers ; i ++ ) {
756+ // if the buffer type is used multiple times, we reuse the same buffer
757+ for (int j = 0 ; j < i ; j ++ ) {
758+ if (galloc -> buf_tallocs [j ] == galloc -> buf_tallocs [i ]) {
759+ galloc -> buffers [i ] = galloc -> buffers [j ];
760+ break ;
761+ }
762+ }
763+
719764 size_t cur_size = galloc -> buffers [i ] ? ggml_backend_buffer_get_size (galloc -> buffers [i ]) : 0 ;
720765 size_t new_size = ggml_dyn_tallocr_max_size (galloc -> buf_tallocs [i ]);
721766
@@ -724,6 +769,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
724769#ifndef NDEBUG
725770 fprintf (stderr , "%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n" , __func__ , ggml_backend_buft_name (galloc -> bufts [i ]), cur_size / 1024.0 / 1024.0 , new_size / 1024.0 / 1024.0 );
726771#endif
772+
727773 ggml_backend_buffer_free (galloc -> buffers [i ]);
728774 galloc -> buffers [i ] = ggml_backend_buft_alloc_buffer (galloc -> bufts [i ], new_size );
729775 if (galloc -> buffers [i ] == NULL ) {
@@ -740,7 +786,8 @@ bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph *graph) {
740786 return ggml_gallocr_reserve_n (galloc , graph , NULL , NULL );
741787}
742788
743- static void ggml_gallocr_init_tensor (ggml_gallocr_t galloc , struct ggml_tensor * tensor , int buffer_id , struct tensor_alloc * tensor_alloc ) {
789+ static void ggml_gallocr_init_tensor (ggml_gallocr_t galloc , struct ggml_tensor * tensor , struct tensor_alloc * tensor_alloc ) {
790+ int buffer_id = tensor_alloc -> buffer_id ;
744791 assert (tensor -> data || tensor -> view_src || ggml_backend_buffer_get_alloc_size (galloc -> buffers [buffer_id ], tensor ) <= tensor_alloc -> size_max );
745792
746793 if (tensor -> view_src != NULL ) {
@@ -768,8 +815,8 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
768815 }
769816}
770817
771- static bool ggml_gallocr_node_needs_realloc (ggml_gallocr_t galloc , struct ggml_tensor * node , struct node_alloc * nalloc , struct tensor_alloc * talloc ) {
772- ggml_backend_buffer_type_t buft = galloc -> bufts [nalloc -> buffer_id ];
818+ static bool ggml_gallocr_node_needs_realloc (ggml_gallocr_t galloc , struct ggml_tensor * node , struct tensor_alloc * talloc ) { // NOTE: despite the name, returns true when the recorded slot still fits; callers negate the result
819+ ggml_backend_buffer_type_t buft = talloc -> buffer_id != -1 ? galloc -> bufts [talloc -> buffer_id ] : NULL ; // buft is NULL for entries recorded with buffer_id -1
773820 size_t node_size = (node -> data || node -> view_src ) ? 0 : ggml_backend_buft_get_alloc_size (buft , node ); // NOTE(review): a NULL buft reaches ggml_backend_buft_get_alloc_size when buffer_id is -1 but the node has neither data nor view_src - confirm this is safe, or return false before sizing
774821 return talloc -> size_max >= node_size ; // true = the size recorded at reserve time is large enough for the node
775822}
@@ -793,7 +840,7 @@ static bool ggml_gallocr_needs_realloc(ggml_gallocr_t galloc, struct ggml_cgraph
793840 struct ggml_tensor * node = graph -> nodes [i ];
794841 struct node_alloc * node_alloc = & galloc -> node_allocs [i ];
795842
796- if (!ggml_gallocr_node_needs_realloc (galloc , node , node_alloc , & node_alloc -> dst )) {
843+ if (!ggml_gallocr_node_needs_realloc (galloc , node , & node_alloc -> dst )) {
797844#ifndef NDEBUG
798845 fprintf (stderr , "%s: node %s is not valid\n" , __func__ , node -> name );
799846#endif
@@ -805,7 +852,7 @@ static bool ggml_gallocr_needs_realloc(ggml_gallocr_t galloc, struct ggml_cgraph
805852 if (src == NULL ) {
806853 continue ;
807854 }
808- if (!ggml_gallocr_node_needs_realloc (galloc , src , node_alloc , & node_alloc -> src [j ])) {
855+ if (!ggml_gallocr_node_needs_realloc (galloc , src , & node_alloc -> src [j ])) {
809856#ifndef NDEBUG
810857 fprintf (stderr , "%s: src %d (%s) of node %s is not valid\n" , __func__ , j , src -> name , node -> name );
811858#endif
@@ -846,7 +893,7 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph)
846893 for (int i = 0 ; i < graph -> n_leafs ; i ++ ) {
847894 struct ggml_tensor * leaf = graph -> leafs [i ];
848895 struct leaf_alloc * leaf_alloc = & galloc -> leaf_allocs [i ];
849- ggml_gallocr_init_tensor (galloc , leaf , leaf_alloc -> buffer_id , & leaf_alloc -> leaf );
896+ ggml_gallocr_init_tensor (galloc , leaf , & leaf_alloc -> leaf );
850897 }
851898 // nodes
852899 for (int i = 0 ; i < graph -> n_nodes ; i ++ ) {
@@ -857,9 +904,9 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph)
857904 if (src == NULL ) {
858905 continue ;
859906 }
860- ggml_gallocr_init_tensor (galloc , src , node_alloc -> buffer_id , & node_alloc -> src [j ]);
907+ ggml_gallocr_init_tensor (galloc , src , & node_alloc -> src [j ]);
861908 }
862- ggml_gallocr_init_tensor (galloc , node , node_alloc -> buffer_id , & node_alloc -> dst );
909+ ggml_gallocr_init_tensor (galloc , node , & node_alloc -> dst );
863910 }
864911
865912 return true;
@@ -871,6 +918,15 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
871918 if (galloc -> buffers [buffer_id ] == NULL ) {
872919 return 0 ;
873920 }
921+
922+ for (int i = 0 ; i < buffer_id ; i ++ ) {
923+ if (galloc -> buffers [i ] == galloc -> buffers [buffer_id ]) {
924+ // this buffer is the same as a previous one due to the same buffer type being used multiple times
925+ // only return the buffer size the first time it appears to avoid double counting
926+ return 0 ;
927+ }
928+ }
929+
874930 return ggml_backend_buffer_get_size (galloc -> buffers [buffer_id ]);
875931}
876932
0 commit comments