@@ -80,9 +80,7 @@ __global__ void duplicateWithKeys(
8080 uint32_t * gaussian_values_unsorted,
8181 int * radii,
8282 bool * compute_locally,
83- dim3 grid,
84- int local_rank,
85- int world_size)
83+ dim3 grid)
8684{
8785 auto idx = cg::this_grid ().thread_rank ();
8886 if (idx >= P)
@@ -289,9 +287,9 @@ __global__ void updateTileTouched(
289287 tiles_touched[idx] = cnt;
290288}
291289
292- void save_log_in_file (int iteration, int local_rank , int world_size, std::string log_folder, const char * filename_prefix, const char * log_content) {
293- char * filename = new char [128 ];
294- sprintf (filename, " %s/%s_ws=%d_rk=%d.log" , log_folder.c_str (), filename_prefix, world_size, local_rank );
290+ void save_log_in_file (int iteration, int global_rank , int world_size, std::string log_folder, const char * filename_prefix, const char * log_content) {
291+ char * filename = new char [256 ];
292+ sprintf (filename, " %s/%s_ws=%d_rk=%d.log" , log_folder.c_str (), filename_prefix, world_size, global_rank );
295293 std::ofstream outfile;
296294 outfile.open (filename, std::ios_base::app);
297295 outfile << " iteration: " << iteration << " , " << log_content << " \n " ;
@@ -307,7 +305,7 @@ void save_log_in_file(int iteration, int local_rank, int world_size, std::string
307305std::tuple<int , int , int , int , int , bool , bool , std::string, std::string, std::string>
308306 prepareArgs (const pybind11::dict &args) {
309307 std::string mode = args[" mode" ].cast <std::string>();
310- std::string local_rank_str = args[" local_rank " ].cast <std::string>();
308+ std::string global_rank_str = args[" global_rank " ].cast <std::string>();
311309 std::string world_size_str = args[" world_size" ].cast <std::string>();
312310 std::string iteration_str = args[" iteration" ].cast <std::string>();
313311 std::string log_interval_str = args[" log_interval" ].cast <std::string>();
@@ -317,7 +315,7 @@ std::tuple<int, int, int, int, int, bool, bool, std::string, std::string, std::s
317315 // std::string dist_division_mode_str = args["dist_division_mode"].cast<std::string>();
318316 std::string dist_division_mode_str = " " ;
319317
320- int local_rank = std::stoi (local_rank_str );
318+ int global_rank = std::stoi (global_rank_str );
321319 int world_size = std::stoi (world_size_str);
322320 int iteration = std::stoi (iteration_str);
323321 int log_interval = std::stoi (log_interval_str);
@@ -328,7 +326,7 @@ std::tuple<int, int, int, int, int, bool, bool, std::string, std::string, std::s
328326 cudaError_t status = cudaGetDevice (&device);
329327
330328 // Pack and return the variables in a tuple
331- return std::make_tuple (local_rank , world_size, iteration, log_interval, device,
329+ return std::make_tuple (global_rank , world_size, iteration, log_interval, device,
332330 zhx_debug, zhx_time,
333331 mode, dist_division_mode_str, log_folder_str);
334332}
@@ -359,14 +357,14 @@ int CudaRasterizer::Rasterizer::preprocessForward(
359357 bool debug,// raster_settings
360358 const pybind11::dict &args)
361359{
362- auto [local_rank , world_size, iteration, log_interval, device, zhx_debug, zhx_time, mode, dist_division_mode, log_folder] = prepareArgs (args);
360+ auto [global_rank , world_size, iteration, log_interval, device, zhx_debug, zhx_time, mode, dist_division_mode, log_folder] = prepareArgs (args);
363361 char * log_tmp = new char [500 ];
364362
365363 // print out the environment variables
366364 if (mode == " train" && zhx_debug && iteration % log_interval == 1 ) {
367- sprintf (log_tmp, " world_size: %d, local_rank : %d, iteration: %d, log_folder: %s, zhx_debug: %d, zhx_time: %d, device: %d, log_interval: %d, dist_division_mode: %s" ,
368- world_size, local_rank , iteration, log_folder.c_str (), zhx_debug, zhx_time, device, log_interval, dist_division_mode.c_str ());
369- save_log_in_file (iteration, local_rank , world_size, log_folder, " cuda" , log_tmp);
365+ sprintf (log_tmp, " world_size: %d, global_rank : %d, iteration: %d, log_folder: %s, zhx_debug: %d, zhx_time: %d, device: %d, log_interval: %d, dist_division_mode: %s" ,
366+ world_size, global_rank , iteration, log_folder.c_str (), zhx_debug, zhx_time, device, log_interval, dist_division_mode.c_str ());
367+ save_log_in_file (iteration, global_rank , world_size, log_folder, " cuda" , log_tmp);
370368 }
371369
372370 MyTimerOnGPU timer;
@@ -410,17 +408,15 @@ int CudaRasterizer::Rasterizer::preprocessForward(
410408 conic_opacity,
411409 tile_grid,
412410 tiles_touched_temp_buffer,
413- prefiltered,
414- local_rank,
415- world_size
411+ prefiltered
416412 ), debug)
417413 timer.stop (" 10 preprocess" );
418414
419415 int num_rendered = 0 ;// TODO: should I calculate this here?
420416
421417 // Print out timing information
422418 if (zhx_time && iteration % log_interval == 1 ) {
423- timer.printAllTimes (iteration, world_size, local_rank , log_folder, true );
419+ timer.printAllTimes (iteration, world_size, global_rank , log_folder, true );
424420 }
425421 delete log_tmp;
426422 // free temporary buffer for tiles_touched. TODO: remove it.
@@ -454,7 +450,7 @@ void CudaRasterizer::Rasterizer::preprocessBackward(
454450 bool debug,
455451 const pybind11::dict &args)
456452{
457- auto [local_rank , world_size, iteration, log_interval, device, zhx_debug, zhx_time, mode, dist_division_mode, log_folder] = prepareArgs (args);
453+ auto [global_rank , world_size, iteration, log_interval, device, zhx_debug, zhx_time, mode, dist_division_mode, log_folder] = prepareArgs (args);
458454
459455 MyTimerOnGPU timer;
460456 const float focal_y = height / (2 .0f * tan_fovy);
@@ -488,7 +484,7 @@ void CudaRasterizer::Rasterizer::preprocessBackward(
488484
489485 // Print out timing information
490486 if (zhx_time && iteration % log_interval == 1 ) {
491- timer.printAllTimes (iteration, world_size, local_rank , log_folder, false );
487+ timer.printAllTimes (iteration, world_size, global_rank , log_folder, false );
492488 }
493489}
494490
@@ -537,7 +533,7 @@ int CudaRasterizer::Rasterizer::renderForward(
537533 bool debug,
538534 const pybind11::dict &args)
539535{
540- auto [local_rank , world_size, iteration, log_interval, device, zhx_debug, zhx_time, mode, dist_division_mode, log_folder] = prepareArgs (args);
536+ auto [global_rank , world_size, iteration, log_interval, device, zhx_debug, zhx_time, mode, dist_division_mode, log_folder] = prepareArgs (args);
541537 char * log_tmp = new char [500 ];
542538
543539 MyTimerOnGPU timer;
@@ -593,9 +589,7 @@ int CudaRasterizer::Rasterizer::renderForward(
593589 binningState.point_list_unsorted ,
594590 radii,
595591 compute_locally,
596- tile_grid,
597- local_rank,
598- world_size)
592+ tile_grid)
599593 CHECK_CUDA (, debug)
600594 timer.stop (" 40 duplicateWithKeys" );
601595
@@ -675,7 +669,7 @@ int CudaRasterizer::Rasterizer::renderForward(
675669 // ////////////////////////// Logging && Save Statistics ////////////////////////////////////////////
676670 // DEBUG: print out timing information
677671 if (mode == " train" && zhx_time && iteration % log_interval == 1 ) {
678- timer.printAllTimes (iteration, world_size, local_rank , log_folder, false );
672+ timer.printAllTimes (iteration, world_size, global_rank , log_folder, false );
679673 }
680674
681675 // DEBUG: print out the number of Gaussians contributing to each pixel.
@@ -731,7 +725,7 @@ int CudaRasterizer::Rasterizer::renderForward(
731725 ave_n_contrib2loss,
732726 contrib2loss_ratio);
733727
734- save_log_in_file (iteration, local_rank , world_size, log_folder, " n_contrib" , log_tmp);
728+ save_log_in_file (iteration, global_rank , world_size, log_folder, " n_contrib" , log_tmp);
735729 global_sum_n_rendered += n_rendered;
736730 global_sum_n_considered += sum_n_considered;
737731 global_sum_n_contrib2loss += sum_n_contrib2loss;
@@ -742,8 +736,8 @@ int CudaRasterizer::Rasterizer::renderForward(
742736 float global_ave_n_considered_per_pix = global_sum_n_considered / (float )total_pixels;
743737 float global_ave_n_contrib2loss_per_pix = global_sum_n_contrib2loss / (float )total_pixels;
744738
745- sprintf (log_tmp, " local_rank : %d, world_size: %d, num_tiles: %d, num_pixels: %d, num_rendered: %d, global_ave_n_rendered_per_pix: %f, global_ave_n_considered_per_pix: %f, global_ave_n_contrib2loss_per_pix: %f" ,
746- (int )local_rank ,
739+ sprintf (log_tmp, " global_rank : %d, world_size: %d, num_tiles: %d, num_pixels: %d, num_rendered: %d, global_ave_n_rendered_per_pix: %f, global_ave_n_considered_per_pix: %f, global_ave_n_contrib2loss_per_pix: %f" ,
740+ (int )global_rank ,
747741 (int )world_size,
748742 (int )num_local_tiles,
749743 (int )total_pixels,
@@ -752,7 +746,7 @@ int CudaRasterizer::Rasterizer::renderForward(
752746 global_ave_n_considered_per_pix,
753747 global_ave_n_contrib2loss_per_pix
754748 );
755- save_log_in_file (iteration, local_rank , world_size, log_folder, " n_contrib" , log_tmp);
749+ save_log_in_file (iteration, global_rank , world_size, log_folder, " n_contrib" , log_tmp);
756750
757751 delete[] cpu_ranges;
758752 delete[] cpu_n_considered;
@@ -784,7 +778,7 @@ void CudaRasterizer::Rasterizer::renderBackward(
784778 bool debug,
785779 const pybind11::dict &args)
786780{
787- auto [local_rank , world_size, iteration, log_interval, device, zhx_debug, zhx_time, mode, dist_division_mode, log_folder] = prepareArgs (args);
781+ auto [global_rank , world_size, iteration, log_interval, device, zhx_debug, zhx_time, mode, dist_division_mode, log_folder] = prepareArgs (args);
788782
789783 MyTimerOnGPU timer;
790784
@@ -825,6 +819,6 @@ void CudaRasterizer::Rasterizer::renderBackward(
825819
826820 // Print out timing information
827821 if (zhx_time && iteration % log_interval == 1 ) {
828- timer.printAllTimes (iteration, world_size, local_rank , log_folder, false );
822+ timer.printAllTimes (iteration, world_size, global_rank , log_folder, false );
829823 }
830824}
0 commit comments