From 2f86f0d5a9c3e7eaec01943ddbded7091d4f7228 Mon Sep 17 00:00:00 2001 From: Nikita Petko Date: Tue, 4 Nov 2025 20:36:09 +0000 Subject: [PATCH 1/5] Install dotnet-dump #!components: grid-bot --- services/grid-bot/Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/services/grid-bot/Dockerfile b/services/grid-bot/Dockerfile index 3d8c946d..6e45a1a9 100755 --- a/services/grid-bot/Dockerfile +++ b/services/grid-bot/Dockerfile @@ -1,9 +1,12 @@ -# Base Image: net8.0 FROM mcr.microsoft.com/dotnet/aspnet:8.0.1-jammy WORKDIR /opt/grid COPY . /opt/grid/ +RUN apt-get -y update && \ + apt-get install -y dotnet-sdk-8.0 +RUN dotnet tool install --global dotnet-dump + COPY ./ssl/global-root-ca.crt /usr/local/share/ca-certificates/global-root-ca.crt RUN chmod 644 /usr/local/share/ca-certificates/global-root-ca.crt && update-ca-certificates From ce3349206943c98d28e4c0b5ae753433993f6a25 Mon Sep 17 00:00:00 2001 From: Nikita Petko Date: Tue, 4 Nov 2025 22:27:28 +0000 Subject: [PATCH 2/5] Update component and dockerfile #!components: grid-bot Move over code for prometheus Add dotnet-counters --- services/grid-bot/.component.yaml | 29 ++++++++++------------------- services/grid-bot/Dockerfile | 2 +- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/services/grid-bot/.component.yaml b/services/grid-bot/.component.yaml index 029774b7..166e09e4 100755 --- a/services/grid-bot/.component.yaml +++ b/services/grid-bot/.component.yaml @@ -1,9 +1,5 @@ component: grid-bot -# This is only used by the build worklow, -# it determines how the component is built -# Docker only relevant when the argument -# build: project_file: src/Grid.Bot.csproj component_directory: ./.deploy @@ -40,32 +36,27 @@ deployment: grpc: static: 5000 http: - static: 8888 + static: 8882 services: - name: ${{ env.NOMAD_ENVIRONMENT }}-grid-bot port: metrics tags: - ${{ env.NOMAD_ENVIRONMENT }} + - prometheus # For prometheus collector checks: - - type: http - path: /metrics + - type: tcp - - name: ${{ env.NOMAD_ENVIRONMENT }}-grid-bot-web - port: http - tags: - - ${{ env.NOMAD_ENVIRONMENT }} - - "traefik.enable=true" - - "traefik.http.routers.${{ env.NOMAD_ENVIRONMENT }}-grid-bot-web-http.rule=(Host(`clientsettingscdn.sitetest4.robloxlabs.com`) || Host(`versioncompatibility.api.sitetest4.robloxlabs.com`))" - - "traefik.http.routers.${{ env.NOMAD_ENVIRONMENT }}-grid-bot-web-http.entrypoints=http" - - - name: ${{ env.NOMAD_ENVIRONMENT }}-grid-bot-web-https + - name: ${{ env.NOMAD_ENVIRONMENT }}-grid-bot port: http tags: - ${{ env.NOMAD_ENVIRONMENT }} - "traefik.enable=true" - - "traefik.http.routers.${{ env.NOMAD_ENVIRONMENT }}-grid-bot-web-https.rule=(Host(`clientsettingscdn.sitetest4.robloxlabs.com`) || Host(`versioncompatibility.api.sitetest4.robloxlabs.com`))" - - "traefik.http.routers.${{ env.NOMAD_ENVIRONMENT }}-grid-bot-web-https.entrypoints=https" - - "traefik.http.routers.${{ env.NOMAD_ENVIRONMENT }}-grid-bot-web-https.tls=true" + - "traefik.http.routers.${{ env.NOMAD_ENVIRONMENT }}-grid-bot-web.rule=(Host(`clientsettingscdn.sitetest4.robloxlabs.com`) || Host(`versioncompatibility.api.sitetest4.robloxlabs.com`) || Host(`avatar.sitetest4.robloxlabs.com`))" + - "traefik.http.routers.${{ env.NOMAD_ENVIRONMENT }}-grid-bot-web.entrypoints=http" + + - "traefik.http.routers.${{ env.NOMAD_ENVIRONMENT }}-grid-bot-web-secure.rule=(Host(`clientsettingscdn.sitetest4.robloxlabs.com`) || Host(`versioncompatibility.api.sitetest4.robloxlabs.com`) || Host(`avatar.sitetest4.robloxlabs.com`))" + - "traefik.http.routers.${{ env.NOMAD_ENVIRONMENT }}-grid-bot-web-secure.entrypoints=https" + - "traefik.http.routers.${{ env.NOMAD_ENVIRONMENT }}-grid-bot-web-secure.tls=true" volumes: - '/var/run/docker.sock:/var/run/docker.sock' diff --git a/services/grid-bot/Dockerfile b/services/grid-bot/Dockerfile index 6e45a1a9..72f26125 100755 --- a/services/grid-bot/Dockerfile +++ b/services/grid-bot/Dockerfile @@ -5,7 +5,7 @@ COPY . /opt/grid/ RUN apt-get -y update && \ apt-get install -y dotnet-sdk-8.0 -RUN dotnet tool install --global dotnet-dump +RUN dotnet tool install --global dotnet-dump && dotnet tool install --global dotnet-counters COPY ./ssl/global-root-ca.crt /usr/local/share/ca-certificates/global-root-ca.crt RUN chmod 644 /usr/local/share/ca-certificates/global-root-ca.crt && update-ca-certificates From 8abf65b8d2c9aeee4001959c3f7be51b71fe93e3 Mon Sep 17 00:00:00 2001 From: Nikita Petko Date: Thu, 6 Nov 2025 15:44:33 +0000 Subject: [PATCH 3/5] Update projects Bump all packages to 1.1.1 Use GitHub environments instead of custom environments --- .github/workflows/deploy.yml | 24 ++------------- services/grid-bot/lib/Directory.Build.props | 4 +-- .../lib/commands/Shared.Commands.csproj | 12 ++++---- .../grid-bot/lib/events/Shared.Events.csproj | 2 +- .../lib/settings/Shared.Settings.csproj | 18 +++++------ .../lib/utility/Shared.Utility.csproj | 30 +++++++++---------- .../Extensions/IServiceProviderExtensions.cs | 2 +- services/grid-bot/lib/web/Grid.Bot.Web.csproj | 10 +++---- services/grid-bot/src/Grid.Bot.csproj | 7 ++--- .../recovery/src/Grid.Bot.Recovery.csproj | 7 ++--- 10 files changed, 46 insertions(+), 70 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 0ba31311..4c25cd71 100755 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -64,6 +64,7 @@ jobs: runs-on: grid-bot-infra needs: parse-input-components + environment: ${{ github.event.inputs.nomad_environment }} if: ${{ needs.parse-input-components.outputs.nomad-files != '{}' && needs.parse-input-components.outputs.nomad-files != '' }} @@ -76,16 +77,7 @@ jobs: uses: nferch/setup-nomad@v4.0.0 env: NOMAD_TLS_SKIP_VERIFY: 1 - - - name: Set Initial GitHub Deployment Status - uses: chrnorm/deployment-action@v2 - id: deployment - continue-on-error: true - with: - token: "${{ secrets.DEPLOYER_TOKEN }}" - environment: "${{ github.event.inputs.nomad_environment }}" - description: "Components: ${{ github.event.inputs.components }}" - + - name: Deploy Nomad Jobs uses: actions/github-script@v7 id: deploy-nomad-jobs @@ -121,15 +113,3 @@ jobs: core.setFailed(`Failed to deploy the following jobs: ${failedJobs.join(', ')}`); } - - name: Set GitHub Deployment Status - uses: chrnorm/deployment-status@v2 - continue-on-error: true - with: - token: "${{ secrets.DEPLOYER_TOKEN }}" - deployment-id: ${{ steps.deployment.outputs.deployment_id }} - description: "Components: ${{ github.event.inputs.components }}" - - # If the "deploy" step fails, the deployment status will be set to "failure" - # If the "deploy" step succeeds, the deployment status will be set to "success" - state: "${{ steps.deploy-nomad-jobs.outcome }}" - diff --git a/services/grid-bot/lib/Directory.Build.props b/services/grid-bot/lib/Directory.Build.props index 3de19551..476dba01 100755 --- a/services/grid-bot/lib/Directory.Build.props +++ b/services/grid-bot/lib/Directory.Build.props @@ -7,8 +7,6 @@ net8.0 preview Grid.Bot - - debug;release @@ -41,4 +39,4 @@ $(MSBuildThisFileDirectory)..\..\..\ - \ No newline at end of file + diff --git a/services/grid-bot/lib/commands/Shared.Commands.csproj b/services/grid-bot/lib/commands/Shared.Commands.csproj index f798d907..43c77b10 100755 --- a/services/grid-bot/lib/commands/Shared.Commands.csproj +++ b/services/grid-bot/lib/commands/Shared.Commands.csproj @@ -14,12 +14,12 @@ - - - - - - + + + + + + diff --git a/services/grid-bot/lib/events/Shared.Events.csproj b/services/grid-bot/lib/events/Shared.Events.csproj index c1ca45ee..e70b46a1 100755 --- a/services/grid-bot/lib/events/Shared.Events.csproj +++ b/services/grid-bot/lib/events/Shared.Events.csproj @@ -18,7 +18,7 @@ - + diff --git a/services/grid-bot/lib/settings/Shared.Settings.csproj b/services/grid-bot/lib/settings/Shared.Settings.csproj index 2ca7a8a1..d6309074 100755 --- a/services/grid-bot/lib/settings/Shared.Settings.csproj +++ b/services/grid-bot/lib/settings/Shared.Settings.csproj @@ -16,16 +16,16 @@ - - - - - - - - + + + + + + + + - + diff --git a/services/grid-bot/lib/utility/Shared.Utility.csproj b/services/grid-bot/lib/utility/Shared.Utility.csproj index 31ccec18..81293970 100755 --- a/services/grid-bot/lib/utility/Shared.Utility.csproj +++ b/services/grid-bot/lib/utility/Shared.Utility.csproj @@ -13,20 +13,20 @@ - - - - - - - - - - - - - - + + + + + + + + + + + + + + @@ -58,4 +58,4 @@ - \ No newline at end of file + diff --git a/services/grid-bot/lib/web/Extensions/IServiceProviderExtensions.cs b/services/grid-bot/lib/web/Extensions/IServiceProviderExtensions.cs index 11407071..568d2816 100644 --- a/services/grid-bot/lib/web/Extensions/IServiceProviderExtensions.cs +++ b/services/grid-bot/lib/web/Extensions/IServiceProviderExtensions.cs @@ -156,4 +156,4 @@ public static void UseWebServer(this IServiceProvider services, IEnumerable app.Run(webSettings.WebServerBindAddress), TaskCreationOptions.LongRunning); } -} \ No newline at end of file +} diff --git a/services/grid-bot/lib/web/Grid.Bot.Web.csproj b/services/grid-bot/lib/web/Grid.Bot.Web.csproj index 67e52bad..fdc380c0 100644 --- a/services/grid-bot/lib/web/Grid.Bot.Web.csproj +++ b/services/grid-bot/lib/web/Grid.Bot.Web.csproj @@ -13,11 +13,11 @@ - - - - - + + + + + diff --git a/services/grid-bot/src/Grid.Bot.csproj b/services/grid-bot/src/Grid.Bot.csproj index 3e3d0ee4..69038170 100755 --- a/services/grid-bot/src/Grid.Bot.csproj +++ b/services/grid-bot/src/Grid.Bot.csproj @@ -4,7 +4,6 @@ - debug;release Exe True @@ -25,9 +24,9 @@ - - - + + + diff --git a/services/recovery/src/Grid.Bot.Recovery.csproj b/services/recovery/src/Grid.Bot.Recovery.csproj index ac9b6979..b1392ec2 100755 --- a/services/recovery/src/Grid.Bot.Recovery.csproj +++ b/services/recovery/src/Grid.Bot.Recovery.csproj @@ -4,7 +4,6 @@ - debug;release Exe True @@ -15,9 +14,9 @@ - - - + + + From d26b60297c294b28f6d72c58f8e8cd31d776db4f Mon Sep 17 00:00:00 2001 From: Nikita Petko Date: Fri, 7 Nov 2025 15:35:47 +0000 Subject: [PATCH 4/5] Update to gateway logging Add a toggle to allow logging gateway or websocket exceptions at runtime instead of compile time --- .../grid-bot/lib/events/Events/OnLogMessage.cs | 18 +++++++++--------- .../lib/settings/Providers/DiscordSettings.cs | 10 +++++++++- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/services/grid-bot/lib/events/Events/OnLogMessage.cs b/services/grid-bot/lib/events/Events/OnLogMessage.cs index d416986f..4b2d63bd 100755 --- a/services/grid-bot/lib/events/Events/OnLogMessage.cs +++ b/services/grid-bot/lib/events/Events/OnLogMessage.cs @@ -103,21 +103,21 @@ public Task Invoke(LogMessage message) if (message.Exception != null) { #if DEBUG || DEBUG_LOGGING_IN_PROD -#if !DEBUG // Don't log these exceptions outside of debug mode. - if (message.Exception is GatewayReconnectException) - return Task.CompletedTask; + if (!_settings.DebugAllowGatewayWebsocketExceptions) { + if (message.Exception is GatewayReconnectException) + return Task.CompletedTask; - // Closed web socket exceptions are expected when the bot is shutting down. - if (message.Exception.InnerException is WebSocketException) - return Task.CompletedTask; + // Closed web socket exceptions are expected when the bot is shutting down. + if (message.Exception.InnerException is WebSocketException) + return Task.CompletedTask; - if (message.Exception is WebSocketClosedException || message.Exception.InnerException is WebSocketClosedException) - return Task.CompletedTask; + if (message.Exception is WebSocketClosedException || message.Exception.InnerException is WebSocketClosedException) + return Task.CompletedTask; + } if (message.Exception is TaskCanceledException && !_settings.DebugAllowTaskCanceledExceptions) return Task.CompletedTask; -#endif // Temporary fix for discord-net/Discord.Net#3128 // Just keep it out of Backtrace and increment a counter. diff --git a/services/grid-bot/lib/settings/Providers/DiscordSettings.cs b/services/grid-bot/lib/settings/Providers/DiscordSettings.cs index 5e938f03..0ef50899 100755 --- a/services/grid-bot/lib/settings/Providers/DiscordSettings.cs +++ b/services/grid-bot/lib/settings/Providers/DiscordSettings.cs @@ -28,12 +28,20 @@ public class DiscordSettings : BaseSettingsProvider #if DEBUG || DEBUG_LOGGING_IN_PROD /// - /// Can task cancelled exceptions be loggeD? + /// Can task cancelled exceptions be logged? /// public bool DebugAllowTaskCanceledExceptions => GetOrDefault( nameof(DebugAllowTaskCanceledExceptions), false ); + + /// + /// Can websocket or gateway exceptions be logged? + /// + public bool DebugAllowGatewayWebsocketExceptions => GetOrDefault( + nameof(DebugAllowGatewayWebsocketExceptions), + false + ); #endif #if DEBUG From 6ec96fbbe398a682d5a18e42d693c7df11f14714 Mon Sep 17 00:00:00 2001 From: Nikita Petko Date: Fri, 7 Nov 2025 15:48:45 +0000 Subject: [PATCH 5/5] Update OnReady.cs Testing once initialization when the first shard is ready. --- services/grid-bot/lib/events/Events/OnReady.cs | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/services/grid-bot/lib/events/Events/OnReady.cs b/services/grid-bot/lib/events/Events/OnReady.cs index 8e73f916..99ea67b9 100755 --- a/services/grid-bot/lib/events/Events/OnReady.cs +++ b/services/grid-bot/lib/events/Events/OnReady.cs @@ -15,7 +15,6 @@ using Threading; using Text.Extensions; - /// /// Event handler to be invoked when a shard is ready, /// @@ -62,7 +61,7 @@ OnCommandExecuted onCommandExecutedEvent { private static readonly Assembly _commandsAssembly = Assembly.Load("Shared.Commands"); - private Atomic _shardCount = 0; // needs to be atomic due to the race situation here. + private OnceFlag _initializeClientOnceFlag = new(); private readonly DiscordSettings _discordSettings = discordSettings ?? throw new ArgumentNullException(nameof(discordSettings)); private readonly MaintenanceSettings _maintenanceSettings = maintenanceSettings ?? throw new ArgumentNullException(nameof(maintenanceSettings)); @@ -85,10 +84,8 @@ private static string GetStatusText(string updateText) /// Invoe the event handler. /// /// The client for the shard. - public async Task Invoke(DiscordSocketClient shard) + public Task Invoke(DiscordSocketClient shard) { - _shardCount++; - _logger.Debug( "Shard '{0}' ready as '{0}#{1}'", shard.ShardId, @@ -96,10 +93,8 @@ public async Task Invoke(DiscordSocketClient shard) _client.CurrentUser.Discriminator ); - if (_shardCount == _client.Shards.Count) + Call.Once(ref _initializeClientOnceFlag, async () => { - _logger.Debug("Final shard ready!"); - await _interactionService.AddModulesAsync(_commandsAssembly, _services); await _commandService.AddModulesAsync(_commandsAssembly, _services); @@ -136,6 +131,8 @@ public async Task Invoke(DiscordSocketClient shard) _client.SetGameAsync( _discordSettings.BotStatusMessage ); - } + }); + + return Task.CompletedTask; } }