diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2dccebcf..3a854398 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -32,7 +32,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: '1.25.x' + go-version: '1.25.3' - name: Install dependencies run: go mod download diff --git a/FuncStructList.txt b/FuncStructList.txt index 77238e4c..fbfa250a 100644 --- a/FuncStructList.txt +++ b/FuncStructList.txt @@ -1,7 +1,7 @@ ### Struct Signatures ./active_files/hierarchies/organize_by_theme.go:15:type ThemeCategory struct { ./cmd/extract_hierarchies/extract_hierarchies.go:138: type hierarchyStat struct { -./cmd/fix-thread-activity/main.go:73: type threadInfo struct { +./cmd/fix-thread-activity/main.go:80: type threadInfo struct { ./cmd/history-rebuild/main.go:26:type RebuildStats struct { ./cmd/history-rebuild/main.go:36:type HistoryAnalysisStats struct { ./cmd/import-flat-files/main.go:193:type FileScanner struct { @@ -10,9 +10,11 @@ ./cmd/merge-active/main.go:21:type ActiveEntry struct { ./cmd/merge-active/main.go:31:type OverviewEntry struct { ./cmd/nntp-server/processor_adapter.go:10:type ProcessorAdapter struct { -./cmd/nntp-transfer/main.go:503:type ProxyConfig struct { +./cmd/nntp-transfer/main.go:2152:type ReturnSignal struct { +./cmd/nntp-transfer/main.go:3066: type ProgressInfo struct { +./cmd/nntp-transfer/main.go:570:type ProxyConfig struct { ./cmd/recover-db/main.go:20:type GroupResult struct { -./cmd/recover-db/main.go:676:type DateProblem struct { +./cmd/recover-db/main.go:675:type DateProblem struct { ./cmd/tcp2tor/main.go:28:type ProxyConfig struct { ./cmd/tcp2tor/main.go:35:type ProxyTarget struct { ./cmd/tcp2tor/main.go:43:type ConfigEntry struct { @@ -24,11 +26,11 @@ ./internal/cache/newsgroup_cache.go:38:type NewsgroupCache struct { ./internal/cache/sanitized_cache.go:12:type SanitizedArticle struct { ./internal/cache/sanitized_cache.go:30:type SanitizedCache struct { 
-./internal/config/config.go:113:type ServerConfig struct { -./internal/config/config.go:129:type DatabaseConfig struct { -./internal/config/config.go:136:type WebConfig struct { -./internal/config/config.go:68:type MainConfig struct { -./internal/config/config.go:90:type Provider struct { +./internal/config/config.go:115:type ServerConfig struct { +./internal/config/config.go:131:type DatabaseConfig struct { +./internal/config/config.go:138:type WebConfig struct { +./internal/config/config.go:70:type MainConfig struct { +./internal/config/config.go:92:type Provider struct { ./internal/database/article_cache.go:13:type ArticleCacheEntry struct { ./internal/database/article_cache.go:22:type ArticleCache struct { ./internal/database/config_cache.go:10:type ConfigCache struct { @@ -131,20 +133,27 @@ ./internal/models/models.go:72:type Overview struct { ./internal/models/models.go:91:type User struct { ./internal/nntp/nntp-article-common.go:26:type ArticleRetrievalResult struct { -./internal/nntp/nntp-article-common.go:35:type TakeThisMode struct { ./internal/nntp/nntp-auth-manager.go:12:type AuthManager struct { ./internal/nntp/nntp-backend-pool.go:15:type Pool struct { -./internal/nntp/nntp-backend-pool.go:406:type PoolStats struct { +./internal/nntp/nntp-backend-pool.go:417:type PoolStats struct { ./internal/nntp/nntp-cache-local.go:11:type Local430 struct { ./internal/nntp/nntp-cache-local.go:57:type CacheMessageIDNumtoGroup struct { ./internal/nntp/nntp-cache-local.go:62:type ItemCMIDNG struct { -./internal/nntp/nntp-client.go:107:type GroupInfo struct { -./internal/nntp/nntp-client.go:118:type OverviewLine struct { -./internal/nntp/nntp-client.go:130:type HeaderLine struct { -./internal/nntp/nntp-client.go:52:type BackendConn struct { -./internal/nntp/nntp-client.go:75:type BackendConfig struct { -./internal/nntp/nntp-client.go:98:type Article struct { -./internal/nntp/nntp-cmd-posting.go:163:type ArticleData struct { 
+./internal/nntp/nntp-client-commands.go:114:type TTResponse struct { +./internal/nntp/nntp-client-commands.go:120:type CheckResponse struct { // deprecated +./internal/nntp/nntp-client-commands.go:125:type ReadRequest struct { +./internal/nntp/nntp-client-commands.go:148:type CHTTJob struct { +./internal/nntp/nntp-client-commands.go:199:type NewsgroupTransferProgress struct { +./internal/nntp/nntp-client-commands.go:41:type TakeThisMode struct { +./internal/nntp/nntp-client-commands.go:54:type TTSetup struct { +./internal/nntp/nntp-client-commands.go:59:type OffsetQueue struct { +./internal/nntp/nntp-client.go:100:type Article struct { +./internal/nntp/nntp-client.go:109:type GroupInfo struct { +./internal/nntp/nntp-client.go:120:type OverviewLine struct { +./internal/nntp/nntp-client.go:132:type HeaderLine struct { +./internal/nntp/nntp-client.go:55:type BackendConn struct { +./internal/nntp/nntp-client.go:77:type BackendConfig struct { +./internal/nntp/nntp-cmd-posting.go:164:type ArticleData struct { ./internal/nntp/nntp-peering.go:153:type PeeringStats struct { ./internal/nntp/nntp-peering.go:47:type PeeringManager struct { ./internal/nntp/nntp-peering.go:70:type PeeringConfig struct { @@ -163,9 +172,9 @@ ./internal/processor/bridges.go:24:type BridgeManager struct { ./internal/processor/counter.go:12:type Counter struct { ./internal/processor/PostQueue.go:12:type PostQueueWorker struct { -./internal/processor/proc_DLArt.go:18:type BatchQueue struct { -./internal/processor/proc_DLArt.go:26:type GroupBatch struct { -./internal/processor/proc_DLArt.go:30:type BatchItem struct { +./internal/processor/proc_DLArt.go:19:type BatchQueue struct { +./internal/processor/proc_DLArt.go:27:type GroupBatch struct { +./internal/processor/proc_DLArt.go:31:type BatchItem struct { ./internal/processor/processor.go:23:type Processor struct { ./internal/processor/proc_MsgIDtmpCache.go:14://type MsgTmpCache struct { ./internal/processor/proc_MsgIDtmpCache.go:57:type 
MsgIdTmpCacheItem struct { @@ -174,12 +183,12 @@ ./internal/processor/rslight.go:45:type GroupsEntry struct { ./internal/processor/rslight.go:52:type LegacyArticle struct { ./internal/processor/rslight.go:65:type LegacyThread struct { -./internal/web/cronjobs.go:20:type CronJobManager struct { -./internal/web/cronjobs.go:29:type CronJob struct { +./internal/web/cronjobs.go:21:type CronJobManager struct { +./internal/web/cronjobs.go:30:type CronJob struct { ./internal/web/web_admin.go:11:type FlashMessage struct { ./internal/web/web_admin.go:17:type SpamArticleInfo struct { ./internal/web/web_admin.go:23:type AdminPageData struct { -./internal/web/web_admin_newsgroups.go:565: type threadInfo struct { +./internal/web/web_admin_newsgroups.go:568: type threadInfo struct { ./internal/web/web_admin_ollama.go:18:type ProxyModelResponse struct { ./internal/web/web_admin_ollama.go:22:type ProxyModel struct { ./internal/web/web_admin_settings_unified.go:17:type SettingConfig struct { @@ -192,26 +201,26 @@ ./internal/web/web_newsPage.go:12:type NewsPageData struct { ./internal/web/web_profile.go:17:type ProfilePageData struct { ./internal/web/web_registerPage.go:15:type RegisterPageData struct { -./internal/web/webserver_core_routes.go:108:type HierarchiesPageData struct { -./internal/web/webserver_core_routes.go:116:type HierarchyGroupsPageData struct { -./internal/web/webserver_core_routes.go:125:type HierarchyTreePageData struct { -./internal/web/webserver_core_routes.go:143:type HierarchyBreadcrumb struct { -./internal/web/webserver_core_routes.go:150:type HierarchyNode struct { -./internal/web/webserver_core_routes.go:158:type SectionPageData struct { -./internal/web/webserver_core_routes.go:169:type SectionGroupPageData struct { -./internal/web/webserver_core_routes.go:179:type SectionArticlePageData struct { -./internal/web/webserver_core_routes.go:191:type SearchPageData struct { +./internal/web/webserver_core_routes.go:109:type HierarchiesPageData struct { 
+./internal/web/webserver_core_routes.go:117:type HierarchyGroupsPageData struct { +./internal/web/webserver_core_routes.go:126:type HierarchyTreePageData struct { +./internal/web/webserver_core_routes.go:144:type HierarchyBreadcrumb struct { +./internal/web/webserver_core_routes.go:151:type HierarchyNode struct { +./internal/web/webserver_core_routes.go:159:type SectionPageData struct { +./internal/web/webserver_core_routes.go:170:type SectionGroupPageData struct { +./internal/web/webserver_core_routes.go:180:type SectionArticlePageData struct { +./internal/web/webserver_core_routes.go:192:type SearchPageData struct { ./internal/web/webserver_core_routes.go:25:type WebServer struct { ./internal/web/webserver_core_routes.go:39:type TemplateData struct { ./internal/web/webserver_core_routes.go:56:type GroupPageData struct { ./internal/web/webserver_core_routes.go:65:type ArticlePageData struct { ./internal/web/webserver_core_routes.go:77:type StatsPageData struct { -./internal/web/webserver_core_routes.go:84:type GroupsPageData struct { -./internal/web/webserver_core_routes.go:92:type GroupThreadsPageData struct { +./internal/web/webserver_core_routes.go:85:type GroupsPageData struct { +./internal/web/webserver_core_routes.go:93:type GroupThreadsPageData struct { ./internal/web/web_sitePostPage.go:23:type PostPageData struct { ### Function Signatures -./cmd/history-rebuild/main.go:297:func (s *RebuildStats) PrintProgress() { -./cmd/history-rebuild/main.go:328:func (s *RebuildStats) PrintFinal() { +./cmd/history-rebuild/main.go:301:func (s *RebuildStats) PrintProgress() { +./cmd/history-rebuild/main.go:332:func (s *RebuildStats) PrintFinal() { ./cmd/import-flat-files/main.go:162:func (dm *DBManager) ArticleExists(article *Article) (bool, error) { ./cmd/import-flat-files/main.go:181:func (dm *DBManager) Close() { ./cmd/import-flat-files/main.go:206:func (fs *FileScanner) ScanFiles() <-chan *Article { @@ -220,6 +229,12 @@ ./cmd/nntp-server/processor_adapter.go:20:func 
(pa *ProcessorAdapter) ProcessIncomingArticle(article *models.Article) (int, error) { ./cmd/nntp-server/processor_adapter.go:27:func (pa *ProcessorAdapter) Lookup(msgIdItem *history.MessageIdItem) (int, error) { ./cmd/nntp-server/processor_adapter.go:32:func (pa *ProcessorAdapter) CheckNoMoreWorkInHistory() bool { +./cmd/nntp-transfer/main.go:2166:func (rs *ReturnSignal) UnlockTT() { +./cmd/nntp-transfer/main.go:2173:func (rs *ReturnSignal) GetLockTT() { +./cmd/nntp-transfer/main.go:2193:func (rs *ReturnSignal) UnlockCHECKforTTwithWait() { +./cmd/nntp-transfer/main.go:2209:func (rs *ReturnSignal) UnlockCHECKforTT() { +./cmd/nntp-transfer/main.go:2221:func (rs *ReturnSignal) BlockCHECK() { +./cmd/nntp-transfer/main.go:2229:func (rs *ReturnSignal) LockCHECK() { ./cmd/tcp2tor/main.go:558:func (s *ProxyServer) Start() error { ./cmd/tcp2tor/main.go:601:func (s *ProxyServer) Stop() { ./cmd/tcp2tor/main.go:611:func (s *ProxyServer) handleConnection(clientConn net.Conn) { @@ -309,10 +324,10 @@ ./internal/database/db_batch.go:1084:func (c *SQ3batch) findThreadRoot(groupDBs *GroupDBs, refs []string) (int64, error) { ./internal/database/db_batch.go:1111:func (sq *SQ3batch) batchUpdateThreadCache(groupDBs *GroupDBs, threadUpdates map[int64][]threadCacheUpdateData) error { ./internal/database/db_batch.go:1236:func (o *BatchOrchestrator) StartOrch() { -./internal/database/db_batch.go:1274:func (o *BatchOrchestrator) StartOrchestrator() { -./internal/database/db_batch.go:1333:func (o *BatchOrchestrator) checkThresholds() (haswork bool) { +./internal/database/db_batch.go:1277:func (o *BatchOrchestrator) StartOrchestrator() { +./internal/database/db_batch.go:1338:func (o *BatchOrchestrator) checkThresholds() (haswork bool) { ./internal/database/db_batch.go:139:func (sq *SQ3batch) BatchCaptureOverviewForLater(newsgroupPtr *string, article *models.Article) { -./internal/database/db_batch.go:1418:func (sq *SQ3batch) BatchDivider() { +./internal/database/db_batch.go:1423:func (sq 
*SQ3batch) BatchDivider() { ./internal/database/db_batch.go:149:func (sq *SQ3batch) ExpireCache() { ./internal/database/db_batch.go:170:func (sq *SQ3batch) GetNewsgroupPointer(newsgroup string) *string { ./internal/database/db_batch.go:196:func (sq *SQ3batch) GetChan(newsgroup *string) chan *models.Article { @@ -356,13 +371,13 @@ ./internal/database/db_groupdbs.go:172:func (db *GroupDBs) ExistsMsgIdInArticlesDB(messageID string) bool { ./internal/database/db_groupdbs.go:181:func (dbs *GroupDBs) Close(who string) error { ./internal/database/db_groupdbs.go:28:func (db *Database) GetGroupDBs(groupName string) (*GroupDBs, error) { -./internal/database/db_init.go:233:func (db *Database) IsDBshutdown() bool { -./internal/database/db_init.go:249:func (db *Database) initMainDB() error { -./internal/database/db_init.go:290:func (db *Database) applySQLitePragmas(conn *sql.DB) error { -./internal/database/db_init.go:318:func (db *Database) applySQLitePragmasGroupDB(conn *sql.DB) error { -./internal/database/db_init.go:358:func (db *Database) LoadDefaultProviders() error { -./internal/database/db_init.go:399:func (db *Database) loadBotConfiguration() { -./internal/database/db_init.go:443:func (db *Database) loadIPBlockingConfiguration() { +./internal/database/db_init.go:251:func (db *Database) IsDBshutdown() bool { +./internal/database/db_init.go:267:func (db *Database) initMainDB() error { +./internal/database/db_init.go:308:func (db *Database) applySQLitePragmas(conn *sql.DB) error { +./internal/database/db_init.go:336:func (db *Database) applySQLitePragmasGroupDB(conn *sql.DB) error { +./internal/database/db_init.go:376:func (db *Database) LoadDefaultProviders() error { +./internal/database/db_init.go:417:func (db *Database) loadBotConfiguration() { +./internal/database/db_init.go:441:func (db *Database) loadIPBlockingConfiguration() { ./internal/database/db_migrate.go:245:func (db *Database) migrateMainDB() error { ./internal/database/db_migrate.go:281:func (db *Database) 
MigrateGroup(groupName string) error { ./internal/database/db_migrate.go:293:func (db *Database) migrateGroupDB(groupDBs *GroupDBs) error { @@ -631,28 +646,28 @@ ./internal/history/history_config.go:293:func (h *History) xxxGetHashPrefix(hash string) string { ./internal/history/history_config.go:302:func (h *History) initDatabase() error { ./internal/history/history_config.go:324:func (h *History) openHistoryFile() error { -./internal/history/history.go:1004:func (h *History) xxLookupStorageToken(msgIdItem *MessageIdItem) int { -./internal/history/history.go:1059:func (h *History) CheckNoMoreWorkInHistory() bool { -./internal/history/history.go:1079:func (h *History) SetDatabaseWorkChecker(checker DatabaseWorkChecker) { -./internal/history/history.go:160:func (h *History) bootLookupWorkers() { -./internal/history/history.go:168:func (h *History) LookupWorker(wid int) { -./internal/history/history.go:214:func (h *History) Lookup(msgIdItem *MessageIdItem) (int, error) { -./internal/history/history.go:235:func (h *History) lookupInDatabase(msgIdItem *MessageIdItem) (bool, error) { -./internal/history/history.go:334:func (h *History) GetStats() HistoryStats { -./internal/history/history.go:348:func (h *History) updateStats(fn func(*HistoryStats)) { -./internal/history/history.go:355:func (h *History) Close() error { -./internal/history/history.go:372:func (h *History) writerWorker() { -./internal/history/history.go:466:func (h *History) ServerShutdown() bool { -./internal/history/history.go:479:func (h *History) readHistoryEntryAtOffset(offset int64, msgIdItem *MessageIdItem) (int, error) { -./internal/history/history.go:567:func (h *History) routeHash(msgId string) (int, string, string, error) { -./internal/history/history.go:604:func (h *History) flushPendingBatch() { -./internal/history/history.go:623:func (h *History) processBatch() { -./internal/history/history.go:666:func (h *History) writeBatchToFile() error { -./internal/history/history.go:765:func (h *History) 
writeBatchToDatabase() error { -./internal/history/history.go:834:func (h *History) writeBatchToHashDB(dbIndex int, entries []*MessageIdItem) error { -./internal/history/history.go:861:func (h *History) executeDBTransaction(dbIndex int, entries []*MessageIdItem) error { -./internal/history/history.go:92:func (h *History) Add(msgIdItem *MessageIdItem) { -./internal/history/history.go:938:func (h *History) processTableInTransaction(tx *sql.Tx, tableName string, hashGroups map[string][]*MessageIdItem) error { +./internal/history/history.go:1007:func (h *History) xxLookupStorageToken(msgIdItem *MessageIdItem) int { +./internal/history/history.go:1062:func (h *History) CheckNoMoreWorkInHistory() bool { +./internal/history/history.go:1082:func (h *History) SetDatabaseWorkChecker(checker DatabaseWorkChecker) { +./internal/history/history.go:161:func (h *History) bootLookupWorkers() { +./internal/history/history.go:169:func (h *History) LookupWorker(wid int) { +./internal/history/history.go:215:func (h *History) Lookup(msgIdItem *MessageIdItem) (int, error) { +./internal/history/history.go:236:func (h *History) lookupInDatabase(msgIdItem *MessageIdItem) (bool, error) { +./internal/history/history.go:335:func (h *History) GetStats() HistoryStats { +./internal/history/history.go:349:func (h *History) updateStats(fn func(*HistoryStats)) { +./internal/history/history.go:356:func (h *History) Close() error { +./internal/history/history.go:373:func (h *History) writerWorker() { +./internal/history/history.go:469:func (h *History) ServerShutdown() bool { +./internal/history/history.go:482:func (h *History) readHistoryEntryAtOffset(offset int64, msgIdItem *MessageIdItem) (int, error) { +./internal/history/history.go:570:func (h *History) routeHash(msgId string) (int, string, string, error) { +./internal/history/history.go:607:func (h *History) flushPendingBatch() { +./internal/history/history.go:626:func (h *History) processBatch() { +./internal/history/history.go:669:func (h 
*History) writeBatchToFile() error { +./internal/history/history.go:768:func (h *History) writeBatchToDatabase() error { +./internal/history/history.go:837:func (h *History) writeBatchToHashDB(dbIndex int, entries []*MessageIdItem) error { +./internal/history/history.go:864:func (h *History) executeDBTransaction(dbIndex int, entries []*MessageIdItem) error { +./internal/history/history.go:93:func (h *History) Add(msgIdItem *MessageIdItem) { +./internal/history/history.go:941:func (h *History) processTableInTransaction(tx *sql.Tx, tableName string, hashGroups map[string][]*MessageIdItem) error { ./internal/history/history_L1-cache.go:113:func (c *L1Cache) Close() { ./internal/history/history_L1-cache.go:205:func (l1 *L1CACHE) BootL1Cache() { ./internal/history/history_L1-cache.go:248:func (l1 *L1CACHE) LockL1Cache(hash string, value int) int { @@ -716,31 +731,31 @@ ./internal/models/sanitizing.go:481:func (o *Overview) PrintSanitized(field string, groupName ...string) template.HTML { ./internal/models/sanitizing.go:547:func (a *Article) GetCleanSubject() string { ./internal/models/sanitizing.go:567:func (o *Overview) GetCleanSubject() string { -./internal/nntp/nntp-article-common.go:320:func (c *ClientConnection) sendArticleContent(result *ArticleRetrievalResult) error { -./internal/nntp/nntp-article-common.go:355:func (c *ClientConnection) sendHeadContent(result *ArticleRetrievalResult) error { -./internal/nntp/nntp-article-common.go:376:func (c *ClientConnection) sendBodyContent(result *ArticleRetrievalResult) error { -./internal/nntp/nntp-article-common.go:397:func (c *ClientConnection) sendStatContent(result *ArticleRetrievalResult) error { -./internal/nntp/nntp-article-common.go:46:func (c *ClientConnection) retrieveArticleCommon(args []string, retrievalType ArticleRetrievalType) error { -./internal/nntp/nntp-article-common.go:89:func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResult, error) { 
+./internal/nntp/nntp-article-common.go:309:func (c *ClientConnection) sendArticleContent(result *ArticleRetrievalResult) error { +./internal/nntp/nntp-article-common.go:344:func (c *ClientConnection) sendHeadContent(result *ArticleRetrievalResult) error { +./internal/nntp/nntp-article-common.go:35:func (c *ClientConnection) retrieveArticleCommon(args []string, retrievalType ArticleRetrievalType) error { +./internal/nntp/nntp-article-common.go:365:func (c *ClientConnection) sendBodyContent(result *ArticleRetrievalResult) error { +./internal/nntp/nntp-article-common.go:386:func (c *ClientConnection) sendStatContent(result *ArticleRetrievalResult) error { +./internal/nntp/nntp-article-common.go:78:func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResult, error) { ./internal/nntp/nntp-auth-manager.go:24:func (am *AuthManager) AuthenticateUser(username, password string) (*models.NNTPUser, error) { ./internal/nntp/nntp-auth-manager.go:47:func (am *AuthManager) CheckGroupAccess(user *models.NNTPUser, groupName string) bool { ./internal/nntp/nntp-auth-manager.go:58:func (am *AuthManager) CanPost(user *models.NNTPUser) bool { ./internal/nntp/nntp-auth-manager.go:66:func (am *AuthManager) IsAdmin(user *models.NNTPUser) bool { ./internal/nntp/nntp-auth-manager.go:77:func (am *AuthManager) CheckConnectionLimit(user *models.NNTPUser) bool { -./internal/nntp/nntp-backend-pool.go:119:func (pool *Pool) GetArticle(messageID *string, bulkmode bool) (*models.Article, error) { -./internal/nntp/nntp-backend-pool.go:151:func (pool *Pool) SelectGroup(group string) (*GroupInfo, error) { -./internal/nntp/nntp-backend-pool.go:185:func (pool *Pool) Get(mode int) (*BackendConn, error) { -./internal/nntp/nntp-backend-pool.go:299:func (pool *Pool) Put(client *BackendConn) error { -./internal/nntp/nntp-backend-pool.go:342:func (pool *Pool) CloseConn(client *BackendConn, lock bool) error { -./internal/nntp/nntp-backend-pool.go:363:func (pool *Pool) ClosePool() error { 
-./internal/nntp/nntp-backend-pool.go:391:func (pool *Pool) Stats() PoolStats { -./internal/nntp/nntp-backend-pool.go:416:func (pool *Pool) createConnection() (*BackendConn, error) { -./internal/nntp/nntp-backend-pool.go:430:func (pool *Pool) isConnectionValid(client *BackendConn) bool { -./internal/nntp/nntp-backend-pool.go:449:func (pool *Pool) Cleanup() { -./internal/nntp/nntp-backend-pool.go:45:func (pool *Pool) XOver(group string, start, end int64, enforceLimit bool) ([]OverviewLine, error) { -./internal/nntp/nntp-backend-pool.go:498:func (pool *Pool) startCleanupWorker() { -./internal/nntp/nntp-backend-pool.go:72:func (pool *Pool) XHdr(group string, header string, start, end int64) ([]*HeaderLine, error) { -./internal/nntp/nntp-backend-pool.go:94:func (pool *Pool) XHdrStreamed(group string, header string, start, end int64, xhdrChan chan<- *HeaderLine, shutdownChan <-chan struct{}) error { +./internal/nntp/nntp-backend-pool.go:118:func (pool *Pool) GetArticle(messageID *string, bulkmode bool) (*models.Article, error) { +./internal/nntp/nntp-backend-pool.go:150:func (pool *Pool) SelectGroup(group string) (*GroupInfo, error) { +./internal/nntp/nntp-backend-pool.go:184:func (pool *Pool) Get(wantMode int) (*BackendConn, error) { +./internal/nntp/nntp-backend-pool.go:310:func (pool *Pool) Put(conn *BackendConn) error { +./internal/nntp/nntp-backend-pool.go:351:func (pool *Pool) closeConn(client *BackendConn, lock bool) error { +./internal/nntp/nntp-backend-pool.go:372:func (pool *Pool) ClosePool() error { +./internal/nntp/nntp-backend-pool.go:402:func (pool *Pool) Stats() PoolStats { +./internal/nntp/nntp-backend-pool.go:427:func (pool *Pool) createConnection() (*BackendConn, error) { +./internal/nntp/nntp-backend-pool.go:441:func (pool *Pool) isConnectionValid(client *BackendConn) bool { +./internal/nntp/nntp-backend-pool.go:463:func (pool *Pool) Cleanup() { +./internal/nntp/nntp-backend-pool.go:47:func (pool *Pool) XOver(group string, start, end int64, 
enforceLimit bool) ([]OverviewLine, error) { +./internal/nntp/nntp-backend-pool.go:504:func (pool *Pool) startCleanupWorker() { +./internal/nntp/nntp-backend-pool.go:74:func (pool *Pool) XHdr(group string, header string, start, end int64) ([]*HeaderLine, error) { +./internal/nntp/nntp-backend-pool.go:96:func (pool *Pool) XHdrStreamed(group string, header string, start, end int64, xhdrChan chan<- *HeaderLine, shutdownChan <-chan struct{}) error { ./internal/nntp/nntp-cache-local.go:102:func (c *CacheMessageIDNumtoGroup) Del(messageID, group string) { ./internal/nntp/nntp-cache-local.go:115:func (c *CacheMessageIDNumtoGroup) Clear(messageID string) { ./internal/nntp/nntp-cache-local.go:122:func (c *CacheMessageIDNumtoGroup) CleanupCron() { @@ -751,37 +766,58 @@ ./internal/nntp/nntp-cache-local.go:43:func (lc *Local430) Cleanup() { ./internal/nntp/nntp-cache-local.go:75:func (c *CacheMessageIDNumtoGroup) Get(messageID, group string) (int64, bool) { ./internal/nntp/nntp-cache-local.go:88:func (c *CacheMessageIDNumtoGroup) Set(messageID, group string, articleNum int64) { -./internal/nntp/nntp-client-commands.go:1016:func (c *BackendConn) parseHeaderLine(line string) (*HeaderLine, error) { -./internal/nntp/nntp-client-commands.go:1035:func (c *BackendConn) CheckMultiple(messageIDs []*string, ttMode *TakeThisMode) (chan *string, error) { -./internal/nntp/nntp-client-commands.go:1119:func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *string) (int, error) { -./internal/nntp/nntp-client-commands.go:1211:func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string) (uint, error) { -./internal/nntp/nntp-client-commands.go:1290:func (c *BackendConn) ReadTakeThisResponseStreaming(id uint) (int, error) { -./internal/nntp/nntp-client-commands.go:130:func (c *BackendConn) GetHead(messageID string) (*models.Article, error) { -./internal/nntp/nntp-client-commands.go:1311:func (c *BackendConn) PostArticle(article 
*models.Article) (int, error) { -./internal/nntp/nntp-client-commands.go:1431:func (c *BackendConn) SwitchMode(mode int) error { -./internal/nntp/nntp-client-commands.go:1443:func (c *BackendConn) SwitchToModeReader() error { -./internal/nntp/nntp-client-commands.go:1475:func (c *BackendConn) SwitchToModeStream() error { -./internal/nntp/nntp-client-commands.go:186:func (c *BackendConn) GetBody(messageID string) ([]byte, error) { -./internal/nntp/nntp-client-commands.go:234:func (c *BackendConn) ListGroups() ([]GroupInfo, error) { -./internal/nntp/nntp-client-commands.go:281:func (c *BackendConn) ListGroupsLimited(maxGroups int) ([]GroupInfo, error) { -./internal/nntp/nntp-client-commands.go:32:func (c *BackendConn) StatArticle(messageID string) (bool, error) { -./internal/nntp/nntp-client-commands.go:362:func (c *BackendConn) SelectGroup(groupName string) (*GroupInfo, int, error) { -./internal/nntp/nntp-client-commands.go:430:func (c *BackendConn) XOver(groupName string, start, end int64, enforceLimit bool) ([]OverviewLine, error) { -./internal/nntp/nntp-client-commands.go:496:func (c *BackendConn) XHdr(groupName, field string, start, end int64) ([]*HeaderLine, error) { -./internal/nntp/nntp-client-commands.go:558:func (c *BackendConn) WantShutdown(shutdownChan <-chan struct{}) bool { -./internal/nntp/nntp-client-commands.go:572:func (c *BackendConn) XHdrStreamed(groupName, field string, start, end int64, xhdrChan chan<- *HeaderLine, shutdownChan <-chan struct{}) error { -./internal/nntp/nntp-client-commands.go:628:func (c *BackendConn) XHdrStreamedBatch(groupName, field string, start, end int64, xhdrChan chan<- *HeaderLine, shutdownChan <-chan struct{}) error { -./internal/nntp/nntp-client-commands.go:66:func (c *BackendConn) GetArticle(messageID *string, bulkmode bool) (*models.Article, error) { -./internal/nntp/nntp-client-commands.go:724:func (c *BackendConn) ListGroup(groupName string, start, end int64) ([]int64, error) { 
-./internal/nntp/nntp-client-commands.go:781:func (c *BackendConn) readMultilineResponse(src string) ([]string, error) { -./internal/nntp/nntp-client-commands.go:956:func (c *BackendConn) parseGroupLine(line string) (GroupInfo, error) { -./internal/nntp/nntp-client-commands.go:989:func (c *BackendConn) parseOverviewLine(line string) (OverviewLine, error) { -./internal/nntp/nntp-client.go:144:func (c *BackendConn) Connect() error { -./internal/nntp/nntp-client.go:247:func (c *BackendConn) authenticate() error { -./internal/nntp/nntp-client.go:289:func (c *BackendConn) CloseFromPoolOnly() error { -./internal/nntp/nntp-client.go:319:func (c *BackendConn) xSetReadDeadline(t time.Time) error { -./internal/nntp/nntp-client.go:329:func (c *BackendConn) xSetWriteDeadline(t time.Time) error { -./internal/nntp/nntp-client.go:338:func (c *BackendConn) UpdateLastUsed() { +./internal/nntp/nntp-client-commands.go:100:func (o *OffsetQueue) Done() { +./internal/nntp/nntp-client-commands.go:1049:func (c *BackendConn) ListGroup(groupName string, start, end int64) ([]int64, error) { +./internal/nntp/nntp-client-commands.go:107:func (o *OffsetQueue) Add(n int) { +./internal/nntp/nntp-client-commands.go:1106:func (c *BackendConn) readMultilineResponse(src string) ([]string, error) { +./internal/nntp/nntp-client-commands.go:1281:func (c *BackendConn) parseGroupLine(line string) (GroupInfo, error) { +./internal/nntp/nntp-client-commands.go:1314:func (c *BackendConn) parseOverviewLine(line string) (OverviewLine, error) { +./internal/nntp/nntp-client-commands.go:133:func (rr *ReadRequest) ClearReadRequest() { +./internal/nntp/nntp-client-commands.go:1341:func (c *BackendConn) parseHeaderLine(line string) (*HeaderLine, error) { +./internal/nntp/nntp-client-commands.go:1360:func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readResponsesChan chan *ReadRequest, job *CHTTJob) error { +./internal/nntp/nntp-client-commands.go:139:func (rr *ReadRequest) ReturnReadRequest(channel chan 
struct{}) { +./internal/nntp/nntp-client-commands.go:1404:func (c *BackendConn) CheckMultiple(messageIDs []*string, ttMode *TakeThisMode) ([]*string, error) { +./internal/nntp/nntp-client-commands.go:1482:func (c *BackendConn) xxTakeThisArticle(article *models.Article, nntphostname *string, newsgroup string) (int, error) { +./internal/nntp/nntp-client-commands.go:1570:func (c *BackendConn) GetBufSize(size int) int { +./internal/nntp/nntp-client-commands.go:1579:func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string, newsgroup string) (cmdID uint, txBytes int, err error) { +./internal/nntp/nntp-client-commands.go:1665:func (c *BackendConn) ReadTakeThisResponseStreaming(newsgroup string, cr *CheckResponse) (int, error) { +./internal/nntp/nntp-client-commands.go:1702:func (c *BackendConn) PostArticle(article *models.Article) (int, error) { +./internal/nntp/nntp-client-commands.go:175:func (job *CHTTJob) Response(ForceCleanUp bool, Err error) { +./internal/nntp/nntp-client-commands.go:1819:func (c *BackendConn) SwitchMode(mode int) error { +./internal/nntp/nntp-client-commands.go:1831:func (c *BackendConn) SwitchToModeReader() error { +./internal/nntp/nntp-client-commands.go:1863:func (c *BackendConn) SwitchToModeStream() error { +./internal/nntp/nntp-client-commands.go:215:func (ngp *NewsgroupTransferProgress) CalcSpeed() (speed int64) { +./internal/nntp/nntp-client-commands.go:231:func (ngp *NewsgroupTransferProgress) AddTXBytes(n int) { +./internal/nntp/nntp-client-commands.go:251:func (job *CHTTJob) Increment(counter int) { +./internal/nntp/nntp-client-commands.go:276:func (job *CHTTJob) AppendWantedMessageID(msgID *string) { +./internal/nntp/nntp-client-commands.go:283:func (job *CHTTJob) GetUpdateCounters(transferred, unwanted, rejected, checked, txErrors, connErrors *uint64) { +./internal/nntp/nntp-client-commands.go:294:func (ttMode *TakeThisMode) UseCHECK() bool { +./internal/nntp/nntp-client-commands.go:303:func 
(ttMode *TakeThisMode) SetForceCHECK() { +./internal/nntp/nntp-client-commands.go:309:func (ttMode *TakeThisMode) IncrementSuccess() { +./internal/nntp/nntp-client-commands.go:315:func (ttMode *TakeThisMode) IncrementTmp() { +./internal/nntp/nntp-client-commands.go:321:func (ttMode *TakeThisMode) SetNoCHECK() { +./internal/nntp/nntp-client-commands.go:327:func (ttMode *TakeThisMode) FlipMode(lowerLevel float64, upperLevel float64) bool { +./internal/nntp/nntp-client-commands.go:352:func (c *BackendConn) ForceCloseConn() { +./internal/nntp/nntp-client-commands.go:362:func (c *BackendConn) StatArticle(messageID string) (bool, error) { +./internal/nntp/nntp-client-commands.go:396:func (c *BackendConn) GetArticle(messageID *string, bulkmode bool) (*models.Article, error) { +./internal/nntp/nntp-client-commands.go:460:func (c *BackendConn) GetHead(messageID string) (*models.Article, error) { +./internal/nntp/nntp-client-commands.go:516:func (c *BackendConn) GetBody(messageID string) ([]byte, error) { +./internal/nntp/nntp-client-commands.go:564:func (c *BackendConn) ListGroups() ([]GroupInfo, error) { +./internal/nntp/nntp-client-commands.go:611:func (c *BackendConn) ListGroupsLimited(maxGroups int) ([]GroupInfo, error) { +./internal/nntp/nntp-client-commands.go:67:func (o *OffsetQueue) Wait(n int) { +./internal/nntp/nntp-client-commands.go:692:func (c *BackendConn) SelectGroup(groupName string) (*GroupInfo, int, error) { +./internal/nntp/nntp-client-commands.go:756:func (c *BackendConn) XOver(groupName string, start, end int64, enforceLimit bool) ([]OverviewLine, error) { +./internal/nntp/nntp-client-commands.go:822:func (c *BackendConn) XHdr(groupName, field string, start, end int64) ([]*HeaderLine, error) { +./internal/nntp/nntp-client-commands.go:884:func (c *BackendConn) WantShutdown(shutdownChan <-chan struct{}) bool { +./internal/nntp/nntp-client-commands.go:898:func (c *BackendConn) XHdrStreamed(groupName, field string, start, end int64, xhdrChan chan<- 
*HeaderLine, shutdownChan <-chan struct{}) error { +./internal/nntp/nntp-client-commands.go:954:func (c *BackendConn) XHdrStreamedBatch(groupName, field string, start, end int64, xhdrChan chan<- *HeaderLine, shutdownChan <-chan struct{}) error { +./internal/nntp/nntp-client.go:146:func (c *BackendConn) Connect() error { +./internal/nntp/nntp-client.go:238:func (c *BackendConn) authenticate() error { +./internal/nntp/nntp-client.go:280:func (c *BackendConn) CloseFromPoolOnly() error { +./internal/nntp/nntp-client.go:309:func (c *BackendConn) IsConnected() bool { +./internal/nntp/nntp-client.go:323:func (c *BackendConn) xSetReadDeadline(t time.Time) error { +./internal/nntp/nntp-client.go:333:func (c *BackendConn) xSetWriteDeadline(t time.Time) error { +./internal/nntp/nntp-client.go:342:func (c *BackendConn) UpdateLastUsed() { ./internal/nntp/nntp-cmd-article.go:4:func (c *ClientConnection) handleArticle(args []string) error { ./internal/nntp/nntp-cmd-auth.go:9:func (c *ClientConnection) handleAuthInfo(args []string) error { ./internal/nntp/nntp-cmd-basic.go:15:func (c *ClientConnection) handleMode(args []string) error { @@ -801,10 +837,10 @@ ./internal/nntp/nntp-cmd-list.go:26:func (c *ClientConnection) handleListActive() error { ./internal/nntp/nntp-cmd-list.go:46:func (c *ClientConnection) handleListNewsgroups() error { ./internal/nntp/nntp-cmd-list.go:9:func (c *ClientConnection) handleList(args []string) error { -./internal/nntp/nntp-cmd-posting.go:108:func (c *ClientConnection) handleTakeThis(args []string) error { -./internal/nntp/nntp-cmd-posting.go:13:func (c *ClientConnection) handlePost() error { -./internal/nntp/nntp-cmd-posting.go:173:func (c *ClientConnection) readArticleData() (*models.Article, error) { -./internal/nntp/nntp-cmd-posting.go:49:func (c *ClientConnection) handleIHave(args []string) error { +./internal/nntp/nntp-cmd-posting.go:109:func (c *ClientConnection) handleTakeThis(args []string) error { +./internal/nntp/nntp-cmd-posting.go:14:func 
(c *ClientConnection) handlePost() error { +./internal/nntp/nntp-cmd-posting.go:174:func (c *ClientConnection) readArticleData() (*models.Article, error) { +./internal/nntp/nntp-cmd-posting.go:50:func (c *ClientConnection) handleIHave(args []string) error { ./internal/nntp/nntp-cmd-stat.go:4:func (c *ClientConnection) handleStat(args []string) error { ./internal/nntp/nntp-cmd-xhdr.go:10:func (c *ClientConnection) handleXHdr(args []string) error { ./internal/nntp/nntp-cmd-xover.go:9:func (c *ClientConnection) handleXOver(args []string) error { @@ -889,17 +925,16 @@ ./internal/processor/interface.go:14:func (proc *Processor) IsNewsGroupInSectionsDB(name *string) bool { ./internal/processor/interface.go:20:func (proc *Processor) ForceCloseGroupDBs(groupsDB *database.GroupDBs) error { ./internal/processor/interface.go:9:func (proc *Processor) MsgIdExists(group *string, messageID string) bool { -./internal/processor/PostQueue.go:112:func (w *PostQueueWorker) processArticleForNewsgroup(article *models.Article, newsgroup string) error { +./internal/processor/PostQueue.go:130:func (w *PostQueueWorker) processArticleForNewsgroup(article *models.Article, newsgroup string) error { ./internal/processor/PostQueue.go:18:func (processor *Processor) NewPostQueueWorker() *PostQueueWorker { ./internal/processor/PostQueue.go:26:func (w *PostQueueWorker) Start() { ./internal/processor/PostQueue.go:32:func (w *PostQueueWorker) Stop() { ./internal/processor/PostQueue.go:38:func (w *PostQueueWorker) processLoop() { ./internal/processor/PostQueue.go:64:func (w *PostQueueWorker) pre_processArticle(article *models.Article) error { -./internal/processor/proc_DLArt.go:292:func (proc *Processor) FindStartArticleByDate(groupName string, targetDate time.Time, groupInfo *nntp.GroupInfo) (int64, error) { -./internal/processor/proc_DLArt.go:378:func (proc *Processor) DownloadArticlesFromDate(groupName string, startDate time.Time, DLParChan chan struct{}, progressDB *database.ProgressDB, groupInfo 
*nntp.GroupInfo, shutdownChan <-chan struct{}) error { -./internal/processor/proc_DLArt.go:44:func (bq *BatchQueue) GetOrCreateGroupBatch(newsgroup string) *GroupBatch { -./internal/processor/proc_DLArt.go:472:func (proc *Processor) WantShutdown(shutdownChan <-chan struct{}) bool { -./internal/processor/proc_DLArt.go:67:func (proc *Processor) DownloadArticles(newsgroup string, DLParChan chan struct{}, progressDB *database.ProgressDB, start int64, end int64, shutdownChan <-chan struct{}) error { +./internal/processor/proc_DLArt.go:293:func (proc *Processor) FindStartArticleByDate(groupName string, targetDate time.Time, groupInfo *nntp.GroupInfo) (int64, error) { +./internal/processor/proc_DLArt.go:379:func (proc *Processor) DownloadArticlesFromDate(groupName string, startDate time.Time, DLParChan chan struct{}, progressDB *database.ProgressDB, groupInfo *nntp.GroupInfo, shutdownChan <-chan struct{}) error { +./internal/processor/proc_DLArt.go:45:func (bq *BatchQueue) GetOrCreateGroupBatch(newsgroup string) *GroupBatch { +./internal/processor/proc_DLArt.go:68:func (proc *Processor) DownloadArticles(newsgroup string, DLParChan chan struct{}, progressDB *database.ProgressDB, start int64, end int64, shutdownChan <-chan struct{}) error { ./internal/processor/proc_DLXHDR.go:6:func (proc *Processor) GetXHDR(groupName string, header string, start, end int64) ([]*nntp.HeaderLine, error) { ./internal/processor/processor.go:112:func (proc *Processor) CheckNoMoreWorkInHistory() bool { ./internal/processor/processor.go:117:func (proc *Processor) AddProcessedArticleToHistory(msgIdItem *history.MessageIdItem, newsgroupPtr *string, articleNumber int64) { @@ -907,11 +942,11 @@ ./internal/processor/processor.go:180:func (proc *Processor) GetHistoryStats() history.HistoryStats { ./internal/processor/processor.go:188:func (proc *Processor) Close() error { ./internal/processor/processor.go:204:func (proc *Processor) WaitForBatchCompletion() { -./internal/processor/processor.go:239:func 
(proc *Processor) Lookup(msgIdItem *history.MessageIdItem) (int, error) { -./internal/processor/processor.go:245:func (proc *Processor) AddArticleToHistory(article *nntp.Article, newsgroup string) { -./internal/processor/processor.go:250:func (proc *Processor) ProcessIncomingArticle(article *models.Article) (int, error) { -./internal/processor/processor.go:270:func (proc *Processor) EnableBridges(config *BridgeConfig) { -./internal/processor/processor.go:282:func (proc *Processor) DisableBridges() { +./internal/processor/processor.go:238:func (proc *Processor) Lookup(msgIdItem *history.MessageIdItem) (int, error) { +./internal/processor/processor.go:244:func (proc *Processor) AddArticleToHistory(article *nntp.Article, newsgroup string) { +./internal/processor/processor.go:249:func (proc *Processor) ProcessIncomingArticle(article *models.Article) (int, error) { +./internal/processor/processor.go:269:func (proc *Processor) EnableBridges(config *BridgeConfig) { +./internal/processor/processor.go:281:func (proc *Processor) DisableBridges() { ./internal/processor/proc_ImportOV.go:14:func (proc *Processor) ImportOverview(groupName string) error { ./internal/processor/proc_MsgIDtmpCache.go:105://func (c *MsgTmpCache) Clear() { ./internal/processor/proc_MsgIDtmpCache.go:112://func (c *MsgTmpCache) UpdateThreadRootToTmpCache(group string, messageID string, rootArticle int64, isThreadRoot bool) bool { @@ -933,46 +968,45 @@ ./internal/processor/rslight.go:582:func (leg *LegacyImporter) insertNewsgroupIfNotExists(name, description string) error { ./internal/processor/rslight.go:86:func (leg *LegacyImporter) Close() error { ./internal/processor/rslight.go:94:func (leg *LegacyImporter) ImportSections() error { -./internal/processor/threading.go:46:func (proc *Processor) setCaseDupes(msgIdItem *history.MessageIdItem, bulkmode bool) { -./internal/processor/threading.go:56:func (proc *Processor) processArticle(article *models.Article, legacyNewsgroup string, bulkmode bool) (int, 
error) { -./internal/web/cronjobs.go:120:func (cm *CronJobManager) StopCronManager() { -./internal/web/cronjobs.go:212:func (cm *CronJobManager) GetJobOutput(jobID int64) []string { -./internal/web/cronjobs.go:232:func (cm *CronJobManager) StopJob(jobId int64) error { -./internal/web/cronjobs.go:279:func (cm *CronJobManager) startJob(cronJob *models.CronJob) error { -./internal/web/cronjobs.go:307:func (cm *CronJobManager) runJobScheduler(job *CronJob) { -./internal/web/cronjobs.go:372:func (cm *CronJobManager) executeJob(job *CronJob, execWG *sync.WaitGroup) { -./internal/web/cronjobs.go:467:func (job *CronJob) addLogLine(line string) { -./internal/web/cronjobs.go:482:func (job *CronJob) setRunningStatus(running bool) { -./internal/web/cronjobs.go:495:func (cm *CronJobManager) GetJobStatus(jobID int64) (*CronJob, bool) { -./internal/web/cronjobs.go:503:func (cm *CronJobManager) GetJobPID(jobID int64) int { -./internal/web/cronjobs.go:516:func (cm *CronJobManager) GetAllJobStatuses() map[int64]*CronJob { -./internal/web/cronjobs.go:68:func (cm *CronJobManager) StartCronManager() { +./internal/processor/threading.go:47:func (proc *Processor) setCaseDupes(msgIdItem *history.MessageIdItem, bulkmode bool) { +./internal/processor/threading.go:57:func (proc *Processor) processArticle(article *models.Article, legacyNewsgroup string, bulkmode bool) (int, error) { +./internal/web/cronjobs.go:113:func (cm *CronJobManager) StopCronManager() { +./internal/web/cronjobs.go:219:func (cm *CronJobManager) GetJobOutput(jobID int64) []string { +./internal/web/cronjobs.go:238:func (cm *CronJobManager) StopJob(jobId int64) error { +./internal/web/cronjobs.go:285:func (cm *CronJobManager) startJob(cronJob *models.CronJob) error { +./internal/web/cronjobs.go:313:func (cm *CronJobManager) runJobScheduler(job *CronJob) { +./internal/web/cronjobs.go:395:func (cm *CronJobManager) executeJob(job *CronJob, execWG *sync.WaitGroup) { +./internal/web/cronjobs.go:492:func (job *CronJob) 
addLogLine(line string) { +./internal/web/cronjobs.go:507:func (cm *CronJobManager) GetJobStatus(jobID int64) (*CronJob, bool) { +./internal/web/cronjobs.go:515:func (cm *CronJobManager) GetJobPID(jobID int64) int { +./internal/web/cronjobs.go:529:func (cm *CronJobManager) GetAllJobStatuses() map[int64]*CronJob { +./internal/web/cronjobs.go:70:func (cm *CronJobManager) StartCronManager() { ./internal/web/web_admin_apitokens.go:124:func (s *WebServer) adminDeleteAPIToken(c *gin.Context) { ./internal/web/web_admin_apitokens.go:14:func (s *WebServer) countEnabledAPITokens(tokens []*database.APIToken) (count int64) { ./internal/web/web_admin_apitokens.go:159:func (s *WebServer) adminCleanupExpiredTokens(c *gin.Context) { ./internal/web/web_admin_apitokens.go:24:func (s *WebServer) adminCreateAPIToken(c *gin.Context) { ./internal/web/web_admin_apitokens.go:81:func (s *WebServer) adminToggleAPIToken(c *gin.Context) { ./internal/web/web_admin_cache.go:11:func (s *WebServer) adminClearCache(c *gin.Context) { -./internal/web/web_admin_crons.go:13:func (s *WebServer) adminCreateCronJob(c *gin.Context) { -./internal/web/web_admin_crons.go:143:func (s *WebServer) adminToggleCronJob(c *gin.Context) { -./internal/web/web_admin_crons.go:171:func (s *WebServer) adminDeleteCronJob(c *gin.Context) { -./internal/web/web_admin_crons.go:205:func (s *WebServer) adminViewCronJobLog(c *gin.Context) { -./internal/web/web_admin_crons.go:253:func (s *WebServer) adminStopCronJob(c *gin.Context) { -./internal/web/web_admin_crons.go:67:func (s *WebServer) adminUpdateCronJob(c *gin.Context) { -./internal/web/web_admin.go:88:func (s *WebServer) getUptime() string { -./internal/web/web_admin_newsgroups.go:123:func (s *WebServer) adminUpdateNewsgroup(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:223:func (s *WebServer) adminDeleteNewsgroup(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:251:func (s *WebServer) adminAssignNewsgroupSection(c *gin.Context) { 
-./internal/web/web_admin_newsgroups.go:30:func (s *WebServer) adminCreateNewsgroup(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:334:func (s *WebServer) adminToggleNewsgroup(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:378:func (s *WebServer) adminBulkEnableNewsgroups(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:383:func (s *WebServer) adminBulkDisableNewsgroups(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:388:func (s *WebServer) adminBulkDeleteNewsgroups(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:421:func (s *WebServer) handleBulkNewsgroupAction(c *gin.Context, activeStatus bool, actionName string) { -./internal/web/web_admin_newsgroups.go:454:func (s *WebServer) adminMigrateNewsgroupActivity(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:513:func (s *WebServer) adminFixThreadActivity(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:552:func (s *WebServer) fixGroupThreadActivity(groupName string) error { -./internal/web/web_admin_newsgroups.go:679:func (s *WebServer) adminHideFuturePosts(c *gin.Context) { +./internal/web/web_admin_crons.go:145:func (s *WebServer) adminToggleCronJob(c *gin.Context) { +./internal/web/web_admin_crons.go:15:func (s *WebServer) adminCreateCronJob(c *gin.Context) { +./internal/web/web_admin_crons.go:173:func (s *WebServer) adminDeleteCronJob(c *gin.Context) { +./internal/web/web_admin_crons.go:207:func (s *WebServer) adminViewCronJobLog(c *gin.Context) { +./internal/web/web_admin_crons.go:299:func (s *WebServer) adminStopCronJob(c *gin.Context) { +./internal/web/web_admin_crons.go:69:func (s *WebServer) adminUpdateCronJob(c *gin.Context) { +./internal/web/web_admin.go:91:func (s *WebServer) getUptime() string { +./internal/web/web_admin_newsgroups.go:126:func (s *WebServer) adminUpdateNewsgroup(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:226:func (s *WebServer) adminDeleteNewsgroup(c *gin.Context) { 
+./internal/web/web_admin_newsgroups.go:254:func (s *WebServer) adminAssignNewsgroupSection(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:337:func (s *WebServer) adminToggleNewsgroup(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:33:func (s *WebServer) adminCreateNewsgroup(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:381:func (s *WebServer) adminBulkEnableNewsgroups(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:386:func (s *WebServer) adminBulkDisableNewsgroups(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:391:func (s *WebServer) adminBulkDeleteNewsgroups(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:424:func (s *WebServer) handleBulkNewsgroupAction(c *gin.Context, activeStatus bool, actionName string) { +./internal/web/web_admin_newsgroups.go:457:func (s *WebServer) adminMigrateNewsgroupActivity(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:516:func (s *WebServer) adminFixThreadActivity(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:555:func (s *WebServer) fixGroupThreadActivity(groupName string) error { +./internal/web/web_admin_newsgroups.go:682:func (s *WebServer) adminHideFuturePosts(c *gin.Context) { ./internal/web/web_admin_nntp.go:130:func (s *WebServer) adminUpdateNNTPUser(c *gin.Context) { ./internal/web/web_admin_nntp.go:210:func (s *WebServer) adminDeleteNNTPUser(c *gin.Context) { ./internal/web/web_admin_nntp.go:246:func (s *WebServer) adminToggleNNTPUser(c *gin.Context) { @@ -984,7 +1018,7 @@ ./internal/web/web_admin_ollama.go:196:func (s *WebServer) adminSyncOllamaModels(c *gin.Context) { ./internal/web/web_admin_ollama.go:33:func (s *WebServer) adminCreateAIModel(c *gin.Context) { ./internal/web/web_admin_ollama.go:94:func (s *WebServer) adminUpdateAIModel(c *gin.Context) { -./internal/web/web_adminPage.go:18:func (s *WebServer) adminPage(c *gin.Context) { +./internal/web/web_adminPage.go:19:func (s *WebServer) adminPage(c *gin.Context) { 
./internal/web/web_admin_postqueue.go:12:func (s *WebServer) adminDeletePostQueueEntry(c *gin.Context) { ./internal/web/web_admin_provider.go:16:func (s *WebServer) adminCreateProvider(c *gin.Context) { ./internal/web/web_admin_provider.go:186:func (s *WebServer) adminUpdateProvider(c *gin.Context) { @@ -995,17 +1029,18 @@ ./internal/web/web_admin_sections.go:22:func (s *WebServer) CreateSectionHandler(c *gin.Context) { ./internal/web/web_admin_sections.go:349:func (s *WebServer) UnassignNewsgroupHandler(c *gin.Context) { ./internal/web/web_admin_sections.go:93:func (s *WebServer) UpdateSectionHandler(c *gin.Context) { -./internal/web/web_admin_settings_unified.go:238:func (s *WebServer) validateHostname(hostname string) error { -./internal/web/web_admin_settings_unified.go:246:func (s *WebServer) validateWebPostSize(sizeStr string) error { -./internal/web/web_admin_settings_unified.go:263:func (s *WebServer) validateEmail(email string) error { -./internal/web/web_admin_settings_unified.go:272:func (s *WebServer) processHostname(server *WebServer, hostname string) error { -./internal/web/web_admin_settings_unified.go:276:func (s *WebServer) processRegistrationToggle(server *WebServer, value string) error { +./internal/web/web_admin_settings_unified.go:266:func (s *WebServer) validateHostname(hostname string) error { +./internal/web/web_admin_settings_unified.go:274:func (s *WebServer) validateWebPostSize(sizeStr string) error { ./internal/web/web_admin_settings_unified.go:27:func (s *WebServer) adminUpdateSettings(c *gin.Context) { -./internal/web/web_admin_settings_unified.go:301:func (s *WebServer) processBlockBadBotsToggle(server *WebServer, value string) error { -./internal/web/web_admin_settings_unified.go:333:func (s *WebServer) processBlockBadIPsToggle(server *WebServer, value string) error { -./internal/web/web_admin_settings_unified.go:364:func (s *WebServer) validateCIDRList(value string) error { -./internal/web/web_admin_settings_unified.go:391:func (s 
*WebServer) processBadBotsUpdate(server *WebServer, value string) error { -./internal/web/web_admin_settings_unified.go:412:func (s *WebServer) processBadIPsUpdate(server *WebServer, value string) error { +./internal/web/web_admin_settings_unified.go:291:func (s *WebServer) validateEmail(email string) error { +./internal/web/web_admin_settings_unified.go:300:func (s *WebServer) processHostname(server *WebServer, hostname string) error { +./internal/web/web_admin_settings_unified.go:304:func (s *WebServer) processRegistrationToggle(server *WebServer, value string) error { +./internal/web/web_admin_settings_unified.go:329:func (s *WebServer) processBlockBadBotsToggle(server *WebServer, value string) error { +./internal/web/web_admin_settings_unified.go:361:func (s *WebServer) processBlockBadIPsToggle(server *WebServer, value string) error { +./internal/web/web_admin_settings_unified.go:393:func (s *WebServer) processAPIEnabledToggle(server *WebServer, value string) error { +./internal/web/web_admin_settings_unified.go:418:func (s *WebServer) validateCIDRList(value string) error { +./internal/web/web_admin_settings_unified.go:445:func (s *WebServer) processBadBotsUpdate(server *WebServer, value string) error { +./internal/web/web_admin_settings_unified.go:466:func (s *WebServer) processBadIPsUpdate(server *WebServer, value string) error { ./internal/web/web_admin_sitenews.go:136:func (s *WebServer) adminDeleteSiteNews(c *gin.Context) { ./internal/web/web_admin_sitenews.go:166:func (s *WebServer) adminToggleSiteNewsVisibility(c *gin.Context) { ./internal/web/web_admin_sitenews.go:16:func (s *WebServer) adminCreateSiteNews(c *gin.Context) { @@ -1025,19 +1060,20 @@ ./internal/web/web_aichatPage.go:430:func (s *WebServer) aichatGetCounts(c *gin.Context) { ./internal/web/web_aichatPage.go:459:func (s *WebServer) renderChatError(c *gin.Context, title, message string) { ./internal/web/web_aichatPage.go:82:func (s *WebServer) aichatPage(c *gin.Context) { 
-./internal/web/web_apiHandlers.go:158:func (s *WebServer) getArticle(c *gin.Context) { -./internal/web/web_apiHandlers.go:188:func (s *WebServer) getArticleByMessageId(c *gin.Context) { -./internal/web/web_apiHandlers.go:20:// - func (s *WebServer) listGroups(c *gin.Context) (line ~313) -./internal/web/web_apiHandlers.go:212:func (s *WebServer) getGroupThreads(c *gin.Context) { -./internal/web/web_apiHandlers.go:22:// - func (s *WebServer) getGroupOverview(c *gin.Context) (line ~354) -./internal/web/web_apiHandlers.go:236:func (s *WebServer) getStats(c *gin.Context) { -./internal/web/web_apiHandlers.go:24:// - func (s *WebServer) getArticle(c *gin.Context) (line ~403) -./internal/web/web_apiHandlers.go:26:// - func (s *WebServer) getArticleByMessageId(c *gin.Context) (line ~428) -./internal/web/web_apiHandlers.go:28:// - func (s *WebServer) getGroupThreads(c *gin.Context) (line ~447) -./internal/web/web_apiHandlers.go:307:func (s *WebServer) getArticlePreview(c *gin.Context) { -./internal/web/web_apiHandlers.go:30:// - func (s *WebServer) getStats(c *gin.Context) (line ~466) -./internal/web/web_apiHandlers.go:35:func (s *WebServer) listGroups(c *gin.Context) { -./internal/web/web_apiHandlers.go:70:func (s *WebServer) getGroupOverview(c *gin.Context) { +./internal/web/web_apiHandlers.go:174:func (s *WebServer) getArticle(c *gin.Context) { +./internal/web/web_apiHandlers.go:204:func (s *WebServer) getArticleByMessageId(c *gin.Context) { +./internal/web/web_apiHandlers.go:20:func (s *WebServer) requireAPIEnabled() gin.HandlerFunc { +./internal/web/web_apiHandlers.go:228:func (s *WebServer) getGroupThreads(c *gin.Context) { +./internal/web/web_apiHandlers.go:252:func (s *WebServer) getStats(c *gin.Context) { +./internal/web/web_apiHandlers.go:323:func (s *WebServer) getArticlePreview(c *gin.Context) { +./internal/web/web_apiHandlers.go:36:// - func (s *WebServer) listGroups(c *gin.Context) (line ~313) +./internal/web/web_apiHandlers.go:38:// - func (s *WebServer) 
getGroupOverview(c *gin.Context) (line ~354) +./internal/web/web_apiHandlers.go:40:// - func (s *WebServer) getArticle(c *gin.Context) (line ~403) +./internal/web/web_apiHandlers.go:42:// - func (s *WebServer) getArticleByMessageId(c *gin.Context) (line ~428) +./internal/web/web_apiHandlers.go:44:// - func (s *WebServer) getGroupThreads(c *gin.Context) (line ~447) +./internal/web/web_apiHandlers.go:46:// - func (s *WebServer) getStats(c *gin.Context) (line ~466) +./internal/web/web_apiHandlers.go:51:func (s *WebServer) listGroups(c *gin.Context) { +./internal/web/web_apiHandlers.go:86:func (s *WebServer) getGroupOverview(c *gin.Context) { ./internal/web/web_apitokens.go:120:func (s *WebServer) disableAPITokenHandler(c *gin.Context) { ./internal/web/web_apitokens.go:14:func (s *WebServer) APIAuthRequired() gin.HandlerFunc { ./internal/web/web_apitokens.go:153:func (s *WebServer) enableAPITokenHandler(c *gin.Context) { @@ -1102,20 +1138,20 @@ ./internal/web/web_sectionsPage.go:347:func (s *WebServer) sectionArticleByMessageIdPage(c *gin.Context) { ./internal/web/web_sectionsPage.go:36:func (s *WebServer) sectionsPage(c *gin.Context) { ./internal/web/web_sectionsPage.go:65:func (s *WebServer) sectionPage(c *gin.Context) { -./internal/web/webserver_core_routes.go:282:func (s *WebServer) setupRoutes() { -./internal/web/webserver_core_routes.go:491:func (s *WebServer) Start() error { -./internal/web/webserver_core_routes.go:507:func (s *WebServer) BotDetectionMiddleware() gin.HandlerFunc { -./internal/web/webserver_core_routes.go:553:func (s *WebServer) ReverseProxyMiddleware() gin.HandlerFunc { -./internal/web/webserver_core_routes.go:630:func (s *WebServer) ApacheLogFormat() gin.HandlerFunc { -./internal/web/webserver_core_routes.go:647:func (s *WebServer) loadSectionsCache() { -./internal/web/webserver_core_routes.go:664:func (s *WebServer) refreshSectionsCache() { -./internal/web/webserver_core_routes.go:669:func (s *WebServer) isValidSection(sectionName string) bool 
{ -./internal/web/webserver_core_routes.go:677:func (s *WebServer) sectionValidationMiddleware() gin.HandlerFunc { +./internal/web/webserver_core_routes.go:283:func (s *WebServer) setupRoutes() { +./internal/web/webserver_core_routes.go:494:func (s *WebServer) Start() error { +./internal/web/webserver_core_routes.go:510:func (s *WebServer) BotDetectionMiddleware() gin.HandlerFunc { +./internal/web/webserver_core_routes.go:556:func (s *WebServer) ReverseProxyMiddleware() gin.HandlerFunc { +./internal/web/webserver_core_routes.go:633:func (s *WebServer) ApacheLogFormat() gin.HandlerFunc { +./internal/web/webserver_core_routes.go:650:func (s *WebServer) loadSectionsCache() { +./internal/web/webserver_core_routes.go:667:func (s *WebServer) refreshSectionsCache() { +./internal/web/webserver_core_routes.go:672:func (s *WebServer) isValidSection(sectionName string) bool { +./internal/web/webserver_core_routes.go:680:func (s *WebServer) sectionValidationMiddleware() gin.HandlerFunc { ./internal/web/web_session_cleanup.go:9:func (s *WebServer) StartSessionCleanup() { ./internal/web/web_sitePostPage.go:165:func (s *WebServer) sitePostSubmit(c *gin.Context) { ./internal/web/web_sitePostPage.go:40:func (s *WebServer) sitePostPage(c *gin.Context) { -./internal/web/web_statsPage.go:14:// - func (s *WebServer) statsPage(c *gin.Context) (line ~857) -./internal/web/web_statsPage.go:18:func (s *WebServer) statsPage(c *gin.Context) { +./internal/web/web_statsPage.go:15:// - func (s *WebServer) statsPage(c *gin.Context) (line ~857) +./internal/web/web_statsPage.go:19:func (s *WebServer) statsPage(c *gin.Context) { ./internal/web/web_threadPage.go:17:// - func (s *WebServer) singleThreadPage(c *gin.Context) (line ~1394) ./internal/web/web_threadPage.go:25:func (s *WebServer) singleThreadPage(c *gin.Context) { ./internal/web/web_threadTreePage.go:16:// - func (s *WebServer) threadTreePage(c *gin.Context) (line ~1596) diff --git a/appVersion.txt b/appVersion.txt index 87b18a56..b48b2de9 
100644 --- a/appVersion.txt +++ b/appVersion.txt @@ -1 +1 @@ -4.7.3 +4.7.4 diff --git a/cmd/expire-news/main.go b/cmd/expire-news/main.go index d161fb84..d85e1449 100644 --- a/cmd/expire-news/main.go +++ b/cmd/expire-news/main.go @@ -310,11 +310,11 @@ func getNewsgroupsToExpire(db *database.Database, targetGroup string) ([]*models // expireArticlesInGroup expires articles older than cutoffDate in the specified group func expireArticlesInGroup(db *database.Database, groupName string, cutoffDate time.Time, batchSize int, dryRun bool) (int, int, error) { // Get group database - groupDBs, err := db.GetGroupDBs(groupName) + groupDB, err := db.GetGroupDB(groupName) if err != nil { return 0, 0, fmt.Errorf("failed to get group database: %v", err) } - defer groupDBs.Return(db) + defer groupDB.Return() totalExpired := 0 totalScanned := 0 @@ -324,7 +324,7 @@ func expireArticlesInGroup(db *database.Database, groupName string, cutoffDate t offset := 0 for { // Get batch of articles - articles, err := getArticleBatch(groupDBs, offset, batchSize) + articles, err := getArticleBatch(groupDB, offset, batchSize) if err != nil { return totalExpired, totalScanned, fmt.Errorf("failed to get article batch: %v", err) } @@ -357,7 +357,7 @@ func expireArticlesInGroup(db *database.Database, groupName string, cutoffDate t // Delete articles in this batch if not dry run if !dryRun && len(articlesToDelete) > 0 { - if err := deleteArticles(groupDBs, articlesToDelete); err != nil { + if err := deleteArticles(groupDB, articlesToDelete); err != nil { return totalExpired, totalScanned, fmt.Errorf("failed to delete articles: %v", err) } } @@ -380,7 +380,7 @@ func expireArticlesInGroup(db *database.Database, groupName string, cutoffDate t } // getArticleBatch retrieves a batch of articles from the group database -func getArticleBatch(groupDBs *database.GroupDBs, offset, limit int) ([]*models.Article, error) { +func getArticleBatch(groupDB *database.GroupDB, offset, limit int) ([]*models.Article, 
error) { query := ` SELECT article_num, date_sent FROM articles @@ -388,7 +388,7 @@ func getArticleBatch(groupDBs *database.GroupDBs, offset, limit int) ([]*models. LIMIT ? OFFSET ? ` - rows, err := database.RetryableQuery(groupDBs.DB, query, limit, offset) + rows, err := database.RetryableQuery(groupDB.DB, query, limit, offset) if err != nil { return nil, err } @@ -398,8 +398,8 @@ func getArticleBatch(groupDBs *database.GroupDBs, offset, limit int) ([]*models. for rows.Next() { article := &models.Article{} article.ArticleNums = make(map[*string]int64) - article.ArticleNums[groupDBs.NewsgroupPtr] = -1 // Initialize with group name - err := rows.Scan(article.ArticleNums[groupDBs.NewsgroupPtr], &article.DateSent) + article.ArticleNums[groupDB.NewsgroupPtr] = -1 // Initialize with group name + err := rows.Scan(article.ArticleNums[groupDB.NewsgroupPtr], &article.DateSent) if err != nil { return nil, err } @@ -410,13 +410,13 @@ func getArticleBatch(groupDBs *database.GroupDBs, offset, limit int) ([]*models. 
} // deleteArticles removes articles from the database using proper batch operations -func deleteArticles(groupDBs *database.GroupDBs, articleNums []int64) error { +func deleteArticles(groupDB *database.GroupDB, articleNums []int64) error { if len(articleNums) == 0 { return nil } // Begin transaction - tx, err := groupDBs.DB.Begin() + tx, err := groupDB.DB.Begin() if err != nil { return err } @@ -495,15 +495,15 @@ func getPlaceholders(count int) string { // pruneArticlesInGroup removes oldest articles to keep the group under maxArticles limit func pruneArticlesInGroup(db *database.Database, groupName string, maxArticles int, batchSize int, dryRun bool) (int, int, error) { // Get group database - groupDBs, err := db.GetGroupDBs(groupName) + groupDB, err := db.GetGroupDB(groupName) if err != nil { return 0, 0, fmt.Errorf("failed to get group database: %v", err) } - defer groupDBs.Return(db) + defer groupDB.Return() // First count total articles var totalArticles int - err = database.RetryableQueryRowScan(groupDBs.DB, "SELECT COUNT(*) FROM articles", nil, &totalArticles) + err = database.RetryableQueryRowScan(groupDB.DB, "SELECT COUNT(*) FROM articles", nil, &totalArticles) if err != nil { return 0, 0, fmt.Errorf("failed to count articles: %v", err) } @@ -525,7 +525,7 @@ func pruneArticlesInGroup(db *database.Database, groupName string, maxArticles i LIMIT ? 
` - rows, err := database.RetryableQuery(groupDBs.DB, query, articlesToRemove) + rows, err := database.RetryableQuery(groupDB.DB, query, articlesToRemove) if err != nil { return 0, totalArticles, fmt.Errorf("failed to query oldest articles: %v", err) } @@ -556,7 +556,7 @@ func pruneArticlesInGroup(db *database.Database, groupName string, maxArticles i } batch := articlesToDelete[i:end] - if err := deleteArticles(groupDBs, batch); err != nil { + if err := deleteArticles(groupDB, batch); err != nil { return totalPruned, totalArticles, fmt.Errorf("failed to delete article batch: %v", err) } @@ -577,22 +577,22 @@ func pruneArticlesInGroup(db *database.Database, groupName string, maxArticles i // updateNewsgroupCounters updates the message count and last article number for a newsgroup func updateNewsgroupCounters(db *database.Database, groupName string) error { // Get group database to count current articles - groupDBs, err := db.GetGroupDBs(groupName) + groupDB, err := db.GetGroupDB(groupName) if err != nil { return fmt.Errorf("failed to get group database: %v", err) } - defer groupDBs.Return(db) + defer groupDB.Return() // Count current articles var messageCount int64 - err = database.RetryableQueryRowScan(groupDBs.DB, "SELECT COUNT(*) FROM articles", nil, &messageCount) + err = database.RetryableQueryRowScan(groupDB.DB, "SELECT COUNT(*) FROM articles", nil, &messageCount) if err != nil { return fmt.Errorf("failed to count articles: %v", err) } // Get the highest article number var lastArticle int64 - err = database.RetryableQueryRowScan(groupDBs.DB, "SELECT COALESCE(MAX(article_num), 0) FROM articles", nil, &lastArticle) + err = database.RetryableQueryRowScan(groupDB.DB, "SELECT COALESCE(MAX(article_num), 0) FROM articles", nil, &lastArticle) if err != nil { return fmt.Errorf("failed to get last article: %v", err) } diff --git a/cmd/fix-references/main.go b/cmd/fix-references/main.go index 806810be..388afff9 100644 --- a/cmd/fix-references/main.go +++ 
b/cmd/fix-references/main.go @@ -129,7 +129,7 @@ func main() { for i, newsgroup := range newsgroups { fmt.Printf("🔍 [%d/%d] Processing newsgroup: %s\n", i+1, len(newsgroups), newsgroup.Name) - groupDB, err := db.GetGroupDBs(newsgroup.Name) + groupDB, err := db.GetGroupDB(newsgroup.Name) if err != nil { fmt.Printf(" ❌ Failed to get group database: %v\n", err) continue @@ -138,7 +138,7 @@ func main() { processed, fixed, err := fixReferencesInNewsgroup(groupDB, *dryRun, *verbose, *limit, *batchSize) if err != nil { fmt.Printf(" ❌ Failed to fix references: %v\n", err) - groupDB.Return(db) + groupDB.Return() continue } @@ -156,7 +156,7 @@ func main() { } } - groupDB.Return(db) + groupDB.Return() if processed > 0 { if fixed > 0 { @@ -186,7 +186,7 @@ func main() { } } -func fixReferencesInNewsgroup(groupDB *database.GroupDBs, dryRun, verbose bool, limit, batchSize int) (int, int, error) { +func fixReferencesInNewsgroup(groupDB *database.GroupDB, dryRun, verbose bool, limit, batchSize int) (int, int, error) { // Get total count for progress tracking var totalCount int countQuery := "SELECT COUNT(*) FROM articles WHERE headers_json IS NOT NULL AND headers_json != ''" @@ -234,7 +234,7 @@ func fixReferencesInNewsgroup(groupDB *database.GroupDBs, dryRun, verbose bool, return totalProcessed, totalFixed, nil } -func processBatch(groupDB *database.GroupDBs, dryRun, verbose bool, offset, batchSize int) (int, int, error) { +func processBatch(groupDB *database.GroupDB, dryRun, verbose bool, offset, batchSize int) (int, int, error) { // Query articles with potentially broken references query := ` SELECT article_num, message_id, "references", headers_json @@ -320,7 +320,7 @@ func processBatch(groupDB *database.GroupDBs, dryRun, verbose bool, offset, batc } // rebuildThreadsInNewsgroup rebuilds thread relationships for a newsgroup using batched processing -func rebuildThreadsInNewsgroup(groupDB *database.GroupDBs, verbose bool, batchSize int) (int, error) { +func 
rebuildThreadsInNewsgroup(groupDB *database.GroupDB, verbose bool, batchSize int) (int, error) { // Get total article count var totalCount int err := database.RetryableQueryRowScan(groupDB.DB, "SELECT COUNT(*) FROM articles", nil, &totalCount) @@ -419,7 +419,7 @@ func rebuildThreadsInNewsgroup(groupDB *database.GroupDBs, verbose bool, batchSi return totalThreadsBuilt, nil } -func processThreadBatch(groupDB *database.GroupDBs, msgIDToArticleNum map[string]int64, offset, batchSize int, verbose bool) (int, error) { +func processThreadBatch(groupDB *database.GroupDB, msgIDToArticleNum map[string]int64, offset, batchSize int, verbose bool) (int, error) { // Get batch of articles with their references rows, err := database.RetryableQuery(groupDB.DB, ` SELECT article_num, message_id, "references" diff --git a/cmd/fix-thread-activity/main.go b/cmd/fix-thread-activity/main.go index 9e74c849..8cbd6866 100644 --- a/cmd/fix-thread-activity/main.go +++ b/cmd/fix-thread-activity/main.go @@ -60,14 +60,14 @@ func main() { } func fixGroupThreadActivity(db *database.Database, groupName string) error { - groupDBs, err := db.GetGroupDBs(groupName) + groupDB, err := db.GetGroupDB(groupName) if err != nil { return fmt.Errorf("failed to get group DB: %w", err) } - defer groupDBs.Return(db) + defer groupDB.Return() // Get only thread cache entries that have future last_activity timestamps - rows, err := database.RetryableQuery(groupDBs.DB, ` + rows, err := database.RetryableQuery(groupDB.DB, ` SELECT thread_root, child_articles, last_activity FROM thread_cache WHERE last_activity > datetime('now', '+25 hour') @@ -136,7 +136,7 @@ func fixGroupThreadActivity(db *database.Database, groupName string) error { for _, articleNum := range articleNums { var dateSent time.Time var hide int - err := database.RetryableQueryRowScan(groupDBs.DB, ` + err := database.RetryableQueryRowScan(groupDB.DB, ` SELECT date_sent, hide FROM articles WHERE article_num = ?`, []interface{}{articleNum}, &dateSent, 
&hide) @@ -161,7 +161,7 @@ func fixGroupThreadActivity(db *database.Database, groupName string) error { // Format as UTC string to avoid timezone encoding issues utcTimeStr := maxDate.UTC().Format("2006-01-02 15:04:05") - _, err := database.RetryableExec(groupDBs.DB, ` + _, err := database.RetryableExec(groupDB.DB, ` UPDATE thread_cache SET last_activity = ? WHERE thread_root = ?`, utcTimeStr, thread.root) diff --git a/cmd/history-rebuild/main.go b/cmd/history-rebuild/main.go index 727344c6..588db25b 100644 --- a/cmd/history-rebuild/main.go +++ b/cmd/history-rebuild/main.go @@ -59,7 +59,6 @@ func main() { //debug.SetMemoryLimit(4 * 1024 * 1024 * 1024) // 4GB limit var ( - nntpHostname = flag.String("nntphostname", "", "NNTP hostname (required for proper article processing)") batchSize = flag.Int("batch-size", 5000, "Number of articles to process per batch (deprecated - now processes individually)") progressInterval = flag.Int("progress", 2500, "Show progress every N articles") validateOnly = flag.Bool("validate-only", false, "Only validate existing history, don't rebuild") @@ -91,7 +90,6 @@ func main() { fmt.Println("======================================") fmt.Printf("Configuration:\n") - fmt.Printf(" NNTP Hostname: %s\n", *nntpHostname) fmt.Printf(" Batch Size: %d\n", *batchSize) fmt.Printf(" Validate Only: %t\n", *validateOnly) fmt.Printf(" Analyze Only: %t\n", *analyzeOnly) @@ -135,10 +133,6 @@ func main() { fmt.Printf("✅ Using locked UseShortHashLen: %d\n", lockedHashLen) } - if *nntpHostname == "" && *readOffset < 0 { - log.Fatalf("ERROR: NNTP hostname must be set with -nntphostname flag (unless using -read-offset)") - } - // Handle offset reading mode (doesn't need processor) if *readOffset >= 0 { fmt.Printf("🔍 Reading history.dat at offset %d...\n", *readOffset) @@ -155,11 +149,6 @@ func main() { return } - // Set hostname in processor with database fallback support - if err := processor.SetHostname(*nntpHostname, db); err != nil { - log.Fatalf("Failed to 
set NNTP hostname: %v", err) - } - // Initialize processor with proper cache management fmt.Println("🔧 Initializing processor for cache management...") proc := processor.NewProcessor(db, nil, lockedHashLen) // nil pool since we're not fetching @@ -424,29 +413,34 @@ func getRealMemoryUsageSimple() uint64 { return mem } +const query_processGroup = `SELECT message_id, article_num FROM articles + WHERE message_id IS NOT NULL AND message_id != '' + AND article_num >= ? AND article_num <= ? + ORDER BY article_num` + func processGroup(db *database.Database, proc *processor.Processor, groupName string, progressInterval int, validateOnly, verbose bool, stats *RebuildStats) error { // Get group databases - groupDBs, err := db.GetGroupDBs(groupName) + groupDB, err := db.GetGroupDB(groupName) if err != nil { return fmt.Errorf("failed to get group databases: %w", err) } - defer groupDBs.Return(db) + defer groupDB.Return() /* // Configure SQLite for memory efficiency - if groupDBs.DB != nil { + if groupDB.DB != nil { // Reduce SQLite memory usage - groupDBs.DB.Exec("PRAGMA cache_size = 1000") // Reduce page cache (default ~2MB) - groupDBs.DB.Exec("PRAGMA temp_store = MEMORY") // Use memory for temp storage (faster) - groupDBs.DB.Exec("PRAGMA mmap_size = 134217728") // Limit mmap to 128MB - groupDBs.DB.Exec("PRAGMA journal_mode = WAL") // Use WAL mode for better concurrency + groupDB.DB.Exec("PRAGMA cache_size = 1000") // Reduce page cache (default ~2MB) + groupDB.DB.Exec("PRAGMA temp_store = MEMORY") // Use memory for temp storage (faster) + groupDB.DB.Exec("PRAGMA mmap_size = 134217728") // Limit mmap to 128MB + groupDB.DB.Exec("PRAGMA journal_mode = WAL") // Use WAL mode for better concurrency log.Printf("[SQLITE-CONFIG] Configured SQLite memory limits for group '%s'", groupName) } */ // Get total count first var totalArticles int64 - err = database.RetryableQueryRowScan(groupDBs.DB, `SELECT COUNT(*) FROM articles WHERE message_id IS NOT NULL AND message_id != ''`, nil, 
&totalArticles) + err = database.RetryableQueryRowScan(groupDB.DB, `SELECT COUNT(*) FROM articles WHERE message_id IS NOT NULL AND message_id != ''`, nil, &totalArticles) if err != nil { return fmt.Errorf("failed to count articles: %w", err) } @@ -460,7 +454,7 @@ func processGroup(db *database.Database, proc *processor.Processor, groupName st // Get the min and max article numbers for efficient range processing var minArtNum, maxArtNum int64 - err = database.RetryableQueryRowScan(groupDBs.DB, `SELECT MIN(article_num), MAX(article_num) FROM articles WHERE message_id IS NOT NULL AND message_id != ''`, nil, &minArtNum, &maxArtNum) + err = database.RetryableQueryRowScan(groupDB.DB, `SELECT MIN(article_num), MAX(article_num) FROM articles WHERE message_id IS NOT NULL AND message_id != ''`, nil, &minArtNum, &maxArtNum) if err != nil { return fmt.Errorf("failed to get article number range: %w", err) } @@ -476,14 +470,7 @@ func processGroup(db *database.Database, proc *processor.Processor, groupName st if maxRangeArtNum > maxArtNum { maxRangeArtNum = maxArtNum } - - // Use article number range instead of OFFSET - much faster! - query := `SELECT message_id, article_num FROM articles - WHERE message_id IS NOT NULL AND message_id != '' - AND article_num >= ? AND article_num <= ? 
- ORDER BY article_num` - - rows, err := database.RetryableQuery(groupDBs.DB, query, currentArtNum, maxRangeArtNum) + rows, err := database.RetryableQuery(groupDB.DB, query_processGroup, currentArtNum, maxRangeArtNum) if err != nil { return fmt.Errorf("failed to query article range %d-%d: %w", currentArtNum, maxRangeArtNum, err) } @@ -541,8 +528,8 @@ func processGroup(db *database.Database, proc *processor.Processor, groupName st msgIdItem.Mux.Unlock() continue } - msgIdItem.GroupName = db.Batch.GetNewsgroupPointer(groupName) - msgIdItem.ArtNum = articleNum + //msgIdItem.GroupName = db.Batch.GetNewsgroupPointer(groupName) + //msgIdItem.ArtNum = articleNum msgIdItem.Response = history.CaseLock /* if messageID == "<32304224.79C1@parkcity.com>" { @@ -559,7 +546,7 @@ func processGroup(db *database.Database, proc *processor.Processor, groupName st msgIdItem.CachedEntryExpires = time.Now().Add(history.CachedEntryTTL) msgIdItem.Mux.Unlock() // Validation mode: check if article exists in history - result, err := proc.History.Lookup(msgIdItem) + result, _, err := proc.History.Lookup(msgIdItem, true) if err == nil && result == history.CaseDupes { stats.HistoryFound++ msgIdItem.Mux.Lock() @@ -568,7 +555,7 @@ func processGroup(db *database.Database, proc *processor.Processor, groupName st msgIdItem.Mux.Unlock() } else { if verbose { - log.Printf("Missing from history: %s (%s)", msgIdItem.MessageId, msgIdItem.StorageToken) + log.Printf("[HISTORY] miss: '%s'", msgIdItem.MessageId) } msgIdItem.Mux.Lock() msgIdItem.Response = history.CaseError @@ -583,13 +570,18 @@ func processGroup(db *database.Database, proc *processor.Processor, groupName st } */ // Rebuild mode: add article to history - proc.History.Add(msgIdItem) + if proc.History.Add(msgIdItem) { + stats.HistoryAdded++ + } else { + log.Printf("[HISTORY-REBUILD] did not add: '%s'", msgIdItem.MessageId) + stats.ArticlesSkipped++ + } /* if messageID == "<32304224.79C1@parkcity.com>" { log.Printf("[DEBUG-STEP7] Called 
proc.History.Add() for target message ID: %s - should reach history system now!", messageID) } */ - stats.HistoryAdded++ + } //addTime := time.Since(addStart) @@ -628,8 +620,8 @@ func processGroup(db *database.Database, proc *processor.Processor, groupName st // Move to next article number range currentArtNum = maxRangeArtNum + 1 - // Aggressive memory management every 5K articles - if processed%100000 == 0 { + // Aggressive memory management every N articles + if processed%10000 == 0 { // Force garbage collection runtime.GC() // Second GC to clean up finalizers @@ -651,17 +643,17 @@ func processGroup(db *database.Database, proc *processor.Processor, groupName st if realMem > 2*1024*1024*1024 { // 2GB threshold log.Printf("[MEMORY-CRITICAL] RSS exceeds 2GB, forcing database memory release...") // Try to force SQLite memory release via PRAGMA - if groupDBs != nil && groupDBs.DB != nil { - groupDBs.DB.Exec("PRAGMA shrink_memory") - groupDBs.DB.Exec("PRAGMA cache_size = 1000") // Reduce cache + if groupDB != nil && groupDB.DB != nil { + groupDB.DB.Exec("PRAGMA shrink_memory") + groupDB.DB.Exec("PRAGMA cache_size = 1000") // Reduce cache } } */ // Emergency stop if RSS exceeds N GB - if realMem > 4*1024*1024*1024 { - log.Printf("[MEMORY-EMERGENCY] RSS HIGH! Pausing for 30 seconds to allow memory cleanup...") - time.Sleep(30 * time.Second) + if realMem > 16*1024*1024*1024 { + log.Printf("[MEMORY-EMERGENCY] RSS HIGH! Pausing for 10 seconds to allow memory cleanup...") + <-time.After(time.Second * 10) // Check again after cleanup newRealMem, _ := getRealMemoryUsage() diff --git a/cmd/nntp-fetcher/main.go b/cmd/nntp-fetcher/main.go index 35968d01..75d3cf14 100644 --- a/cmd/nntp-fetcher/main.go +++ b/cmd/nntp-fetcher/main.go @@ -9,6 +9,7 @@ import ( "log" "os" "os/signal" + "runtime" "strconv" "strings" "sync" @@ -22,6 +23,10 @@ import ( "github.com/go-while/go-pugleaf/internal/processor" ) +const BW_V4_Prefix = "24.182.239.6" // news.blueworldhosting.com. 
+const ET_V4 = "157.180.91.226" // news.eternal-september.org. +const ET_V6 = "2a01:4f9:c012:f55a::1" // news.eternal-september.org. + // showUsageExamples displays usage examples for connection testing func showUsageExamples() { fmt.Println("\n=== NNTP Fetcher - Connection Testing Examples ===") @@ -34,15 +39,11 @@ func showUsageExamples() { fmt.Println("Article Downloading:") fmt.Println(" ./nntp-fetcher -group alt.* (downloads all groups with prefix alt.*)") fmt.Println(" ./nntp-fetcher -group alt.test") - fmt.Println(" ./nntp-fetcher -group alt.test -xover-copy (use xover-copy to do identical copy from remote server!)") fmt.Println(" ./nntp-fetcher -group alt.test -download-start-date 2024-12-31") fmt.Println() fmt.Println("Newsgroup List Update:") fmt.Println(" ./nntp-fetcher -update-list (fetch remote newsgroup list and add new groups to database)") fmt.Println() - fmt.Println("Server Configuration:") - fmt.Println(" ./nntp-fetcher -test-conn -host news.server.com -port 563") - fmt.Println(" ./nntp-fetcher -test-conn -username user -password pass") fmt.Println() fmt.Println("Note: For newsgroup analysis use cmd/nntp-analyze instead") fmt.Println() @@ -64,13 +65,15 @@ func main() { fetchNewsgroup = flag.String("group", "", "Newsgroup to fetch (default: empty = all groups once up to max-batch) or rocksolid.* with final wildcard to match prefix.*") nntphostname = flag.String("nntphostname", "", "Your hostname must be set!") useShortHashLenPtr = flag.Int("useshorthashlen", 7, "short hash length for history storage (2-7, default: 7) - NOTE: cannot be changed once set!") - fetchActiveOnly = flag.Bool("fetch-active-only", true, "Fetch only active newsgroups (default: true)") + fetchActiveOnly = flag.Bool("fetch-active-only", true, "Downloads only active newsgroups (default: true) To download only disabled newsgroups set to false!") + excludePrefix = flag.String("exclude-prefix", "", "use with UpdateNewsgroupList to exclude newsgroups with this prefix (default: empty 
= no exclusion) allows comma separation and wildcards alt.*,comp.*") downloadMaxPar = flag.Int("download-max-par", 1, "run this many groups in parallel, can eat your memory! (default: 1)") - updateList = flag.String("fetch-newsgroups-from-remote", "", "Fetch remote newsgroup list from first enabled provider (default: empty, nothing. use \"group.*\" or \"\\$all\")") + updateList = flag.String("fetch-newsgroups-from-remote", "", "UpdateNewsgroupList: get remote newsgroup list from first enabled provider (default: empty, nothing. use \"group.*\" or \"\\$all\")") updateListForce = flag.Bool("fetch-newsgroups-force", false, "use with -fetch-newsgroups-from-remote .. to really add them to database") dataDir = flag.String("data", "./data", "Directory to store database files") // Download options with date filtering downloadStartDate = flag.String("download-start-date", "", "Start downloading articles from this date (YYYY-MM-DD format)") + resetProgress = flag.Int64("reset-progress", 0, "Reset download progress for all newsgroups on primary provider") showHelp = flag.Bool("help", false, "Show usage examples and exit") ) flag.Parse() @@ -80,7 +83,7 @@ func main() { os.Exit(0) } if *updateList != "" { - if err := UpdateNewsgroupList(updateList, *updateListForce); err != nil { + if err := UpdateNewsgroupList(updateList, excludePrefix, *updateListForce); err != nil { log.Fatalf("Newsgroup list update failed: %v", err) } os.Exit(0) @@ -88,6 +91,7 @@ func main() { if *downloadMaxPar < 1 { *downloadMaxPar = 1 + processor.DownloadMaxPar = *downloadMaxPar } if *maxBatch < 10 { *maxBatch = 10 @@ -98,28 +102,27 @@ func main() { if *maxQueued < 1 { *maxQueued = 1 } - if *maxBatchThreads > 128 { - *maxBatchThreads = 128 - log.Printf("[WARN] max batch threads: %d (should be between 1 and 128. recommended: 16)", *maxBatchThreads) + if *maxBatchThreads > 1024 { + *maxBatchThreads = 1024 + log.Printf("[WARN] max batch threads: %d (should be between 1 and 1024. 
recommended: 1-16)", *maxBatchThreads) } if *maxBatch > 1000 { - log.Printf("[WARN] max batch: %d (should be between 100 and 1000)", *maxBatch) + log.Printf("[WARN] max batch: %d (should be between 100 and 10000. recommended: 1000-10000)", *maxBatch) } // Validate command-line flag if *useShortHashLenPtr < 2 || *useShortHashLenPtr > 7 { log.Fatalf("Invalid UseShortHashLen: %d (must be between 2 and 7)", *useShortHashLenPtr) } - database.InitialBatchChannelSize = *maxBatch - database.MaxBatchThreads = *maxBatchThreads - database.MaxBatchSize = *maxBatch - database.MaxQueued = *maxQueued nntp.MaxReadLinesXover = int64(*maxBatch) processor.MaxBatchSize = int64(*maxBatch) // Initialize database (default config, data in ./data) dbConfig := database.DefaultDBConfig() dbConfig.DataDir = *dataDir + dbConfig.MaxDBbatch = *maxBatch + dbConfig.MaxDBthreads = *maxBatchThreads + dbConfig.MaxQueued = *maxQueued db, err := database.OpenDatabase(dbConfig) if err != nil { @@ -176,7 +179,6 @@ func main() { return } log.Printf("Loaded %d providers from database", len(providers)) - // Get all newsgroups from database using admin function (includes empty groups) suffixWildcard := strings.HasSuffix(*fetchNewsgroup, "*") var wildcardNG string @@ -201,24 +203,23 @@ func main() { } else if *fetchNewsgroup != "" { newsgroups = append(newsgroups, &models.Newsgroup{Name: *fetchNewsgroup}) } - pools := make([]*nntp.Pool, 0, len(providers)) for _, p := range providers { if !p.Enabled || p.Host == "" || p.Port <= 0 || p.MaxConns <= 0 { //log.Printf("Ignore disabled Provider: %s", p.Name) continue } - if strings.Contains(p.Host, "eternal-september") && p.MaxConns > 3 { - p.MaxConns = 3 - } else if strings.Contains(p.Host, "blueworldhosting") && p.MaxConns > 3 { + if (strings.Contains(p.Host, "eternal-september") || p.Host == ET_V4 || p.Host == ET_V6) && p.MaxConns > 3 { p.MaxConns = 3 + } else if (strings.Contains(p.Host, "blueworldhosting") || strings.HasPrefix(p.Host, BW_V4_Prefix)) && 
p.MaxConns > 16 { + p.MaxConns = 16 } + /* disabled if p.MaxConns > *maxBatch { p.MaxConns = *maxBatch // limit conns to maxBatch } - log.Printf("Provider: %s (ID: %d, Host: %s, Port: %d, SSL: %v, MaxConns: %d)", - p.Name, p.ID, p.Host, p.Port, p.SSL, p.MaxConns) - + */ + log.Printf("[FETCHER]: Provider '%s' (ID: %d, Host: %s, Port: %d, SSL: %v, MaxConns: %d)", p.Name, p.ID, p.Host, p.Port, p.SSL, p.MaxConns) // Convert models.Provider to config.Provider for the BackendConfig configProvider := &config.Provider{ Grp: p.Grp, @@ -263,15 +264,53 @@ func main() { } pool := nntp.NewPool(backendConfig) pools = append(pools, pool) - log.Printf("Created connection pool for provider '%s' with max %d connections", p.Name, p.MaxConns) + log.Printf("[FETCHER]: Created connection pool for provider '%s' with max %d connections", p.Name, p.MaxConns) defer pool.ClosePool() break // Only use the first provider for import } + if *resetProgress > 0 { + log.Printf("[FETCHER]: Resetting download progress to %d for all newsgroups on primary provider", *resetProgress) + for _, ng := range newsgroups { + //last, err = progressDB.GetLastArticle(pools[0].Backend.Provider.Name, ng.Name) // get last article + err = progressDB.UpdateProgress(pools[0].Backend.Provider.Name, ng.Name, *resetProgress) + if err != nil { + log.Fatalf("Failed to update progress for provider '%s' group '%s': %v", pools[0].Backend.Provider.Name, ng.Name, err) + } + } + log.Printf("[FETCHER]: Download progress reset completed.") + os.Exit(0) + } + + remoteGroups, err := pools[0].FileCachedListNewsgroups() + if err != nil || len(remoteGroups) == 0 { + log.Fatalf("failed to fetch newsgroup list from remote server: %v", err) + } + quickRGLookup := make(map[string]bool, len(remoteGroups)) + for _, rg := range remoteGroups { + quickRGLookup[rg] = true + } + var validNGs []*models.Newsgroup + for _, ng := range newsgroups { + if _, exists := quickRGLookup[ng.Name]; exists { + validNGs = append(validNGs, ng) + } else { + 
//log.Printf("[FETCHER]: WARNING: Newsgroup '%s' not found on remote server!", ng.Name) + } + } + if len(validNGs) == 0 { + log.Printf("[FETCHER]: No valid newsgroups to process after checking remote server list.") + return + } + newsgroups = validNGs + log.Printf("[FETCHER]: Starting fetch for %d newsgroups", len(newsgroups)) + time.Sleep(time.Second * 2) // debug sleep + + // Start shutdown listener goroutine fetchDoneChan := make(chan error, 1) go func() { <-sigChan - log.Printf("[FETCHER]: Received shutdown signal, initiating graceful shutdown...") + log.Printf("[FETCHER]: sigChan received shutdown signal") // Signal all worker goroutines to stop common.ForceShutdown() }() @@ -290,28 +329,42 @@ func main() { DownloadMaxPar := *downloadMaxPar // unchangeable (code not working yet) DLParChan := make(chan struct{}, DownloadMaxPar) var mux sync.Mutex - downloaded := 0 + var downloaded, queued, notfound, todo uint64 // scan group worker - queued := 0 - todo := 0 go func() { defer close(processor.Batch.Check) + var skippedWildcard, skippedInActive, skippedActive uint64 for _, ng := range newsgroups { if common.WantShutdown() { - //log.Printf("[FETCHER]: Feed Batch.Check shutdown") + //log.Printf("[FETCHER]: Feed Batch.Check common.WantShutdown()") return } + /* disabled if db.IsDBshutdown() { - //log.Printf("[FETCHER]: Feed Batch.Check shutdown") + log.Printf("[FETCHER]: Feed Batch.Check database shutdown") return } + */ if wildcardNG != "" && !strings.HasPrefix(ng.Name, wildcardNG) { //log.Printf("[FETCHER] Skipping newsgroup '%s' as it does not match prefix '%s'", ng.Name, wildcardNG) + skippedWildcard++ continue } nga, err := db.MainDBGetNewsgroup(ng.Name) - if err != nil || nga == nil || *fetchActiveOnly && !nga.Active { - //log.Printf("[FETCHER] ignore newsgroup '%s' err='%v' ng='%#v'", ng.Name, err, ng) + if err != nil || nga == nil { + log.Printf("[FETCHER]: Failed to get newsgroup '%s' from database: err='%v' nga='%#v'", ng.Name, err, nga) + return + } + + if 
*fetchActiveOnly && !nga.Active { + //log.Printf("[FETCHER]: ignore inactive newsgroup '%s'", ng.Name) + skippedInActive++ + continue + } + + if !*fetchActiveOnly && nga.Active { + //log.Printf("[FETCHER]: ignore active newsgroup '%s'", ng.Name) + skippedActive++ continue } @@ -319,13 +372,29 @@ func main() { //log.Printf("Checking ng: %s", ng.Name) mux.Lock() queued++ + if queued%1000 == 0 { + log.Printf("[FETCHER]: Queued %d/%d newsgroups", queued, len(newsgroups)) + } mux.Unlock() } - log.Printf("Queued %d newsgroups", queued) + totalSkipped := skippedWildcard + skippedInActive + skippedActive + log.Printf("[FETCHER]: Feeding Queue Done: %d/%d newsgroups (skipped: %d). Wildcard skipped: %d, skippedInActive: %d, skippedActive: %d. fetchActiveOnly=%t", queued, len(newsgroups), totalSkipped, skippedWildcard, skippedInActive, skippedActive, *fetchActiveOnly) }() var wgCheck sync.WaitGroup startDates := make(map[string]string) - for i := 1; i <= proc.Pool.Backend.MaxConns; i++ { + limitCheckWorker := runtime.NumCPU() / 2 + if limitCheckWorker < 1 { + limitCheckWorker = 1 + } + if proc.Pool.Backend.MaxConns < limitCheckWorker { + limitCheckWorker = proc.Pool.Backend.MaxConns + } + if limitCheckWorker < 1 { + log.Printf("[FETCHER]: Invalid limitCheckWorker: %d (overwrite to 1), proc.Pool.Backend.MaxConns: %d", limitCheckWorker, proc.Pool.Backend.MaxConns) + limitCheckWorker = 1 + } + log.Printf("[FETCHER]: Starting %d check group workers", limitCheckWorker) + for i := 1; i <= limitCheckWorker; i++ { wgCheck.Add(1) go func(worker int, wgCheck *sync.WaitGroup, progressDB *database.ProgressDB) { defer wgCheck.Done() @@ -334,7 +403,8 @@ func main() { //log.Printf("[FETCHER]: Batch.Check shutdown") return } - if db.IsDBshutdown() { + /* disabled */ + if db.IsDBshutdown() && len(processor.Batch.Check) == 0 { //log.Printf("[FETCHER]: Batch.Check DB shutdown") return } @@ -345,11 +415,11 @@ func main() { //log.Printf("[FETCHER]: Newsgroup not found: '%s'", *ng) continue case 
io.EOF: - log.Printf("pool.SelectGroup failed. connection EOF. skipping ng: '%s'", *ng) + log.Printf("[FETCHER]: pool.SelectGroup failed. connection EOF. skipping ng: '%s'", *ng) continue default: log.Printf("[FETCHER]: Error in select ng='%s' groupInfo='%#v' err='%v'", *ng, groupInfo, err) - return + continue } } if groupInfo.Last == 0 || groupInfo.Last < groupInfo.First { @@ -373,15 +443,15 @@ func main() { switch lastArticle { case 0: // Open group DB only when we need to check last-article date - groupDBs, err := proc.DB.GetGroupDBs(*ng) + groupDB, err := proc.DB.GetGroupDB(*ng) if err != nil { log.Printf("[FETCHER]: Failed to get group DBs for newsgroup '%s': %v", *ng, err) continue } - lastArticleDate, checkDateErr := proc.DB.GetLastArticleDate(groupDBs) + lastArticleDate, checkDateErr := proc.DB.GetLastArticleDate(groupDB) // ensure close regardless of errors - if ferr := proc.DB.ForceCloseGroupDBs(groupDBs); ferr != nil { - log.Printf("[FETCHER]: ForceCloseGroupDBs error for '%s': %v", *ng, ferr) + if ferr := proc.DB.ForceCloseGroupDB(groupDB); ferr != nil { + log.Printf("[FETCHER]: ForceCloseGroupDB error for '%s': %v", *ng, ferr) } if checkDateErr != nil { log.Printf("[FETCHER]: Failed to get last article date for '%s': %v", *ng, checkDateErr) @@ -410,6 +480,9 @@ func main() { default: // pass } + if groupInfo.First > lastArticle { + lastArticle = groupInfo.First - 1 + } //log.Printf("DEBUG-RANGE: ng='%s' lastArticle=%d (after switch)", *ng, lastArticle) start := lastArticle + 1 // Start from the first article in the remote group end := start + processor.MaxBatchSize - 1 // End at the last article in the remote group @@ -458,15 +531,14 @@ func main() { // fire up async goroutines to fetch articles go func(worker int) { //log.Printf("DownloadArticles: Worker %d group '%s' start", worker, groupName) - for item := range processor.Batch.GetQ { // gets fed from internal/processor/proc_DLArt.go:150: Batch.GetQ <- item + for item := range processor.Batch.GetQ { 
// gets fed from internal/processor/proc_DLArt.go:~L151: Batch.GetQ <- item + /* disabled if common.WantShutdown() { //log.Printf("[FETCHER]: Batch.GetQ shutdown") return } - if db.IsDBshutdown() { - //log.Printf("[FETCHER]: Batch.GetQ DB shutdown") - return - } + */ + /* disabled */ //log.Printf("DownloadArticles: Worker %d GetArticle group '%s' article (%s)", worker, *item.GroupName, *item.MessageID) art, err := proc.Pool.GetArticle(item.MessageID, true) if err != nil || art == nil { @@ -475,15 +547,20 @@ func main() { switch err { case nntp.ErrArticleNotFound, nntp.ErrArticleRemoved: // article not found, not a big deal + mux.Lock() + notfound++ + mux.Unlock() continue case io.EOF: - log.Printf("ERROR DownloadArticles: pool.GetArticle failed. connection EOF ... quitting! ng: '%s'", *item.GroupName) - common.ForceShutdown() - return + log.Printf("ERROR DownloadArticles: pool.GetArticle failed. connection EOF ... continue! ng: '%s'", *item.GroupName) + //common.ForceShutdown() + //return + continue default: - log.Printf("ERROR DownloadArticles: proc.Pool.GetArticle '%s' err='%v' ... quitting! ng: '%s'", *item.MessageID, err, *item.GroupName) - common.ForceShutdown() - return + log.Printf("ERROR DownloadArticles: proc.Pool.GetArticle '%s' err='%v' ... continue! 
ng: '%s'", *item.MessageID, err, *item.GroupName) + //common.ForceShutdown() + //return + continue } } item.Article = art // set pointer @@ -491,6 +568,10 @@ func main() { mux.Lock() downloaded++ mux.Unlock() + if db.IsDBshutdown() && len(processor.Batch.GetQ) == 0 { + log.Printf("[FETCHER]: Batch.GetQ DB shutdown") + return + } //log.Printf("DownloadArticles: Worker %d GetArticle OK group '%s' article (%s)", worker, *item.GroupName, *item.MessageID) } // end for item }(i) @@ -509,7 +590,7 @@ func main() { } }() for { - if common.WantShutdown() { + if common.WantShutdown() && len(processor.Batch.TodoQ) == 0 { //log.Printf("[FETCHER]: Worker received shutdown signal, stopping") return } @@ -524,13 +605,14 @@ func main() { //log.Printf("[FETCHER]: TodoQ closed, worker stopping") return } - if common.WantShutdown() { + if common.WantShutdown() && len(processor.Batch.TodoQ) == 0 { //log.Printf("[FETCHER]: Worker received shutdown signal, stopping") return } + /* disabled */ // Check if database is shutting down - if db.IsDBshutdown() { - //log.Printf("[FETCHER]: TodoQ Database shutdown detected, stopping processing. still queued in TodoQ: %d", len(processor.Batch.TodoQ)) + if db.IsDBshutdown() && len(processor.Batch.TodoQ) == 0 { + log.Printf("[FETCHER]: TodoQ Database shutdown detected, stopping processing") return } /* @@ -655,10 +737,10 @@ func main() { select { case _, ok := <-common.ShutdownChan: if !ok { - //log.Printf("[FETCHER]: Shutdown channel closed, initiating graceful shutdown...") + log.Printf("[FETCHER]: common.ShutdownChan closed, initiating graceful shutdown...") } case err := <-fetchDoneChan: - log.Printf("[FETCHER]: DONE! err='%v'", err) + log.Printf("[FETCHER]: got fetchDoneChan: DONE! 
err='%v'", err) } waitHere.Wait() // Signal background tasks to stop @@ -682,7 +764,7 @@ func main() { } mux.Lock() - log.Printf("[FETCHER]: Total downloaded: %d articles (newsgroups: %d)", downloaded, queued) + log.Printf("[FETCHER]: Total downloaded: %d articles, notfound: %d (newsgroups: %d)", downloaded, notfound, queued) mux.Unlock() log.Printf("[FETCHER]: Graceful shutdown completed. Exiting here.") @@ -716,8 +798,8 @@ func getRealMemoryUsage() (uint64, error) { // UpdateNewsgroupList fetches the remote newsgroup list from the first enabled provider // and adds all groups to the database that we don't already have -func UpdateNewsgroupList(updateList *string, updateListForce bool) error { - log.Printf("Starting newsgroup list update from remote server...") +func UpdateNewsgroupList(updateList *string, excludePrefix *string, updateListForce bool) error { + log.Printf("UpdateNewsgroupList: Starting newsgroup list update from remote server...") // Initialize database db, err := database.OpenDatabase(nil) @@ -744,7 +826,7 @@ func UpdateNewsgroupList(updateList *string, updateListForce bool) error { return fmt.Errorf("no enabled providers found in database") } - log.Printf("Using provider: %s (Host: %s, Port: %d, SSL: %v)", + log.Printf("UpdateNewsgroupList: Using provider: %s (Host: %s, Port: %d, SSL: %v)", firstProvider.Name, firstProvider.Host, firstProvider.Port, firstProvider.SSL) // Create NNTP backend config using the first enabled provider @@ -760,23 +842,12 @@ func UpdateNewsgroupList(updateList *string, updateListForce bool) error { // Create NNTP pool pool := nntp.NewPool(backendConfig) defer pool.ClosePool() - - // Get a connection from the pool - conn, err := pool.Get(nntp.MODE_READER_MV) - if err != nil { - return fmt.Errorf("failed to get NNTP connection: %w", err) - } - defer pool.Put(conn) - - log.Printf("Connected to %s:%d, fetching newsgroup list...", firstProvider.Host, firstProvider.Port) - - // Fetch the complete newsgroup list - 
remoteGroups, err := conn.ListGroups() + // Fetch remote newsgroup list + remoteGroups, err := pool.ListNewsgroups() if err != nil { - return fmt.Errorf("failed to fetch newsgroup list: %w", err) + return fmt.Errorf("failed to fetch newsgroup list from remote server: %w", err) } - - log.Printf("Fetched %d newsgroups from remote server", len(remoteGroups)) + log.Printf("UpdateNewsgroupList: Fetched %d newsgroups from remote server", len(remoteGroups)) // Parse the update pattern to determine filtering updatePattern := *updateList @@ -785,17 +856,28 @@ func UpdateNewsgroupList(updateList *string, updateListForce bool) error { if updatePattern == "$all" { addAllGroups = true - log.Printf("Listing all newsgroups from remote server") + log.Printf("UpdateNewsgroupList: Listing all newsgroups from remote server") } else if strings.HasSuffix(updatePattern, "*") { groupPrefix = strings.TrimSuffix(updatePattern, "*") - log.Printf("Listing newsgroups with prefix: '%s'", groupPrefix) + log.Printf("UpdateNewsgroupList: Listing newsgroups with prefix: '%s'", groupPrefix) } else if updatePattern != "" { groupPrefix = updatePattern - log.Printf("Listing newsgroups matching: '%s'", groupPrefix) + log.Printf("UpdateNewsgroupList: Listing newsgroups matching: '%s'", groupPrefix) } else { return fmt.Errorf("invalid update pattern: '%s' (use 'group.*' or '$all')", updatePattern) } - + var excludePrefixes []string + if excludePrefix != nil && *excludePrefix != "" { + excludePrefixes = strings.Split(*excludePrefix, ",") + for i, p := range excludePrefixes { + trimmed := strings.TrimSpace(p) + if trimmed == "" { + continue + } + excludePrefixes[i] = trimmed + log.Printf("UpdateNewsgroupList: Excluding newsgroups with prefix: '%s'", excludePrefixes[i]) + } + } // Get existing newsgroups from local database localGroups, err := db.MainDBGetAllNewsgroups() if err != nil { @@ -808,28 +890,54 @@ func UpdateNewsgroupList(updateList *string, updateListForce bool) error { 
existingGroups[group.Name] = true } - log.Printf("Found %d newsgroups in local database", len(localGroups)) + log.Printf("UpdateNewsgroupList: Found %d newsgroups in local database", len(localGroups)) + today := time.Now().UTC() + //today := time.Now().UTC().Truncate(24 * time.Hour) // Add new newsgroups that don't exist locally and match the pattern newGroupCount := 0 skippedCount := 0 var messages int64 +loopGroups: for _, remoteGroup := range remoteGroups { // Apply prefix filtering if !addAllGroups { if groupPrefix != "" && !strings.HasPrefix(remoteGroup.Name, groupPrefix) { skippedCount++ - continue + continue loopGroups } } - + if len(excludePrefixes) > 0 { + for _, excludePrefix := range excludePrefixes { + if excludePrefix == "" { + continue + } + if strings.HasSuffix(excludePrefix, "*") { + pattern := strings.TrimSuffix(excludePrefix, "*") + if strings.HasPrefix(remoteGroup.Name, pattern) { + log.Printf("Excluding newsgroup: '%s' by prefix: '%s' pattern: '%s'", remoteGroup.Name, excludePrefix, pattern) + skippedCount++ + continue loopGroups + } + } else if remoteGroup.Name == excludePrefix { + log.Printf("Excluding newsgroup: '%s'", remoteGroup.Name) + skippedCount++ + continue loopGroups + } + } + } + if !common.IsValidGroupName(remoteGroup.Name) { + log.Printf("Skipping invalid newsgroup name: '%s'", remoteGroup.Name) + skippedCount++ + continue loopGroups + } if !existingGroups[remoteGroup.Name] { // Create a new newsgroup model newGroup := &models.Newsgroup{ Name: remoteGroup.Name, - Active: true, // Default to active - Status: "y", // Default posting status - CreatedAt: time.Now().UTC(), // Default created at + Active: false, // Default to inactive + Status: remoteGroup.Status, // newsgroups y,m,c,a status + CreatedAt: today, // Default created at } if updateListForce { @@ -840,16 +948,16 @@ func UpdateNewsgroupList(updateList *string, updateListForce bool) error { continue } - log.Printf("Added new newsgroup: %s", remoteGroup.Name) + 
log.Printf("(Added new) newsgroup: '%s'", remoteGroup.Name) } else { - log.Printf("New newsgroup: %s (not added) lo=%d hi=%d messages=%d", remoteGroup.Name, remoteGroup.First, remoteGroup.Last, remoteGroup.Count) + log.Printf("(not added) newsgroup: '%s' messages=%d status=%s", remoteGroup.Name, remoteGroup.Count, remoteGroup.Status) } newGroupCount++ messages += remoteGroup.Count } } - log.Printf("Newsgroup list update completed: %d new groups added, %d skipped (prefix filter), out of %d remote groups with total: %d messages", + log.Printf("Result: %d new groups, %d skipped, out of %d remote groups with total: %d messages", newGroupCount, skippedCount, len(remoteGroups), messages) return nil diff --git a/cmd/nntp-server/processor_adapter.go b/cmd/nntp-server/processor_adapter.go index 6b47a5e4..bfd7bae3 100644 --- a/cmd/nntp-server/processor_adapter.go +++ b/cmd/nntp-server/processor_adapter.go @@ -24,8 +24,8 @@ func (pa *ProcessorAdapter) ProcessIncomingArticle(article *models.Article) (int } // Lookup checks if a message-ID exists in history -func (pa *ProcessorAdapter) Lookup(msgIdItem *history.MessageIdItem) (int, error) { - return pa.processor.History.Lookup(msgIdItem) +func (pa *ProcessorAdapter) Lookup(msgIdItem *history.MessageIdItem, quick bool) (response int, newsgroupIDs []int64, err error) { + return pa.processor.History.Lookup(msgIdItem, quick) } // CheckNoMoreWorkInHistory checks if there's no more work in history diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 1366f641..56b7b007 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -6,12 +6,20 @@ import ( "context" "flag" "fmt" + "html/template" "log" + "net/http" + _ "net/http/pprof" // Memory profiling "os" "os/signal" + "runtime" + "runtime/debug" + "slices" + "sort" "strconv" "strings" "sync" + "sync/atomic" "time" "github.com/go-while/go-pugleaf/internal/common" @@ -72,6 +80,16 @@ func showUsageExamples() { fmt.Println(" ./nntp-transfer -host 
news.server.local -group alt.test -redis-clear-cache") fmt.Println(" # Use -redis-clear-cache to start fresh (clears all cached message IDs)") fmt.Println() + fmt.Println("Memory Profiling & Monitoring:") + fmt.Println(" ./nntp-transfer -host news.server.local -group alt.* -mem-stats") + fmt.Println(" ./nntp-transfer -host news.server.local -group alt.* -pprof-port 6060") + fmt.Println(" ./nntp-transfer -host news.server.local -group alt.* -gc-percent 50") + fmt.Println(" # -mem-stats: Log memory stats every 30 seconds") + fmt.Println(" # -pprof-port: Enable pprof at http://localhost:6060/debug/pprof/") + fmt.Println(" # -gc-percent: Lower values = more GC, less memory (default 100)") + fmt.Println(" # Get heap profile: curl http://localhost:6060/debug/pprof/heap > heap.prof") + fmt.Println(" # Analyze: go tool pprof heap.prof") + fmt.Println() fmt.Println("Show ALL command line flags:") fmt.Println(" ./nntp-transfer -h") @@ -82,10 +100,39 @@ var appVersion = "-unset-" var redisCtx = context.Background() var REDIS_TTL time.Duration = 3600 * time.Second // default 1h +var MaxQueuedJobs int = 8 +var BatchCheck int64 +var CHECK_FIRST bool + +// statistics +var TotalNewsgroups int64 +var NewsgroupsToProcess int64 +var ServerHostName string +var StartDate string +var EndDate string +var GlobalSpeed uint64 + +var totalTransferred, totalTTSentCount, totalCheckSentCount, totalChecked, totalWanted, totalUnwanted, totalRejected, totalRetry, totalSkipped, totalRedisCacheHits, totalRedisCacheBeforeCheck, totalRedisCacheBeforeTakethis, totalTXErrors, totalConnErrors, globalTotalArticles, nothingInDateRange uint64 + +func CalcGlobalSpeed() { + for { + time.Sleep(time.Second * 3) + var speed uint64 + nntp.ResultsMutex.Lock() + for _, progress := range nntp.NewsgroupTransferProgressMap { + progress.CalcSpeed() + speed += progress.GetSpeed() + } + GlobalSpeed = speed + nntp.ResultsMutex.Unlock() + } +} func main() { - common.VerboseHeaders = false config.AppVersion = appVersion + + 
bootTime := time.Now() + common.VERBOSE_HEADERS = false database.NO_CACHE_BOOT = true // prevents booting caches and several other not needed functions log.Printf("Starting go-pugleaf NNTP Transfer Tool (version %s)", config.AppVersion) @@ -109,7 +156,7 @@ func main() { proxyPassword = flag.String("proxy-password", "", "Proxy authentication password") // Transfer configuration - batchCheck = flag.Int("batch-check", 100, "Number of message IDs/articles to send in streamed CHECK/TAKETHIS") + batchCheck = flag.Int64("batch-check", 100, "Number of message IDs/articles to send in streamed CHECK/TAKETHIS") batchDB = flag.Int64("batch-db", 1000, "Fetch N articles from DB in a batch") maxThreads = flag.Int("max-threads", 1, "Transfer N newsgroups in concurrent threads. Each thread uses 1 connection.") redisCache = flag.Bool("redis-cache", true, "Use Redis caching for message IDs") @@ -136,14 +183,33 @@ func main() { // History configuration useShortHashLen = flag.Int("useshorthashlen", 7, "Short hash length for history storage (2-7, default: 7)") + checkFirst = flag.Bool("check-first", true, "Use CHECK command before TAKETHIS to avoid duplicates (recommended)") // Newsgroup filtering options fileInclude = flag.String("file-include", "", "File containing newsgroup patterns to include (one per line)") fileExclude = flag.String("file-exclude", "", "File containing newsgroup patterns to exclude (one per line)") forceIncludeOnly = flag.Bool("force-include-only", false, "When set, only transfer newsgroups that match patterns in include file (ignores -group pattern)") + excludePrefix = flag.String("exclude-prefix", "", "Exclude newsgroups with this prefix (comma-separated list, supports wildcards like 'alt.binaries.*')") + useProgressDB = flag.Bool("use-progress-db", true, "Use transfer progress database to track transferred newsgroups and stats") + + // Web server and profiling options + webPort = flag.Int("web-port", 0, "Enable web server on this port to view results (e.g. 
8080, default: disabled)") + pprofPort = flag.Int("pprof-port", 0, "Enable pprof profiling server on this port (e.g., 6060). Access at http://localhost:PORT/debug/pprof/") + memStats = flag.Bool("mem-stats", false, "Log memory statistics every 30 seconds") + gcPercent = flag.Int("gc-percent", 50, "Set GOGC percentage (default 100). Lower values = more frequent GC, less memory") ) flag.Parse() common.IgnoreGoogleHeaders = *ignoreGoogleHeaders + CHECK_FIRST = *checkFirst + ServerHostName = *host + StartDate = *startDate + EndDate = *endDate + + // Configure garbage collector + if *gcPercent != 100 { + old := debug.SetGCPercent(*gcPercent) + log.Printf("Set GOGC from %d to %d (lower = more GC, less memory)", old, *gcPercent) + } // Show help if requested if *showHelp { @@ -156,9 +222,10 @@ func main() { } // Validate batch size - if *batchCheck < 1 || *batchCheck > 10000 { - log.Fatalf("Error: batch-check must be between 1 and 10000 (got %d)", *batchCheck) + if *batchCheck < 1 || *batchCheck > 100000 { + log.Fatalf("Error: batch-check must be between 1 and 100000 (got %d)", *batchCheck) } + BatchCheck = *batchCheck // Validate batch size if *batchDB < 100 { @@ -170,6 +237,7 @@ func main() { if *maxThreads < 1 || *maxThreads > 500 { log.Fatalf("Error: max-threads must be between 1 and 500 (got %d)", *maxThreads) } + nntp.NNTPTransferThreads = *maxThreads // Validate UseShortHashLen if *useShortHashLen < 2 || *useShortHashLen > 7 { @@ -277,7 +345,7 @@ func main() { sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, os.Interrupt) // Cross-platform (Ctrl+C on both Windows and Linux) - //db.WG.Add(2) // Adds to wait group for db_batch.go cron jobs + db.WG.Add(2) // Adds to wait group for db_batch.go cron jobs db.WG.Add(1) // Adds for history: one for writer worker // Get UseShortHashLen from database (with safety check) @@ -302,7 +370,16 @@ func main() { } log.Printf("Using stored UseShortHashLen: %d", finalUseShortHashLen) } - + var progressDB 
*nntp.TransferProgressDB = nil + if *useProgressDB { + // Open transfer progress database + aprogressDB, err := nntp.OpenTransferProgressDB(*dataDir, *host+":"+strconv.Itoa(*port)) + if err != nil { + log.Fatalf("Failed to open transfer progress database: %v", err) + } + progressDB = aprogressDB + defer progressDB.Close() + } // Create target server connection pool targetProvider := &config.Provider{ Name: "transfer:" + *host, @@ -337,17 +414,16 @@ func main() { backendConfig.ProxyUsername = proxyConfig.Username backendConfig.ProxyPassword = proxyConfig.Password } - nntphostname, err := db.GetConfigValue("local_nntp_hostname") + nntphostname, err := db.GetConfigValue(config.CFG_KEY_HOSTNAME) if err != nil || nntphostname == "" { - log.Printf("Failed to get local_nntp_hostname from database: %v", err) - os.Exit(1) + log.Fatalf("Failed to get local_nntp_hostname from database: %v", err) } pool := nntp.NewPool(backendConfig) log.Printf("Created connection pool for target server '%s:%d' with max %d connections", *host, *port, *maxThreads) // Get newsgroups to transfer - newsgroups, err := getNewsgroupsToTransfer(db, *transferGroup, *fileInclude, *fileExclude, *forceIncludeOnly) + newsgroups, err := getNewsgroupsToTransfer(db, progressDB, startTime, endTime, *transferGroup, *fileInclude, *fileExclude, *excludePrefix, *forceIncludeOnly) if err != nil { log.Fatalf("Failed to get newsgroups: %v", err) } @@ -371,13 +447,23 @@ func main() { log.Printf("Debug capture mode enabled - capturing articles without sending") *dryRun = true } + // Start NNTP worker pool + if !*dryRun { + log.Printf("Starting NNTP connection worker pool...") + go BootConnWorkers(db, pool, redisCli) + } + nntp.ResultsMutex.Lock() + TotalNewsgroups = int64(len(newsgroups)) + NewsgroupsToProcess = TotalNewsgroups + nntp.ResultsMutex.Unlock() + go CalcGlobalSpeed() // Start transfer process var wgP sync.WaitGroup wgP.Add(2) go func(wgP *sync.WaitGroup, redisCli *redis.Client) { defer wgP.Done() resultChan 
:= make(chan error, 1) - resultChan <- runTransfer(db, proc, pool, newsgroups, *batchCheck, *maxThreads, *dryRun, startTime, endTime, *debugCapture, wgP, redisCli) + resultChan <- runTransfer(db, newsgroups, *batchCheck, *maxThreads, *dryRun, startTime, endTime, *debugCapture, wgP, redisCli, progressDB) result := <-resultChan if !*debugCapture { transferDoneChan <- result @@ -390,17 +476,22 @@ func main() { // you did create a backup before, right? debugMutex.Lock() defer debugMutex.Unlock() - for newsgroup, articles := range debugArticles { - fmt.Printf("Debug capture - Newsgroup: %s, Articles: %d\n", newsgroup, len(articles)) + for newsgroup, messageIDs := range debugArticles { + fmt.Printf("Debug capture - Newsgroup: %s, Articles: %d\n", newsgroup, len(messageIDs)) // Get group database for updates if needed - groupDBs, err := db.GetGroupDBs(newsgroup) + groupDB, err := db.GetGroupDB(newsgroup) if err != nil { fmt.Printf("! Error getting group database for %s: %v\n", newsgroup, err) continue } - for _, article := range articles { + for _, messageId := range messageIDs { + article, err := db.GetArticleByMessageID(groupDB, *messageId) + if err != nil { + fmt.Printf("! Error getting article '%s': %v\n", *messageId, err) + continue + } fmt.Printf("# %s: #%d : '%s' | orgDate='%s' parsed='%#v'\n", newsgroup, article.DBArtNum, article.MessageID, article.DateString, article.DateSent) // Track original values to detect changes @@ -424,7 +515,7 @@ func main() { originalDateString, article.DateString) - if err := db.UpdateArticleDateSent(groupDBs, article.MessageID, article.DateSent, article.DateString); err != nil { + if err := db.UpdateArticleDateSent(groupDB, article.MessageID, article.DateSent, article.DateString); err != nil { fmt.Printf("! Error updating database for article '%s': %v\n", article.MessageID, err) } else { fmt.Printf("! 
Database updated for article '%s'\n", article.MessageID) @@ -440,11 +531,35 @@ func main() { //fmt.Printf("%s\n", article.BodyText) //fmt.Printf("### BODY EOF '%s' ###\n\n", article.MessageID) } - groupDBs.Return(db) + groupDB.Return() } transferDoneChan <- result }(&wgP, redisCli) - wgP.Wait() + + // Start web server if port is specified + if *webPort > 0 { + go startWebServer(*webPort) + } + + // Start pprof server if port is specified + if *pprofPort > 0 { + go func() { + addr := fmt.Sprintf("localhost:%d", *pprofPort) + log.Printf("Starting pprof server on http://%s/debug/pprof/", addr) + log.Printf(" Heap profile: http://%s/debug/pprof/heap", addr) + log.Printf(" Goroutines: http://%s/debug/pprof/goroutine", addr) + log.Printf(" Allocs: http://%s/debug/pprof/allocs", addr) + if err := http.ListenAndServe(addr, nil); err != nil { + log.Printf("pprof server error: %v", err) + } + }() + } + + // Start memory stats monitoring if enabled + if *memStats { + go monitorMemoryStats() + } + // Wait for either shutdown signal or transfer completion select { case <-sigChan: @@ -457,8 +572,10 @@ func main() { log.Printf("Transfer completed successfully") } } - + wgP.Wait() pool.ClosePool() + // Signal background tasks to stop + close(db.StopChan) // Close processor if proc != nil { @@ -479,8 +596,8 @@ func main() { } else { log.Printf("Database shutdown successfully") } - - log.Printf("Graceful shutdown completed. Exiting.") + time.Sleep(time.Second * 3) // wait for all goroutines to finish + log.Printf("nntp-transfer exit. 
Runtime: %v", time.Since(bootTime)) } // parseDateTime parses a date string in multiple supported formats @@ -547,101 +664,6 @@ func parseProxyConfig(address, proxyType, username, password string) (*ProxyConf }, nil } -const query_getArticlesBatchWithDateFilter_selectPart = `SELECT article_num, message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, reply_count, path, headers_json, body_text, imported_at FROM articles` -const query_getArticlesBatchWithDateFilter_nodatefilter = `SELECT article_num, message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, reply_count, path, headers_json, body_text, imported_at FROM articles ORDER BY date_sent ASC LIMIT ? OFFSET ?` -const query_getArticlesBatchWithDateFilter_orderby = " ORDER BY date_sent ASC LIMIT ? OFFSET ?" - -// getArticlesBatchWithDateFilter retrieves articles from a group database with optional date filtering -func getArticlesBatchWithDateFilter(groupDBs *database.GroupDBs, offset int64, startTime, endTime *time.Time) ([]*models.Article, error) { - - var query string - var args []interface{} - - if startTime != nil || endTime != nil { - // Build query with date filtering - - var whereConditions []string - - if startTime != nil { - whereConditions = append(whereConditions, "date_sent >= ?") - args = append(args, startTime.UTC().Format("2006-01-02 15:04:05")) - } - - if endTime != nil { - whereConditions = append(whereConditions, "date_sent <= ?") - args = append(args, endTime.UTC().Format("2006-01-02 15:04:05")) - } - - whereClause := "" - if len(whereConditions) > 0 { - whereClause = " WHERE " + strings.Join(whereConditions, " AND ") - } - - query = query_getArticlesBatchWithDateFilter_selectPart + whereClause + query_getArticlesBatchWithDateFilter_orderby - args = append(args, dbBatchSize, offset) - } else { - // No date filtering, use original query but with date_sent ordering - query = query_getArticlesBatchWithDateFilter_nodatefilter - args = 
[]interface{}{dbBatchSize, offset} - } - - rows, err := groupDBs.DB.Query(query, args...) - if err != nil { - return nil, err - } - defer rows.Close() - - var out []*models.Article - for rows.Next() { - var a models.Article - if err := rows.Scan(&a.DBArtNum, &a.MessageID, &a.Subject, &a.FromHeader, &a.DateSent, &a.DateString, &a.References, &a.Bytes, &a.Lines, &a.ReplyCount, &a.Path, &a.HeadersJSON, &a.BodyText, &a.ImportedAt); err != nil { - return nil, err - } - out = append(out, &a) - } - - return out, nil -} - -// getArticleCountWithDateFilter gets the total count of articles with optional date filtering -func getArticleCountWithDateFilter(groupDBs *database.GroupDBs, startTime, endTime *time.Time) (int64, error) { - var query string - var args []interface{} - - if startTime != nil || endTime != nil { - // Build count query with date filtering - var whereConditions []string - - if startTime != nil { - whereConditions = append(whereConditions, "date_sent >= ?") - args = append(args, startTime.UTC().Format("2006-01-02 15:04:05")) - } - - if endTime != nil { - whereConditions = append(whereConditions, "date_sent <= ?") - args = append(args, endTime.UTC().Format("2006-01-02 15:04:05")) - } - - whereClause := "" - if len(whereConditions) > 0 { - whereClause = " WHERE " + strings.Join(whereConditions, " AND ") - } - - query = "SELECT COUNT(*) FROM articles" + whereClause - } else { - // No date filtering - query = "SELECT COUNT(*) FROM articles" - } - - var count int64 - err := groupDBs.DB.QueryRow(query, args...).Scan(&count) - if err != nil { - return 0, err - } - - return count, nil -} - // testConnection tests the connection to the target NNTP server func testConnection(host *string, port *int, username *string, password *string, ssl *bool, timeout *int, proxyConfig *ProxyConfig) error { testProvider := &config.Provider{ @@ -704,14 +726,25 @@ func testConnection(host *string, port *int, username *string, password *string, } // getNewsgroupsToTransfer returns 
newsgroups matching the specified pattern and file filters -func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, fileExclude string, forceIncludeOnly bool) ([]*models.Newsgroup, error) { +func getNewsgroupsToTransfer(db *database.Database, progressDB *nntp.TransferProgressDB, startTime, endTime *time.Time, groupPattern, fileInclude, fileExclude, excludePrefix string, forceIncludeOnly bool) ([]*models.Newsgroup, error) { var newsgroups []*models.Newsgroup + // Parse exclude prefix patterns (comma-separated) + var excludePrefixes []string + if excludePrefix != "" { + excludePrefixes = strings.Split(excludePrefix, ",") + for i, p := range excludePrefixes { + excludePrefixes[i] = strings.TrimSpace(p) + log.Printf("Excluding newsgroups with prefix: '%s'", excludePrefixes[i]) + } + } + // Load include/exclude patterns from files if specified var includePatterns, excludePatterns []string var includeLookup, excludeLookup map[string]bool var hasIncludeWildcards, hasExcludeWildcards bool var err error + var notExists []string if fileInclude != "" { includePatterns, err = loadPatternsFromFile(fileInclude) @@ -759,6 +792,34 @@ func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, f return nil, fmt.Errorf("failed to get newsgroups from database: %v", err) } log.Printf("Loaded %d newsgroups from database in %v", len(allNewsgroups), time.Since(start)) + quickLookup := make(map[string]bool, len(allNewsgroups)) + for _, ng := range allNewsgroups { + quickLookup[ng.Name] = true + } + for ng := range includeLookup { + if !quickLookup[ng] { + notExists = append(notExists, ng) + } + } + if len(notExists) > 0 { + log.Printf("%d newsgroup not found locally.", len(notExists)) + for _, ngName := range notExists { + log.Printf(" - %s (not found locally)", ngName) + } + } + + // Fetch all progress newsgroups once for fast lookup (used by all paths below) + progressMap := make(map[string]bool, 125000) + if progressDB != nil { + 
progressFetchStart := time.Now() + progressMap, err = progressDB.GetAllProgressNewsgroups(startTime, endTime) + if err != nil { + log.Printf("Warning: Failed to fetch progress newsgroups: %v (will skip progress checks)", err) + progressMap = make(map[string]bool) + } else { + log.Printf("Fetched %d newsgroups with existing progress in %v", len(progressMap), time.Since(progressFetchStart)) + } + } // Handle force-include-only mode if forceIncludeOnly { @@ -797,6 +858,17 @@ func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, f // Now apply include patterns to group-filtered newsgroups start = time.Now() for _, ng := range groupFiltered { + // Check exclude prefix first + if len(excludePrefixes) > 0 { + if matchesExcludePrefix(ng.Name, excludePrefixes) { + continue + } + } + // Check if newsgroup already has results for this remote (fast map lookup) + if progressMap[ng.Name] { + log.Printf("Skipping newsgroup %s - already has transfer results for this remote", ng.Name) + continue + } // Fast exact match check first if includeLookup[ng.Name] { newsgroups = append(newsgroups, ng) @@ -808,7 +880,8 @@ func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, f } // If no wildcards exist and no exact match, skip this newsgroup } - log.Printf("Applied include pattern filtering in %v", time.Since(start)) + log.Printf("Applied include pattern filtering in %v.", time.Since(start)) + log.Printf("Force-include-only mode: found %d newsgroups to transfer after filtering", len(newsgroups)) return newsgroups, nil } @@ -817,10 +890,24 @@ func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, f log.Printf("Using $all pattern: transferring all newsgroups with file filters applied") start := time.Now() for _, ng := range allNewsgroups { - if shouldIncludeNewsgroup(ng.Name, includePatterns, excludePatterns, includeLookup, excludeLookup, hasIncludeWildcards, hasExcludeWildcards) { - newsgroups = append(newsgroups, ng) 
+ // Check exclude prefix first + if len(excludePrefixes) > 0 { + if matchesExcludePrefix(ng.Name, excludePrefixes) { + continue + } + } + // Check if newsgroup already has results for this remote (fast map lookup) + if progressMap[ng.Name] { + log.Printf("Skipping newsgroup %s - already has transfer results for this remote", ng.Name) + continue + } + if !shouldIncludeNewsgroup(ng.Name, includePatterns, excludePatterns, includeLookup, excludeLookup, hasIncludeWildcards, hasExcludeWildcards) { + log.Printf("Excluding newsgroup %s based on include/exclude patterns", ng.Name) + continue } + newsgroups = append(newsgroups, ng) } + log.Printf("Filtered %d newsgroups from %d total in %v", len(newsgroups), len(allNewsgroups), time.Since(start)) return newsgroups, nil } @@ -839,6 +926,15 @@ func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, f if suffixWildcard { for _, ng := range allNewsgroups { if strings.HasPrefix(ng.Name, wildcardPrefix) { + // Check exclude prefix first + if matchesExcludePrefix(ng.Name, excludePrefixes) { + continue + } + // Check if newsgroup already has results for this remote (fast map lookup) + if progressMap[ng.Name] { + log.Printf("Skipping newsgroup %s - already has transfer results for this remote", ng.Name) + continue + } if shouldIncludeNewsgroup(ng.Name, includePatterns, excludePatterns, includeLookup, excludeLookup, hasIncludeWildcards, hasExcludeWildcards) { newsgroups = append(newsgroups, ng) } @@ -848,6 +944,15 @@ func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, f // Exact match for _, ng := range allNewsgroups { if ng.Name == groupPattern { + // Check exclude prefix first + if matchesExcludePrefix(ng.Name, excludePrefixes) { + break + } + // Check if newsgroup already has results for this remote (fast map lookup) + if progressMap[ng.Name] { + log.Printf("Skipping newsgroup %s - already has transfer results for this remote", ng.Name) + break + } if 
shouldIncludeNewsgroup(ng.Name, includePatterns, excludePatterns, includeLookup, excludeLookup, hasIncludeWildcards, hasExcludeWildcards) { newsgroups = append(newsgroups, ng) } @@ -860,6 +965,35 @@ func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, f return newsgroups, nil } +func IgnoreNewsgroupProgress(ng *models.Newsgroup, progressDB *nntp.TransferProgressDB, startTime, endTime *time.Time) bool { + if progressDB != nil { + exists, err := progressDB.NewsgroupExists(ng.Name, startTime, endTime) + if err != nil { + log.Printf("Warning: Failed to check if newsgroup %s exists in progress DB: %v", ng.Name, err) + return true + } else if exists { + //log.Printf("Skipping newsgroup %s - already has transfer results for this remote", ng.Name) + return true + } + } + return false +} + +// matchesExcludePrefix checks if a newsgroup name matches any of the exclude prefixes +func matchesExcludePrefix(ngName string, excludePrefixes []string) bool { + for _, excludePrefix := range excludePrefixes { + if strings.HasSuffix(excludePrefix, "*") { + pattern := strings.TrimSuffix(excludePrefix, "*") + if strings.HasPrefix(ngName, pattern) { + return true + } + } else if ngName == excludePrefix { + return true + } + } + return false +} + // loadPatternsFromFile loads newsgroup patterns from a file (one per line) func loadPatternsFromFile(filePath string) ([]string, error) { file, err := os.Open(filePath) @@ -970,20 +1104,16 @@ func matchesAnyWildcardPattern(newsgroup string, patterns []string) bool { } // runTransfer performs the actual article transfer process -func runTransfer(db *database.Database, proc *processor.Processor, pool *nntp.Pool, newsgroups []*models.Newsgroup, batchCheck int, maxThreads int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, wgP *sync.WaitGroup, redisCli *redis.Client) error { +func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchCheck int64, maxThreads int, dryRun bool, startTime, 
endTime *time.Time, debugCapture bool, wgP *sync.WaitGroup, redisCli *redis.Client, progressDB *nntp.TransferProgressDB) error { defer wgP.Done() - var totalTransferred, nothingInDateRange, totalRedisCacheHits uint64 - var totalUnwanted, totalRejected, totalTXErrors, totalConnErrors uint64 - var transferMutex sync.Mutex + defer log.Printf("runTransfer() quitted") maxThreadsChan := make(chan struct{}, maxThreads) var wg sync.WaitGroup + log.Printf("Todo: %d newsgroups", len(newsgroups)) // Process each newsgroup - log.Printf("Starting transfer for %d newsgroups", len(newsgroups)) - for _, newsgroup := range newsgroups { + for _, ng := range newsgroups { if common.WantShutdown() { - transferMutex.Lock() - log.Printf("Shutdown requested, stopping transfer. Total transferred: %d articles", totalTransferred) - transferMutex.Unlock() + log.Printf("Aborted before next: %s", ng.Name) return nil } maxThreadsChan <- struct{}{} // acquire a thread slot @@ -994,587 +1124,2309 @@ func runTransfer(db *database.Database, proc *processor.Processor, pool *nntp.Po <-maxThreadsChan // release the thread slot }(wg) if common.WantShutdown() { + log.Printf("Aborted before next: %s", ng.Name) return } - start := time.Now() if VERBOSE { - log.Printf("Starting transfer for newsgroup: %s", newsgroup.Name) + log.Printf("Newsgroup: '%s' | Start", ng.Name) } - transferred, checked, rc, unwanted, rejected, txErrors, connErrors, err := transferNewsgroup(db, proc, pool, newsgroup, batchCheck, dryRun, startTime, endTime, debugCapture, redisCli) - - transferMutex.Lock() - totalTransferred += transferred - totalRedisCacheHits += rc - totalUnwanted += unwanted - totalRejected += rejected - totalTXErrors += txErrors - totalConnErrors += connErrors + err := transferNewsgroup(db, ng, batchCheck, dryRun, startTime, endTime, debugCapture, redisCli, progressDB) if err == ErrNotInDateRange { + nntp.ResultsMutex.Lock() nothingInDateRange++ + nntp.ResultsMutex.Unlock() err = nil // not a real error } - 
transferMutex.Unlock() + nntp.ResultsMutex.Lock() + NewsgroupsToProcess-- if err != nil { - log.Printf("Error transferring newsgroup %s: %v", newsgroup.Name, err) + log.Printf("Error transferring newsgroup %s: %v", ng.Name, err) } else { - if startTime == nil && endTime == nil { - log.Printf("DONE runTransfer Newsgroup '%s' | transferred %d articles. checked %d. took %v", newsgroup.Name, transferred, checked, time.Since(start)) - } + log.Printf("Newsgroup: '%s' | completed transferNewsgroup() remaining newsgroups: %d", ng.Name, NewsgroupsToProcess) } - }(newsgroup, &wg, redisCli) + nntp.ResultsMutex.Unlock() + }(ng, &wg, redisCli) } - // Wait for all transfers to complete wg.Wait() + + nntp.ResultsMutex.Lock() if nothingInDateRange > 0 { log.Printf("Note: %d newsgroups had no articles in the specified date range", nothingInDateRange) } for _, result := range results { log.Print(result) } - log.Printf("Summary: transferred: %d | redis_cache_hits: %d | unwanted: %d | rejected: %d | TX_Errors: %d | connErrors: %d", - totalTransferred, totalRedisCacheHits, totalUnwanted, totalRejected, totalTXErrors, totalConnErrors) + log.Printf("Summary: total: %d | transferred: %d | cache_hits: %d (before_check: %d, before_takethis: %d) | checked: %d/%d | wanted: %d | unwanted: %d | rejected: %d | retry: %d | skipped: %d | TX_Errors: %d | connErrors: %d", + globalTotalArticles, totalTransferred, totalRedisCacheHits, totalRedisCacheBeforeCheck, totalRedisCacheBeforeTakethis, totalChecked, totalCheckSentCount, totalWanted, totalUnwanted, totalRejected, totalRetry, totalSkipped, totalTXErrors, totalConnErrors) + nntp.ResultsMutex.Unlock() + log.Printf("Debug: StructChansCap1: %d/%d", len(common.StructChansCap1), cap(common.StructChansCap1)) return nil } -var debugArticles = make(map[string][]*models.Article) +var debugArticles = make(map[string][]*string) var debugMutex sync.Mutex -var ErrNotInDateRange = fmt.Errorf("article not in specified date range") +var ErrNotInDateRange = 
fmt.Errorf("notinrange") + +// processRequeuedJobs processes any failed jobs that were requeued for retry +// Returns the number of jobs processed successfully +func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponsesSetupChan chan *nntp.TTSetup, redisCli *redis.Client) (int, error) { + var queuedJobs []*nntp.CHTTJob + jobRequeueMutex.Lock() + if jobs, exists := jobRequeue[ttMode.Newsgroup]; exists { + queuedJobs = jobs + // clear requeue + delete(jobRequeue, ttMode.Newsgroup) + } + jobRequeueMutex.Unlock() + + if len(queuedJobs) == 0 { + return 0, nil + } + + log.Printf("Newsgroup: '%s' | Processing %d failed requeued jobs", newsgroup, len(queuedJobs)) + for i, job := range queuedJobs { + if common.WantShutdown() { + log.Printf("WantShutdown while processing requeued jobs for '%s'", newsgroup) + // Put remaining jobs back in queue + if i < len(queuedJobs) { + jobRequeueMutex.Lock() + jobRequeue[ttMode.Newsgroup] = slices.Insert(jobRequeue[ttMode.Newsgroup], 0, queuedJobs[i:]...) + jobRequeueMutex.Unlock() + } + return i, nil + } + + log.Printf("Newsgroup: '%s' | Processing requeued job %d/%d with %d articles", newsgroup, i+1, len(queuedJobs), len(job.MessageIDs)) + // pass articles to CHECK or TAKETHIS queue (async!) + responseChan, err := processBatch(ttMode, job.MessageIDs, redisCli, job.OffsetStart, job.OffsetQ, job.NGTProgress) + if err != nil { + log.Printf("Newsgroup: '%s' | Error processing requeued batch: %v", newsgroup, err) + jobRequeueMutex.Lock() + // insert remaining jobs back to slot 0 + jobRequeue[ttMode.Newsgroup] = slices.Insert(jobRequeue[ttMode.Newsgroup], 0, queuedJobs[i:]...) 
+ jobRequeueMutex.Unlock() + return i, fmt.Errorf("error processing requeued batch for newsgroup '%s': %v", newsgroup, err) + } + if responseChan != nil { + // pass the response channel to the collector channel: ttResponses + ttResponsesSetupChan <- nntp.GetTTSetup(responseChan) + } + } + + log.Printf("Newsgroup: '%s' | Successfully processed %d requeued jobs", newsgroup, len(queuedJobs)) + return len(queuedJobs), nil +} // transferNewsgroup transfers articles from a single newsgroup -func transferNewsgroup(db *database.Database, proc *processor.Processor, pool *nntp.Pool, newsgroup *models.Newsgroup, batchCheck int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, redisCli *redis.Client) (uint64, uint64, uint64, uint64, uint64, uint64, uint64, error) { +func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck int64, dryRun bool, startTime, endTime *time.Time, debugCapture bool, redisCli *redis.Client, transferProgressDB *nntp.TransferProgressDB) error { + + //log.Printf("Newsgroup: '%s' | transferNewsgroup: Starting (getting group DBs)...", ng.Name) // Get group database - groupDBs, err := db.GetGroupDBs(newsgroup.Name) + groupDBA, err := db.GetGroupDB(ng.Name) if err != nil { - return 0, 0, 0, 0, 0, 0, 0, fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", newsgroup.Name, err) + return fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", ng.Name, err) } - defer func() { - if ferr := db.ForceCloseGroupDBs(groupDBs); ferr != nil { - log.Printf("ForceCloseGroupDBs error for '%s': %v", newsgroup.Name, ferr) + //log.Printf("Newsgroup: '%s' | transferNewsgroup: Got group DBs, querying article count...", ng.Name) + // Initialize newsgroup progress tracking + nntp.ResultsMutex.Lock() + if _, exists := nntp.NewsgroupTransferProgressMap[ng.Name]; !exists { + nntp.NewsgroupTransferProgressMap[ng.Name] = &nntp.NewsgroupTransferProgress{ + Newsgroup: &ng.Name, + Started: time.Now(), + LastUpdated: time.Now(), + LastCronTX: 
time.Now(), + Finished: false, + TotalArticles: 0, } - }() + } + nntp.ResultsMutex.Unlock() // Get total article count first with date filtering - totalArticles, err := getArticleCountWithDateFilter(groupDBs, startTime, endTime) + totalNGArticles, err := db.GetArticleCountWithDateFilter(groupDBA, startTime, endTime) if err != nil { - return 0, 0, 0, 0, 0, 0, 0, fmt.Errorf("failed to get article count for newsgroup '%s': %v", newsgroup.Name, err) + if ferr := db.ForceCloseGroupDB(groupDBA); ferr != nil { + log.Printf("ForceCloseGroupDB error for '%s': %v", ng.Name, ferr) + } + return fmt.Errorf("failed to get article count for newsgroup '%s': %v", ng.Name, err) } - if totalArticles == 0 { + //log.Printf("Newsgroup: '%s' | transferNewsgroup: Got article count (%d), closing group DBs...", ng.Name, totalArticles) + //log.Printf("Newsgroup: '%s' | transferNewsgroup: Closed group DBs, checking if articles exist...", ng.Name) + + if totalNGArticles == 0 { + if ferr := db.ForceCloseGroupDB(groupDBA); ferr != nil { + log.Printf("ForceCloseGroupDB error for '%s': %v", ng.Name, ferr) + } + nntp.ResultsMutex.Lock() + nntp.NewsgroupTransferProgressMap[ng.Name].Finished = true + nntp.NewsgroupTransferProgressMap[ng.Name].LastUpdated = time.Now() + if VERBOSE { + results = append(results, fmt.Sprintf("END Newsgroup: '%s' | No articles to process", ng.Name)) + } + nntp.ResultsMutex.Unlock() + // Insert result into progress database + if transferProgressDB != nil { + if err := transferProgressDB.InsertResult(ng.Name, startTime, endTime, 0, 0, 0, 0, 0, 0, 0, 0); err != nil { + log.Printf("Warning: Failed to insert result to progress DB for '%s': %v", ng.Name, err) + } + } + // No articles to process if startTime != nil || endTime != nil { if VERBOSE { - log.Printf("No articles found in newsgroup: %s (within specified date range)", newsgroup.Name) + log.Printf("No articles found in newsgroup: %s (within specified date range)", ng.Name) } - return 0, 0, 0, 0, 0, 0, 0, 
ErrNotInDateRange + return ErrNotInDateRange } else { - log.Printf("No articles found in newsgroup: %s", newsgroup.Name) + if VERBOSE { + log.Printf("No articles found in newsgroup: %s", ng.Name) + } } - - return 0, 0, 0, 0, 0, 0, 0, nil + return nil } + groupDBA.Return() + + // Initialize newsgroup progress tracking + nntp.ResultsMutex.Lock() + nntp.NewsgroupTransferProgressMap[ng.Name].TotalArticles = totalNGArticles + nntp.NewsgroupTransferProgressMap[ng.Name].LastUpdated = time.Now() + ngtprogress := nntp.NewsgroupTransferProgressMap[ng.Name] + nntp.ResultsMutex.Unlock() if dryRun { if startTime != nil || endTime != nil { - log.Printf("DRY RUN: Would transfer %d articles from newsgroup %s (within specified date range)", totalArticles, newsgroup.Name) + log.Printf("DRY RUN: Would transfer %d articles from newsgroup %s (within specified date range)", totalNGArticles, ng.Name) } else { - log.Printf("DRY RUN: Would transfer %d articles from newsgroup %s", totalArticles, newsgroup.Name) + log.Printf("DRY RUN: Would transfer %d articles from newsgroup %s", totalNGArticles, ng.Name) } if !debugCapture { - return 0, 0, 0, 0, 0, 0, 0, nil + return nil } } if !dryRun && !debugCapture { - if startTime != nil || endTime != nil { - log.Printf("Found %d articles in newsgroup %s (within specified date range) - processing in batches", totalArticles, newsgroup.Name) - } else { - log.Printf("Found %d articles in newsgroup %s - processing in batches", totalArticles, newsgroup.Name) - } + log.Printf("+ Found %d articles in newsgroup %s", totalNGArticles, ng.Name) } - //time.Sleep(3 * time.Second) // debug sleep - var ioffset int64 - remainingArticles := totalArticles - // Process articles in database batches (much larger than network batches) + + remainingArticles := totalNGArticles ttMode := &nntp.TakeThisMode{ - CheckMode: true, // start with CHECK mode + Newsgroup: &ng.Name, + CheckMode: CHECK_FIRST, } - var transferred, checked, redis_cache_hits uint64 + ttResponsesSetupChan 
:= make(chan *nntp.TTSetup, totalNGArticles/int64(batchCheck)+2) start := time.Now() - for offset := ioffset; offset < totalArticles; offset += dbBatchSize { - if common.WantShutdown() { - log.Printf("WantShutdown in newsgroup: %s: Transferred %d articles", newsgroup.Name, transferred) - return transferred, checked, redis_cache_hits, ttMode.Unwanted, ttMode.Rejected, ttMode.TX_Errors, ttMode.ConnErrors, nil - } - // Load batch from database with date filtering - articles, err := getArticlesBatchWithDateFilter(groupDBs, offset, startTime, endTime) - if err != nil { - log.Printf("Error loading article batch (offset %d) for newsgroup %s: %v", offset, newsgroup.Name, err) - continue - } + // WaitGroup to ensure collector goroutine finishes before returning + var collectorWG sync.WaitGroup + collectorWG.Add(1) - if len(articles) == 0 { - //log.Printf("No more articles in newsgroup %s (offset %d)", newsgroup.Name, offset) - break - } - if dryRun && debugCapture { - debugMutex.Lock() - debugArticles[newsgroup.Name] = append(debugArticles[newsgroup.Name], articles...) 
- debugMutex.Unlock() - return 0, 0, 0, 0, 0, 0, 0, nil - } - if VERBOSE { - log.Printf("Newsgroup: '%s' | Loaded %d articles from database (offset %d)", newsgroup.Name, len(articles), offset) - } - isleep := time.Second - // Process articles in network batches - for i := 0; i < len(articles); i += batchCheck { - if common.WantShutdown() { - log.Printf("WantShutdown in newsgroup: %s: Transferred %d articles", newsgroup.Name, transferred) - return transferred, checked, redis_cache_hits, ttMode.Unwanted, ttMode.Rejected, ttMode.TX_Errors, ttMode.ConnErrors, nil - } - if !ttMode.CheckMode && ttMode.TmpSuccessCount >= 100 { - ttMode.TmpSuccessCount = 0 - ttMode.TmpTakeThisCount = 0 - } - // Determine end index for the batch - end := i + batchCheck - if end > len(articles) { - end = len(articles) + // WaitGroup to track individual batched jobs response channel processors + var responseWG sync.WaitGroup + + go func(responseWG *sync.WaitGroup) { + defer collectorWG.Done() + var num uint64 + for setup := range ttResponsesSetupChan { + if setup == nil || setup.ResponseChan == nil { + log.Printf("Newsgroup: '%s' | Warning: nil TT response channel received in collector!?", ng.Name) + continue } - // forever: will process this batch until successful or shutdown - forever: - for { - if common.WantShutdown() { - log.Printf("WantShutdown in newsgroup: %s: Transferred %d articles", newsgroup.Name, transferred) - return transferred, checked, redis_cache_hits, ttMode.Unwanted, ttMode.Rejected, ttMode.TX_Errors, ttMode.ConnErrors, nil + num++ + //log.Printf("Newsgroup: '%s' | Starting response channel processor num %d (goroutines: %d)", ng.Name, num, runtime.NumGoroutine()) + responseWG.Add(1) + go func(responseChan chan *nntp.TTResponse, num uint64, responseWG *sync.WaitGroup) { + defer responseWG.Done() + //defer log.Printf("Newsgroup: '%s' | Quit response channel processor num %d (goroutines: %d)", ng.Name, num, runtime.NumGoroutine()) + + // Read exactly ONE response from this 
channel (channel is buffered with cap 1) + resp := <-responseChan // job.Response(ForceCleanUp, err) arrives here + + if resp == nil { + log.Printf("Newsgroup: '%s' | Warning: nil TT response received!?", ng.Name) + return } - if isleep > time.Minute { - isleep = time.Minute + if resp.Err != nil { + log.Printf("Newsgroup: '%s' | Error in TT response job #%d err='%v' job='%v' ForceCleanUp=%t", ng.Name, resp.Job.JobID, resp.Err, resp.Job, resp.ForceCleanUp) } - if isleep > time.Second { - log.Printf("Newsgroup: '%s' | Sleeping %v before retrying batch %d-%d (transferred %d so far)", newsgroup.Name, isleep, i+1, end, transferred) - time.Sleep(isleep) + if resp.Job == nil { + log.Printf("Newsgroup: '%s' | Warning: nil Job in TT response job without error!? ForceCleanUp=%t", ng.Name, resp.ForceCleanUp) + return } - // Get connection from pool - conn, err := pool.Get(nntp.MODE_STREAM_MV) - if err != nil { - log.Printf("Newsgroup: '%s' | Failed to get connection from pool: %v", newsgroup.Name, err) - isleep = isleep * 2 - continue forever + if !resp.ForceCleanUp { + return } - - if conn.ModeReader { - if VERBOSE { - log.Printf("got connection in reader mode, closing and getting a new one") - } - conn.ForceCloseConn() - continue forever - } - - batchTransferred, batchChecked, TTsuccessRate, rc, berr := processBatch(conn, newsgroup.Name, ttMode, articles[i:end], redisCli) - transferred += batchTransferred - redis_cache_hits += rc - checked += batchChecked - if berr != nil { - log.Printf("Newsgroup: '%s' | Error processing network batch: %v ... 
retry", newsgroup.Name, berr) - conn.ForceCloseConn() - isleep = isleep * 2 - ttMode.CheckMode = true - ttMode.TmpSuccessCount = 0 - ttMode.TmpTakeThisCount = 0 - continue forever - } - if VERBOSE || (transferred >= 1000 && transferred%1000 == 0) || (checked >= 1000 && checked%1000 == 0) { - log.Printf("Newsgroup: '%s' | BatchDone (offset %d/%d) %d-%d TX:%d check=%t ttRate=%.1f%% checked=%d redis_cache_hits=%d/%d", newsgroup.Name, offset, totalArticles, i+1, end, batchTransferred, ttMode.CheckMode, TTsuccessRate, batchChecked, rc, redis_cache_hits) - } - pool.Put(conn) - break forever + // free memory - CRITICAL: Lock and unlock in same scope, not with defer! + //resp.Job.Mux.Lock() + //if VERBOSE { + // log.Printf("Newsgroup: '%s' | Cleaning up TT job #%d with %d articles (ForceCleanUp)", ng.Name, resp.Job.JobID, len(resp.Job.Articles)) + //} + //models.RecycleArticles(resp.Job.Articles) + //resp.Job.Articles = nil + + // Clean up ArticleMap - nil the keys (pointers) before deleting + /* disabled + for msgid := range resp.Job.ArticleMap { + resp.Job.ArticleMap[msgid] = nil + delete(resp.Job.ArticleMap, msgid) + } + resp.Job.ArticleMap = nil + */ + //resp.Job.Mux.Unlock() + nntp.RecycleTTResponseChan(responseChan) + nntp.RecycleTTResponse(resp) + }(setup.ResponseChan, num, responseWG) + nntp.RecycleTTSetup(setup) + } + if VERBOSE { + log.Printf("Newsgroup: '%s' | Collector: ttResponses closed, waiting for %d response processors to finish...", ng.Name, num) + } + // Wait for all response channel processors to finish + responseWG.Wait() + if VERBOSE { + log.Printf("Newsgroup: '%s' | Collector: all response processors closed", ng.Name) + } + + var tmpglobalTotalArticles, tmptotalTransferred, tmptotalTTSentCount, tmptotalCheckSentCount, tmptotalRedisCacheHits, tmptotalRedisCacheBeforeCheck, tmptotalRedisCacheBeforeTakethis uint64 + var tmptotalWanted, tmptotalUnwanted, tmptotalChecked, tmptotalRejected, tmptotalRetry, tmptotalSkipped, tmptotalTXErrors, 
tmptotalConnErrors uint64 + + ngtprogress.Mux.Lock() + result := fmt.Sprintf("END Newsgroup: '%s' total: %d | CHECK_sent: %d | checked: %d | transferred: %d | cache_hits: %d | wanted: %d | unwanted: %d | rejected: %d | retry: %d | skipped: %d | TX_Errors: %d | connErrors: %d | took %v", + ng.Name, totalNGArticles, + ngtprogress.CheckSentCount, ngtprogress.Checked, + ngtprogress.Transferred, ngtprogress.RedisCached, + ngtprogress.Wanted, ngtprogress.Unwanted, ngtprogress.Rejected, + ngtprogress.Retry, ngtprogress.Skipped, + ngtprogress.TxErrors, ngtprogress.ConnErrors, + time.Since(start)) + // capture values + tmpglobalTotalArticles += uint64(totalNGArticles) + tmptotalTransferred += ngtprogress.Transferred + tmptotalTTSentCount += ngtprogress.TTSentCount + tmptotalCheckSentCount += ngtprogress.CheckSentCount + tmptotalRedisCacheHits += ngtprogress.RedisCached + tmptotalRedisCacheBeforeCheck += ngtprogress.RedisCachedBeforeCheck + tmptotalRedisCacheBeforeTakethis += ngtprogress.RedisCachedBeforeTakethis + tmptotalWanted += ngtprogress.Wanted + tmptotalUnwanted += ngtprogress.Unwanted + tmptotalChecked += ngtprogress.Checked + tmptotalRejected += ngtprogress.Rejected + tmptotalRetry += ngtprogress.Retry + tmptotalSkipped += ngtprogress.Skipped + tmptotalTXErrors += ngtprogress.TxErrors + tmptotalConnErrors += ngtprogress.ConnErrors + // Mark newsgroup as finished + ngtprogress.Finished = true + ngtprogress.LastUpdated = time.Now() + ngtprogress.LastCronTX = ngtprogress.LastUpdated + ngtprogress.Mux.Unlock() + + nntp.ResultsMutex.Lock() + // capture values + globalTotalArticles += tmpglobalTotalArticles + totalTransferred += tmptotalTransferred + totalTTSentCount += tmptotalTTSentCount + totalCheckSentCount += tmptotalCheckSentCount + totalRedisCacheHits += tmptotalRedisCacheHits + totalRedisCacheBeforeCheck += tmptotalRedisCacheBeforeCheck + totalRedisCacheBeforeTakethis += tmptotalRedisCacheBeforeTakethis + totalWanted += tmptotalWanted + totalUnwanted += 
tmptotalUnwanted + totalChecked += tmptotalChecked + totalRejected += tmptotalRejected + totalRetry += tmptotalRetry + totalSkipped += tmptotalSkipped + totalTXErrors += tmptotalTXErrors + totalConnErrors += tmptotalConnErrors + // store result + results = append(results, result) + for _, msgId := range rejectedArticles[ng.Name] { + // prints all at the end again + log.Printf("END Newsgroup: '%s' | REJECTED '%s'", ng.Name, msgId) + } + delete(rejectedArticles, ng.Name) // free memory + nntp.ResultsMutex.Unlock() + + // Insert result into progress database + if transferProgressDB != nil { + if err := transferProgressDB.InsertResult( + ng.Name, + startTime, + endTime, + int64(tmptotalTransferred), + int64(tmptotalUnwanted), + int64(tmptotalChecked), + int64(tmptotalRejected), + int64(tmptotalRetry), + int64(tmptotalSkipped), + int64(tmptotalTXErrors), + int64(tmptotalConnErrors), + ); err != nil { + log.Printf("Warning: Failed to insert result to progress DB for '%s': %v", ng.Name, err) } } + }(&responseWG) + + OffsetQueue := &nntp.OffsetQueue{ + Newsgroup: &ng.Name, + MaxQueuedJobs: MaxQueuedJobs, + } - // Clear articles slice to free memory - for i := range articles { - articles[i] = nil // free memory + // Use simple OFFSET pagination + var processed int64 + msgIDsChan := make(chan []*string, MaxQueuedJobs) + go db.GetMessageIDsWithDateFilter(ng, startTime, endTime, batchCheck, msgIDsChan) + // Get articles in database batches (much larger than network batches) + var dbOffset int64 + //start := time.Now() + for messageIDs := range msgIDsChan { + if common.WantShutdown() { + log.Printf("WantShutdown in newsgroup: '%s' (processed %d messageIDs)", ng.Name, processed) + return nil } - remainingArticles -= int64(len(articles)) - var batchSuccessRate float64 - if transferred > 0 { - batchSuccessRate = float64(transferred) / float64(len(articles)) * 100.0 + // Process any requeued jobs first (from previous failed batches) + if _, err := processRequeuedJobs(ng.Name, 
ttMode, ttResponsesSetupChan, redisCli); err != nil { + return err } - if VERBOSE { - log.Printf("Newsgroup: '%s' | Done (offset %d/%d) total: %d/%d (unw: %d / rej: %d) (Check=%t) ttRate=%.1f%%", newsgroup.Name, offset, totalArticles, transferred, remainingArticles, ttMode.Unwanted, ttMode.Rejected, ttMode.CheckMode, batchSuccessRate) - } - articles = nil // free memory - } // end for offset range totalArticles - result := fmt.Sprintf("END Newsgroup: '%s' | total transferred: %d articles / total articles: %d (unwanted: %d | rejected: %d | checked: %d) TX_Errors: %d, connErrors: %d, took %v", newsgroup.Name, transferred, totalArticles, ttMode.Unwanted, ttMode.Rejected, checked, ttMode.TX_Errors, ttMode.ConnErrors, time.Since(start)) - //log.Print(result) - resultsMutex.Lock() - results = append(results, result) - if VERBOSE { - for _, msgId := range rejectedArticles[newsgroup.Name] { - // prints all at the end again - log.Printf("END Newsgroup: '%s' | REJECTED '%s'", newsgroup.Name, msgId) + if len(messageIDs) == 0 { + log.Printf("No more articles in newsgroup %s (loaded %d)", ng.Name, processed) + break + } + + if dryRun && debugCapture { + log.Printf("Newsgroup: '%s' | DRY RUN with debug capture: Capturing %d articles (processed %d) BROKEN TODO NEED FIX!", ng.Name, len(messageIDs), processed) + //debugMutex.Lock() + // TODO: broken debug catpure code fetch articles here + debugArticles[ng.Name] = append(debugArticles[ng.Name], messageIDs...) 
+ //debugMutex.Unlock() + return nil + } + + //log.Printf("Newsgroup: '%s' | Loaded %d mids | took: %v", ng.Name, len(messageIDs), time.Since(start)) + // Process articles in network batches + //start2 := time.Now() + OffsetQueue.Add(1) + if common.WantShutdown() { + log.Printf("WantShutdown in newsgroup: '%s' (processed %d)", ng.Name, processed) + return nil + } + // Determine end index for the batch + /* disabled + //end := i + batchCheck + //if end > len(messageIDs) { + // end = len(messageIDs) + //} + */ + // pass articles to CHECK or TAKETHIS queue (async!) + responseChan, err := processBatch(ttMode, messageIDs, redisCli, dbOffset, OffsetQueue, ngtprogress) + if err != nil { + log.Printf("Newsgroup: '%s' | Error processing batch offset %d: %v", ng.Name, dbOffset, err) + return fmt.Errorf("error processing batch offset %d for newsgroup '%s': %v", dbOffset, ng.Name, err) + } + if responseChan != nil { + // pass the response channel to the collector channel: ttResponses + ttResponsesSetupChan <- nntp.GetTTSetup(responseChan) + } + dbOffset += int64(len(messageIDs)) + processed += int64(len(messageIDs)) + + OffsetQueue.Wait(MaxQueuedJobs) // wait for offset batches to finish, less than N in flight + // articlesProcessed already incremented above after loading from DB + remainingArticles -= int64(len(messageIDs)) + + //log.Printf("Newsgroup: '%s' | Pushed to queue (processed %d/%d) remaining: %d (Check=%t) took: %v", ng.Name, processed, totalNGArticles, remainingArticles, ttMode.UseCHECK(), time.Since(start2)) + //log.Printf("Newsgroup: '%s' | Pushed (processed %d/%d) total: %d/%d (unw: %d / rej: %d) (Check=%t)", ng.Name, articlesProcessed, totalArticles, transferred, remainingArticles, ttMode.Unwanted, ttMode.Rejected, ttMode.GetMode()) + //start = time.Now() + } // end for msgIDsChan + + log.Printf("Newsgroup: '%s' | msgIDsChan closed, checking for requeued jobs...", ng.Name) + + // Process any remaining requeued jobs after main loop completes + // This handles 
failures that occurred in the last batch + for { + if common.WantShutdown() { + log.Printf("WantShutdown during final requeue processing for '%s'", ng.Name) + break + } + processed, err := processRequeuedJobs(ng.Name, ttMode, ttResponsesSetupChan, redisCli) + if err != nil { + log.Printf("Newsgroup: '%s' | Error in final requeue processing: %v", ng.Name, err) + // Don't return error, just log it - we've already processed most articles + break } - delete(rejectedArticles, newsgroup.Name) // free memory + if processed == 0 { + // No more requeued jobs to process + break + } + //log.Printf("Newsgroup: '%s' | Processed %d requeued jobs in final pass", ng.Name, processed) + // Loop again to check if any of those jobs failed and were requeued } - resultsMutex.Unlock() - return transferred, checked, redis_cache_hits, ttMode.Unwanted, ttMode.Rejected, ttMode.TX_Errors, ttMode.ConnErrors, nil + + //log.Printf("Newsgroup: '%s' | Final requeue processing completed, closing ttResponses channel...", ng.Name) + + // Close the ttResponses channel to signal collector goroutine to finish + close(ttResponsesSetupChan) + + //log.Printf("Newsgroup: '%s' | ttResponses channel closed, waiting for collector to finish...", ng.Name) + + // Wait for collector goroutine to finish processing all responses + collectorWG.Wait() + + log.Printf("Newsgroup: '%s' | All jobs completed and responses collected", ng.Name) + deassignWorker(ng.Name) + return nil } // end func transferNewsgroup var results []string var rejectedArticles = make(map[string][]string) -var resultsMutex sync.RWMutex -var lowerLevel float64 = 90.0 -var upperLevel float64 = 95.0 +var LowerLevel float64 = 90.0 +var UpperLevel float64 = 95.0 // processBatch processes a batch of articles using NNTP streaming protocol (RFC 4644) // Uses TAKETHIS primarily, falls back to CHECK when success rate < 95% -func processBatch(conn *nntp.BackendConn, newsgroup string, ttMode *nntp.TakeThisMode, articles []*models.Article, redisCli 
*redis.Client) (transferred uint64, checked uint64, successRate float64, redis_cache_hits uint64, err error) { +func processBatch(ttMode *nntp.TakeThisMode, messageIDs []*string, redisCli *redis.Client, dbOffset int64, offsetQ *nntp.OffsetQueue, ngtprogress *nntp.NewsgroupTransferProgress) (chan *nntp.TTResponse, error) { - if len(articles) == 0 { - return 0, 0, 0, 0, nil + if len(messageIDs) == 0 { + log.Printf("Newsgroup: '%s' | processBatch: no articles in this batch", *ttMode.Newsgroup) + return nil, nil } - // Calculate success rate to determine whether to use CHECK or TAKETHIS - if ttMode.TmpSuccessCount > 0 { - successRate = float64(ttMode.TmpSuccessCount) / float64(ttMode.TmpTakeThisCount) * 100.0 + // Update newsgroup progress with current offset + nntp.ResultsMutex.RLock() + if progress, exists := nntp.NewsgroupTransferProgressMap[*ttMode.Newsgroup]; exists { + progress.Mux.Lock() + progress.OffsetStart = dbOffset + progress.LastUpdated = time.Now() + progress.Mux.Unlock() } + nntp.ResultsMutex.RUnlock() - // Switch to CHECK mode if TAKETHIS success rate drops below lowerLevel - if !ttMode.CheckMode && successRate < lowerLevel && ttMode.TmpTakeThisCount >= 10 { // Need at least 10 attempts for meaningful stats - ttMode.CheckMode = true - log.Printf("Newsgroup: '%s' | TAKETHIS success rate %.1f%% < %f%%, switching to CHECK mode", newsgroup, successRate, lowerLevel) - } else if ttMode.CheckMode && successRate >= upperLevel && ttMode.TmpTakeThisCount >= 20 { // Switch back when rate improves - ttMode.CheckMode = false - log.Printf("Newsgroup: '%s' | TAKETHIS success rate %.1f%% >= %f%%, switching to TAKETHIS mode", newsgroup, successRate, upperLevel) - } + ttMode.FlipMode(LowerLevel, UpperLevel) - articleMap := make(map[string]*models.Article) - for _, article := range articles { - articleMap[article.MessageID] = article + job := &nntp.CHTTJob{ + JobID: atomic.AddUint64(&nntp.JobIDCounter, 1), + Newsgroup: ttMode.Newsgroup, + MessageIDs: make([]*string, 0, 
len(messageIDs)), + ResponseChan: nntp.GetTTResponseChan(), + TTMode: ttMode, + OffsetStart: dbOffset, + OffsetQ: offsetQ, + NGTProgress: ngtprogress, } + var redis_cached uint64 - switch ttMode.CheckMode { - case true: // ttMode.CheckMode - // CHECK mode: verify articles are wanted before sending - //log.Printf("Newsgroup: '%s' | CHECK: %d articles (success rate: %.1f%%)", newsgroup, len(articles), successRate) - - checkMessageIds := make([]*string, 0, len(articles)) - checkArticles := make([]*models.Article, 0, len(articles)) - checkArticleMap := make(map[string]*models.Article, len(articles)) - - // Batch check Redis cache using pipeline (1 round trip for all keys) - if redisCli != nil && len(articles) > 0 { - pipe := redisCli.Pipeline() - cmds := make([]*redis.IntCmd, len(articles)) - - // Queue all EXISTS commands - for i, article := range articles { - cmds[i] = pipe.Exists(redisCtx, article.MessageID) + // Batch check Redis cache using pipeline before sending CHECK + if redisCli != nil { + pipe := redisCli.Pipeline() + cmds := make([]*redis.IntCmd, len(messageIDs)) + redis2Check := 0 + // Queue all EXISTS commands + for i, msgid := range messageIDs { + if msgid == nil { + continue } + cmds[i] = pipe.Exists(redisCtx, *msgid) + redis2Check++ + } - // Execute all in one network round trip - _, err := pipe.Exec(redisCtx) - if err != nil && VERBOSE { - log.Printf("Newsgroup: '%s' | Redis pipeline error: %v", newsgroup, err) - } + // Execute all in one network round trip + if _, err := pipe.Exec(redisCtx); err != nil { + log.Printf("Newsgroup: '%s' | Redis pipeline error: %v", *ttMode.Newsgroup, err) + } else { - // Process results + // Process results and filter cached articles for i, cmd := range cmds { - if articles[i] == nil { - continue + if cmd == nil || messageIDs[i] == nil { + log.Printf("Newsgroup: '%s' | Warning: nil Redis command or nil message ID in batch for job #%d (skip CHECK)", *ttMode.Newsgroup, job.JobID) + continue // Skip if command wasn't 
queued or message ID is nil } - article := articles[i] + msgid := messageIDs[i] exists, cmdErr := cmd.Result() if cmdErr == nil && exists > 0 { // Cached in Redis - skip this article if VERBOSE { - log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis (skip [CHECK])", newsgroup, article.MessageID) + log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis in job #%d (skip CHECK)", *ttMode.Newsgroup, *msgid, job.JobID) } - redis_cache_hits++ + job.NGTProgress.Increment(nntp.IncrFLAG_REDIS_CACHED_BEFORE_CHECK, 1) + redis_cached++ + messageIDs[i] = nil continue } - - // Not cached - add to valid list - checkMessageIds = append(checkMessageIds, &article.MessageID) - checkArticles = append(checkArticles, article) - checkArticleMap[article.MessageID] = article - } - } else { - // No Redis - add all non-nil message IDs - for _, article := range articles { - if article == nil { - continue + if cmdErr != nil { + log.Printf("Newsgroup: '%s' | Redis cache error for message ID '%s' in job #%d: %v (include in CHECK)", *ttMode.Newsgroup, *msgid, job.JobID, cmdErr) } - checkMessageIds = append(checkMessageIds, &article.MessageID) - checkArticles = append(checkArticles, article) - checkArticleMap[article.MessageID] = article + // Not cached - add to valid list + //job.Articles = append(job.Articles, article) + //job.ArticleMap[&article.MessageID] = article + job.MessageIDs = append(job.MessageIDs, msgid) } } + if redis_cached == uint64(len(messageIDs)) { + if VERBOSE { + log.Printf("Newsgroup: '%s' | All %d articles in batch are cached in Redis in job #%d (skip CHECK)", *ttMode.Newsgroup, len(messageIDs), job.JobID) + } + return job.QuitResponseChan(), nil - if len(checkMessageIds) == 0 { - log.Printf("Newsgroup: '%s' | No message IDs to check in batch. 
(redis_cache_hits: %d)", newsgroup, redis_cache_hits) - return transferred, checked, successRate, redis_cache_hits, nil + } else if redis_cached > 0 { + if VERBOSE { + log.Printf("Newsgroup: '%s' | Redis got %d/%d cached articles in job #%d (before CHECK)", *ttMode.Newsgroup, redis_cached, len(messageIDs), job.JobID) + } } - if VERBOSE { - log.Printf("Newsgroup: '%s' | Sending CHECK commands for %d valid articles (filtered from %d)", newsgroup, len(checkMessageIds), len(articles)) + } else { + // No Redis - add all non-nil message IDs + for _, msgid := range messageIDs { + if msgid == nil { + continue + } + //job.Articles = append(job.Articles, article) + //job.ArticleMap[&article.MessageID] = article + job.MessageIDs = append(job.MessageIDs, msgid) } + } + if len(job.MessageIDs) == 0 { + log.Printf("Newsgroup: '%s' | No message IDs to check in batch. (redis_cache_hits: %d)", *ttMode.Newsgroup, redis_cached) + return job.QuitResponseChan(), nil + } - // Send CHECK commands for all message IDs - wantedIds, err := conn.CheckMultiple(checkMessageIds, ttMode) - if err != nil { - ttMode.ConnErrors++ - conn.ForceCloseConn() - return transferred, checked, successRate, redis_cache_hits, fmt.Errorf("Newsgroup: '%s' | failed to send CHECK command: %v", newsgroup, err) - } - checked += uint64(len(checkMessageIds)) + // Assign job to worker (consistent assignment + load balancing) + QueuesMutex.RLock() + if len(CheckQueues) == 0 { + QueuesMutex.RUnlock() + log.Printf("Newsgroup: '%s' | No workers available to process batch job #%d with %d message IDs", *ttMode.Newsgroup, job.JobID, len(job.MessageIDs)) + return nil, fmt.Errorf("no workers available") + } + QueuesMutex.RUnlock() - if len(wantedIds) == 0 { - //log.Printf("No articles wanted by server in this batch") - ttMode.TmpSuccessCount = 0 - ttMode.TmpTakeThisCount = 0 - return transferred, checked, successRate, redis_cache_hits, nil - } + workerID := assignWorkerToNewsgroup(*ttMode.Newsgroup) - // Send TAKETHIS for wanted 
articles - wantedArticles := make([]*models.Article, 0, len(wantedIds)) - for _, msgId := range wantedIds { - if msgId == nil { - continue - } - article, exists := checkArticleMap[*msgId] - if !exists { - log.Printf("WARN: Article not found in checkArticleMap for msgId: %s", *msgId) - continue - } - wantedArticles = append(wantedArticles, article) - } - - log.Printf("Newsgroup: '%s' | CHECK wants: %d/%d message IDs. sending=%d", newsgroup, len(wantedIds), len(checkMessageIds), len(wantedArticles)) - //log.Printf("Newsgroup: '%s' | Calling sendArticlesBatchViaTakeThis with %d articles...", newsgroup, len(wantedArticles)) - txcount, rc, err := sendArticlesBatchViaTakeThis(conn, wantedArticles, ttMode, newsgroup, redisCli) - log.Printf("Newsgroup: '%s' | sendArticlesBatchViaTakeThis returned: transferred=%d redis_cached=%d err=%v", newsgroup, txcount, rc, err) - transferred += txcount - redis_cache_hits += rc - - if err != nil { - log.Printf("Newsgroup: '%s' | Failed to send CHECKED TAKETHIS: %v", newsgroup, err) - return transferred, checked, successRate, redis_cache_hits, fmt.Errorf("failed to send CHECKED TAKETHIS batch: %v", err) - } - return transferred, checked, successRate, redis_cache_hits, nil - // end case ttMode.CheckMode - // case !ttMode.CheckMode - case false: - // TAKETHIS mode: send articles directly and track success rate - //log.Printf("Newsgroup: '%s' | TAKETHIS: %d articles (success rate: %.1f%%)", newsgroup, len(articles), successRate) - - // Validate articles before sending in TAKETHIS mode - wantedArticles := make([]*models.Article, 0, len(articles)) - for i, article := range articles { - if article == nil { - continue - } - if strings.Contains(article.MessageID, ">?<") { - log.Printf("ERROR: Invalid message ID contains '>?<' in TAKETHIS mode: '%s' - skipping", article.MessageID) - articles[i] = nil - continue - } - if len(article.MessageID) > 128 { - log.Printf("WARN: Message ID very long in TAKETHIS mode (%d chars): '%.100s...'", 
len(article.MessageID), article.MessageID) - } - wantedArticles = append(wantedArticles, article) - } - - if len(wantedArticles) == 0 { - log.Printf("WARN: No valid articles for TAKETHIS mode, skipping batch") - return transferred, checked, successRate, redis_cache_hits, nil - } - - if len(wantedArticles) != len(articles) { - log.Printf("Newsgroup: '%s' | Filtered articles for TAKETHIS: %d valid from %d total", newsgroup, len(wantedArticles), len(articles)) - } - - txcount, rc, err := sendArticlesBatchViaTakeThis(conn, wantedArticles, ttMode, newsgroup, redisCli) - transferred += txcount - redis_cache_hits += rc + QueuesMutex.RLock() + WorkersCheckChannel := CheckQueues[workerID] + QueuesMutex.RUnlock() - if err != nil { - return transferred, checked, successRate, redis_cache_hits, fmt.Errorf("failed to send TAKETHIS batch: %v", err) - } - if txcount == 0 { - if !ttMode.CheckMode { - ttMode.CheckMode = true - ttMode.TmpSuccessCount = 0 - ttMode.TmpTakeThisCount = 0 - } - } - return transferred, checked, successRate, redis_cache_hits, nil - } // end case !ttMode.CheckMode - // end switch ttMode.CheckMode - return 0, 0, 0, 0, fmt.Errorf("uncatched case in processBatch") + //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queueing job #%d with %d msgIDs to worker %d. 
CheckQ=%d", *ttMode.Newsgroup, workerID, job.JobID, len(job.MessageIDs), workerID, len(CheckQueues[workerID])) + WorkersCheckChannel <- job // checkQueue <- job // goto: job := <-WorkersCheckChannel + //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queued Job #%d", *ttMode.Newsgroup, workerID, job.JobID) + return job.GetResponseChan(), nil } // end func processBatch -type CheckResponse struct { - CmdId uint - Article *models.Article -} - // sendArticlesBatchViaTakeThis sends multiple articles via TAKETHIS in streaming mode -// Sends all TAKETHIS commands first, then reads all responses (true streaming) -func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Article, ttMode *nntp.TakeThisMode, newsgroup string, redisCli *redis.Client) (transferred uint64, redis_cached uint64, err error) { +// Sends all TAKETHIS commands and queues ReadRequests for concurrent processing +func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Article, job *nntp.CHTTJob, newsgroup string, redisCli *redis.Client, demuxer *nntp.ResponseDemuxer, readTAKETHISResponsesChan chan *nntp.ReadRequest) (redis_cached uint64, err error) { if len(articles) == 0 { - return 0, 0, nil + return 0, nil } // Phase 1: Send all TAKETHIS commands without waiting for responses //log.Printf("Phase 1: Sending %d TAKETHIS commands...", len(articles)) - //commandIDs := make([]uint, 0, len(articles)) - //checkArticles := make([]*models.Article, 0, len(articles)) - // Batch check Redis cache using pipeline before sending TAKETHIS if redisCli != nil { pipe := redisCli.Pipeline() cmds := make([]*redis.IntCmd, len(articles)) - + //redis2Check := 0 // Queue all EXISTS commands (only for non-nil articles) for i, article := range articles { if article == nil { continue } cmds[i] = pipe.Exists(redisCtx, article.MessageID) + //redis2Check++ } // Execute all in one network round trip _, err := pipe.Exec(redisCtx) - if err != nil && VERBOSE { + if err != nil { 
log.Printf("Newsgroup: '%s' | Redis pipeline error in TAKETHIS: %v", newsgroup, err) - } + } else { - // Process results and filter cached articles - for i, cmd := range cmds { - if cmd == nil || articles[i] == nil { - continue // Skip if command wasn't queued or article is nil - } + // Process results and filter cached articles + for i, cmd := range cmds { + if cmd == nil || articles[i] == nil { + log.Printf("Newsgroup: '%s' | Warning: nil Redis command or nil article in TAKETHIS batch for job #%d (skip)", newsgroup, job.JobID) + continue // Skip if command wasn't queued or article is nil + } - exists, cmdErr := cmd.Result() - if cmdErr == nil && exists > 0 { - // Cached in Redis - skip this article - if VERBOSE { - log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis (skip [TAKETHIS])", newsgroup, articles[i].MessageID) + exists, cmdErr := cmd.Result() + if cmdErr == nil && exists > 0 { + // Cached in Redis - skip this article + if VERBOSE { + log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis in job #%d (skip [TAKETHIS])", newsgroup, articles[i].MessageID, job.JobID) + } + job.NGTProgress.Increment(nntp.IncrFLAG_REDIS_CACHED_BEFORE_TAKETHIS, 1) + redis_cached++ + articles[i] = nil // free memory + continue } - articles[i] = nil // free memory - redis_cached++ - continue + if cmdErr != nil { + log.Printf("Newsgroup: '%s' | Redis cache error for message ID '%s' in job #%d: %v (include in TAKETHIS)", newsgroup, articles[i].MessageID, job.JobID, cmdErr) + } + // Not cached - will be sent + } + } + if redis_cached == uint64(len(articles)) { + if VERBOSE { + log.Printf("Newsgroup: '%s' | All %d articles are cached in Redis in job #%d (skip TAKETHIS)", newsgroup, len(articles), job.JobID) + } + return redis_cached, nil + + } else if redis_cached > 0 { + if VERBOSE { + log.Printf("Newsgroup: '%s' | Redis got %d/%d cached articles in job #%d (before TAKETHIS)", newsgroup, redis_cached, len(articles), job.JobID) } - // Not cached - will be sent } 
} // Now send TAKETHIS for non-cached articles - artChan := make(chan *CheckResponse, len(articles)) - // ← Also close artChan - - for _, article := range articles { + var sentCount int + conn.Lock() + var ttxBytes uint64 + start := time.Now() + astart := start + astart2 := start + skipped := 0 + for n, article := range articles { if article == nil { - continue // Skip cached articles + skipped++ + continue // Skip cached article } + astart = time.Now() // Send TAKETHIS command with article content (non-blocking) - cmdID, err := conn.SendTakeThisArticleStreaming(article, &processor.LocalNNTPHostname, newsgroup) + // This also queues the ReadRequest to readTAKETHISResponsesChan BEFORE returning + //log.Printf("Newsgroup: '%s' | ++Pre-Send TAKETHIS '%s'", newsgroup, article.MessageID) + + // Increment pending responses counter before sending + job.PendingResponses.Add(1) + + cmdID, txBytes, err, doContinue := conn.SendTakeThisArticleStreaming(article, &processor.LocalNNTPHostname, newsgroup, demuxer, readTAKETHISResponsesChan, job, n, len(articles)-skipped) + astart2 = time.Now() + job.Mux.Lock() + job.TTxBytes += uint64(txBytes) + job.TmpTxBytes += uint64(txBytes) + job.Mux.Unlock() + ttxBytes += uint64(txBytes) if err != nil { - if err == common.ErrNoNewsgroups { - log.Printf("Newsgroup: '%s' | skipped article '%s': no newsgroups header", newsgroup, article.MessageID) + // Decrement on error since no response will come + job.PendingResponses.Done() + if err == common.ErrNoNewsgroups || doContinue { + job.NGTProgress.Increment(nntp.IncrFLAG_SKIPPED, 1) + log.Printf("Newsgroup: '%s' | skipped TAKETHIS '%s': bad newsgroups header doContinue=%t", newsgroup, article.MessageID, doContinue) continue } - conn.ForceCloseConn() + conn.Unlock() + job.NGTProgress.Increment(nntp.IncrFLAG_CONN_ERRORS, 1) log.Printf("ERROR Newsgroup: '%s' | Failed to send TAKETHIS for %s: %v", newsgroup, article.MessageID, err) - return 0, redis_cached, fmt.Errorf("failed to send TAKETHIS for %s: 
%v", article.MessageID, err) + return redis_cached, fmt.Errorf("failed to send TAKETHIS for %s: %v", article.MessageID, err) } + sentCount++ + job.NGTProgress.Increment(nntp.IncrFLAG_TTSentCount, 1) - artChan <- &CheckResponse{ - Article: article, - CmdId: cmdID, + if VERBOSE { + log.Printf("Newsgroup: '%s' | DONE TAKETHIS '%s' CmdID=%d (%d/%d sent) in %v awaiting responses astart2='%v'", newsgroup, article.MessageID, cmdID, sentCount, len(articles), time.Since(astart), time.Since(astart2)) } } - close(artChan) - //log.Printf("Sent %d TAKETHIS commands, reading responses...", len(commandIDs)) - var done []*string - var countDone, rejected int - // Phase 2: Read all responses in order - for cr := range artChan { + conn.Unlock() + if VERBOSE { + log.Printf("Newsgroup: '%s' | DONE TAKETHIS BATCH sent: %d commands. ttxBytes: %d in %v", newsgroup, sentCount, ttxBytes, time.Since(start)) + } + return redis_cached, nil +} // end func sendArticlesBatchViaTakeThis - ttMode.TmpTakeThisCount++ - takeThisResponseCode, err := conn.ReadTakeThisResponseStreaming(cr.CmdId) - if err != nil || takeThisResponseCode == 0 { - ttMode.ConnErrors++ - conn.ForceCloseConn() - log.Printf("ERROR Newsgroup: '%s' | Failed to read TAKETHIS response for %s: %v", newsgroup, cr.Article.MessageID, err) - return transferred, redis_cached, fmt.Errorf("failed to read TAKETHIS response for %s: %v", cr.Article.MessageID, err) - } - countDone++ - // Update success rate tracking - switch takeThisResponseCode { - case 239: - ttMode.TmpSuccessCount++ - transferred++ - case 439: - ttMode.Rejected++ - rejected++ - if VERBOSE { - log.Printf("Newsgroup: '%s' | Rejected article '%s': response=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, takeThisResponseCode, countDone, len(articles)) - //resultsMutex.Lock() - //rejectedArticles[newsgroup] = append(rejectedArticles[newsgroup], article.MessageID) - //resultsMutex.Unlock() +var jobRequeueMutex sync.RWMutex +var jobRequeue = make(map[*string][]*nntp.CHTTJob) + +// 
CheckQueues holds per-worker CheckQueue channels for consistent newsgroup routing +var QueuesMutex sync.RWMutex +var CheckQueues []chan *nntp.CHTTJob +var TakeThisQueues []chan *nntp.CHTTJob + +// Demuxers holds per-worker demuxer instances for statistics tracking +var Demuxers []*nntp.ResponseDemuxer +var DemuxersMutex sync.RWMutex + +// NewsgroupWorkerMap tracks which worker is assigned to each newsgroup +var NewsgroupWorkerMap = make(map[string]int) +var NewsgroupWorkerMapMux sync.RWMutex + +// WorkerQueueLength tracks how many jobs are queued per worker (for load balancing) +var WorkerQueueLength []int +var WorkerQueueLengthMux sync.Mutex + +// assignWorkerToNewsgroup finds the best worker for a newsgroup +// If newsgroup already assigned, returns same worker (sequential processing) +// If new newsgroup, assigns to least busy worker (load balancing) +func assignWorkerToNewsgroup(newsgroup string) int { + // Check if already assigned + NewsgroupWorkerMapMux.Lock() + defer NewsgroupWorkerMapMux.Unlock() + if workerID, exists := NewsgroupWorkerMap[newsgroup]; exists { + return workerID + } + + // Find least busy worker + WorkerQueueLengthMux.Lock() + if len(WorkerQueueLength) == 0 { + WorkerQueueLengthMux.Unlock() + log.Fatalf("assignWorkerToNewsgroup: no workers available?") + } + + minLoad := WorkerQueueLength[0] + workerID := 0 + for wid := 1; wid < len(WorkerQueueLength); wid++ { + if WorkerQueueLength[wid] < minLoad { + minLoad = WorkerQueueLength[wid] + workerID = wid + break + } + } + WorkerQueueLength[workerID]++ + WorkerQueueLengthMux.Unlock() + + // Assign newsgroup to this worker + NewsgroupWorkerMap[newsgroup] = workerID + + return workerID +} + +func deassignWorker(newsgroup string) { + // Remove newsgroup assignment + NewsgroupWorkerMapMux.Lock() + workerID, exists := NewsgroupWorkerMap[newsgroup] + if exists { + delete(NewsgroupWorkerMap, newsgroup) + // Decrement worker queue length + WorkerQueueLengthMux.Lock() + if workerID >= 0 && workerID < 
len(WorkerQueueLength) { + if WorkerQueueLength[workerID] > 0 { + WorkerQueueLength[workerID]-- } - case 400, 480, 500, 501, 502, 503, 504: - log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': response=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, takeThisResponseCode, countDone, len(articles)) - ttMode.TX_Errors++ - conn.ForceCloseConn() - return transferred, redis_cached, fmt.Errorf("failed to transfer article '%s': response=%d", cr.Article.MessageID, takeThisResponseCode) + } + WorkerQueueLengthMux.Unlock() + } + NewsgroupWorkerMapMux.Unlock() +} - default: - log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': unknown response=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, takeThisResponseCode, countDone, len(articles)) - ttMode.TX_Errors++ - continue +// Find first empty slot +func findEmptySlot(openConns *int, workerSlots []bool, mux *sync.Mutex) int { + mux.Lock() + defer mux.Unlock() + *openConns++ + for i := 0; i < len(workerSlots); i++ { + if !workerSlots[i] { + workerSlots[i] = true + return i } - if redisCli != nil { - done = append(done, &cr.Article.MessageID) + } + return -1 +} + +func UnsetWorker(openConns *int, slotID int, workerSlots []bool, mux *sync.Mutex) { + mux.Lock() + defer mux.Unlock() + *openConns-- + if slotID >= 0 && slotID < len(workerSlots) { + workerSlots[slotID] = false + } +} + +func BootConnWorkers(db *database.Database, pool *nntp.Pool, redisCli *redis.Client) { + openConns := 0 + workerSlots := make([]bool, nntp.NNTPTransferThreads) + defaultSleep := time.Second + isleep := defaultSleep + var mux sync.Mutex + // Create per-worker queues + QueuesMutex.Lock() + CheckQueues = make([]chan *nntp.CHTTJob, nntp.NNTPTransferThreads) + TakeThisQueues = make([]chan *nntp.CHTTJob, nntp.NNTPTransferThreads) + WorkerQueueLength = make([]int, nntp.NNTPTransferThreads) + DemuxersMutex.Lock() + Demuxers = make([]*nntp.ResponseDemuxer, nntp.NNTPTransferThreads) + DemuxersMutex.Unlock() + for i := range 
CheckQueues { + CheckQueues[i] = make(chan *nntp.CHTTJob) // no cap for CH jobs + TakeThisQueues[i] = make(chan *nntp.CHTTJob, MaxQueuedJobs) // allows max N queued TT jobs + WorkerQueueLength[i] = 0 + } + QueuesMutex.Unlock() + allEstablished := false +forever: + for { + time.Sleep(defaultSleep) + if common.WantShutdown() { + log.Printf("BootConnWorkers: WantShutdown, exiting") + break forever + } + mux.Lock() + allEstablished = openConns == nntp.NNTPTransferThreads + mux.Unlock() + if allEstablished { + continue forever + } + //var sharedConns []*nntp.BackendConn + bootN := nntp.NNTPTransferThreads - openConns + if bootN <= 0 { + log.Printf("BootConnWorkers: all %d/%d connections established", openConns, nntp.NNTPTransferThreads) + continue forever + } + // get connections from pool + log.Printf("BootConnWorkers: need %d connections (have %d), getting from pool...", bootN, openConns) + returnSignals := make([]*ReturnSignal, bootN) + errChan := common.GetStructChanCap1() + defer common.RecycleStructChanCap1(errChan) + newConns := 0 + for workerID := range bootN { + // Get a connection from pool + conn, err := pool.Get(nntp.MODE_STREAM_MV) + if err != nil { + log.Printf("BootConnWorkers failed to get connection from pool: %v ... 
retry in: %v", err, isleep) + if isleep > defaultSleep { + time.Sleep(isleep) + } + isleep = isleep * 2 + if isleep > time.Minute { + isleep = time.Minute + } + continue forever + } + if conn.ModeReader { + if VERBOSE { + log.Printf("got connection in reader mode, closing and getting a new one") + } + conn.ForceCloseConn() + continue forever + } + // got a connection + slotID := findEmptySlot(&openConns, workerSlots, &mux) + if slotID < 0 { + log.Printf("BootConnWorkers: no empty worker slot found, closing connection") + conn.ForceCloseConn() + continue forever + } + returnSignal := &ReturnSignal{ + slotID: slotID, + errChan: errChan, + redisCli: redisCli, + ExitChan: make(chan *ReturnSignal, 1), + jobsQueued: make(map[*nntp.CHTTJob]uint64), + jobsReadOK: make(map[*nntp.CHTTJob]uint64), + jobMap: make(map[*string]*nntp.CHTTJob), + jobs: make([]*nntp.CHTTJob, 0, MaxQueuedJobs), + } + + returnSignals[workerID] = returnSignal + // assign checkQueue by openConns counter + // so restarted workers get same channels to read from + go CHTTWorker(db, workerID, conn, returnSignal) + newConns++ + } + if newConns == 0 { + log.Printf("BootConnWorkers: no connections obtained, retry in: %v", isleep) + isleep = isleep * 2 + if isleep > time.Minute { + isleep = time.Minute + } + continue forever } - /* - if countDone > 100 && rejected > 10 { - failRate := float64(rejected) / float64(countDone) * 100 - if failRate > 10 { - ttMode.CheckMode = true - breakChan <- struct{}{} - return transferred, redis_cached, fmt.Errorf("Newsgroup: '%s' | ABORT streamed takethis batch. failRate: %.1f%%. 
transferred=%d rejected=%d", newsgroup, failRate, transferred, rejected) + isleep = defaultSleep // reset to default + log.Printf("BootConnWorkers: launched %d CHTT workers", newConns) + // Monitor recently launched CHTT workers + go func() { + monitoring := newConns + for { + time.Sleep(100 * time.Millisecond) + for workerID, wait := range returnSignals { + if wait == nil { + continue + } + select { + case rs := <-wait.ExitChan: + WorkerQueueLengthMux.Lock() + log.Printf("CHTTWorker (%d) exited. processed jobs: %d", workerID, WorkerQueueLength[workerID]) + WorkerQueueLengthMux.Unlock() + + monitoring-- + + UnsetWorker(&openConns, rs.slotID, workerSlots, &mux) + returnSignals[workerID] = nil + + rs.Mux.Lock() + if len(rs.jobs) > 0 { + log.Printf("CHTTWorker (%d) try requeue %d jobs", workerID, len(rs.jobs)) + for _, job := range rs.jobs { + if job != nil { + // copy articles pointer + job.Mux.Lock() + if len(job.MessageIDs) == 0 { + log.Printf("ERROR in CHTTWorker (%d) job #%d has no articles, skipping requeue", workerID, job.JobID) + job.Mux.Unlock() + continue + } + rqj := &nntp.CHTTJob{ + JobID: job.JobID, + Newsgroup: job.Newsgroup, + MessageIDs: job.MessageIDs, + OffsetQ: job.OffsetQ, + NGTProgress: job.NGTProgress, + } + job.Mux.Unlock() + + jobRequeueMutex.Lock() + jobRequeue[rqj.Newsgroup] = append(jobRequeue[rqj.Newsgroup], rqj) + jobRequeueMutex.Unlock() + log.Printf("CHTTWorker (%d) did requeue job #%d with %d articles for newsgroup '%s'", workerID, rqj.JobID, len(rqj.MessageIDs), *rqj.Newsgroup) + // unlink pointers + job.Mux.Lock() + select { + case job.ResponseChan <- nil: + default: + } + if job.TTMode != nil { + job.TTMode.Newsgroup = nil + } + job.Newsgroup = nil + job.TTMode = nil + job.MessageIDs = nil + job.WantedIDs = nil + job.OffsetQ = nil + job.NGTProgress = nil + job.Mux.Unlock() + } + } + log.Printf("CHTTWorker (%d) did requeue %d jobs", workerID, len(rs.jobs)) + } + + // Clean up ReturnSignal maps and unlink pointers + // Clean up jobMap 
- nil all pointers before deleting + //log.Printf("CHTTWorker (%d) cleaning up jobMap with %d entries", i, len(rs.jobMap)) + for msgID := range rs.jobMap { + rs.jobMap[msgID] = nil + delete(rs.jobMap, msgID) + } + rs.jobMap = nil + + // Clean up jobsQueued + //log.Printf("CHTTWorker (%d) cleaning up jobsQueued with %d entries", i, len(rs.jobsQueued)) + for job := range rs.jobsQueued { + delete(rs.jobsQueued, job) + } + rs.jobsQueued = nil + + // Clean up jobsReadOK + //log.Printf("CHTTWorker (%d) cleaning up jobsReadOK with %d entries", i, len(rs.jobsReadOK)) + for job := range rs.jobsReadOK { + delete(rs.jobsReadOK, job) + } + rs.jobsReadOK = nil + + // Clean up jobs slice - nil all pointers + //log.Printf("CHTTWorker (%d) cleaning up jobs slice with %d entries", i, len(rs.jobs)) + for idx := range rs.jobs { + rs.jobs[idx] = nil + } + rs.jobs = nil + rs.redisCli = nil + rs.ExitChan = nil + rs.errChan = nil + + rs.Mux.Unlock() + // TODO: check remaining work and restart connection + default: + // Worker still running + } + } + if monitoring == 0 { + return } } - */ - } // end for cmdChan + }() + } // end forever + log.Printf("BootConnWorkers: quit") +} // end func BootConnWorkers + +var DefaultCheckTicker = 5 * time.Second + +var JobsToRetry []*nntp.CHTTJob +var JobsToRetryMux sync.Mutex + +type ReturnSignal struct { + Mux sync.Mutex + ConnMux sync.Mutex // Simple mutex to serialize connection access + slotID int + ExitChan chan *ReturnSignal + errChan chan struct{} + redisCli *redis.Client + jobsQueued map[*nntp.CHTTJob]uint64 + jobsReadOK map[*nntp.CHTTJob]uint64 + jobMap map[*string]*nntp.CHTTJob + jobs []*nntp.CHTTJob +} - if redisCli != nil && len(done) > 0 { - // Cache transferred or rejected message IDs in Redis using pipeline (1 round trip) - pipe := redisCli.Pipeline() +func replyChan(request chan struct{}, reply chan struct{}) { + select { + case <-request: + // got a reply request + select { + case reply <- struct{}{}: // send back + default: + // pass, 
is full + } + default: + // pass, no request + } +} + +func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs *ReturnSignal) { + var mux sync.Mutex + var runningTTJobs int // protected by local mux + var workerWG sync.WaitGroup + TTworkerRequestChan := make(chan struct{}, 1) + TTworkerReleaseChan := make(chan struct{}, 1) + readCHECKResponsesChan := make(chan *nntp.ReadRequest, 1024*1024) + readTAKETHISResponsesChan := make(chan *nntp.ReadRequest, 1024*1024) + errChan := make(chan struct{}, 9) + + QueuesMutex.RLock() + WorkersTTChannel := TakeThisQueues[workerID] + WorkersCheckChannel := CheckQueues[workerID] + QueuesMutex.RUnlock() + + tickChan := common.GetStructChanCap1() + requestReplyJobDone := common.GetStructChanCap1() + replyJobDone := common.GetStructChanCap1() + defer common.RecycleStructChanCap1(tickChan) + defer common.RecycleStructChanCap1(requestReplyJobDone) + defer common.RecycleStructChanCap1(replyJobDone) + + // Create ResponseDemuxer to eliminate race conditions in ReadCodeLine + demuxer := nntp.NewResponseDemuxer(conn, errChan) + + // Store demuxer for statistics tracking + DemuxersMutex.Lock() + if workerID < len(Demuxers) { + Demuxers[workerID] = demuxer + } + DemuxersMutex.Unlock() + + defer func(conn *nntp.BackendConn, rs *ReturnSignal) { + conn.ForceCloseConn() + rs.ExitChan <- rs + common.SignalErrChan(errChan) + }(conn, rs) + //lastRun := time.Now() + + // Start the central response reader (CRITICAL: only ONE goroutine reads from connection) + demuxer.Start() + log.Printf("CheckWorker (%d): Started ResponseDemuxer", workerID) + + // launch go routine which sends CHECK commands + workerWG.Add(1) + go func(workerWG *sync.WaitGroup) { + ticker := time.NewTicker(DefaultCheckTicker) + defer func(workerWG *sync.WaitGroup) { + conn.ForceCloseConn() + ticker.Stop() + common.SignalErrChan(errChan) + workerWG.Done() + log.Printf("CheckWorker (%d): CHECK sender goroutine exiting", workerID) + + }(workerWG) + // tick every n 
seconds to check if any CHECKs to do + loop: + for { + select { + case <-errChan: + common.SignalErrChan(errChan) + log.Printf("CheckWorker (%d): Send CHECK got errChan signal... exiting", workerID) + return + + case <-tickChan: + if common.WantShutdown() { + log.Printf("CheckWorker (%d): Tick WantShutdown, exiting", workerID) + return + } - // Queue all SET commands - for _, msgID := range done { - pipe.Set(redisCtx, *msgID, "1", REDIS_TTL) + // Get the next job to process + rs.Mux.Lock() + if len(rs.jobs) == 0 { + rs.Mux.Unlock() + log.Printf("CheckWorker (%d): Ticked but no jobs in queue, continue...", workerID) + continue loop + } + if VERBOSE && len(rs.jobs) >= MaxQueuedJobs { + log.Printf("CheckWorker (%d): Ticked and found %d jobs in queue (max: %d)", workerID, len(rs.jobs), MaxQueuedJobs) + } + currentJob := rs.jobs[0] + rs.jobs = rs.jobs[1:] // Remove first job from queue + rs.Mux.Unlock() + if currentJob == nil { + log.Printf("ERROR CheckWorker (%d): got nil job from queue, skipping...", workerID) + continue loop + } + workerID := assignWorkerToNewsgroup(*currentJob.Newsgroup) + requestedRelease := false + if len(WorkersTTChannel) >= cap(WorkersTTChannel) { + log.Printf("CheckWorker (%d): waiting... takeThisChan full (%d)", workerID, len(WorkersTTChannel)) + select { + case TTworkerRequestChan <- struct{}{}: + requestedRelease = true + default: + } + } + waiting: + for { + if len(WorkersTTChannel) < cap(WorkersTTChannel) { + break waiting + } + select { + case <-errChan: + common.SignalErrChan(errChan) + log.Printf("CheckWorker (%d): waiting for TakeThisChan got errChan signal... 
exiting", workerID) + return + case <-time.After(time.Millisecond * 16): + if len(WorkersTTChannel) < cap(WorkersTTChannel) { + break waiting + } + case <-TTworkerReleaseChan: + if len(WorkersTTChannel) < cap(WorkersTTChannel) { + break waiting + } + if !requestedRelease { + TTworkerRequestChan <- struct{}{} + requestedRelease = true + } + default: + // continues waiting loop + } + } + currentJob.OffsetQ.OffsetBatchDone() + currentJob.TTMode.FlipMode(LowerLevel, UpperLevel) + if currentJob.TTMode.UseCHECK() { + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d waits to check %d message IDs in batches of %d", *currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs), BatchCheck) + + if !conn.IsConnected() { + rs.Mux.Lock() + rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) // requeue at front + rs.Mux.Unlock() + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d connection lost before SendCheckMultiple for batch (offset %d: %d-%d)", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) + time.Sleep(time.Second) + return + } + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquire connection lock for batch (offset %d: %d-%d) (%d messages)", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, len(currentJob.MessageIDs)) + rs.ConnMux.Lock() + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquired connection lock for batch (offset %d: %d-%d) -> SendCheckMultiple", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) + checksSent, err := conn.SendCheckMultiple(currentJob.MessageIDs, readCHECKResponsesChan, currentJob, demuxer) + rs.ConnMux.Unlock() + if err != nil { + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d SendCheckMultiple error for batch (offset %d: %d-%d): %v", *currentJob.Newsgroup, workerID, 
currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, err) + time.Sleep(time.Second) + rs.Mux.Lock() + rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) // requeue at front + rs.Mux.Unlock() + return + } + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d Sent CHECK for batch (offset %d: %d-%d), responses will be read asynchronously...", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) + + // Add CHECK sent count to progress after all batches are sent + currentJob.NGTProgress.Mux.Lock() + currentJob.NGTProgress.CheckSentCount += checksSent + currentJob.NGTProgress.Mux.Unlock() + } else { + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d skipping CHECK for %d message IDs (TAKETHIS mode)", *currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs)) + currentJob.WantedIDs = currentJob.MessageIDs + // Use blocking send with select for graceful shutdown support + select { + case WorkersTTChannel <- currentJob: // local takethis chan sharing the same connection + // Job successfully enqueued + case <-errChan: + // Shutdown requested, requeue job and exit + rs.Mux.Lock() + rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) 
+ rs.Mux.Unlock() + log.Printf("CheckWorker (%d): Shutdown while waiting to enqueue TAKETHIS job", workerID) + return + } + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d sent to local TakeThisChan", *currentJob.Newsgroup, workerID, currentJob.JobID) + } + //lastRun = time.Now() + // Check if there are more jobs to process + //log.Printf("CheckWorker (%d): job #%d CHECK done, checking for more jobs...", workerID, currentJob.JobID) + rs.Mux.Lock() + hasMoreJobs := len(rs.jobs) > 0 + rs.Mux.Unlock() + replyChan(requestReplyJobDone, replyJobDone) // see if anybody is waiting and reply + //log.Printf("CheckWorker (%d): job #%d CHECK done, hasMoreJobs=%v", workerID, currentJob.JobID, hasMoreJobs) + + // If there are more jobs waiting, immediately trigger next job processing + if hasMoreJobs { + common.SignalTickChan(tickChan) + } + //log.Printf("CheckWorker (%d): job #%d CHECKs sent, loop to next job", workerID, currentJob.JobID) + + case <-ticker.C: + if common.WantShutdown() { + log.Printf("CheckWorker (%d): Ticker WantShutdown, exiting", workerID) + return + } + rs.Mux.Lock() + hasWork := len(rs.jobs) > 0 + rs.Mux.Unlock() + if hasWork { + //log.Printf("CheckWorker (%d): Ticker found work to do, signaling tickChan...", workerID) + common.SignalTickChan(tickChan) + } else { + //log.Printf("CheckWorker (%d): Ticker found no work to do.", workerID) + } + } // end select + } // end forever + }(&workerWG) + + // launch a go routine to read CHECK responses from the supplied connection with textproto readline + workerWG.Add(1) + go func(workerWG *sync.WaitGroup) { + var responseCount int + var tookTime time.Duration + defer func(workerWG *sync.WaitGroup) { + conn.ForceCloseConn() + common.SignalErrChan(errChan) + workerWG.Done() + }(workerWG) + loop: + for { + select { + case <-errChan: + log.Printf("CheckWorker (%d): Read CHECK responses got errChan signal...", workerID) + common.SignalErrChan(errChan) + log.Printf("CheckWorker (%d): Read CHECK responses 
exiting", workerID) + return + + case rr := <-readCHECKResponsesChan: + //log.Printf("CheckWorker (%d): Read CHECK got readRequest for rr: '%v'", workerID, rr) + if rr == nil || rr.MsgID == nil { + log.Printf("CheckWorker (%d): Read CHECK got nil readRequest, skipping", workerID) + continue loop + } + if common.WantShutdown() { + log.Printf("CheckWorker (%d): Read CHECK WantShutdown, exiting", workerID) + rr.ClearReadRequest(nil) + return + } + //log.Printf("CheckWorker (%d): Read CHECK response (do conn check) for msgID: %s (cmdID=%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) + if !conn.IsConnected() { + log.Printf("CheckWorker (%d): Read CHECK connection lost, exiting", workerID) + rr.ClearReadRequest(nil) + return + } + //log.Printf("CheckWorker (%d): Pre-Read CHECK response for msgID: %s (cmdID=%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) + start := time.Now() + + // NEW: Get pre-read response from demuxer (eliminates race condition) + var respData *nntp.ResponseData + select { + case respData = <-demuxer.GetCheckResponseChan(): + // Got response from demuxer + case <-errChan: + log.Printf("CheckWorker (%d): Read CHECK got errChan while waiting for response", workerID) + rr.ClearReadRequest(respData) + return + } + + // Verify we got the expected command ID + if respData.CmdID != rr.CmdID { + log.Printf("ERROR CheckWorker (%d): Command ID mismatch! 
Expected %d, got %d", workerID, rr.CmdID, respData.CmdID) + rr.ClearReadRequest(respData) + return + } + + if respData.Code == 0 && respData.Err != nil { + log.Printf("Failed to read CHECK response: %v", respData.Err) + rr.ClearReadRequest(respData) + return + } + + took := time.Since(start) + tookTime += took + responseCount++ + rr.Job.NGTProgress.Increment(nntp.IncrFLAG_CHECKED, 1) + if rr.N == 1 && took.Milliseconds() > 1000 { + log.Printf("CheckWorker (%d): time to first response for msgID: %s (cmdID=%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, took.Milliseconds()) + tookTime = 0 + } else if responseCount >= 10000 { + avg := time.Duration(float64(tookTime) / float64(responseCount)) + if avg.Milliseconds() > 0 { + log.Printf("CheckWorker (%d): Read %d CHECK responses, avg latency: %v, last: %v (cmdID=%d MID=%d/%d)", workerID, responseCount, avg, took, rr.CmdID, rr.N, rr.Reqs) + } + responseCount = 0 + tookTime = 0 + } + // Parse response line + // Format: code [message] + // 238 - article wanted + // 431 - article not wanted + // 438 - article not wanted (already have it) + // ReadCodeLine returns: code=238, message=" article wanted" + parts := strings.Fields(respData.Line) + if len(parts) < 1 { + log.Printf("ERROR in CheckWorker: Malformed CHECK response code=%d line: '%s' (cmdID=%d MID=%d/%d)", respData.Code, respData.Line, rr.CmdID, rr.N, rr.Reqs) + rr.ClearReadRequest(respData) + return + } + if parts[0] != *rr.MsgID { + log.Printf("ERROR in CheckWorker: Mismatched CHECK response: expected '%s', got '%s' code=%d (cmdID=%d MID=%d/%d)", *rr.MsgID, parts[0], respData.Code, rr.CmdID, rr.N, rr.Reqs) + rr.ClearReadRequest(respData) + return + } + //log.Printf("CheckWorker (%d): Got CHECK response: '%s' (cmdID=%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) + + rs.Mux.Lock() + job, exists := rs.jobMap[rr.MsgID] + rs.Mux.Unlock() + job.NGTProgress.AddNGTP(1, 0, 0) + 
//log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG1 Processing CHECK response for msgID: %s (cmdID=%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) + if !exists { + log.Printf("Newsgroup: '%s' | ERROR in CheckWorker: ReadCheckResponse msgId '%s' did not exist in jobMap.", *job.Newsgroup, *rr.MsgID) + rr.ClearReadRequest(respData) + continue loop + } + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG2 Processing CHECK response for msgID: %s (cmdID=%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) + rs.Mux.Lock() + rs.jobMap[rr.MsgID] = nil // Nil the pointer before deleting + delete(rs.jobMap, rr.MsgID) + rs.jobsReadOK[job]++ + rs.Mux.Unlock() + switch respData.Code { + case 238: + //log.Printf("Newsgroup: '%s' | Got Response: Wanted Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) + job.AppendWantedMessageID(rr.MsgID) + job.NGTProgress.Increment(nntp.IncrFLAG_WANTED, 1) + + case 438: + //log.Printf("Newsgroup: '%s' | Got Response: Unwanted Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) + job.NGTProgress.Increment(nntp.IncrFLAG_UNWANTED, 1) + // Cache unwanted in Redis if enabled + if rs.redisCli != nil { + if err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err(); err != nil && VERBOSE { + log.Printf("Newsgroup: '%s' | Failed to cache rejected message ID in Redis: %v", *rr.Job.Newsgroup, err) + } + } + + case 431: + //log.Printf("Newsgroup: '%s' | Got Response: Retry Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) + job.NGTProgress.Increment(nntp.IncrFLAG_RETRY, 1) + /* disabled caching of retries in Redis for now + if rs.redisCli != nil { + if err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err(); err != nil && VERBOSE { + log.Printf("Newsgroup: '%s' | Failed to cache retry message ID in Redis: %v", *rr.Job.Newsgroup, err) + } + } + */ + + default: + log.Printf("Newsgroup: '%s' | Unknown CHECK response: line='%s' 
code=%d expected msgID %s", *job.Newsgroup, respData.Line, respData.Code, *rr.MsgID) + } + // check if all jobs are done + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG3 Processing CHECK response for msgID: %s (cmdID=%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) + rs.Mux.Lock() + queuedCount, qexists := rs.jobsQueued[job] + readCount, rexists := rs.jobsReadOK[job] + rs.Mux.Unlock() + if !qexists || !rexists { + log.Printf("Newsgroup: '%s' | ERROR in CheckWorker: queuedCount or readCount did not exist for a job?!", *job.Newsgroup) + rr.ClearReadRequest(respData) + continue loop + } + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG4 Processing CHECK response for msgID: %s (cmdID=%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) + rr.ClearReadRequest(respData) + + if queuedCount == readCount { + rs.Mux.Lock() + delete(rs.jobsQueued, job) + delete(rs.jobsReadOK, job) + rs.Mux.Unlock() + if len(job.WantedIDs) > 0 { + // Pass job to TAKETHIS worker via channel + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5 job #%d got all %d CHECK responses, passing to TAKETHIS worker (wanted: %d articles) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, queuedCount, len(job.WantedIDs), len(WorkersTTChannel)) + WorkersTTChannel <- job // local takethis chan sharing the same connection + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5c Sent job #%d to TAKETHIS worker (wanted: %d/%d) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, len(job.WantedIDs), queuedCount, len(WorkersTTChannel)) + + } else { + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG6 job #%d got %d CHECK responses but server wants none", *job.Newsgroup, workerID, job.JobID, queuedCount) + // Send response and close channel for jobs with no wanted articles + job.Response(true, nil) + } + } else { + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG6 job #%d CHECK responses so 
far: %d/%d readResponsesChan=%d", *job.Newsgroup, workerID, job.JobID, readCount, queuedCount, len(readResponsesChan)) + } + continue loop + } // end select + } // end forever + }(&workerWG) + + // launch a goroutine to process TAKETHIS responses concurrently + // This follows the EXACT pattern as CHECK response reader (lines 2366-2552) + workerWG.Add(1) + go func(workerWG *sync.WaitGroup) { + defer func(workerWG *sync.WaitGroup) { + conn.ForceCloseConn() + common.SignalErrChan(errChan) + workerWG.Done() + }(workerWG) + + ttloop: + for { + select { + case <-errChan: + log.Printf("TTResponseWorker (%d): got errChan signal, exiting", workerID) + common.SignalErrChan(errChan) + return + + case rr := <-readTAKETHISResponsesChan: + if rr == nil || rr.MsgID == nil { + log.Printf("TTResponseWorker (%d): got nil readRequest, skipping", workerID) + if rr.Job != nil { + rr.Job.PendingResponses.Done() // Mark response as handled (error case) + } + continue ttloop + } + if common.WantShutdown() { + log.Printf("TTResponseWorker (%d): WantShutdown, exiting", workerID) + rr.Job.PendingResponses.Done() // Mark response as handled (error case) + rr.ClearReadRequest(nil) + return + } + if !conn.IsConnected() { + log.Printf("TTResponseWorker (%d): connection lost, exiting", workerID) + rr.Job.PendingResponses.Done() // Mark response as handled (error case) + rr.ClearReadRequest(nil) + return + } + + //log.Printf("TTResponseWorker (%d): Pre-Read TAKETHIS response for msgID: %s (cmdID=%d)", workerID, *rr.MsgID, rr.CmdID) + + // Get pre-read response from demuxer (same pattern as CHECK) + var respData *nntp.ResponseData + select { + case respData = <-demuxer.GetTakeThisResponseChan(): + // Got response from demuxer + case <-errChan: + log.Printf("TTResponseWorker (%d): got errChan while waiting for response", workerID) + rr.Job.PendingResponses.Done() // Mark response as handled (error case) + rr.ClearReadRequest(respData) + return + } + + // Verify we got the expected command ID + if 
respData.CmdID != rr.CmdID { + log.Printf("ERROR TTResponseWorker (%d): Command ID mismatch! Expected %d, got %d", workerID, rr.CmdID, respData.CmdID) + rr.Job.PendingResponses.Done() // Mark response as handled (error case) + rr.ClearReadRequest(respData) + return + } + + if respData.Err != nil { + log.Printf("ERROR TTResponseWorker (%d): Failed to read TAKETHIS response for %s: %v", workerID, *rr.MsgID, respData.Err) + rr.Job.NGTProgress.Increment(nntp.IncrFLAG_CONN_ERRORS, 1) + rr.Job.PendingResponses.Done() // Mark response as handled (error case) + rr.ClearReadRequest(respData) + return + } + + rr.Job.TTMode.IncrementTmp() + rr.Job.Mux.Lock() + txbytes := rr.Job.TmpTxBytes + rr.Job.TmpTxBytes = 0 + rr.Job.Mux.Unlock() + rr.Job.NGTProgress.AddNGTP(0, 1, txbytes) + + // Handle response codes + switch respData.Code { + case 239: + rr.Job.TTMode.IncrementSuccess() + rr.Job.NGTProgress.Increment(nntp.IncrFLAG_TRANSFERRED, 1) + // Cache transferred in Redis if enabled + if rs.redisCli != nil { + if err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err(); err != nil && VERBOSE { + log.Printf("Newsgroup: '%s' | Failed to cache transferred message ID in Redis: %v", *rr.Job.Newsgroup, err) + } + } + + case 439: + rr.Job.NGTProgress.Increment(nntp.IncrFLAG_REJECTED, 1) + // Cache rejection in Redis if enabled + if rs.redisCli != nil { + if err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err(); err != nil && VERBOSE { + log.Printf("Newsgroup: '%s' | Failed to cache rejected message ID in Redis: %v", *rr.Job.Newsgroup, err) + } + } + if VERBOSE { + log.Printf("Newsgroup: '%s' | Rejected article '%s': response=%d", *rr.Job.Newsgroup, *rr.MsgID, respData.Code) + } + + case 400, 480, 500, 501, 502, 503, 504: + log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': response=%d", *rr.Job.Newsgroup, *rr.MsgID, respData.Code) + rr.Job.NGTProgress.Increment(nntp.IncrFLAG_TX_ERRORS, 1) + rr.Job.PendingResponses.Done() // Mark response handled 
(error case) + rr.ClearReadRequest(respData) + return + + default: + log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': unknown response=%d", + *rr.Job.Newsgroup, *rr.MsgID, respData.Code) + rr.Job.NGTProgress.Increment(nntp.IncrFLAG_TX_ERRORS, 1) + } + rr.Job.PendingResponses.Done() + // Mark this response as processed + rr.ClearReadRequest(respData) + } // end select + } // end for + }(&workerWG) + + // launch a goroutine to process TAKETHIS jobs from local channel sharing the same connection + workerWG.Add(1) + go func(workerWG *sync.WaitGroup) { + defer func(workerWG *sync.WaitGroup) { + conn.ForceCloseConn() + common.SignalErrChan(errChan) + workerWG.Done() + }(workerWG) + var job *nntp.CHTTJob + for { + if common.WantShutdown() { + log.Printf("TTworker (%d): WantShutdown, exiting", workerID) + return + } + + select { + case job = <-WorkersTTChannel: + // got new job + if len(WorkersTTChannel) >= cap(WorkersTTChannel)-1 { + // see if anybody is waiting + select { + case <-TTworkerRequestChan: + // anybody IS waiting! 
+ select { + case TTworkerReleaseChan <- struct{}{}: + // sent release notify + default: + } + default: + // nobody was waiting + } + } + + case <-errChan: + log.Printf("TTworker (%d): got errChan signal, exiting", workerID) + errChan <- struct{}{} + return + } + if job == nil { + log.Printf("TTworker (%d): Received nil job, channels may be closing", workerID) + continue + } + if len(job.WantedIDs) == 0 { + log.Printf("Newsgroup: '%s' | TTworker (%d): job #%d has no wanted articles, skipping TAKETHIS", *job.Newsgroup, workerID, job.JobID) + job.Response(true, nil) + continue + } + // Build list of wanted articles + wantedArticles := make([]*models.Article, 0, len(job.WantedIDs)) + // TODO: fetch articles from database for all wantedIDs + wantedArticles, err := db.GetArticlesByIDs(job.Newsgroup, job.WantedIDs) + if err != nil { + log.Printf("Newsgroup: '%s' | TTworker (%d): Error fetching wanted articles from DB for job #%d: %v", *job.Newsgroup, workerID, job.JobID, err) + job.Response(false, err) + continue + } + + if len(wantedArticles) == 0 { + log.Printf("Newsgroup: '%s' | TTworker (%d): No valid wanted articles found in ArticleMap for job #%d", *job.Newsgroup, workerID, job.JobID) + job.Response(true, nil) + continue + } + mux.Lock() + runningTTJobs++ + mux.Unlock() + //log.Printf("Newsgroup: '%s' | TTworker (%d): Acquire connection lock to send TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, workerID, job.JobID, len(wantedArticles)) + //log.Printf("Newsgroup: '%s' | TTworker (%d): Sending TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, workerID, job.JobID, len(wantedArticles)) + // Send TAKETHIS commands using existing function + rs.ConnMux.Lock() + redis_cached, err := sendArticlesBatchViaTakeThis(conn, wantedArticles, job, *job.Newsgroup, rs.redisCli, demuxer, readTAKETHISResponsesChan) + rs.ConnMux.Unlock() + if err != nil { + log.Printf("Newsgroup: '%s' | TTworker (%d): Error in TAKETHIS job #%d: %v", *job.Newsgroup, workerID, 
job.JobID, err) + job.Response(false, err) + rs.Mux.Lock() + // requeue at front + rs.jobs = append([]*nntp.CHTTJob{job}, rs.jobs...) + rs.Mux.Unlock() + mux.Lock() + runningTTJobs-- + mux.Unlock() + return + } + if redis_cached > 0 { + if VERBOSE { + log.Printf("Newsgroup: '%s' | TTworker (%d): TAKETHIS job #%d sent, redis_cached=%d", *job.Newsgroup, workerID, job.JobID, redis_cached) + } + } + go func(job *nntp.CHTTJob) { + // Wait for all TAKETHIS responses to be processed before completing job + //log.Printf("Newsgroup: '%s' | TTworker (%d): Waiting for all TAKETHIS responses for job #%d", *job.Newsgroup, workerID, job.JobID) + job.PendingResponses.Wait() + //log.Printf("Newsgroup: '%s' | TTworker (%d): All TAKETHIS responses received for job #%d", *job.Newsgroup, workerID, job.JobID) + // Send response back + //log.Printf("Newsgroup: '%s' | TTworker (%d): Sending TTresponse for job #%d to responseChan len=%d", *job.Newsgroup, workerID, job.JobID, len(job.ResponseChan)) + job.Response(true, nil) + //log.Printf("Newsgroup: '%s' | TTworker (%d): Sent TTresponse for job #%d to responseChan", *job.Newsgroup, workerID, job.JobID) + mux.Lock() + runningTTJobs-- + mux.Unlock() + }(job) + } + }(&workerWG) + +forever: + for { + select { + case <-errChan: + common.SignalErrChan(errChan) + break forever + + case job := <-WorkersCheckChannel: // CheckQueues[workerID] // source: WorkersCheckChannel <- job // checkQueue <- job + if common.WantShutdown() { + log.Printf("CHTTworker: WantShutdown, exiting") + break forever + } + if job == nil || len(job.MessageIDs) == 0 { + log.Printf("CHTTworker: empty job, skipping") + if job != nil { + job.Response(true, fmt.Errorf("got job without valid wanted articles")) + } + continue + } + + // Build jobMap for tracking which message IDs belong to this job + // and count queued messages + rs.Mux.Lock() + queueFull := len(rs.jobs) >= MaxQueuedJobs || len(WorkersTTChannel) >= MaxQueuedJobs + if queueFull { + log.Printf("Newsgroup: '%s' | 
CHTTworker (%d): got job #%d with %d message IDs. queued=%d ... waiting...", *job.Newsgroup, workerID, job.JobID, len(job.MessageIDs), len(rs.jobs)) + select { + case requestReplyJobDone <- struct{}{}: + default: + log.Printf("Newsgroup: '%s' | Debug: CHTTworker (%d): job #%d could not signal requestReplyJobDone, channel full. pass", *job.Newsgroup, workerID, job.JobID) + // pass + } + } + rs.Mux.Unlock() + if queueFull { + start := time.Now() + lastPrint := start + waitForReply: + for { + select { + case <-replyJobDone: + // pass + case <-time.After(time.Millisecond * 16): + rs.Mux.Lock() + queueFull = len(rs.jobs) >= MaxQueuedJobs || len(WorkersTTChannel) >= MaxQueuedJobs + rs.Mux.Unlock() + if !queueFull { + break waitForReply + } + // log every 5s + if time.Since(lastPrint) > time.Second { + if common.WantShutdown() { + break forever + } + rs.Mux.Lock() + log.Printf("Newsgroup: '%s' | CHTTworker (%d): pre append job #%d waiting since %v rs.jobs=%d takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, time.Since(start), len(rs.jobs), len(WorkersTTChannel)) + rs.Mux.Unlock() + lastPrint = time.Now() + } + } + } + log.Printf("Newsgroup: '%s' | CHTTworker (%d): waited %v for previous jobs to clear before queuing job #%d", *job.Newsgroup, workerID, time.Since(start), job.JobID) + } + rs.Mux.Lock() + job.Mux.Lock() + for _, msgId := range job.MessageIDs { + if msgId != nil { + rs.jobMap[msgId] = job + rs.jobsQueued[job]++ // counts message ids to read check later + } + } + job.Mux.Unlock() + // Add job to processing queue + rs.jobs = append(rs.jobs, job) + // Signal ticker to process this job + common.SignalTickChan(tickChan) // goto: case <-tickChan: + rs.Mux.Unlock() + } // end select + } // end for + workerWG.Wait() + startWait := time.Now() + lastPrint := startWait +wait: + for { + mux.Lock() + if runningTTJobs == 0 { + mux.Unlock() + break wait + } + mux.Unlock() + select { + case <-errChan: + common.SignalErrChan(errChan) + break wait + default: + // continue 
} + time.Sleep(time.Millisecond * 50) + if time.Since(lastPrint) > time.Second*5 { + log.Printf("CHTTworker (%d): waiting since %v for %d running TAKETHIS jobs to complete before exiting...", workerID, time.Since(startWait), runningTTJobs) + lastPrint = time.Now() + } + } +} // end func CHTTWorker - // Execute all SET commands in one network round trip - _, err := pipe.Exec(redisCtx) - if err != nil { - log.Printf("Newsgroup: '%s' | Failed to cache %d message IDs in Redis: %v", newsgroup, len(done), err) - } else if VERBOSE { - log.Printf("Newsgroup: '%s' | Cached %d message IDs in Redis", newsgroup, len(done)) +// monitorMemoryStats logs memory statistics periodically +func monitorMemoryStats() { + + var m runtime.MemStats + startTime := time.Now() + + for { + time.Sleep(30 * time.Second) + runtime.ReadMemStats(&m) + + // Convert bytes to MB for readability + allocMB := float64(m.Alloc) / 1024 / 1024 + totalAllocMB := float64(m.TotalAlloc) / 1024 / 1024 + sysMB := float64(m.Sys) / 1024 / 1024 + heapAllocMB := float64(m.HeapAlloc) / 1024 / 1024 + heapSysMB := float64(m.HeapSys) / 1024 / 1024 + heapIdleMB := float64(m.HeapIdle) / 1024 / 1024 + heapInuseMB := float64(m.HeapInuse) / 1024 / 1024 + + log.Printf("=== MEMORY STATS (uptime: %v) ===", time.Since(startTime).Round(time.Second)) + log.Printf(" Alloc = %.2f MB (currently allocated)", allocMB) + log.Printf(" TotalAlloc = %.2f MB (cumulative allocated)", totalAllocMB) + log.Printf(" Sys = %.2f MB (obtained from system)", sysMB) + log.Printf(" HeapAlloc = %.2f MB (heap allocated)", heapAllocMB) + log.Printf(" HeapSys = %.2f MB (heap from system)", heapSysMB) + log.Printf(" HeapIdle = %.2f MB (heap idle)", heapIdleMB) + log.Printf(" HeapInuse = %.2f MB (heap in use)", heapInuseMB) + log.Printf(" NumGC = %d (garbage collections)", m.NumGC) + log.Printf(" Goroutines = %d", runtime.NumGoroutine()) + log.Printf(" GCCPUFract = %.4f%% (GC CPU fraction)", m.GCCPUFraction*100) + + // Warning if memory usage is high + if 
allocMB > 1000 { + log.Printf(" ⚠️ WARNING: High memory usage (%.2f MB)! Consider lowering -batch-check or -batch-db", allocMB) + } + if runtime.NumGoroutine() > 1000 { + log.Printf(" ⚠️ WARNING: High goroutine count (%d)! Possible goroutine leak", runtime.NumGoroutine()) } } - if VERBOSE { - log.Printf("Newsgroup: '%s' | Batch transferred: %d/%d articles. redis_cached=%d", newsgroup, transferred, len(articles), redis_cached) +} + +// startWebServer starts a simple HTTP server to display transfer results +func startWebServer(port int) { + http.HandleFunc("/", handleIndex) + http.HandleFunc("/results", handleResults) + addr := fmt.Sprintf(":%d", port) + log.Printf("Starting web server on http://ANY_ADDR:%s", addr) + if err := http.ListenAndServe(addr, nil); err != nil { + log.Printf("Web server error: %v", err) } - return transferred, redis_cached, nil -} // end func sendArticlesBatchViaTakeThis +} + +// handleIndex serves the main page with transfer results +func handleIndex(w http.ResponseWriter, r *http.Request) { + nntp.ResultsMutex.RLock() + defer nntp.ResultsMutex.RUnlock() + + // HTML template for displaying results + const htmlTemplate = ` + + + + + {{.NewsgroupsToProcess}}:{{.ServerHostName}} - go-pugleaf nntp-transfer + + + + +

+ 🚀 NNTP Transfer to {{.ServerHostName}} | +

+

+ Start Date: {{.StartDate}} to + End Date: {{.EndDate}} +

+
+ {{if eq .Started 0}} + Waiting for transfers to start... + {{else}} + {{if eq 0 .NewsgroupsToProcess}} + ✅ All complete! +

+ Summary Statistics:
+ Total Articles: {{.TotalArticles}}
+ Cache Hits (Total): {{.TotalRedisCacheHits}}
+   ├─ Before CHECK: {{.TotalRedisCacheBeforeCheck}}
+   └─ Before TAKETHIS: {{.TotalRedisCacheBeforeTakethis}}
+ Checked: {{.TotalChecked}}
+ Wanted: {{.TotalWanted}}
+ Unwanted: {{.TotalUnwanted}}
+ Transferred: {{.TotalTransferred}}
+ Rejected: {{.TotalRejected}}
+ Retry: {{.TotalRetry}}
+ Skipped: {{.TotalSkipped}}
+ TX Errors: {{.TotalTXErrors}}
+ Conn Errors: {{.TotalConnErrors}} + {{else}} +
+
+
+ {{subtract .TotalNewsgroups .NewsgroupsToProcess}} / {{.TotalNewsgroups}} {{if gt .TotalNewsgroups 0}} @ {{multiply (divide (subtract .TotalNewsgroups .NewsgroupsToProcess) .TotalNewsgroups) 100}}{{else}}0{{end}}% +
+
+
+ Live Statistics:
+ Total Articles: {{.TotalArticles}}
+ Cache Hits (Total): {{.TotalRedisCacheHits}}
+   ├─ Before CHECK: {{.TotalRedisCacheBeforeCheck}}
+   └─ Before TAKETHIS: {{.TotalRedisCacheBeforeTakethis}}
+ Checked: {{.TotalCheckSentCount}} / {{.TotalChecked}}
+ Wanted: {{.TotalWanted}}
+ Unwanted: {{.TotalUnwanted}}
+ TTSentCount: {{.TotalTTSentCount}}
+ Transferred: {{.TotalTransferred}}
+ Rejected: {{.TotalRejected}}
+ Retry: {{.TotalRetry}}
+ Skipped: {{.TotalSkipped}}
+ TX Errors: {{.TotalTXErrors}}
+ Conn Errors: {{.TotalConnErrors}} + {{end}} + {{end}} +
+ + {{if .Results}} +

View /results

+ {{else}} +

No transfer results yet. Waiting for transfers to complete...

+ {{end}} + +
Last updated: {{.Timestamp}}
+ + {{if .Progress}} + + + + + + + + + + + + {{range .Progress}} + + + + + + + {{end}} + +
NG Workers: ( {{subtract .Started .Finished}} )ProgressSpeed{{if gtUint64 .GlobalSpeed 0}}
{{.GlobalSpeed}} KByte/s{{end}}
{{.LiveCH}} CH/s
{{.LiveTT}} TT/s
+ {{.Name}} +
+ + Started {{.Duration}} ago at {{.Started}} | idle: {{.TimeSince}} + +
+ {{if gt .TotalArticles 0}} +
+
+ + {{if gt .OffsetStart 0}}{{multiply (divide .OffsetStart .TotalArticles) 100}}{{else}}0{{end}}% + +
+
+ {{else}} + Initializing... + {{end}} +
+ {{if gt .TotalArticles 0}} + {{.OffsetStart}}/{{.TotalArticles}} + {{end}} +
+ {{if gtUint64 .SpeedKB 0}} + {{.SpeedKB}} KByte/s + {{else}} + - + {{end}} +
CH: {{.LastArtPerfC}}/s
TT: {{.LastArtPerfT}}/s
+ {{end}} + + {{if .DemuxerStats}} +

Response Demuxer Statistics ({{len .DemuxerStats}} Workers)

+ Workers print only if they have non-empty channels or if last request is longer than 15s ago. + + + + + + + + + + + + {{range .DemuxerStats}} + {{if or (gt .PendingCommands 0) (gt .CheckResponsesQueued 0) (gt .TTResponsesQueued 0) (gt .IdleSeconds 15)}} + + + + + + + + {{end}} + {{end}} + +
Worker IDPending CommandsCHECK Responses QueuedTAKETHIS Responses QueuedIdle Time (seconds)
Worker #{{.WorkerID}}{{.PendingCommands}}{{.CheckResponsesQueued}}{{.TTResponsesQueued}}{{.IdleSeconds}}
+ {{end}} + + +` + + tmpl, err := template.New("index").Funcs(template.FuncMap{ + "subtract": func(a, b int64) int64 { return a - b }, + "eq": func(a, b int64) bool { return a == b }, + "gt": func(a, b int64) bool { return a > b }, + "gtUint64": func(a, b uint64) bool { return a > b }, + "divide": func(a, b int64) float64 { + if b == 0 { + return 0 + } + return float64(a) / float64(b) + }, + "multiply": func(a float64, b int) int { return int(a * float64(b)) }, + }).Parse(htmlTemplate) + if err != nil { + http.Error(w, "Template error", http.StatusInternalServerError) + return + } + + // Calculate started and finished counts and collect progress details + type ProgressInfo struct { + Name string + OffsetStart int64 + BatchStart int64 + BatchEnd int64 + TotalArticles int64 + Started string + LastUpdated string + Finished bool + SpeedKB uint64 + Duration string + TimeSince string + LastArtPerfC uint64 + LastArtPerfT uint64 + } + + started := int64(len(nntp.NewsgroupTransferProgressMap)) + var finished int64 + var progressList []ProgressInfo + var liveCH, liveTT uint64 + for name, progress := range nntp.NewsgroupTransferProgressMap { + progress.CalcSpeed() + progress.Mux.RLock() + if progress.Finished { + finished++ + progress.Mux.RUnlock() + continue // Skip finished newsgroups - they're already in results + } + + duration := time.Since(progress.Started).Round(time.Second).String() + + progressList = append(progressList, ProgressInfo{ + Name: name, + OffsetStart: progress.OffsetStart, + //BatchStart: progress.BatchStart, + //BatchEnd: progress.BatchEnd, + TotalArticles: progress.TotalArticles, + Started: progress.Started.Format("15:04:05"), + LastUpdated: progress.LastUpdated.Format("15:04:05"), + TimeSince: time.Since(progress.LastUpdated).Round(time.Second).String(), + SpeedKB: progress.LastSpeedKB, + LastArtPerfC: progress.LastArtPerfC, + LastArtPerfT: progress.LastArtPerfT, + Finished: false, + Duration: duration, + }) + liveCH += progress.LastArtPerfC + liveTT 
+= progress.LastArtPerfT + progress.Mux.RUnlock() + } + + // Sort progress list by newsgroup name for consistent display + sort.Slice(progressList, func(i, j int) bool { + return progressList[i].Name < progressList[j].Name + }) + + // Collect demuxer statistics + type DemuxerStats struct { + WorkerID int + PendingCommands int64 + CheckResponsesQueued int64 + TTResponsesQueued int64 + LastRequest time.Time + IdleSeconds int64 + } + var demuxerStats []DemuxerStats + DemuxersMutex.RLock() + for i, demux := range Demuxers { + if demux != nil { + pending, checkQueued, ttQueued, lastReq := demux.GetDemuxerStats() + var idleSeconds int64 + if !lastReq.IsZero() { + idleSeconds = int64(time.Since(lastReq).Seconds()) + } + demuxerStats = append(demuxerStats, DemuxerStats{ + WorkerID: i, + PendingCommands: pending, + CheckResponsesQueued: checkQueued, + TTResponsesQueued: ttQueued, + LastRequest: lastReq, + IdleSeconds: idleSeconds, + }) + } + } + DemuxersMutex.RUnlock() + + data := struct { + TotalNewsgroups int64 + NewsgroupsToProcess int64 + Results []string + Started int64 + Finished int64 + Progress []ProgressInfo + DemuxerStats []DemuxerStats + Timestamp string + StartDate string + EndDate string + ServerHostName string + GlobalSpeed uint64 + TotalArticles uint64 + TotalRedisCacheHits uint64 + TotalRedisCacheBeforeCheck uint64 + TotalRedisCacheBeforeTakethis uint64 + TotalChecked uint64 + TotalCheckSentCount uint64 + TotalUnwanted uint64 + TotalWanted uint64 + TotalTransferred uint64 + TotalTTSentCount uint64 + TotalRejected uint64 + TotalRetry uint64 + TotalSkipped uint64 + TotalTXErrors uint64 + TotalConnErrors uint64 + LiveCH uint64 + LiveTT uint64 + }{ + TotalNewsgroups: TotalNewsgroups, + NewsgroupsToProcess: NewsgroupsToProcess, + Results: results, + Started: started, + Finished: finished, + Progress: progressList, + DemuxerStats: demuxerStats, + Timestamp: time.Now().Format("2006-01-02 15:04:05"), + StartDate: StartDate, + EndDate: EndDate, + ServerHostName: 
ServerHostName, + GlobalSpeed: GlobalSpeed, + TotalArticles: globalTotalArticles, + TotalRedisCacheHits: totalRedisCacheHits, + TotalRedisCacheBeforeCheck: totalRedisCacheBeforeCheck, + TotalRedisCacheBeforeTakethis: totalRedisCacheBeforeTakethis, + TotalChecked: totalChecked, + TotalCheckSentCount: totalCheckSentCount, + TotalWanted: totalWanted, + TotalUnwanted: totalUnwanted, + TotalTransferred: totalTransferred, + TotalTTSentCount: totalTTSentCount, + TotalRejected: totalRejected, + TotalRetry: totalRetry, + TotalSkipped: totalSkipped, + TotalTXErrors: totalTXErrors, + TotalConnErrors: totalConnErrors, + LiveCH: liveCH, + LiveTT: liveTT, + } + + w.Header().Set("Content-Type", "text/html; charset=utf-8") + if err := tmpl.Execute(w, data); err != nil { + log.Printf("Template execution error: %v", err) + } +} + +// handleResults serves the results page as plain text +func handleResults(w http.ResponseWriter, r *http.Request) { + nntp.ResultsMutex.RLock() + defer nntp.ResultsMutex.RUnlock() + + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + + if len(results) == 0 { + fmt.Fprintln(w, "No transfer results yet. 
Waiting for transfers to complete...") + return + } + + for _, result := range results { + fmt.Fprintln(w, result) + } +} diff --git a/cmd/recover-db/main.go b/cmd/recover-db/main.go index d99b5783..d8b2a7d3 100644 --- a/cmd/recover-db/main.go +++ b/cmd/recover-db/main.go @@ -33,15 +33,17 @@ func main() { database.NO_CACHE_BOOT = true // prevents booting caches log.Printf("go-pugleaf Database Recovery Tool (version: %s)", config.AppVersion) var ( - dbPath = flag.String("db", "data", "Data Path to main data directory (required)") - newsgroup = flag.String("group", "$all", "Newsgroup name to check (required) (\\$all to check for all or news.* to check for all in that hierarchy)") - verbose = flag.Bool("v", true, "Verbose output") - repair = flag.Bool("repair", false, "Attempt to repair detected inconsistencies") - parseDates = flag.Bool("parsedates", false, "Check and log date parsing differences between date_string and date_sent") - rewriteDates = flag.Bool("rewritedates", false, "Rewrite incorrect dates (requires -parsedates)") - rebuildThreads = flag.Bool("rebuild-threads", false, "Rebuild all thread relationships from scratch (destructive)") - maxPar = flag.Int("max-par", 1, "use with -rebuild-threads to process N newsgroups") - dataDir = flag.String("data", "./data", "Directory to store database files") + dbPath = flag.String("db", "data", "Data Path to main data directory (required)") + newsgroup = flag.String("group", "$all", "Newsgroup name to check (required) (\\$all to check for all or news.* to check for all in that hierarchy)") + verbose = flag.Bool("v", true, "Verbose output") + repair = flag.Bool("repair", false, "Attempt to repair detected inconsistencies") + parseDates = flag.Bool("parsedates", false, "Check and log date parsing differences between date_string and date_sent") + rewriteDates = flag.Bool("rewritedates", false, "Rewrite incorrect dates (requires -parsedates)") + rebuildThreads = flag.Bool("rebuild-threads", false, "Rebuild all thread 
relationships from scratch (destructive)") + maxPar = flag.Int("max-par", 1, "use with -rebuild-threads to process N newsgroups") + dataDir = flag.String("data", "./data", "Directory to store database files") + scanOutOfOrderCheck = flag.Bool("scan-out-of-order-overview", false, "Scan newsgroups for articles with out-of-order date_sent values") + reorderByDateSent = flag.Bool("reorder-by-datesent", false, "Reorder all articles by date_sent and write to new database file with .new extension") ) flag.Parse() @@ -146,7 +148,7 @@ func main() { wg.Done() }(wg) fmt.Printf("🧵 [%d/%d] Rebuilding threads for newsgroup: %s\n", i+1, len(newsgroups), newsgroup.Name) - report, err := db.RebuildThreadsFromScratch(newsgroup.Name, *verbose) + report, err := db.RebuildThreadsFromScratch(newsgroup.Name, *verbose, nil) if err != nil { fmt.Printf("❌ Failed to rebuild threads for '%s': %v\n", newsgroup.Name, err) return @@ -184,6 +186,28 @@ func main() { os.Exit(0) } + // If only out-of-order scan is requested, run that and exit + if *scanOutOfOrderCheck { + fmt.Printf("🔍 Starting out-of-order article scan...\n") + fmt.Printf("=====================================\n") + err := scanOutOfOrderOverview(db, newsgroups, *verbose) + if err != nil { + log.Fatalf("Out-of-order scan failed: %v", err) + } + os.Exit(0) + } + + // If reorder-by-datesent is requested, run that and exit + if *reorderByDateSent { + fmt.Printf("🔄 Starting article reordering by date_sent...\n") + fmt.Printf("=====================================\n") + err := reorderArticlesByDateSent(db, newsgroups, *verbose, *maxPar) + if err != nil { + log.Fatalf("Article reordering failed: %v", err) + } + os.Exit(0) + } + // Initialize tracking variables var results []GroupResult var ( @@ -258,7 +282,7 @@ func main() { // Optionally rebuild threads after repair if there were thread-related issues if len(report.OrphanedThreads) > 0 { fmt.Printf("🧵 Rebuilding thread relationships after repair...\n") - threadReport, err := 
db.RebuildThreadsFromScratch(newsgroup.Name, *verbose) + threadReport, err := db.RebuildThreadsFromScratch(newsgroup.Name, *verbose, nil) if err != nil { fmt.Printf("❌ Failed to rebuild threads: %v\n", err) } else { @@ -336,11 +360,11 @@ func repairDatabase(db *database.Database, newsgroup string, report *database.Co fmt.Printf("Starting repair process for %s...\n", newsgroup) // Get group databases - groupDB, err := db.GetGroupDBs(newsgroup) + groupDB, err := db.GetGroupDB(newsgroup) if err != nil { return fmt.Errorf("failed to get group databases: %w", err) } - defer groupDB.Return(db) + defer groupDB.Return() repairCount := 0 @@ -455,7 +479,7 @@ func repairDatabase(db *database.Database, newsgroup string, report *database.Co /* // createMissingOverview creates a missing overview entry from the corresponding article -func createMissingOverview(groupDB *database.GroupDBs, articleNum int64) error { +func createMissingOverview(groupDB *database.GroupDB, articleNum int64) error { var article struct { MessageID string Subject string @@ -691,7 +715,7 @@ func checkAndFixDates(db *database.Database, newsgroups []*models.Newsgroup, rew for i, newsgroup := range newsgroups { fmt.Printf("📅 [%d/%d] Checking dates in newsgroup: %s\n", i+1, len(newsgroups), newsgroup.Name) - groupDB, err := db.GetGroupDBs(newsgroup.Name) + groupDB, err := db.GetGroupDB(newsgroup.Name) if err != nil { fmt.Printf(" ❌ Failed to get group database: %v\n", err) continue @@ -700,14 +724,14 @@ func checkAndFixDates(db *database.Database, newsgroups []*models.Newsgroup, rew fixed, checked, problems, err := checkGroupDates(groupDB, newsgroup.Name, rewriteDates, verbose) if err != nil { fmt.Printf(" ❌ Failed to check dates: %v\n", err) - groupDB.Return(db) + groupDB.Return() continue } totalFixed += fixed totalChecked += checked allProblems = append(allProblems, problems...) 
- groupDB.Return(db) + groupDB.Return() if checked > 0 { if fixed > 0 { @@ -734,7 +758,7 @@ func checkAndFixDates(db *database.Database, newsgroups []*models.Newsgroup, rew } // checkGroupDates checks and optionally fixes date mismatches in a single newsgroup -func checkGroupDates(groupDB *database.GroupDBs, newsgroupName string, rewriteDates, verbose bool) (int64, int64, []DateProblem, error) { +func checkGroupDates(groupDB *database.GroupDB, newsgroupName string, rewriteDates, verbose bool) (int64, int64, []DateProblem, error) { // Query all articles with their date information - get date_sent as string to avoid timezone parsing issues rows, err := database.RetryableQuery(groupDB.DB, ` SELECT article_num, message_id, date_string, date_sent @@ -988,3 +1012,376 @@ func printDateProblemsSummary(problems []DateProblem, rewriteDates bool) { } fmt.Printf("=====================================\n") } + +// OutOfOrderArticle represents an article that has an out-of-order date_sent +type OutOfOrderArticle struct { + Newsgroup string + ArticleNum int64 + DateSent time.Time + PreviousArtNum int64 + PreviousDate time.Time +} + +// scanOutOfOrderOverview scans newsgroups for articles with out-of-order date_sent values +func scanOutOfOrderOverview(db *database.Database, newsgroups []*models.Newsgroup, verbose bool) error { + const batchSize = 10000 + var totalArticles int64 + var totalOutOfOrder int64 + var allProblems []OutOfOrderArticle + + for _, newsgroup := range newsgroups { + fmt.Printf("\n📊 Scanning newsgroup: %s\n", newsgroup.Name) + + // Get newsgroup database + groupDB, err := db.GetGroupDB(newsgroup.Name) + if err != nil { + fmt.Printf("❌ Failed to get database for '%s': %v\n", newsgroup.Name, err) + continue + } + if groupDB == nil || groupDB.DB == nil { + fmt.Printf("⚠️ No database found for '%s', skipping...\n", newsgroup.Name) + continue + } + + // Get total article count + var count int64 + err = groupDB.DB.QueryRow("SELECT COUNT(*) FROM articles").Scan(&count) 
+ if err != nil { + fmt.Printf("❌ Failed to count articles: %v\n", err) + continue + } + + if count == 0 { + fmt.Printf("📭 Empty newsgroup, skipping...\n") + continue + } + + fmt.Printf(" Total articles: %d\n", count) + totalArticles += count + + var groupProblems []OutOfOrderArticle + var lastArticleNum int64 + var lastDateSent time.Time + offset := int64(0) + query := ` + SELECT article_num, date_sent + FROM articles + ORDER BY article_num ASC + LIMIT ? OFFSET ? + ` + for { + // Query articles ordered by date_sent ASC using OFFSET pagination + + rows, err := groupDB.DB.Query(query, batchSize, offset) + if err != nil { + return fmt.Errorf("failed to query articles at offset %d: %v", offset, err) + } + + articlesInBatch := 0 + for rows.Next() { + var articleNum int64 + var dateSentStr string + + if err := rows.Scan(&articleNum, &dateSentStr); err != nil { + rows.Close() + return fmt.Errorf("failed to scan article: %v", err) + } + + // Parse date_sent + dateSent, err := time.Parse("2006-01-02 15:04:05", dateSentStr) + if err != nil { + rows.Close() + return fmt.Errorf("failed to parse date_sent '%s': %v", dateSentStr, err) + } + + // Check if this article's date is before the previous one + if offset > 0 || articlesInBatch > 0 { + if dateSent.Before(lastDateSent) { + problem := OutOfOrderArticle{ + Newsgroup: newsgroup.Name, + ArticleNum: articleNum, + DateSent: dateSent, + PreviousArtNum: lastArticleNum, + PreviousDate: lastDateSent, + } + groupProblems = append(groupProblems, problem) + allProblems = append(allProblems, problem) + } + } + + lastArticleNum = articleNum + lastDateSent = dateSent + articlesInBatch++ + } + rows.Close() + + if articlesInBatch == 0 { + break + } + + offset += int64(articlesInBatch) + + if verbose && offset%100000 == 0 { + fmt.Printf(" Progress: %d/%d articles scanned...\n", offset, count) + } + } + + if len(groupProblems) > 0 { + fmt.Printf("⚠️ Found %d out-of-order articles in %s\n", len(groupProblems), newsgroup.Name) + 
totalOutOfOrder += int64(len(groupProblems)) + + // Show first few problems + showCount := len(groupProblems) + if showCount > 10 { + showCount = 10 + } + for i := 0; i < showCount; i++ { + p := groupProblems[i] + fmt.Printf(" • Article %d (%s) comes AFTER article %d (%s)\n", + p.ArticleNum, p.DateSent.Format("2006-01-02 15:04:05"), + p.PreviousArtNum, p.PreviousDate.Format("2006-01-02 15:04:05")) + } + if len(groupProblems) > 10 { + fmt.Printf(" ... and %d more out-of-order articles\n", len(groupProblems)-10) + } + } else { + fmt.Printf("✅ All articles are in chronological order by date_sent\n") + } + } + + // Print summary + fmt.Printf("\n=====================================\n") + fmt.Printf("📊 OUT-OF-ORDER SCAN SUMMARY\n") + fmt.Printf("=====================================\n") + fmt.Printf("Total articles scanned: %d\n", totalArticles) + fmt.Printf("Out-of-order articles: %d\n", totalOutOfOrder) + if totalOutOfOrder > 0 { + fmt.Printf("\n⚠️ WARNING: Found articles with dates that don't match their chronological position!\n") + fmt.Printf("This can cause issues with keyset pagination in nntp-transfer.\n") + fmt.Printf("\n💡 RECOMMENDATIONS:\n") + fmt.Printf(" • Review the import process for late-arriving articles\n") + fmt.Printf(" • Consider using article_num-based pagination for affected newsgroups\n") + fmt.Printf(" • The stuck article detection in nntp-transfer should handle these cases\n") + } else { + fmt.Printf("\n✅ All articles are properly ordered by date_sent across all newsgroups!\n") + } + fmt.Printf("=====================================\n") + + return nil +} + +// reorderArticlesByDateSent reorders articles by date_sent and writes to a new database +func reorderArticlesByDateSent(db *database.Database, newsgroups []*models.Newsgroup, verbose bool, maxPar int) error { + fmt.Printf("Processing %d newsgroups with max parallelism: %d\n\n", len(newsgroups), maxPar) + + parChan := make(chan struct{}, maxPar) + var wg sync.WaitGroup + var totalArticles, 
totalNewsgroups int64 + var mu sync.Mutex + + for i, ng := range newsgroups { + parChan <- struct{}{} // acquire lock + wg.Add(1) + + go func(ng *models.Newsgroup, index int) { + defer func() { + <-parChan // release lock + wg.Done() + }() + + fmt.Printf("🔄 [%d/%d] Processing: %s\n", index+1, len(newsgroups), ng.Name) + + articleCount, err := reorderSingleNewsgroup(db, ng.Name, verbose) + if err != nil { + fmt.Printf("❌ [%d/%d] Failed to reorder %s: %v\n", index+1, len(newsgroups), ng.Name, err) + return + } + + mu.Lock() + totalArticles += articleCount + if articleCount > 0 { + totalNewsgroups++ + } + mu.Unlock() + + if articleCount > 0 { + fmt.Printf("✅ [%d/%d] Completed: %s (%d articles)\n", index+1, len(newsgroups), ng.Name, articleCount) + } else { + fmt.Printf("⚠️ [%d/%d] Skipped: %s (no articles)\n", index+1, len(newsgroups), ng.Name) + } + }(ng, i) + } + + wg.Wait() + + fmt.Printf("\n=====================================\n") + fmt.Printf("📊 REORDER SUMMARY\n") + fmt.Printf("=====================================\n") + fmt.Printf("Newsgroups processed: %d\n", totalNewsgroups) + fmt.Printf("Total articles reordered: %d\n", totalArticles) + fmt.Printf("=====================================\n") + + return nil +} + +// reorderSingleNewsgroup reorders articles in a single newsgroup by date_sent +const query_reorderSingleNewsgroup_insertSQL = `INSERT INTO articles (message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, reply_count, path, headers_json, body_text, imported_at, spam, hide) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)` + +func reorderSingleNewsgroup(db *database.Database, newsgroupName string, verbose bool) (int64, error) { + // Open source database + sourceDB, err := db.GetGroupDB(newsgroupName) + if err != nil { + return 0, fmt.Errorf("failed to open source database: %w", err) + } + defer db.ForceCloseGroupDB(sourceDB) + + // Count articles in source database + var totalArticles int64 + err = 
sourceDB.DB.QueryRow("SELECT COUNT(*) FROM articles WHERE date_sent IS NOT NULL").Scan(&totalArticles) + if err != nil { + return 0, fmt.Errorf("failed to count articles: %w", err) + } + + if totalArticles == 0 { + return 0, nil // No articles to process + } + + // Open destination database with db.new suffix in db folder path + destDB, destPath, err := db.GetGroupDBWithSuffix(newsgroupName, "new") + if err != nil { + return 0, fmt.Errorf("failed to create destination database: %w", err) + } + defer database.CloseGroupDBDirectly(destDB) + + if verbose { + fmt.Printf(" Source: %s\n", sourceDB.Newsgroup) + fmt.Printf(" Dest: %s\n", destPath) + fmt.Printf(" Articles to reorder: %d\n", totalArticles) + } + + // Process articles in batches to avoid memory issues with large newsgroups + const batchSize = 1000 + var processed int64 + var offset int64 = 0 + + // Create a temporary newsgroup model for GetArticlesBatchWithDateFilter + tempNG := &models.Newsgroup{ + Name: newsgroupName, + } + + for offset < totalArticles { + // Use existing GetArticlesBatchWithDateFilter function to fetch articles + // Pass nil for startTime/endTime to get all articles, ordered by date_sent + articles, err := db.GetArticlesBatchWithDateFilter(tempNG, offset, nil, nil, batchSize) + if err != nil { + return processed, fmt.Errorf("failed to query source database at offset %d: %w", offset, err) + } + + // Break if no articles returned + if len(articles) == 0 { + break + } + + // Begin transaction for this batch + tx, err := destDB.Begin() + if err != nil { + return processed, fmt.Errorf("failed to begin transaction at offset %d: %w", offset, err) + } + + stmt, err := tx.Prepare(query_reorderSingleNewsgroup_insertSQL) + if err != nil { + tx.Rollback() + return processed, fmt.Errorf("failed to prepare insert statement: %w", err) + } + + // Process batch + batchCount := 0 + for _, article := range articles { + if article == nil { + continue + } + + // Insert with new sequential article_num + _, err 
= stmt.Exec( + article.MessageID, + article.Subject, + article.FromHeader, + article.DateSent, + article.DateString, + article.References, + article.Bytes, + article.Lines, + article.ReplyCount, + article.Path, + article.HeadersJSON, + article.BodyText, + article.ImportedAt, + article.Spam, + article.Hide, + ) + if err != nil { + stmt.Close() + tx.Rollback() + return processed, fmt.Errorf("failed to insert article %w", err) + } + + processed++ + batchCount++ + } + + stmt.Close() + + // Commit batch transaction + if err = tx.Commit(); err != nil { + return processed, fmt.Errorf("failed to commit transaction at offset %d: %w", offset, err) + } + + if verbose { + fmt.Printf(" Progress: %d/%d articles (batch: %d)\n", processed, totalArticles, batchCount) + } + + // Break if we got fewer articles than batch size (end of data) + if batchCount < int(batchSize) { + break + } + + offset += int64(batchCount) + } + + // Close destination database before rebuilding threads + if err := database.CloseGroupDBDirectly(destDB); err != nil { + log.Printf("Warning: failed to close destination database: %v", err) + } + + // Rebuild threads in the new database + if verbose { + fmt.Printf(" Rebuilding thread relationships...\n") + } + + // Reopen the destination database for thread rebuild + destDB, err = sql.Open("sqlite3", destPath) + if err != nil { + return processed, fmt.Errorf("failed to reopen destination database for thread rebuild: %w", err) + } + defer destDB.Close() + + // Create a temporary GroupDB wrapper for the new database + tempGroupDB := &database.GroupDB{ + Newsgroup: newsgroupName, + DB: destDB, + } + + // Rebuild threads using the existing RebuildThreadsFromScratch function + var report *database.ThreadRebuildReport + report, err = db.RebuildThreadsFromScratch(newsgroupName, verbose, tempGroupDB) + if err != nil { + return processed, fmt.Errorf("failed to rebuild threads: %w", err) + } + + if verbose { + fmt.Printf(" Thread rebuild complete: %d threads from %d 
articles\n", + report.ThreadsRebuilt, report.TotalArticles) + } + + return processed, nil +} diff --git a/cmd/test-MsgIdItemCache/main.go b/cmd/test-MsgIdItemCache/main.go.off similarity index 100% rename from cmd/test-MsgIdItemCache/main.go rename to cmd/test-MsgIdItemCache/main.go.off diff --git a/cmd/web/main.go b/cmd/web/main.go index 5bee454d..f64317f6 100644 --- a/cmd/web/main.go +++ b/cmd/web/main.go @@ -66,6 +66,10 @@ var ( // Compare flags compareActiveFile string compareActiveMinArticles int64 + disableActiveFile string + + // Expiry update flag + updateNewsgroupsExpiryFile string // Bridge flags (disabled by default) /* code path disabled (not tested) @@ -83,13 +87,14 @@ var appVersion = "-unset-" func main() { config.AppVersion = appVersion + models.DisableSanitizedCache = false // Initialize embedded filesystems database.SetEmbeddedMigrations(database.EmbeddedMigrationsFS) flag.IntVar(&maxSanArtCache, "maxsanartcache", 10000, "maximum number of cached sanitized articles (default: 10000)") flag.IntVar(&maxSanArtCacheExpiry, "maxsanartcacheexpiry", 30, "expiry of cached sanitized articles in minutes (default: 30 minutes)") - flag.IntVar(&maxNGpageCache, "maxngpagecache", 4096, "maximum number of cached newsgroup pages (25 groups per page) (default: 4K pages) [~12-16 KB/entry * 4096 = ~64 MB (+overhead) with 100k active groups!]") + flag.IntVar(&maxNGpageCache, "maxngpagecache", 4096, "maximum number of cached newsgroup pages (default: 4K pages)") flag.IntVar(&maxNGpageCacheExpiry, "maxngpagecacheexpiry", 5, "expiry of cached newsgroup pages in minutes (default: 5 minutes)") flag.IntVar(&maxArticleCache, "maxarticlecache", 10000, "maximum number of cached articles (default: 10000) [~8-12 KB/entry] 10000 = ~128 MB") flag.IntVar(&maxArticleCacheExpiry, "maxarticlecacheexpiry", 60, "expiry of cached articles in minutes (default: 60 minutes)") @@ -107,13 +112,13 @@ func main() { flag.IntVar(&nntptlsport, "nntptlsport", 0, "NNTP TLS port") 
flag.StringVar(&nntpcertFile, "nntpcertfile", "", "NNTP TLS certificate file (/path/to/fullchain.pem)") flag.StringVar(&nntpkeyFile, "nntpkeyfile", "", "NNTP TLS key file (/path/to/privkey.pem)") - flag.BoolVar(&forceReloadDesc, "update-descr", false, "Updates (overwrites existing!) internal newsgroup descriptions from file preload/newsgroups.descriptions (default: false)") - flag.StringVar(&importActiveFile, "import-active", "", "Import newsgroups from NNTP active file (format: groupname highwater lowwater status)") - flag.StringVar(&importDescFile, "import-desc", "", "Import newsgroups from descriptions file (format: groupname\\tdescription)") - flag.BoolVar(&importCreateMissing, "import-create", false, "Create missing newsgroups when importing from descriptions file (default: false)") + flag.BoolVar(&forceReloadDesc, "update-overwrite-newsgroups-descriptions", false, "Updates (overwrites existing!) internal newsgroup descriptions from file preload/newsgroups.descriptions (default: false)") + flag.StringVar(&importDescFile, "update-newsgroups-descriptions-file", "", "Import newsgroups from descriptions file (format: groupname\\tdescription)") + flag.BoolVar(&importCreateMissing, "update-newsgroups-descriptions-import-create-newsgroups", false, "Create missing newsgroups when importing from descriptions file (default: false)") flag.BoolVar(&repairWatermarks, "repair-watermarks", false, "Repair corrupted newsgroup watermarks caused by preloader (default: false)") - flag.IntVar(&updateNGAConcurrent, "update-newsgroups-activity", 0, "Updates newsgroup updated_at timestamps to reflect actual article activity. N = process this many newsgroups concurrently") + flag.IntVar(&updateNGAConcurrent, "update-newsgroups-activity", 0, "Updates newsgroup updated_at timestamps to reflect actual article activity. 
Set N to process this many newsgroups concurrently") flag.BoolVar(&updateNewsgroupsHideFuture, "update-newsgroups-hide-futureposts", false, "Hide articles posted more than 48 hours in the future (default: false)") + flag.StringVar(&importActiveFile, "import-active", "", "Import newsgroups from NNTP active file (format: groupname highwater lowwater status)") flag.StringVar(&writeActiveFile, "write-active-file", "", "Write NNTP active file from main database newsgroups table to specified path") flag.BoolVar(&writeActiveOnly, "write-active-only", true, "use with -write-active-file (false writes only non active groups!)") flag.StringVar(&rsyncInactiveGroups, "rsync-inactive-groups", "", "path to new data dir, uses rsync to copy all inactive group databases to new data folder.") @@ -122,7 +127,9 @@ func main() { flag.BoolVar(&noCronjobs, "no-cronjobs", false, "use this flag to not run cron jobs") flag.BoolVar(&findOrphanDBs, "find-orphan-dbs", false, "Find orphaned database folders in data/db that don't correspond to any newsgroup in main database") flag.StringVar(&compareActiveFile, "compare-active", "", "Compare active file with database and show missing groups (format: groupname highwater lowwater status)") + flag.StringVar(&disableActiveFile, "disable-active", "", "Disable newsgroups not listed in active file (format: groupname highwater lowwater status)") flag.Int64Var(&compareActiveMinArticles, "compare-active-min-articles", 0, "use with -compare-active: only show groups with more than N articles (calculated as high-low)") + flag.StringVar(&updateNewsgroupsExpiryFile, "update-newsgroups-expiry-from-file", "", "Update newsgroup expiry_days from file (format: newsgroup:days, one per line)") flag.BoolVar(&verbose, "verbose", false, "print more (debug) output") flag.StringVar(&dataDir, "data", "./data", "path to database and config directory") @@ -294,6 +301,18 @@ func main() { } } + // disableActiveFile + if disableActiveFile != "" { + log.Printf("[WEB]: Disabling 
newsgroups not listed in active file: %s", disableActiveFile) + if err := disableNewsgroupsNotInActiveFile(db, disableActiveFile); err != nil { + log.Printf("[WEB]: Error: Failed to disable newsgroups: %v", err) + os.Exit(1) + } else { + log.Printf("[WEB]: Newsgroup disabling completed successfully") + os.Exit(0) + } + } + // findOrphanDBs if findOrphanDBs { log.Printf("[WEB]: Finding orphaned database folders...") @@ -306,6 +325,18 @@ func main() { } } + // updateNewsgroupsExpiryFile + if updateNewsgroupsExpiryFile != "" { + log.Printf("[WEB]: Updating newsgroup expiry from file: %s", updateNewsgroupsExpiryFile) + if err := updateNewsgroupsExpiryFromFile(db, updateNewsgroupsExpiryFile); err != nil { + log.Printf("[WEB]: Error: Failed to update newsgroup expiry: %v", err) + os.Exit(1) + } else { + log.Printf("[WEB]: Newsgroup expiry update completed successfully") + os.Exit(0) + } + } + // Get or set history UseShortHashLen configuration finalUseShortHashLen, isLocked, err := db.GetHistoryUseShortHashLen(useShortHashLen) if err != nil { diff --git a/cmd/web/main_adapters.go b/cmd/web/main_adapters.go index 61b4ad9d..e7df0fc1 100644 --- a/cmd/web/main_adapters.go +++ b/cmd/web/main_adapters.go @@ -24,8 +24,8 @@ func (pa *ProcessorAdapter) ProcessIncomingArticle(article *models.Article) (int } // Lookup checks if a message-ID exists in history -func (pa *ProcessorAdapter) Lookup(msgIdItem *history.MessageIdItem) (int, error) { - return pa.processor.History.Lookup(msgIdItem) +func (pa *ProcessorAdapter) Lookup(msgIdItem *history.MessageIdItem, quick bool) (response int, newsgroupIDs []int64, err error) { + return pa.processor.History.Lookup(msgIdItem, quick) } // CheckNoMoreWorkInHistory checks if there's no more work in history diff --git a/cmd/web/main_functions.go b/cmd/web/main_functions.go index e4d8d65a..bdf98fbb 100644 --- a/cmd/web/main_functions.go +++ b/cmd/web/main_functions.go @@ -59,12 +59,12 @@ func updateNewsgroupLastActivity(db *database.Database, 
updateNGAConcurrent int) mux.Unlock() wg.Done() }(wg) - groupDBs, err := db.GetGroupDBs(name) + groupDB, err := db.GetGroupDB(name) if err != nil { log.Printf("[WEB]: ERROR updateNewsgroupLastActivity GetGroupDB %s: %v", name, err) return } - if err := updateNewsGroupActivityValue(db, id, groupDBs); err == nil { + if err := updateNewsGroupActivityValue(db, id, groupDB); err == nil { mux.Lock() updatedCount++ mux.Unlock() @@ -87,27 +87,27 @@ func updateNewsgroupLastActivity(db *database.Database, updateNGAConcurrent int) const query_updateNewsGroupActivityValue1 = "SELECT MAX(date_sent) FROM articles WHERE hide = 0" const query_updateNewsGroupActivityValue2 = "UPDATE newsgroups SET updated_at = ? WHERE id = ? AND updated_at != ?" -func updateNewsGroupActivityValue(db *database.Database, id int, groupDBs *database.GroupDBs) error { - defer db.ForceCloseGroupDBs(groupDBs) +func updateNewsGroupActivityValue(db *database.Database, id int, groupDB *database.GroupDB) error { + defer db.ForceCloseGroupDB(groupDB) var formattedDate string var parsedDate time.Time var latestDate sql.NullString /* - _, err = database.RetryableExec(groupDBs.DB, "UPDATE articles SET spam = 1 WHERE spam = 0 AND hide = 1", nil) + _, err = database.RetryableExec(groupDB.DB, "UPDATE articles SET spam = 1 WHERE spam = 0 AND hide = 1", nil) if err != nil { - db.ForceCloseGroupDBs(groupDBs) + db.ForceCloseGroupDB(groupDB) log.Printf("[WEB]: Failed to update spam flags for newsgroup %s: %v", name, err) continue } */ // Query the latest article date from the group's articles table (excluding hidden articles) - rows, err := database.RetryableQuery(groupDBs.DB, query_updateNewsGroupActivityValue1, nil, latestDate) - //groupDBs.Return(db) // Always return the database connection + rows, err := database.RetryableQuery(groupDB.DB, query_updateNewsGroupActivityValue1, nil, latestDate) + //groupDB.Return(db) // Always return the database connection if err != nil { - log.Printf("[WEB]: 
updateNewsgroupLastActivity RetryableQueryRowScan %s: %v", groupDBs.Newsgroup, err) + log.Printf("[WEB]: updateNewsgroupLastActivity RetryableQueryRowScan %s: %v", groupDB.Newsgroup, err) return err } defer rows.Close() @@ -115,13 +115,13 @@ func updateNewsGroupActivityValue(db *database.Database, id int, groupDBs *datab rows.Scan(&latestDate) // Only update if we found a latest date if !latestDate.Valid { - return fmt.Errorf("error updateNewsgroupLastActivity no valid latestDate in ng: '%s'", groupDBs.Newsgroup) + return fmt.Errorf("error updateNewsgroupLastActivity no valid latestDate in ng: '%s'", groupDB.Newsgroup) } // Parse the date and format it consistently as UTC if latestDate.String == "" { - log.Printf("[WEB]: updateNewsgroupLastActivity empty latestDate.String in ng: '%s'", groupDBs.Newsgroup) - return fmt.Errorf("error updateNewsgroupLastActivity empty latestDate.String in ng: '%s'", groupDBs.Newsgroup) + log.Printf("[WEB]: updateNewsgroupLastActivity empty latestDate.String in ng: '%s'", groupDB.Newsgroup) + return fmt.Errorf("error updateNewsgroupLastActivity empty latestDate.String in ng: '%s'", groupDB.Newsgroup) } // Try multiple date formats to handle various edge cases for _, format := range testFormats { @@ -131,7 +131,7 @@ func updateNewsGroupActivityValue(db *database.Database, id int, groupDBs *datab } } if err != nil { - log.Printf("[WEB]: updateNewsgroupLastActivity parsing date '%s' for %s: %v", latestDate.String, groupDBs.Newsgroup, err) + log.Printf("[WEB]: updateNewsgroupLastActivity parsing date '%s' for %s: %v", latestDate.String, groupDB.Newsgroup, err) return err } @@ -139,11 +139,11 @@ func updateNewsGroupActivityValue(db *database.Database, id int, groupDBs *datab formattedDate = parsedDate.UTC().Format("2006-01-02 15:04:05") result, err := db.GetMainDB().Exec(query_updateNewsGroupActivityValue2, formattedDate, id, formattedDate) if err != nil { - log.Printf("[WEB]: error updateNewsgroupLastActivity updating newsgroup %s: %v", 
groupDBs.Newsgroup, err) + log.Printf("[WEB]: error updateNewsgroupLastActivity updating newsgroup %s: %v", groupDB.Newsgroup, err) return err } if _, err := result.RowsAffected(); err != nil { - log.Printf("[WEB]: updateNewsgroupLastActivity: '%s' dateStr=%s formattedDate=%s", groupDBs.Newsgroup, latestDate.String, formattedDate) + log.Printf("[WEB]: updateNewsgroupLastActivity: '%s' dateStr=%s formattedDate=%s", groupDB.Newsgroup, latestDate.String, formattedDate) } } return nil @@ -176,7 +176,7 @@ func hideFuturePosts(db *database.Database) error { } // Get the group database for this newsgroup - groupDBs, err := db.GetGroupDBs(name) + groupDB, err := db.GetGroupDB(name) if err != nil { log.Printf("[WEB]: Future posts migration error getting group DB for %s: %v", name, err) skippedGroups++ @@ -184,10 +184,10 @@ func hideFuturePosts(db *database.Database) error { } // Find articles that are posted more than 48 hours in the future and not already hidden - articleRows, err := groupDBs.DB.Query("SELECT article_num FROM articles WHERE date_sent > ? AND hide = 0", cutoffTime.Format("2006-01-02 15:04:05")) + articleRows, err := groupDB.DB.Query("SELECT article_num FROM articles WHERE date_sent > ? 
AND hide = 0", cutoffTime.Format("2006-01-02 15:04:05")) if err != nil { log.Printf("[WEB]: Future posts migration error querying articles for %s: %v", name, err) - db.ForceCloseGroupDBs(groupDBs) + db.ForceCloseGroupDB(groupDB) skippedGroups++ continue } @@ -202,7 +202,7 @@ func hideFuturePosts(db *database.Database) error { futureArticles = append(futureArticles, articleNum) } articleRows.Close() - db.ForceCloseGroupDBs(groupDBs) + db.ForceCloseGroupDB(groupDB) // Process each future article using the proper spam increment system groupArticleCount := 0 @@ -217,13 +217,13 @@ func hideFuturePosts(db *database.Database) error { } // Also set the hide flag for these future-dated articles - groupDBs, err := db.GetGroupDBs(name) + groupDB, err := db.GetGroupDB(name) if err != nil { log.Printf("[WEB]: Future posts migration error getting group DB for hide update %s: %v", name, err) continue } - _, err = database.RetryableExec(groupDBs.DB, "UPDATE articles SET hide = 1 WHERE article_num = ?", articleNum) - db.ForceCloseGroupDBs(groupDBs) + _, err = database.RetryableExec(groupDB.DB, "UPDATE articles SET hide = 1 WHERE article_num = ?", articleNum) + db.ForceCloseGroupDB(groupDB) if err != nil { log.Printf("[WEB]: Future posts migration error setting hide flag for %s article %d: %v", name, articleNum, err) @@ -934,3 +934,161 @@ func formatBytes(bytes int64) string { } return fmt.Sprintf("%.1f %cB", float64(bytes)/float64(div), "KMGTPE"[exp]) } + +// updateNewsgroupsExpiryFromFile reads a file with newsgroup:days format and updates expiry_days +func updateNewsgroupsExpiryFromFile(db *database.Database, filename string) error { + file, err := os.Open(filename) + if err != nil { + return fmt.Errorf("failed to open file: %w", err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + lineNum := 0 + updated := 0 + errors := 0 + + for scanner.Scan() { + lineNum++ + line := strings.TrimSpace(scanner.Text()) + + // Skip empty lines and comments + if line == "" || 
strings.HasPrefix(line, "#") { + continue + } + + // Parse line: newsgroup:days + parts := strings.SplitN(line, ":", 2) + if len(parts) != 2 { + log.Printf("[EXPIRY] Line %d: invalid format (expected 'newsgroup:days'): %s", lineNum, line) + errors++ + continue + } + + newsgroup := strings.TrimSpace(parts[0]) + daysStr := strings.TrimSpace(parts[1]) + + // Strip optional 'd' suffix (e.g., "30d" -> "30") + daysStr = strings.TrimSuffix(daysStr, "d") + daysStr = strings.TrimSuffix(daysStr, "D") + + // Parse days as integer + days, err := strconv.Atoi(daysStr) + if err != nil { + log.Printf("[EXPIRY] Line %d: invalid days value '%s' for newsgroup '%s': %v", lineNum, daysStr, newsgroup, err) + errors++ + continue + } + + // Update database + if err := db.UpdateNewsgroupExpiry(newsgroup, days); err != nil { + log.Printf("[EXPIRY] Line %d: failed to update newsgroup '%s': %v", lineNum, newsgroup, err) + errors++ + continue + } + + updated++ + if verbose { + log.Printf("[EXPIRY] Updated '%s' to %d days", newsgroup, days) + } + } + + if err := scanner.Err(); err != nil { + return fmt.Errorf("error reading file: %w", err) + } + + log.Printf("[EXPIRY] Processed %d lines: %d updated, %d errors", lineNum, updated, errors) + + if errors > 0 { + return fmt.Errorf("completed with %d errors", errors) + } + + return nil +} + +// disableNewsgroupsNotInActiveFile disables newsgroups NOT listed in the active file +func disableNewsgroupsNotInActiveFile(db *database.Database, activeFilePath string) error { + log.Printf("[WEB]: Disabling newsgroups not listed in active file: %s", activeFilePath) + + // Open and read the active file + file, err := os.Open(activeFilePath) + if err != nil { + return fmt.Errorf("failed to open active file '%s': %w", activeFilePath, err) + } + defer file.Close() + + // Parse active file to get list of groups to KEEP active + activeGroups := make(map[string]bool) + scanner := bufio.NewScanner(file) + lineNum := 0 + + for scanner.Scan() { + lineNum++ + line := 
strings.TrimSpace(scanner.Text()) + + // Skip empty lines and comments + if line == "" || strings.HasPrefix(line, "#") { + continue + } + + // Parse active file format: groupname high low status + fields := strings.Fields(line) + if len(fields) < 4 { + log.Printf("[WEB]: Warning: Skipping malformed line %d in active file: %s", lineNum, line) + continue + } + + groupName := fields[0] + if groupName != "" { + activeGroups[groupName] = true + } + } + + if err := scanner.Err(); err != nil { + return fmt.Errorf("error reading active file: %w", err) + } + + log.Printf("[WEB]: Found %d active groups in file", len(activeGroups)) + + // Get all newsgroups from database + dbGroups, err := db.MainDBGetAllNewsgroups() + if err != nil { + return fmt.Errorf("failed to get newsgroups from database: %w", err) + } + + log.Printf("[WEB]: Found %d total newsgroups in database", len(dbGroups)) + + // Disable groups NOT in active file + disabledCount := 0 + alreadyInactiveCount := 0 + keptActiveCount := 0 + + for _, group := range dbGroups { + if _, exists := activeGroups[group.Name]; !exists { + // Group not in active file - disable it + if group.Active { + if err := db.UpdateNewsgroupActive(group.Name, false); err != nil { + log.Printf("[WEB]: Warning: Failed to disable newsgroup '%s': %v", group.Name, err) + continue + } + disabledCount++ + if disabledCount%1000 == 0 { + log.Printf("[WEB]: Disabled %d newsgroups so far...", disabledCount) + } + } else { + alreadyInactiveCount++ + } + } else { + // Group IS in active file - keep it active + keptActiveCount++ + } + } + + log.Printf("[WEB]: Disable operation completed:") + log.Printf("[WEB]: - Groups kept active (in active file): %d", keptActiveCount) + log.Printf("[WEB]: - Groups newly disabled (not in active file): %d", disabledCount) + log.Printf("[WEB]: - Groups already inactive (not in active file): %d", alreadyInactiveCount) + log.Printf("[WEB]: - Total groups processed: %d", len(dbGroups)) + + return nil +} diff --git a/go.mod 
b/go.mod index 51d3817e..a99ca7a2 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/go-while/go-pugleaf -go 1.25.1 +go 1.25.3 require ( github.com/gin-contrib/secure v1.1.2 @@ -8,10 +8,10 @@ require ( github.com/go-while/go-cpu-mem-profiler v0.0.0-20240612221627-856954a5fc83 github.com/mattn/go-sqlite3 v1.14.32 github.com/redis/go-redis/v9 v9.14.0 - golang.org/x/crypto v0.42.0 - golang.org/x/net v0.44.0 - golang.org/x/term v0.35.0 - golang.org/x/text v0.29.0 + golang.org/x/crypto v0.43.0 + golang.org/x/net v0.46.0 + golang.org/x/term v0.36.0 + golang.org/x/text v0.30.0 ) require ( @@ -25,7 +25,7 @@ require ( github.com/gin-contrib/sse v1.1.0 // indirect github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect - github.com/go-playground/validator/v10 v10.27.0 // indirect + github.com/go-playground/validator/v10 v10.28.0 // indirect github.com/goccy/go-json v0.10.5 // indirect github.com/goccy/go-yaml v1.18.0 // indirect github.com/gorilla/mux v1.8.1 // indirect @@ -37,14 +37,14 @@ require ( github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pelletier/go-toml/v2 v2.2.4 // indirect github.com/quic-go/qpack v0.5.1 // indirect - github.com/quic-go/quic-go v0.54.1 // indirect + github.com/quic-go/quic-go v0.55.0 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.3.0 // indirect go.uber.org/mock v0.6.0 // indirect - golang.org/x/arch v0.21.0 // indirect - golang.org/x/mod v0.28.0 // indirect + golang.org/x/arch v0.22.0 // indirect + golang.org/x/mod v0.29.0 // indirect golang.org/x/sync v0.17.0 // indirect - golang.org/x/sys v0.36.0 // indirect - golang.org/x/tools v0.37.0 // indirect + golang.org/x/sys v0.37.0 // indirect + golang.org/x/tools v0.38.0 // indirect google.golang.org/protobuf v1.36.10 // indirect ) diff --git a/go.sum b/go.sum index 5ec12236..56d665f6 100644 --- a/go.sum +++ b/go.sum @@ -31,8 +31,8 @@ 
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/o github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= -github.com/go-playground/validator/v10 v10.27.0 h1:w8+XrWVMhGkxOaaowyKH35gFydVHOvC0/uWoy2Fzwn4= -github.com/go-playground/validator/v10 v10.27.0/go.mod h1:I5QpIEbmr8On7W0TktmJAumgzX4CA1XNl4ZmDuVHKKo= +github.com/go-playground/validator/v10 v10.28.0 h1:Q7ibns33JjyW48gHkuFT91qX48KG0ktULL6FgHdG688= +github.com/go-playground/validator/v10 v10.28.0/go.mod h1:GoI6I1SjPBh9p7ykNE/yj3fFYbyDOpwMn5KXd+m2hUU= github.com/go-while/go-cpu-mem-profiler v0.0.0-20240612221627-856954a5fc83 h1:vehfiL7LsK8bJQZdVuJxcMem4AP2HwPEQ8orUUnIA+E= github.com/go-while/go-cpu-mem-profiler v0.0.0-20240612221627-856954a5fc83/go.mod h1:31AY7BccUb7qvKi4LQCwLOYsvcza9TdQtl/zHEIOcRE= github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= @@ -65,8 +65,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/quic-go/qpack v0.5.1 h1:giqksBPnT/HDtZ6VhtFKgoLOWmlyo9Ei6u9PqzIMbhI= github.com/quic-go/qpack v0.5.1/go.mod h1:+PC4XFrEskIVkcLzpEkbLqq1uCoxPhQuvK5rH1ZgaEg= -github.com/quic-go/quic-go v0.54.1 h1:4ZAWm0AhCb6+hE+l5Q1NAL0iRn/ZrMwqHRGQiFwj2eg= -github.com/quic-go/quic-go v0.54.1/go.mod h1:e68ZEaCdyviluZmy44P6Iey98v/Wfz6HCjQEm+l8zTY= +github.com/quic-go/quic-go v0.55.0 h1:zccPQIqYCXDt5NmcEabyYvOnomjs8Tlwl7tISjJh9Mk= +github.com/quic-go/quic-go v0.55.0/go.mod h1:DR51ilwU1uE164KuWXhinFcKWGlEjzys2l8zUl5Ss1U= github.com/redis/go-redis/v9 v9.14.0 h1:u4tNCjXOyzfgeLN+vAZaW1xUooqWDqVEsZN0U01jfAE= github.com/redis/go-redis/v9 v9.14.0/go.mod 
h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -84,25 +84,25 @@ github.com/ugorji/go/codec v1.3.0 h1:Qd2W2sQawAfG8XSvzwhBeoGq71zXOC/Q1E9y/wUcsUA github.com/ugorji/go/codec v1.3.0/go.mod h1:pRBVtBSKl77K30Bv8R2P+cLSGaTtex6fsA2Wjqmfxj4= go.uber.org/mock v0.6.0 h1:hyF9dfmbgIX5EfOdasqLsWD6xqpNZlXblLB/Dbnwv3Y= go.uber.org/mock v0.6.0/go.mod h1:KiVJ4BqZJaMj4svdfmHM0AUx4NJYO8ZNpPnZn1Z+BBU= -golang.org/x/arch v0.21.0 h1:iTC9o7+wP6cPWpDWkivCvQFGAHDQ59SrSxsLPcnkArw= -golang.org/x/arch v0.21.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A= -golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI= -golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8= -golang.org/x/mod v0.28.0 h1:gQBtGhjxykdjY9YhZpSlZIsbnaE2+PgjfLWUQTnoZ1U= -golang.org/x/mod v0.28.0/go.mod h1:yfB/L0NOf/kmEbXjzCPOx1iK1fRutOydrCMsqRhEBxI= -golang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I= -golang.org/x/net v0.44.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY= +golang.org/x/arch v0.22.0 h1:c/Zle32i5ttqRXjdLyyHZESLD/bB90DCU1g9l/0YBDI= +golang.org/x/arch v0.22.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A= +golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= +golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= +golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA= +golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= +golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4= +golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.36.0 
h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= -golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= -golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= -golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= -golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= -golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= -golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= +golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= +golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q= +golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss= +golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= +golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= +golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= +golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/internal/common/common.go b/internal/common/common.go index 8d430b61..3c46a63c 100644 --- a/internal/common/common.go +++ b/internal/common/common.go @@ -1,11 +1,15 @@ package common -import "sync" +import ( + "log" + "sync" +) var shutdownMutex sync.Mutex var closedShutdownChan bool var ShutdownChan = make(chan struct{}) +// ForceShutdown signals all goroutines to forcefully shut down because we triggered an error! 
func ForceShutdown() { shutdownMutex.Lock() defer shutdownMutex.Unlock() @@ -15,6 +19,8 @@ func ForceShutdown() { } } +// WantShutdown will be called in places where we want to break out from any running jobs! +// To Check for a clean shutdown call db.IsDBshutdown() and wait for workers to finish their jobs! func WantShutdown() bool { select { case _, ok := <-ShutdownChan: @@ -28,10 +34,74 @@ func WantShutdown() bool { } func IsClosedChannel(ch chan struct{}) bool { + select { + case _, ok := <-ch: + if !ok { + // channel is closed + return true + } + default: + } + return false +} + +func ChanLock(lockChan chan struct{}) { + // try acquire lock + lockChan <- struct{}{} +} + +func ChanRelease(lockChan chan struct{}) { + // release lock + <-lockChan +} + +func SignalErrChan(errChan chan struct{}) { + select { + case errChan <- struct{}{}: + default: + // already signaled + } +} + +func SignalTickChan(achan chan struct{}) { + select { + case achan <- struct{}{}: + default: + // already signaled + } +} + +var StructChansCap1 = make(chan chan struct{}, 16384) + +// GetStructChanCap1 returns a recycled chan struct{} or makes a new one with capacity of 1 if none are available +func GetStructChanCap1() chan struct{} { + select { + case ch := <-StructChansCap1: + return ch + default: + return make(chan struct{}, 1) + } +} + +// RecycleStructChan recycles a chan struct{} for later use +func RecycleStructChanCap1(ch chan struct{}) { + if cap(ch) != 1 { + log.Printf("Warning: Attempt to recycle chan struct{} with wrong capacity: %d", cap(ch)) + return + } + // empty out the channel select { case <-ch: - return true + // successfully emptied + default: + // is already empty + } + // recycle it + select { + case StructChansCap1 <- ch: + // successfully recycled default: - return false + log.Printf("Warning: RecycleStructChan buffer full: %d", len(StructChansCap1)) + // recycle buffer full, let it go } } diff --git a/internal/common/headers.go b/internal/common/headers.go index 
47293c9d..66d258a4 100644 --- a/internal/common/headers.go +++ b/internal/common/headers.go @@ -12,11 +12,11 @@ import ( "github.com/go-while/go-pugleaf/internal/models" ) -var VerboseHeaders bool = false +var VERBOSE_HEADERS bool = false var IgnoreGoogleHeaders bool = false var UseStrictGroupValidation bool = false var ErrNoNewsgroups = fmt.Errorf("ErrNoNewsgroups") -var unwantedChars = "\t\x00;:,<>#*§()[]{}?!%$§/\\@\"'`" +var unwantedChars = "\t\x00;:,<>#*()[]{}?!%$§/\\@\"'`" var ( // Do NOT change this here! these are needed for runtime ! // validGroupNameRegex validates newsgroup names according to RFC standards @@ -41,6 +41,18 @@ var IgnoreHeadersMap = map[string]bool{ "xref": true, } +// refers to +// https://github.com/InterNetNews/inn/blob/ba39e0ace92aea2f9e59117f1757deaf28416d91/innd/innd.c#L79 +// const ARTHEADER ARTheaders[] ---> HTreq +var RequiredHeadersMap = map[string]bool{ + "date": true, + "from": true, + "message-id": true, + "newsgroups": true, + "path": true, + "subject": true, +} + var formats = []string{ time.RFC1123Z, // "Mon, 02 Jan 2006 15:04:05 -0700" time.RFC1123, // "Mon, 02 Jan 2006 15:04:05 MST" @@ -131,6 +143,24 @@ func parseDateReceivedHeader(dateStr string) time.Time { return time.Time{} } +func parseReferencesToContinuedLineString(input string) string { + if len(input) < 1000 { + return input + } + var builder strings.Builder + refs := strings.Fields(input) + lineLen := 0 + for _, ref := range refs { + builder.WriteString(ref) + lineLen += len(ref) + if lineLen > 500 { + builder.WriteString("\r\n ") + lineLen = 1 // Account for the space added at the beginning of the new line + } + } + return builder.String() +} + // ReconstructHeaders reconstructs the header lines from an article for transmission func ReconstructHeaders(article *models.Article, withPath bool, nntphostname *string, newsgroup string) ([]string, error) { var headers []string @@ -139,6 +169,9 @@ func ReconstructHeaders(article *models.Article, withPath bool, 
nntphostname *st if article.MessageID == "" { return nil, fmt.Errorf("article missing Message-ID") } + if strings.HasSuffix(article.MessageID, "@msgid-missing>") { + return nil, fmt.Errorf("article has placeholder Message-ID") + } if article.Subject == "" { return nil, fmt.Errorf("article missing Subject") } @@ -156,7 +189,7 @@ func ReconstructHeaders(article *models.Article, withPath bool, nntphostname *st // DateString is not RFC compliant, try DateSent first if !article.DateSent.IsZero() && article.DateSent.Year() >= 1979 { dateHeader = article.DateSent.UTC().Format(time.RFC1123Z) - if VerboseHeaders { + if VERBOSE_HEADERS { log.Printf("Using DateSent '%s' instead of DateString '%s' for article %s", dateHeader, article.DateString, article.MessageID) } } else { @@ -166,7 +199,7 @@ func ReconstructHeaders(article *models.Article, withPath bool, nntphostname *st parsedTime := parseDateReceivedHeader(dateReceivedStr) if !parsedTime.IsZero() && parsedTime.Year() >= 1979 { dateHeader = parsedTime.UTC().Format(time.RFC1123Z) - //if VerboseHeaders { + //if VERBOSE_HEADERS { log.Printf("Using Date-Received '%s' (parsed as '%s') instead of invalid DateString '%s' and invalid DateSent (year %d) for article %s", dateReceivedStr, dateHeader, article.DateString, article.DateSent.Year(), article.MessageID) //} article.DateSent = parsedTime // Update article DateSent with corrected time @@ -193,7 +226,7 @@ func ReconstructHeaders(article *models.Article, withPath bool, nntphostname *st parsedTime := parseDateReceivedHeader(dateReceivedStr) if !parsedTime.IsZero() && parsedTime.Year() >= 1979 { dateHeader = parsedTime.UTC().Format(time.RFC1123Z) - if VerboseHeaders { + if VERBOSE_HEADERS { log.Printf("Using Date-Received '%s' (parsed as '%s') when DateString is empty and DateSent is invalid (year %d) for article %s", dateReceivedStr, dateHeader, article.DateSent.Year(), article.MessageID) } article.DateSent = parsedTime // Update article DateSent with corrected time @@ -206,16 
+239,57 @@ func ReconstructHeaders(article *models.Article, withPath bool, nntphostname *st } } } - headers = append(headers, "Message-ID: "+article.MessageID) - headers = append(headers, "Subject: "+article.Subject) headers = append(headers, "Date: "+dateHeader) headers = append(headers, "From: "+article.FromHeader) + headers = append(headers, "Message-ID: "+article.MessageID) + headers = append(headers, "Subject: "+article.Subject) if article.References != "" { - headers = append(headers, "References: "+article.References) + var refline string = "References:" + refs := strings.Fields(article.References) + for i, ref := range refs { + if i > 0 { + if len(refline)+1+len(ref) > 1000 { + // line would exceed 1000 chars, start a new line + headers = append(headers, refline) + refline = " " + ref // continuation line starts with space + } else { + refline += " " + ref + } + } else { + refline += " " + ref + } + } + // append remaining refline + if strings.TrimSpace(refline) != "" { + headers = append(headers, refline) + } } switch withPath { case true: if article.Path != "" { + var pathline string + if nntphostname != nil && *nntphostname != "" { + pathline = "Path: " + *nntphostname + "!.TX!" 
+ article.Path + } else { + pathline = "Path: " + article.Path + } + paths := strings.Fields(pathline) + for i, path := range paths { + if i > 0 { + if len(pathline)+1+len(path) > 1000 { + // line would exceed 1000 chars, start a new line + headers = append(headers, pathline) + pathline = " " + path // continuation line starts with space + } else { + pathline += " " + path + } + } else { + pathline += " " + path + } + } + if strings.TrimSpace(pathline) != "" { + headers = append(headers, pathline) + } if nntphostname != nil && *nntphostname != "" { headers = append(headers, "Path: "+*nntphostname+"!.TX!"+article.Path) } else { @@ -266,7 +340,7 @@ checkHeader: } // check if first char is lowercase if unicode.IsLower(rune(headerLine[0])) { - if VerboseHeaders { + if VERBOSE_HEADERS { log.Printf("Lowercase header: '%s' line=%d in msgId='%s' (rewrite)", headerLine, i, article.MessageID) } headerLine = strings.ToUpper(string(headerLine[0])) + headerLine[1:] @@ -309,12 +383,14 @@ checkHeader: } if !strings.HasPrefix(header, "X-") { if headersMap[strings.ToLower(header)] { - log.Printf("Duplicate header: '%s' line=%d in msgId='%s' (rewrite)", headerLine, i, article.MessageID) - headerLine = "X-RW-" + headerLine + if RequiredHeadersMap[strings.ToLower(header)] { + log.Printf("Duplicate header: '%s' line=%d in msgId='%s' (rewrite)", headerLine, i, article.MessageID) + headerLine = "X-RW-" + headerLine + } } headersMap[strings.ToLower(header)] = true } - if header == "Newsgroups" { + if strings.ToLower(header) == "newsgroups" { // Check if Newsgroups header contains at least one valid newsgroup name // check if next headerlines are continued lines getLines: @@ -379,7 +455,9 @@ checkHeader: trimmedNG = strings.TrimSpace(trimmedNG) if trimmedNG == "" || strings.Contains(trimmedNG, " ") || !IsValidGroupName(trimmedNG) { if trimmedNG == "" { - log.Printf("Invalid newsgroup name: '%s' empty after cleanup in line=%d idx=%d in msgId='%s'", group, i, x, article.MessageID) + if 
VERBOSE_HEADERS { + log.Printf("Invalid newsgroup name: '%s' empty after cleanup in line=%d idx=%d in msgId='%s'", group, i, x, article.MessageID) + } } else { log.Printf("Invalid newsgroup name: '%s' in line=%d idx=%d in msgId='%s'", group, i, x, article.MessageID) } @@ -392,8 +470,9 @@ checkHeader: validNewsgroups = append(validNewsgroups, trimmedNG) } // end for checkGroups - if len(validNewsgroups) == 0 { - log.Printf("Invalid Newsgroups header: '%s' line=%d in msgId='%s' (return err)", headerLine, i, article.MessageID) + if len(validNewsgroups) == 0 && newsgroup == "" { + log.Printf("Invalid Newsgroups header: '%s' line=%d in msgId='%s'", headerLine, i, article.MessageID) + return nil, ErrNoNewsgroups } if badGroups > 0 { @@ -407,7 +486,7 @@ checkHeader: headers = append(headers, headerLine) } // end for moreHeaders - if VerboseHeaders && ignoredLines > 0 { + if VERBOSE_HEADERS && ignoredLines > 0 { log.Printf("Reconstructed %d header lines, ignored %d: msgId='%s'", len(headers), ignoredLines, article.MessageID) } fallbackNewsgroup := false @@ -513,7 +592,7 @@ func GetHeaderFirst(headers map[string][]string, key string) string { if vals, ok := headers[key]; ok && len(vals) > 0 { // For headers that can be folded across multiple lines (like References), // we need to join with spaces instead of newlines to properly unfold them - if key == "references" || key == "References" || key == "in-reply-to" || key == "In-Reply-To" { + if strings.ToLower(key) == "references" || strings.ToLower(key) == "in-reply-to" { return multiLineHeaderToStringSpaced(vals) } return multiLineHeaderToMergedString(vals) diff --git a/internal/database/config_cache.go b/internal/database/config_cache.go index 612cb809..7edf9a4d 100644 --- a/internal/database/config_cache.go +++ b/internal/database/config_cache.go @@ -105,7 +105,7 @@ func (cc *ConfigCache) RefreshCache() error { // getConfigValueDirect performs a direct database query (fallback method) func (cc *ConfigCache) 
getConfigValueDirect(key string) (string, error) { var value string - err := retryableQueryRowScan(cc.db.mainDB, "SELECT value FROM config WHERE key = ?", []interface{}{key}, &value) + err := RetryableQueryRowScan(cc.db.mainDB, "SELECT value FROM config WHERE key = ?", []interface{}{key}, &value) if err != nil { if err.Error() == "sql: no rows in result set" { return "", nil // Return empty string for missing keys diff --git a/internal/database/database.go b/internal/database/database.go index 3e049bcf..e1c0535c 100644 --- a/internal/database/database.go +++ b/internal/database/database.go @@ -41,17 +41,17 @@ func (db *Database) cleanupIdleGroups() { } var candidates []dbAge - for groupName, groupDBs := range db.groupDBs { - if groupDBs == nil { - log.Printf("cleanupIdleGroups Warning: GroupDBs for '%s' is nil, skipping", groupName) + for groupName, groupDB := range db.groupDB { + if groupDB == nil { + log.Printf("cleanupIdleGroups Warning: GroupDB for '%s' is nil, skipping", groupName) continue } - groupDBs.mux.RLock() + groupDB.mux.RLock() candidates = append(candidates, dbAge{ name: groupName, - age: time.Since(groupDBs.Idle), + age: time.Since(groupDB.Idle), }) - groupDBs.mux.RUnlock() + groupDB.mux.RUnlock() } // Sort by age (oldest first) @@ -71,22 +71,22 @@ func (db *Database) cleanupIdleGroups() { if db.openDBsNum <= MaxOpenDatabases/2 { break } - groupDBs := db.groupDBs[candidate.name] - if groupDBs != nil { - groupDBs.mux.Lock() - if groupDBs.Workers == 0 { - if err := groupDBs.Close("force cleanup"); err != nil { + groupDB := db.groupDB[candidate.name] + if groupDB != nil { + groupDB.mux.Lock() + if groupDB.Workers == 0 { + if err := groupDB.Close("force cleanup"); err != nil { log.Printf("Failed to force close group database for '%s': %v", candidate.name, err) } else { - delete(db.groupDBs, candidate.name) + delete(db.groupDB, candidate.name) db.openDBsNum-- closedCount++ log.Printf("Force closed idle DB ng: '%s' (age: %v)", candidate.name, 
candidate.age) } } else { - //log.Printf("Skipping force close for busy group DB '%s' (workers: %d)", candidate.name, groupDBs.Workers) + //log.Printf("Skipping force close for busy group DB '%s' (workers: %d)", candidate.name, groupDB.Workers) } - groupDBs.mux.Unlock() + groupDB.mux.Unlock() } } log.Printf("Force closed %d databases due to exceeding limit (%d >= %d)", closedCount, db.openDBsNum+closedCount, MaxOpenDatabases) @@ -97,37 +97,37 @@ func (db *Database) cleanupIdleGroups() { db.MainMutex.Lock() // normal idle processing with idle time - for groupName, groupDBs := range db.groupDBs { - if groupDBs == nil { - log.Printf("cleanupIdleGroups Warning: GroupDBs for '%s' is nil, skipping", groupName) + for groupName, groupDB := range db.groupDB { + if groupDB == nil { + log.Printf("cleanupIdleGroups Warning: GroupDB for '%s' is nil, skipping", groupName) continue } // Use a non-blocking check to avoid holding locks too long - groupDBs.mux.Lock() - if groupDBs.Workers < 0 { - log.Printf("Warning: Negative worker count for group '%s': %d", groupName, groupDBs.Workers) + groupDB.mux.Lock() + if groupDB.Workers < 0 { + log.Printf("Warning: Negative worker count for group '%s': %d", groupName, groupDB.Workers) } - isIdle := (groupDBs.Workers == 0 && time.Since(groupDBs.Idle) > DBidleTimeOut) + isIdle := (groupDB.Workers == 0 && time.Since(groupDB.Idle) > DBidleTimeOut) if isIdle { // Mark for closure and remove from active map immediately - if err := groupDBs.Close("cleanupIdleGroups"); err != nil { - log.Printf("Failed to close group database for '%s': %v", groupDBs.Newsgroup, err) - groupDBs.mux.Unlock() + if err := groupDB.Close("cleanupIdleGroups"); err != nil { + log.Printf("Failed to close group database for '%s': %v", groupDB.Newsgroup, err) + groupDB.mux.Unlock() continue } - //groupsToClose = append(groupsToClose, groupDBs) - delete(db.groupDBs, groupName) + //groupsToClose = append(groupsToClose, groupDB) + delete(db.groupDB, groupName) db.openDBsNum-- } - 
groupDBs.mux.Unlock() + groupDB.mux.Unlock() } db.MainMutex.Unlock() } func (db *Database) removePartialInitializedGroupDB(groupName string) { db.MainMutex.Lock() - db.groupDBs[groupName] = nil + db.groupDB[groupName] = nil db.MainMutex.Unlock() } @@ -137,16 +137,16 @@ func (db *Database) Shutdown() error { // Close per-group databases first (thousands of them) db.MainMutex.Lock() - log.Printf("[DATABASE] Closing %d group databases...", len(db.groupDBs)) + log.Printf("[DATABASE] Closing %d group databases...", len(db.groupDB)) groupCloseErrors := 0 - for groupName, groupDBs := range db.groupDBs { - if groupDBs != nil && groupDBs.DB != nil { - groupDBs.mux.Lock() - if err := groupDBs.DB.Close(); err != nil { + for groupName, groupDB := range db.groupDB { + if groupDB != nil && groupDB.DB != nil { + groupDB.mux.Lock() + if err := groupDB.DB.Close(); err != nil { errs = append(errs, fmt.Errorf("failed to close group database %s: %w", groupName, err)) groupCloseErrors++ } - groupDBs.mux.Unlock() + groupDB.mux.Unlock() } } db.MainMutex.Unlock() @@ -156,7 +156,7 @@ func (db *Database) Shutdown() error { // Clear the group databases map db.MainMutex.Lock() - db.groupDBs = make(map[string]*GroupDBs) + db.groupDB = make(map[string]*GroupDB) db.MainMutex.Unlock() log.Printf("[DATABASE] Group databases closed") @@ -190,7 +190,7 @@ type Stats struct { WaitCount int64 WaitDuration time.Duration } - GroupDBs map[string]struct { + GroupDB map[string]struct { OpenConnections int IdleConnections int WaitCount int64 @@ -201,7 +201,7 @@ type Stats struct { // GetDatabaseStats returns database connection statistics func (db *Database) GetDatabaseStats() *Stats { stats := &Stats{ - GroupDBs: make(map[string]struct { + GroupDB: make(map[string]struct { OpenConnections int IdleConnections int WaitCount int64 @@ -221,12 +221,12 @@ func (db *Database) GetDatabaseStats() *Stats { // Group database stats db.MainMutex.RLock() defer db.MainMutex.RUnlock() - for groupName, groupDBs := range 
db.groupDBs { - if groupDBs != nil { - groupDBs.mux.RLock() - if groupDBs.DB != nil { - dbStats := groupDBs.DB.Stats() - stats.GroupDBs[groupName] = struct { + for groupName, groupDB := range db.groupDB { + if groupDB != nil { + groupDB.mux.RLock() + if groupDB.DB != nil { + dbStats := groupDB.DB.Stats() + stats.GroupDB[groupName] = struct { OpenConnections int IdleConnections int WaitCount int64 @@ -238,7 +238,7 @@ func (db *Database) GetDatabaseStats() *Stats { WaitDuration: dbStats.WaitDuration, } } - groupDBs.mux.RUnlock() + groupDB.mux.RUnlock() } } @@ -252,7 +252,7 @@ func (db *Database) GetHistoryUseShortHashLen(defaultValue int) (int, bool, erro var locked string // Get the UseShortHashLen value - err := retryableQueryRowScan(db.mainDB, "SELECT value FROM config WHERE key = ?", []interface{}{"history_use_short_hash_len"}, &value) + err := RetryableQueryRowScan(db.mainDB, "SELECT value FROM config WHERE key = ?", []interface{}{"history_use_short_hash_len"}, &value) if err != nil { if err == sql.ErrNoRows { // Not found, use default @@ -262,7 +262,7 @@ func (db *Database) GetHistoryUseShortHashLen(defaultValue int) (int, bool, erro } // Check if config is locked - err = retryableQueryRowScan(db.mainDB, "SELECT value FROM config WHERE key = ?", []interface{}{"history_config_locked"}, &locked) + err = RetryableQueryRowScan(db.mainDB, "SELECT value FROM config WHERE key = ?", []interface{}{"history_config_locked"}, &locked) if err != nil && err != sql.ErrNoRows { return 0, false, fmt.Errorf("failed to query history_config_locked: %w", err) } @@ -296,14 +296,14 @@ func (db *Database) SetHistoryUseShortHashLen(value int) error { } // Store the value - _, err = retryableExec(db.mainDB, "INSERT OR REPLACE INTO config (key, value) VALUES (?, ?)", + _, err = RetryableExec(db.mainDB, "INSERT OR REPLACE INTO config (key, value) VALUES (?, ?)", "history_use_short_hash_len", fmt.Sprintf("%d", value)) if err != nil { return fmt.Errorf("failed to store 
history_use_short_hash_len: %w", err) } // Lock the configuration to prevent future changes - _, err = retryableExec(db.mainDB, "INSERT OR REPLACE INTO config (key, value) VALUES (?, ?)", + _, err = RetryableExec(db.mainDB, "INSERT OR REPLACE INTO config (key, value) VALUES (?, ?)", "history_config_locked", "true") if err != nil { return fmt.Errorf("failed to lock history configuration: %w", err) @@ -330,7 +330,7 @@ func (db *Database) InitializeSystemStatus(appVersion string) error { hostname, _ := os.Hostname() pid := os.Getpid() - _, err := retryableExec(db.mainDB, query, appVersion, pid, hostname) + _, err := RetryableExec(db.mainDB, query, appVersion, pid, hostname) if err != nil { return fmt.Errorf("failed to initialize system status: %w", err) } @@ -342,7 +342,7 @@ func (db *Database) InitializeSystemStatus(appVersion string) error { // GetNewsgroupID returns the ID of a newsgroup by name func (db *Database) GetNewsgroupID(groupName string) (int, error) { var id int - err := retryableQueryRowScan(db.mainDB, "SELECT id FROM newsgroups WHERE name = ?", []interface{}{groupName}, &id) + err := RetryableQueryRowScan(db.mainDB, "SELECT id FROM newsgroups WHERE name = ?", []interface{}{groupName}, &id) if err != nil { return 0, fmt.Errorf("failed to get newsgroup ID for '%s': %w", groupName, err) } @@ -361,15 +361,15 @@ func (db *Database) IncrementArticleSpam(groupName string, articleNum int64) err } log.Printf("DEBUG: Found newsgroupID=%d for group %s", newsgroupID, groupName) - groupDBs, err := db.GetGroupDBs(groupName) + groupDB, err := db.GetGroupDB(groupName) if err != nil { log.Printf("DEBUG: Failed to get group databases for %s: %v", groupName, err) return fmt.Errorf("failed to get group databases: %w", err) } - defer groupDBs.Return(db) + defer groupDB.Return() // Update spam counter in group database - result, err := retryableExec(groupDBs.DB, "UPDATE articles SET spam = spam + 1 WHERE article_num = ?", articleNum) + result, err := 
RetryableExec(groupDB.DB, "UPDATE articles SET spam = spam + 1 WHERE article_num = ?", articleNum) if err != nil { log.Printf("DEBUG: Failed to update spam count in group DB: %v", err) return fmt.Errorf("failed to increment spam count: %w", err) @@ -379,7 +379,7 @@ func (db *Database) IncrementArticleSpam(groupName string, articleNum int64) err log.Printf("DEBUG: Updated %d rows in articles table for article %d", rowsAffected, articleNum) // Add to main database spam table - result2, err := retryableExec(db.mainDB, "INSERT OR IGNORE INTO spam (newsgroup_id, article_num) VALUES (?, ?)", newsgroupID, articleNum) + result2, err := RetryableExec(db.mainDB, "INSERT OR IGNORE INTO spam (newsgroup_id, article_num) VALUES (?, ?)", newsgroupID, articleNum) if err != nil { log.Printf("DEBUG: Failed to insert into spam table: %v", err) return fmt.Errorf("failed to add to spam table: %w", err) @@ -393,13 +393,13 @@ func (db *Database) IncrementArticleSpam(groupName string, articleNum int64) err // IncrementArticleHide increments the hide counter for a specific article func (db *Database) IncrementArticleHide(groupName string, articleNum int64) error { - groupDBs, err := db.GetGroupDBs(groupName) + groupDB, err := db.GetGroupDB(groupName) if err != nil { return fmt.Errorf("failed to get group databases: %w", err) } - defer groupDBs.Return(db) + defer groupDB.Return() - _, err = retryableExec(groupDBs.DB, "UPDATE articles SET hide = 1 WHERE article_num = ? AND spam > 0", articleNum) + _, err = RetryableExec(groupDB.DB, "UPDATE articles SET hide = 1 WHERE article_num = ? 
AND spam > 0", articleNum) if err != nil { return fmt.Errorf("failed to increment hide count: %w", err) } @@ -409,13 +409,13 @@ func (db *Database) IncrementArticleHide(groupName string, articleNum int64) err // UnHideArticle sets the hide counter to zero for a specific article func (db *Database) UnHideArticle(groupName string, articleNum int64) error { - groupDBs, err := db.GetGroupDBs(groupName) + groupDB, err := db.GetGroupDB(groupName) if err != nil { return fmt.Errorf("failed to get group databases: %w", err) } - defer groupDBs.Return(db) + defer groupDB.Return() - _, err = retryableExec(groupDBs.DB, "UPDATE articles SET hide = 0 WHERE article_num = ?", articleNum) + _, err = RetryableExec(groupDB.DB, "UPDATE articles SET hide = 0 WHERE article_num = ?", articleNum) if err != nil { return fmt.Errorf("failed to unhide: %w", err) } @@ -435,16 +435,16 @@ func (db *Database) DecrementArticleSpam(groupName string, articleNum int64) err } log.Printf("DEBUG: Found newsgroupID=%d for group %s", newsgroupID, groupName) - groupDBs, err := db.GetGroupDBs(groupName) + groupDB, err := db.GetGroupDB(groupName) if err != nil { log.Printf("DEBUG: Failed to get group databases for %s: %v", groupName, err) return fmt.Errorf("failed to get group databases: %w", err) } - defer groupDBs.Return(db) + defer groupDB.Return() // Check current spam count first var currentSpam int - err = retryableQueryRowScan(groupDBs.DB, "SELECT spam FROM articles WHERE article_num = ?", []interface{}{articleNum}, ¤tSpam) + err = RetryableQueryRowScan(groupDB.DB, "SELECT spam FROM articles WHERE article_num = ?", []interface{}{articleNum}, ¤tSpam) if err != nil { log.Printf("DEBUG: Failed to get current spam count: %v", err) return fmt.Errorf("failed to get current spam count: %w", err) @@ -456,7 +456,7 @@ func (db *Database) DecrementArticleSpam(groupName string, articleNum int64) err } // Decrement spam counter in group database - result, err := retryableExec(groupDBs.DB, "UPDATE articles SET spam 
= spam - 1 WHERE article_num = ? AND spam > 0", articleNum) + result, err := RetryableExec(groupDB.DB, "UPDATE articles SET spam = spam - 1 WHERE article_num = ? AND spam > 0", articleNum) if err != nil { log.Printf("DEBUG: Failed to decrement spam count in group DB: %v", err) return fmt.Errorf("failed to decrement spam count: %w", err) @@ -467,7 +467,7 @@ func (db *Database) DecrementArticleSpam(groupName string, articleNum int64) err // If spam count reaches 0, remove from main database spam table and clear all user flags if currentSpam == 1 { - result2, err := retryableExec(db.mainDB, "DELETE FROM spam WHERE newsgroup_id = ? AND article_num = ?", newsgroupID, articleNum) + result2, err := RetryableExec(db.mainDB, "DELETE FROM spam WHERE newsgroup_id = ? AND article_num = ?", newsgroupID, articleNum) if err != nil { log.Printf("DEBUG: Failed to remove from spam table: %v", err) return fmt.Errorf("failed to remove from spam table: %w", err) @@ -477,7 +477,7 @@ func (db *Database) DecrementArticleSpam(groupName string, articleNum int64) err log.Printf("DEBUG: Removed %d rows from spam table", rowsAffected2) // Also remove all user spam flags for this article - result3, err := retryableExec(db.mainDB, "DELETE FROM user_spam_flags WHERE newsgroup_id = ? AND article_num = ?", newsgroupID, articleNum) + result3, err := RetryableExec(db.mainDB, "DELETE FROM user_spam_flags WHERE newsgroup_id = ? AND article_num = ?", newsgroupID, articleNum) if err != nil { log.Printf("DEBUG: Failed to clear user spam flags: %v", err) return fmt.Errorf("failed to clear user spam flags: %w", err) @@ -499,7 +499,7 @@ func (db *Database) HasUserFlaggedSpam(userID int64, groupName string, articleNu } var count int - err = retryableQueryRowScan(db.mainDB, ` + err = RetryableQueryRowScan(db.mainDB, ` SELECT COUNT(*) FROM user_spam_flags WHERE user_id = ? AND newsgroup_id = ? 
AND article_num = ?`, []interface{}{userID, newsgroupID, articleNum}, &count) @@ -519,7 +519,7 @@ func (db *Database) RecordUserSpamFlag(userID int64, groupName string, articleNu return fmt.Errorf("failed to get newsgroup ID: %w", err) } - _, err = retryableExec(db.mainDB, ` + _, err = RetryableExec(db.mainDB, ` INSERT OR IGNORE INTO user_spam_flags (user_id, newsgroup_id, article_num) VALUES (?, ?, ?)`, userID, newsgroupID, articleNum) diff --git a/internal/database/db_aimodels.go b/internal/database/db_aimodels.go index 5233387d..cd14519c 100644 --- a/internal/database/db_aimodels.go +++ b/internal/database/db_aimodels.go @@ -18,7 +18,7 @@ func (db *Database) GetActiveAIModels() ([]*models.AIModel, error) { WHERE is_active = 1 ORDER BY sort_order ASC, display_name ASC` - rows, err := retryableQuery(db.mainDB, query) + rows, err := RetryableQuery(db.mainDB, query) if err != nil { return nil, err } @@ -52,7 +52,7 @@ func (db *Database) GetDefaultAIModel() (*models.AIModel, error) { LIMIT 1` model := &models.AIModel{} - err := retryableQueryRowScan(db.mainDB, query, nil, + err := RetryableQueryRowScan(db.mainDB, query, nil, &model.ID, &model.PostKey, &model.OllamaModelName, &model.DisplayName, &model.Description, &model.IsActive, &model.IsDefault, &model.SortOrder, &model.CreatedAt, &model.UpdatedAt, @@ -80,7 +80,7 @@ func (db *Database) GetFirstActiveAIModel() (*models.AIModel, error) { LIMIT 1` model := &models.AIModel{} - err := retryableQueryRowScan(db.mainDB, query, nil, + err := RetryableQueryRowScan(db.mainDB, query, nil, &model.ID, &model.PostKey, &model.OllamaModelName, &model.DisplayName, &model.Description, &model.IsActive, &model.IsDefault, &model.SortOrder, &model.CreatedAt, &model.UpdatedAt, @@ -102,7 +102,7 @@ func (db *Database) GetAIModelByPostKey(postKey string) (*models.AIModel, error) WHERE post_key = ?` model := &models.AIModel{} - err := retryableQueryRowScan(db.mainDB, query, []interface{}{postKey}, + err := RetryableQueryRowScan(db.mainDB, 
query, []interface{}{postKey}, &model.ID, &model.PostKey, &model.OllamaModelName, &model.DisplayName, &model.Description, &model.IsActive, &model.IsDefault, &model.SortOrder, &model.CreatedAt, &model.UpdatedAt, @@ -122,7 +122,7 @@ func (db *Database) CreateAIModel(postKey, ollamaModelName, displayName, descrip query := `INSERT INTO ai_models (post_key, ollama_model_name, display_name, description, is_active, is_default, sort_order) VALUES (?, ?, ?, ?, ?, ?, ?)` - result, err := retryableExec(db.mainDB, query, postKey, ollamaModelName, displayName, description, isActive, isDefault, sortOrder) + result, err := RetryableExec(db.mainDB, query, postKey, ollamaModelName, displayName, description, isActive, isDefault, sortOrder) if err != nil { return nil, err } @@ -159,7 +159,7 @@ func (db *Database) UpdateAIModel(id int, ollamaModelName, displayName, descript SET ollama_model_name = ?, display_name = ?, description = ?, is_active = ?, is_default = ?, sort_order = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?` - _, err := retryableExec(db.mainDB, query, ollamaModelName, displayName, description, isActive, isDefault, sortOrder, id) + _, err := RetryableExec(db.mainDB, query, ollamaModelName, displayName, description, isActive, isDefault, sortOrder, id) return err } @@ -168,7 +168,7 @@ func (db *Database) SetDefaultAIModel(id int) error { db.MainMutex.Lock() defer db.MainMutex.Unlock() - return retryableTransactionExec(db.mainDB, func(tx *sql.Tx) error { + return RetryableTransactionExec(db.mainDB, func(tx *sql.Tx) error { // First, unset all defaults _, err := tx.Exec("UPDATE ai_models SET is_default = 0") if err != nil { @@ -186,7 +186,7 @@ func (db *Database) DeleteAIModel(id int) error { db.MainMutex.Lock() defer db.MainMutex.Unlock() query := `DELETE FROM ai_models WHERE id = ?` - _, err := retryableExec(db.mainDB, query, id) + _, err := RetryableExec(db.mainDB, query, id) return err } @@ -199,7 +199,7 @@ func (db *Database) GetAllAIModels() ([]*models.AIModel, 
error) { FROM ai_models ORDER BY sort_order ASC, display_name ASC` - rows, err := retryableQuery(db.mainDB, query) + rows, err := RetryableQuery(db.mainDB, query) if err != nil { return nil, err } diff --git a/internal/database/db_apitokens.go b/internal/database/db_apitokens.go index c7ee2af3..63f70487 100644 --- a/internal/database/db_apitokens.go +++ b/internal/database/db_apitokens.go @@ -52,7 +52,7 @@ func (db *Database) CreateAPIToken(ownerName string, ownerID int64, expiresAt *t db.MainMutex.Lock() defer db.MainMutex.Unlock() - result, err := retryableExec(db.mainDB, query_CreateAPIToken, hashedToken, ownerName, ownerID, expiresAt) + result, err := RetryableExec(db.mainDB, query_CreateAPIToken, hashedToken, ownerName, ownerID, expiresAt) if err != nil { return nil, "", err } @@ -88,7 +88,7 @@ func (db *Database) ValidateAPIToken(plainToken string) (*APIToken, error) { WHERE apitoken = ? AND is_enabled = 1` var token APIToken - err := retryableQueryRowScan(db.mainDB, query, []interface{}{hashedToken}, + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{hashedToken}, &token.ID, &token.APIToken, &token.OwnerName, &token.OwnerID, &token.CreatedAt, &token.LastUsedAt, &token.ExpiresAt, &token.IsEnabled, &token.UsageCount, @@ -114,7 +114,7 @@ func (db *Database) UpdateTokenUsage(tokenID int64) error { SET last_used_at = CURRENT_TIMESTAMP, usage_count = usage_count + 1 WHERE id = ?` - _, err := retryableExec(db.mainDB, query, tokenID) + _, err := RetryableExec(db.mainDB, query, tokenID) return err } @@ -127,7 +127,7 @@ func (db *Database) ListAPITokens() ([]*APIToken, error) { FROM api_tokens ORDER BY created_at DESC` - rows, err := retryableQuery(db.mainDB, query) + rows, err := RetryableQuery(db.mainDB, query) if err != nil { return nil, err } @@ -156,7 +156,7 @@ func (db *Database) DisableAPIToken(tokenID int) error { defer db.MainMutex.Unlock() query := `UPDATE api_tokens SET is_enabled = 0 WHERE id = ?` - _, err := retryableExec(db.mainDB, query, 
tokenID) + _, err := RetryableExec(db.mainDB, query, tokenID) return err } @@ -166,7 +166,7 @@ func (db *Database) EnableAPIToken(tokenID int) error { defer db.MainMutex.Unlock() query := `UPDATE api_tokens SET is_enabled = 1 WHERE id = ?` - _, err := retryableExec(db.mainDB, query, tokenID) + _, err := RetryableExec(db.mainDB, query, tokenID) return err } @@ -176,7 +176,7 @@ func (db *Database) DeleteAPIToken(tokenID int) error { defer db.MainMutex.Unlock() query := `DELETE FROM api_tokens WHERE id = ?` - _, err := retryableExec(db.mainDB, query, tokenID) + _, err := RetryableExec(db.mainDB, query, tokenID) return err } @@ -186,7 +186,7 @@ func (db *Database) CleanupExpiredTokens() (int, error) { defer db.MainMutex.Unlock() query := `DELETE FROM api_tokens WHERE expires_at IS NOT NULL AND expires_at < CURRENT_TIMESTAMP` - result, err := retryableExec(db.mainDB, query) + result, err := RetryableExec(db.mainDB, query) if err != nil { return 0, err } diff --git a/internal/database/db_batch.go b/internal/database/db_batch.go index 170296f4..66799d7b 100644 --- a/internal/database/db_batch.go +++ b/internal/database/db_batch.go @@ -9,31 +9,24 @@ import ( "sync" "time" - "github.com/go-while/go-pugleaf/internal/history" "github.com/go-while/go-pugleaf/internal/models" ) // SQLite safety limits: split large batches to avoid parameter/length limits var BatchInterval = 3 * time.Second -var MaxBatchSize int = 100 - -// don't process more than N groups in parallel: better have some cpu & mem when importing hard! -var MaxBatchThreads = 16 // -max-batch-threads N -var MaxQueued = 16384 // -max-queue N -var InitialBatchChannelSize = MaxBatchSize // @AI: DO NOT CHANGE THIS!!!! per group cache channel size. 
should be less or equal to MaxBatch in processor aka MaxReadLinesXover in nntp-client-commands // Cache for placeholder strings to avoid rebuilding them repeatedly var placeholderCache sync.Map // map[int]string -const DefaultShutDownCounter = 5 +const DefaultShutDownCounter = 120 // getPlaceholders returns a comma-separated string of SQL placeholders (?) for the given count -func getPlaceholders(count int) string { +func (sq *SQ3batch) getPlaceholders(count int) string { if count <= 0 { return "" } - if count == MaxBatchSize { + if count == sq.maxDBbatch { if v, ok := placeholderCache.Load(count); ok { return v.(string) } @@ -47,7 +40,7 @@ func getPlaceholders(count int) string { s = strings.Repeat("?, ", count-1) + "?" } - if count == MaxBatchSize { + if count == sq.maxDBbatch { placeholderCache.Store(count, s) } return s @@ -61,14 +54,6 @@ type MsgIdTmpCacheItem struct { IsThreadRoot bool // True if this article is a thread root } -// OverviewBatch represents a staged overview waiting for batch processing -/* -type OverviewBatch struct { - Article *models.Article - //Newsgroup *string -} -*/ - // ThreadCacheBatch represents a staged thread cache initialization waiting for batch processing type ThreadCacheBatch struct { Newsgroup string @@ -76,30 +61,34 @@ type ThreadCacheBatch struct { Article *models.Article } -type ThreadingProcessor interface { - MsgIdExists(group *string, messageID string) bool +type ProcessorInterface interface { + //MsgIdExists(group *string, messageID string) bool // Add methods for history and cache operations - AddProcessedArticleToHistory(msgIdItem *history.MessageIdItem, newsgroup *string, articleNumber int64) + //AddProcessedArticleToHistory(msgIdItem *history.MessageIdItem) bool // interface // Add method for finding thread roots - matches proc_MsgIDtmpCache.go signature (updated to use pointer) - FindThreadRootInCache(groupName *string, refs []string) *MsgIdTmpCacheItem + //FindThreadRootInCache(groupName *string, refs []string) 
*MsgIdTmpCacheItem + // Add method for checking if there is no more work in history CheckNoMoreWorkInHistory() bool // Add method for force closing group databases - ForceCloseGroupDBs(groupsDB *GroupDBs) error + ForceCloseGroupDB(groupsDB *GroupDB) error } // SetProcessor sets the threading processor callback interface -func (c *SQ3batch) SetProcessor(proc ThreadingProcessor) { - c.proc = proc - if MaxBatchSize > 1000 { - //log.Printf("[BATCH] MaxBatchSize is set to %d, reduced to 1000 in db_batch", MaxBatchSize) - MaxBatchSize = 1000 +func (sq *SQ3batch) SetProcessor(proc ProcessorInterface) { + sq.proc = proc + if sq.maxDBbatch > 1000 { + //log.Printf("[BATCH] sq.maxDBbatch is set to %d, reduced to 1000 in db_batch", sq.maxDBbatch) + sq.maxDBbatch = 1000 } } type SQ3batch struct { db *Database // Reference to the main database - proc ThreadingProcessor // Threading processor interface for message ID checks + proc ProcessorInterface // processor interface for various callbacks orchestrator *BatchOrchestrator // Smart orchestrator for batch processing + maxDBbatch int // limits db batches to avoid mem and sqlite limits + maxDBthreads int // limits number of concurrent db batch threads + maxQueued int // limits number of queued articles for batch processing GMux sync.RWMutex // Mutex for TasksMap to ensure thread safety TasksMap map[string]*BatchTasks // Map which holds newsgroup cron taskspointers @@ -109,6 +98,8 @@ type SQ3batch struct { TmpStringPtrSlices chan []*string // holds temporary string slices for valuesClauses, messageIDs etc TmpInterfaceSlices chan []interface{} // holds temporary interface slices for args in threading operations queued int // number of queued articles for batch processing + LimitChan chan struct{} // limits number of concurrent db batch threads + LPending chan struct{} // limits number of pending db batch threads } type BatchTasks struct { @@ -122,14 +113,34 @@ type BatchTasks struct { } func NewSQ3batch(db *Database) *SQ3batch { + 
if db.dbconfig.MaxDBbatch < 100 { + db.dbconfig.MaxDBbatch = 100 // hardcoded lower limit + } else if db.dbconfig.MaxDBbatch > 1000 { + db.dbconfig.MaxDBbatch = 1000 // hardcoded upper limit + } + if db.dbconfig.MaxDBthreads < 1 { + db.dbconfig.MaxDBthreads = 1 // hardcoded lower limit + } else if db.dbconfig.MaxDBthreads > 256 { + db.dbconfig.MaxDBthreads = 256 // hardcoded upper limit + } + if db.dbconfig.MaxQueued < 16 { + db.dbconfig.MaxQueued = 16 // hardcoded lower limit + } else if db.dbconfig.MaxQueued > 100000 { + db.dbconfig.MaxQueued = 100000 // hardcoded upper limit + } batch := &SQ3batch{ db: db, + maxDBbatch: db.dbconfig.MaxDBbatch, + maxDBthreads: db.dbconfig.MaxDBthreads, + maxQueued: db.dbconfig.MaxQueued, TasksMap: make(map[string]*BatchTasks, 128), // Initialize TasksMap TmpTasksChans: make(chan chan *BatchTasks, 128), TmpArticleSlices: make(chan []*models.Article, 128), TmpStringSlices: make(chan []string, 128), TmpStringPtrSlices: make(chan []*string, 128), TmpInterfaceSlices: make(chan []interface{}, 128), + LimitChan: make(chan struct{}, db.dbconfig.MaxDBthreads), + LPending: make(chan struct{}, 1), } batch.orchestrator = NewBatchOrchestrator(batch) return batch @@ -229,25 +240,25 @@ func (sq *SQ3batch) GetOrCreateTasksMapKey(newsgroup string) *BatchTasks { } // CheckNoMoreWorkInMaps checks if all batch channels are empty and not processing -func (c *SQ3batch) CheckNoMoreWorkInMaps() bool { +func (sq *SQ3batch) CheckNoMoreWorkInMaps() bool { if len(BatchDividerChan) > 0 { return false } - if c.proc == nil { - log.Printf("CheckNoMoreWorkInMaps c.proc not set") + if sq.proc == nil { + log.Printf("CheckNoMoreWorkInMaps sq.proc not set") return true } - if !c.proc.CheckNoMoreWorkInHistory() { + if !sq.proc.CheckNoMoreWorkInHistory() { log.Printf("[CRON-SHUTDOWN] History still has work") return false } - c.GMux.RLock() // Lock the mutex to ensure thread safety - defer c.GMux.RUnlock() // Ensure we unlock the mutex when done + sq.GMux.RLock() // 
Lock the mutex to ensure thread safety + defer sq.GMux.RUnlock() // Ensure we unlock the mutex when done // Check if all maps are empty and not processing //log.Printf("[CRON-SHUTDOWN] do CheckNoMoreWorkInMaps...") // Iterate through all tasks to check channels - for newsgroup, tasks := range c.TasksMap { + for newsgroup, tasks := range sq.TasksMap { tasks.Mux.RLock() isEmpty := len(tasks.BATCHchan) == 0 && !tasks.BATCHprocessing batchChan, batchProc := len(tasks.BATCHchan), tasks.BATCHprocessing @@ -264,35 +275,31 @@ func (c *SQ3batch) CheckNoMoreWorkInMaps() bool { return true } -var QueryChan = make(chan struct{}, MaxBatchThreads) -var LimitChan = make(chan struct{}, MaxBatchThreads) -var LPending = make(chan struct{}, 1) - const LockLimitBlocking = true -func LockLimitChan() bool { +func (sq *SQ3batch) LockLimitChan() bool { if !LockLimitBlocking { select { - case LimitChan <- struct{}{}: + case sq.LimitChan <- struct{}{}: // Successfully locked return true default: // pass } } else { - LimitChan <- struct{}{} + sq.LimitChan <- struct{}{} return true } return false } -func ReturnLimitChan() { - <-LimitChan +func (sq *SQ3batch) ReturnLimitChan() { + <-sq.LimitChan } -func LockPending() bool { +func (sq *SQ3batch) LockPending() bool { select { - case LPending <- struct{}{}: + case sq.LPending <- struct{}{}: // Successfully locked default: return false @@ -300,22 +307,22 @@ func LockPending() bool { return true } -func ReturnPending() { - <-LPending +func (sq *SQ3batch) ReturnPending() { + <-sq.LPending } -func (c *SQ3batch) returnTmpTasksChan(tasksChan chan *BatchTasks) { +func (sq *SQ3batch) returnTmpTasksChan(tasksChan chan *BatchTasks) { select { - case c.TmpTasksChans <- tasksChan: + case sq.TmpTasksChans <- tasksChan: // Successfully returned to pool default: // Pool is full, discard the channel } } -func (c *SQ3batch) getOrCreateTmpTasksChan() (tasksChan chan *BatchTasks) { +func (sq *SQ3batch) getOrCreateTmpTasksChan() (tasksChan chan *BatchTasks) { select 
{ - case tasksChan = <-c.TmpTasksChans: + case tasksChan = <-sq.TmpTasksChans: return tasksChan default: return make(chan *BatchTasks, 128) @@ -323,21 +330,21 @@ func (c *SQ3batch) getOrCreateTmpTasksChan() (tasksChan chan *BatchTasks) { } // processAllPendingBatches processes all pending batches in the correct sequential order -func (c *SQ3batch) processAllPendingBatches(wgProcessAllBatches *sync.WaitGroup, limit int) { - if !LockPending() { +func (sq *SQ3batch) processAllPendingBatches(wgProcessAllBatches *sync.WaitGroup, limit int) (moreWork bool) { + if !sq.LockPending() { log.Printf("[BATCH] processAllPendingBatches: LockPending failed") return } - defer ReturnPending() + defer sq.ReturnPending() // Get a snapshot of tasks to avoid holding the lock too long //log.Printf("[BATCH-DEBUG] processAllPendingBatches: acquiring GMux.RLock to get tasks snapshot") - c.GMux.RLock() - tasksToProcess := c.getOrCreateTmpTasksChan() - defer c.returnTmpTasksChan(tasksToProcess) + sq.GMux.RLock() + tasksToProcess := sq.getOrCreateTmpTasksChan() + defer sq.returnTmpTasksChan(tasksToProcess) queued := 0 fill: - for _, task := range c.TasksMap { + for _, task := range sq.TasksMap { task.Mux.RLock() if task.BATCHchan == nil || task.BATCHprocessing { task.Mux.RUnlock() @@ -348,12 +355,13 @@ fill: select { case tasksToProcess <- task: // Send the task to the channel default: + moreWork = true break fill } } queued += len(task.BATCHchan) } - c.GMux.RUnlock() + sq.GMux.RUnlock() if len(tasksToProcess) == 0 { return } @@ -390,7 +398,7 @@ process: task.BATCHprocessing = true task.Mux.Unlock() - if !LockLimitChan() { + if !sq.LockLimitChan() { //log.Printf("[BATCH-DEBUG] processAllPendingBatches: LimitChan acquisition failed for task '%s', resetting processing flag", *task.Newsgroup) task.Mux.Lock() task.BATCHprocessing = false @@ -406,116 +414,117 @@ process: defer wgProcessAllBatches.Done() //log.Printf("[BATCH] RUN processAllPendingBatches: processNewsgroupBatch task='%s'", 
*task.Newsgroup) gostart := time.Now() - c.processNewsgroupBatch(task) + sq.processNewsgroupBatch(task) log.Printf("[BATCH] END processAllPendingBatches: processNewsgroupBatch task='%s' took %v", *task.Newsgroup, time.Since(gostart)) }(task, wgProcessAllBatches) // Pass the task and wait group } } // end for tasksToProcess //log.Printf("[BATCH] processAllPendingBatches: launched %d goroutines, remaining tasks %d / %d, done %d", launched, len(tasksToProcess)-toProcess, len(tasksToProcess), len(doneProcessing)) wgProcessAllBatches.Wait() // Wait for all goroutines to finish + return } -// processNewsgroupBatch processes a single newsgroup's batch in the correct sequential order: -// 1. Complete article insertion (unified overview + article data) -// 2. Threading processing (relationships) -// 3. Thread cache updates -const query_processNewsgroupBatch = ` - INSERT INTO newsgroups (name, message_count, last_article, updated_at) - VALUES (?, ?, ?, ?) - ON CONFLICT(name) DO UPDATE SET - message_count = message_count + excluded.message_count, - last_article = CASE - WHEN excluded.last_article > last_article THEN excluded.last_article - ELSE last_article - END, - updated_at = excluded.updated_at` - -func (c *SQ3batch) returnModelsArticleSlice(batches []*models.Article) { +func (sq *SQ3batch) returnModelsArticleSlice(batches []*models.Article) { for i := range batches { batches[i] = nil } select { - case c.TmpArticleSlices <- batches: + case sq.TmpArticleSlices <- batches: default: log.Printf("[BATCH] returnModelsArticleSlice: TmpArticleSlices full, discarding slice of len %d", len(batches)) } } -func (c *SQ3batch) getOrCreateModelsArticleSlice() []*models.Article { +func (sq *SQ3batch) getOrCreateModelsArticleSlice() []*models.Article { select { - case retchan := <-c.TmpArticleSlices: + case retchan := <-sq.TmpArticleSlices: return retchan default: } - return make([]*models.Article, 0, MaxBatchSize) + return make([]*models.Article, 0, sq.maxDBbatch) } -func (c *SQ3batch) 
getOrCreateStringSlice() []string { +func (sq *SQ3batch) getOrCreateStringSlice() []string { select { - case retslice := <-c.TmpStringSlices: + case retslice := <-sq.TmpStringSlices: return retslice default: } - return make([]string, 0, MaxBatchSize) + return make([]string, 0, sq.maxDBbatch) } -func (c *SQ3batch) returnStringSlice(slice []string) { +func (sq *SQ3batch) returnStringSlice(slice []string) { // Clear the slice contents for i := range slice { slice[i] = "" } slice = slice[:0] select { - case c.TmpStringSlices <- slice: + case sq.TmpStringSlices <- slice: default: // Pool is full, discard the slice } } -func (c *SQ3batch) getOrCreateStringPtrSlice() []*string { +func (sq *SQ3batch) getOrCreateStringPtrSlice() []*string { select { - case retslice := <-c.TmpStringPtrSlices: + case retslice := <-sq.TmpStringPtrSlices: return retslice default: } - return make([]*string, 0, MaxBatchSize) + return make([]*string, 0, sq.maxDBbatch) } -func (c *SQ3batch) returnStringPtrSlice(slice []*string) { +func (sq *SQ3batch) returnStringPtrSlice(slice []*string) { // Clear the slice contents for i := range slice { slice[i] = nil } slice = slice[:0] select { - case c.TmpStringPtrSlices <- slice: + case sq.TmpStringPtrSlices <- slice: default: // Pool is full, discard the slice } } -func (c *SQ3batch) returnInterfaceSlice(slice []interface{}) { +func (sq *SQ3batch) returnInterfaceSlice(slice []interface{}) { // Clear the slice contents for i := range slice { slice[i] = nil } slice = slice[:0] select { - case c.TmpInterfaceSlices <- slice: + case sq.TmpInterfaceSlices <- slice: default: // Pool is full, discard the slice } } -func (c *SQ3batch) getOrCreateInterfaceSlice() []interface{} { +func (sq *SQ3batch) getOrCreateInterfaceSlice() []interface{} { select { - case retslice := <-c.TmpInterfaceSlices: + case retslice := <-sq.TmpInterfaceSlices: return retslice default: } - return make([]interface{}, 0, MaxBatchSize*3) // up to 3x for reply count updates + return 
make([]interface{}, 0, sq.maxDBbatch*3) // up to 3x for reply count updates } +// processNewsgroupBatch processes a single newsgroup's batch in the correct sequential order: +// 1. Complete article insertion (unified overview + article data) +// 2. Threading processing (relationships) +// 3. Thread cache updates +const query_updateNewsgroupsStats = ` + INSERT INTO newsgroups (name, message_count, last_article, updated_at) + VALUES (?, ?, ?, ?) + ON CONFLICT(name) DO UPDATE SET + message_count = message_count + excluded.message_count, + last_article = CASE + WHEN excluded.last_article > last_article THEN excluded.last_article + ELSE last_article + END, + updated_at = excluded.updated_at` + func (sq *SQ3batch) processNewsgroupBatch(task *BatchTasks) { startTime := time.Now() task.Mux.Lock() @@ -528,17 +537,16 @@ func (sq *SQ3batch) processNewsgroupBatch(task *BatchTasks) { task.Mux.Unlock() //totalDuration := time.Since(startTime) //log.Printf("[BATCH] processNewsgroupBatch newsgroup '%s' took %v: now Return LimitChan", *task.Newsgroup, totalDuration) - ReturnLimitChan() + sq.ReturnLimitChan() //log.Printf("[BATCH] processNewsgroupBatch newsgroup '%s' returned LimitChan", *task.Newsgroup) }(task, startTime) // Collect all batches for this newsgroup batches := sq.getOrCreateModelsArticleSlice() defer sq.returnModelsArticleSlice(batches) - // Drain the channel drainChannel: - for len(batches) < MaxBatchSize { + for len(batches) < sq.maxDBbatch { select { case article := <-task.BATCHchan: batches = append(batches, article) @@ -550,23 +558,27 @@ drainChannel: if len(batches) == 0 { return } - + defer func(batched int) { + sq.GMux.Lock() + sq.queued -= batched + sq.GMux.Unlock() + }(len(batches)) log.Printf("[BATCH] processNewsgroupBatch: ng: '%s' with %d articles (more queued: %d)", *task.Newsgroup, len(batches), len(task.BATCHchan)) retry1: // Get database connection for this newsgroup - groupDBs, err := sq.db.GetGroupDBs(*task.Newsgroup) + groupDB, err := 
sq.db.GetGroupDB(*task.Newsgroup) if err != nil { log.Printf("[BATCH] processNewsgroupBatch Failed to get database for group '%s': %v", *task.Newsgroup, err) return } // PHASE 1: Insert complete articles (overview + article data unified) and set article numbers directly on batches - if err := sq.batchInsertOverviews(*task.Newsgroup, batches, groupDBs, task.Newsgroup); err != nil { - if groupDBs != nil { - sq.proc.ForceCloseGroupDBs(groupDBs) - log.Printf("[BATCH] processNewsgroupBatch Failed1 to process batch for group '%s': %v groupDBs='%#v'", *task.Newsgroup, err, groupDBs) - groupDBs = nil + if err := sq.batchInsertOverviews(*task.Newsgroup, batches, groupDB, task.Newsgroup); err != nil { + if groupDB != nil { + sq.proc.ForceCloseGroupDB(groupDB) + log.Printf("[BATCH] processNewsgroupBatch Failed1 to process batch for group '%s': %v groupDB='%#v'", *task.Newsgroup, err, groupDB) + groupDB = nil } time.Sleep(time.Second) goto retry1 @@ -590,36 +602,36 @@ retry1: //log.Printf("[BATCH] processNewsgroupBatch Starting threading phase for %d articles in group '%s'", len(batches), *task.Newsgroup) //start := time.Now() retry2: - if groupDBs == nil { - groupDBs, err = sq.db.GetGroupDBs(*task.Newsgroup) + if groupDB == nil { + groupDB, err = sq.db.GetGroupDB(*task.Newsgroup) if err != nil { log.Printf("[BATCH] processNewsgroupBatch Failed2 to get database for group '%s': %v", *task.Newsgroup, err) return } } - if err := sq.batchProcessThreading(task.Newsgroup, batches, groupDBs); err != nil { + if err := sq.batchProcessThreading(task.Newsgroup, batches, groupDB); err != nil { time.Sleep(time.Second) - if groupDBs != nil { - groupDBs.Return(sq.db) - log.Printf("[BATCH] processNewsgroupBatch Failed2 to process threading for group '%s': %v groupDBs='%#v'", *task.Newsgroup, err, groupDBs) - groupDBs = nil + if groupDB != nil { + sq.proc.ForceCloseGroupDB(groupDB) + log.Printf("[BATCH] processNewsgroupBatch Failed2 to process threading for group '%s': %v groupDB='%#v'", 
*task.Newsgroup, err, groupDB) + groupDB = nil } goto retry2 } - defer groupDBs.Return(sq.db) + defer groupDB.Return() //threadingDuration := time.Since(start) //log.Printf("[BATCH] processNewsgroupBatch Completed threading phase for group '%s' in %v", *task.Newsgroup, threadingDuration) // PHASE 3: Handle history and processor cache updates //log.Printf("[BATCH] processNewsgroupBatch Starting history/cache updates for %d articles in group '%s'", len(batches), *task.Newsgroup) //start = time.Now() - + var latestDate time.Time for _, article := range batches { //log.Printf("[BATCH] processNewsgroupBatch Updating history/cache for article %d/%d in group '%s'", i+1, len(batches), *task.Newsgroup) // Read article number under read lock to avoid concurrent map access - article.Mux.RLock() - sq.proc.AddProcessedArticleToHistory(article.MsgIdItem, task.Newsgroup, article.ArticleNums[task.Newsgroup]) - article.Mux.RUnlock() + //article.Mux.RLock() + //sq.proc.AddProcessedArticleToHistory(article.MsgIdItem, task.Newsgroup, article.ArticleNums[task.Newsgroup]) + //article.Mux.RUnlock() article.Mux.Lock() if len(article.NewsgroupsPtr) > 0 { index := -1 @@ -638,10 +650,13 @@ retry2: continue } } - // The MsgIdItem is now in history system, clear the Article's reference to it + // clear values and unlink pointers to free memory article.MessageID = "" article.Subject = "" article.FromHeader = "" + if article.DateSent.After(latestDate) { + latestDate = article.DateSent + } article.DateSent = time.Time{} article.DateString = "" article.References = "" @@ -660,19 +675,17 @@ retry2: } //historyDuration := time.Since(start) //log.Printf("[BATCH] processNewsgroupBatch Completed history/cache updates for group '%s' in %v", *task.Newsgroup, historyDuration) - // Update newsgroup statistics with retryable transaction to avoid race conditions - // Safety check for nil database connection if sq.db == nil || sq.db.mainDB == nil { log.Printf("[BATCH] processNewsgroupBatch Main database 
connection is nil, cannot update newsgroup stats for '%s'", *task.Newsgroup) err = fmt.Errorf("processNewsgroupBatch main database connection is nil") } else { - //LockQueryChan() - //defer ReturnQueryChan() - // Use retryable transaction to prevent race conditions between concurrent batches - err = retryableTransactionExec(sq.db.mainDB, func(tx *sql.Tx) error { - // Use UPSERT to handle both new and existing newsgroups - _, txErr := tx.Exec(query_processNewsgroupBatch, - *task.Newsgroup, len(batches), maxArticleNum, time.Now().UTC().Format("2006-01-02 15:04:05")) + if latestDate.After(time.Now().UTC()) { + latestDate = time.Now().UTC() + } + //lastUpdate = time.Now().UTC().Format("2006-01-02 15:04:05") + err = RetryableTransactionExec(sq.db.mainDB, func(tx *sql.Tx) error { + _, txErr := tx.Exec(query_updateNewsgroupsStats, + *task.Newsgroup, len(batches), maxArticleNum, latestDate.UTC().Format("2006-01-02 15:04:05")) return txErr }) @@ -691,20 +704,17 @@ retry2: log.Printf("[BATCH] processNewsgroupBatch Failed to update newsgroup stats for '%s': %v", *task.Newsgroup, err) } log.Printf("[BATCH-END] newsgroup '%s' processed articles: %d (took %v)", *task.Newsgroup, len(batches), time.Since(startTime)) - sq.GMux.Lock() - sq.queued -= len(batches) - sq.GMux.Unlock() } -// batchInsertOverviews - now sets ArticleNum directly on each batch's Article and reuses the GroupDBs connection -func (c *SQ3batch) batchInsertOverviews(newsgroup string, batches []*models.Article, groupDBs *GroupDBs, taskNewsgroup *string) error { +// batchInsertOverviews - now sets ArticleNum directly on each batch's Article and reuses the GroupDB connection +func (sq *SQ3batch) batchInsertOverviews(newsgroup string, batches []*models.Article, groupDB *GroupDB, taskNewsgroup *string) error { if len(batches) == 0 { return fmt.Errorf("no batches to process for group '%s'", newsgroup) } - if len(batches) <= MaxBatchSize { + if len(batches) <= sq.maxDBbatch { // Small batch - process directly - if err 
:= c.processOverviewBatch(groupDBs, batches, taskNewsgroup); err != nil { + if err := sq.processOverviewBatch(groupDB, batches, taskNewsgroup); err != nil { log.Printf("[OVB-BATCH] Failed to process small batch for group '%s': %v", newsgroup, err) return fmt.Errorf("failed to process small batch for group '%s': %w", newsgroup, err) } @@ -712,13 +722,13 @@ func (c *SQ3batch) batchInsertOverviews(newsgroup string, batches []*models.Arti } // Large batch - split into chunks - for i := 0; i < len(batches); i += MaxBatchSize { - end := i + MaxBatchSize + for i := 0; i < len(batches); i += sq.maxDBbatch { + end := i + sq.maxDBbatch if end > len(batches) { end = len(batches) } - if err := c.processOverviewBatch(groupDBs, batches[i:end], taskNewsgroup); err != nil { + if err := sq.processOverviewBatch(groupDB, batches[i:end], taskNewsgroup); err != nil { log.Printf("[OVB-BATCH] Failed to process chunk %d-%d for group '%s': %v", i, end, newsgroup, err) return fmt.Errorf("failed to process chunk %d-%d for group '%s': %w", i, end, newsgroup, err) } @@ -730,12 +740,12 @@ const query_processOverviewBatch = `INSERT OR IGNORE INTO articles (message_id, const query_processOverviewBatch2 = `SELECT message_id, article_num FROM articles WHERE message_id IN (` // processSingleUnifiedArticleBatch handles a single batch that's within SQLite limits -func (c *SQ3batch) processOverviewBatch(groupDBs *GroupDBs, batches []*models.Article, taskNewsgroup *string) error { +func (sq *SQ3batch) processOverviewBatch(groupDB *GroupDB, batches []*models.Article, taskNewsgroup *string) error { // Get timestamp once for the entire batch instead of per article importedAt := time.Now() // Use a transaction for the batch insert - tx, err := groupDBs.DB.Begin() + tx, err := groupDB.DB.Begin() if err != nil { return fmt.Errorf("failed to begin transaction: %w", err) } @@ -784,12 +794,12 @@ func (c *SQ3batch) processOverviewBatch(groupDBs *GroupDBs, batches []*models.Ar args = append(args, 
article.MessageID) } // ORDER BY not needed; we map by message_id - query := query_processOverviewBatch2 + getPlaceholders(len(args)) + `)` - log.Printf("[OVB-BATCH] group '%s': Selecting article numbers for %d articles queryLen=%d", groupDBs.Newsgroup, len(batches), len(query)) - rows, err := retryableQuery(groupDBs.DB, query, args...) + query := query_processOverviewBatch2 + sq.getPlaceholders(len(args)) + `)` + log.Printf("[OVB-BATCH] group '%s': Selecting article numbers for %d articles queryLen=%d", groupDB.Newsgroup, len(batches), len(query)) + rows, err := RetryableQuery(groupDB.DB, query, args...) if err != nil { - log.Printf("[OVB-BATCH] group '%s': Failed to execute batch select: %v", groupDBs.Newsgroup, err) - return fmt.Errorf("failed to execute batch select for group '%s': %w", groupDBs.Newsgroup, err) + log.Printf("[OVB-BATCH] group '%s': Failed to execute batch select: %v", groupDB.Newsgroup, err) + return fmt.Errorf("failed to execute batch select for group '%s': %w", groupDB.Newsgroup, err) } defer rows.Close() @@ -799,7 +809,7 @@ func (c *SQ3batch) processOverviewBatch(groupDBs *GroupDBs, batches []*models.Ar // Iterate through the results and map article numbers back to batches for rows.Next() { if err := rows.Scan(&messageID, &articleNum); err != nil { - log.Printf("[OVB-BATCH] group '%s': Failed to scan article number: %v", groupDBs.Newsgroup, err) + log.Printf("[OVB-BATCH] group '%s': Failed to scan article number: %v", groupDB.Newsgroup, err) continue } // O(n²) complexity: nested loop through batches for each DB row @@ -814,7 +824,7 @@ func (c *SQ3batch) processOverviewBatch(groupDBs *GroupDBs, batches []*models.Ar if article.ArticleNums[taskNewsgroup] == 0 { article.ArticleNums[taskNewsgroup] = articleNum } else { - log.Printf("[OVB-BATCH] group '%s': Article with message_id %s already assigned article number %d, did not reassign from db: %d", groupDBs.Newsgroup, messageID, article.ArticleNums[taskNewsgroup], articleNum) + 
log.Printf("[OVB-BATCH] group '%s': Article with message_id %s already assigned article number %d, did not reassign from db: %d", groupDB.Newsgroup, messageID, article.ArticleNums[taskNewsgroup], articleNum) } article.Mux.Unlock() timeSpent += time.Since(startN).Microseconds() @@ -828,12 +838,12 @@ func (c *SQ3batch) processOverviewBatch(groupDBs *GroupDBs, batches []*models.Ar if timeSpent > 1000 { spentms = timeSpent / 1000 } - log.Printf("[OVB-BATCH] group '%s': assigned %d/%d articles (took %d ms, spent %d microsec (%d ms) loops: %d)", groupDBs.Newsgroup, idToArticleNum, len(batches), took, timeSpent, spentms, loops) + log.Printf("[OVB-BATCH] group '%s': assigned %d/%d articles (took %d ms, spent %d microsec (%d ms) loops: %d)", groupDB.Newsgroup, idToArticleNum, len(batches), took, timeSpent, spentms, loops) return nil } -// batchProcessThreading processes all threading operations using existing GroupDBs connection -func (c *SQ3batch) batchProcessThreading(taskNewsgroup *string, batches []*models.Article, groupDBs *GroupDBs) error { +// batchProcessThreading processes all threading operations using existing GroupDB connection +func (sq *SQ3batch) batchProcessThreading(taskNewsgroup *string, batches []*models.Article, groupDB *GroupDB) error { if len(batches) == 0 { return nil } @@ -860,7 +870,7 @@ func (c *SQ3batch) batchProcessThreading(taskNewsgroup *string, batches []*model // Process thread roots first (they need to exist before replies can reference them) if roots > 0 { - if err := c.batchProcessThreadRoots(groupDBs, batches, taskNewsgroup); err != nil { + if err := sq.batchProcessThreadRoots(groupDB, batches, taskNewsgroup); err != nil { log.Printf("[THR-BATCH] group '%s': Failed to batch process thread roots: %v", *taskNewsgroup, err) // Continue processing - don't fail the whole batch } @@ -868,7 +878,7 @@ func (c *SQ3batch) batchProcessThreading(taskNewsgroup *string, batches []*model // Process replies if replies > 0 { - if err := 
c.batchProcessReplies(groupDBs, batches, taskNewsgroup); err != nil { + if err := sq.batchProcessReplies(groupDB, batches, taskNewsgroup); err != nil { log.Printf("[THR-BATCH] group '%s': Failed to batch process replies: %v", *taskNewsgroup, err) // Continue processing - don't fail the whole batch } @@ -880,13 +890,13 @@ func (c *SQ3batch) batchProcessThreading(taskNewsgroup *string, batches []*model const query_batchProcessThreadRoots = "INSERT INTO threads (root_article, parent_article, child_article, depth, thread_order) VALUES (?, ?, ?, 0, 0)" // batchProcessThreadRoots processes thread root articles in TRUE batch -func (c *SQ3batch) batchProcessThreadRoots(groupDBs *GroupDBs, rootBatches []*models.Article, taskNewsgroup *string) error { +func (sq *SQ3batch) batchProcessThreadRoots(groupDB *GroupDB, rootBatches []*models.Article, taskNewsgroup *string) error { if len(rootBatches) == 0 { return nil } // Use a transaction with prepared statement for cleaner, more efficient execution - tx, err := groupDBs.DB.Begin() + tx, err := groupDB.DB.Begin() if err != nil { return fmt.Errorf("failed to begin transaction in batchProcessThreadRoots: %w", err) } @@ -948,8 +958,8 @@ func (c *SQ3batch) batchProcessThreadRoots(groupDBs *GroupDBs, rootBatches []*mo // Do post-processing AFTER transaction is committed to avoid SQLite lock conflicts for _, entry := range threadCacheEntries { entry.article.Mux.RLock() - if err := c.db.InitializeThreadCache(groupDBs, entry.articleNum, entry.article); err != nil { - log.Printf("[P-BATCH] group '%s': Failed to initialize thread cache for root %d: %v", groupDBs.Newsgroup, entry.articleNum, err) + if err := sq.db.InitializeThreadCache(groupDB, entry.articleNum, entry.article); err != nil { + log.Printf("[P-BATCH] group '%s': Failed to initialize thread cache for root %d: %v", groupDB.Newsgroup, entry.articleNum, err) // Don't fail the whole operation for cache errors } entry.article.Mux.RUnlock() @@ -959,11 +969,11 @@ func (c *SQ3batch) 
batchProcessThreadRoots(groupDBs *GroupDBs, rootBatches []*mo } // batchProcessReplies processes reply articles in TRUE batch -func (c *SQ3batch) batchProcessReplies(groupDBs *GroupDBs, replyBatches []*models.Article, taskNewsgroup *string) error { +func (sq *SQ3batch) batchProcessReplies(groupDB *GroupDB, replyBatches []*models.Article, taskNewsgroup *string) error { if len(replyBatches) == 0 { return nil } - parentMessageIDs := make(map[*string]int, MaxBatchSize) // Pre-allocate map with expected size + parentMessageIDs := make(map[*string]int, sq.maxDBbatch) // Pre-allocate map with expected size defer func() { for k := range parentMessageIDs { delete(parentMessageIDs, k) @@ -993,7 +1003,7 @@ func (c *SQ3batch) batchProcessReplies(groupDBs *GroupDBs, replyBatches []*model // Find thread root and collect data var threadRoot int64 - if root, err := c.findThreadRoot(groupDBs, article.RefSlice); err == nil { + if root, err := sq.findThreadRoot(groupDB, article.RefSlice); err == nil { threadRoot = root } @@ -1010,14 +1020,14 @@ func (c *SQ3batch) batchProcessReplies(groupDBs *GroupDBs, replyBatches []*model // Batch update reply counts for articles table (single call since overview is unified) if len(parentMessageIDs) > 0 { - if err := c.batchUpdateReplyCounts(groupDBs, parentMessageIDs); err != nil { - log.Printf("[P-BATCH] group '%s': Failed to batch update article reply counts: %v", groupDBs.Newsgroup, err) + if err := sq.batchUpdateReplyCounts(groupDB, parentMessageIDs); err != nil { + log.Printf("[P-BATCH] group '%s': Failed to batch update article reply counts: %v", groupDB.Newsgroup, err) } } if preAllocThreadRoots > 0 { threadUpdates := make(map[int64][]threadCacheUpdateData, preAllocThreadRoots) - //log.Printf("[P-BATCH] group '%s': Pre-allocated thread updates map with capacity %d", groupDBs.Newsgroup, preAllocThreadRoots) + //log.Printf("[P-BATCH] group '%s': Pre-allocated thread updates map with capacity %d", groupDB.Newsgroup, preAllocThreadRoots) for _, 
data := range replyData { if data.threadRoot > 0 { threadUpdates[data.threadRoot] = append(threadUpdates[data.threadRoot], threadCacheUpdateData{ @@ -1029,10 +1039,10 @@ func (c *SQ3batch) batchProcessReplies(groupDBs *GroupDBs, replyBatches []*model // Execute ALL thread cache updates in a single transaction if len(threadUpdates) > 0 { - if err := c.batchUpdateThreadCache(groupDBs, threadUpdates); err != nil { - log.Printf("[P-BATCH] group '%s': Failed to batch update thread cache: %v", groupDBs.Newsgroup, err) + if err := sq.batchUpdateThreadCache(groupDB, threadUpdates); err != nil { + log.Printf("[P-BATCH] group '%s': Failed to batch update thread cache: %v", groupDB.Newsgroup, err) } - log.Printf("[P-BATCH] group '%s': Updated thread cache for %d thread roots", groupDBs.Newsgroup, len(threadUpdates)) + log.Printf("[P-BATCH] group '%s': Updated thread cache for %d thread roots", groupDB.Newsgroup, len(threadUpdates)) } } @@ -1043,19 +1053,19 @@ var query_batchUpdateReplyCounts1 string = "WHEN message_id = ? 
THEN reply_count var query_batchUpdateReplyCounts2 string = "UPDATE articles SET reply_count = CASE %s END WHERE message_id IN (%s)" // batchUpdateReplyCounts performs batch update of reply counts using CASE WHEN -func (c *SQ3batch) batchUpdateReplyCounts(groupDBs *GroupDBs, parentCounts map[*string]int) error { +func (sq *SQ3batch) batchUpdateReplyCounts(groupDB *GroupDB, parentCounts map[*string]int) error { if len(parentCounts) == 0 { return nil } // Get pooled slices to avoid repeated memory allocations - messageIDs := c.getOrCreateStringPtrSlice() - args := c.getOrCreateInterfaceSlice() + messageIDs := sq.getOrCreateStringPtrSlice() + args := sq.getOrCreateInterfaceSlice() defer func() { // Reset and return all to pools - c.returnStringPtrSlice(messageIDs) - c.returnInterfaceSlice(args) + sq.returnStringPtrSlice(messageIDs) + sq.returnInterfaceSlice(args) }() // Build args efficiently - no string copying needed @@ -1072,33 +1082,26 @@ func (c *SQ3batch) batchUpdateReplyCounts(groupDBs *GroupDBs, parentCounts map[* // Build the complete batch UPDATE statement with a single sprintf // Use strings.Repeat for efficient SQL building - zero string copies // Execute the batch UPDATE - //log.Printf("[P-BATCH] group '%s': update batch reply count for %d articles (queryLen=%d)", groupDBs.Newsgroup, len(messageIDs), len(sql)) - _, err := retryableExec(groupDBs.DB, fmt.Sprintf(query_batchUpdateReplyCounts2, strings.Repeat(query_batchUpdateReplyCounts1, len(messageIDs)), getPlaceholders(len(messageIDs))), args...) + //log.Printf("[P-BATCH] group '%s': update batch reply count for %d articles (queryLen=%d)", groupDB.Newsgroup, len(messageIDs), len(sql)) + _, err := RetryableExec(groupDB.DB, fmt.Sprintf(query_batchUpdateReplyCounts2, strings.Repeat(query_batchUpdateReplyCounts1, len(messageIDs)), sq.getPlaceholders(len(messageIDs))), args...) 
if err != nil { - log.Printf("[P-BATCH] group '%s': Failed to execute batch reply count update: %v", groupDBs.Newsgroup, err) + log.Printf("[P-BATCH] group '%s': Failed to execute batch reply count update: %v", groupDB.Newsgroup, err) } return err } // findThreadRootForBatch is a simplified version for batch processing -func (c *SQ3batch) findThreadRoot(groupDBs *GroupDBs, refs []string) (int64, error) { - if len(refs) == 0 || c.proc == nil { +func (sq *SQ3batch) findThreadRoot(groupDB *GroupDB, refs []string) (int64, error) { + if len(refs) == 0 || sq.proc == nil { return 0, fmt.Errorf("no references or processor not available") } - - // Try to find thread root in processor cache first - if cachedRoot := c.proc.FindThreadRootInCache(c.GetNewsgroupPointer(groupDBs.Newsgroup), refs); cachedRoot != nil { - return cachedRoot.ArtNum, nil - } - - // Fall back to database search for any referenced message for i := len(refs) - 1; i >= 0; i-- { refMessageID := refs[i] // Check if this article is a thread root with retryable logic var rootArticle int64 threadQuery := `SELECT root_article FROM threads WHERE root_article = (SELECT article_num FROM articles WHERE message_id = ? LIMIT 1) LIMIT 1` - err := retryableQueryRowScan(groupDBs.DB, threadQuery, []interface{}{refMessageID}, &rootArticle) + err := RetryableQueryRowScan(groupDB.DB, threadQuery, []interface{}{refMessageID}, &rootArticle) if err == nil { return rootArticle, nil } @@ -1107,34 +1110,38 @@ func (c *SQ3batch) findThreadRoot(groupDBs *GroupDBs, refs []string) (int64, err return 0, fmt.Errorf("could not find thread root for any reference") } +const query_batchUpdateThreadCacheSelect = `SELECT child_articles, message_count FROM thread_cache WHERE thread_root = ?` +const query_batchUpdateThreadCacheUpdate = `UPDATE thread_cache SET child_articles = ?, message_count = ?, last_child_number = ?, last_activity = ? 
WHERE thread_root = ?` +const query_batchUpdateThreadCacheInsert = `INSERT INTO thread_cache (thread_root, root_date, message_count, child_articles, last_child_number, last_activity) VALUES (?, ?, 1, '', ?, ?) ON CONFLICT(thread_root) DO UPDATE SET root_date = excluded.root_date, last_child_number = excluded.last_child_number, last_activity = excluded.last_activity` + // batchUpdateThreadCache performs TRUE batch update of thread cache entries in a single transaction with retry logic -func (sq *SQ3batch) batchUpdateThreadCache(groupDBs *GroupDBs, threadUpdates map[int64][]threadCacheUpdateData) error { +func (sq *SQ3batch) batchUpdateThreadCache(groupDB *GroupDB, threadUpdates map[int64][]threadCacheUpdateData) error { if len(threadUpdates) == 0 { return nil } var updatedCount int var initializedCount int - // Use retryableTransactionExec for SQLite lock safety - err := retryableTransactionExec(groupDBs.DB, func(tx *sql.Tx) error { + // Use RetryableTransactionExec for SQLite lock safety + err := RetryableTransactionExec(groupDB.DB, func(tx *sql.Tx) error { // Reset ShutDownCounters for each retry attempt updatedCount = 0 initializedCount = 0 // Prepare statements for batch operations - selectStmt, err := tx.Prepare(`SELECT child_articles, message_count FROM thread_cache WHERE thread_root = ?`) + selectStmt, err := tx.Prepare(query_batchUpdateThreadCacheSelect) if err != nil { return fmt.Errorf("failed to prepare select statement: %w", err) } defer selectStmt.Close() - updateStmt, err := tx.Prepare(`UPDATE thread_cache SET child_articles = ?, message_count = ?, last_child_number = ?, last_activity = ? 
WHERE thread_root = ?`) + updateStmt, err := tx.Prepare(query_batchUpdateThreadCacheUpdate) if err != nil { return fmt.Errorf("failed to prepare update statement: %w", err) } defer updateStmt.Close() - initStmt, err := tx.Prepare(`INSERT INTO thread_cache (thread_root, root_date, message_count, child_articles, last_child_number, last_activity) VALUES (?, ?, 1, '', ?, ?) ON CONFLICT(thread_root) DO UPDATE SET root_date = excluded.root_date, last_child_number = excluded.last_child_number, last_activity = excluded.last_activity`) + initStmt, err := tx.Prepare(query_batchUpdateThreadCacheInsert) if err != nil { return fmt.Errorf("failed to prepare init statement: %w", err) } @@ -1142,19 +1149,20 @@ func (sq *SQ3batch) batchUpdateThreadCache(groupDBs *GroupDBs, threadUpdates map // Process each thread root and its accumulated updates for threadRoot, updates := range threadUpdates { - // Get current cache state with retryable logic + // Get current cache state - NO retry needed, outer transaction handles it var currentChildren string var currentCount int - err := retryableStmtQueryRowScan(selectStmt, []interface{}{threadRoot}, ¤tChildren, ¤tCount) + row := selectStmt.QueryRow(threadRoot) + err := row.Scan(¤tChildren, ¤tCount) if err != nil { // Thread cache entry doesn't exist, initialize it with the first update //firstUpdate := updates[0] // Format dates as UTC strings to avoid timezone encoding issues firstUpdateDateUTC := updates[0].childDate.UTC().Format("2006-01-02 15:04:05") - _, err = retryableStmtExec(initStmt, threadRoot, firstUpdateDateUTC, updates[0].childArticleNum, firstUpdateDateUTC) + _, err = initStmt.Exec(threadRoot, firstUpdateDateUTC, updates[0].childArticleNum, firstUpdateDateUTC) if err != nil { - log.Printf("[BATCH-CACHE] Failed to initialize thread cache for root %d after retries: %v", threadRoot, err) + log.Printf("[BATCH-CACHE] Failed to initialize thread cache for root %d: %v", threadRoot, err) return fmt.Errorf("failed to initialize thread 
cache for root %d: %w", threadRoot, err) } currentChildren = "" @@ -1182,24 +1190,24 @@ func (sq *SQ3batch) batchUpdateThreadCache(groupDBs *GroupDBs, threadUpdates map newCount := currentCount + len(updates) - // Execute the batch update for this thread with retryable logic + // Execute the batch update for this thread - NO retry needed, outer transaction handles it // Format lastActivity as UTC string to avoid timezone encoding issues lastActivityUTC := lastActivity.UTC().Format("2006-01-02 15:04:05") - _, err = retryableStmtExec(updateStmt, newChildren, newCount, lastChildNum, lastActivityUTC, threadRoot) + _, err = updateStmt.Exec(newChildren, newCount, lastChildNum, lastActivityUTC, threadRoot) if err != nil { - log.Printf("[BATCH-CACHE] Failed to update thread cache for root %d after retries: %v", threadRoot, err) + log.Printf("[BATCH-CACHE] Failed to update thread cache for root %d: %v", threadRoot, err) return fmt.Errorf("failed to update thread cache for root %d: %w", threadRoot, err) } updatedCount++ // Update memory cache if available if sq.db.MemThreadCache != nil { - sq.db.MemThreadCache.UpdateThreadMetadata(groupDBs.Newsgroup, threadRoot, newCount, lastActivity, newChildren) + sq.db.MemThreadCache.UpdateThreadMetadata(groupDB.Newsgroup, threadRoot, newCount, lastActivity, newChildren) } } - return nil // Transaction will be committed by retryableTransactionExec + return nil // Transaction will be committed by RetryableTransactionExec }) if err != nil { @@ -1207,7 +1215,7 @@ func (sq *SQ3batch) batchUpdateThreadCache(groupDBs *GroupDBs, threadUpdates map } /* log.Printf("[BATCH-CACHE] group '%s': Successfully batch updated %d thread cache entries (initialized %d) in single retryable transaction with %d total updates", - groupDBs.Newsgroup, updatedCount, initializedCount, len(threadUpdates)) + groupDB.Newsgroup, updatedCount, initializedCount, len(threadUpdates)) */ return nil } @@ -1220,7 +1228,7 @@ type threadCacheUpdateData struct { type 
BatchOrchestrator struct { // Configuration BatchInterval time.Duration // Timer interval for fallback processing - + mux sync.RWMutex // Control batch *SQ3batch } @@ -1242,20 +1250,23 @@ func (o *BatchOrchestrator) StartOrch() { var wgProcessAllBatches sync.WaitGroup ShutDownCounter := DefaultShutDownCounter wantShutdown := false +wait: for { - time.Sleep(time.Second / 2) + time.Sleep(time.Second / 8) // 125 ms + nosleep: if o.batch.db.IsDBshutdown() { - if ShutDownCounter == DefaultShutDownCounter { + if ShutDownCounter == DefaultShutDownCounter || ShutDownCounter%10 == 0 { log.Printf("[ORCHESTRATOR1] Database shutdown detected ShutDownCounter=%d", ShutDownCounter) } - o.batch.processAllPendingBatches(&wgProcessAllBatches, MaxBatchSize) if !wantShutdown { wantShutdown = true } - + if o.batch.processAllPendingBatches(&wgProcessAllBatches, o.batch.maxDBbatch) { + goto nosleep + } if !o.batch.CheckNoMoreWorkInMaps() { ShutDownCounter = DefaultShutDownCounter - continue + continue wait } else { ShutDownCounter-- } @@ -1265,9 +1276,12 @@ func (o *BatchOrchestrator) StartOrch() { } if !wantShutdown { if time.Since(lastFlush) > o.BatchInterval { - //log.Printf("[ORCHESTRATOR1] Timer triggered - processing all pending batches smaller than MaxBatchSize") - o.batch.processAllPendingBatches(&wgProcessAllBatches, MaxBatchSize-1) lastFlush = time.Now() + //log.Printf("[ORCHESTRATOR1] Timer triggered - processing all pending batches smaller than sq.maxDBbatch") + loop: + if o.batch.processAllPendingBatches(&wgProcessAllBatches, o.batch.maxDBbatch-1) { + goto loop + } } } } @@ -1291,14 +1305,10 @@ func (o *BatchOrchestrator) StartOrchestrator() { hasWork := o.checkThresholds() //log.Printf("[ORCHESTRATOR2] Current sleep interval: (%d ms) hasWork=%t", sleep, hasWork) if o.batch.db.IsDBshutdown() { - if o.batch.proc == nil { - log.Printf("[ORCHESTRATOR2] o.batch.proc not set. 
shutting down.") - return - } - if ShutDownCounter == DefaultShutDownCounter { + sleep = 250 * 1000 // 250 ms + if ShutDownCounter == DefaultShutDownCounter || ShutDownCounter%10 == 0 { log.Printf("[ORCHESTRATOR2] Database shutdown detected ShutDownCounter=%d", ShutDownCounter) } - sleep = 500 * 1000 if !wantShutdown { wantShutdown = true } @@ -1325,9 +1335,9 @@ func (o *BatchOrchestrator) StartOrchestrator() { } // @AI: DO NOT CHANGE THIS!!!! } else { // Fast recovery when work is found - sleep = sleep / 4 // @AI: DO NOT CHANGE THIS!!!! - if sleep < 16*1000 { // @AI: DO NOT CHANGE THIS!!!! - sleep = 16 * 1000 // @AI: DO NOT CHANGE THIS!!!! + sleep = sleep / 4 // @AI: DO NOT CHANGE THIS!!!! + if sleep < 8*1000 { // @AI: DO NOT CHANGE THIS!!!! + sleep = 8 * 1000 // @AI: DO NOT CHANGE THIS!!!! } } } @@ -1341,7 +1351,7 @@ func (o *BatchOrchestrator) checkThresholds() (haswork bool) { fillQ: for _, task := range o.batch.TasksMap { task.Mux.RLock() - if task.BATCHprocessing || len(task.BATCHchan) < MaxBatchSize { + if task.BATCHprocessing || len(task.BATCHchan) < o.batch.maxDBbatch { task.Mux.RUnlock() continue } @@ -1369,7 +1379,7 @@ fillQ: } totalQueued += batchCount - if batchCount >= MaxBatchSize { + if batchCount >= o.batch.maxDBbatch { haswork = true task.Mux.Lock() if task.BATCHprocessing { @@ -1380,10 +1390,10 @@ fillQ: task.BATCHprocessing = true task.Mux.Unlock() - if !LockLimitChan() { + if !o.batch.LockLimitChan() { /* log.Printf("[ORCHESTRATOR] Threshold exceeded for group '%s': %d articles (threshold: %d) LimitChan acquisition failed, retry later", - *task.Newsgroup, batchCount, MaxBatchSize) + *task.Newsgroup, batchCount, sq.maxDBbatch) */ //log.Printf("[BATCH-PROC] LimitChan acquisition failed for group '%s', resetting processing flag", *task.Newsgroup) task.Mux.Lock() @@ -1391,7 +1401,7 @@ fillQ: task.Mux.Unlock() return true } else { - //log.Printf("[BATCH-BIG] Threshold exceeded for group '%s': %d articles (threshold: %d)", *task.Newsgroup, 
batchCount, MaxBatchSize) + //log.Printf("[BATCH-BIG] Threshold exceeded for group '%s': %d articles (threshold: %d)", *task.Newsgroup, batchCount, sq.maxDBbatch) go o.batch.processNewsgroupBatch(task) totalQueued -= batchCount } @@ -1399,7 +1409,7 @@ fillQ: // Log groups with pending work but below threshold /* log.Printf("[ORCHESTRATOR-PENDING] Group '%s' has %d articles (below threshold: %d)", - *task.Newsgroup, batchCount, MaxBatchSize) + *task.Newsgroup, batchCount, sq.maxDBbatch) */ } } // end for @@ -1413,31 +1423,29 @@ fillQ: return haswork } -var BatchDividerChan = make(chan *models.Article, 1) +var BatchDividerChan = make(chan *models.Article, 128) // BatchDivider reads incoming articles and routes them to the appropriate per-newsgroup channel // It also enforces the global MaxQueued limit to prevent overload // Each newsgroup channel is created lazily on first use -// This runs as a single goroutine to avoid locking issues - func (sq *SQ3batch) BatchDivider() { var tmpQueued, realQueue int - var maxQueue int = MaxQueued / 100 * 80 - var target int = MaxQueued / 100 * 20 + var maxQueue int = sq.maxQueued / 100 * 80 + var target int = sq.maxQueued / 100 * 20 for { var newsgroupPtr *string - task := <-BatchDividerChan - if task == nil { + article := <-BatchDividerChan + if article == nil { log.Printf("[BATCH-DIVIDER] Received nil task?!") continue } - task.Mux.Lock() + article.Mux.Lock() select { - case newsgroupPtr = <-task.ProcessQueue: - task.Mux.Unlock() + case newsgroupPtr = <-article.ProcessQueue: + article.Mux.Unlock() default: - task.Mux.Unlock() - log.Printf("Error in BatchDivider, received task (%#v) but no newsgroupPtr", task) + article.Mux.Unlock() + log.Printf("Error in BatchDivider, received task (%#v) but no newsgroupPtr", article) continue } //log.Printf("[BATCH-DIVIDER] Received task for group '%s'", *task.Newsgroup) @@ -1446,14 +1454,14 @@ func (sq *SQ3batch) BatchDivider() { tasks.Mux.Lock() // Lazily create the per-group channel on first 
enqueue if tasks.BATCHchan == nil { - tasks.BATCHchan = make(chan *models.Article, InitialBatchChannelSize) + tasks.BATCHchan = make(chan *models.Article, sq.maxDBbatch) } tasks.Mux.Unlock() if realQueue >= maxQueue { - log.Printf("[BATCH-DIVIDER] MaxQueued reached (%d), waiting to enqueue more (current Queue=%d, tmpQueued=%d)", MaxQueued, realQueue, tmpQueued) + log.Printf("[BATCH-DIVIDER] MaxQueued reached (%d), waiting to enqueue more (current Queue=%d, tmpQueued=%d)", sq.maxQueued, realQueue, tmpQueued) for { - time.Sleep(100 * time.Millisecond) + time.Sleep(16 * time.Millisecond) sq.GMux.RLock() if sq.queued <= target { realQueue = sq.queued @@ -1463,9 +1471,9 @@ func (sq *SQ3batch) BatchDivider() { sq.GMux.RUnlock() } } - tasks.BATCHchan <- task + tasks.BATCHchan <- article tmpQueued++ - if tmpQueued >= MaxBatchSize { + if tmpQueued >= sq.maxDBbatch { sq.GMux.Lock() //log.Printf("[BATCH-DIVIDER] Enqueued %d articles to group '%s' (current Queue=%d, tmpQueued=%d)", tmpQueued, *newsgroupPtr, realQueue, tmpQueued) sq.queued += tmpQueued diff --git a/internal/database/db_config.go b/internal/database/db_config.go index ad3f9259..899e6ef3 100644 --- a/internal/database/db_config.go +++ b/internal/database/db_config.go @@ -16,7 +16,7 @@ func (db *Database) GetConfigValue(key string) (string, error) { // getConfigValueDirect retrieves a configuration value directly from the database func (db *Database) getConfigValueDirect(key string) (string, error) { var value string - err := retryableQueryRowScan(db.mainDB, "SELECT value FROM config WHERE key = ?", []interface{}{key}, &value) + err := RetryableQueryRowScan(db.mainDB, "SELECT value FROM config WHERE key = ?", []interface{}{key}, &value) if err != nil { if err == sql.ErrNoRows { return "", nil // Return empty string for missing keys @@ -37,7 +37,7 @@ func (db *Database) SetConfigValue(key, value string) error { // setConfigValueDirect sets or updates a configuration value directly in the database func (db 
*Database) setConfigValueDirect(key, value string) error { - _, err := retryableExec(db.mainDB, ` + _, err := RetryableExec(db.mainDB, ` INSERT OR REPLACE INTO config (key, value) VALUES (?, ?) `, key, value) diff --git a/internal/database/db_cron_jobs.go b/internal/database/db_cron_jobs.go index f5e3c023..7c47cfcf 100644 --- a/internal/database/db_cron_jobs.go +++ b/internal/database/db_cron_jobs.go @@ -13,7 +13,7 @@ const query_GetAllCronJobs = `SELECT id, name, command, interval_minutes, start_ // GetAllCronJobs retrieves all cron jobs func (db *Database) GetAllCronJobs() ([]*models.CronJob, error) { - rows, err := retryableQuery(db.mainDB, query_GetAllCronJobs) + rows, err := RetryableQuery(db.mainDB, query_GetAllCronJobs) if err != nil { return nil, err } @@ -45,7 +45,7 @@ func (db *Database) GetCronJobByID(id int64) (*models.CronJob, error) { var cronJob models.CronJob var lastRun sql.NullTime - err := retryableQueryRowScan(db.mainDB, query_GetCronJobByID, []interface{}{id}, + err := RetryableQueryRowScan(db.mainDB, query_GetCronJobByID, []interface{}{id}, &cronJob.ID, &cronJob.Name, &cronJob.Command, &cronJob.IntervalMinutes, &cronJob.StartHourMinute, &cronJob.Enabled, &lastRun, &cronJob.RunCount, &cronJob.CreatedAt, &cronJob.UpdatedAt) if err != nil { @@ -63,7 +63,7 @@ const query_InsertCronJob = `INSERT INTO cron_jobs (name, command, interval_minu // InsertCronJob creates a new cron job func (db *Database) InsertCronJob(cronJob *models.CronJob) error { - _, err := retryableExec(db.mainDB, query_InsertCronJob, cronJob.Name, cronJob.Command, cronJob.IntervalMinutes, cronJob.StartHourMinute, cronJob.Enabled) + _, err := RetryableExec(db.mainDB, query_InsertCronJob, cronJob.Name, cronJob.Command, cronJob.IntervalMinutes, cronJob.StartHourMinute, cronJob.Enabled) return err } @@ -71,7 +71,7 @@ const query_UpdateCronJob = `UPDATE cron_jobs SET name = ?, command = ?, interva // UpdateCronJob updates an existing cron job func (db *Database) UpdateCronJob(cronJob 
*models.CronJob) error { - _, err := retryableExec(db.mainDB, query_UpdateCronJob, cronJob.Name, cronJob.Command, cronJob.IntervalMinutes, cronJob.StartHourMinute, cronJob.Enabled, cronJob.ID) + _, err := RetryableExec(db.mainDB, query_UpdateCronJob, cronJob.Name, cronJob.Command, cronJob.IntervalMinutes, cronJob.StartHourMinute, cronJob.Enabled, cronJob.ID) return err } @@ -79,7 +79,7 @@ const query_DeleteCronJob = `DELETE FROM cron_jobs WHERE id = ?` // DeleteCronJob deletes a cron job func (db *Database) DeleteCronJob(id int64) error { - _, err := retryableExec(db.mainDB, query_DeleteCronJob, id) + _, err := RetryableExec(db.mainDB, query_DeleteCronJob, id) return err } @@ -91,7 +91,7 @@ func (db *Database) ToggleCronJob(id int64) error { if err != nil { return err } - _, err = retryableExec(db.mainDB, query_ToggleCronJob, !cronJob.Enabled, id) + _, err = RetryableExec(db.mainDB, query_ToggleCronJob, !cronJob.Enabled, id) return err } @@ -99,6 +99,6 @@ const query_UpdateCronJobRunStats = `UPDATE cron_jobs SET last_run = ?, run_coun // UpdateCronJobRunStats updates the run statistics after a cron job execution func (db *Database) UpdateCronJobRunStats(id int64) error { - _, err := retryableExec(db.mainDB, query_UpdateCronJobRunStats, time.Now(), id) + _, err := RetryableExec(db.mainDB, query_UpdateCronJobRunStats, time.Now(), id) return err } diff --git a/internal/database/db_groupdbs.go b/internal/database/db_groupdbs.go index b55596b5..6cf47729 100644 --- a/internal/database/db_groupdbs.go +++ b/internal/database/db_groupdbs.go @@ -7,14 +7,17 @@ import ( "path/filepath" "sync" "time" + + "github.com/go-while/go-pugleaf/internal/history" + "github.com/go-while/go-pugleaf/internal/models" ) const MaxOpenDatabases = 256 const stateCREATED = 1 -// GroupDBs holds a single database connection for a group -type GroupDBs struct { +// GroupDB holds a single database connection for a group +type GroupDB struct { state int64 // 0 = not initialized, 1 = initialized mux 
sync.RWMutex Newsgroup string // Name of the newsgroup TODO: remove and use ptr below @@ -24,48 +27,115 @@ type GroupDBs struct { DB *sql.DB // Single database containing articles, overview, threads, etc. } -// GetGroupDBs returns groupDB for a specific newsgroup -func (db *Database) GetGroupDBs(groupName string) (*GroupDBs, error) { +// NewsgroupDBsIDcache cache maps newsgroup IDs to names +var NewsgroupDBsIDcache = &NewsgroupDBsIDcacheStruct{ + cache: make(map[int64]string), +} + +type NewsgroupDBsIDcacheStruct struct { + mux sync.RWMutex + cache map[int64]string // map[newsgroupID]newsgroupName +} + +func (c *NewsgroupDBsIDcacheStruct) GetNewsgroupNameByID(newsgroupID int64, db *Database) (ngname string, exists bool) { + c.mux.RLock() + ngname, exists = c.cache[newsgroupID] + c.mux.RUnlock() + if !exists { + ng, err := db.MainDBGetNewsgroupByID(newsgroupID) + if err != nil { + log.Printf("GetNewsgroupNameByID: failed to get newsgroup name for ID %d: %v", newsgroupID, err) + return + } + c.mux.Lock() + c.cache[newsgroupID] = ng.Name + c.mux.Unlock() + ngname = ng.Name + exists = true + } + return ngname, exists +} + +func (c *NewsgroupDBsIDcacheStruct) SetNewsgroupNameByID(newsgroupID int64, newsgroupName string) { + c.mux.Lock() + c.cache[newsgroupID] = newsgroupName + c.mux.Unlock() +} + +func (db *Database) GetAnyNewsgroupDBfromIDs(newsgroupIDs []int64) (*GroupDB, error) { + for _, ngID := range newsgroupIDs { + if ngName, exists := NewsgroupDBsIDcache.GetNewsgroupNameByID(ngID, db); exists { + return db.GetGroupDB(ngName) + } + } + return nil, fmt.Errorf("failed to get any newsgroup DB for IDs: %v", newsgroupIDs) +} + +func (db *Database) GetArticleFromAnyNewsgroupDB(msgIdItem *history.MessageIdItem) (*models.Article, error) { + for _, ngID := range msgIdItem.NewsgroupIDs { + if ngName, exists := NewsgroupDBsIDcache.GetNewsgroupNameByID(ngID, db); exists { + groupDB, err := db.GetGroupDB(ngName) + if err != nil { + continue + } + article, err := 
db.GetArticleByMessageID(groupDB, msgIdItem.MessageId) + if err == nil { + return article, nil + } + } + } + return nil, fmt.Errorf("failed to get article from any newsgroup DB for message ID: %s", msgIdItem.MessageId) +} + +func (db *Database) GetNewsgroupsDBbyID(newsgroupID int64) (*GroupDB, error) { + if ngName, exists := NewsgroupDBsIDcache.GetNewsgroupNameByID(newsgroupID, db); exists { + return db.GetGroupDB(ngName) + } + return nil, fmt.Errorf("failed to get newsgroup DB for ID: %d", newsgroupID) +} + +// GetGroupDB returns groupDB for a specific newsgroup +func (db *Database) GetGroupDB(groupName string) (*GroupDB, error) { if db.dbconfig == nil { log.Printf(("Database configuration is not set, cannot get group DBs for '%s'"), groupName) return nil, fmt.Errorf("database configuration is not set") } - db.MainMutex.Lock() - groupDBs := db.groupDBs[groupName] - if groupDBs != nil { - db.MainMutex.Unlock() - + db.MainMutex.Lock() //mux #d2ef40e0 + groupDB := db.groupDB[groupName] + if groupDB != nil { + db.MainMutex.Unlock() //mux #d2ef40e0 for { - groupDBs.mux.RLock() - if groupDBs.state == stateCREATED { - groupDBs.mux.RUnlock() - groupDBs.IncrementWorkers() - return groupDBs, nil + groupDB.mux.RLock() + if groupDB.state == stateCREATED { + groupDB.mux.RUnlock() + groupDB.IncrementWorkers() + return groupDB, nil } - groupDBs.mux.RUnlock() + groupDB.mux.RUnlock() time.Sleep(10 * time.Millisecond) } } else { - groupDBs = &GroupDBs{ + groupDB = &GroupDB{ Newsgroup: groupName, NewsgroupPtr: db.Batch.GetNewsgroupPointer(groupName), DB: nil, Idle: time.Now(), + Workers: 1, } - db.groupDBs[groupName] = groupDBs - db.MainMutex.Unlock() + db.groupDB[groupName] = groupDB + db.MainMutex.Unlock() //mux #d2ef40e0 groupsHash := GroupHashMap.GroupToHash(groupName) - //log.Printf("Open DB for newsgroup '%s' hash='%s' db.openDBsNum=%d db.groupDBs=%d", groupName, groupsHash, db.openDBsNum, len(db.groupDBs)) + //log.Printf("Open DB for newsgroup '%s' hash='%s' db.openDBsNum=%d 
db.groupDB=%d", groupName, groupsHash, db.openDBsNum, len(db.groupDB)) // Create single database filename baseGroupDBdir := filepath.Join(db.dbconfig.DataDir, "/db/"+groupsHash) if err := createDirIfNotExists(baseGroupDBdir); err != nil { db.removePartialInitializedGroupDB(groupName) - return nil, fmt.Errorf("failed to create group database directory: %w", err) + return nil, fmt.Errorf("failed to create group %s database directory: %w", groupName, err) } groupDBfile := filepath.Join(baseGroupDBdir + "/" + SanitizeGroupName(groupName) + ".db") @@ -73,7 +143,7 @@ func (db *Database) GetGroupDBs(groupName string) (*GroupDBs, error) { dbExists := FileExists(groupDBfile) // Open single database - groupDB, err := sql.Open("sqlite3", groupDBfile) + groupsDB, err := sql.Open("sqlite3", groupDBfile) if err != nil { db.removePartialInitializedGroupDB(groupName) return nil, err @@ -83,46 +153,44 @@ func (db *Database) GetGroupDBs(groupName string) (*GroupDBs, error) { var pragmaErr error if dbExists { // Use optimized pragmas for existing DBs (no page_size) - pragmaErr = db.applySQLitePragmasGroupDB(groupDB) + pragmaErr = db.applySQLitePragmasGroupDB(groupsDB) } if pragmaErr != nil { - if cerr := groupDB.Close(); cerr != nil { - log.Printf("Failed to close groupDB during pragma error: %v", cerr) + if cerr := groupsDB.Close(); cerr != nil { + log.Printf("Failed to close groupsDB %s during pragma error: %v", groupName, cerr) } db.removePartialInitializedGroupDB(groupName) return nil, pragmaErr } - groupDBs.mux.Lock() - groupDBs.Idle = time.Now() - groupDBs.DB = groupDB - groupDBs.mux.Unlock() + groupDB.mux.Lock() + groupDB.Idle = time.Now() + groupDB.DB = groupsDB + groupDB.mux.Unlock() // Apply schemas using the new migration system instead of direct file application // Apply all migrations to ensure schema is up to date - if err := db.migrateGroupDB(groupDBs); err != nil { - if cerr := groupDB.Close(); cerr != nil { - log.Printf("Failed to close groupDB during migration 
error: %v", cerr) + if err := db.migrateGroupDB(groupDB, true); err != nil { + if cerr := groupsDB.Close(); cerr != nil { + log.Printf("Failed to close groupsDB %s during migration error: %v", groupName, cerr) } db.removePartialInitializedGroupDB(groupName) return nil, fmt.Errorf("failed to migrate group database %s: %w", groupName, err) } - groupDBs.IncrementWorkers() - db.MainMutex.Lock() db.openDBsNum++ db.MainMutex.Unlock() - groupDBs.mux.Lock() - groupDBs.state = stateCREATED - groupDBs.mux.Unlock() + groupDB.mux.Lock() + groupDB.state = stateCREATED + groupDB.mux.Unlock() - return groupDBs, nil + return groupDB, nil } } -func (db *Database) ForceCloseGroupDBs(groupsDB *GroupDBs) error { +func (db *Database) ForceCloseGroupDB(groupsDB *GroupDB) error { if db.dbconfig == nil { log.Printf(("Database configuration is not set, cannot get group DBs for '%s'"), groupsDB.Newsgroup) return fmt.Errorf("database configuration is not set") @@ -132,25 +200,25 @@ func (db *Database) ForceCloseGroupDBs(groupsDB *GroupDBs) error { groupsDB.mux.Lock() if groupsDB.Workers < 1 { groupsDB.mux.Unlock() - return fmt.Errorf("error in ForceCloseGroupDBs: workers <= 0") + return fmt.Errorf("error in ForceCloseGroupDB: workers <= 0") } groupsDB.Workers-- if groupsDB.Workers > 0 { groupsDB.mux.Unlock() return nil } - if err := groupsDB.Close("ForceCloseGroupDBs"); err != nil { + if err := groupsDB.Close("ForceCloseGroupDB"); err != nil { groupsDB.mux.Unlock() - return fmt.Errorf("error ForceCloseGroupDBs groupsDB.Close ng:'%s' err='%v'", groupsDB.Newsgroup, err) + return fmt.Errorf("error ForceCloseGroupDB groupsDB.Close ng:'%s' err='%v'", groupsDB.Newsgroup, err) } groupsDB.mux.Unlock() db.openDBsNum-- - delete(db.groupDBs, groupsDB.Newsgroup) - //log.Printf("ForceCloseGroupDBs: closed group DB for '%s', openDBsNum=%d, groupDBs=%d", groupsDB.Newsgroup, db.openDBsNum, len(db.groupDBs)) + delete(db.groupDB, groupsDB.Newsgroup) + //log.Printf("ForceCloseGroupDB: closed group DB for '%s', 
openDBsNum=%d, groupDB=%d", groupsDB.Newsgroup, db.openDBsNum, len(db.groupDB)) return nil } -func (dbs *GroupDBs) IncrementWorkers() { +func (dbs *GroupDB) IncrementWorkers() { dbs.mux.Lock() dbs.Workers++ //log.Printf("DEBUG: IncrementWorkers for group '%s': %d", dbs.Newsgroup, dbs.Workers) @@ -158,30 +226,30 @@ func (dbs *GroupDBs) IncrementWorkers() { dbs.mux.Unlock() } -func (dbs *GroupDBs) Return(db *Database) { - if dbs != nil && db != nil { +func (dbs *GroupDB) Return() { + if dbs != nil && dbs.DB != nil { dbs.mux.Lock() dbs.Idle = time.Now() // Update idle time to now dbs.Workers-- dbs.mux.Unlock() } else { - log.Printf("Warning: Attempted to return a nil db=%#v dbs=%#v", db, dbs) + log.Printf("Warning: Attempted to return a nil db=%#v dbs=%#v", dbs.DB, dbs) } } -func (db *GroupDBs) ExistsMsgIdInArticlesDB(messageID string) bool { +func (db *GroupDB) ExistsMsgIdInArticlesDB(messageID string) bool { query := "SELECT 1 FROM articles WHERE message_id = ? LIMIT 1" var exists bool - if err := retryableQueryRowScan(db.DB, query, []interface{}{messageID}, &exists); err != nil { + if err := RetryableQueryRowScan(db.DB, query, []interface{}{messageID}, &exists); err != nil { return false } return exists } -func (dbs *GroupDBs) Close(who string) error { +func (dbs *GroupDB) Close(who string) error { if dbs == nil { - log.Printf("Warning: Attempted to close nil GroupDBs") - return fmt.Errorf("nil GroupDBs cannot be closed") + log.Printf("Warning: Attempted to close nil GroupDB") + return fmt.Errorf("nil GroupDB cannot be closed") } if dbs.DB != nil { if err := dbs.DB.Close(); err != nil { @@ -192,3 +260,58 @@ func (dbs *GroupDBs) Close(who string) error { } return nil } + +// GetGroupDBWithSuffix opens a group database with a custom suffix (e.g., ".new") +// Returns the database connection, the full file path, and any error +func (db *Database) GetGroupDBWithSuffix(groupName, suffix string) (*sql.DB, string, error) { + if db.dbconfig == nil { + return nil, "", 
fmt.Errorf("database configuration is not set") + } + + groupsHash := GroupHashMap.GroupToHash(groupName) + baseGroupDBdir := filepath.Join(db.dbconfig.DataDir, "/db."+suffix+"/"+groupsHash) + + if err := createDirIfNotExists(baseGroupDBdir); err != nil { + return nil, "", fmt.Errorf("failed to create group database directory: %w", err) + } + + groupDBfile := filepath.Join(baseGroupDBdir + "/" + SanitizeGroupName(groupName) + ".db") + + // Open database + groupDB, err := sql.Open("sqlite3", groupDBfile) + if err != nil { + return nil, "", fmt.Errorf("failed to open database: %w", err) + } + + // Apply pragmas for new database + if err := db.applySQLitePragmasGroupDB(groupDB); err != nil { + if cerr := groupDB.Close(); cerr != nil { + log.Printf("Failed to close groupDB during pragma error: %v", cerr) + } + return nil, "", fmt.Errorf("failed to apply pragmas: %w", err) + } + + // Apply schema/migrations + tempGroupDB := &GroupDB{ + Newsgroup: groupName, + DB: groupDB, + Idle: time.Now(), + } + + if err := db.migrateGroupDB(tempGroupDB, false); err != nil { + if cerr := groupDB.Close(); cerr != nil { + log.Printf("Failed to close groupDB during migration error: %v", cerr) + } + return nil, "", fmt.Errorf("failed to migrate group database: %w", err) + } + + return groupDB, groupDBfile, nil +} + +// CloseGroupDBDirectly closes a database connection directly +func CloseGroupDBDirectly(db *sql.DB) error { + if db == nil { + return nil + } + return db.Close() +} diff --git a/internal/database/db_init.go b/internal/database/db_init.go index af0b0bd4..fdda9758 100644 --- a/internal/database/db_init.go +++ b/internal/database/db_init.go @@ -36,8 +36,8 @@ type Database struct { mainDB *sql.DB // Per-group database connections (cached) - groupDBs map[string]*GroupDBs // map with open database pointers - openDBsNum int // Total number of open group databases + groupDB map[string]*GroupDB // map with open database pointers + openDBsNum int // Total number of open group databases 
MainMutex sync.RWMutex @@ -68,10 +68,13 @@ type DBConfig struct { ConnMaxLifetime time.Duration // Performance settings - WALMode bool // Write-Ahead Logging - SyncMode string // OFF, NORMAL, FULL - CacheSize int // KB - TempStore string // MEMORY, FILE + WALMode bool // Write-Ahead Logging + SyncMode string // OFF, NORMAL, FULL + CacheSize int // KB + TempStore string // MEMORY, FILE + MaxDBbatch int // maximum number of DB operations in a batch + MaxDBthreads int // maximum number of concurrent DB batch threads + MaxQueued int // maximum number of queued articles for batch processing // Backup settings BackupEnabled bool @@ -81,6 +84,7 @@ type DBConfig struct { // Cache settings ArticleCacheSize int // Maximum number of cached articles ArticleCacheExpiry time.Duration // Cache expiry duration + } // DefaultDBConfig returns default database configuration @@ -99,6 +103,9 @@ func DefaultDBConfig() (dbconfig *DBConfig) { BackupDir: "./backups", ArticleCacheSize: 1000, // Default cache size ArticleCacheExpiry: 15 * time.Minute, // Default cache expiry + MaxDBbatch: 1000, // default max DB batch size (db_batch.go -> *SQ3batch) + MaxDBthreads: 16, // default max DB threads (db_batch.go -> *SQ3batch) + MaxQueued: 1280, // default max queued articles (db_batch.go -> *SQ3batch) } } @@ -141,7 +148,7 @@ func OpenDatabase(dbconfig *DBConfig) (*Database, error) { db := &Database{ dbconfig: dbconfig, - groupDBs: make(map[string]*GroupDBs), + groupDB: make(map[string]*GroupDB), WG: &sync.WaitGroup{}, // Initialize wait group for background tasks } diff --git a/internal/database/db_migrate.go b/internal/database/db_migrate.go index cf4f51c2..7e1b9fb2 100644 --- a/internal/database/db_migrate.go +++ b/internal/database/db_migrate.go @@ -179,7 +179,7 @@ func ensureMigrationsTable(db *sql.DB, dbType string) error { func getAppliedMigrations(db *sql.DB, dbType string) (map[string]bool, error) { applied := make(map[string]bool) - rows, err := retryableQuery(db, `SELECT filename FROM 
schema_migrations WHERE db_type = ? OR db_type = ''`, dbType) + rows, err := RetryableQuery(db, `SELECT filename FROM schema_migrations WHERE db_type = ? OR db_type = ''`, dbType) if err != nil { log.Printf("Failed to query applied migrations for %s: %v", dbType, err) return nil, fmt.Errorf("failed to query applied migrations for %s: %w", dbType, err) @@ -277,20 +277,8 @@ func (db *Database) migrateMainDB() error { return nil } -// MigrateGroup applies migrations for a specific newsgroup database -func (db *Database) MigrateGroup(groupName string) error { - groupDBs, err := db.GetGroupDBs(groupName) - if err != nil { - log.Printf("Failed to get group database for %s: %v", groupName, err) - return fmt.Errorf("failed to get group database: %w", err) - } - defer groupDBs.Return(db) - - return db.migrateGroupDB(groupDBs) -} - // migrateGroupDB applies migrations to a group database -func (db *Database) migrateGroupDB(groupDBs *GroupDBs) error { +func (db *Database) migrateGroupDB(groupDB *GroupDB, useCache bool) error { // Initialize cache if needed initMigratedDBsCache() @@ -300,28 +288,30 @@ func (db *Database) migrateGroupDB(groupDBs *GroupDBs) error { log.Printf("Failed to get migration files: %v", err) return err } - - // Create a cache key based on group name - cacheKey := fmt.Sprintf("%s:group", groupDBs.Newsgroup) - - // Check if this database is already known to be fully migrated - migratedDBsMux.RLock() - isFullyMigrated := migratedDBsCache[cacheKey] - migratedDBsMux.RUnlock() - - if isFullyMigrated { - // Skip migration checks for this database - return nil + var cacheKey string + if useCache { + // Create a cache key based on group name + cacheKey = fmt.Sprintf("%s:group", groupDB.Newsgroup) + + // Check if this database is already known to be fully migrated + migratedDBsMux.RLock() + isFullyMigrated := migratedDBsCache[cacheKey] + migratedDBsMux.RUnlock() + + if isFullyMigrated { + // Skip migration checks for this database + return nil + } } // Ensure 
migrations table exists - if err := ensureMigrationsTable(groupDBs.DB, "group"); err != nil { + if err := ensureMigrationsTable(groupDB.DB, "group"); err != nil { log.Printf("Failed to ensure migrations table for group: %v", err) return fmt.Errorf("failed to ensure migrations table for group: %w", err) } // Get applied migrations - applied, err := getAppliedMigrations(groupDBs.DB, "group") + applied, err := getAppliedMigrations(groupDB.DB, "group") if err != nil { log.Printf("Failed to get applied migrations for group: %v", err) return fmt.Errorf("failed to get applied migrations for group: %w", err) @@ -339,17 +329,20 @@ func (db *Database) migrateGroupDB(groupDBs *GroupDBs) error { } } } - // If all migrations are applied, cache this fact - if allApplied && migrationsToApply > 0 { - migratedDBsMux.Lock() - migratedDBsCache[cacheKey] = true - migratedDBsMux.Unlock() + + if useCache { + // If all migrations are applied, cache this fact + if allApplied && migrationsToApply > 0 { + migratedDBsMux.Lock() + migratedDBsCache[cacheKey] = true + migratedDBsMux.Unlock() + } } // Apply missing migrations for group database for _, migration := range migrations { if migration.Type == MigrationTypeGroup && !applied[migration.FileName] { - if err := applyMigration(groupDBs.DB, migration, "group"); err != nil { + if err := applyMigration(groupDB.DB, migration, "group"); err != nil { return fmt.Errorf("failed to apply migration %s to group database: %w", migration.FileName, err) } //log.Printf("Done: apply migration %s to group database\n", migration.FileName) diff --git a/internal/database/db_nntp_users.go b/internal/database/db_nntp_users.go index 6fe85b27..b8de9dcd 100644 --- a/internal/database/db_nntp_users.go +++ b/internal/database/db_nntp_users.go @@ -21,7 +21,7 @@ func (db *Database) InsertNNTPUser(u *models.NNTPUser) error { query := `INSERT INTO nntp_users (username, password, maxconns, posting, web_user_id, is_active) VALUES (?, ?, ?, ?, ?, ?)` - _, err = 
retryableExec(db.mainDB, query, u.Username, string(hashedPassword), u.MaxConns, u.Posting, u.WebUserID, u.IsActive) + _, err = RetryableExec(db.mainDB, query, u.Username, string(hashedPassword), u.MaxConns, u.Posting, u.WebUserID, u.IsActive) return err } @@ -31,7 +31,7 @@ func (db *Database) GetNNTPUserByUsername(username string) (*models.NNTPUser, er FROM nntp_users WHERE username = ? AND is_active = 1` var u models.NNTPUser - err := retryableQueryRowScan(db.mainDB, query, []interface{}{username}, &u.ID, &u.Username, &u.Password, &u.MaxConns, &u.Posting, &u.WebUserID, + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{username}, &u.ID, &u.Username, &u.Password, &u.MaxConns, &u.Posting, &u.WebUserID, &u.CreatedAt, &u.UpdatedAt, &u.LastLogin, &u.IsActive) if err != nil { return nil, err @@ -45,7 +45,7 @@ func (db *Database) GetNNTPUserByID(id int) (*models.NNTPUser, error) { FROM nntp_users WHERE id = ?` var u models.NNTPUser - err := retryableQueryRowScan(db.mainDB, query, []interface{}{id}, &u.ID, &u.Username, &u.Password, &u.MaxConns, &u.Posting, &u.WebUserID, + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{id}, &u.ID, &u.Username, &u.Password, &u.MaxConns, &u.Posting, &u.WebUserID, &u.CreatedAt, &u.UpdatedAt, &u.LastLogin, &u.IsActive) if err != nil { return nil, err @@ -59,7 +59,7 @@ func (db *Database) GetNNTPUserByWebUserID(webUserID int64) (*models.NNTPUser, e FROM nntp_users WHERE web_user_id = ? 
AND is_active = 1` var u models.NNTPUser - err := retryableQueryRowScan(db.mainDB, query, []interface{}{webUserID}, &u.ID, &u.Username, &u.Password, &u.MaxConns, &u.Posting, &u.WebUserID, + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{webUserID}, &u.ID, &u.Username, &u.Password, &u.MaxConns, &u.Posting, &u.WebUserID, &u.CreatedAt, &u.UpdatedAt, &u.LastLogin, &u.IsActive) if err != nil { return nil, err @@ -76,7 +76,7 @@ func (db *Database) SearchNNTPUsers(searchTerm string, limit int) ([]*models.NNT LIMIT ?` searchPattern := "%" + searchTerm + "%" - rows, err := retryableQuery(db.mainDB, query, searchPattern, limit) + rows, err := RetryableQuery(db.mainDB, query, searchPattern, limit) if err != nil { return nil, err } @@ -97,7 +97,7 @@ func (db *Database) SearchNNTPUsers(searchTerm string, limit int) ([]*models.NNT func (db *Database) GetAllNNTPUsers() ([]*models.NNTPUser, error) { query := `SELECT id, username, password, maxconns, posting, web_user_id, created_at, updated_at, last_login, is_active FROM nntp_users ORDER BY username` - rows, err := retryableQuery(db.mainDB, query) + rows, err := RetryableQuery(db.mainDB, query) if err != nil { return nil, err } @@ -141,48 +141,48 @@ func (db *Database) UpdateNNTPUserPassword(userID int, password string) error { } query := `UPDATE nntp_users SET password = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?` - _, err = retryableExec(db.mainDB, query, string(hashedPassword), userID) + _, err = RetryableExec(db.mainDB, query, string(hashedPassword), userID) return err } // UpdateNNTPUserLastLogin updates the last login timestamp func (db *Database) UpdateNNTPUserLastLogin(userID int) error { query := `UPDATE nntp_users SET last_login = CURRENT_TIMESTAMP WHERE id = ?` - _, err := retryableExec(db.mainDB, query, userID) + _, err := RetryableExec(db.mainDB, query, userID) return err } // UpdateNNTPUserPermissions updates maxconns and posting permissions func (db *Database) UpdateNNTPUserPermissions(userID int, 
maxConns int, posting bool) error { query := `UPDATE nntp_users SET maxconns = ?, posting = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?` - _, err := retryableExec(db.mainDB, query, maxConns, posting, userID) + _, err := RetryableExec(db.mainDB, query, maxConns, posting, userID) return err } // DeactivateNNTPUser deactivates an NNTP user (soft delete) func (db *Database) DeactivateNNTPUser(userID int) error { query := `UPDATE nntp_users SET is_active = 0, updated_at = CURRENT_TIMESTAMP WHERE id = ?` - _, err := retryableExec(db.mainDB, query, userID) + _, err := RetryableExec(db.mainDB, query, userID) return err } // ActivateNNTPUser activates an NNTP user (reverses soft delete) func (db *Database) ActivateNNTPUser(userID int) error { query := `UPDATE nntp_users SET is_active = 1, updated_at = CURRENT_TIMESTAMP WHERE id = ?` - _, err := retryableExec(db.mainDB, query, userID) + _, err := RetryableExec(db.mainDB, query, userID) return err } // DeleteNNTPUser permanently deletes an NNTP user func (db *Database) DeleteNNTPUser(userID int) error { // First delete any sessions - _, err := retryableExec(db.mainDB, `DELETE FROM nntp_sessions WHERE user_id = ?`, userID) + _, err := RetryableExec(db.mainDB, `DELETE FROM nntp_sessions WHERE user_id = ?`, userID) if err != nil { return fmt.Errorf("failed to delete NNTP sessions: %w", err) } // Then delete the user - _, err = retryableExec(db.mainDB, `DELETE FROM nntp_users WHERE id = ?`, userID) + _, err = RetryableExec(db.mainDB, `DELETE FROM nntp_users WHERE id = ?`, userID) return err } @@ -191,21 +191,21 @@ func (db *Database) DeleteNNTPUser(userID int) error { // CreateNNTPSession creates a new NNTP session func (db *Database) CreateNNTPSession(userID int, connectionID, remoteAddr string) error { query := `INSERT INTO nntp_sessions (user_id, connection_id, remote_addr) VALUES (?, ?, ?)` - _, err := retryableExec(db.mainDB, query, userID, connectionID, remoteAddr) + _, err := RetryableExec(db.mainDB, query, userID, 
connectionID, remoteAddr) return err } // UpdateNNTPSessionActivity updates the last activity timestamp func (db *Database) UpdateNNTPSessionActivity(connectionID string) error { query := `UPDATE nntp_sessions SET last_activity = CURRENT_TIMESTAMP WHERE connection_id = ? AND is_active = 1` - _, err := retryableExec(db.mainDB, query, connectionID) + _, err := RetryableExec(db.mainDB, query, connectionID) return err } // CloseNNTPSession marks a session as inactive func (db *Database) CloseNNTPSession(connectionID string) error { query := `UPDATE nntp_sessions SET is_active = 0 WHERE connection_id = ?` - _, err := retryableExec(db.mainDB, query, connectionID) + _, err := RetryableExec(db.mainDB, query, connectionID) return err } @@ -213,7 +213,7 @@ func (db *Database) CloseNNTPSession(connectionID string) error { func (db *Database) GetActiveNNTPSessionsForUser(userID int) (int, error) { query := `SELECT COUNT(*) FROM nntp_sessions WHERE user_id = ? AND is_active = 1` var count int - err := retryableQueryRowScan(db.mainDB, query, []interface{}{userID}, &count) + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{userID}, &count) return count, err } @@ -221,7 +221,7 @@ func (db *Database) GetActiveNNTPSessionsForUser(userID int) (int, error) { func (db *Database) CleanupOldNNTPSessions(olderThan time.Duration) error { cutoff := time.Now().Add(-olderThan) query := `DELETE FROM nntp_sessions WHERE is_active = 0 AND last_activity < ?` - _, err := retryableExec(db.mainDB, query, cutoff) + _, err := RetryableExec(db.mainDB, query, cutoff) return err } diff --git a/internal/database/db_rescan.go b/internal/database/db_rescan.go index 47d7a684..da2bd939 100644 --- a/internal/database/db_rescan.go +++ b/internal/database/db_rescan.go @@ -1,6 +1,7 @@ package database import ( + "database/sql" "fmt" "log" "strings" @@ -8,18 +9,18 @@ import ( ) // RecoverDatabase attempts to recover the database by checking for missing articles and last_insert_ids mismatches -var 
RescanBatchSize int64 = 25000 +var RescanBatchSize int64 = 1000 func (db *Database) GetLatestArticleNumberFromOverview(newsgroup string) (int64, error) { // Since overview table is unified with articles, query articles table instead - groupDB, err := db.GetGroupDBs(newsgroup) + groupDB, err := db.GetGroupDB(newsgroup) if err != nil { return 0, err } - defer groupDB.Return(db) + defer groupDB.Return() var latestArticle int64 - err = retryableQueryRowScan(groupDB.DB, ` + err = RetryableQueryRowScan(groupDB.DB, ` SELECT MAX(article_num) FROM articles `, []interface{}{}, &latestArticle) @@ -32,7 +33,7 @@ func (db *Database) GetLatestArticleNumberFromOverview(newsgroup string) (int64, func (db *Database) GetLatestArticleNumbers(newsgroup string) (map[string]int64, error) { // Query the latest article numbers for the specified newsgroup - rows, err := retryableQuery(db.GetMainDB(), ` + rows, err := RetryableQuery(db.GetMainDB(), ` SELECT name, last_article FROM newsgroups WHERE name = ? @@ -97,15 +98,15 @@ func (db *Database) CheckDatabaseConsistency(newsgroup string) (*ConsistencyRepo } // 2. Get group databases - groupDB, err := db.GetGroupDBs(newsgroup) + groupDB, err := db.GetGroupDB(newsgroup) if err != nil { report.Errors = append(report.Errors, fmt.Sprintf("Failed to get group databases: %v", err)) return report, nil } - defer groupDB.Return(db) + defer groupDB.Return() // 3. 
Get max article numbers from each table (handle NULL for empty tables) - err = retryableQueryRowScan(groupDB.DB, "SELECT COALESCE(MAX(article_num), 0) FROM articles", []interface{}{}, &report.ArticlesMaxNum) + err = RetryableQueryRowScan(groupDB.DB, "SELECT COALESCE(MAX(article_num), 0) FROM articles", []interface{}{}, &report.ArticlesMaxNum) if err != nil { report.Errors = append(report.Errors, fmt.Sprintf("Failed to get max article_num from articles: %v", err)) } @@ -113,13 +114,13 @@ func (db *Database) CheckDatabaseConsistency(newsgroup string) (*ConsistencyRepo // Since overview is now unified with articles, OverviewMaxNum equals ArticlesMaxNum report.OverviewMaxNum = report.ArticlesMaxNum - err = retryableQueryRowScan(groupDB.DB, "SELECT COALESCE(MAX(root_article), 0) FROM threads", []interface{}{}, &report.ThreadsMaxNum) + err = RetryableQueryRowScan(groupDB.DB, "SELECT COALESCE(MAX(root_article), 0) FROM threads", []interface{}{}, &report.ThreadsMaxNum) if err != nil { report.Errors = append(report.Errors, fmt.Sprintf("Failed to get max root_article from threads: %v", err)) } // 4. 
Get counts from each table - err = retryableQueryRowScan(groupDB.DB, "SELECT COUNT(*) FROM articles", []interface{}{}, &report.ArticleCount) + err = RetryableQueryRowScan(groupDB.DB, "SELECT COUNT(*) FROM articles", []interface{}{}, &report.ArticleCount) if err != nil { report.Errors = append(report.Errors, fmt.Sprintf("Failed to get article count: %v", err)) } @@ -127,7 +128,7 @@ func (db *Database) CheckDatabaseConsistency(newsgroup string) (*ConsistencyRepo // Since overview is now unified with articles, OverviewCount equals ArticleCount report.OverviewCount = report.ArticleCount - err = retryableQueryRowScan(groupDB.DB, "SELECT COUNT(*) FROM threads", []interface{}{}, &report.ThreadCount) + err = RetryableQueryRowScan(groupDB.DB, "SELECT COUNT(*) FROM threads", []interface{}{}, &report.ThreadCount) if err != nil { report.Errors = append(report.Errors, fmt.Sprintf("Failed to get thread count: %v", err)) } @@ -157,7 +158,7 @@ func (db *Database) CheckDatabaseConsistency(newsgroup string) (*ConsistencyRepo } // findMissingArticles finds gaps in article numbering using batched processing -func (db *Database) findMissingArticles(groupDB *GroupDBs, maxArticleNum int64) []int64 { +func (db *Database) findMissingArticles(groupDB *GroupDB, maxArticleNum int64) []int64 { var missing []int64 if maxArticleNum <= 0 { return missing @@ -170,7 +171,7 @@ func (db *Database) findMissingArticles(groupDB *GroupDBs, maxArticleNum int64) for offset < maxArticleNum { // Get batch of article numbers - rows, err := retryableQuery(groupDB.DB, + rows, err := RetryableQuery(groupDB.DB, "SELECT article_num FROM articles WHERE article_num > ? 
ORDER BY article_num LIMIT ?", offset, RescanBatchSize) if err != nil { @@ -217,7 +218,7 @@ func (db *Database) findMissingArticles(groupDB *GroupDBs, maxArticleNum int64) } // findOrphanedThreads finds thread entries pointing to non-existent articles using batched processing -func (db *Database) findOrphanedThreads(groupDB *GroupDBs) []int64 { +func (db *Database) findOrphanedThreads(groupDB *GroupDB) []int64 { var orphaned []int64 log.Printf("Building article index in batches of %d", RescanBatchSize) @@ -229,7 +230,7 @@ func (db *Database) findOrphanedThreads(groupDB *GroupDBs) []int64 { for { // Get batch of article numbers - rows, err := retryableQuery(groupDB.DB, + rows, err := RetryableQuery(groupDB.DB, "SELECT article_num FROM articles WHERE article_num > ? ORDER BY article_num LIMIT ?", offset, RescanBatchSize) if err != nil { @@ -275,7 +276,7 @@ func (db *Database) findOrphanedThreads(groupDB *GroupDBs) []int64 { for { // Get batch of distinct root_article numbers from threads table - rows, err := retryableQuery(groupDB.DB, + rows, err := RetryableQuery(groupDB.DB, "SELECT DISTINCT root_article FROM threads WHERE root_article > ? ORDER BY root_article LIMIT ?", offset, RescanBatchSize) if err != nil { @@ -375,11 +376,12 @@ const query_RebuildThreadsFromScratch1 = "SELECT COUNT(*) FROM articles" const query_RebuildThreadsFromScratch2 = "SELECT COUNT(*) FROM threads" const query_RebuildThreadsFromScratch3 = "DELETE FROM %s" const query_RebuildThreadsFromScratch4 = "DELETE FROM sqlite_sequence WHERE name = 'threads'" -const query_RebuildThreadsFromScratch5 = "SELECT article_num, message_id FROM articles ORDER BY article_num LIMIT ? OFFSET ?" +const query_RebuildThreadsFromScratch5 = "SELECT article_num, message_id FROM articles ORDER BY date_sent ASC LIMIT ? OFFSET ?" 
// RebuildThreadsFromScratch completely rebuilds all thread relationships for a newsgroup // This function deletes all existing threads and rebuilds them from article 1 based on message references -func (db *Database) RebuildThreadsFromScratch(newsgroup string, verbose bool) (*ThreadRebuildReport, error) { +// If groupDB is provided (not nil), it will use that instead of opening the newsgroup's database +func (db *Database) RebuildThreadsFromScratch(newsgroup string, verbose bool, groupDB *GroupDB) (*ThreadRebuildReport, error) { report := &ThreadRebuildReport{ Newsgroup: newsgroup, StartTime: time.Now(), @@ -390,20 +392,37 @@ func (db *Database) RebuildThreadsFromScratch(newsgroup string, verbose bool) (* log.Printf("RebuildThreadsFromScratch: Starting complete thread rebuild for newsgroup '%s'", newsgroup) } - // Get group database - groupDB, err := db.GetGroupDBs(newsgroup) - if err != nil { - report.Errors = append(report.Errors, fmt.Sprintf("Failed to get group database: %v", err)) - return report, err + // TIMER: Get group database + stepStart := time.Now() + var shouldCloseDB bool + if groupDB == nil { + var err error + groupDB, err = db.GetGroupDB(newsgroup) + if err != nil { + report.Errors = append(report.Errors, fmt.Sprintf("Failed to get group database: %v", err)) + return report, err + } + shouldCloseDB = true + } + if verbose { + log.Printf("⏱️ TIMER: GetGroupDB took %v", time.Since(stepStart)) } - defer groupDB.Return(db) - // Get total article count - err = retryableQueryRowScan(groupDB.DB, query_RebuildThreadsFromScratch1, []interface{}{}, &report.TotalArticles) + if shouldCloseDB { + defer groupDB.Return() + } + + // TIMER: Get total article count + stepStart = time.Now() + var err error + err = RetryableQueryRowScan(groupDB.DB, query_RebuildThreadsFromScratch1, []interface{}{}, &report.TotalArticles) if err != nil { report.Errors = append(report.Errors, fmt.Sprintf("Failed to get article count: %v", err)) return report, err } + if verbose { + 
log.Printf("⏱️ TIMER: Get article count took %v", time.Since(stepStart)) + } if report.TotalArticles == 0 { if verbose { @@ -418,7 +437,8 @@ func (db *Database) RebuildThreadsFromScratch(newsgroup string, verbose bool) (* log.Printf("RebuildThreadsFromScratch: Found %d articles to process", report.TotalArticles) } - // Step 1: Clear existing thread data + // TIMER: Step 1: Clear existing thread data + stepStart = time.Now() if verbose { log.Printf("RebuildThreadsFromScratch: Clearing existing thread data...") } @@ -438,11 +458,15 @@ func (db *Database) RebuildThreadsFromScratch(newsgroup string, verbose bool) (* // Clear thread-related tables in dependency order tables := []string{"tree_stats", "cached_trees", "thread_cache", "threads"} for _, table := range tables { + tableStart := time.Now() _, err = tx.Exec(fmt.Sprintf(query_RebuildThreadsFromScratch3, table)) if err != nil { report.Errors = append(report.Errors, fmt.Sprintf("Failed to clear table %s: %v", table, err)) return report, err } + if verbose { + log.Printf("⏱️ TIMER: Clear table %s took %v", table, time.Since(tableStart)) + } } // Reset auto-increment for threads table @@ -462,24 +486,28 @@ func (db *Database) RebuildThreadsFromScratch(newsgroup string, verbose bool) (* if verbose { log.Printf("RebuildThreadsFromScratch: Cleared %d existing thread entries", existingThreads) + log.Printf("⏱️ TIMER: Clear existing thread data (total) took %v", time.Since(stepStart)) } - // Step 2: Build message-ID to article-number mapping + // TIMER: Step 2: Build message-ID to article-number mapping + stepStart = time.Now() if verbose { log.Printf("RebuildThreadsFromScratch: Building message-ID mapping...") } msgIDToArticleNum := make(map[string]int64) var offset int64 = 0 + var totalBatches int64 = 0 for offset < report.TotalArticles { + batchStart := time.Now() currentBatchSize := RescanBatchSize if offset+RescanBatchSize > report.TotalArticles { currentBatchSize = report.TotalArticles - offset } // Load batch of 
article mappings - rows, err := retryableQuery(groupDB.DB, query_RebuildThreadsFromScratch5, currentBatchSize, offset) + rows, err := RetryableQuery(groupDB.DB, query_RebuildThreadsFromScratch5, currentBatchSize, offset) if err != nil { report.Errors = append(report.Errors, fmt.Sprintf("Failed to query articles batch: %v", err)) @@ -499,23 +527,30 @@ func (db *Database) RebuildThreadsFromScratch(newsgroup string, verbose bool) (* rows.Close() offset += int64(currentBatchSize) + totalBatches++ - if verbose && offset%1000 == 0 { - log.Printf("RebuildThreadsFromScratch: Built message-ID mapping: %d/%d articles", offset, report.TotalArticles) + if verbose && offset%10000 == 0 { + log.Printf("RebuildThreadsFromScratch: Built message-ID mapping: %d/%d articles (batch took %v)", + offset, report.TotalArticles, time.Since(batchStart)) } } if verbose { + avgBatchTime := time.Since(stepStart) / time.Duration(totalBatches) log.Printf("RebuildThreadsFromScratch: Message-ID mapping complete: %d entries", len(msgIDToArticleNum)) + log.Printf("⏱️ TIMER: Build message-ID mapping (total) took %v (avg batch: %v)", time.Since(stepStart), avgBatchTime) } - // Step 3: Process articles in batches to build thread relationships + // TIMER: Step 3: Process articles in batches to build thread relationships + stepStart = time.Now() if verbose { log.Printf("RebuildThreadsFromScratch: Building thread relationships...") } offset = 0 + totalBatches = 0 for offset < report.TotalArticles { + batchStart := time.Now() currentBatchSize := RescanBatchSize if offset+RescanBatchSize > report.TotalArticles { currentBatchSize = report.TotalArticles - offset @@ -529,13 +564,19 @@ func (db *Database) RebuildThreadsFromScratch(newsgroup string, verbose bool) (* report.ThreadsRebuilt += int64(threadsBuilt) offset += int64(currentBatchSize) + totalBatches++ - if verbose && offset%1000 == 0 { - log.Printf("RebuildThreadsFromScratch: Threading progress: %d/%d articles processed, %d threads built", - offset, 
report.TotalArticles, report.ThreadsRebuilt) + if verbose && offset%10000 == 0 { + log.Printf("RebuildThreadsFromScratch: Threading progress: %d/%d articles processed, %d threads built (batch took %v)", + offset, report.TotalArticles, report.ThreadsRebuilt, time.Since(batchStart)) } } + if verbose { + avgBatchTime := time.Since(stepStart) / time.Duration(totalBatches) + log.Printf("⏱️ TIMER: Build thread relationships (total) took %v (avg batch: %v)", time.Since(stepStart), avgBatchTime) + } + report.EndTime = time.Now() report.Duration = report.EndTime.Sub(report.StartTime) @@ -553,7 +594,7 @@ func (db *Database) RebuildThreadsFromScratch(newsgroup string, verbose bool) (* const query_processThreadBatch1 = ` SELECT article_num, message_id, "references", date_sent FROM articles - ORDER BY article_num + ORDER BY date_sent ASC LIMIT ? OFFSET ? ` const query_processThreadBatch2 = "INSERT INTO threads (root_article, parent_article, child_article, depth, thread_order) VALUES (?, ?, ?, 0, 0)" @@ -561,15 +602,22 @@ const query_processThreadBatch3 = "INSERT INTO threads (root_article, parent_art // processThreadBatch processes a batch of articles to build thread relationships // Based on the actual threading system: only ROOT articles go in threads table, replies only update thread_cache -func (db *Database) processThreadBatch(groupDB *GroupDBs, msgIDToArticleNum map[string]int64, offset, batchSize int64, verbose bool) (int, error) { - // Get batch of articles with their references and dates - rows, err := retryableQuery(groupDB.DB, query_processThreadBatch1, batchSize, offset) +func (db *Database) processThreadBatch(groupDB *GroupDB, msgIDToArticleNum map[string]int64, offset, batchSize int64, verbose bool) (int, error) { + batchStartTime := time.Now() + + // TIMER: Get batch of articles with their references and dates + stepStart := time.Now() + rows, err := RetryableQuery(groupDB.DB, query_processThreadBatch1, batchSize, offset) if err != nil { return 0, 
fmt.Errorf("failed to query articles: %w", err) } defer rows.Close() + if verbose { + log.Printf("⏱️ processThreadBatch: Query articles took %v", time.Since(stepStart)) + } - // Separate roots and replies for processing + // TIMER: Separate roots and replies for processing + stepStart = time.Now() var threadRoots []struct { articleNum int64 dateSent time.Time @@ -583,6 +631,7 @@ func (db *Database) processThreadBatch(groupDB *GroupDBs, msgIDToArticleNum map[ } // Process each article to determine if it's a root or reply + rowCount := 0 for rows.Next() { var articleNum int64 var messageID, references string @@ -595,6 +644,7 @@ func (db *Database) processThreadBatch(groupDB *GroupDBs, msgIDToArticleNum map[ } continue } + rowCount++ refs := db.parseReferences(references) @@ -646,11 +696,16 @@ func (db *Database) processThreadBatch(groupDB *GroupDBs, msgIDToArticleNum map[ if err = rows.Err(); err != nil { return 0, fmt.Errorf("error iterating articles: %w", err) } + if verbose { + log.Printf("⏱️ processThreadBatch: Process %d rows (roots=%d, replies=%d) took %v", + rowCount, len(threadRoots), len(threadReplies), time.Since(stepStart)) + } threadsBuilt := 0 - // Step 1: Insert thread ROOTS into threads table + // TIMER: Step 1: Insert thread ROOTS into threads table if len(threadRoots) > 0 { + stepStart = time.Now() tx, err := groupDB.DB.Begin() if err != nil { return 0, fmt.Errorf("failed to begin threads transaction: %w", err) @@ -678,21 +733,26 @@ func (db *Database) processThreadBatch(groupDB *GroupDBs, msgIDToArticleNum map[ if err := tx.Commit(); err != nil { return threadsBuilt, fmt.Errorf("failed to commit threads transaction: %w", err) } + if verbose { + log.Printf("⏱️ processThreadBatch: Insert %d thread roots took %v", len(threadRoots), time.Since(stepStart)) + } - // Step 2: Initialize thread_cache for roots - for _, root := range threadRoots { - err := db.initializeThreadCacheSimple(groupDB, root.articleNum, root.dateSent) - if err != nil { - if verbose { 
- log.Printf("processThreadBatch: Failed to initialize thread cache for root %d: %v", root.articleNum, err) - } - // Don't fail the whole operation for cache errors + // TIMER: Step 2: Initialize thread_cache for roots (BATCHED) + stepStart = time.Now() + if err := db.batchInitializeThreadCache(groupDB, threadRoots); err != nil { + if verbose { + log.Printf("processThreadBatch: Failed to batch initialize thread cache: %v", err) } + // Don't fail the whole operation for cache errors + } + if verbose { + log.Printf("⏱️ processThreadBatch: Batch initialize %d thread caches took %v", len(threadRoots), time.Since(stepStart)) } } - // Step 3: Insert REPLIES into threads table + // TIMER: Step 3: Insert REPLIES into threads table if len(threadReplies) > 0 { + stepStart = time.Now() tx, err := groupDB.DB.Begin() if err != nil { return threadsBuilt, fmt.Errorf("failed to begin replies transaction: %w", err) @@ -722,11 +782,12 @@ func (db *Database) processThreadBatch(groupDB *GroupDBs, msgIDToArticleNum map[ return threadsBuilt, fmt.Errorf("failed to commit replies transaction: %w", err) } - if verbose && repliesBuilt > 0 { - log.Printf("processThreadBatch: Inserted %d replies into threads table", repliesBuilt) + if verbose { + log.Printf("⏱️ processThreadBatch: Insert %d replies took %v", repliesBuilt, time.Since(stepStart)) } - // Step 4: Update thread_cache for replies (build cache updates from replies) + // TIMER: Step 4: Update thread_cache for replies (build cache updates from replies) + stepStart = time.Now() threadCacheUpdates := make(map[int64][]int64) // Changed to map[rootID][]childArticleNums for _, reply := range threadReplies { threadCacheUpdates[reply.rootNum] = append(threadCacheUpdates[reply.rootNum], reply.articleNum) @@ -740,6 +801,13 @@ func (db *Database) processThreadBatch(groupDB *GroupDBs, msgIDToArticleNum map[ // Don't fail the whole operation for cache errors } } + if verbose { + log.Printf("⏱️ processThreadBatch: Update thread cache for %d roots 
took %v", len(threadCacheUpdates), time.Since(stepStart)) + } + } + + if verbose { + log.Printf("⏱️ processThreadBatch: TOTAL batch processing took %v", time.Since(batchStartTime)) } return threadsBuilt, nil @@ -752,7 +820,8 @@ const query_initializeThreadCacheSimple1 = ` ` // initializeThreadCacheSimple initializes thread cache for a root article -func (db *Database) initializeThreadCacheSimple(groupDB *GroupDBs, threadRoot int64, rootDate time.Time) error { +// DEPRECATED: Use batchInitializeThreadCache for better performance +func (db *Database) initializeThreadCacheSimple(groupDB *GroupDB, threadRoot int64, rootDate time.Time) error { // Validate root date - skip obvious future posts now := time.Now().UTC() futureLimit := now.Add(25 * time.Hour) @@ -764,7 +833,7 @@ func (db *Database) initializeThreadCacheSimple(groupDB *GroupDBs, threadRoot in rootDate = now } - _, err := retryableExec(groupDB.DB, query_initializeThreadCacheSimple1, + _, err := RetryableExec(groupDB.DB, query_initializeThreadCacheSimple1, threadRoot, rootDate.UTC().Format("2006-01-02 15:04:05"), threadRoot, // last_child_number starts as the root itself @@ -778,6 +847,59 @@ func (db *Database) initializeThreadCacheSimple(groupDB *GroupDBs, threadRoot in return nil } +// batchInitializeThreadCache initializes thread cache for multiple root articles in a single transaction +func (db *Database) batchInitializeThreadCache(groupDB *GroupDB, threadRoots []struct { + articleNum int64 + dateSent time.Time +}) error { + if len(threadRoots) == 0 { + return nil + } + + now := time.Now().UTC() + futureLimit := now.Add(25 * time.Hour) + + // Use RetryableTransactionExec for SQLite lock safety + err := RetryableTransactionExec(groupDB.DB, func(tx *sql.Tx) error { + // Prepare statement for batch inserts + stmt, err := tx.Prepare(query_initializeThreadCacheSimple1) + if err != nil { + return fmt.Errorf("failed to prepare thread cache insert statement: %w", err) + } + defer stmt.Close() + + // Batch insert 
all thread cache entries + for _, root := range threadRoots { + rootDate := root.dateSent + + // Validate root date - skip obvious future posts + if rootDate.UTC().After(futureLimit) { + log.Printf("batchInitializeThreadCache: Using current time for thread root %d with future date %v", + root.articleNum, rootDate.Format("2006-01-02 15:04:05")) + rootDate = now + } + + _, err = stmt.Exec( + root.articleNum, + rootDate.UTC().Format("2006-01-02 15:04:05"), + root.articleNum, // last_child_number starts as the root itself + rootDate.UTC().Format("2006-01-02 15:04:05"), + ) + if err != nil { + return fmt.Errorf("failed to initialize thread cache for root %d: %w", root.articleNum, err) + } + } + + return nil // Transaction will be committed by RetryableTransactionExec + }) + + if err != nil { + return fmt.Errorf("failed to batch initialize thread cache: %w", err) + } + + return nil +} + const query_updateThreadCacheWithChildren1 = "SELECT message_count, child_articles FROM thread_cache WHERE thread_root = ?" 
const query_updateThreadCacheWithChildren2 = ` UPDATE thread_cache @@ -787,18 +909,27 @@ const query_updateThreadCacheWithChildren2 = ` ` // updateThreadCacheWithChildren updates the thread_cache table with child article lists -func (db *Database) updateThreadCacheWithChildren(groupDB *GroupDBs, rootUpdates map[int64][]int64, verbose bool) error { +func (db *Database) updateThreadCacheWithChildren(groupDB *GroupDB, rootUpdates map[int64][]int64, verbose bool) error { if len(rootUpdates) == 0 { return nil } + funcStart := time.Now() + + // TIMER: Begin transaction + stepStart := time.Now() tx, err := groupDB.DB.Begin() if err != nil { return fmt.Errorf("failed to begin thread cache transaction: %w", err) } defer tx.Rollback() + txTime := time.Since(stepStart) + + // TIMER: Update each thread root's cache + stepStart = time.Now() + updatedCount := 0 + skippedCount := 0 - // Update each thread root's cache for rootArticle, childArticleNums := range rootUpdates { // Build comma-separated child articles list childArticlesStr := "" @@ -815,9 +946,10 @@ func (db *Database) updateThreadCacheWithChildren(groupDB *GroupDBs, rootUpdates var currentChildren string err := tx.QueryRow(query_updateThreadCacheWithChildren1, rootArticle).Scan(&currentCount, &currentChildren) if err != nil { - if verbose { - log.Printf("updateThreadCacheWithChildren: No thread cache entry for root %d, skipping", rootArticle) - } + //if verbose { + // log.Printf("updateThreadCacheWithChildren: No thread cache entry for root %d, skipping", rootArticle) + //} + skippedCount++ continue } @@ -850,19 +982,28 @@ func (db *Database) updateThreadCacheWithChildren(groupDB *GroupDBs, rootUpdates _, err = tx.Exec(query_updateThreadCacheWithChildren2, finalChildrenStr, newMessageCount, rootArticle) if err != nil { - if verbose { - log.Printf("updateThreadCacheWithChildren: Failed to update cache for root %d: %v", rootArticle, err) - } + log.Printf("updateThreadCacheWithChildren: Failed to update cache for root %d: %v", 
rootArticle, err) continue } + updatedCount++ - if verbose { - log.Printf("updateThreadCacheWithChildren: Updated root %d with %d replies: %s", - rootArticle, len(finalChildren), finalChildrenStr) - } + //if verbose { + // log.Printf("updateThreadCacheWithChildren: Updated root %d with %d replies: %s", rootArticle, len(finalChildren), finalChildrenStr) + //} + } + updateTime := time.Since(stepStart) + + // TIMER: Commit transaction + stepStart = time.Now() + commitErr := tx.Commit() + commitTime := time.Since(stepStart) + + if verbose { + log.Printf("⏱️ updateThreadCacheWithChildren: Begin=%v, Update %d roots=%v, Commit=%v, TOTAL=%v (skipped %d)", + txTime, updatedCount, updateTime, commitTime, time.Since(funcStart), skippedCount) } - return tx.Commit() + return commitErr } // parseReferences parses the references header into individual message IDs diff --git a/internal/database/db_sections.go b/internal/database/db_sections.go index 8e124ab7..4f5a17b9 100644 --- a/internal/database/db_sections.go +++ b/internal/database/db_sections.go @@ -15,7 +15,7 @@ func (db *Database) GetAllSections() ([]*models.Section, error) { ORDER BY sort_order ASC, display_name ASC ` - rows, err := retryableQuery(db.mainDB, query) + rows, err := RetryableQuery(db.mainDB, query) if err != nil { return nil, fmt.Errorf("failed to query sections: %w", err) } @@ -57,7 +57,7 @@ func (db *Database) GetAllSectionsWithCounts() ([]*models.Section, error) { ORDER BY s.sort_order ASC, s.display_name ASC ` - rows, err := retryableQuery(db.mainDB, query) + rows, err := RetryableQuery(db.mainDB, query) if err != nil { return nil, fmt.Errorf("failed to query sections with counts: %w", err) } @@ -94,7 +94,7 @@ func (db *Database) GetAllSectionGroups() ([]*models.SectionGroup, error) { ORDER BY section_id ASC, sort_order ASC, newsgroup_name ASC ` - rows, err := retryableQuery(db.mainDB, query) + rows, err := RetryableQuery(db.mainDB, query) if err != nil { return nil, fmt.Errorf("failed to query section 
groups: %w", err) } @@ -130,7 +130,7 @@ func (db *Database) GetSectionByID(id int) (*models.Section, error) { ` section := &models.Section{} - err := retryableQueryRowScan(db.mainDB, query, []interface{}{id}, + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{id}, &section.ID, &section.Name, &section.DisplayName, @@ -156,7 +156,7 @@ func (db *Database) SectionNameExists(name string) (bool, error) { query := `SELECT COUNT(*) FROM sections WHERE name = ?` var count int - err := retryableQueryRowScan(db.mainDB, query, []interface{}{name}, &count) + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{name}, &count) if err != nil { return false, fmt.Errorf("failed to check section name existence: %w", err) } @@ -169,7 +169,7 @@ func (db *Database) SectionNameExistsExcluding(name string, excludeID int) (bool query := `SELECT COUNT(*) FROM sections WHERE name = ? AND id != ?` var count int - err := retryableQueryRowScan(db.mainDB, query, []interface{}{name, excludeID}, &count) + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{name, excludeID}, &count) if err != nil { return false, fmt.Errorf("failed to check section name existence: %w", err) } @@ -184,7 +184,7 @@ func (db *Database) CreateSection(section *models.Section) error { VALUES (?, ?, ?, ?, ?, ?, ?) ` - result, err := retryableExec(db.mainDB, query, + result, err := RetryableExec(db.mainDB, query, section.Name, section.DisplayName, section.Description, @@ -215,7 +215,7 @@ func (db *Database) UpdateSection(section *models.Section) error { WHERE id = ? 
` - result, err := retryableExec(db.mainDB, query, + result, err := RetryableExec(db.mainDB, query, section.Name, section.DisplayName, section.Description, @@ -243,7 +243,7 @@ func (db *Database) UpdateSection(section *models.Section) error { // DeleteSection deletes a section and all its group assignments func (db *Database) DeleteSection(id int) error { - return retryableTransactionExec(db.mainDB, func(tx *sql.Tx) error { + return RetryableTransactionExec(db.mainDB, func(tx *sql.Tx) error { // Delete section groups first (foreign key constraint) _, err := tx.Exec("DELETE FROM section_groups WHERE section_id = ?", id) if err != nil { @@ -278,7 +278,7 @@ func (db *Database) GetSectionGroupByID(id int) (*models.SectionGroup, error) { ` sg := &models.SectionGroup{} - err := retryableQueryRowScan(db.mainDB, query, []interface{}{id}, + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{id}, &sg.ID, &sg.SectionID, &sg.NewsgroupName, @@ -303,7 +303,7 @@ func (db *Database) SectionGroupExists(sectionID int, newsgroupName string) (boo query := `SELECT COUNT(*) FROM section_groups WHERE section_id = ? AND newsgroup_name = ?` var count int - err := retryableQueryRowScan(db.mainDB, query, []interface{}{sectionID, newsgroupName}, &count) + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{sectionID, newsgroupName}, &count) if err != nil { return false, fmt.Errorf("failed to check section group existence: %w", err) } @@ -318,7 +318,7 @@ func (db *Database) CreateSectionGroup(sg *models.SectionGroup) error { VALUES (?, ?, ?, ?, ?, ?) 
` - result, err := retryableExec(db.mainDB, query, + result, err := RetryableExec(db.mainDB, query, sg.SectionID, sg.NewsgroupName, sg.GroupDescription, @@ -344,7 +344,7 @@ func (db *Database) CreateSectionGroup(sg *models.SectionGroup) error { func (db *Database) DeleteSectionGroup(id int) error { query := `DELETE FROM section_groups WHERE id = ?` - result, err := retryableExec(db.mainDB, query, id) + result, err := RetryableExec(db.mainDB, query, id) if err != nil { return fmt.Errorf("failed to delete section group: %w", err) } @@ -371,7 +371,7 @@ func (db *Database) GetNewsgroupByName(name string) (*models.Newsgroup, error) { ` ng := &models.Newsgroup{} - err := retryableQueryRowScan(db.mainDB, query, []interface{}{name}, + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{name}, &ng.ID, &ng.Name, &ng.Active, diff --git a/internal/database/db_sessions.go b/internal/database/db_sessions.go index 5b58dabf..ae0a9b27 100644 --- a/internal/database/db_sessions.go +++ b/internal/database/db_sessions.go @@ -49,7 +49,7 @@ func (db *Database) CreateUserSession(userID int64, remoteIP string) (string, er updated_at = CURRENT_TIMESTAMP WHERE id = ?` - _, err = retryableExec(db.mainDB, query, sessionID, remoteIP, expiresAt, userID) + _, err = RetryableExec(db.mainDB, query, sessionID, remoteIP, expiresAt, userID) if err != nil { return "", fmt.Errorf("failed to create user session: %w", err) } @@ -69,7 +69,7 @@ func (db *Database) ValidateUserSession(sessionID string) (*models.User, error) FROM users WHERE session_id = ? 
AND session_expires_at > CURRENT_TIMESTAMP` var user models.User - err := retryableQueryRowScan(db.mainDB, query, []interface{}{sessionID}, + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{sessionID}, &user.ID, &user.Username, &user.Email, &user.PasswordHash, &user.DisplayName, &user.SessionID, &user.LastLoginIP, &user.SessionExpiresAt, &user.LoginAttempts, &user.CreatedAt, &user.UpdatedAt) @@ -81,7 +81,7 @@ func (db *Database) ValidateUserSession(sessionID string) (*models.User, error) // Extend session expiration (sliding timeout) in UTC - write operation newExpiresAt := time.Now().UTC().Add(SessionTimeout) updateQuery := `UPDATE users SET session_expires_at = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?` - _, err = retryableExec(db.mainDB, updateQuery, newExpiresAt, user.ID) + _, err = RetryableExec(db.mainDB, updateQuery, newExpiresAt, user.ID) if err != nil { // Log error but don't fail validation fmt.Printf("Warning: Failed to extend session expiration: %v\n", err) @@ -99,7 +99,7 @@ func (db *Database) InvalidateUserSession(userID int64) error { session_expires_at = NULL, updated_at = CURRENT_TIMESTAMP WHERE id = ?` - _, err := retryableExec(db.mainDB, query, userID) + _, err := RetryableExec(db.mainDB, query, userID) return err } @@ -110,7 +110,7 @@ func (db *Database) InvalidateUserSessionBySessionID(sessionID string) error { session_expires_at = NULL, updated_at = CURRENT_TIMESTAMP WHERE session_id = ?` - _, err := retryableExec(db.mainDB, query, sessionID) + _, err := RetryableExec(db.mainDB, query, sessionID) return err } @@ -121,7 +121,7 @@ func (db *Database) IncrementLoginAttempts(username string) error { updated_at = CURRENT_TIMESTAMP WHERE username = ?` - _, err := retryableExec(db.mainDB, query, username) + _, err := RetryableExec(db.mainDB, query, username) return err } @@ -132,7 +132,7 @@ func (db *Database) ResetLoginAttempts(userID int64) error { updated_at = CURRENT_TIMESTAMP WHERE id = ?` - _, err := retryableExec(db.mainDB, 
query, userID) + _, err := RetryableExec(db.mainDB, query, userID) return err } @@ -142,7 +142,7 @@ func (db *Database) IsUserLockedOut(username string) (bool, error) { var attempts int var updatedAt time.Time - err := retryableQueryRowScan(db.mainDB, query, []interface{}{username}, &attempts, &updatedAt) + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{username}, &attempts, &updatedAt) if err != nil { return false, err } @@ -156,7 +156,7 @@ func (db *Database) IsUserLockedOut(username string) (bool, error) { } else { // Lockout period expired, reset attempts resetQuery := `UPDATE users SET login_attempts = 0, updated_at = CURRENT_TIMESTAMP WHERE username = ?` - retryableExec(db.mainDB, resetQuery, username) + RetryableExec(db.mainDB, resetQuery, username) } } @@ -171,7 +171,7 @@ func (db *Database) CleanupExpiredSessions() error { updated_at = CURRENT_TIMESTAMP WHERE session_expires_at < CURRENT_TIMESTAMP` - result, err := retryableExec(db.mainDB, query) + result, err := RetryableExec(db.mainDB, query) if err != nil { return err } diff --git a/internal/database/progress.go b/internal/database/progress.go index 08e2a5bf..935cf374 100644 --- a/internal/database/progress.go +++ b/internal/database/progress.go @@ -7,6 +7,7 @@ import ( "log" "os" "path/filepath" + "sync" "time" _ "github.com/mattn/go-sqlite3" @@ -14,7 +15,8 @@ import ( // ProgressDB tracks fetching progress for newsgroups per backend type ProgressDB struct { - db *sql.DB + db *sql.DB + mux sync.RWMutex } // ProgressEntry represents the fetching progress for a newsgroup on a backend @@ -85,20 +87,20 @@ func (p *ProgressDB) initSchema() (err error) { return err } -const query_GetLastArticle = ` -SELECT last_article FROM progress -WHERE backend_name = ? AND newsgroup_name = ? -` +const query_GetLastArticle = `SELECT last_article FROM progress WHERE backend_name = ? 
AND newsgroup_name = ?` // GetLastArticle returns the last fetched article number for a newsgroup on a backend func (p *ProgressDB) GetLastArticle(backendName, newsgroupName string) (int64, error) { + //p.mux.RLock() var lastArticle int64 - err := retryableQueryRowScan(p.db, query_GetLastArticle, []interface{}{backendName, newsgroupName}, &lastArticle) - + err := RetryableQueryRowScan(p.db, query_GetLastArticle, []interface{}{backendName, newsgroupName}, &lastArticle) if err == sql.ErrNoRows { + //p.mux.RUnlock() //log.Printf("progressDB.GetLastArticle: provider '%s', newsgroup '%s' has no progress", backendName, newsgroupName) - return 0, nil // No previous progress, start from 0 + p.UpdateProgress(backendName, newsgroupName, 0) // Initialize progress + return 0, nil // No previous progress, start from 0 } + //p.mux.RUnlock() if err != nil { return -999, fmt.Errorf("failed to get last article: %w", err) } @@ -109,8 +111,7 @@ func (p *ProgressDB) GetLastArticle(backendName, newsgroupName string) (int64, e return lastArticle, nil } -const query_UpdateProgress = ` -INSERT INTO progress (backend_name, newsgroup_name, last_article, last_fetched, updated_at) +const query_UpdateProgress = `INSERT INTO progress (backend_name, newsgroup_name, last_article, last_fetched, updated_at) VALUES (?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) ON CONFLICT(backend_name, newsgroup_name) DO UPDATE SET last_article = excluded.last_article, @@ -120,7 +121,9 @@ ON CONFLICT(backend_name, newsgroup_name) DO UPDATE SET // UpdateProgress updates the fetching progress for a newsgroup on a backend func (p *ProgressDB) UpdateProgress(backendName, newsgroupName string, lastArticle int64) error { - _, err := retryableExec(p.db, query_UpdateProgress, backendName, newsgroupName, lastArticle) + //p.mux.Lock() + //defer p.mux.Unlock() + _, err := RetryableExec(p.db, query_UpdateProgress, backendName, newsgroupName, lastArticle) if err != nil { return fmt.Errorf("failed to update progress: %w", err) 
} @@ -138,7 +141,9 @@ ORDER BY backend_name, newsgroup_name // GetAllProgress returns all progress entries func (p *ProgressDB) GetAllProgress() ([]*ProgressEntry, error) { - rows, err := retryableQuery(p.db, query_GetAllProgress) + //p.mux.RLock() + //defer p.mux.RUnlock() + rows, err := RetryableQuery(p.db, query_GetAllProgress) if err != nil { return nil, fmt.Errorf("failed to query progress: %w", err) } @@ -178,7 +183,9 @@ ORDER BY newsgroup_name // GetProgressForBackend returns progress entries for a specific backend func (p *ProgressDB) GetProgressForBackend(backendName string) ([]*ProgressEntry, error) { - rows, err := retryableQuery(p.db, query_GetProgressForBackend, backendName) + //p.mux.RLock() + //defer p.mux.RUnlock() + rows, err := RetryableQuery(p.db, query_GetProgressForBackend, backendName) if err != nil { return nil, fmt.Errorf("failed to query progress for backend: %w", err) } diff --git a/internal/database/queries.go b/internal/database/queries.go index 3cc9b59e..f022878b 100644 --- a/internal/database/queries.go +++ b/internal/database/queries.go @@ -54,7 +54,7 @@ const query_AddProvider = `INSERT INTO providers (name, grp, host, port, ssl, us VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)` func (db *Database) AddProvider(provider *models.Provider) error { - _, err := retryableExec(db.mainDB, query_AddProvider, + _, err := RetryableExec(db.mainDB, query_AddProvider, provider.Name, provider.Grp, provider.Host, provider.Port, provider.SSL, provider.Username, provider.Password, provider.MaxConns, provider.Enabled, provider.Priority, @@ -69,7 +69,7 @@ func (db *Database) AddProvider(provider *models.Provider) error { // DeleteProvider deletes a provider from the main database func (db *Database) DeleteProvider(id int) error { - _, err := retryableExec(db.mainDB, `DELETE FROM providers WHERE id = ?`, id) + _, err := RetryableExec(db.mainDB, `DELETE FROM providers WHERE id = ?`, id) if err != nil { return fmt.Errorf("failed to delete 
provider %d: %w", id, err) } @@ -83,7 +83,7 @@ const query_SetProvider = `UPDATE providers SET WHERE id = ?` func (db *Database) SetProvider(provider *models.Provider) error { - _, err := retryableExec(db.mainDB, query_SetProvider, + _, err := RetryableExec(db.mainDB, query_SetProvider, provider.Grp, provider.Host, provider.Port, provider.SSL, provider.Username, provider.Password, provider.MaxConns, provider.Enabled, provider.Priority, @@ -100,7 +100,7 @@ func (db *Database) SetProvider(provider *models.Provider) error { const query_GetProviders = `SELECT id, enabled, priority, name, host, port, ssl, username, password, max_conns, max_art_size, posting, created_at, proxy_enabled, proxy_type, proxy_host, proxy_port, proxy_username, proxy_password FROM providers order by priority ASC` func (db *Database) GetProviders() ([]*models.Provider, error) { - rows, err := retryableQuery(db.mainDB, query_GetProviders) + rows, err := RetryableQuery(db.mainDB, query_GetProviders) if err != nil { return nil, err } @@ -124,7 +124,7 @@ func (db *Database) InsertNewsgroup(g *models.Newsgroup) error { if g.Hierarchy == "" { g.Hierarchy = ExtractHierarchyFromGroupName(g.Name) } - _, err := retryableExec(db.mainDB, query_InsertNewsgroup, g.Name, g.Description, g.LastArticle, g.MessageCount, g.Active, g.ExpiryDays, g.MaxArticles, g.MaxArtSize, g.HighWater, g.LowWater, g.Status, g.Hierarchy) + _, err := RetryableExec(db.mainDB, query_InsertNewsgroup, g.Name, g.Description, g.LastArticle, g.MessageCount, g.Active, g.ExpiryDays, g.MaxArticles, g.MaxArtSize, g.HighWater, g.LowWater, g.Status, g.Hierarchy) // Invalidate hierarchy cache for the affected hierarchy if err == nil && db.HierarchyCache != nil { @@ -138,7 +138,7 @@ const query_MainDBGetAllNewsgroupsCount = `SELECT COUNT(*) FROM newsgroups` func (db *Database) MainDBGetAllNewsgroupsCount() int64 { var count int64 - err := retryableQueryRowScan(db.mainDB, query_MainDBGetAllNewsgroupsCount, nil, &count) + err := 
RetryableQueryRowScan(db.mainDB, query_MainDBGetAllNewsgroupsCount, nil, &count) if err != nil { log.Printf("MainDBGetNewsgroupsCount: Failed to get newsgroups count: %v", err) return 0 @@ -151,7 +151,7 @@ const query_GetUsersCount = `SELECT COUNT(*) FROM users` func (db *Database) GetUsersCount() int64 { var count int64 - err := retryableQueryRowScan(db.mainDB, query_GetUsersCount, nil, &count) + err := RetryableQueryRowScan(db.mainDB, query_GetUsersCount, nil, &count) if err != nil { log.Printf("GetUsersCount: Failed to get users count: %v", err) return 0 @@ -166,7 +166,7 @@ WHERE u.disabled = 0 AND (u.id = 1 OR up.permission = 'admin')` func (db *Database) GetAdminUsersCount() int64 { var count int64 - err := retryableQueryRowScan(db.mainDB, query_GetAdminUsersCount, nil, &count) + err := RetryableQueryRowScan(db.mainDB, query_GetAdminUsersCount, nil, &count) if err != nil { log.Printf("GetAdminUsersCount: Failed to get admin users count: %v", err) return 0 @@ -178,7 +178,7 @@ const query_MainDBGetNewsgroupsActiveCount = `SELECT COUNT(*) FROM newsgroups WH func (db *Database) MainDBGetNewsgroupsActiveCount() int64 { var count int64 - err := retryableQueryRowScan(db.mainDB, query_MainDBGetNewsgroupsActiveCount, nil, &count) + err := RetryableQueryRowScan(db.mainDB, query_MainDBGetNewsgroupsActiveCount, nil, &count) if err != nil { log.Printf("MainDBGetNewsgroupsActiveCount: Failed to get newsgroups count: %v", err) return 0 @@ -190,7 +190,7 @@ func (db *Database) MainDBGetNewsgroupsActiveCount() int64 { const query_MainDBGetAllNewsgroups = `SELECT id, name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, high_water, low_water, status, hierarchy, created_at FROM newsgroups order by name` func (db *Database) MainDBGetAllNewsgroups() ([]*models.Newsgroup, error) { - rows, err := retryableQuery(db.mainDB, query_MainDBGetAllNewsgroups) + rows, err := RetryableQuery(db.mainDB, query_MainDBGetAllNewsgroups) if err != nil { 
log.Printf("MainDBGetAllNewsgroups: Failed to query newsgroups: %v", err) return nil, err @@ -211,7 +211,7 @@ func (db *Database) MainDBGetAllNewsgroups() ([]*models.Newsgroup, error) { const query_MainDBGetNewsgroup = `SELECT id, name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, high_water, low_water, status, hierarchy, created_at FROM newsgroups WHERE name = ?` func (db *Database) MainDBGetNewsgroup(newsgroup string) (*models.Newsgroup, error) { - rows, err := retryableQuery(db.mainDB, query_MainDBGetNewsgroup, newsgroup) + rows, err := RetryableQuery(db.mainDB, query_MainDBGetNewsgroup, newsgroup) if err != nil { log.Printf("MainDBGetNewsgroup: Failed to query newsgroup '%s': %v", newsgroup, err) return nil, err @@ -235,7 +235,7 @@ func (db *Database) MainDBGetNewsgroup(newsgroup string) (*models.Newsgroup, err const query_MainDBGetNewsgroupByID = `SELECT id, name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, high_water, low_water, status, hierarchy, created_at FROM newsgroups WHERE id = ?` func (db *Database) MainDBGetNewsgroupByID(id int64) (*models.Newsgroup, error) { - rows, err := retryableQuery(db.mainDB, query_MainDBGetNewsgroupByID, id) + rows, err := RetryableQuery(db.mainDB, query_MainDBGetNewsgroupByID, id) if err != nil { log.Printf("MainDBGetNewsgroupByID: Failed to query newsgroup with ID %d: %v", id, err) return nil, err @@ -264,7 +264,7 @@ func (db *Database) UpdateNewsgroup(g *models.Newsgroup) error { g.Hierarchy = ExtractHierarchyFromGroupName(g.Name) } - _, err := retryableExec(db.mainDB, query_UpdateNewsgroup, + _, err := RetryableExec(db.mainDB, query_UpdateNewsgroup, g.Description, g.LastArticle, g.MessageCount, g.Active, g.ExpiryDays, g.MaxArticles, g.HighWater, g.LowWater, g.Status, g.Hierarchy, g.Name, ) @@ -280,7 +280,7 @@ func (db *Database) UpdateNewsgroup(g *models.Newsgroup) error { const query_UpdateNewsgroupExpiry = `UPDATE newsgroups SET 
expiry_days = ? WHERE name = ?` func (db *Database) UpdateNewsgroupExpiry(name string, expiryDays int) error { - _, err := retryableExec(db.mainDB, query_UpdateNewsgroupExpiry, expiryDays, name) + _, err := RetryableExec(db.mainDB, query_UpdateNewsgroupExpiry, expiryDays, name) return err } @@ -288,7 +288,7 @@ func (db *Database) UpdateNewsgroupExpiry(name string, expiryDays int) error { const query_UpdateNewsgroupExpiryPrefix = `UPDATE newsgroups SET expiry_days = ? WHERE name LIKE ? ` func (db *Database) UpdateNewsgroupExpiryPrefix(name string, expiryDays int) error { - _, err := retryableExec(db.mainDB, query_UpdateNewsgroupExpiryPrefix, expiryDays, name+"%") + _, err := RetryableExec(db.mainDB, query_UpdateNewsgroupExpiryPrefix, expiryDays, name+"%") return err } @@ -296,7 +296,7 @@ func (db *Database) UpdateNewsgroupExpiryPrefix(name string, expiryDays int) err const query_UpdateNewsgroupMaxArticles = `UPDATE newsgroups SET max_articles = ? WHERE name = ?` func (db *Database) UpdateNewsgroupMaxArticles(name string, maxArticles int) error { - _, err := retryableExec(db.mainDB, query_UpdateNewsgroupMaxArticles, maxArticles, name) + _, err := RetryableExec(db.mainDB, query_UpdateNewsgroupMaxArticles, maxArticles, name) return err } @@ -304,7 +304,7 @@ func (db *Database) UpdateNewsgroupMaxArticles(name string, maxArticles int) err const query_UpdateNewsgroupMaxArticlesPrefix = `UPDATE newsgroups SET max_articles = ? WHERE name LIKE ?` func (db *Database) UpdateNewsgroupMaxArticlesPrefix(name string, maxArticles int) error { - _, err := retryableExec(db.mainDB, query_UpdateNewsgroupMaxArticlesPrefix, maxArticles, name+"%") + _, err := RetryableExec(db.mainDB, query_UpdateNewsgroupMaxArticlesPrefix, maxArticles, name+"%") return err } @@ -312,7 +312,7 @@ func (db *Database) UpdateNewsgroupMaxArticlesPrefix(name string, maxArticles in const query_UpdateNewsgroupMaxArtSize = `UPDATE newsgroups SET max_art_size = ? 
WHERE name = ?` func (db *Database) UpdateNewsgroupMaxArtSize(name string, maxArtSize int) error { - _, err := retryableExec(db.mainDB, query_UpdateNewsgroupMaxArtSize, maxArtSize, name) + _, err := RetryableExec(db.mainDB, query_UpdateNewsgroupMaxArtSize, maxArtSize, name) return err } @@ -320,7 +320,7 @@ const query_UpdateNewsgroupActive = `UPDATE newsgroups SET active = ? WHERE name // UpdateNewsgroupActive updates the active status for a newsgroup func (db *Database) UpdateNewsgroupActive(name string, active bool) error { - _, err := retryableExec(db.mainDB, query_UpdateNewsgroupActive, active, name) + _, err := RetryableExec(db.mainDB, query_UpdateNewsgroupActive, active, name) // Update hierarchy cache with new active status instead of invalidating if err == nil && db.HierarchyCache != nil { @@ -438,7 +438,7 @@ func (db *Database) BulkDeleteNewsgroups(names []string) (int, error) { const query_UpdateNewsgroupDescription = `UPDATE newsgroups SET description = ? WHERE name = ?` func (db *Database) UpdateNewsgroupDescription(name string, description string) error { - _, err := retryableExec(db.mainDB, query_UpdateNewsgroupDescription, description, name) + _, err := RetryableExec(db.mainDB, query_UpdateNewsgroupDescription, description, name) return err } @@ -456,7 +456,7 @@ func (db *Database) DeleteNewsgroup(name string) error { hierarchy = ExtractHierarchyFromGroupName(name) } - _, err = retryableExec(db.mainDB, query_DeleteNewsgroup, name) + _, err = RetryableExec(db.mainDB, query_DeleteNewsgroup, name) // Invalidate hierarchy cache for the affected hierarchy if err == nil && db.HierarchyCache != nil { @@ -468,10 +468,10 @@ func (db *Database) DeleteNewsgroup(name string) error { const query_GetThreadsCount = `SELECT COUNT(*) FROM threads` -func (db *Database) GetThreadsCount(groupDBs *GroupDBs) (int64, error) { +func (db *Database) GetThreadsCount(groupDB *GroupDB) (int64, error) { var count int64 - err := retryableQueryRowScan(groupDBs.DB, 
query_GetThreadsCount, nil, &count) + err := RetryableQueryRowScan(groupDB.DB, query_GetThreadsCount, nil, &count) if err != nil { return 0, err } @@ -480,10 +480,10 @@ func (db *Database) GetThreadsCount(groupDBs *GroupDBs) (int64, error) { const query_GetArticlesCount = `SELECT COUNT(*) FROM articles` -func (db *Database) GetArticlesCount(groupDBs *GroupDBs) (int64, error) { +func (db *Database) GetArticlesCount(groupDB *GroupDB) (int64, error) { var count int64 - err := retryableQueryRowScan(groupDBs.DB, query_GetArticlesCount, nil, &count) + err := RetryableQueryRowScan(groupDB.DB, query_GetArticlesCount, nil, &count) if err != nil { return 0, err } @@ -505,12 +505,12 @@ func (db *Database) GetArticleCountFromMainDB(groupName string) (int64, error) { // Returns nil if no articles found const query_GetLastArticleDate = `SELECT MAX(date_sent) FROM articles WHERE hide = 0` -func (db *Database) GetLastArticleDate(groupDBs *GroupDBs) (*time.Time, error) { +func (db *Database) GetLastArticleDate(groupDB *GroupDB) (*time.Time, error) { var lastDateStr sql.NullString - err := retryableQueryRowScan(groupDBs.DB, query_GetLastArticleDate, nil, &lastDateStr) + err := RetryableQueryRowScan(groupDB.DB, query_GetLastArticleDate, nil, &lastDateStr) if err != nil { - return nil, fmt.Errorf("failed to get last article date for group %s: %w", groupDBs.Newsgroup, err) + return nil, fmt.Errorf("failed to get last article date for group %s: %w", groupDB.Newsgroup, err) } if !lastDateStr.Valid || lastDateStr.String == "" { @@ -520,7 +520,7 @@ func (db *Database) GetLastArticleDate(groupDBs *GroupDBs) (*time.Time, error) { // Parse the date string using the adapter lastDate := parseDateString(lastDateStr.String) if lastDate.IsZero() { - return nil, fmt.Errorf("failed to parse last article date '%s' for group %s", lastDateStr.String, groupDBs.Newsgroup) + return nil, fmt.Errorf("failed to parse last article date '%s' for group %s", lastDateStr.String, groupDB.Newsgroup) } return 
&lastDate, nil @@ -529,12 +529,12 @@ func (db *Database) GetLastArticleDate(groupDBs *GroupDBs) (*time.Time, error) { const query_GetArticlesBatch = `SELECT article_num, message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, reply_count, path, headers_json, body_text, imported_at FROM articles ORDER BY article_num ASC LIMIT ? OFFSET ?` // GetArticlesBatch retrieves articles from a group database in batches for memory efficiency -func (db *Database) GetArticlesBatch(groupDBs *GroupDBs, limit, offset int) ([]*models.Article, error) { +func (db *Database) GetArticlesBatch(groupDB *GroupDB, limit, offset int) ([]*models.Article, error) { if limit <= 0 { limit = 100 // Default batch size } - rows, err := retryableQuery(groupDBs.DB, query_GetArticlesBatch, limit, offset) + rows, err := RetryableQuery(groupDB.DB, query_GetArticlesBatch, limit, offset) if err != nil { return nil, err } @@ -548,8 +548,8 @@ func (db *Database) GetArticlesBatch(groupDBs *GroupDBs, limit, offset int) ([]* return nil, err } a.ArticleNums = make(map[*string]int64) - a.ArticleNums[groupDBs.NewsgroupPtr] = artnum - a.NewsgroupsPtr = append(a.NewsgroupsPtr, groupDBs.NewsgroupPtr) + a.ArticleNums[groupDB.NewsgroupPtr] = artnum + a.NewsgroupsPtr = append(a.NewsgroupsPtr, groupDB.NewsgroupPtr) out = append(out, &a) } return out, nil @@ -558,8 +558,8 @@ func (db *Database) GetArticlesBatch(groupDBs *GroupDBs, limit, offset int) ([]* // InsertThread inserts a thread into a group's threads database const query_InsertThread = `INSERT INTO threads (root_article, parent_article, child_article, depth, thread_order) VALUES (?, ?, ?, ?, ?)` -func (db *Database) InsertThread(groupDBs *GroupDBs, t *models.Thread, a *models.Article) error { - _, err := retryableExec(groupDBs.DB, query_InsertThread, +func (db *Database) InsertThread(groupDB *GroupDB, t *models.Thread, a *models.Article) error { + _, err := RetryableExec(groupDB.DB, query_InsertThread, t.RootArticle, t.ParentArticle, 
t.ChildArticle, t.Depth, t.ThreadOrder, ) @@ -568,8 +568,8 @@ func (db *Database) InsertThread(groupDBs *GroupDBs, t *models.Thread, a *models const query_GetThreads = `SELECT id, root_article, parent_article, child_article, depth, thread_order FROM threads` -func (db *Database) GetThreads(groupDBs *GroupDBs) ([]*models.Thread, error) { - rows, err := retryableQuery(groupDBs.DB, query_GetThreads) +func (db *Database) GetThreads(groupDB *GroupDB) ([]*models.Thread, error) { + rows, err := RetryableQuery(groupDB.DB, query_GetThreads) if err != nil { return nil, err } @@ -593,7 +593,7 @@ func (db *Database) GetThreads(groupDBs *GroupDBs) ([]*models.Thread, error) { const query_InsertOverview = `INSERT INTO articles (subject, from_header, date_sent, date_string, message_id, "references", bytes, lines, reply_count, downloaded) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)` const query_ImportOverview = `INSERT INTO articles (article_num, subject, from_header, date_sent, date_string, message_id, "references", bytes, lines, reply_count, downloaded) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)` -func (db *Database) InsertOverview(groupDBs *GroupDBs, o *models.Overview) (int64, error) { +func (db *Database) InsertOverview(groupDB *GroupDB, o *models.Overview) (int64, error) { var res sql.Result var err error @@ -602,12 +602,12 @@ func (db *Database) InsertOverview(groupDBs *GroupDBs, o *models.Overview) (int6 if o.ArticleNum == 0 { // Auto-increment article_num - don't include it in INSERT - res, err = retryableExec(groupDBs.DB, query_InsertOverview, + res, err = RetryableExec(groupDB.DB, query_InsertOverview, o.Subject, o.FromHeader, dateSentStr, o.DateString, o.MessageID, o.References, o.Bytes, o.Lines, o.ReplyCount, o.Downloaded, ) } else { // Explicit article_num provided (e.g. 
from ImportOverview) - res, err = retryableExec(groupDBs.DB, query_ImportOverview, + res, err = RetryableExec(groupDB.DB, query_ImportOverview, o.ArticleNum, o.Subject, o.FromHeader, dateSentStr, o.DateString, o.MessageID, o.References, o.Bytes, o.Lines, o.ReplyCount, o.Downloaded, ) } @@ -620,10 +620,10 @@ func (db *Database) InsertOverview(groupDBs *GroupDBs, o *models.Overview) (int6 const query_GetOverviews = `SELECT article_num, subject, from_header, date_sent, date_string, message_id, "references", bytes, lines, reply_count, downloaded FROM articles` -func (db *Database) GetOverviews(groupDBs *GroupDBs) ([]*models.Overview, error) { - log.Printf("GetOverviews: group '%s' fetching overviews from articles table", groupDBs.Newsgroup) +func (db *Database) GetOverviews(groupDB *GroupDB) ([]*models.Overview, error) { + log.Printf("GetOverviews: group '%s' fetching overviews from articles table", groupDB.Newsgroup) - rows, err := retryableQuery(groupDBs.DB, query_GetOverviews) + rows, err := RetryableQuery(groupDB.DB, query_GetOverviews) if err != nil { return nil, err } @@ -641,11 +641,11 @@ func (db *Database) GetOverviews(groupDBs *GroupDBs) ([]*models.Overview, error) /* // SetOverviewDownloaded sets the downloaded flag for an article in the articles table -func (db *Database) SetOverviewDownloaded(groupDBs *GroupDBs, articleNum int64, downloaded int) error { - db.Batch.BatchCaptureSetOverviewDownloaded(groupDBs.Newsgroup, articleNum) +func (db *Database) SetOverviewDownloaded(groupDB *GroupDB, articleNum int64, downloaded int) error { + db.Batch.BatchCaptureSetOverviewDownloaded(groupDB.Newsgroup, articleNum) // - _, err := groupDBs.DB.Exec( + _, err := groupDB.DB.Exec( `UPDATE articles SET downloaded = ? 
WHERE article_num = ?`, downloaded, articleNum, ) @@ -659,8 +659,8 @@ func (db *Database) SetOverviewDownloaded(groupDBs *GroupDBs, articleNum int64, // GetUndownloadedOverviews returns all overview entries from articles table that have not been downloaded const query_GetUndownloadedOverviews = `SELECT article_num, subject, from_header, date_sent, date_string, message_id, "references", bytes, lines, reply_count, downloaded FROM articles WHERE downloaded = 0 ORDER BY article_num ASC LIMIT ?` -func (db *Database) GetUndownloadedOverviews(groupDBs *GroupDBs, fetchMax int) ([]*models.Overview, error) { - rows, err := groupDBs.DB.Query(query_GetUndownloadedOverviews, fetchMax) +func (db *Database) GetUndownloadedOverviews(groupDB *GroupDB, fetchMax int) ([]*models.Overview, error) { + rows, err := groupDB.DB.Query(query_GetUndownloadedOverviews, fetchMax) if err != nil { return nil, err } @@ -818,26 +818,26 @@ func (db *Database) GetUserPermissions(userID int64) ([]*models.UserPermission, // GetArticleByNum retrieves an article by its article number const query_GetArticleByNum = `SELECT article_num, message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, reply_count, path, headers_json, body_text, imported_at FROM articles WHERE article_num = ?` -func (db *Database) GetArticleByNum(groupDBs *GroupDBs, articleNum int64) (*models.Article, error) { +func (db *Database) GetArticleByNum(groupDB *GroupDB, articleNum int64) (*models.Article, error) { // Try cache first if db.ArticleCache != nil { - if article, found := db.ArticleCache.Get(groupDBs.Newsgroup, articleNum); found { + if article, found := db.ArticleCache.Get(groupDB.Newsgroup, articleNum); found { return article, nil } } - row := groupDBs.DB.QueryRow(query_GetArticleByNum, articleNum) + row := groupDB.DB.QueryRow(query_GetArticleByNum, articleNum) var a models.Article var artnum int64 if err := row.Scan(&artnum, &a.MessageID, &a.Subject, &a.FromHeader, &a.DateSent, &a.DateString, 
&a.References, &a.Bytes, &a.Lines, &a.ReplyCount, &a.Path, &a.HeadersJSON, &a.BodyText, &a.ImportedAt); err != nil { return nil, err } a.ArticleNums = make(map[*string]int64) - a.ArticleNums[groupDBs.NewsgroupPtr] = artnum - a.NewsgroupsPtr = append(a.NewsgroupsPtr, groupDBs.NewsgroupPtr) + a.ArticleNums[groupDB.NewsgroupPtr] = artnum + a.NewsgroupsPtr = append(a.NewsgroupsPtr, groupDB.NewsgroupPtr) // Cache the result if db.ArticleCache != nil { - db.ArticleCache.Put(groupDBs.Newsgroup, articleNum, &a) + db.ArticleCache.Put(groupDB.Newsgroup, articleNum, &a) } return &a, nil } @@ -845,22 +845,23 @@ func (db *Database) GetArticleByNum(groupDBs *GroupDBs, articleNum int64) (*mode // GetArticleByMessageID retrieves an article by its message ID const query_GetArticleByMessageID = `SELECT article_num, message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, reply_count, path, headers_json, body_text, imported_at FROM articles WHERE message_id = ?` -func (db *Database) GetArticleByMessageID(groupDBs *GroupDBs, messageID string) (*models.Article, error) { - //log.Printf("GetArticleByMessageID: group '%s' fetching article with message ID '%s'", groupDBs.Newsgroup, messageID) +func (db *Database) GetArticleByMessageID(groupDB *GroupDB, messageID string) (*models.Article, error) { + //log.Printf("GetArticleByMessageID: group '%s' fetching article with message ID '%s'", groupDB.Newsgroup, messageID) - row := groupDBs.DB.QueryRow(query_GetArticleByMessageID, messageID) + row := groupDB.DB.QueryRow(query_GetArticleByMessageID, messageID) var a models.Article var artnum int64 if err := row.Scan(&artnum, &a.MessageID, &a.Subject, &a.FromHeader, &a.DateSent, &a.DateString, &a.References, &a.Bytes, &a.Lines, &a.ReplyCount, &a.Path, &a.HeadersJSON, &a.BodyText, &a.ImportedAt); err != nil { return nil, err } a.ArticleNums = make(map[*string]int64) - a.ArticleNums[groupDBs.NewsgroupPtr] = artnum - a.NewsgroupsPtr = append(a.NewsgroupsPtr, 
groupDBs.NewsgroupPtr) + a.ArticleNums[groupDB.NewsgroupPtr] = artnum + a.DBArtNum = artnum + a.NewsgroupsPtr = append(a.NewsgroupsPtr, groupDB.NewsgroupPtr) // Cache the result if db.ArticleCache != nil { - db.ArticleCache.Put(groupDBs.Newsgroup, a.ArticleNums[groupDBs.NewsgroupPtr], &a) + db.ArticleCache.Put(groupDB.Newsgroup, a.ArticleNums[groupDB.NewsgroupPtr], &a) } return &a, nil } @@ -868,16 +869,16 @@ func (db *Database) GetArticleByMessageID(groupDBs *GroupDBs, messageID string) // UpdateReplyCount updates the reply count for an article const query_UpdateReplyCount = `UPDATE articles SET reply_count = ? WHERE message_id = ?` -func (db *Database) UpdateReplyCount(groupDBs *GroupDBs, messageID string, replyCount int) error { - _, err := retryableExec(groupDBs.DB, query_UpdateReplyCount, replyCount, messageID) +func (db *Database) UpdateReplyCount(groupDB *GroupDB, messageID string, replyCount int) error { + _, err := RetryableExec(groupDB.DB, query_UpdateReplyCount, replyCount, messageID) return err } // IncrementReplyCount increments the reply count for an article const query_IncrementReplyCount = `UPDATE articles SET reply_count = reply_count + 1 WHERE message_id = ?` -func (db *Database) IncrementReplyCount(groupDBs *GroupDBs, messageID string) error { - _, err := retryableExec(groupDBs.DB, +func (db *Database) IncrementReplyCount(groupDB *GroupDB, messageID string) error { + _, err := RetryableExec(groupDB.DB, query_IncrementReplyCount, messageID, ) @@ -887,8 +888,8 @@ func (db *Database) IncrementReplyCount(groupDBs *GroupDBs, messageID string) er // GetReplyCount gets the current reply count for an article const query_GetReplyCount = `SELECT reply_count FROM articles WHERE message_id = ?` -func (db *Database) GetReplyCount(groupDBs *GroupDBs, messageID string) (int, error) { - row := groupDBs.DB.QueryRow( +func (db *Database) GetReplyCount(groupDB *GroupDB, messageID string) (int, error) { + row := groupDB.DB.QueryRow( query_GetReplyCount, messageID, ) 
@@ -902,16 +903,16 @@ func (db *Database) GetReplyCount(groupDBs *GroupDBs, messageID string) (int, er // UpdateArticleDateSent updates the date_sent field for an article const query_UpdateArticleDateSent = `UPDATE articles SET date_sent = ?, date_string = ? WHERE message_id = ?` -func (db *Database) UpdateArticleDateSent(groupDBs *GroupDBs, messageID string, dateSent time.Time, dateString string) error { - _, err := retryableExec(groupDBs.DB, query_UpdateArticleDateSent, dateSent.UTC().Format("2006-01-02 15:04:05"), dateString, messageID) +func (db *Database) UpdateArticleDateSent(groupDB *GroupDB, messageID string, dateSent time.Time, dateString string) error { + _, err := RetryableExec(groupDB.DB, query_UpdateArticleDateSent, dateSent.UTC().Format("2006-01-02 15:04:05"), dateString, messageID) return err } // UpdateOverviewReplyCount updates the reply count for an article in the articles table const query_UpdateOverviewReplyCount = `UPDATE articles SET reply_count = ? WHERE message_id = ?` -func (db *Database) UpdateOverviewReplyCount(groupDBs *GroupDBs, messageID string, replyCount int) error { - _, err := retryableExec(groupDBs.DB, +func (db *Database) UpdateOverviewReplyCount(groupDB *GroupDB, messageID string, replyCount int) error { + _, err := RetryableExec(groupDB.DB, query_UpdateOverviewReplyCount, replyCount, messageID, ) @@ -921,9 +922,9 @@ func (db *Database) UpdateOverviewReplyCount(groupDBs *GroupDBs, messageID strin // IncrementOverviewReplyCount increments the reply count for an article in the articles table const query_IncrementOverviewReplyCount = `UPDATE articles SET reply_count = reply_count + 1 WHERE message_id = ?` -func (db *Database) IncrementOverviewReplyCount(groupDBs *GroupDBs, messageID string) error { +func (db *Database) IncrementOverviewReplyCount(groupDB *GroupDB, messageID string) error { - _, err := retryableExec(groupDBs.DB, + _, err := RetryableExec(groupDB.DB, query_IncrementOverviewReplyCount, messageID, ) @@ -1029,7 +1030,7 
@@ func (db *Database) GetNewsgroupsPaginated(page, pageSize int) ([]*models.Newsgr // GetNewsgroupsPaginatedAdmin returns ALL newsgroups with pagination const query_GetNewsgroupsPaginatedAdmin1 = `SELECT COUNT(*) FROM newsgroups` -const query_GetNewsgroupsPaginatedAdmin2 = `SELECT id, name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, created_at +const query_GetNewsgroupsPaginatedAdmin2 = `SELECT id, name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, status, created_at FROM newsgroups ORDER BY name LIMIT ? OFFSET ?` @@ -1053,7 +1054,7 @@ func (db *Database) GetNewsgroupsPaginatedAdmin(page, pageSize int) ([]*models.N var out []*models.Newsgroup for rows.Next() { var g models.Newsgroup - if err := rows.Scan(&g.ID, &g.Name, &g.Description, &g.LastArticle, &g.MessageCount, &g.Active, &g.ExpiryDays, &g.MaxArticles, &g.MaxArtSize, &g.CreatedAt); err != nil { + if err := rows.Scan(&g.ID, &g.Name, &g.Description, &g.LastArticle, &g.MessageCount, &g.Active, &g.ExpiryDays, &g.MaxArticles, &g.MaxArtSize, &g.Status, &g.CreatedAt); err != nil { return nil, 0, err } out = append(out, &g) @@ -1079,11 +1080,11 @@ const query_GetOverviewsPaginated3 = `SELECT article_num FROM articles ORDER BY article_num DESC LIMIT 1` -func (db *Database) GetOverviewsPaginated(groupDBs *GroupDBs, lastArticleNum int64, pageSize int) ([]*models.Overview, int, bool, error) { +func (db *Database) GetOverviewsPaginated(groupDB *GroupDB, lastArticleNum int64, pageSize int) ([]*models.Overview, int, bool, error) { // Get total count from newsgroups table in main database (much faster than COUNT(*) on articles) var totalCount int - newsgroupInfo, err := db.MainDBGetNewsgroup(groupDBs.Newsgroup) + newsgroupInfo, err := db.MainDBGetNewsgroup(groupDB.Newsgroup) if err != nil || newsgroupInfo == nil { totalCount = -1 // Fallback if newsgroup not found } else { @@ -1095,7 +1096,7 @@ func (db *Database) 
GetOverviewsPaginated(groupDBs *GroupDBs, lastArticleNum int if lastArticleNum > 0 { // Continue from last seen article (descending order by article_num) args := []interface{}{lastArticleNum, pageSize} - rows, err = groupDBs.DB.Query(query_GetOverviewsPaginated1, args...) + rows, err = groupDB.DB.Query(query_GetOverviewsPaginated1, args...) if err != nil { return nil, 0, false, err } @@ -1103,7 +1104,7 @@ func (db *Database) GetOverviewsPaginated(groupDBs *GroupDBs, lastArticleNum int } else { // First page args := []interface{}{pageSize} - rows, err = groupDBs.DB.Query(query_GetOverviewsPaginated2, args...) + rows, err = groupDB.DB.Query(query_GetOverviewsPaginated2, args...) if err != nil { return nil, 0, false, err } @@ -1123,7 +1124,7 @@ func (db *Database) GetOverviewsPaginated(groupDBs *GroupDBs, lastArticleNum int hasMore := false if len(out) == pageSize { var nextArticleNum int64 - err := groupDBs.DB.QueryRow(query_GetOverviewsPaginated3, + err := groupDB.DB.QueryRow(query_GetOverviewsPaginated3, out[len(out)-1].ArticleNum).Scan(&nextArticleNum) if err == nil { hasMore = true @@ -1418,20 +1419,20 @@ func (db *Database) GetTotalThreadsCount() (int64, error) { var totalThreads int64 for _, group := range groups { // Get group database - groupDBs, err := db.GetGroupDBs(group.Name) + groupDB, err := db.GetGroupDB(group.Name) if err != nil { continue // Skip groups that don't have databases yet } // Count threads in this group - threadCount, err := db.GetThreadsCount(groupDBs) + threadCount, err := db.GetThreadsCount(groupDB) if err != nil { - groupDBs.Return(db) + groupDB.Return() continue // Skip groups with errors } totalThreads += threadCount - groupDBs.Return(db) + groupDB.Return() } return totalThreads, nil @@ -1439,7 +1440,7 @@ func (db *Database) GetTotalThreadsCount() (int64, error) { // SearchNewsgroups searches for newsgroups by name pattern with pagination const query_SearchNewsgroups = ` - SELECT name, description, last_article, message_count, 
active, expiry_days, max_articles, max_art_size, created_at, updated_at + SELECT name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, status, created_at, updated_at FROM newsgroups WHERE active = 1 AND (name LIKE ? COLLATE NOCASE OR description LIKE ? COLLATE NOCASE) @@ -1449,7 +1450,7 @@ const query_SearchNewsgroups = ` // Search queries with description const query_SearchNewsgroupsWithDesc = ` - SELECT name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, created_at, updated_at + SELECT name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, status, created_at, updated_at FROM newsgroups WHERE active = 1 AND (name LIKE ? COLLATE NOCASE OR description LIKE ? COLLATE NOCASE) ORDER BY message_count DESC, name ASC @@ -1457,7 +1458,7 @@ const query_SearchNewsgroupsWithDesc = ` ` const query_SearchNewsgroupsAdminWithDesc = ` - SELECT name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, created_at, updated_at + SELECT name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, status, created_at, updated_at FROM newsgroups WHERE (name LIKE ? COLLATE NOCASE OR description LIKE ? COLLATE NOCASE) ORDER BY message_count DESC, name ASC @@ -1466,7 +1467,7 @@ const query_SearchNewsgroupsAdminWithDesc = ` // Search queries name-only const query_SearchNewsgroupsNameOnly = ` - SELECT name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, created_at, updated_at + SELECT name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, status, created_at, updated_at FROM newsgroups WHERE active = 1 AND name LIKE ? 
COLLATE NOCASE ORDER BY message_count DESC, name ASC @@ -1474,7 +1475,7 @@ const query_SearchNewsgroupsNameOnly = ` ` const query_SearchNewsgroupsAdminNameOnly = ` - SELECT name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, created_at, updated_at + SELECT name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, status, created_at, updated_at FROM newsgroups WHERE name LIKE ? COLLATE NOCASE ORDER BY message_count DESC, name ASC @@ -1482,7 +1483,7 @@ const query_SearchNewsgroupsAdminNameOnly = ` ` const query_SearchNewsgroupsAdmin = ` - SELECT name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, created_at, updated_at + SELECT name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, status, created_at, updated_at FROM newsgroups WHERE (name LIKE ? COLLATE NOCASE OR description LIKE ? COLLATE NOCASE) @@ -1534,7 +1535,7 @@ func (db *Database) SearchNewsgroupsWithOptions(searchTerm string, limit, offset g := &models.Newsgroup{} err := rows.Scan( &g.Name, &g.Description, &g.LastArticle, &g.MessageCount, - &g.Active, &g.ExpiryDays, &g.MaxArticles, &g.MaxArtSize, &g.CreatedAt, &g.UpdatedAt, + &g.Active, &g.ExpiryDays, &g.MaxArticles, &g.MaxArtSize, &g.Status, &g.CreatedAt, &g.UpdatedAt, ) if err != nil { return nil, err @@ -1668,7 +1669,7 @@ const query_GetOverviewsRange = `SELECT article_num, subject, from_header, date_ WHERE article_num >= ? AND article_num <= ? 
ORDER BY article_num ASC` -func (db *Database) GetOverviewsRange(groupDBs *GroupDBs, startNum, endNum int64) ([]*models.Overview, error) { +func (db *Database) GetOverviewsRange(groupDB *GroupDB, startNum, endNum int64) ([]*models.Overview, error) { if startNum > endNum { return nil, fmt.Errorf("start number %d is greater than end number %d", startNum, endNum) } @@ -1678,7 +1679,7 @@ func (db *Database) GetOverviewsRange(groupDBs *GroupDBs, startNum, endNum int64 endNum = startNum + 1000 } - rows, err := groupDBs.DB.Query(query_GetOverviewsRange, startNum, endNum) + rows, err := groupDB.DB.Query(query_GetOverviewsRange, startNum, endNum) if err != nil { return nil, err } @@ -1705,9 +1706,9 @@ const query_GetOverviewByMessageID = ` WHERE message_id = ? LIMIT 1 ` -func (db *Database) GetOverviewByMessageID(groupDBs *GroupDBs, messageID string) (*models.Overview, error) { +func (db *Database) GetOverviewByMessageID(groupDB *GroupDB, messageID string) (*models.Overview, error) { overview := &models.Overview{} - err := groupDBs.DB.QueryRow(query_GetOverviewByMessageID, messageID).Scan( + err := groupDB.DB.QueryRow(query_GetOverviewByMessageID, messageID).Scan( &overview.ArticleNum, &overview.Subject, &overview.FromHeader, &overview.DateSent, &overview.DateString, &overview.MessageID, &overview.References, &overview.Bytes, &overview.Lines, @@ -1730,7 +1731,7 @@ const query_GetHeaderFieldRange5 = `SELECT article_num, "references" FROM articl const query_GetHeaderFieldRange6 = `SELECT article_num, bytes FROM articles WHERE article_num >= ? AND article_num <= ? ORDER BY article_num ASC` const query_GetHeaderFieldRange7 = `SELECT article_num, lines FROM articles WHERE article_num >= ? AND article_num <= ? 
ORDER BY article_num ASC` -func (db *Database) GetHeaderFieldRange(groupDBs *GroupDBs, field string, startNum, endNum int64) (map[int64]string, error) { +func (db *Database) GetHeaderFieldRange(groupDB *GroupDB, field string, startNum, endNum int64) (map[int64]string, error) { if startNum > endNum { return nil, fmt.Errorf("start number %d is greater than end number %d", startNum, endNum) } @@ -1746,25 +1747,25 @@ func (db *Database) GetHeaderFieldRange(groupDBs *GroupDBs, field string, startN switch strings.ToLower(field) { case "subject": query = query_GetHeaderFieldRange1 - dbToQuery = groupDBs.DB + dbToQuery = groupDB.DB case "from": query = query_GetHeaderFieldRange2 - dbToQuery = groupDBs.DB + dbToQuery = groupDB.DB case "date": query = query_GetHeaderFieldRange3 - dbToQuery = groupDBs.DB + dbToQuery = groupDB.DB case "message-id": query = query_GetHeaderFieldRange4 - dbToQuery = groupDBs.DB + dbToQuery = groupDB.DB case "references": query = query_GetHeaderFieldRange5 - dbToQuery = groupDBs.DB + dbToQuery = groupDB.DB case "bytes": query = query_GetHeaderFieldRange6 - dbToQuery = groupDBs.DB + dbToQuery = groupDB.DB case "lines": query = query_GetHeaderFieldRange7 - dbToQuery = groupDBs.DB + dbToQuery = groupDB.DB default: // For other headers, try to get from the full article headers // For now, return empty result for unsupported headers @@ -2892,16 +2893,16 @@ func (db *Database) ResetNewsgroupData(newsgroupName string) error { log.Printf("ResetNewsgroupData: Resetting data for newsgroup '%s'", newsgroupName) // Get the group database connection - groupDBs, err := db.GetGroupDBs(newsgroupName) + groupDB, err := db.GetGroupDB(newsgroupName) if err != nil { // If group database doesn't exist yet, nothing to reset log.Printf("ResetNewsgroupData: No database found for newsgroup '%s', skipping", newsgroupName) return nil } - defer groupDBs.Return(db) + defer groupDB.Return() // Begin transaction for atomic reset - tx, err := groupDBs.DB.Begin() + tx, err := 
groupDB.DB.Begin() if err != nil { return fmt.Errorf("failed to start transaction for newsgroup '%s': %w", newsgroupName, err) } @@ -2980,7 +2981,7 @@ func (db *Database) ResetNewsgroupCounters(newsgroupName string) error { log.Printf("ResetNewsgroupCounters: Resetting counters for newsgroup '%s'", newsgroupName) // Reset all counters to 0 and water marks to default values - _, err := retryableExec(db.mainDB, query_ResetNewsgroupCounters, newsgroupName) + _, err := RetryableExec(db.mainDB, query_ResetNewsgroupCounters, newsgroupName) if err != nil { return fmt.Errorf("failed to reset counters for newsgroup '%s': %w", newsgroupName, err) @@ -2997,7 +2998,7 @@ const query_GetAllSiteNews = `SELECT id, subject, content, date_published, is_vi FROM site_news ORDER BY date_published DESC` func (db *Database) GetAllSiteNews() ([]*models.SiteNews, error) { - rows, err := retryableQuery(db.mainDB, query_GetAllSiteNews) + rows, err := RetryableQuery(db.mainDB, query_GetAllSiteNews) if err != nil { return nil, fmt.Errorf("failed to query all site news: %w", err) } @@ -3026,7 +3027,7 @@ const query_GetVisibleSiteNews = `SELECT id, subject, content, date_published, i FROM site_news WHERE is_visible = 1 ORDER BY date_published DESC` func (db *Database) GetVisibleSiteNews() ([]*models.SiteNews, error) { - rows, err := retryableQuery(db.mainDB, query_GetVisibleSiteNews) + rows, err := RetryableQuery(db.mainDB, query_GetVisibleSiteNews) if err != nil { return nil, fmt.Errorf("failed to query visible site news: %w", err) } @@ -3057,7 +3058,7 @@ const query_GetSiteNewsByID = `SELECT id, subject, content, date_published, is_v func (db *Database) GetSiteNewsByID(id int) (*models.SiteNews, error) { var item models.SiteNews var isVisibleInt int - err := retryableQueryRowScan(db.mainDB, query_GetSiteNewsByID, []interface{}{id}, + err := RetryableQueryRowScan(db.mainDB, query_GetSiteNewsByID, []interface{}{id}, &item.ID, &item.Subject, &item.Content, &item.DatePublished, &isVisibleInt, 
&item.CreatedAt, &item.UpdatedAt) @@ -3082,7 +3083,7 @@ func (db *Database) CreateSiteNews(news *models.SiteNews) error { isVisibleInt = 1 } - result, err := retryableExec(db.mainDB, query_CreateSiteNews, news.Subject, news.Content, + result, err := RetryableExec(db.mainDB, query_CreateSiteNews, news.Subject, news.Content, news.DatePublished, isVisibleInt) if err != nil { return fmt.Errorf("failed to create site news: %w", err) @@ -3107,7 +3108,7 @@ func (db *Database) UpdateSiteNews(news *models.SiteNews) error { isVisibleInt = 1 } - _, err := retryableExec(db.mainDB, query_UpdateSiteNews, news.Subject, news.Content, + _, err := RetryableExec(db.mainDB, query_UpdateSiteNews, news.Subject, news.Content, news.DatePublished, isVisibleInt, news.ID) if err != nil { return fmt.Errorf("failed to update site news ID %d: %w", news.ID, err) @@ -3120,7 +3121,7 @@ func (db *Database) UpdateSiteNews(news *models.SiteNews) error { const query_DeleteSiteNews = `DELETE FROM site_news WHERE id = ?` func (db *Database) DeleteSiteNews(id int) error { - _, err := retryableExec(db.mainDB, query_DeleteSiteNews, id) + _, err := RetryableExec(db.mainDB, query_DeleteSiteNews, id) if err != nil { return fmt.Errorf("failed to delete site news ID %d: %w", id, err) } @@ -3131,7 +3132,7 @@ func (db *Database) DeleteSiteNews(id int) error { const query_ToggleSiteNewsVisibility = `UPDATE site_news SET is_visible = (1 - is_visible) WHERE id = ?` func (db *Database) ToggleSiteNewsVisibility(id int) error { - _, err := retryableExec(db.mainDB, query_ToggleSiteNewsVisibility, id) + _, err := RetryableExec(db.mainDB, query_ToggleSiteNewsVisibility, id) if err != nil { return fmt.Errorf("failed to toggle visibility for site news ID %d: %w", id, err) } @@ -3175,7 +3176,7 @@ func (db *Database) GetSpamArticles(offset, limit int) ([]*models.Overview, []st } // Get article details from the specific newsgroup database - groupDBs, err := db.GetGroupDBs(newsgroupName) + groupDB, err := 
db.GetGroupDB(newsgroupName) if err != nil { // Log error but continue with next article log.Printf("Failed to get group database for %s: %v", newsgroupName, err) @@ -3183,8 +3184,8 @@ func (db *Database) GetSpamArticles(offset, limit int) ([]*models.Overview, []st } // Use existing function to get article overview - overview, err := db.GetOverviewByArticleNum(groupDBs, int64(articleNum)) - groupDBs.Return(db) + overview, err := db.GetOverviewByArticleNum(groupDB, int64(articleNum)) + groupDB.Return() if err != nil { // Log error but continue with next article - article might have been deleted @@ -3280,3 +3281,298 @@ func (db *Database) SearchUserByComputedHash(targetHash string, nonce string) (* // No matching user found return nil, nil } + +const query_getMessageIDsBatchWithDateFilter_selectPart = `SELECT message_id FROM articles` +const query_getMessageIDsBatchWithDateFilter_orderby = " ORDER BY date_sent ASC" +const query_getArticlesBatchWithDateFilter_selectPart = `SELECT article_num, message_id, subject, from_header, +date_sent, date_string, "references", bytes, lines, headers_json, body_text, +path, imported_at, spam, hide FROM articles` +const query_getArticlesBatchWithDateFilter_orderby = " ORDER BY date_sent ASC LIMIT ? OFFSET ?" + +// GetArticlesByIDs retrieves articles by their message IDs, ordered by date_sent +func (db *Database) GetArticlesByIDs(newsgroup *string, wantedIDs []*string) ([]*models.Article, error) { + if len(wantedIDs) == 0 { + return nil, nil + } + + groupDB, err := db.GetGroupDB(*newsgroup) + if err != nil { + return nil, err + } + defer groupDB.Return() + + // Build the IN clause with placeholders + placeholders := make([]string, len(wantedIDs)) + args := make([]interface{}, len(wantedIDs)) + for i, id := range wantedIDs { + placeholders[i] = "?" 
+ args[i] = id + } + + // Build query with IN clause for message_ids + query := query_getArticlesBatchWithDateFilter_selectPart + + " WHERE message_id IN (" + strings.Join(placeholders, ",") + ") " + + "ORDER BY date_sent ASC" + + rows, err := groupDB.DB.Query(query, args...) + if err != nil { + return nil, fmt.Errorf("failed to query articles by IDs: %w", err) + } + defer rows.Close() + + var articles []*models.Article + for rows.Next() { + article := &models.Article{} + if err := rows.Scan( + &article.DBArtNum, + &article.MessageID, + &article.Subject, + &article.FromHeader, + &article.DateSent, + &article.DateString, + &article.References, + &article.Bytes, + &article.Lines, + &article.HeadersJSON, + &article.BodyText, + &article.Path, + &article.ImportedAt, + &article.Spam, + &article.Hide, + ); err != nil { + return nil, fmt.Errorf("failed to scan article: %w", err) + } + articles = append(articles, article) + } + + if err = rows.Err(); err != nil { + return nil, fmt.Errorf("error iterating article rows: %w", err) + } + + return articles, nil +} + +// GetMessageIDsWithDateFilter streams message IDs from a group database with optional date filtering +func (db *Database) GetMessageIDsWithDateFilter(ng *models.Newsgroup, startTime, endTime *time.Time, batchCheck int64, resultChan chan []*string) error { + // Get group database + groupDB, err := db.GetGroupDB(ng.Name) + if err != nil { + return fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", ng.Name, err) + } + defer close(resultChan) + defer db.ForceCloseGroupDB(groupDB) + var loaded uint64 + var query string + var args []interface{} + start := time.Now() + if startTime != nil || endTime != nil { + // Build query with date filtering + var whereConditions []string + + // Always exclude NULL date_sent values when using ORDER BY date_sent + whereConditions = append(whereConditions, "date_sent IS NOT NULL") + + if startTime != nil { + whereConditions = append(whereConditions, "date_sent >= ?") + args = 
append(args, startTime.UTC().Format("2006-01-02 15:04:05")) + } + + if endTime != nil { + whereConditions = append(whereConditions, "date_sent <= ?") + args = append(args, endTime.UTC().Format("2006-01-02 15:04:05")) + } + + whereClause := "" + if len(whereConditions) > 0 { + whereClause = " WHERE " + strings.Join(whereConditions, " AND ") + } + + query = query_getMessageIDsBatchWithDateFilter_selectPart + whereClause + query_getMessageIDsBatchWithDateFilter_orderby + } else { + // No date filtering + query = query_getMessageIDsBatchWithDateFilter_selectPart + query_getMessageIDsBatchWithDateFilter_orderby + } + log.Printf("Newsgroup: '%s' | GetMessageIDsWithDateFilter: Executing query: '%s' with args: '%v'", ng.Name, query, args) + rows, err := groupDB.DB.Query(query, args...) + if err != nil { + log.Printf("ERROR Newsgroup: '%s' | GetMessageIDsWithDateFilter: Failed to execute query: %v", ng.Name, err) + return err + } + defer rows.Close() + //out := make([]*models.Article, dbBatchSize) + tmpChan := make(chan *string, batchCheck) + for rows.Next() { + var msgid string + if err := rows.Scan( + &msgid, + ); err != nil { + log.Printf("ERROR GetMessageIDsWithDateFilter: Failed to scan message ID in newsgroup '%s': %v", ng.Name, err) + return err + } + + if msgid != "" { + load: + for { + select { + case tmpChan <- &msgid: + break load + default: + // chan full + loaded += db.releaseTmpChan(tmpChan, resultChan, cap(tmpChan)) + tmpChan <- &msgid + break load + } + } + } + } + loaded += db.releaseTmpChan(tmpChan, resultChan, 0) + log.Printf("Newsgroup: '%s' | GetMessageIDsWithDateFilter: Loaded %d message IDs | took: %v", ng.Name, loaded, time.Since(start)) + return nil +} + +func (db *Database) releaseTmpChan(tmpChan chan *string, resultChan chan []*string, limit int) (loaded uint64) { + if limit > 0 && len(tmpChan) < limit && len(tmpChan) < cap(tmpChan) { + return 0 + } + if len(tmpChan) > 0 { + out := make([]*string, 0, len(tmpChan)) + empty: + for { + select { + 
case mid := <-tmpChan: + out = append(out, mid) + default: + break empty + } + } + if len(out) > 0 { + resultChan <- out + loaded += uint64(len(out)) + } + } + return loaded +} + +// GetArticlesBatchWithDateFilter retrieves articles from a group database with optional date filtering +func (db *Database) GetArticlesBatchWithDateFilter(ng *models.Newsgroup, offset int64, startTime, endTime *time.Time, dbBatchSize int64) ([]*models.Article, error) { + // Get group database + groupDB, err := db.GetGroupDB(ng.Name) + if err != nil { + return nil, fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", ng.Name, err) + } + + var query string + var args []interface{} + + if startTime != nil || endTime != nil { + // Build query with date filtering + var whereConditions []string + + // Always exclude NULL date_sent values when using ORDER BY date_sent + whereConditions = append(whereConditions, "date_sent IS NOT NULL") + + if startTime != nil { + whereConditions = append(whereConditions, "date_sent >= ?") + args = append(args, startTime.UTC().Format("2006-01-02 15:04:05")) + } + + if endTime != nil { + whereConditions = append(whereConditions, "date_sent <= ?") + args = append(args, endTime.UTC().Format("2006-01-02 15:04:05")) + } + + whereClause := "" + if len(whereConditions) > 0 { + whereClause = " WHERE " + strings.Join(whereConditions, " AND ") + } + + query = query_getArticlesBatchWithDateFilter_selectPart + whereClause + query_getArticlesBatchWithDateFilter_orderby + args = append(args, dbBatchSize, offset) + } else { + // No date filtering - simple OFFSET pagination + query = query_getArticlesBatchWithDateFilter_selectPart + query_getArticlesBatchWithDateFilter_orderby + args = []interface{}{dbBatchSize, offset} + } + + rows, err := groupDB.DB.Query(query, args...) 
+ if err != nil { + db.ForceCloseGroupDB(groupDB) + return nil, err + } + defer rows.Close() + //out := make([]*models.Article, dbBatchSize) + var out []*models.Article + for rows.Next() { + a := models.NewArticle() + if err := rows.Scan(&a.DBArtNum, &a.MessageID, &a.Subject, &a.FromHeader, &a.DateSent, &a.DateString, &a.References, &a.Bytes, &a.Lines, &a.HeadersJSON, &a.BodyText, &a.Path, &a.ImportedAt, &a.Spam, &a.Hide); err != nil { + return nil, err + } + out = append(out, a) + } + + if int64(len(out)) < dbBatchSize { + db.ForceCloseGroupDB(groupDB) + } else { + groupDB.Return() + } + return out, nil +} + +// GetArticleCountWithDateFilter gets the total count of articles with optional date filtering +// When no date filter is specified, uses cached message_count from newsgroups table in main DB +func (db *Database) GetArticleCountWithDateFilter(groupDB *GroupDB, startTime, endTime *time.Time) (int64, error) { + var query string + var args []interface{} + var count int64 + start := time.Now() + + if startTime != nil || endTime != nil { + // Build count query with date filtering - must use live COUNT(*) query + var whereConditions []string + + // Always exclude NULL date_sent values to match SELECT query behavior + whereConditions = append(whereConditions, "date_sent IS NOT NULL") + + if startTime != nil { + whereConditions = append(whereConditions, "date_sent >= ?") + args = append(args, startTime.UTC().Format("2006-01-02 15:04:05")) + } + + if endTime != nil { + whereConditions = append(whereConditions, "date_sent <= ?") + args = append(args, endTime.UTC().Format("2006-01-02 15:04:05")) + } + + whereClause := "" + if len(whereConditions) > 0 { + whereClause = " WHERE " + strings.Join(whereConditions, " AND ") + } + + query = "SELECT COUNT(*) FROM articles" + whereClause + err := groupDB.DB.QueryRow(query, args...).Scan(&count) + if err != nil { + return 0, err + } + } else { + // No date filtering - use cached message_count from newsgroups table in main DB + // 
This is MUCH faster than COUNT(*) on large tables (O(1) vs O(N)) + query = "SELECT COALESCE(message_count, 0) FROM newsgroups WHERE name = ?" + err := db.GetMainDB().QueryRow(query, groupDB.Newsgroup).Scan(&count) + if err != nil { + // Fallback to direct COUNT if newsgroups table doesn't have the entry + log.Printf("WARNING: Could not get message_count from newsgroups table for '%s', falling back to COUNT(*): %v", groupDB.Newsgroup, err) + query = "SELECT COUNT(*) FROM articles" + err = groupDB.DB.QueryRow(query).Scan(&count) + if err != nil { + return 0, err + } + } + } + + elapsed := time.Since(start) + if elapsed > 5*time.Second { + log.Printf("WARNING: Slow COUNT query for group '%s' took %v (count=%d)", groupDB.Newsgroup, elapsed, count) + } + + return count, nil +} diff --git a/internal/database/sqlite_retry.go b/internal/database/sqlite_retry.go index cc406feb..89d8b8b8 100644 --- a/internal/database/sqlite_retry.go +++ b/internal/database/sqlite_retry.go @@ -5,13 +5,14 @@ import ( "log" "math/rand" "strings" + "sync/atomic" "time" ) const ( - maxRetries = 1000 + maxRetries = 10000 baseDelay = 10 * time.Millisecond - maxDelay = 25 * time.Millisecond + maxDelay = 2500 * time.Millisecond ) // isRetryableError checks if the error is a retryable SQLite error @@ -28,10 +29,11 @@ func isRetryableError(err error) bool { } // retryableExec executes a SQL statement with retry logic for lock conflicts -func retryableExec(db *sql.DB, query string, args ...interface{}) (sql.Result, error) { +func RetryableExec(db *sql.DB, query string, args ...interface{}) (sql.Result, error) { var result sql.Result var err error - + start := time.Now() + atomic.AddUint64(&queryID, 1) for attempt := 0; attempt < maxRetries; attempt++ { result, err = db.Exec(query, args...) 
@@ -47,11 +49,11 @@ func retryableExec(db *sql.DB, query string, args ...interface{}) (sql.Result, e } // Add random jitter (up to 50% of delay) - jitter := time.Duration(rand.Int63n(int64(delay) / 2)) + jitter := time.Duration(rand.Int63n(int64(delay) / 100 * 50)) time.Sleep(delay + jitter) - log.Printf("[WARN] SQLite retry attempt %d/%d for query (first 50 chars): %s... Error: %v", - attempt+1, maxRetries, truncateString(query, 50), err) + log.Printf("(#%d) SQLite retry attempt %d/%d for query (first 50 chars): %s... Error: %v took %v (retry in: %v)", + atomic.LoadUint64(&queryID), attempt+1, maxRetries, truncateString(query, 50), err, time.Since(start), delay+jitter) } } @@ -59,7 +61,7 @@ func retryableExec(db *sql.DB, query string, args ...interface{}) (sql.Result, e } // retryableExecPtr executes a SQL statement with retry logic for lock conflicts -func retryableExecPtr(db *sql.DB, query *strings.Builder, args ...interface{}) (sql.Result, error) { +func RetryableExecPtr(db *sql.DB, query *strings.Builder, args ...interface{}) (sql.Result, error) { var result sql.Result var err error @@ -89,16 +91,10 @@ func retryableExecPtr(db *sql.DB, query *strings.Builder, args ...interface{}) ( return result, err } -// retryableQueryRow executes a query that returns a single row with retry logic -func retryableQueryRow(db *sql.DB, query string, args ...interface{}) *sql.Row { - // For QueryRow, we can't detect errors until Scan() is called - // Return the row directly - callers should handle retryable errors in their Scan() calls - return db.QueryRow(query, args...) -} - // retryableQueryRowScan executes a QueryRow and Scan with retry logic -func retryableQueryRowScan(db *sql.DB, query string, args []interface{}, dest ...interface{}) error { +func RetryableQueryRowScan(db *sql.DB, query string, args []interface{}, dest ...interface{}) error { var err error + atomic.AddUint64(&queryID, 1) for attempt := 0; attempt < maxRetries; attempt++ { row := db.QueryRow(query, args...) 
@@ -118,8 +114,8 @@ func retryableQueryRowScan(db *sql.DB, query string, args []interface{}, dest .. jitter := time.Duration(rand.Int63n(int64(delay) / 2)) time.Sleep(delay + jitter) - log.Printf("SQLite retry attempt %d/%d for QueryRow scan (first 50 chars): %s... Error: %v", - attempt+1, maxRetries, truncateString(query, 50), err) + log.Printf("(#%d) SQLite retry attempt %d/%d for QueryRow scan (first 50 chars): %s... Error: %v", + atomic.LoadUint64(&queryID), attempt+1, maxRetries, truncateString(query, 50), err) } } @@ -127,10 +123,10 @@ func retryableQueryRowScan(db *sql.DB, query string, args []interface{}, dest .. } // retryableQuery executes a query that returns multiple rows with retry logic -func retryableQuery(db *sql.DB, query string, args ...interface{}) (*sql.Rows, error) { +func RetryableQuery(db *sql.DB, query string, args ...interface{}) (*sql.Rows, error) { var rows *sql.Rows var err error - + atomic.AddUint64(&queryID, 1) for attempt := 0; attempt < maxRetries; attempt++ { rows, err = db.Query(query, args...) @@ -154,8 +150,8 @@ func retryableQuery(db *sql.DB, query string, args ...interface{}) (*sql.Rows, e jitter := time.Duration(rand.Int63n(int64(delay) / 2)) time.Sleep(delay + jitter) - log.Printf("SQLite retry attempt %d/%d for query (first 50 chars): %s... Error: %v", - attempt+1, maxRetries, truncateString(query, 50), err) + log.Printf("(#%d) SQLite retry attempt %d/%d for query (first 50 chars): %s... 
Error: %v", + atomic.LoadUint64(&queryID), attempt+1, maxRetries, truncateString(query, 50), err) } } @@ -168,9 +164,9 @@ func retryableQuery(db *sql.DB, query string, args ...interface{}) (*sql.Rows, e } // retryableTransactionExec executes a transaction with retry logic -func retryableTransactionExec(db *sql.DB, txFunc func(*sql.Tx) error) error { +func RetryableTransactionExec(db *sql.DB, txFunc func(*sql.Tx) error) error { var err error - + atomic.AddUint64(&queryID, 1) for attempt := 0; attempt < maxRetries; attempt++ { tx, err := db.Begin() if err != nil { @@ -186,7 +182,7 @@ func retryableTransactionExec(db *sql.DB, txFunc func(*sql.Tx) error) error { // Add random jitter (up to 50% of delay) jitter := time.Duration(rand.Int63n(int64(delay) / 2)) time.Sleep(delay + jitter) - log.Printf("SQLite retry attempt %d/%d for transaction begin: %v", attempt+1, maxRetries, err) + log.Printf("(#%d) SQLite retry attempt %d/%d for transaction begin: %v", atomic.LoadUint64(&queryID), attempt+1, maxRetries, err) continue } return err @@ -207,7 +203,7 @@ func retryableTransactionExec(db *sql.DB, txFunc func(*sql.Tx) error) error { // Add random jitter (up to 50% of delay) jitter := time.Duration(rand.Int63n(int64(delay) / 2)) time.Sleep(delay + jitter) - log.Printf("SQLite retry attempt %d/%d for transaction: %v", attempt+1, maxRetries, err) + log.Printf("(#%d) SQLite retry attempt %d/%d for transaction: %v", atomic.LoadUint64(&queryID), attempt+1, maxRetries, err) continue } return err @@ -242,11 +238,13 @@ func truncateString(s string, length int) string { return s[:length] } +var queryID uint64 + // retryableStmtExec executes a prepared statement with retry logic for lock conflicts -func retryableStmtExec(stmt *sql.Stmt, args ...interface{}) (sql.Result, error) { +func RetryableStmtExec(stmt *sql.Stmt, args ...interface{}) (sql.Result, error) { var result sql.Result var err error - + atomic.AddUint64(&queryID, 1) for attempt := 0; attempt < maxRetries; attempt++ { 
result, err = stmt.Exec(args...) @@ -265,8 +263,7 @@ func retryableStmtExec(stmt *sql.Stmt, args ...interface{}) (sql.Result, error) jitter := time.Duration(rand.Int63n(int64(delay) / 2)) time.Sleep(delay + jitter) - log.Printf("SQLite retry attempt %d/%d for prepared statement exec. Error: %v", - attempt+1, maxRetries, err) + log.Printf("(#%d) SQLite retry attempt %d/%d for prepared statement exec. Error: %v stmt=%v", atomic.LoadUint64(&queryID), attempt+1, maxRetries, err, stmt) } } @@ -274,9 +271,9 @@ func retryableStmtExec(stmt *sql.Stmt, args ...interface{}) (sql.Result, error) } // retryableStmtQueryRowScan executes a prepared statement QueryRow and Scan with retry logic -func retryableStmtQueryRowScan(stmt *sql.Stmt, args []interface{}, dest ...interface{}) error { +func RetryableStmtQueryRowScan(stmt *sql.Stmt, args []interface{}, dest ...interface{}) error { var err error - + atomic.AddUint64(&queryID, 1) for attempt := 0; attempt < maxRetries; attempt++ { row := stmt.QueryRow(args...) err = row.Scan(dest...) @@ -296,47 +293,9 @@ func retryableStmtQueryRowScan(stmt *sql.Stmt, args []interface{}, dest ...inter jitter := time.Duration(rand.Int63n(int64(delay) / 2)) time.Sleep(delay + jitter) - log.Printf("SQLite retry attempt %d/%d for prepared statement QueryRow scan. Error: %v", - attempt+1, maxRetries, err) + log.Printf("(#%d) SQLite retry attempt %d/%d for prepared statement QueryRow scan. Error: %v", atomic.LoadUint64(&queryID), attempt+1, maxRetries, err) } } return err } - -// Exported wrapper functions for use by other packages - -// RetryableExec executes a SQL statement with retry logic for lock conflicts -func RetryableExec(db *sql.DB, query string, args ...interface{}) (sql.Result, error) { - return retryableExec(db, query, args...) -} - -// RetryableQuery executes a SQL query with retry logic for lock conflicts -func RetryableQuery(db *sql.DB, query string, args ...interface{}) (*sql.Rows, error) { - return retryableQuery(db, query, args...) 
-} - -// RetryableQueryRow executes a SQL query and returns a single row with retry logic -func RetryableQueryRow(db *sql.DB, query string, args ...interface{}) *sql.Row { - return retryableQueryRow(db, query, args...) -} - -// RetryableQueryRowScan executes a SQL query and scans the result with retry logic -func RetryableQueryRowScan(db *sql.DB, query string, args []interface{}, dest ...interface{}) error { - return retryableQueryRowScan(db, query, args, dest...) -} - -// RetryableTransactionExec executes a transaction with retry logic for lock conflicts -func RetryableTransactionExec(db *sql.DB, txFunc func(*sql.Tx) error) error { - return retryableTransactionExec(db, txFunc) -} - -// RetryableStmtExec executes a prepared statement with retry logic for lock conflicts -func RetryableStmtExec(stmt *sql.Stmt, args ...interface{}) (sql.Result, error) { - return retryableStmtExec(stmt, args...) -} - -// RetryableStmtQueryRowScan executes a prepared statement QueryRow and scans with retry logic -func RetryableStmtQueryRowScan(stmt *sql.Stmt, args []interface{}, dest ...interface{}) error { - return retryableStmtQueryRowScan(stmt, args, dest...) 
-} diff --git a/internal/database/thread_cache.go b/internal/database/thread_cache.go index 6cbc0ca3..4dd9d54b 100644 --- a/internal/database/thread_cache.go +++ b/internal/database/thread_cache.go @@ -25,7 +25,7 @@ type ThreadCacheEntry struct { } // InitializeThreadCache creates a new cache entry for a thread root -func (db *Database) InitializeThreadCache(groupDBs *GroupDBs, threadRoot int64, rootArticle *models.Article) error { +func (db *Database) InitializeThreadCache(groupDB *GroupDB, threadRoot int64, rootArticle *models.Article) error { query := ` INSERT INTO thread_cache ( @@ -40,7 +40,7 @@ func (db *Database) InitializeThreadCache(groupDBs *GroupDBs, threadRoot int64, // Format dates as UTC strings to avoid timezone encoding issues rootDateUTC := rootArticle.DateSent.UTC().Format("2006-01-02 15:04:05") - _, err := retryableExec(groupDBs.DB, query, + _, err := RetryableExec(groupDB.DB, query, threadRoot, rootDateUTC, threadRoot, // last_child_number starts as the root itself @@ -55,14 +55,14 @@ } // UpdateThreadCache updates an existing cache entry when a reply is added -func (db *Database) UpdateThreadCache(groupDBs *GroupDBs, threadRoot int64, childArticleNum int64, childDate time.Time) error { +func (db *Database) UpdateThreadCache(groupDB *GroupDB, threadRoot int64, childArticleNum int64, childDate time.Time) error { // First, get the current cache entry var currentChildren string var currentCount int query := `SELECT child_articles, message_count FROM thread_cache WHERE thread_root = ?` - err := retryableQueryRowScan(groupDBs.DB, query, []interface{}{threadRoot}, &currentChildren, &currentCount) + err := RetryableQueryRowScan(groupDB.DB, query, []interface{}{threadRoot}, &currentChildren, &currentCount) if err != nil { // If the thread cache entry doesn't exist, queue it for batch initialization // This can happen if the root article was processed without initializing the cache @@ -74,7 +74,7 @@ 
func (db *Database) UpdateThreadCache(groupDBs *GroupDBs, threadRoot int64, chil } // Initialize directly instead of batch processing - err = db.InitializeThreadCache(groupDBs, threadRoot, rootArticle) + err = db.InitializeThreadCache(groupDB, threadRoot, rootArticle) if err != nil { log.Printf("Failed to initialize thread cache for root %d: %v", threadRoot, err) // Continue with defaults to allow the update to proceed @@ -83,7 +83,7 @@ func (db *Database) UpdateThreadCache(groupDBs *GroupDBs, threadRoot int64, chil // Update memory cache immediately so subsequent operations can use it if db.MemThreadCache != nil { // Initialize with minimal values - batch processing will update the database - db.MemThreadCache.UpdateThreadMetadata(groupDBs.Newsgroup, threadRoot, 1, childDate, "") + db.MemThreadCache.UpdateThreadMetadata(groupDB.Newsgroup, threadRoot, 1, childDate, "") } // For now, set defaults so we can continue with the update @@ -112,7 +112,7 @@ func (db *Database) UpdateThreadCache(groupDBs *GroupDBs, threadRoot int64, chil // Format childDate as UTC string to avoid timezone encoding issues childDateUTC := childDate.UTC().Format("2006-01-02 15:04:05") - _, err = retryableExec(groupDBs.DB, updateQuery, + _, err = RetryableExec(groupDB.DB, updateQuery, newChildren, currentCount+1, childArticleNum, @@ -126,7 +126,7 @@ func (db *Database) UpdateThreadCache(groupDBs *GroupDBs, threadRoot int64, chil // Update memory cache if db.MemThreadCache != nil { - db.MemThreadCache.UpdateThreadMetadata(groupDBs.Newsgroup, threadRoot, currentCount+1, childDate, newChildren) + db.MemThreadCache.UpdateThreadMetadata(groupDB.Newsgroup, threadRoot, currentCount+1, childDate, newChildren) } return nil @@ -205,32 +205,32 @@ func (mem *MemCachedThreads) GetMemCachedTreadsCount(group string) int64 { // GetCachedThreads retrieves cached thread data with pagination (thread list only - no children) // First tries memory cache, falls back to database if cache miss -func (db *Database) 
GetCachedThreads(groupDBs *GroupDBs, page int64, pageSize int64) ([]*models.ForumThread, int64, error) { +func (db *Database) GetCachedThreads(groupDB *GroupDB, page int64, pageSize int64) ([]*models.ForumThread, int64, error) { startTime := time.Now() - log.Printf("[PERF:THREADS] Starting GetCachedThreads for group '%s', page %d, pageSize %d", groupDBs.Newsgroup, page, pageSize) + log.Printf("[PERF:THREADS] Starting GetCachedThreads for group '%s', page %d, pageSize %d", groupDB.Newsgroup, page, pageSize) // Try memory cache first (fast path) if db.MemThreadCache != nil { cacheStartTime := time.Now() - if threads, count, hit := db.MemThreadCache.GetCachedThreadsFromMemory(db, groupDBs, groupDBs.Newsgroup, page, pageSize); hit { - log.Printf("[PERF:THREADS] Memory cache HIT took %v for group '%s' (%d threads)", time.Since(cacheStartTime), groupDBs.Newsgroup, len(threads)) + if threads, count, hit := db.MemThreadCache.GetCachedThreadsFromMemory(db, groupDB, groupDB.Newsgroup, page, pageSize); hit { + log.Printf("[PERF:THREADS] Memory cache HIT took %v for group '%s' (%d threads)", time.Since(cacheStartTime), groupDB.Newsgroup, len(threads)) log.Printf("[PERF:THREADS] Total GetCachedThreads took %v (cache hit)", time.Since(startTime)) return threads, count, nil } - log.Printf("[PERF:THREADS] Memory cache MISS took %v for group '%s'", time.Since(cacheStartTime), groupDBs.Newsgroup) + log.Printf("[PERF:THREADS] Memory cache MISS took %v for group '%s'", time.Since(cacheStartTime), groupDB.Newsgroup) // Cache miss - refresh from database refreshStartTime := time.Now() - log.Printf("[MEM:MISS] Refreshing thread cache for group '%s'", groupDBs.Newsgroup) - if err := db.MemThreadCache.RefreshThreadCache(db, groupDBs, groupDBs.Newsgroup, page, pageSize); err != nil { + log.Printf("[MEM:MISS] Refreshing thread cache for group '%s'", groupDB.Newsgroup) + if err := db.MemThreadCache.RefreshThreadCache(db, groupDB, groupDB.Newsgroup, page, pageSize); err != nil { 
log.Printf("Failed to refresh thread cache: %v", err) // Continue to database fallback } else { - log.Printf("[PERF:THREADS] RefreshThreadCache took %v for group '%s'", time.Since(refreshStartTime), groupDBs.Newsgroup) + log.Printf("[PERF:THREADS] RefreshThreadCache took %v for group '%s'", time.Since(refreshStartTime), groupDB.Newsgroup) // Try memory cache again after refresh retryStartTime := time.Now() - if threads, count, hit := db.MemThreadCache.GetCachedThreadsFromMemory(db, groupDBs, groupDBs.Newsgroup, page, pageSize); hit { - log.Printf("[PERF:THREADS] Memory cache retry took %v for group '%s' (%d threads)", time.Since(retryStartTime), groupDBs.Newsgroup, len(threads)) + if threads, count, hit := db.MemThreadCache.GetCachedThreadsFromMemory(db, groupDB, groupDB.Newsgroup, page, pageSize); hit { + log.Printf("[PERF:THREADS] Memory cache retry took %v for group '%s' (%d threads)", time.Since(retryStartTime), groupDB.Newsgroup, len(threads)) log.Printf("[PERF:THREADS] Total GetCachedThreads took %v (after refresh)", time.Since(startTime)) return threads, count, nil } @@ -239,17 +239,17 @@ func (db *Database) GetCachedThreads(groupDBs *GroupDBs, page int64, pageSize in log.Printf("[WARN] MemThreadCache is nil") } log.Printf("[PERF:THREADS] Total GetCachedThreads FAILED took %v", time.Since(startTime)) - return nil, 0, fmt.Errorf("no cached threads found for group '%s'", groupDBs.Newsgroup) + return nil, 0, fmt.Errorf("no cached threads found for group '%s'", groupDB.Newsgroup) } // GetCachedThreadReplies retrieves paginated replies for a specific thread -func (db *Database) GetCachedThreadReplies(groupDBs *GroupDBs, threadRoot int64, page int, pageSize int) ([]*models.Overview, int, error) { +func (db *Database) GetCachedThreadReplies(groupDB *GroupDB, threadRoot int64, page int, pageSize int) ([]*models.Overview, int, error) { // Get the cached thread entry var childArticles string var totalReplies int query := `SELECT child_articles, message_count FROM 
thread_cache WHERE thread_root = ?` - err := retryableQueryRowScan(groupDBs.DB, query, []interface{}{threadRoot}, &childArticles, &totalReplies) + err := RetryableQueryRowScan(groupDB.DB, query, []interface{}{threadRoot}, &childArticles, &totalReplies) if err != nil { return nil, 0, fmt.Errorf("failed to get thread cache for root %d: %w", threadRoot, err) } @@ -297,7 +297,7 @@ func (db *Database) GetCachedThreadReplies(groupDBs *GroupDBs, threadRoot int64, args[i] = num } - rows, err := retryableQuery(groupDBs.DB, childQuery, args...) + rows, err := RetryableQuery(groupDB.DB, childQuery, args...) if err != nil { return nil, 0, fmt.Errorf("failed to query thread replies: %w", err) } @@ -321,7 +321,7 @@ func (db *Database) GetCachedThreadReplies(groupDBs *GroupDBs, threadRoot int64, } // GetOverviewByArticleNum gets a single overview from articles table by article number -func (db *Database) GetOverviewByArticleNum(groupDBs *GroupDBs, articleNum int64) (*models.Overview, error) { +func (db *Database) GetOverviewByArticleNum(groupDB *GroupDB, articleNum int64) (*models.Overview, error) { query := ` SELECT article_num, subject, from_header, date_sent, date_string, message_id, "references", bytes, lines, reply_count, downloaded, spam, hide @@ -330,7 +330,7 @@ func (db *Database) GetOverviewByArticleNum(groupDBs *GroupDBs, articleNum int64 ` overview := &models.Overview{} - err := retryableQueryRowScan(groupDBs.DB, query, []interface{}{articleNum}, + err := RetryableQueryRowScan(groupDB.DB, query, []interface{}{articleNum}, &overview.ArticleNum, &overview.Subject, &overview.FromHeader, &overview.DateSent, &overview.DateString, &overview.MessageID, &overview.References, &overview.Bytes, &overview.Lines, @@ -345,7 +345,7 @@ func (db *Database) GetOverviewByArticleNum(groupDBs *GroupDBs, articleNum int64 } // GetCachedThreadsFromMemory retrieves threads using the two-level memory cache -func (mem *MemCachedThreads) GetCachedThreadsFromMemory(db *Database, groupDBs 
*GroupDBs, group string, page int64, pageSize int64) ([]*models.ForumThread, int64, bool) { +func (mem *MemCachedThreads) GetCachedThreadsFromMemory(db *Database, groupDB *GroupDB, group string, page int64, pageSize int64) ([]*models.ForumThread, int64, bool) { startTime := time.Now() mem.mux.RLock() defer mem.mux.RUnlock() @@ -407,7 +407,7 @@ func (mem *MemCachedThreads) GetCachedThreadsFromMemory(db *Database, groupDBs * } // Get the root overview (this should be fast from articles table) - rootOverview, err := db.GetOverviewByArticleNum(groupDBs, rootID) // TODO maybe add caching here too + rootOverview, err := db.GetOverviewByArticleNum(groupDB, rootID) // TODO maybe add caching here too if err != nil { log.Printf("failed to get root overview for thread %d: %v", rootID, err) continue @@ -431,7 +431,7 @@ func (mem *MemCachedThreads) GetCachedThreadsFromMemory(db *Database, groupDBs * // RefreshThreadCache loads thread data from database and updates memory cache // Uses hybrid cursor+page pagination like articles for ultra-fast performance -func (mem *MemCachedThreads) RefreshThreadCache(db *Database, groupDBs *GroupDBs, group string, requestedPage int64, pageSize int64) error { +func (mem *MemCachedThreads) RefreshThreadCache(db *Database, groupDB *GroupDB, group string, requestedPage int64, pageSize int64) error { startTime := time.Now() log.Printf("[PERF:REFRESH] Starting RefreshThreadCache for group '%s', page %d", group, requestedPage) @@ -461,7 +461,7 @@ func (mem *MemCachedThreads) RefreshThreadCache(db *Database, groupDBs *GroupDBs ` args := []interface{}{cacheSize, cacheWindowStart} - rows, err := retryableQuery(groupDBs.DB, query, args...) + rows, err := RetryableQuery(groupDB.DB, query, args...) 
if err != nil { return fmt.Errorf("failed to query thread cache: %w", err) } @@ -490,7 +490,7 @@ func (mem *MemCachedThreads) RefreshThreadCache(db *Database, groupDBs *GroupDBs // Quick check if thread root article is hidden (fast single lookup) var hidden int checkQuery := `SELECT hide FROM articles WHERE article_num = ? LIMIT 1` - err = retryableQueryRowScan(groupDBs.DB, checkQuery, []interface{}{entry.ThreadRoot}, &hidden) + err = RetryableQueryRowScan(groupDB.DB, checkQuery, []interface{}{entry.ThreadRoot}, &hidden) if err != nil || hidden != 0 { continue // Skip hidden threads } @@ -515,7 +515,7 @@ func (mem *MemCachedThreads) RefreshThreadCache(db *Database, groupDBs *GroupDBs // Get the REAL total count from database (not just cached count) var realTotalCount int64 countQuery := `SELECT COUNT(*) FROM thread_cache` - err = retryableQueryRowScan(groupDBs.DB, countQuery, []interface{}{}, &realTotalCount) + err = RetryableQueryRowScan(groupDB.DB, countQuery, []interface{}{}, &realTotalCount) if err != nil { log.Printf("[PERF:REFRESH] Failed to get real total count: %v", err) realTotalCount = int64(len(threadRoots)) // Fallback to cached count diff --git a/internal/database/tree_cache.go b/internal/database/tree_cache.go index c81a34fc..7f9a53ec 100644 --- a/internal/database/tree_cache.go +++ b/internal/database/tree_cache.go @@ -63,9 +63,9 @@ func (tree *ThreadTree) GetTreeStats() TreeStats { } // BuildThreadTree constructs a hierarchical tree for a given thread root -func (db *Database) BuildThreadTree(groupDBs *GroupDBs, threadRoot int64) (*ThreadTree, error) { +func (db *Database) BuildThreadTree(groupDB *GroupDB, threadRoot int64) (*ThreadTree, error) { // First check if we have a cached tree - if tree, err := db.GetCachedTree(groupDBs, threadRoot); err == nil { + if tree, err := db.GetCachedTree(groupDB, threadRoot); err == nil { return tree, nil } @@ -75,7 +75,7 @@ func (db *Database) BuildThreadTree(groupDBs *GroupDBs, threadRoot int64) (*Thre // Get 
all articles in this thread from thread_cache var childArticles string query := `SELECT child_articles FROM thread_cache WHERE thread_root = ?` - err := retryableQueryRowScan(groupDBs.DB, query, []interface{}{threadRoot}, &childArticles) + err := RetryableQueryRowScan(groupDB.DB, query, []interface{}{threadRoot}, &childArticles) if err != nil { if err == sql.ErrNoRows { // Thread cache not yet built - fall back to single article @@ -100,7 +100,7 @@ func (db *Database) BuildThreadTree(groupDBs *GroupDBs, threadRoot int64) (*Thre // Get overview data for all articles to access References headers overviews := make(map[int64]*models.Overview) for _, artNum := range allArticles { - if overview, err := db.GetOverviewByArticleNum(groupDBs, artNum); err == nil { + if overview, err := db.GetOverviewByArticleNum(groupDB, artNum); err == nil { overviews[artNum] = overview } } @@ -187,7 +187,7 @@ func (db *Database) BuildThreadTree(groupDBs *GroupDBs, threadRoot int64) (*Thre tree.assignSortOrder() // Cache the tree structure - if err := db.CacheTreeStructure(groupDBs, tree); err != nil { + if err := db.CacheTreeStructure(groupDB, tree); err != nil { log.Printf("Failed to cache tree structure: %v", err) // Don't fail - tree is still usable } @@ -196,11 +196,11 @@ func (db *Database) BuildThreadTree(groupDBs *GroupDBs, threadRoot int64) (*Thre } // GetCachedTree retrieves a pre-computed tree from the cache -func (db *Database) GetCachedTree(groupDBs *GroupDBs, threadRoot int64) (*ThreadTree, error) { +func (db *Database) GetCachedTree(groupDB *GroupDB, threadRoot int64) (*ThreadTree, error) { // Check if tree cache exists and is recent var lastUpdated time.Time query := `SELECT last_updated FROM tree_stats WHERE thread_root = ?` - err := retryableQueryRowScan(groupDBs.DB, query, []interface{}{threadRoot}, &lastUpdated) + err := RetryableQueryRowScan(groupDB.DB, query, []interface{}{threadRoot}, &lastUpdated) if err != nil { return nil, fmt.Errorf("no cached tree found: %w", 
err) } @@ -211,7 +211,7 @@ func (db *Database) GetCachedTree(groupDBs *GroupDBs, threadRoot int64) (*Thread } // Load tree structure from cache - rows, err := retryableQuery(groupDBs.DB, ` + rows, err := RetryableQuery(groupDB.DB, ` SELECT article_num, parent_article, depth, child_count, descendant_count, tree_path, sort_order FROM cached_trees @@ -270,7 +270,7 @@ func (db *Database) GetCachedTree(groupDBs *GroupDBs, threadRoot int64) (*Thread // Load tree stats statsQuery := `SELECT max_depth, total_nodes, leaf_count FROM tree_stats WHERE thread_root = ?` - err = retryableQueryRowScan(groupDBs.DB, statsQuery, []interface{}{threadRoot}, + err = RetryableQueryRowScan(groupDB.DB, statsQuery, []interface{}{threadRoot}, &tree.MaxDepth, &tree.TotalNodes, &tree.LeafCount) if err != nil { log.Printf("Failed to load tree stats: %v", err) @@ -285,9 +285,9 @@ func (db *Database) GetCachedTree(groupDBs *GroupDBs, threadRoot int64) (*Thread } // CacheTreeStructure saves a computed tree to the cache -func (db *Database) CacheTreeStructure(groupDBs *GroupDBs, tree *ThreadTree) error { +func (db *Database) CacheTreeStructure(groupDB *GroupDB, tree *ThreadTree) error { // Start transaction for atomicity - tx, err := groupDBs.DB.Begin() + tx, err := groupDB.DB.Begin() if err != nil { return fmt.Errorf("failed to start transaction: %w", err) } @@ -385,13 +385,13 @@ func (db *Database) CacheTreeStructure(groupDBs *GroupDBs, tree *ThreadTree) err } // InvalidateTreeCache removes cached tree data when thread structure changes -func (db *Database) InvalidateTreeCache(groupDBs *GroupDBs, threadRoot int64) error { - _, err := retryableExec(groupDBs.DB, `DELETE FROM cached_trees WHERE thread_root = ?`, threadRoot) +func (db *Database) InvalidateTreeCache(groupDB *GroupDB, threadRoot int64) error { + _, err := RetryableExec(groupDB.DB, `DELETE FROM cached_trees WHERE thread_root = ?`, threadRoot) if err != nil { return fmt.Errorf("failed to invalidate tree cache: %w", err) } - _, err = 
retryableExec(groupDBs.DB, `DELETE FROM tree_stats WHERE thread_root = ?`, threadRoot) + _, err = RetryableExec(groupDB.DB, `DELETE FROM tree_stats WHERE thread_root = ?`, threadRoot) if err != nil { return fmt.Errorf("failed to invalidate tree stats: %w", err) } diff --git a/internal/database/tree_view_api.go b/internal/database/tree_view_api.go index 43b77765..101900ac 100644 --- a/internal/database/tree_view_api.go +++ b/internal/database/tree_view_api.go @@ -31,11 +31,11 @@ type TreeViewResponse struct { } // GetThreadTreeView returns a hierarchical tree view for a thread -func (db *Database) GetThreadTreeView(groupDBs *GroupDBs, threadRoot int64, options TreeViewOptions) (*TreeViewResponse, error) { +func (db *Database) GetThreadTreeView(groupDB *GroupDB, threadRoot int64, options TreeViewOptions) (*TreeViewResponse, error) { startTime := time.Now() // Build or retrieve the thread tree - tree, err := db.BuildThreadTree(groupDBs, threadRoot) + tree, err := db.BuildThreadTree(groupDB, threadRoot) if err != nil { return &TreeViewResponse{ ThreadRoot: threadRoot, @@ -45,7 +45,7 @@ func (db *Database) GetThreadTreeView(groupDBs *GroupDBs, threadRoot int64, opti // Apply view options if options.IncludeOverview { - if err := db.loadOverviewDataForTree(groupDBs, tree); err != nil { + if err := db.loadOverviewDataForTree(groupDB, tree); err != nil { log.Printf("Failed to load overview data for tree: %v", err) // Continue without overview data } @@ -67,10 +67,10 @@ func (db *Database) GetThreadTreeView(groupDBs *GroupDBs, threadRoot int64, opti } // loadOverviewDataForTree populates Overview data for all nodes in the tree -func (db *Database) loadOverviewDataForTree(groupDBs *GroupDBs, tree *ThreadTree) error { +func (db *Database) loadOverviewDataForTree(groupDB *GroupDB, tree *ThreadTree) error { for articleNum, node := range tree.NodeMap { if node.Overview == nil { - overview, err := db.GetOverviewByArticleNum(groupDBs, articleNum) + overview, err := 
db.GetOverviewByArticleNum(groupDB, articleNum) if err != nil { log.Printf("Failed to load overview for article %d: %v", articleNum, err) continue @@ -115,12 +115,12 @@ func (db *Database) HandleThreadTreeAPI(w http.ResponseWriter, r *http.Request) } // Get group database - groupDBs, err := db.GetGroupDBs(groupName) + groupDB, err := db.GetGroupDB(groupName) if err != nil { http.Error(w, fmt.Sprintf("Failed to get group database: %v", err), http.StatusInternalServerError) return } - defer groupDBs.Return(db) + defer groupDB.Return() // Parse options options := TreeViewOptions{ @@ -143,7 +143,7 @@ func (db *Database) HandleThreadTreeAPI(w http.ResponseWriter, r *http.Request) } // Get tree view - response, err := db.GetThreadTreeView(groupDBs, threadRoot, options) + response, err := db.GetThreadTreeView(groupDB, threadRoot, options) if err != nil { http.Error(w, fmt.Sprintf("Failed to get tree view: %v", err), http.StatusInternalServerError) return diff --git a/internal/history/history sqlite3 normal.go b/internal/history/history sqlite3 normal.go index 6220e8cf..cb978e54 100644 --- a/internal/history/history sqlite3 normal.go +++ b/internal/history/history sqlite3 normal.go @@ -1,74 +1,3 @@ package history -import ( - "database/sql" - "fmt" - "log" - "sync" - "time" -) - -// SQLite3DB represents a SQLite database connection pool -type SQLite3DB struct { - dbPath string - params string - maxOpen int - initOpen int - timeout int64 - DB *sql.DB - mux sync.RWMutex -} - -// NewSQLite3DB creates a new SQLite3 database pool -func NewSQLite3DB(opts *SQLite3Opts, createTables bool, useShortHashLen int, mode int) (*SQLite3DB, error) { - log.Printf("Opening database: %s", opts.dbPath) - - // Open database with just the file path, no connection parameters - // This follows the same pattern as group databases to avoid locking issues - connectionString := opts.dbPath - if opts.params != "" { - connectionString += opts.params - } - - db, err := sql.Open("sqlite3", 
connectionString) - if err != nil { - return nil, fmt.Errorf("failed to open database: %v", err) - } - - db.SetMaxOpenConns(opts.maxOpen) - db.SetMaxIdleConns(opts.initOpen) - db.SetConnMaxLifetime(time.Duration(opts.timeout) * time.Second) - - log.Printf("Testing database connection for: %s", opts.dbPath) - // Test connection - if err := db.Ping(); err != nil { - return nil, fmt.Errorf("failed to ping database: %v", err) - } - - log.Printf("Applying performance settings for: %s", opts.dbPath) - // Apply additional high-performance settings - if err := applyPerformanceSettings(db, mode); err != nil { - log.Printf("WARN: Failed to apply some performance settings: %v", err) - } - log.Printf("Performance settings applied for: %s", opts.dbPath) - - DB := &SQLite3DB{ - dbPath: opts.dbPath, - params: opts.params, - maxOpen: opts.maxOpen, - initOpen: opts.initOpen, - timeout: opts.timeout, - DB: db, - } - return DB, nil -} - -// Close closes the database connection -func (p *SQLite3DB) Close() error { - p.mux.Lock() - defer p.mux.Unlock() - if p.DB != nil { - return p.DB.Close() - } - return nil -} +// stub diff --git a/internal/history/history sqlite3_sharded.go b/internal/history/history sqlite3_sharded.go index 42935024..53ed829b 100644 --- a/internal/history/history sqlite3_sharded.go +++ b/internal/history/history sqlite3_sharded.go @@ -5,8 +5,31 @@ import ( "fmt" "log" "path/filepath" + "sync" + "time" ) +// SQLite3DB represents a SQLite database connection pool +type SQLite3DB struct { + dbPath string + params string + maxOpen int + initOpen int + timeout int64 + DB *sql.DB + mux sync.RWMutex +} + +// Close closes the database connection +func (p *SQLite3DB) Close() error { + p.mux.Lock() + defer p.mux.Unlock() + if p.DB != nil { + return p.DB.Close() + } + return nil +} + // SQLite3ShardedDB manages multiple SQLite databases for sharding type SQLite3ShardedDB struct { DBPools []*SQLite3DB @@ -16,7 +39,6 @@ type SQLite3ShardedDB struct { baseDir string maxOpen int 
timeout int64 - //mux sync.RWMutex // Mutex for thread-safe access } // ShardConfig defines the sharding configuration @@ -32,6 +54,50 @@ func GetShardConfig(mode int) (numDBs, tablesPerDB int, description string) { return 16, 256, "16 databases with 256 tables each" // unchangeable ! } +// NewSQLite3DB creates a new SQLite3 database pool +func NewSQLite3DB(opts *SQLite3Opts, createTables bool, mode int) (*SQLite3DB, error) { + log.Printf("Opening database: %s", opts.dbPath) + + // Open database with just the file path, no connection parameters + // This follows the same pattern as group databases to avoid locking issues + connectionString := opts.dbPath + if opts.params != "" { + connectionString += opts.params + } + + db, err := sql.Open("sqlite3", connectionString) + if err != nil { + return nil, fmt.Errorf("failed to open database: %v", err) + } + + db.SetMaxOpenConns(opts.maxOpen) + db.SetMaxIdleConns(opts.initOpen) + db.SetConnMaxLifetime(time.Duration(opts.timeout) * time.Second) + + log.Printf("Testing database connection for: %s", opts.dbPath) + // Test connection + if err := db.Ping(); err != nil { + return nil, fmt.Errorf("failed to ping database: %v", err) + } + + log.Printf("Applying performance settings for: %s", opts.dbPath) + // Apply additional high-performance settings + if err := applyPerformanceSettings(db, mode); err != nil { + log.Printf("WARN: Failed to apply some performance settings: %v", err) + } + log.Printf("Performance settings applied for: %s", opts.dbPath) + + DB := &SQLite3DB{ + dbPath: opts.dbPath, + params: opts.params, + maxOpen: opts.maxOpen, + initOpen: opts.initOpen, + timeout: opts.timeout, + DB: db, + } + return DB, nil +} + // NewSQLite3ShardedDB creates a new sharded SQLite3 database system func NewSQLite3ShardedDB(config *ShardConfig, createTables bool, useShortHashLen int) (*SQLite3ShardedDB, error) { numDBs, tablesPerDB, description := GetShardConfig(config.Mode) @@ -72,7 +138,7 @@ func NewSQLite3ShardedDB(config 
*ShardConfig, createTables bool, useShortHashLen timeout: config.Timeout, } - db, err := NewSQLite3DB(opts, false, useShortHashLen, config.Mode) // Don't create tables yet + db, err := NewSQLite3DB(opts, false, config.Mode) // Don't create tables yet if err != nil { return nil, fmt.Errorf("failed to create database pool %d: %v", i, err) } @@ -81,7 +147,7 @@ func NewSQLite3ShardedDB(config *ShardConfig, createTables bool, useShortHashLen } if createTables { - if err := s.CreateAllTables(useShortHashLen); err != nil { + if err := s.CreateAllTables(); err != nil { return nil, err } } @@ -110,12 +176,12 @@ func (s *SQLite3ShardedDB) Close() error { } // CreateAllTables creates all required tables across all databases -func (s *SQLite3ShardedDB) CreateAllTables(useShortHashLen int) error { +func (s *SQLite3ShardedDB) CreateAllTables() error { log.Printf("Creating tables for sharding mode %d (%d databases, %d tables per DB)", s.shardMode, s.numDBs, s.tablesPerDB) for dbIndex := 0; dbIndex < s.numDBs; dbIndex++ { - if err := s.createTablesForDB(dbIndex, useShortHashLen); err != nil { + if err := s.createTablesForDB(dbIndex); err != nil { return fmt.Errorf("failed to create tables for database %d: %v", dbIndex, err) } } @@ -125,31 +191,30 @@ func (s *SQLite3ShardedDB) CreateAllTables(useShortHashLen int) error { } // createTablesForDB creates tables for a specific database -func (s *SQLite3ShardedDB) createTablesForDB(dbIndex int, useShortHashLen int) error { +func (s *SQLite3ShardedDB) createTablesForDB(dbIndex int) error { db := s.DBPools[dbIndex].DB if db == nil { return fmt.Errorf("database connection is nil") } // Create multiple tables per database - tableNames := s.getTableNamesForDB() - for _, tableName := range tableNames { + for _, tableName := range s.getTableNamesForDB() { query := fmt.Sprintf(` CREATE TABLE IF NOT EXISTS %s ( - h CHAR(%d) NOT NULL PRIMARY KEY, - o TEXT + message_id TEXT NOT NULL PRIMARY KEY, + newsgroups TEXT ) WITHOUT ROWID; - `, tableName, 
useShortHashLen) + `, tableName) if _, err := db.Exec(query); err != nil { return fmt.Errorf("failed to create table %s: %v", tableName, err) } // Create index - indexQuery := fmt.Sprintf("CREATE INDEX IF NOT EXISTS idx_%s_h ON %s(h);", tableName, tableName) - if _, err := db.Exec(indexQuery); err != nil { - log.Printf("WARN: Failed to create index for table %s: %v", tableName, err) - } + //indexQuery := fmt.Sprintf("CREATE INDEX IF NOT EXISTS idx_%s_h ON %s(h);", tableName, tableName) + //if _, err := db.Exec(indexQuery); err != nil { + // log.Printf("WARN: Failed to create index for table %s: %v", tableName, err) + //} } return nil } @@ -158,7 +223,7 @@ func (s *SQLite3ShardedDB) createTablesForDB(dbIndex int, useShortHashLen int) e func (s *SQLite3ShardedDB) getTableNamesForDB() []string { var tables []string for i := 0; i < s.tablesPerDB; i++ { - tables = append(tables, fmt.Sprintf("s%02x", i)) + tables = append(tables, fmt.Sprintf("_%02x", i)) } return tables } diff --git a/internal/history/history.go b/internal/history/history.go index c312bbd3..53a41dfd 100644 --- a/internal/history/history.go +++ b/internal/history/history.go @@ -1,26 +1,23 @@ package history import ( - "bufio" "database/sql" "fmt" - "io" "log" "os" - "runtime" "strconv" "strings" "sync" "time" ) -const ENABLE_HISTORY = false // EXPERIMENTAL ! +const ENABLE_HISTORY = true // EXPERIMENTAL ! 
-var HistoryDEBUG = false // Set to true for spammy debug logs -var MaxLookupWorkers = runtime.NumCPU() // Use number of CPU cores for lookup workers +var ErrNoMatch = fmt.Errorf("no match") + +var HistoryDEBUG = false // Set to true for spammy debug logs const ( - //MaxLookupWorkers = 64 // History file constants DefaultHistoryDir = "./data/history" // TODO set via config @@ -56,20 +53,15 @@ func NewHistory(config *HistoryConfig, mainWG *sync.WaitGroup) (*History, error) return nil, fmt.Errorf("failed to create history directory: %s", config.HistoryDir) } } - MaxLookupWorkers = MaxLookupWorkers / 2 - if MaxLookupWorkers < 1 { - MaxLookupWorkers = 1 - } h := &History{ - config: config, - stats: &HistoryStats{}, - tickChan: make(chan struct{}, 1), - lookupChan: make(chan *MessageIdItem, MaxLookupWorkers*2), - writerChan: make(chan *MessageIdItem, int(config.BatchSize*2)), - dbChan: make(chan *MessageIdItem, int(config.BatchSize*2)), - stopChan: make(chan struct{}), - lastFlush: time.Now(), - mainWG: mainWG, // Store the main application's waitgroup + config: config, + stats: &HistoryStats{}, + tickChan: make(chan struct{}, 1), + dbChan: make(chan *MessageIdItem, config.BatchSize*2), + dbQueued: make(map[*MessageIdItem]bool, config.BatchSize*2), + stopChan: make(chan struct{}), + lastFlush: time.Now(), + mainWG: mainWG, // Store the main application's waitgroup } if ENABLE_HISTORY { // Initialize database backend @@ -78,46 +70,75 @@ func NewHistory(config *HistoryConfig, mainWG *sync.WaitGroup) (*History, error) } // Open history file - if err := h.openHistoryFile(); err != nil { - return nil, fmt.Errorf("failed to open history file: %w", err) - } - - // Start worker goroutines with main application waitgroup coordination - h.bootLookupWorkers() + //if err := h.openHistoryFile(); err != nil { + // return nil, fmt.Errorf("failed to open history file: %w", err) + //} } go h.writerWorker() // +1 wg mainWG waitGroup return h, nil } +func (h *History) 
AddDBQueued(msgIdItem *MessageIdItem) bool { + h.mux.Lock() + defer h.mux.Unlock() + if h.dbQueued[msgIdItem] { + return false + } + h.dbQueued[msgIdItem] = true + return true +} + +func (h *History) DelDBQueued(msgIdItem *MessageIdItem) { + h.mux.Lock() + defer h.mux.Unlock() + delete(h.dbQueued, msgIdItem) +} + +func (h *History) dbMoreQueued() bool { + h.mux.RLock() + defer h.mux.RUnlock() + return len(h.dbQueued) > 0 +} + +func (h *History) IsDBQueued(msgIdItem *MessageIdItem) bool { + h.mux.RLock() + defer h.mux.RUnlock() + return h.dbQueued[msgIdItem] +} + // Add adds a new message-ID to history -func (h *History) Add(msgIdItem *MessageIdItem) { +func (h *History) Add(msgIdItem *MessageIdItem) bool { if !ENABLE_HISTORY { msgIdItem.Mux.Lock() msgIdItem.Response = CaseDupes msgIdItem.CachedEntryExpires = time.Now().Add(15 * time.Second) msgIdItem.Mux.Unlock() - return + return false } if msgIdItem == nil { log.Printf("[HISTORY] ERROR: Add called with nil MessageIdItem") - return + return false + } + if !h.AddDBQueued(msgIdItem) { + log.Printf("[HISTORY] Add()->AddDBQueued(): msgId: '%s' already queued", msgIdItem.MessageId) + return false } msgIdItem.Mux.Lock() if msgIdItem.MessageId == "" { log.Printf("[HISTORY] ERROR: Add called with empty MessageId item='%#v'", msgIdItem) msgIdItem.Mux.Unlock() - return - } - if msgIdItem.StorageToken == "" && (msgIdItem.GroupName == nil || *msgIdItem.GroupName == "" || msgIdItem.ArtNum <= 0) { - log.Printf("[HISTORY] ERROR: Add called with invalid MessageIdItem='%#v'", msgIdItem) - msgIdItem.Mux.Unlock() - return - } - if msgIdItem.FileOffset > 0 { - log.Printf("[HISTORY] ERROR: Add called with already stored MessageIdItem='%v'", msgIdItem) - msgIdItem.Mux.Unlock() - return + return false } + //if msgIdItem.StorageToken == "" && (msgIdItem.GroupName == nil || *msgIdItem.GroupName == "" || msgIdItem.ArtNum <= 0) { + // log.Printf("[HISTORY] ERROR: Add called with invalid MessageIdItem='%#v'", msgIdItem) + // 
msgIdItem.Mux.Unlock() + // return + //} + //if msgIdItem.FileOffset > 0 { + // log.Printf("[HISTORY] ERROR: Add called with already stored MessageIdItem='%v'", msgIdItem) + // msgIdItem.Mux.Unlock() + // return + //} if msgIdItem.Response != CaseLock { /* if msgIdItem.MessageId == "<32304224.79C1@parkcity.com>" { @@ -126,7 +147,7 @@ func (h *History) Add(msgIdItem *MessageIdItem) { */ msgIdItem.Mux.Unlock() log.Printf("[HISTORY] DUPLICATE to Add msgId='%s' case: %x != %x", msgIdItem.MessageId, msgIdItem.Response, CaseLock) - return + return false } //msgIdItem.Response = CaseWrite // Set to write state // FIXMEE //msgIdItem.CachedEntryExpires = time.Now().Add(CachedEntryTTL) @@ -141,7 +162,7 @@ func (h *History) Add(msgIdItem *MessageIdItem) { // Check if already exists */ - if len(h.writerChan) >= h.config.BatchSize { + if len(h.dbChan) >= h.config.BatchSize { select { case h.tickChan <- NOTIFY: // pass @@ -149,99 +170,64 @@ func (h *History) Add(msgIdItem *MessageIdItem) { // full } } - h.lookupChan <- msgIdItem - // Add entry to pending batch - //log.Printf("[HISTORY] Adding msgId='%s' to writer chan (queued %d/%d)", msgIdItem.MessageId, len(h.writerChan), cap(h.writerChan)) - - // Successfully queued for batch processing - //h.updateStats(func(s *HistoryStats) { s.TotalAdds++ }) - -} - -func (h *History) bootLookupWorkers() { - log.Printf("[HISTORY] Starting %d lookup workers", MaxLookupWorkers) - for i := 1; i <= MaxLookupWorkers; i++ { - go h.LookupWorker(i) - } - time.Sleep(1000 * time.Millisecond) // Give workers time to start -} - -func (h *History) LookupWorker(wid int) { - //log.Printf("[HISTORY] LookupWorker %03d started", wid) - processed := 0 - maxWork := 65536 / MaxLookupWorkers - if maxWork < 16384 { - maxWork = 16384 // Ensure minimum work per worker - } - restart := false - for { - if restart { - break - } - msgIdItem := <-h.lookupChan - processed++ - if processed >= maxWork { - restart = true - } - start := time.Now() - if response, err := 
h.Lookup(msgIdItem); err != nil || response != CasePass { - msgIdItem.Mux.Lock() - if response != CaseDupes { - log.Printf("[HISTORY] DEBUG Add()->Lookup(): msgId: '%s'response = %x != CasePass entry.msgIdItem.Response=%x", msgIdItem.MessageId, response, msgIdItem.Response) - } - msgIdItem.CachedEntryExpires = time.Now().Add(3 * time.Second) - msgIdItem.Mux.Unlock() - //h.updateStats(func(s *HistoryStats) { s.Duplicates++ }) - if HistoryDEBUG { - log.Printf("[HISTORY] Add()->Lookup(): Duplicate msgId: '%s' lookup took %v err='%v'", msgIdItem.MessageId, time.Since(start), err) - } - continue - } + start := time.Now() + if response, _, err := h.Lookup(msgIdItem, true); err != nil || response != CasePass { msgIdItem.Mux.Lock() - msgIdItem.Response = CaseWrite // Set to write state // FIXMEE - //msgIdItem.CachedEntryExpires = time.Now().Add(CachedEntryTTL) - msgIdItem.Mux.Unlock() - h.writerChan <- msgIdItem // Send to writer channel for processing - if HistoryDEBUG { - log.Printf("[HISTORY] Add()->Lookup(): msgId: '%s' not found, lookup took %v (queued %d)", msgIdItem.MessageId, time.Since(start), len(h.writerChan)) + if response != CaseDupes { + log.Printf("[HISTORY] DEBUG Add()->Lookup(): msgId: '%s'response = %x != CasePass entry.msgIdItem.Response=%x", msgIdItem.MessageId, response, msgIdItem.Response) } + msgIdItem.CachedEntryExpires = time.Now().Add(3 * time.Second) + msgIdItem.Mux.Unlock() + //h.updateStats(func(s *HistoryStats) { s.Duplicates++ }) + //if HistoryDEBUG { + log.Printf("[HISTORY] Add()->Lookup(): Duplicate msgId: '%s' lookup took %v err='%v'", msgIdItem.MessageId, time.Since(start), err) + //} + return false } - //log.Printf("[HISTORY] LookupWorker (%03d/%03d) did %d/%d, restarting...", wid, MaxLookupWorkers, processed, maxWork) - go h.LookupWorker(wid) + msgIdItem.Mux.Lock() + msgIdItem.Response = CaseWrite // Set to write state // FIXMEE + //msgIdItem.CachedEntryExpires = time.Now().Add(CachedEntryTTL) + msgIdItem.Mux.Unlock() + h.dbChan <- 
msgIdItem + if HistoryDEBUG { + log.Printf("[HISTORY] Add()->Lookup(): msgId: '%s' not found, lookup took %v (queued %d)", msgIdItem.MessageId, time.Since(start), len(h.dbChan)) + } + return true } -// Lookup checks if a message-ID exists in history // Returns: ResponsePass (0) = not found, ResponseDuplicate (1) = found, ResponseRetry (2) = error -func (h *History) Lookup(msgIdItem *MessageIdItem) (int, error) { +func (h *History) Lookup(msgIdItem *MessageIdItem, quick bool) (response int, newsgroupIDs []int64, err error) { if !ENABLE_HISTORY { - return CasePass, nil + return CasePass, nil, nil } - found, err := h.lookupInDatabase(msgIdItem) + found, newsgroupIDs, err := h.LookupMID(msgIdItem, quick) //log.Printf("[HISTORY] Lookup for msgId='%s' found='%v', err='%v'", msgIdItem.MessageId, found, err) if err != nil { log.Printf("[HISTORY] ERROR: Lookup failed for msgId='%s': %v", msgIdItem.MessageId, err) //h.updateStats(func(s *HistoryStats) { s.Errors++ }) - return CaseError, err + return CaseError, nil, err } //log.Printf("Lookup for msgID: '%s' found='%v', offsets='%v'", messageID, found, offsetsData) if found { //h.updateStats(func(s *HistoryStats) { s.TotalLookups++ }) - return CaseDupes, nil + return CaseDupes, newsgroupIDs, nil } - return CasePass, nil + return CasePass, nil, nil } -// lookupInDatabase looks up a hash in the sharded database -// Returns: bool (found), error -func (h *History) lookupInDatabase(msgIdItem *MessageIdItem) (bool, error) { +// Lookup checks if a message-ID exists in history +func (h *History) LookupMID(msgIdItem *MessageIdItem, quick bool) (exists bool, newsgroupIDs []int64, err error) { + if !ENABLE_HISTORY { + return false, nil, nil + } // Route hash: 1st char -> DB, 2nd+3rd chars -> table, remaining -> stored value start1 := time.Now() - dbIndex, tableName, shortHash, err := h.routeHash(msgIdItem.MessageId) + dbIndex, tableName, err := h.routeHash(msgIdItem) if err != nil { - return false, fmt.Errorf("failed to route hash: %v", 
err) + return false, nil, fmt.Errorf("failed to route hash: %v", err) } if HistoryDEBUG { - log.Printf("[HISTORY] #0 lookupInDatabase: routed hash '%s' to dbIndex=%d, tableName='%s', shortHash='%s' took %v", msgIdItem.MessageIdHash, dbIndex, tableName, shortHash, time.Since(start1)) + log.Printf("[HISTORY] #0 lookupInDatabase: routed hash '%s' to dbIndex=%d, tableName='%s', took %v", msgIdItem.MessageIdHash, dbIndex, tableName, time.Since(start1)) } // Get database connection @@ -250,7 +236,7 @@ func (h *History) lookupInDatabase(msgIdItem *MessageIdItem) (bool, error) { db, err = h.db.GetShardedDB(dbIndex, false) if err != nil { - return false, fmt.Errorf("failed to get database connection: %v", err) + return false, nil, fmt.Errorf("failed to get database connection: %v", err) } /* @@ -260,18 +246,18 @@ func (h *History) lookupInDatabase(msgIdItem *MessageIdItem) (bool, error) { */ // Query database for file offsets with optimized retry logic - var offsetsData string + var newsgroupsData string start3 := time.Now() baseDelay := 10 * time.Millisecond for { - err = db.QueryRow("SELECT o FROM "+tableName+" WHERE h = ?", shortHash).Scan(&offsetsData) + err = db.QueryRow("SELECT newsgroups FROM "+tableName+" WHERE message_id = ?", msgIdItem.MessageId).Scan(&newsgroupsData) if err != nil { if err == sql.ErrNoRows { if HistoryDEBUG { - log.Printf("[HISTORY] #2.1 lookupInDatabase: sql.ErrNoRows dbIndex=%x tableName=%s shortHash='%s' took %v", dbIndex, tableName, shortHash, time.Since(start3)) + log.Printf("[HISTORY] #2.1 lookupInDatabase: sql.ErrNoRows dbIndex=%x tableName=%s took %v", dbIndex, tableName, time.Since(start3)) } - return false, nil // Not found + return false, nil, nil // Not found } // Check if it's a retryable error (database lock/busy) @@ -287,48 +273,31 @@ func (h *History) lookupInDatabase(msgIdItem *MessageIdItem) (bool, error) { if HistoryDEBUG { log.Printf("[HISTORY] #2.2 lookupInDatabase: database query took %v HistoryDEBUG=%t", time.Since(start3), 
HistoryDEBUG) } - // Parse file offsets - offsetStrings := strings.Split(offsetsData, ",") - var offsets []int64 // TODO GET FROM SYNC POOL - // Check each offset for hash collisions - for _, offsetStr := range offsetStrings { - offset, err := strconv.ParseInt(strings.TrimSpace(offsetStr), 10, 64) - if err != nil { - log.Printf("WARN: Invalid offset in database: %s", offsetStr) - continue + // Parse newsgroup IDs from comma-separated string + newsgroupIDsSlice := strings.Split(newsgroupsData, ",") + if quick { + if len(newsgroupIDsSlice) > 0 { + return true, nil, nil // Found matching entries } - offsets = append(offsets, offset) // Append to offsets slice + return false, nil, ErrNoMatch } - // Check each offset for hash collisions - for _, offset := range offsets { - // Read and verify the history entry at this offset - response, err := h.readHistoryEntryAtOffset(offset, msgIdItem) + anewsgroupIDs := make([]int64, 0, len(newsgroupIDsSlice)) + for _, ngIDsStr := range newsgroupIDsSlice { + newsgroupID, err := strconv.ParseInt(strings.TrimSpace(ngIDsStr), 10, 64) if err != nil { - log.Printf("WARN: Failed to read history entry at offset %d: %v", offset, err) + log.Printf("WARN: Invalid ngIDsStr in database: %s", ngIDsStr) continue } - //h.updateStats(func(s *HistoryStats) { s.TotalFileLookups++ }) - /* - if msgIdItem.MessageId == "<32304224.79C1@parkcity.com>" { - log.Printf("[DEBUG-HISTORY-STEP15] Target message ID readHistoryEntryAtOffset response: %x msgIdItem='%#v'", response, msgIdItem) - }*/ - - switch response { - case CaseError: - log.Printf("ERROR: Failed to read history entry at offset %d: %v", offset, err) - h.updateStats(func(s *HistoryStats) { s.Errors++ }) - return false, err - - case CaseRetry: - continue // Hash collision, not a match - - case CaseDupes: - // Found a matching entry, returns item with storage token added from readHistoryEntryAtOffset - return true, nil // Found a matching entry, return storage token in Item pointer - } // end switch 
response + anewsgroupIDs = append(anewsgroupIDs, newsgroupID) // Append to newsgroupIDs slice + } + if len(anewsgroupIDs) > 0 { + msgIdItem.Mux.Lock() + msgIdItem.NewsgroupIDs = anewsgroupIDs + msgIdItem.Mux.Unlock() + return true, anewsgroupIDs, nil // Found matching entries } // No matching entries found - return false, nil + return false, nil, ErrNoMatch } // GetStats returns current statistics @@ -393,7 +362,7 @@ func (h *History) writerWorker() { h.batchMux.RLock() lastFlush := time.Since(h.lastFlush) h.batchMux.RUnlock() - chansize = len(h.writerChan) + chansize = len(h.dbChan) chanlimit = chansize >= h.config.BatchSize if chanlimit { log.Printf("[HISTORY] ticker writerWorker: lastChanLimit: %v | lastFlush: %v", time.Since(lastChanlimit), lastFlush) @@ -436,7 +405,7 @@ func (h *History) writerWorker() { continue } h.flushPendingBatch() - if shutdownCounter <= 0 && len(h.writerChan) == 0 { + if shutdownCounter <= 0 && len(h.dbChan) == 0 { log.Printf("[HISTORY] writerWorker CheckNoMoreWorkInHistory ok. shutting down...") return } @@ -448,7 +417,7 @@ func (h *History) writerWorker() { } continue } - chanSize = len(h.writerChan) + chanSize = len(h.dbChan) if chanSize == 0 { continue } @@ -478,96 +447,9 @@ func (h *History) ServerShutdown() bool { return false } -// readHistoryEntryAtOffset reads and parses a history entry at a specific file offset -func (h *History) readHistoryEntryAtOffset(offset int64, msgIdItem *MessageIdItem) (int, error) { - getTimestamp := false - - msgIdItem.Mux.RLock() - if msgIdItem.Arrival == 0 { - getTimestamp = true - } - /* - if msgIdItem.MessageIdHash == "" { - msgIdItem.Mux.RUnlock() - return CaseError, fmt.Errorf("readHistoryEntryAtOffset called with empty MessageIdHash") - } - */ - - if msgIdItem.StorageToken != "" /*|| (msgIdItem.GroupName != nil && msgIdItem.ArtNum > 0) fucksup with history rebuild... 
TODO REVIEW */ { - //log.Printf("[HISTORY] readHist: already have storage token='%s' msgId='%s'", msgIdItem.StorageToken, msgIdItem.MessageId) - // already have storage token or group/article info, no need to read file - msgIdItem.Mux.RUnlock() - return CaseDupes, nil - } - //if HistoryDEBUG { - // log.Printf("[HISTORY] readHistoryEntryAtOffset called for MessageIdHash: '%s' at offset %d", msgIdItem.MessageIdHash, offset) - //} - msgIdItem.Mux.RUnlock() - - // Open read-only file handle for this specific read - file, err := os.Open(h.HistoryFilePath) - if err != nil { - return CaseError, fmt.Errorf("failed to open history file for reading: %v", err) - } - // Seek to offset and read line - _, err = file.Seek(offset, io.SeekStart) - if err != nil { - file.Close() - return CaseError, fmt.Errorf("failed to seek to offset %d: %v", offset, err) - } - - reader := bufio.NewReader(file) - line, err := reader.ReadString('\n') - if err != nil { - file.Close() - return CaseError, fmt.Errorf("failed to read line at offset %d: %v", offset, err) - } - file.Close() - //log.Printf("[HISTORY] readHistoryEntryAtOffset: read line at offset %d: '%s'", offset, result) - - // Parse history line: "hash storagetoken timestamp messageid" - parts := strings.SplitN(line, "\t", 4) - if len(parts) < 4 { - return CaseError, fmt.Errorf("invalid history format offset=%d line='%s'", offset, line) - } - - //messageID := parts[0] - //storageSystem := parts[1] - //storageToken := parts[2] - //timestampStr := parts[3] - - msgIdItem.Mux.Lock() - if msgIdItem.StorageToken != "" { - msgIdItem.Mux.Unlock() - return CaseDupes, nil - } - if parts[0] != msgIdItem.MessageId { - log.Printf("[HISTORY] readHistoryEntryAtOffset:mismatch for (msgId='%s') at offset %d parts='%#v'", msgIdItem.MessageId, offset, parts) - msgIdItem.Mux.Unlock() - return CaseRetry, nil - } - msgIdItem.StorageToken = parts[2] // Set storage token - msgIdItem.Mux.Unlock() - - if HistoryDEBUG { - log.Printf("[HISTORY] 
readHistoryEntryAtOffset: msgId='%s' at offset %d => token='%s'", msgIdItem.MessageId, offset, msgIdItem.StorageToken) - } - - if getTimestamp { - timestamp, err := strconv.ParseInt(parts[3][:len(parts[3])-1], 10, 64) - if err != nil { - return CaseError, fmt.Errorf("invalid timestamp in history file: %s", parts[3]) - } - msgIdItem.Mux.Lock() - msgIdItem.Arrival = timestamp // Set arrival time - msgIdItem.Mux.Unlock() - } - return CaseDupes, nil -} - // routeHash routes a hash to the correct database and table -// Returns: dbIndex, tableName, shortHash (for storage), error -func (h *History) routeHash(msgId string) (int, string, string, error) { +// Returns: dbIndex, tableName, error +func (h *History) routeHash(item *MessageIdItem) (dbIndex int, tableName string, err error) { /* minCharsNeeded := 3 + h.config.UseShortHashLen // routing chars + storage chars if len(hash) < minCharsNeeded { @@ -577,35 +459,27 @@ func (h *History) routeHash(msgId string) (int, string, string, error) { // For sharded database modes: // 1st char -> database index (0-f maps to 0-15) // 2nd+3rd chars -> table name (s + 00-ff) - // 4th-Nth chars -> stored hash value (configurable 2-7 chars) (max 10 bits of entropy = 16^10 = 1,099511628×10¹² ...) 
- hash := ComputeMessageIDHash(msgId)[:3+h.config.UseShortHashLen] // Compute the hash of the message ID - dbChar := hash[0:1] - tableChars := hash[1:3] - shortHash := hash[3:] - if len(shortHash) > h.config.UseShortHashLen { - shortHash = shortHash[:h.config.UseShortHashLen] // Limit to configured length - } + hash := ComputeMessageIDHash(item.MessageId)[:3] // Compute the hash of the message ID // Convert first hex char to database index - dbIndex, err := hexToInt(dbChar) + dbIndex, err = hexToInt(hash[0:1]) if err != nil { - return 0, "", "", fmt.Errorf("invalid hex char for database: %s", dbChar) + return 0, "", fmt.Errorf("invalid hex char for database: %s", hash[0:1]) } // Validate database index numDBs, _, _ := GetShardConfig(h.config.ShardMode) if dbIndex >= numDBs { - return 0, "", "", fmt.Errorf("database index %d exceeds available databases %d", dbIndex, numDBs) + return 0, "", fmt.Errorf("database index %d exceeds available databases %d", dbIndex, numDBs) } // Table name from 2nd+3rd hex chars (s + hex) - - return dbIndex, "s" + tableChars, shortHash, nil + return dbIndex, "_" + hash[1:3], nil } // flushPendingBatch processes all entries in the current batch atomically func (h *History) flushPendingBatch() { - if len(h.writerChan) == 0 { + if len(h.dbChan) == 0 { return } //log.Printf("[HISTORY] PRE Flushing batch of %d history entries", toProcess) @@ -624,170 +498,41 @@ func (h *History) flushPendingBatch() { // processBatch processes multiple entries atomically for optimal performance func (h *History) processBatch() { - jobs := len(h.writerChan) - if jobs == 0 { + if len(h.dbChan) == 0 { return // No items to process } - // Step 1: Write all entries to history file in sequence (atomic file operations) - start1 := time.Now() - if err := h.writeBatchToFile(); err != nil { - log.Printf("ERROR: Failed to write batch to file: %v", err) - return - } - if HistoryDEBUG { - log.Printf("[HISTORY] done BATCH writeBatchToFile: %d entries (took %v)", jobs, 
time.Since(start1)) - } - + log.Printf("[HISTORY] Starting batch processing of %d entries", len(h.dbChan)) start2 := time.Now() // Step 2: Write all entries to database in batch (transaction-based) - if err := h.writeBatchToDatabase(); err != nil { + if jobs, err := h.writeBatchToDatabase(); err != nil { log.Printf("ERROR: Failed to write batch to database: %v", err) return + } else { + if HistoryDEBUG { + log.Printf("[HISTORY] done BATCH writeBatchToDatabase: %d entries (took %v)", jobs, time.Since(start2)) + } } - if HistoryDEBUG { - log.Printf("[HISTORY] done BATCH writeBatchToDatabase: %d entries (took %v)", jobs, time.Since(start2)) - } - - // All entries processed successfully - //if HistoryDEBUG { - if jobs > h.config.BatchSize { - jobs = h.config.BatchSize - } - log.Printf("[HISTORY] ADD BATCHED: %d (took %v)", jobs, time.Since(start1)) - //} } const DefaultStorageSystem = 0x1 -// writeBatchToFile writes multiple entries to the history file atomically -/* -tail history.dat -3e516276738e1312fd0e8190ecdefacc de.rec.tiere.aquaristik:4491 1752178804 <3f389015$0$20726$91cee783@newsreader02.highway.telekom.at> -c4741046fca0ce7fac49df105e487cc5 de.rec.tiere.aquaristik:4492 1752178804 -5028257d375b9b51e6440b47cc5ec810 de.rec.tiere.aquaristik:4493 1752178804 -*/ -func (h *History) writeBatchToFile() error { - h.mux.Lock() - defer h.mux.Unlock() - /* - // Get the ACTUAL current file position instead of relying on cached offset - actualOffset, err := h.historyFile.Seek(0, io.SeekCurrent) - if err != nil { - return fmt.Errorf("failed to get current file position: %v", err) - } - - // Verify our cached offset matches reality - if actualOffset != h.offset { - log.Printf("[HISTORY] WARNING: Cached offset %d != actual file position %d, correcting", h.offset, actualOffset) - h.offset = actualOffset - } - - // Reset the buffered writer to ensure it's positioned correctly - h.fileWriter.Reset(h.historyFile) - */ - - // Capture file offsets for each entry BEFORE writing - 
currentOffset := h.offset - now := time.Now().Unix() - processed, skipped := 0, 0 - totalBytes := int64(0) - start := time.Now() - dbChanBefore := len(h.dbChan) -processingLoop: - for { - select { - case item := <-h.writerChan: - /* - if item.MessageId == "<32304224.79C1@parkcity.com>" { - // debug - log.Printf("[HISTORY] DEBUG: Writing batch entry for MessageId '%s' at offset %d", item.MessageId, currentOffset) - } - */ - - item.Mux.Lock() - // Only generate storage token if it's not already set (avoid redundant fmt.Sprintf calls) - if item.StorageToken == "" { - if item.GroupName == nil || item.ArtNum <= 0 { - log.Printf("[HISTORY-ERROR] Missing storage info for item: GroupName=%v ArtNum=%d", item.GroupName, item.ArtNum) - item.Mux.Unlock() - skipped++ - continue // Skip items without proper storage info - } - item.StorageToken = fmt.Sprintf("%s:%d", *item.GroupName, item.ArtNum) - } - // Set the file offset before writing - // Write directly to buffered writer - n, err := fmt.Fprintf(h.fileWriter, "%s\t%x\t%s\t%d\n", - item.MessageId, // message-Id - DefaultStorageSystem, // placeholder for storage system, e.g. 
0x1 - item.StorageToken, // : - now, // timestamp - ) - if err != nil { - item.Mux.Unlock() - return fmt.Errorf("failed to write history line to file: %v", err) - } - item.Arrival = now // Set arrival time to current time - item.FileOffset = currentOffset - item.Mux.Unlock() - currentOffset += int64(n) - totalBytes += int64(n) - h.dbChan <- item - processed++ - if processed >= h.config.BatchSize { - //log.Printf("[HISTORY] writeBatchToFile reached %d entries, flushing...", processed) - // Flush batch immediately if size limit reached - break processingLoop - } - default: - //log.Printf("[HISTORY] writeBatchToFile processed %d entries, flushing...", processed) - break processingLoop // No more items to process - } - } - startFlush := time.Now() - // Flush buffered writer to ensure all data is written - if err := h.fileWriter.Flush(); err != nil { - return fmt.Errorf("failed to flush history file buffer: %v", err) - } - - // Update global offset - h.offset += totalBytes - - /* - // Ensure data is written to disk - if err := h.historyFile.Sync(); err != nil { - log.Printf("WARN: Failed to sync history file: %v", err) - // Continue anyway, data is in OS buffer - } - */ - log.Printf("writeBatchToFile: %d entries took %v h.dbChan=%d=>%d flush: %v (written: %d)", processed, time.Since(start), dbChanBefore, len(h.dbChan), time.Since(startFlush), totalBytes) - return nil -} - // writeBatchToDatabase writes multiple entries to the database using transactions -func (h *History) writeBatchToDatabase() error { +func (h *History) writeBatchToDatabase() (processed int, err error) { // Group entries by database index only - calculate routing on-demand dbGroups := make(map[int][]*MessageIdItem) - processed := 0 + var item *MessageIdItem processingLoop: for { select { - case item := <-h.dbChan: - /* - minCharsNeeded := 3 + h.config.UseShortHashLen - if len(item.MessageIdHash) < minCharsNeeded { - return fmt.Errorf("hash too short: need at least %d characters, got %d [item='%#v']", 
minCharsNeeded, len(item.MessageIdHash), item) - } - */ - // Calculate only database index - no temporary struct needed - dbIndex, _, _, err := h.routeHash(item.MessageId) + case item = <-h.dbChan: + dbIndex, _, err := h.routeHash(item) if err != nil { - return fmt.Errorf("failed to route hash: %v", err) - } - if len(dbGroups[dbIndex]) == 0 { - // First entry for this database, initialize any necessary structures - dbGroups[dbIndex] = make([]*MessageIdItem, 0, 640) // Preallocate slice for performance + return 0, fmt.Errorf("error in writeBatchToDatabase. failed to route hash for item '%#v': %v", item, err) } + //if len(dbGroups[dbIndex]) == 0 { + // // First entry for this database, initialize any necessary structures + // dbGroups[dbIndex] = make([]*MessageIdItem, 0, 64) // Preallocate slice for performance + //} // Group by database index using original MessageIdItem pointers dbGroups[dbIndex] = append(dbGroups[dbIndex], item) processed++ @@ -798,13 +543,16 @@ processingLoop: break processingLoop } } + if processed == 0 { + return 0, nil // Nothing to process + } // Process databases in parallel for better performance var wg sync.WaitGroup errChan := make(chan error, len(dbGroups)) - for dbIndex, entries := range dbGroups { - if len(entries) == 0 { + for dbIndex, msgIdItems := range dbGroups { + if len(msgIdItems) == 0 { continue // Skip empty groups } wg.Add(1) @@ -814,28 +562,34 @@ processingLoop: if err := h.writeBatchToHashDB(dbIdx, dbEntries); err != nil { errChan <- fmt.Errorf("failed to write batch to database dbIdx=%d: %v", dbIdx, err) } + for _, item := range dbEntries { + h.DelDBQueued(item) + } if HistoryDEBUG { log.Printf("[HISTORY] writeBatchToDatabase: dbIndex=%d, processed %d entries took %v", dbIndex, len(dbEntries), time.Since(start)) } - }(dbIndex, entries) + }(dbIndex, msgIdItems) } wg.Wait() close(errChan) // Check for any errors - for err := range errChan { - if err != nil { - return err + var errs []error + for aerr := range errChan { + if 
aerr != nil { + errs = append(errs, aerr) } } - - return nil + if len(errs) > 0 { + return processed, fmt.Errorf("error in writeBatchToDatabase: %v", errs) + } + return } // writeBatchToHashDB writes all entries for a single database in one giant transaction func (h *History) writeBatchToHashDB(dbIndex int, entries []*MessageIdItem) error { - const maxRetries = 1000 + const maxRetries = 10000 const baseDelay = 100 * time.Millisecond for attempt := 0; attempt < maxRetries; attempt++ { @@ -847,8 +601,8 @@ func (h *History) writeBatchToHashDB(dbIndex int, entries []*MessageIdItem) erro // Check if it's a transient lock error errStr := strings.ToLower(err.Error()) if strings.Contains(errStr, "database is locked") || strings.Contains(errStr, "busy") { - if attempt < maxRetries-1 { - time.Sleep(baseDelay) + if attempt < maxRetries { + time.Sleep(baseDelay * time.Duration(attempt+1)) // Exponential backoff continue } } @@ -886,47 +640,28 @@ func (h *History) executeDBTransaction(dbIndex int, entries []*MessageIdItem) er }() // Group entries by table and hash within this database - calculate routing on-demand - tableHashGroups := make(map[string]map[string][]*MessageIdItem) - for _, entry := range entries { + tableHashGroups := make(map[string][]*MessageIdItem) + for _, msgIdItem := range entries { // Calculate routing info on-demand (no struct allocation) - _, tableName, shortHash, err := h.routeHash(entry.MessageId) + _, tableName, err := h.routeHash(msgIdItem) if err != nil { return fmt.Errorf("failed to route hash for entry: %v", err) } - - if tableHashGroups[tableName] == nil { - tableHashGroups[tableName] = make(map[string][]*MessageIdItem) - } - tableHashGroups[tableName][shortHash] = append(tableHashGroups[tableName][shortHash], entry) - entry.Mux.Lock() - entry.Response = CaseDupes // Set response state to CaseDupes for processed articles - entry.CachedEntryExpires = time.Now().Add(3 * time.Second) // Set cache expiration - entry.Mux.Unlock() + 
tableHashGroups[tableName] = append(tableHashGroups[tableName], msgIdItem) + msgIdItem.Mux.Lock() + msgIdItem.Response = CaseDupes // Set response state to CaseDupes for processed articles + msgIdItem.CachedEntryExpires = time.Now().Add(3 * time.Second) // Set cache expiration + msgIdItem.Mux.Unlock() } // Process each table in this database with optimized batch operations - - for tableName, hashGroups := range tableHashGroups { - //txWg.Add(1) - //go func(tx *sql.Tx, tableName string, hashGroups map[string][]*EntryWithRouting) { - // defer txWg.Done() - // Process this table in the transaction - if err := h.processTableInTransaction(tx, tableName, hashGroups); err != nil { - log.Printf("ERROR: Failed to process table %s in transaction: %v", tableName, err) - /* - err := tx.Rollback() // Rollback on error - if err != nil { - log.Printf("ERROR: Failed to rollback transaction: %v", err) - } - */ - log.Printf("ERROR HISTORY Rolling back transaction due to error: %v", err) // @AI!! THIS IS A CRITICAL ERROR, WE MUST EXIT! DO NOT TOUCH THIS LINE OF CODE!! 
- return fmt.Errorf("failed to process table %s in transaction: %v", tableName, err) + for tableName, msgIdItems := range tableHashGroups { + if err := h.processTableInTransaction(tx, tableName, msgIdItems); err != nil { + return fmt.Errorf("error in history: failed to process table %s in transaction: %v", tableName, err) } - //}(tx, tableName, hashGroups) } - //txWg.Wait() // Wait for all table processing goroutines to finish - // Commit the giant transaction + // Commit the transaction err = tx.Commit() if err != nil { return fmt.Errorf("failed to commit transaction: %v", err) @@ -938,48 +673,52 @@ func (h *History) executeDBTransaction(dbIndex int, entries []*MessageIdItem) er } // processTableInTransaction efficiently processes all hash groups for a single table -func (h *History) processTableInTransaction(tx *sql.Tx, tableName string, hashGroups map[string][]*MessageIdItem) error { +func (h *History) processTableInTransaction(tx *sql.Tx, tableName string, msgIdItems []*MessageIdItem) error { start := time.Now() for { // Execute efficient bulk UPSERT operations - if len(hashGroups) > 0 { + if len(msgIdItems) > 0 { //start := time.Now() // Build bulk UPSERT with VALUES syntax var valuesList []string var args []interface{} - for shortHash, hashEntries := range hashGroups { + for _, msgIdItem := range msgIdItems { // Build new offsets for this hash - optimized for 1-2 offsets typical case - valuesList = append(valuesList, "(?, ?)") + valuesList = append(valuesList, "(?,?)") - var offsetString string - if len(hashEntries) == 1 { + var newsgroupIDsString string + if len(msgIdItem.NewsgroupIDs) == 1 { // Most common case: single offset - offsetString = fmt.Sprintf("%d", hashEntries[0].FileOffset) - } else if len(hashEntries) == 2 { + newsgroupIDsString = fmt.Sprintf("%d", msgIdItem.NewsgroupIDs[0]) + } else if len(msgIdItem.NewsgroupIDs) == 2 { // Second most common case: two offsets - offsetString = fmt.Sprintf("%d,%d", hashEntries[0].FileOffset, 
hashEntries[1].FileOffset) + newsgroupIDsString = fmt.Sprintf("%d,%d", msgIdItem.NewsgroupIDs[0], msgIdItem.NewsgroupIDs[1]) } else { // Rare case: 3+ offsets, fall back to slice+join var newOffsets []string - for _, entry := range hashEntries { - newOffsets = append(newOffsets, fmt.Sprintf("%d", entry.FileOffset)) + for _, entry := range msgIdItem.NewsgroupIDs { + newOffsets = append(newOffsets, fmt.Sprintf("%d", entry)) } - offsetString = strings.Join(newOffsets, ",") + newsgroupIDsString = strings.Join(newOffsets, ",") } - args = append(args, shortHash, offsetString) + args = append(args, msgIdItem.MessageId, newsgroupIDsString) //log.Printf("[HISTORY] processTableInTransaction: hash '%s' has %d entries, offset string: '%s'", shortHash, len(hashEntries), offsetString) } // Execute single bulk UPSERT statement - bulkUpsertQuery := fmt.Sprintf(` - INSERT INTO %s (h, o) VALUES %s - ON CONFLICT(h) DO UPDATE SET - o = o || ',' || excluded.o - `, tableName, strings.Join(valuesList, ", ")) + /* disabled, old + bulkUpsertQuery := fmt.Sprintf(` + INSERT INTO %s (message_id, newsgroups) VALUES %s + ON CONFLICT(message_id) DO UPDATE SET + newsgroups = newsgroups || ',' || excluded.newsgroups + `, tableName, strings.Join(valuesList, ", ")) + */ + // Execute single bulk UPSERT statement + bulkUpsertQuery := fmt.Sprintf("INSERT INTO %s (message_id, newsgroups) VALUES %s", tableName, strings.Join(valuesList, ", ")) if _, err := tx.Exec(bulkUpsertQuery, args...); err != nil { log.Printf("failed to bulk upsert into table %s: %v", tableName, err) time.Sleep(100 * time.Millisecond) // Wait before retrying @@ -992,76 +731,16 @@ func (h *History) processTableInTransaction(tx *sql.Tx, tableName string, hashGr } if HistoryDEBUG { log.Printf("[HISTORY] processTableInTransaction: bulk processed table %s with %d hashes in %v", - tableName, len(hashGroups), time.Since(start)) + tableName, len(msgIdItems), time.Since(start)) } } return nil } -// LookupStorageToken looks up a message-ID and 
returns its storage token -// This function: -// 1. Looks up the message-ID in the history database to get file offsets -// 2. Reads the actual history file entries at those offsets -// 3. Matches the hash and message-ID to handle hash collisions -// 4. Returns the storage token if found -func (h *History) xxLookupStorageToken(msgIdItem *MessageIdItem) int { - log.Printf("ERROR LEGACY FUNCTION CALLED LookupStorageToken") - return CaseError // Default to error case - /* - msgIdItem.Mux.RLock() - iserror := msgIdItem.Response == CaseError || msgIdItem.MessageId == "" - isset := msgIdItem.StorageToken != "" - //ispass := msgIdItem.Response == CasePass - msgIdItem.Mux.RUnlock() - - if iserror { - msgIdItem.Mux.Lock() - msgIdItem.Response = CaseError - msgIdItem.Mux.Unlock() - log.Printf("[HISTORY] ERROR: LookupStorageToken called with invalid MessageIdItem") - return CaseError // Invalid input, return error code - } - if isset { - return CaseDupes // Already exists, return storage token - } - //if ispass { - // return CasePass // Already checked and not found, return empty string - //} - // Look up in database to get file offsets - found, err := h.lookupInDatabase(msgIdItem) - if err != nil { - msgIdItem.Mux.Lock() - msgIdItem.Response = CaseError - msgIdItem.Mux.Unlock() - h.updateStats(func(s *HistoryStats) { s.Errors++ }) - return CaseError // Error occurred, return empty string - } - if !found { - msgIdItem.Mux.Lock() - if msgIdItem.Response == 0 { - log.Printf("[HISTORY] LookupStorageToken: MessageIdHash '%s' not found in database, setting response to CasePass", msgIdItem.MessageIdHash) - // Only set to CasePass if not already set to something else - //log.Printf("[HISTORY] LookupStorageToken: MessageIdHash '%s' not found in database, setting response to CasePass", msgIdItem - //msgIdItem.Response = CasePass // Set response state to CasePass for not found - } - msgIdItem.Mux.Unlock() - return CasePass // Not found - } - msgIdItem.Mux.Lock() - if msgIdItem.Response 
== CasePass { - log.Printf("[HISTORY] LookupStorageToken: MessageIdHash '%s' found in database, setting response to CaseDupes", msgIdItem.MessageIdHash) - msgIdItem.Response = CaseDupes // only set to CaseDupes if not anything else is already set - } - msgIdItem.Mux.Unlock() - h.updateStats(func(s *HistoryStats) { s.TotalLookups++ }) - return CaseDupes - */ -} - // CheckNoMoreWorkInHistory checks if there's no more pending work (similar to CheckNoMoreWorkInMaps) func (h *History) CheckNoMoreWorkInHistory() bool { // Check if writer channel has pending entries - if len(h.writerChan) > 0 { + if h.dbMoreQueued() { return false } diff --git a/internal/history/history_MsgIdItemCache.go b/internal/history/history_MsgIdItemCache.go index 2c3691b7..b9ea6239 100644 --- a/internal/history/history_MsgIdItemCache.go +++ b/internal/history/history_MsgIdItemCache.go @@ -314,13 +314,13 @@ func (c *MsgIdItemCache) cleanupMessageIdItem(item *MessageIdItem) { // Clear string fields item.MessageId = "" item.MessageIdHash = "" - item.StorageToken = "" + //item.StorageToken = "" // Reset other fields to zero values - item.ArtNum = 0 + //item.ArtNum = 0 item.Arrival = 0 item.Response = 0 - item.GroupName = nil + //item.GroupName = nil item.Mux.Unlock() } @@ -447,6 +447,7 @@ func (c *MsgIdItemCache) GetResizeInfo() (bucketCount int, itemCount int, loadFa return } +/* // GetMsgIdFromCache retrieves threading information for a message ID in a specific group // This replaces the functionality from MsgTmpCache.GetMsgIdFromTmpCache func (c *MsgIdItemCache) GetMsgIdFromCache(newsgroupPtr *string, messageID string) (int64, int64, bool) { @@ -485,9 +486,10 @@ func (c *MsgIdItemCache) GetMsgIdFromCache(newsgroupPtr *string, messageID strin } // end for return 0, 0, false } - +*/ +/* // SetThreadingInfo sets threading information for a message ID in a specific group -func (c *MsgIdItemCache) SetThreadingInfo(messageID string, rootArticle int64, isThreadRoot bool) bool { +func (c *MsgIdItemCache) 
xxSetThreadingInfo(messageID string, rootArticle int64, isThreadRoot bool) bool { // This is a compatibility method that requires the item to already have a group set // For new code, use SetThreadingInfoForGroup instead with explicit group parameter item := c.GetORCreate(messageID) @@ -522,7 +524,7 @@ func (c *MsgIdItemCache) SetThreadingInfo(messageID string, rootArticle int64, i } // SetThreadingInfoForGroup sets threading information for a message ID in a specific group -func (c *MsgIdItemCache) SetThreadingInfoForGroup(newsgroupPtr *string, messageID string, artNum int64, rootArticle int64, isThreadRoot bool) bool { +func (c *MsgIdItemCache) xxSetThreadingInfoForGroup(newsgroupPtr *string, messageID string, artNum int64, rootArticle int64, isThreadRoot bool) bool { item := c.GetORCreate(messageID) if item == nil { return false @@ -549,7 +551,7 @@ func (c *MsgIdItemCache) SetThreadingInfoForGroup(newsgroupPtr *string, messageI } // AddMsgIdToCache adds a message ID with article number to the cache for a specific group -func (c *MsgIdItemCache) AddMsgIdToCache(newsgroupPtr *string, messageID string, articleNum int64) bool { +func (c *MsgIdItemCache) xxAddMsgIdToCache(newsgroupPtr *string, messageID string, articleNum int64) bool { item := c.GetORCreate(messageID) if item == nil { return false @@ -581,6 +583,7 @@ func (c *MsgIdItemCache) AddMsgIdToCache(newsgroupPtr *string, messageID string, return true } +*/ // CleanExpiredEntries removes expired temporary cache entries // This replaces the functionality from MsgTmpCache.CronClean @@ -775,33 +778,14 @@ func (c *MsgIdItemCache) StartCleanupRoutine() { }() } -// GetOrCreateForGroup gets or creates a message ID item for a specific group -// This provides group-specific functionality similar to MsgTmpCache -func (c *MsgIdItemCache) GetOrCreateForGroup(messageID string, newsgroupPtr *string) *MessageIdItem { - item := c.GetORCreate(messageID) - if item == nil { - return nil - } - - item.Mux.Lock() - defer 
item.Mux.Unlock() - - // If this is the first time we're seeing this messageID for this group, - // or if it's for a different group, update the group information - if item.GroupName == nil || item.GroupName != newsgroupPtr { - item.GroupName = newsgroupPtr - item.Arrival = time.Now().Unix() - } - - return item -} - +/* // HasMessageIDInGroup checks if a message ID exists in a specific group and hasn't expired -func (c *MsgIdItemCache) HasMessageIDInGroup(messageID string, newsgroupPtr *string) bool { +func (c *MsgIdItemCache) xxHasMessageIDInGroup(messageID string, newsgroupPtr *string) bool { artNum, _, _ := c.GetMsgIdFromCache(newsgroupPtr, messageID) return artNum != 0 // If artNum is 0, the item wasn't found or expired } + // FindThreadRootInCache searches for thread root in cache by following references // This replaces the functionality from MsgTmpCache.FindThreadRootInCache func (c *MsgIdItemCache) FindThreadRootInCache(newsgroupPtr *string, references []string) *MessageIdItem { @@ -854,10 +838,11 @@ func (c *MsgIdItemCache) FindThreadRootInCache(newsgroupPtr *string, references } return nil } - +*/ +/* // UpdateThreadRootToTmpCache updates an existing cache entry with thread root information // This replaces the functionality from MsgTmpCache.UpdateThreadRootToTmpCache -func (c *MsgIdItemCache) UpdateThreadRootToTmpCache(newsgroupPtr *string, messageID string, rootArticle int64, isThreadRoot bool) bool { +func (c *MsgIdItemCache) xxUpdateThreadRootToTmpCache(newsgroupPtr *string, messageID string, rootArticle int64, isThreadRoot bool) bool { // Get or create the item item := c.GetORCreate(messageID) if item == nil { @@ -892,12 +877,14 @@ func (c *MsgIdItemCache) UpdateThreadRootToTmpCache(newsgroupPtr *string, messag return true } + // MsgIdExists checks if a message ID exists in the cache for a specific group // This replaces the functionality from MsgTmpCache.MsgIdExists -func (c *MsgIdItemCache) MsgIdExists(newsgroupPtr *string, messageID string) 
*MessageIdItem { +func (c *MsgIdItemCache) xxMsgIdExists(newsgroupPtr *string, messageID string) *MessageIdItem { artNum, _, _ := c.GetMsgIdFromCache(newsgroupPtr, messageID) if artNum != 0 { return c.GetORCreate(messageID) } return nil } +*/ diff --git a/internal/history/history_config.go b/internal/history/history_config.go index 0938d636..2c6e10a2 100644 --- a/internal/history/history_config.go +++ b/internal/history/history_config.go @@ -6,10 +6,8 @@ import ( "database/sql" "encoding/hex" "fmt" - "io" "log" "os" - "path/filepath" "strconv" "sync" "time" @@ -52,14 +50,15 @@ type ThreadingInfo struct { type MessageIdItem struct { Mux sync.RWMutex // Protects all fields below CachedEntryExpires time.Time // Exported field for cache entry expiration - MessageIdHash string // Computed hash of the message-ID - StorageToken string // pointer to storage token MessageId string // pointer to article.messageid - ArtNum int64 // Article number in the history (primary/first occurrence) - GroupName *string // Group name this article belongs to (primary group) - Arrival int64 // When article arrived - Response int // @AI IGNORE Response``` FOR THE MOMENT; RESPONSE LOGIC NEEDS PROPER THINKING! NOT A JOB FOR NOW! 
- FileOffset int64 // File offset in history.dat where this entry is stored + MessageIdHash string // Computed hash of the message-ID + NewsgroupIDs []int64 // Newsgroup IDs this message-ID belongs to + //StorageToken string // pointer to storage token + + //ArtNums map[int64]int64 // maps newsgroup IDs (key) to Article numbers + //GroupName *string // Group name this article belongs to (primary group) + Arrival int64 // When article arrived + Response int // Group-specific threading information (replaces global threading fields) GroupThreading map[*string]*ThreadingInfo // Per-group threading info for crossposted articles @@ -72,22 +71,15 @@ type DatabaseWorkChecker interface { // History manages message-ID history tracking using INN2-style architecture type History struct { - config *HistoryConfig - mux sync.RWMutex - historyFile *os.File - HistoryFilePath string - offset int64 + config *HistoryConfig + mux sync.RWMutex + historyFile *os.File // Database backend (SQLite with sharding) db SQLite3ShardedPool - // L1 cache for recent lookups - //l1Cache *L1CACHE - - // Channels for async operations - lookupChan chan *MessageIdItem - writerChan chan *MessageIdItem - dbChan chan *MessageIdItem + dbChan chan *MessageIdItem + dbQueued map[*MessageIdItem]bool // Shutdown signaling (similar to db_batch.go pattern) stopChan chan struct{} @@ -319,25 +311,3 @@ func (h *History) initDatabase() error { description, numDBs, tablesPerDB) return nil } - -// openHistoryFile opens or creates the history.dat file -func (h *History) openHistoryFile() error { - h.HistoryFilePath = filepath.Join(h.config.HistoryDir, HistoryFileName) - - var err error - h.historyFile, err = os.OpenFile(h.HistoryFilePath, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0644) - if err != nil { - return fmt.Errorf("failed to open history file %s: %v", h.HistoryFilePath, err) - } - // Initialize buffered writer for efficient file operations - h.fileWriter = bufio.NewWriterSize(h.historyFile, 1024*1024) - - // Get 
current file offset - h.offset, err = h.historyFile.Seek(0, io.SeekEnd) - if err != nil { - return fmt.Errorf("failed to seek to end of history file: %v", err) - } - - log.Printf("History file opened: %s (offset: %d)", h.HistoryFilePath, h.offset) - return nil -} diff --git a/internal/models/cache.go b/internal/models/cache.go index 2af92167..ac25cedd 100644 --- a/internal/models/cache.go +++ b/internal/models/cache.go @@ -9,7 +9,7 @@ import ( // Global toggle to enable/disable the sanitized cache at compile-time. // Set to true to completely bypass Get/Set operations for sanitized cache. -const DisableSanitizedCache = true +var DisableSanitizedCache = true // Global sanitized cache instance var sanitizedCache *cache.SanitizedCache diff --git a/internal/models/models.go b/internal/models/models.go index 8d85885a..233a820d 100644 --- a/internal/models/models.go +++ b/internal/models/models.go @@ -158,6 +158,63 @@ type Article struct { ProcessQueue chan *string `json:"-" db:"-"` // newsgroup ptr for batching } +var ArticlePool = make(chan *Article, 128*1024) + +func NewArticle() *Article { + select { + case art := <-ArticlePool: + return art + default: + return &Article{} + } +} + +func RecycleArticles(arts []*Article) { + for _, art := range arts { + RecycleArticle(art) + } +} + +func RecycleArticle(art *Article) { + // Clear fields to avoid memory leaks + art.GetDataFunc = nil + art.DBArtNum = 0 + art.MessageID = "" + art.Subject = "" + art.FromHeader = "" + art.DateSent = time.Time{} + art.DateString = "" + art.References = "" + art.Bytes = 0 + art.Lines = 0 + art.ReplyCount = 0 + art.HeadersJSON = "" + art.BodyText = "" + art.Path = "" + art.ImportedAt = time.Time{} + art.Spam = 0 + art.Hide = 0 + art.Sanitized = false + art.MsgIdItem = nil + art.Headers = nil + art.ArticleNums = nil + art.NNTPhead = nil + art.NNTPbody = nil + art.IsThrRoot = false + art.IsReply = false + art.RefSlice = nil + art.NewsgroupsPtr = nil + art.ProcessQueue = nil + + select { + case 
ArticlePool <- art: + // Successfully recycled + default: + // Pool is full, let it be garbage collected + //log.Printf("Warning: ArticlePool is full, discarding article to be garbage collected") + } +} + func (a *Article) GetData(what string, group string) string { if a == nil { return "" diff --git a/internal/nntp/nntp-article-common.go b/internal/nntp/nntp-article-common.go index d765f9cc..c6a9320c 100644 --- a/internal/nntp/nntp-article-common.go +++ b/internal/nntp/nntp-article-common.go @@ -7,7 +7,6 @@ import ( "strings" "time" - "github.com/go-while/go-pugleaf/internal/database" "github.com/go-while/go-pugleaf/internal/history" "github.com/go-while/go-pugleaf/internal/models" ) @@ -16,32 +15,30 @@ import ( type ArticleRetrievalType int const ( - RetrievalArticle ArticleRetrievalType = iota // Headers + Body - RetrievalHead // Headers only + RetrievalArticle ArticleRetrievalType = iota // Head + Body + RetrievalHead // Head only RetrievalBody // Body only RetrievalStat // Status only (no content) ) -// ArticleRetrievalResult contains the result of article lookup -type ArticleRetrievalResult struct { - Article *models.Article - Overview *models.Overview - ArticleNum int64 - MsgIdItem *history.MessageIdItem - GroupDBs *database.GroupDBs +// handleArticle handles ARTICLE command +func (c *ClientConnection) handleArticle(args []string) error { + return c.retrieveArticleCommon(args, RetrievalArticle) } -// used in nntp-transfer/main.go -type TakeThisMode struct { - Wanted uint64 - Unwanted uint64 - Rejected uint64 - TX_Errors uint64 - ConnErrors uint64 - TmpSuccessCount uint64 - TmpTakeThisCount uint64 - - CheckMode bool // Start with TAKETHIS mode (false) +// handleHead handles HEAD command +func (c *ClientConnection) handleHead(args []string) error { + return c.retrieveArticleCommon(args, RetrievalHead) +} + +// handleBody handles BODY command +func (c *ClientConnection) handleBody(args []string) error { + return c.retrieveArticleCommon(args, RetrievalBody) +} + 
+// handleStat handles STAT command +func (c *ClientConnection) handleStat(args []string) error { + return c.retrieveArticleCommon(args, RetrievalStat) } // retrieveArticleCommon handles the common logic for ARTICLE, HEAD, BODY, and STAT commands @@ -49,20 +46,15 @@ func (c *ClientConnection) retrieveArticleCommon(args []string, retrievalType Ar time.Sleep(time.Second / 5) // TODO hardcoded ratelimit // Get article data using common logic - result, err := c.getArticleData(args) - if result == nil || err != nil { - log.Printf("retrieveArticleCommon Error retrieving article data: %v", err) - return nil // Error already handled in getArticleData + article := c.getArticleData(args, retrievalType) + if article == nil { + // 430 error handled in getArticleData + return nil } - defer func() { - if result.GroupDBs != nil { - result.GroupDBs.Return(c.server.DB) - } - }() - // Update current article if we have a current group if c.currentGroup != "" { - c.currentArticle = result.ArticleNum + c.currentArticle = article.DBArtNum + /* disabled task := c.server.DB.Batch.GetOrCreateTasksMapKey(c.currentGroup) if task != nil && result.MsgIdItem != nil { result.MsgIdItem.Mux.Lock() @@ -70,263 +62,167 @@ func (c *ClientConnection) retrieveArticleCommon(args []string, retrievalType Ar result.MsgIdItem.ArtNum = result.ArticleNum result.MsgIdItem.Mux.Unlock() } + */ } // Send appropriate response based on retrieval type switch retrievalType { case RetrievalArticle: - return c.sendArticleContent(result) + + return c.sendArticleContent(article) case RetrievalHead: - return c.sendHeadContent(result) + return c.sendHeadContent(article) case RetrievalBody: - return c.sendBodyContent(result) + return c.sendBodyContent(article) case RetrievalStat: - return c.sendStatContent(result) + return c.sendStatContent(article) default: return c.sendResponse(500, "Internal error: unknown retrieval type") } } // getArticleData handles the common article lookup logic -func (c *ClientConnection) 
getArticleData(args []string) (*ArticleRetrievalResult, error) { - var groupDBs *database.GroupDBs - var articleNum int64 +func (c *ClientConnection) getArticleData(args []string, retrievalType ArticleRetrievalType) (article *models.Article) { + var wantArticleNum int64 var msgIdItem *history.MessageIdItem - var err error - // Parse argument: can be article number or message-id if len(args) == 0 { - if c.currentGroup == "" { - c.rateLimitOnError() - c.sendResponse(412, "No newsgroup selected") - return nil, nil - } - // Use current article - articleNum = c.currentArticle - if articleNum == 0 { + c.rateLimitOnError() + c.sendResponse(501, "No article specified") + return + } + + if strings.HasPrefix(args[0], "<") && strings.HasSuffix(args[0], ">") { + // Message-ID format + msgIdItem = history.MsgIdCache.GetORCreate(args[0]) + if msgIdItem == nil { c.rateLimitOnError() - c.sendResponse(420, "Current article number is invalid") - return nil, nil + c.sendResponse(500, "Error MsgId Cache") + return } - // Get group database - groupDBs, err = c.server.DB.GetGroupDBs(c.currentGroup) - if err != nil { + if c.server.local430.Check(msgIdItem) { c.rateLimitOnError() - c.sendResponse(411, "No such newsgroup") - return nil, nil + c.sendResponse(430, "Cache says no!") + return } } else { - if strings.HasPrefix(args[0], "<") && strings.HasSuffix(args[0], ">") { - // Message-ID format - msgIdItem = history.MsgIdCache.GetORCreate(args[0]) - if msgIdItem == nil { - c.rateLimitOnError() - c.sendResponse(500, "Error MsgId Cache") - return nil, nil - } - if c.server.local430.Check(msgIdItem) { - c.rateLimitOnError() - c.sendResponse(430, "Cache says no!") - return nil, nil - } - } else { - if c.currentGroup == "" { - c.rateLimitOnError() - c.sendResponse(412, "No newsgroup selected") - return nil, nil - } - // Article number format - articleNum, err = strconv.ParseInt(args[0], 10, 64) - if err != nil { - c.rateLimitOnError() - c.sendResponse(501, "Invalid article number") - return 
nil, nil - } + if c.currentGroup == "" { + c.rateLimitOnError() + c.sendResponse(412, "No newsgroup selected") + return + } + // Article number format + awantArticleNum, err := strconv.ParseInt(args[0], 10, 64) + if err != nil { + c.rateLimitOnError() + c.sendResponse(501, "Invalid article number") + return } + wantArticleNum = awantArticleNum } // Get article - var article *models.Article - var overview *models.Overview - - if msgIdItem != nil { + if msgIdItem != nil && wantArticleNum == 0 { // Handle message-ID lookup - retCase, err := c.server.Processor.Lookup(msgIdItem) + response, _, err := c.server.Processor.Lookup(msgIdItem, false) if err != nil { c.server.local430.Add(msgIdItem) c.rateLimitOnError() - c.sendResponse(430, "NotF1") - return nil, nil + c.sendResponse(430, "NotF0") + return } - found := false - switch retCase { + switch response { case history.CaseError: c.server.local430.Add(msgIdItem) c.rateLimitOnError() c.sendResponse(430, "NotF1") - return nil, nil + return case history.CasePass: // Not found in history - c.rateLimitOnError() log.Printf("MsgIdItem not found in history: '%#v'", msgIdItem) + c.rateLimitOnError() c.sendResponse(430, "NotF2") - return nil, nil + return case history.CaseDupes: - // Found in history - storage token should now be available + // Found in history- should have newsgroupIDs msgIdItem.Mux.RLock() - found = msgIdItem.StorageToken != "" || (msgIdItem.GroupName != nil && msgIdItem.ArtNum > 0) + found = len(msgIdItem.NewsgroupIDs) > 0 msgIdItem.Mux.RUnlock() } if !found { log.Printf("MsgIdItem not found in cache: %#v", msgIdItem) c.rateLimitOnError() - c.sendResponse(430, "NotF2") - return nil, nil - } - - // Extract storage token or use cached values - msgIdItem.Mux.RLock() - mustExtractStorageToken := (msgIdItem.GroupName == nil || msgIdItem.ArtNum == 0) && msgIdItem.StorageToken != "" - msgIdItem.Mux.RUnlock() - - if mustExtractStorageToken { - // Parse storage token: "group:articlenum" - msgIdItem.Mux.RLock() - parts 
:= strings.SplitN(msgIdItem.StorageToken, ":", 2) - msgIdItem.Mux.RUnlock() - if len(parts) != 2 { - c.server.local430.Add(msgIdItem) - c.rateLimitOnError() - c.sendResponse(430, "NotF3") - log.Printf("Invalid storage token format: %#v", msgIdItem) - return nil, nil - } - - task := c.server.DB.Batch.GetOrCreateTasksMapKey(parts[0]) - if task == nil { - c.server.local430.Add(msgIdItem) - c.rateLimitOnError() - c.sendResponse(430, "NotF4") - return nil, nil - } - - articleNumParsed, err := strconv.ParseInt(parts[1], 10, 64) - if err != nil { - c.server.local430.Add(msgIdItem) - c.rateLimitOnError() - c.sendResponse(430, "NotF5") - return nil, nil - } - - msgIdItem.Mux.Lock() - msgIdItem.GroupName = task.Newsgroup - msgIdItem.ArtNum = articleNumParsed - msgIdItem.Mux.Unlock() - } else { - if msgIdItem.GroupName == nil || msgIdItem.ArtNum <= 0 { - c.server.local430.Add(msgIdItem) - c.rateLimitOnError() - c.sendResponse(430, "NotF6") - return nil, nil - } + c.sendResponse(430, "NotF3") + return } // Get group database for the specific group from storage token - if groupDBs == nil || groupDBs.Newsgroup != *msgIdItem.GroupName { - groupDBs, err = c.server.DB.GetGroupDBs(*msgIdItem.GroupName) - if err != nil { - c.server.local430.Add(msgIdItem) - c.rateLimitOnError() - c.sendResponse(430, "NotF7") - return nil, nil - } - } - - // Get article by the specific article number from storage token - article, err = c.server.DB.GetArticleByNum(groupDBs, msgIdItem.ArtNum) + article, err = c.server.DB.GetArticleFromAnyNewsgroupDB(msgIdItem) if err != nil { c.server.local430.Add(msgIdItem) c.rateLimitOnError() c.sendResponse(430, "NotF8") - return nil, nil + return } - articleNum = article.ArticleNums[groupDBs.NewsgroupPtr] + return article - } else { + } else if wantArticleNum > 0 { // Handle article number lookup - if groupDBs == nil { - groupDBs, err = c.server.DB.GetGroupDBs(c.currentGroup) + groupDB, err := c.server.DB.GetGroupDB(c.currentGroup) + if err != nil { + 
c.rateLimitOnError() + c.sendResponse(411, "No such newsgroup") + return + } + defer groupDB.Return() + + if retrievalType == RetrievalStat { + // For STAT command, we can use overview instead of full article + overview, err := c.server.DB.GetOverviewByArticleNum(groupDB, wantArticleNum) if err != nil { c.rateLimitOnError() - c.sendResponse(411, "No such newsgroup") - return nil, nil + c.sendResponse(423, "No such article number") + return + } + return &models.Article{ + DBArtNum: wantArticleNum, + MessageID: overview.MessageID, } - } - - // For STAT command, we can use overview instead of full article - overview, err = c.server.DB.GetOverviewByArticleNum(groupDBs, articleNum) - if err != nil { - c.rateLimitOnError() - c.sendResponse(423, "No such article number") - return nil, nil } // For other commands, get the full article - article, err = c.server.DB.GetArticleByNum(groupDBs, articleNum) + article, err = c.server.DB.GetArticleByNum(groupDB, wantArticleNum) if err != nil { c.rateLimitOnError() c.sendResponse(423, "No such article number") - return nil, nil - } - if article.MessageID == "" { - log.Printf("Error in getArticleData: Article with no message-id: %#v", article) - return nil, fmt.Errorf("error in getArticleData: article with no message-id") - } - // Create or get msgIdItem - messageID := article.MessageID - if overview != nil { - messageID = overview.MessageID - } - msgIdItem = history.MsgIdCache.GetORCreate(messageID) - if msgIdItem == nil { - c.rateLimitOnError() - c.sendResponse(500, "Error MsgId Cache") - return nil, fmt.Errorf("error msgid cache") - } - - task := c.server.DB.Batch.GetOrCreateTasksMapKey(groupDBs.Newsgroup) - if task != nil { - msgIdItem.Mux.Lock() - msgIdItem.GroupName = task.Newsgroup - msgIdItem.ArtNum = articleNum - msgIdItem.Mux.Unlock() + return } + return article } - return &ArticleRetrievalResult{ - Article: article, - Overview: overview, - ArticleNum: articleNum, - MsgIdItem: msgIdItem, - GroupDBs: groupDBs, - }, nil + 
c.rateLimitOnError() + c.sendResponse(502, "Article not retrieved") + return nil } // sendArticleContent sends full article (headers + body) for ARTICLE command -func (c *ClientConnection) sendArticleContent(result *ArticleRetrievalResult) error { +func (c *ClientConnection) sendArticleContent(article *models.Article) error { + if c == nil || c.textConn == nil { + return fmt.Errorf("nil connection in sendArticleContent") + } // Parse headers and body from the article - log.Printf("sendArticleContent for result='%#v", result) - headers := c.parseArticleHeadersFull(result.Article) - bodyLines := c.parseArticleBody(result.Article) + //log.Printf("sendArticleContent for result='%#v", result) + headers := c.parseArticleHeadersFull(article) + bodyLines := c.parseArticleBody(article) // Send response: 220 n message-id Article follows - if err := c.sendResponse(220, fmt.Sprintf("%d %s Article follows", result.ArticleNum, result.MsgIdItem.MessageId)); err != nil { + if err := c.textConn.PrintfLine("220 %d %s Article follows", article.DBArtNum, article.MessageID); err != nil { return err } @@ -354,12 +250,15 @@ func (c *ClientConnection) sendArticleContent(result *ArticleRetrievalResult) er } // sendHeadContent sends only headers for HEAD command -func (c *ClientConnection) sendHeadContent(result *ArticleRetrievalResult) error { +func (c *ClientConnection) sendHeadContent(article *models.Article) error { + if c == nil || c.textConn == nil { + return fmt.Errorf("nil connection in sendHeadContent") + } // Parse headers from the article - headers := c.parseArticleHeadersFull(result.Article) + headers := c.parseArticleHeadersFull(article) // Send response: 221 n message-id Headers follow - if err := c.sendResponse(221, fmt.Sprintf("%d %s Headers follow", result.ArticleNum, result.MsgIdItem.MessageId)); err != nil { + if err := c.sendResponse(221, fmt.Sprintf("%d %s Headers follow", article.DBArtNum, article.MessageID)); err != nil { return err } @@ -375,12 +274,15 @@ func (c 
*ClientConnection) sendHeadContent(result *ArticleRetrievalResult) error } // sendBodyContent sends only body for BODY command -func (c *ClientConnection) sendBodyContent(result *ArticleRetrievalResult) error { +func (c *ClientConnection) sendBodyContent(article *models.Article) error { + if c == nil || c.textConn == nil { + return fmt.Errorf("nil connection in sendBodyContent") + } // Parse body from the article - bodyLines := c.parseArticleBody(result.Article) + bodyLines := c.parseArticleBody(article) // Send response: 222 n message-id Body follows - if err := c.sendResponse(222, fmt.Sprintf("%d %s Body follows", result.ArticleNum, result.MsgIdItem.MessageId)); err != nil { + if err := c.sendResponse(222, fmt.Sprintf("%d %s Body follows", article.DBArtNum, article.MessageID)); err != nil { return err } @@ -396,7 +298,10 @@ func (c *ClientConnection) sendBodyContent(result *ArticleRetrievalResult) error } // sendStatContent sends only status for STAT command -func (c *ClientConnection) sendStatContent(result *ArticleRetrievalResult) error { +func (c *ClientConnection) sendStatContent(article *models.Article) error { + if c == nil || c.textConn == nil { + return fmt.Errorf("nil connection in sendStatContent") + } // Send response: 223 n message-id status - return c.sendResponse(223, fmt.Sprintf("%d %s Article exists", result.ArticleNum, result.MsgIdItem.MessageId)) + return c.sendResponse(223, fmt.Sprintf("%d %s Article exists", article.DBArtNum, article.MessageID)) } diff --git a/internal/nntp/nntp-backend-pool.go b/internal/nntp/nntp-backend-pool.go index f967ce86..713ab740 100644 --- a/internal/nntp/nntp-backend-pool.go +++ b/internal/nntp/nntp-backend-pool.go @@ -1,8 +1,11 @@ package nntp import ( + "bufio" "fmt" "log" + "os" + "path/filepath" "sync" "time" @@ -71,7 +74,7 @@ func (pool *Pool) XOver(group string, start, end int64, enforceLimit bool) ([]Ov return result, nil } -func (pool *Pool) XHdr(group string, header string, start, end int64) ([]*HeaderLine, 
error) { +func (pool *Pool) XHdr(group string, header string, start, end int64) ([]HeaderLine, error) { // Get a connection from the pool client, err := pool.Get(MODE_READER_MV) if err != nil { @@ -90,10 +93,30 @@ func (pool *Pool) XHdr(group string, header string, start, end int64) ([]*Header return result, nil } +// ListNewsgroups lists available newsgroups from the NNTP server +func (pool *Pool) ListNewsgroups() ([]GroupInfo, error) { + // Get a connection from the pool + client, err := pool.Get(MODE_READER_MV) + if err != nil { + return nil, fmt.Errorf("failed to get connection: %w", err) + } + + remoteGroups, err := client.ListGroups() + if err != nil { + // Close connection on error + client.ForceCloseConn() + return nil, err + } + + // Put back connection only if no error + pool.Put(client) + return remoteGroups, nil +} + // XHdrStreamed performs XHDR command and streams results through a channel // The channel will be closed when all results are sent or an error occurs // NOTE: This function takes ownership of the connection and will return it to the pool when done -func (pool *Pool) XHdrStreamed(group string, header string, start, end int64, xhdrChan chan<- *HeaderLine, shutdownChan <-chan struct{}) error { +func (pool *Pool) XHdrStreamed(group string, header string, start, end int64, xhdrChan chan<- HeaderLine, shutdownChan <-chan struct{}) error { // Get a connection from the pool client, err := pool.Get(MODE_READER_MV) if err != nil { @@ -102,7 +125,7 @@ func (pool *Pool) XHdrStreamed(group string, header string, start, end int64, xh } // Handle connection cleanup in a goroutine so the function can return immediately - go func(client *BackendConn, group string, header string, start, end int64, resultChan chan<- *HeaderLine, shutdownChan <-chan struct{}) { + go func(client *BackendConn, group string, header string, start, end int64, resultChan chan<- HeaderLine, shutdownChan <-chan struct{}) { // Use the streaming XHdr function on the client if err := 
client.XHdrStreamed(group, header, start, end, resultChan, shutdownChan); err != nil { // If there's an error, close the connection instead of returning it @@ -132,7 +155,8 @@ func (pool *Pool) GetArticle(messageID *string, bulkmode bool) (*models.Article, article, err := client.GetArticle(messageID, bulkmode) if err != nil || article == nil { if err == ErrArticleNotFound || err == ErrArticleRemoved { - log.Printf("[NNTP-POOL] Article '%s' not found err='%v'", *messageID, err) + // <-- internal/nntp/nntp-client-commands.go:105 + //log.Printf("[NNTP-POOL] Article '%s' not found err='%v'", *messageID, err) pool.Put(client) return nil, err } else { @@ -162,7 +186,7 @@ func (pool *Pool) SelectGroup(group string) (*GroupInfo, error) { } gi, code, err := client.SelectGroup(group) - if err != nil && code != 411 { + if err != nil && (code != 411 && code != 480) { // Close connection on unexpected any other error than "group not found" client.ForceCloseConn() return nil, err @@ -171,9 +195,11 @@ func (pool *Pool) SelectGroup(group string) (*GroupInfo, error) { // Put back connection (even for code 411 - group not found) pool.Put(client) - if code == 411 { + switch code { + case 411, 480: err = ErrNewsgroupNotFound // silence error } + return gi, err } @@ -248,6 +274,7 @@ newConn: pool.activeConns-- pool.failedConns++ pool.mux.Unlock() + log.Printf("[NNTP-POOL] Failed to create new connection: provider='%s': %v", pool.Backend.Provider.Name, err) return nil, err } err = pconn.SwitchMode(wantMode) @@ -312,7 +339,7 @@ func (pool *Pool) Put(conn *BackendConn) error { // Check if connection should be closed if conn != nil { conn.mux.Lock() - if conn.forceClose || !conn.connected { + if conn.forceClose || !conn.IsConnected() { forceClose = true } conn.mux.Unlock() @@ -332,7 +359,6 @@ func (pool *Pool) Put(conn *BackendConn) error { pool.mux.Unlock() return nil } - conn.writer.Reset(conn.conn) pool.mux.RUnlock() conn.UpdateLastUsed() // set lastused before returning to pool @@ 
-383,11 +409,26 @@ func (pool *Pool) ClosePool() error { close(pool.connections) } log.Printf("[NNTP-POOL] Closing (%s:%d) active=%d", pool.Backend.Host, pool.Backend.Port, pool.activeConns) + allClosed := pool.activeConns == 0 pool.mux.Unlock() - // Close all connections in the pool - for client := range pool.connections { // drain channel - client.ForceCloseConn() + if !allClosed { + // Close all connections in the pool + closeWait: + for { + select { + case conn, ok := <-pool.connections: + if !ok { + break closeWait + } + if conn != nil { + conn.ForceCloseConn() + } + default: + // pass + break closeWait + } + } } pool.mux.Lock() @@ -434,7 +475,7 @@ func (pool *Pool) createConnection() (*BackendConn, error) { log.Printf("[NNTP-POOL] Failed to create connection to %s:%d: %v", pool.Backend.Host, pool.Backend.Port, err) return nil, fmt.Errorf("failed to create connection: %w", err) } - //log.Printf("[NNTP-POOL] Successfully created connection to %s:%d", pool.Backend.Host, pool.Backend.Port) + log.Printf("[NNTP-POOL] Successfully created connection to %s:%d", pool.Backend.Host, pool.Backend.Port) return client, nil } @@ -448,7 +489,7 @@ func (pool *Pool) isConnectionValid(client *BackendConn) bool { client.mux.Lock() defer client.mux.Unlock() - if client.forceClose || !client.connected { + if client.forceClose || !client.IsConnected() { return false } @@ -503,17 +544,100 @@ done: // startCleanupWorker starts a goroutine that periodically cleans up expired connections func (pool *Pool) startCleanupWorker() { + var closed bool for { time.Sleep(5 * time.Second) pool.Cleanup() - // Check if pool is closed pool.mux.RLock() - closed := pool.closed + closed = pool.closed pool.mux.RUnlock() - if closed { return } } } + +func (pool *Pool) FileCachedListNewsgroups() ([]string, error) { + cacheFile := filepath.Join("data", "cache", fmt.Sprintf("%s.list", pool.Backend.Provider.Host)) + groups, err := LoadNewsgroupListFromFile(cacheFile) + if len(groups) > 0 && err == nil { + 
return groups, nil + } else if err != nil { + log.Printf("[NNTP-POOL] Failed to load cached newsgroup list from %s: %v", cacheFile, err) + } + log.Printf("[NNTP-POOL] No valid cached newsgroup list found at %s, fetching from server...", cacheFile) + remoteGroups, err := pool.ListNewsgroups() + if err != nil { + return nil, err + } + if err := WriteNewsgroupListToFile(cacheFile, remoteGroups); err != nil { + log.Printf("[NNTP-POOL] Failed to write cached newsgroup list to %s: %v", cacheFile, err) + } + var returnGroups []string + for i := range remoteGroups { + returnGroups = append(returnGroups, remoteGroups[i].Name) + } + return returnGroups, nil +} + +func WriteNewsgroupListToFile(filename string, groups []GroupInfo) error { + // Ensure the directory exists + dir := filepath.Dir(filename) + if err := os.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("failed to create directory %s: %w", dir, err) + } + file, err := os.Create(filename) + if err != nil { + return fmt.Errorf("failed to create file: %w", err) + } + defer file.Close() + writer := bufio.NewWriter(file) + for _, group := range groups { + line := fmt.Sprintf("%s\n", group.Name) + _, err := writer.WriteString(line) + if err != nil { + return fmt.Errorf("failed to write to file: %w", err) + } + } + if err := writer.Flush(); err != nil { + return fmt.Errorf("failed to flush writer: %w", err) + } + return nil +} + +func LoadNewsgroupListFromFile(filename string) ([]string, error) { + var groups []string + file, err := os.Open(filename) + if err != nil { + return nil, fmt.Errorf("failed to open file: %w", err) + } + defer file.Close() + // check file age + info, err := file.Stat() + if err != nil { + return nil, fmt.Errorf("failed to stat file: %w", err) + } + if time.Since(info.ModTime()) > 24*time.Hour { + err := os.Remove(filename) + if err != nil { + return nil, fmt.Errorf("failed to remove stale cache file: %w", err) + } + log.Printf("[NNTP-POOL] Cache file %s is stale (age: %v), refreshing...", 
filename, time.Since(info.ModTime())) + return nil, nil + } + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if line == "" { + log.Printf("[NNTP-POOL] Failed to parse group info from line %q: %v", line, err) + continue + } + groups = append(groups, line) + } + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("failed to read file: %w", err) + } + log.Printf("[NNTP-POOL] Loaded %d newsgroups from cache file %s", len(groups), filename) + return groups, nil +} diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 924dfb99..be7d2ae8 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -3,6 +3,7 @@ package nntp // Package nntp provides NNTP command implementations for go-pugleaf. import ( + "bufio" "fmt" "log" "strconv" @@ -28,35 +29,26 @@ var MaxReadLinesXover int64 = 100 // XOVER command typically retrieves overview // MaxReadLinesBody Maximum lines for BODY command, which retrieves the body of an article const MaxReadLinesBody = MaxReadLinesArticle - MaxReadLinesHeaders -func (c *BackendConn) ForceCloseConn() { - c.mux.Lock() - defer c.mux.Unlock() - if !c.forceClose { - c.forceClose = true - go c.Pool.Put(c) - } -} - // StatArticle checks if an article exists on the server func (c *BackendConn) StatArticle(messageID string) (bool, error) { c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { return false, fmt.Errorf("not connected") } c.lastUsed = time.Now() - id, err := c.textConn.Cmd("STAT %s", messageID) + id, err := c.TextConn.Cmd("STAT %s", messageID) if err != nil { return false, fmt.Errorf("failed to send STAT command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, _, err := c.textConn.ReadCodeLine(223) + code, _, err := 
c.TextConn.ReadCodeLine(223) if err != nil { return false, fmt.Errorf("failed to read STAT response: %w", err) } @@ -76,7 +68,7 @@ func (c *BackendConn) GetArticle(messageID *string, bulkmode bool) (*models.Arti c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { return nil, fmt.Errorf("not connected") } @@ -93,15 +85,15 @@ func (c *BackendConn) GetArticle(messageID *string, bulkmode bool) (*models.Arti } }() */ - id, err := c.textConn.Cmd("ARTICLE %s", *messageID) + id, err := c.TextConn.Cmd("ARTICLE %s", *messageID) if err != nil { return nil, fmt.Errorf("failed to send ARTICLE '%s' command: %w", *messageID, err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, message, err := c.textConn.ReadCodeLine(ArticleFollows) + code, message, err := c.TextConn.ReadCodeLine(ArticleFollows) if err != nil && code == 0 { log.Printf("[ERROR] failed to read ARTICLE '%s' code=%d message='%s' err: %v", *messageID, code, message, err) return nil, fmt.Errorf("failed to read ARTICLE '%s' code=%d message='%s' err: %v", *messageID, code, message, err) @@ -110,7 +102,8 @@ func (c *BackendConn) GetArticle(messageID *string, bulkmode bool) (*models.Arti if code != ArticleFollows { switch code { case NoSuchArticle: - log.Printf("[BECONN] GetArticle: not found: '%s' code=%d message='%s' err='%v'", *messageID, code, message, err) + // ---> internal/nntp/nntp-backend-pool.go:158 + //log.Printf("[BECONN] GetArticle: not found: '%s' code=%d message='%s' err='%v'", *messageID, code, message, err) return nil, ErrArticleNotFound case DMCA: log.Printf("[BECONN] GetArticle: removed (DMCA): '%s' code=%d message='%s' err='%v'", *messageID, code, message, err) @@ -140,21 +133,21 @@ func (c *BackendConn) GetHead(messageID string) (*models.Article, error) { c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if 
!c.IsConnected() { return nil, fmt.Errorf("not connected") } c.lastUsed = time.Now() - id, err := c.textConn.Cmd("HEAD %s", messageID) + id, err := c.TextConn.Cmd("HEAD %s", messageID) if err != nil { return nil, fmt.Errorf("failed to send HEAD command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, message, err := c.textConn.ReadCodeLine(HeadFollows) + code, message, err := c.TextConn.ReadCodeLine(HeadFollows) if err != nil { return nil, fmt.Errorf("failed to read HEAD response: %w", err) } @@ -196,21 +189,21 @@ func (c *BackendConn) GetBody(messageID string) ([]byte, error) { c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { return nil, fmt.Errorf("not connected") } c.lastUsed = time.Now() - id, err := c.textConn.Cmd("BODY %s", messageID) + id, err := c.TextConn.Cmd("BODY %s", messageID) if err != nil { return nil, fmt.Errorf("failed to send BODY command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, message, err := c.textConn.ReadCodeLine(BodyFollows) + code, message, err := c.TextConn.ReadCodeLine(BodyFollows) if err != nil { return nil, fmt.Errorf("failed to read BODY response: %w", err) } @@ -244,21 +237,21 @@ func (c *BackendConn) ListGroups() ([]GroupInfo, error) { c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { return nil, fmt.Errorf("not connected") } c.lastUsed = time.Now() - id, err := c.textConn.Cmd("LIST") + id, err := c.TextConn.Cmd("LIST") if err != nil { return nil, fmt.Errorf("failed to send LIST command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + 
defer c.TextConn.EndResponse(id) // Always clean up response state - code, message, err := c.textConn.ReadCodeLine(215) + code, message, err := c.TextConn.ReadCodeLine(215) if err != nil { return nil, fmt.Errorf("failed to read LIST response: %w", err) } @@ -276,7 +269,7 @@ func (c *BackendConn) ListGroups() ([]GroupInfo, error) { // Parse group information var groups = make([]GroupInfo, 0, len(lines)) for _, line := range lines { - group, err := c.parseGroupLine(line) + group, err := ParseGroupLine(line) if err != nil { continue // Skip malformed lines } @@ -291,21 +284,21 @@ func (c *BackendConn) ListGroupsLimited(maxGroups int) ([]GroupInfo, error) { c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { return nil, fmt.Errorf("not connected") } c.lastUsed = time.Now() - id, err := c.textConn.Cmd("LIST") + id, err := c.TextConn.Cmd("LIST") if err != nil { return nil, fmt.Errorf("failed to send LIST command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, message, err := c.textConn.ReadCodeLine(215) + code, message, err := c.TextConn.ReadCodeLine(215) if err != nil { return nil, fmt.Errorf("failed to read LIST response: %w", err) } @@ -326,7 +319,7 @@ func (c *BackendConn) ListGroupsLimited(maxGroups int) ([]GroupInfo, error) { break } - line, err := c.textConn.ReadLine() + line, err := c.TextConn.ReadLine() if err != nil { return nil, fmt.Errorf("failed to read group list: %w", err) } @@ -342,7 +335,7 @@ func (c *BackendConn) ListGroupsLimited(maxGroups int) ([]GroupInfo, error) { } // Parse group information - group, err := c.parseGroupLine(line) + group, err := ParseGroupLine(line) if err != nil { continue // Skip malformed lines } @@ -354,7 +347,7 @@ func (c *BackendConn) ListGroupsLimited(maxGroups int) ([]GroupInfo, error) { // Read remaining lines until end marker 
if we hit the limit if lineCount >= maxGroups { for { - line, err := c.textConn.ReadLine() + line, err := c.TextConn.ReadLine() if err != nil { break } @@ -372,21 +365,21 @@ func (c *BackendConn) SelectGroup(groupName string) (*GroupInfo, int, error) { c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { return nil, 0, fmt.Errorf("not connected") } c.lastUsed = time.Now() - id, err := c.textConn.Cmd("GROUP %s", groupName) + id, err := c.TextConn.Cmd("GROUP %s", groupName) if err != nil { return nil, 0, err } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, message, err := c.textConn.ReadCodeLine(211) + code, message, err := c.TextConn.ReadCodeLine(211) if err != nil { if code != 411 { log.Printf("[ERROR] failed to read GROUP '%s' code=%d message='%s' err: %v", groupName, code, message, err) @@ -437,7 +430,7 @@ func (c *BackendConn) XOver(groupName string, start, end int64, enforceLimit boo return nil, fmt.Errorf("error XOver: group name is required") } //log.Printf("XOver group '%s' start=%d end=%d", groupName, start, end) - if !c.connected { + if !c.IsConnected() { return nil, fmt.Errorf("not connected") } groupInfo, code, err := c.SelectGroup(groupName) @@ -454,18 +447,18 @@ func (c *BackendConn) XOver(groupName string, start, end int64, enforceLimit boo var id uint if end > 0 { - id, err = c.textConn.Cmd("XOVER %d-%d", start, end) + id, err = c.TextConn.Cmd("XOVER %d-%d", start, end) } else { - id, err = c.textConn.Cmd("XOVER %d", start) + id, err = c.TextConn.Cmd("XOVER %d", start) } if err != nil { return nil, fmt.Errorf("failed to send XOVER command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, message, err 
:= c.textConn.ReadCodeLine(224) + code, message, err := c.TextConn.ReadCodeLine(224) if err != nil { return nil, fmt.Errorf("failed to read XOVER response: %w", err) } @@ -498,9 +491,9 @@ func (c *BackendConn) XOver(groupName string, start, end int64, enforceLimit boo // XHdr retrieves specific header field for a range of articles // Automatically limits to max 1000 articles to prevent SQLite overload -func (c *BackendConn) XHdr(groupName, field string, start, end int64) ([]*HeaderLine, error) { +func (c *BackendConn) XHdr(groupName, field string, start, end int64) ([]HeaderLine, error) { c.mux.Lock() - if !c.connected { + if !c.IsConnected() { c.mux.Unlock() return nil, fmt.Errorf("not connected") } @@ -519,18 +512,18 @@ func (c *BackendConn) XHdr(groupName, field string, start, end int64) ([]*Header log.Printf("XHdr group '%s' field '%s' start=%d end=%d", groupName, field, start, end) var id uint if end > 0 { - id, err = c.textConn.Cmd("XHDR %s %d-%d", field, start, end) + id, err = c.TextConn.Cmd("XHDR %s %d-%d", field, start, end) } else { - id, err = c.textConn.Cmd("XHDR %s %d", field, start) + id, err = c.TextConn.Cmd("XHDR %s %d", field, start) } if err != nil { return nil, fmt.Errorf("failed to send XHDR command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, message, err := c.textConn.ReadCodeLine(221) + code, message, err := c.TextConn.ReadCodeLine(221) if err != nil { return nil, fmt.Errorf("failed to read XHDR response: %w", err) } @@ -546,13 +539,15 @@ func (c *BackendConn) XHdr(groupName, field string, start, end int64) ([]*Header } // Parse header lines - var headers = make([]*HeaderLine, 0, len(lines)) + var headers = make([]HeaderLine, 0, len(lines)) for _, line := range lines { header, err := c.parseHeaderLine(line) if err != nil { continue // Skip malformed lines } - headers = 
append(headers, header) + if header.ArticleNum > 0 { + headers = append(headers, header) + } } return headers, nil @@ -574,7 +569,7 @@ func (c *BackendConn) WantShutdown(shutdownChan <-chan struct{}) bool { // XHdrStreamed performs XHDR command and streams results line by line through a channel // Fetches max 1000 hdrs and starts a new fetch if the channel is less than 10% capacity -func (c *BackendConn) XHdrStreamed(groupName, field string, start, end int64, xhdrChan chan<- *HeaderLine, shutdownChan <-chan struct{}) error { +func (c *BackendConn) XHdrStreamed(groupName, field string, start, end int64, xhdrChan chan<- HeaderLine, shutdownChan <-chan struct{}) error { channelCap := cap(xhdrChan) lowWaterMark := channelCap / 10 // 10% threshold if lowWaterMark < 1 { @@ -630,9 +625,9 @@ func (c *BackendConn) XHdrStreamed(groupName, field string, start, end int64, xh } // XHdrStreamedBatch performs XHDR command and streams results line by line through a channel -func (c *BackendConn) XHdrStreamedBatch(groupName, field string, start, end int64, xhdrChan chan<- *HeaderLine, shutdownChan <-chan struct{}) error { +func (c *BackendConn) XHdrStreamedBatch(groupName, field string, start, end int64, xhdrChan chan<- HeaderLine, shutdownChan <-chan struct{}) error { c.mux.Lock() - if !c.connected { + if !c.IsConnected() { c.mux.Unlock() return fmt.Errorf("not connected") } @@ -656,16 +651,16 @@ func (c *BackendConn) XHdrStreamedBatch(groupName, field string, start, end int6 var id uint if end > 0 { - id, err = c.textConn.Cmd("XHDR %s %d-%d", field, start, end) + id, err = c.TextConn.Cmd("XHDR %s %d-%d", field, start, end) } else { - id, err = c.textConn.Cmd("XHDR %s %d-%d", field, start, start) + id, err = c.TextConn.Cmd("XHDR %s %d-%d", field, start, start) } if err != nil { return fmt.Errorf("failed to send XHDR command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer 
c.TextConn.EndResponse(id) // Always clean up response state // Check for shutdown before reading initial response if c.WantShutdown(shutdownChan) { @@ -673,7 +668,7 @@ func (c *BackendConn) XHdrStreamedBatch(groupName, field string, start, end int6 return fmt.Errorf("shutdown requested") } - code, message, err := c.textConn.ReadCodeLine(221) + code, message, err := c.TextConn.ReadCodeLine(221) if err != nil { return fmt.Errorf("failed to read XHDR response: %w", err) } @@ -691,7 +686,7 @@ func (c *BackendConn) XHdrStreamedBatch(groupName, field string, start, end int6 return fmt.Errorf("shutdown requested") } - line, err := c.textConn.ReadLine() + line, err := c.TextConn.ReadLine() if err != nil { log.Printf("[ERROR] XHdrStreamed read error ng: '%s' err='%v'", groupName, err) // EOF or error, finish streaming @@ -729,7 +724,7 @@ func (c *BackendConn) ListGroup(groupName string, start, end int64) ([]int64, er c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { return nil, fmt.Errorf("not connected") } @@ -738,18 +733,18 @@ func (c *BackendConn) ListGroup(groupName string, start, end int64) ([]int64, er var id uint var err error if start > 0 && end > 0 { - id, err = c.textConn.Cmd("LISTGROUP %s %d-%d", groupName, start, end) + id, err = c.TextConn.Cmd("LISTGROUP %s %d-%d", groupName, start, end) } else { - id, err = c.textConn.Cmd("LISTGROUP %s", groupName) + id, err = c.TextConn.Cmd("LISTGROUP %s", groupName) } if err != nil { return nil, fmt.Errorf("failed to send LISTGROUP command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, message, err := c.textConn.ReadCodeLine(211) + code, message, err := c.TextConn.ReadCodeLine(211) if err != nil { return nil, fmt.Errorf("failed to read LISTGROUP response: %w", err) } @@ -829,7 +824,7 @@ func (c *BackendConn) 
readMultilineResponse(src string) ([]string, error) { return nil, fmt.Errorf("too many lines in response (limit: %d)", maxReadLines) } - line, err := c.textConn.ReadLine() + line, err := c.TextConn.ReadLine() if err != nil { return nil, err } @@ -957,7 +952,7 @@ func ParseHeaders(article *models.Article, headerLines []string) error { } // parseGroupLine parses a single line from LIST command response -func (c *BackendConn) parseGroupLine(line string) (GroupInfo, error) { +func ParseGroupLine(line string) (GroupInfo, error) { // Format: "group last first posting" parts := strings.Fields(line) if len(parts) < 4 { @@ -985,6 +980,7 @@ func (c *BackendConn) parseGroupLine(line string) (GroupInfo, error) { First: first, Last: last, PostingOK: postingOK, + Status: parts[3], }, nil } @@ -1017,223 +1013,135 @@ func (c *BackendConn) parseOverviewLine(line string) (OverviewLine, error) { // parseHeaderLine parses a single XHDR response line // Format: articlenumheader-value -func (c *BackendConn) parseHeaderLine(line string) (*HeaderLine, error) { +func (c *BackendConn) parseHeaderLine(line string) (HeaderLine, error) { parts := strings.SplitN(line, " ", 2) if len(parts) < 2 { - return nil, fmt.Errorf("malformed XHDR line: %s", line) + return HeaderLine{}, fmt.Errorf("malformed XHDR line: %s", line) } articleNum, err := strconv.ParseInt(parts[0], 10, 64) if err != nil { log.Printf("Invalid article number in XHDR line: %q", parts[0]) - return nil, fmt.Errorf("invalid article number in XHDR line: %q", parts[0]) + return HeaderLine{}, fmt.Errorf("invalid article number in XHDR line: %q", parts[0]) } - - return &HeaderLine{ + headerline := HeaderLine{ ArticleNum: articleNum, Value: parts[1], - }, nil -} - -// CheckMultiple sends a CHECK command for multiple message IDs and returns responses -func (c *BackendConn) CheckMultiple(messageIDs []*string, ttMode *TakeThisMode) ([]*string, error) { - c.mux.Lock() - defer c.mux.Unlock() - if !c.connected { - return nil, fmt.Errorf("not 
connected") - } - - if c.ModeReader { - return nil, fmt.Errorf("cannot check article in reader mode") } - - if len(messageIDs) == 0 { - return nil, fmt.Errorf("no message IDs provided") - } - - c.lastUsed = time.Now() - - // Send individual CHECK commands for each message ID (pipelining) - commandIds := make([]uint, len(messageIDs)) - for i, msgID := range messageIDs { - id, err := c.textConn.Cmd("CHECK %s", *msgID) - if err != nil { - return nil, fmt.Errorf("failed to send CHECK command for %s: %w", *msgID, err) - } - commandIds[i] = id - } - - // Read responses for each CHECK command - wantedIds := make([]*string, 0, len(messageIDs)) - for i, msgID := range messageIDs { - id := commandIds[i] - // Read response for this CHECK command - c.textConn.StartResponse(id) - code, line, err := c.textConn.ReadCodeLine(238) - c.textConn.EndResponse(id) - if code == 0 && err != nil { - log.Printf("Failed to read CHECK response for %s: %v", *msgID, err) - return nil, fmt.Errorf("failed to read CHECK response for %s: %w", *msgID, err) - } - - // Parse response line - // Format: code [message] - // 238 - article wanted - // 431 - article not wanted - // 438 - article not wanted (already have it) - // ReadCodeLine returns: code=238, message=" article wanted" - parts := strings.Fields(line) - if len(parts) < 1 { - log.Printf("Malformed CHECK response: %s", line) - return nil, fmt.Errorf("malformed CHECK response: %s", line) - } - if parts[0] != *msgID { - log.Printf("Mismatched CHECK response: expected %s, got %s", *msgID, parts[0]) - return nil, fmt.Errorf("out of order CHECK response: expected %s, got %s", *msgID, parts[0]) - } - switch code { - case 238: - //log.Printf("Wanted Article '%s': response=%d", *msgID, code) - wantedIds = append(wantedIds, msgID) - ttMode.Wanted++ - case 438: - //log.Printf("Unwanted Article '%s': response=%d", *msgID, code) - ttMode.Unwanted++ - case 431: - continue - default: - log.Printf("Unknown CHECK response: line='%s' code=%d expected msgID 
%s", line, code, *msgID) - return nil, fmt.Errorf("unknown check response line='%s' code=%d", line, code) - - } - } - // Return all responses - return wantedIds, nil + return headerline, nil } -// TakeThisArticle sends an article via TAKETHIS command -func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *string, newsgroup string) (int, error) { +// SendCheckMultiple sends CHECK commands for multiple message IDs without returning responses! +// Registers each command ID with the demuxer for proper response routing +func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readCHECKResponsesChan chan *ReadRequest, job *CHTTJob, demuxer *ResponseDemuxer) (checksSent uint64, err error) { c.mux.Lock() - defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { + c.mux.Unlock() return 0, fmt.Errorf("not connected") } - if c.ModeReader { - return 0, fmt.Errorf("cannot send article in reader mode") - } - // Prepare article for transfer - headers, err := common.ReconstructHeaders(article, true, nntphostname, newsgroup) - if err != nil { - return 0, fmt.Errorf("failed to reconstruct headers: %v", err) + if c.ModeReader { + c.mux.Unlock() + return 0, fmt.Errorf("cannot check article in reader mode") } - c.lastUsed = time.Now() + c.mux.Unlock() - // Send TAKETHIS command - id, err := c.textConn.Cmd("TAKETHIS %s", article.MessageID) - if err != nil { - return 0, fmt.Errorf("failed to send TAKETHIS command: %w", err) - } - - // Send headers - for _, headerLine := range headers { - if _, err := c.writer.WriteString(headerLine + CRLF); err != nil { - return 0, fmt.Errorf("failed to write header: %w", err) - } - } - - // Send empty line between headers and body - if _, err := c.writer.WriteString(CRLF); err != nil { - return 0, fmt.Errorf("failed to write header/body separator: %w", err) + if len(messageIDs) == 0 { + return 0, fmt.Errorf("no message IDs provided") } - // Send body with proper dot-stuffing - // Split body preserving line endings - 
bodyLines := strings.Split(article.BodyText, "\n") - for i, line := range bodyLines { - // Skip empty last element from trailing \n - if i == len(bodyLines)-1 && line == "" { - break - } - - // Remove trailing \r if present (will add CRLF) - line = strings.TrimSuffix(line, "\r") + //writer := bufio.NewWriter(c.conn) + //defer writer.Flush() + //log.Printf("Newsgroup: '%s' | SendCheckMultiple commands for %d message IDs", *job.Newsgroup, len(messageIDs)) - // Dot-stuff lines that start with a dot (RFC 977) - if strings.HasPrefix(line, ".") { - line = "." + line + for n, msgID := range messageIDs { + if msgID == nil || *msgID == "" { + log.Printf("Newsgroup: '%s' | Skipping empty message ID in CHECK command", *job.Newsgroup) + continue } - - if _, err := c.writer.WriteString(line + CRLF); err != nil { - return 0, fmt.Errorf("failed to write body line: %w", err) + //log.Printf("Newsgroup: '%s' | CHECK '%s' acquire c.mux.Lock() (%d/%d)", *job.Newsgroup, *msgID, n+1, len(messageIDs)) + c.mux.Lock() + cmdID, err := c.TextConn.Cmd("CHECK %s", *msgID) + c.mux.Unlock() + if err != nil { + return checksSent, fmt.Errorf("failed to send CHECK '%s': %w", *msgID, err) } - } - // Send termination line (single dot) - if _, err := c.writer.WriteString(DOT + CRLF); err != nil { - return 0, fmt.Errorf("failed to send article terminator: %w", err) - } + checksSent++ - // Flush the writer to ensure all data is sent - if err := c.writer.Flush(); err != nil { - return 0, fmt.Errorf("failed to flush article data: %w", err) - } + // Register command ID with demuxer as TYPE_CHECK + demuxer.RegisterCommand(cmdID, TYPE_CHECK) - // Read TAKETHIS response - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) - - code, _, err := c.textConn.ReadCodeLine(239) // -1 means any code is acceptable - if code == 0 && err != nil { - return 0, fmt.Errorf("failed to read TAKETHIS response: %w", err) + //log.Printf("Newsgroup: '%s' | CHECK sent '%s' (CmdID=%d) pass notify to 
readResponsesChan=%d", *job.Newsgroup, *msgID, cmdID, len(readCHECKResponsesChan)) + //readCHECKResponsesChan <- &ReadRequest{CmdID: cmdID, Job: job, MsgID: msgID, N: n + 1, Reqs: len(messageIDs)} + readCHECKResponsesChan <- GetReadRequest(cmdID, job, msgID, n+1, len(messageIDs)) + //log.Printf("Newsgroup: '%s' | CHECK notified response reader '%s' (CmdID=%d) readCHECKResponsesChan=%d", *job.Newsgroup, *msgID, cmdID, len(readCHECKResponsesChan)) } - // Parse response - // Format: code [message] - // 239 - article transferred successfully - // 439 - article transfer failed + // Update job counter with how many CHECK commands were actually sent + job.Mux.Lock() + job.CheckSentCount += checksSent + job.Mux.Unlock() - return code, nil + return checksSent, nil } -// SendTakeThisArticleStreaming sends TAKETHIS command and article content without waiting for response +// SendTakeThisArticleStreaming IS UNSAFE! MUST BE LOCKED AND UNLOCKED OUTSIDE FOR THE WHOLE BATCH!!! +// sends TAKETHIS command and article content without waiting for response // Returns command ID for later response reading - used for streaming mode -func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string, newsgroup string) (uint, error) { - c.mux.Lock() - defer c.mux.Unlock() +// Registers the command ID with the demuxer for proper response routing +// return value doContinue indicates whether the caller should continue sending more articles +func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string, newsgroup string, demuxer *ResponseDemuxer, readTAKETHISResponsesChan chan *ReadRequest, job *CHTTJob, n int, reqs int) (cmdID uint, txBytes int, err error, doContinue bool) { + //start := time.Now() + //c.mux.Lock() + //defer c.mux.Unlock() - if !c.connected { - return 0, fmt.Errorf("not connected") + if !c.IsConnected() { + //c.mux.Unlock() + return 0, 0, fmt.Errorf("not connected"), false } if c.ModeReader { - return 0, 
fmt.Errorf("cannot send article in reader mode") + //c.mux.Unlock() + return 0, 0, fmt.Errorf("cannot send article in reader mode"), false } + c.lastUsed = time.Now() + //c.mux.Unlock() // Prepare article for transfer headers, err := common.ReconstructHeaders(article, true, nntphostname, newsgroup) if err != nil { - return 0, err + return 0, 0, err, true } + //writer := bufio.NewWriterSize(c.conn, c.GetBufSize(article.Bytes)) // Slightly larger buffer than article size for headers + writer := bufio.NewWriter(c.conn) + //c.mux.Lock() + //defer c.mux.Unlock() + + //startSend := time.Now() // Send TAKETHIS command - id, err := c.textConn.Cmd("TAKETHIS %s", article.MessageID) + cmdID, err = c.TextConn.Cmd("TAKETHIS %s", article.MessageID) if err != nil { - return 0, fmt.Errorf("failed to send TAKETHIS command: %w", err) + return 0, 0, fmt.Errorf("failed SendTakeThisArticleStreaming command: %w", err), false } // Send headers for _, headerLine := range headers { - if _, err := c.writer.WriteString(headerLine + CRLF); err != nil { - return 0, fmt.Errorf("failed to write header SendTakeThisArticleStreaming: %w", err) + if tx, err := writer.WriteString(headerLine + CRLF); err != nil { + return 0, txBytes, fmt.Errorf("failed to write header SendTakeThisArticleStreaming: %w", err), false + } else { + txBytes += tx } } // Send empty line between headers and body - if _, err := c.writer.WriteString(CRLF); err != nil { - return 0, fmt.Errorf("failed to write header/body separator SendTakeThisArticleStreaming: %w", err) + if tx, err := writer.WriteString(CRLF); err != nil { + return 0, txBytes, fmt.Errorf("failed to write header/body separator SendTakeThisArticleStreaming: %w", err), false + } else { + txBytes += tx } // Send body with proper dot-stuffing @@ -1253,45 +1161,37 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp line = "." 
+ line } - if _, err := c.writer.WriteString(line + CRLF); err != nil { - return 0, fmt.Errorf("failed to write body line SendTakeThisArticleStreaming: %w", err) + if tx, err := writer.WriteString(line + CRLF); err != nil { + return 0, txBytes, fmt.Errorf("failed to write body line SendTakeThisArticleStreaming: %w", err), false + } else { + txBytes += tx } } // Send termination line (single dot) - if _, err := c.writer.WriteString(DOT + CRLF); err != nil { - return 0, fmt.Errorf("failed to send article terminator SendTakeThisArticleStreaming: %w", err) + if tx, err := writer.WriteString(DOT + CRLF); err != nil { + return 0, txBytes, fmt.Errorf("failed to send article terminator SendTakeThisArticleStreaming: %w", err), false + } else { + txBytes += tx } + //log.Printf("Newsgroup: '%s' | TAKETHIS sent CmdID=%d '%s' txBytes: %d in %v (sending took: %v) readTAKETHISResponsesChanLen=%d/%d", newsgroup, cmdID, article.MessageID, txBytes, time.Since(start), time.Since(startSend), len(readTAKETHISResponsesChan), cap(readTAKETHISResponsesChan)) - // Flush the writer to ensure all data is sent - if err := c.writer.Flush(); err != nil { - return 0, fmt.Errorf("failed to flush article data SendTakeThisArticleStreaming: %w", err) + //startFlush := time.Now() + if err := writer.Flush(); err != nil { + return 0, txBytes, fmt.Errorf("failed to flush article data SendTakeThisArticleStreaming: %w", err), false } - // Return command ID without reading response (streaming mode) - return id, nil -} - -// ReadTakeThisResponseStreaming reads a TAKETHIS response using the command ID -// Used in streaming mode after all articles have been sent -func (c *BackendConn) ReadTakeThisResponseStreaming(id uint) (int, error) { - c.mux.Lock() - defer c.mux.Unlock() + //chanStart := time.Now() + // Register command ID with demuxer as TYPE_TAKETHIS (CRITICAL: must match CHECK pattern) + demuxer.RegisterCommand(cmdID, TYPE_TAKETHIS) - // Read TAKETHIS response - c.textConn.StartResponse(id) - defer 
c.textConn.EndResponse(id) - - code, _, err := c.textConn.ReadCodeLine(239) - if code == 0 && err != nil { - return 0, fmt.Errorf("failed to read TAKETHIS response: %w", err) - } - - // Parse response - // Format: code [message] - // 239 - article transferred successfully - // 439 - article transfer failed - return code, nil + //log.Printf("Newsgroup: '%s' | TAKETHIS flushed CmdID=%d '%s' (flushing took: %v) total time: %v readTAKETHISResponsesChan=%d/%d", newsgroup, cmdID, article.MessageID, time.Since(startFlush), time.Since(start), len(readTAKETHISResponsesChan), cap(readTAKETHISResponsesChan)) + // Queue ReadRequest IMMEDIATELY after command (like SendCheckMultiple does at line 1608) + //readTAKETHISResponsesChan <- &ReadRequest{CmdID: cmdID, Job: job, MsgID: &article.MessageID, N: 1, Reqs: 1} + readTAKETHISResponsesChan <- GetReadRequest(cmdID, job, &article.MessageID, n+1, reqs) // reuse global struct to reduce GC pressure + //log.Printf("Newsgroup: '%s' | TAKETHIS notified response reader CmdID=%d '%s' waited %v readTAKETHISResponsesChan=%d/%d", newsgroup, cmdID, article.MessageID, time.Since(chanStart), len(readTAKETHISResponsesChan), cap(readTAKETHISResponsesChan)) + // Return command ID without reading response (streaming mode) + return cmdID, txBytes, nil, true } // PostArticle posts an article using the POST command @@ -1299,7 +1199,7 @@ func (c *BackendConn) PostArticle(article *models.Article) (int, error) { c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { return 0, fmt.Errorf("not connected") } // Prepare article for posting @@ -1310,19 +1210,20 @@ func (c *BackendConn) PostArticle(article *models.Article) (int, error) { c.lastUsed = time.Now() // Send POST command - id, err := c.textConn.Cmd("POST") + id, err := c.TextConn.Cmd("POST") if err != nil { return 0, fmt.Errorf("failed to send POST command: %w", err) } - c.textConn.StartResponse(id) + c.TextConn.StartResponse(id) // Read response to POST command - code, line, 
err := c.textConn.ReadCodeLine(340) - c.textConn.EndResponse(id) + code, line, err := c.TextConn.ReadCodeLine(340) + c.TextConn.EndResponse(id) if err != nil && code == 0 { return code, fmt.Errorf("POST command failed: %s", line) } - + writer := bufio.NewWriter(c.conn) + defer writer.Flush() switch code { case 340: // pass, posted @@ -1334,14 +1235,14 @@ func (c *BackendConn) PostArticle(article *models.Article) (int, error) { } // Send POST command again - id, err := c.textConn.Cmd("POST") + id, err := c.TextConn.Cmd("POST") if err != nil { return 0, fmt.Errorf("failed to send POST command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Read response to POST command - code, line, err = c.textConn.ReadCodeLine(340) + code, line, err = c.TextConn.ReadCodeLine(340) if err != nil { return code, fmt.Errorf("POST command failed: %s", line) } @@ -1355,13 +1256,13 @@ func (c *BackendConn) PostArticle(article *models.Article) (int, error) { // Send headers using writer (not DotWriter) for _, headerLine := range headers { - if _, err := c.writer.WriteString(headerLine + CRLF); err != nil { + if _, err := writer.WriteString(headerLine + CRLF); err != nil { return 0, fmt.Errorf("failed to write header: %w", err) } } // Send empty line between headers and body - if _, err := c.writer.WriteString(CRLF); err != nil { + if _, err := writer.WriteString(CRLF); err != nil { return 0, fmt.Errorf("failed to write header/body separator: %w", err) } @@ -1382,23 +1283,23 @@ func (c *BackendConn) PostArticle(article *models.Article) (int, error) { line = "." 
+ line } - if _, err := c.writer.WriteString(line + CRLF); err != nil { + if _, err := writer.WriteString(line + CRLF); err != nil { return 0, fmt.Errorf("failed to write body line: %w", err) } } // Send termination line (single dot) - if _, err := c.writer.WriteString(DOT + CRLF); err != nil { + if _, err := writer.WriteString(DOT + CRLF); err != nil { return 0, fmt.Errorf("failed to send article terminator: %w", err) } // Flush the writer to ensure all data is sent - if err := c.writer.Flush(); err != nil { + if err := writer.Flush(); err != nil { return 0, fmt.Errorf("failed to flush article data: %w", err) } // Read final response - code, _, err = c.textConn.ReadCodeLine(240) + code, _, err = c.TextConn.ReadCodeLine(240) if err != nil { return code, fmt.Errorf("failed to read POST response: %w", err) } @@ -1433,21 +1334,21 @@ func (c *BackendConn) SwitchToModeReader() error { c.lastUsed = time.Now() // Send MODE READER command - id, err := c.textConn.Cmd("MODE READER") + id, err := c.TextConn.Cmd("MODE READER") if err != nil { return fmt.Errorf("failed to send MODE READER command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) - code, line, err := c.textConn.ReadCodeLine(200) - if err != nil { + code, line, err := c.TextConn.ReadCodeLine(200) + if code == 0 && err != nil { return fmt.Errorf("failed to read MODE READER response: %w", err) } - if code != 200 { - return fmt.Errorf("MODE READER failed (code %d): %s", code, line) + if code < 200 || code > 201 { + return fmt.Errorf("set MODE READER failed (code %d): %s", code, line) } c.ModeReader = true @@ -1468,15 +1369,15 @@ func (c *BackendConn) SwitchToModeStream() error { c.lastUsed = time.Now() // Send MODE STREAM command - id, err := c.textConn.Cmd("MODE STREAM") + id, err := c.TextConn.Cmd("MODE STREAM") if err != nil { return fmt.Errorf("failed to send MODE STREAM command: %w", err) } - 
c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) - code, line, err := c.textConn.ReadCodeLine(203) + code, line, err := c.TextConn.ReadCodeLine(203) if err != nil { return fmt.Errorf("failed to read MODE STREAM response: %w", err) } diff --git a/internal/nntp/nntp-client.go b/internal/nntp/nntp-client.go index 704c579d..29d83b8d 100644 --- a/internal/nntp/nntp-client.go +++ b/internal/nntp/nntp-client.go @@ -3,7 +3,6 @@ package nntp // nntp provides NNTP client functionality for go-pugleaf. import ( - "bufio" "crypto/tls" "fmt" "log" @@ -12,6 +11,7 @@ import ( "sync" "time" + "github.com/go-while/go-pugleaf/internal/common" "github.com/go-while/go-pugleaf/internal/config" ) @@ -43,32 +43,31 @@ const ( DefaultConnExpire = 25 * time.Second // MaxReadLines is the maximum lines to read per response (allow for large group lists). - MaxReadLines = 500000 + MaxReadLines = 1024 * 1024 ) // DefaultBufferTX is the default buffer size for bufio.Writer // when sending articles via POST/TAKETHIS/IHAVE commands. -var DefaultBufferTX int = 64 * 1024 +var DefaultBufferTX int = 16 * 1024 // BackendConn represents an NNTP connection to a server. // It manages the connection state, authentication, and provides methods // for interacting with the NNTP server. 
type BackendConn struct { conn net.Conn - textConn *textproto.Conn - writer *bufio.Writer + TextConn *textproto.Conn Backend *BackendConfig mux sync.RWMutex Pool *Pool // link to parent pool // Connection state - connected bool - authenticated bool - ModeReader bool - ModeStream bool - forceClose bool - created time.Time - lastUsed time.Time + isConnected chan struct{} + hasClosed chan struct{} + ModeReader bool + ModeStream bool + forceClose bool + created time.Time + lastUsed time.Time // INN allows switching to mode reader when mode stream is active // but INN does not allow switching to mode stream when mode reader is active // when using check/takethis and we got a mode reader connection from pool @@ -116,6 +115,7 @@ type GroupInfo struct { FetchStart int64 FetchEnd int64 PostingOK bool + Status string } // OverviewLine represents a line from XOVER command @@ -139,8 +139,10 @@ type HeaderLine struct { // NewConn creates a new empty NNTP connection with the provided backend configuration. 
func NewConn(backend *BackendConfig) *BackendConn { return &BackendConn{ - Backend: backend, - created: time.Now(), + Backend: backend, + created: time.Now(), + isConnected: make(chan struct{}, 1), + hasClosed: make(chan struct{}, 1), } } @@ -151,10 +153,12 @@ func (c *BackendConn) Connect() error { c.Backend.ConnectTimeout = config.DefaultConnectTimeout } c.Backend.Mux.Unlock() + c.mux.Lock() defer c.mux.Unlock() - if c.connected { - return nil + if c.IsConnected() { + log.Printf("[NNTP-CONN] ERROR: Tried Connect() but this conn is already connected to %s:%d hasClosed=%d", c.Backend.Host, c.Backend.Port, len(c.hasClosed)) + return fmt.Errorf("connection already established to %s:%d", c.Backend.Host, c.Backend.Port) } // Check if this is a .onion address and automatically enable Tor if not already configured @@ -200,11 +204,11 @@ func (c *BackendConn) Connect() error { } c.conn = conn - c.textConn = textproto.NewConn(conn) - c.writer = bufio.NewWriterSize(conn, DefaultBufferTX) // bufio writer with defined buffer size + c.TextConn = textproto.NewConn(conn) + //c.Writer = bufio.NewWriterSize(conn, DefaultBufferTX) // bufio writer with defined buffer size // Read welcome message - code, message, err := c.textConn.ReadCodeLine(NNTPWelcomeCodeMin) + code, message, err := c.TextConn.ReadCodeLine(NNTPWelcomeCodeMin) if err != nil { log.Printf("[NNTP-CONN] Error reading welcome from %s:%d: %v", c.Backend.Host, c.Backend.Port, err) return err @@ -217,36 +221,35 @@ func (c *BackendConn) Connect() error { //log.Printf("[NNTP-CONN] Successfully connected to %s:%d with welcome code %d", c.Backend.Host, c.Backend.Port, code) - c.connected = true c.lastUsed = time.Now() // Authenticate if credentials provided if c.Backend.Username != "" { //log.Printf("[NNTP-AUTH] Attempting authentication for user '%s' on %s:%d", c.Backend.Username, c.Backend.Host, c.Backend.Port) if err := c.authenticate(); err != nil { - log.Printf("[NNTP-AUTH] Authentication FAILED for user '%s' on %s:%d 
err: %v", c.Backend.Username, c.Backend.Host, c.Backend.Port, err) - time.Sleep(time.Second * 5) + log.Printf("[NNTP-AUTH] Authentication FAILED for user '%s' on %s:%d err: %v (sleep 15s)", c.Backend.Username, c.Backend.Host, c.Backend.Port, err) + time.Sleep(time.Second * 15) return err } //log.Printf("[NNTP-AUTH] Authentication SUCCESS for user '%s' on %s:%d", c.Backend.Username, c.Backend.Host, c.Backend.Port) } else { //log.Printf("[NNTP-AUTH] No credentials provided, skipping authentication for %s:%d", c.Backend.Host, c.Backend.Port) } - + c.isConnected <- struct{}{} return nil } // authenticate performs NNTP authentication func (c *BackendConn) authenticate() error { // Send AUTHINFO USER - id, err := c.textConn.Cmd("AUTHINFO USER %s", c.Backend.Username) + id, err := c.TextConn.Cmd("AUTHINFO USER %s", c.Backend.Username) if err != nil { return err } - c.textConn.StartResponse(id) - code, message, err := c.textConn.ReadCodeLine(NNTPMoreInfoCode) - c.textConn.EndResponse(id) + c.TextConn.StartResponse(id) + code, message, err := c.TextConn.ReadCodeLine(NNTPMoreInfoCode) + c.TextConn.EndResponse(id) if err != nil { return err @@ -257,14 +260,14 @@ func (c *BackendConn) authenticate() error { } // Send AUTHINFO PASS - id, err = c.textConn.Cmd("AUTHINFO PASS %s", c.Backend.Password) + id, err = c.TextConn.Cmd("AUTHINFO PASS %s", c.Backend.Password) if err != nil { return err } - c.textConn.StartResponse(id) - code, message, err = c.textConn.ReadCodeLine(NNTPAuthSuccess) - c.textConn.EndResponse(id) + c.TextConn.StartResponse(id) + code, message, err = c.TextConn.ReadCodeLine(NNTPAuthSuccess) + c.TextConn.EndResponse(id) if err != nil { return err @@ -274,7 +277,6 @@ func (c *BackendConn) authenticate() error { return fmt.Errorf("authentication failed: %d %s", code, message) } - c.authenticated = true return nil } @@ -283,31 +285,63 @@ func (c *BackendConn) CloseFromPoolOnly() error { c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { 
return nil } - if c.textConn != nil { - if err := c.textConn.Close(); err != nil { + if c.TextConn != nil { + if err := c.TextConn.Close(); err != nil { //log.Printf("Error closing text connection: %v", err) } } if c.conn != nil { if err := c.conn.Close(); err != nil { - //log.Printf("xx Error closing connection: %v", err) + //log.Printf("Error closing connection: %v", err) } } - c.connected = false - c.authenticated = false - c.textConn = nil // CloseFromPoolOnly - c.conn = nil // CloseFromPoolOnly - c.writer = nil - //log.Printf("Closed NNTP Connection to %s", c.Backend.Host) + close(c.hasClosed) + <-c.isConnected + //c.TextConn = nil // CloseFromPoolOnly + //c.conn = nil // CloseFromPoolOnly + //c.Writer = nil + log.Printf("Closed NNTP Connection to %s", c.Backend.Host) return nil } +func (c *BackendConn) ForceCloseConn() { + go func() { + c.mux.Lock() + if !c.forceClose { + c.forceClose = true + } + c.mux.Unlock() + c.Pool.Put(c) + }() +} + +func (c *BackendConn) IsConnected() bool { + if common.IsClosedChannel(c.hasClosed) { + return false + } + if len(c.isConnected) == 0 { + return false + } + /* + c.mux.Lock() + //log.Printf("IsConnected check: connected=%v conn=%v", c.connected, c.conn) + defer c.mux.Unlock() + if !c.connected { + return false + } + if c.conn == nil { + return false + } + */ + return true +} + // SetReadDeadline sets the read deadline for the connection func (c *BackendConn) xSetReadDeadline(t time.Time) error { @@ -335,3 +369,17 @@ func (c *BackendConn) UpdateLastUsed() { //c.SetReadDeadline(time.Now().Add(c.Backend.ReadTimeout)) //c.SetWriteDeadline(time.Now().Add(c.Backend.WriteTimeout)) } + +func (c *BackendConn) GetBufSize(size int) int { + if size+1024 <= DefaultBufferTX { + return size + 1024 + } + return DefaultBufferTX // hardcoded default max buffer size +} + +func (c *BackendConn) Lock() { + c.mux.Lock() +} +func (c *BackendConn) Unlock() { + c.mux.Unlock() +} diff --git a/internal/nntp/nntp-cmd-article.go 
b/internal/nntp/nntp-cmd-article.go index 539e555d..5010f429 100644 --- a/internal/nntp/nntp-cmd-article.go +++ b/internal/nntp/nntp-cmd-article.go @@ -1,6 +1,3 @@ package nntp -// handleArticle handles ARTICLE command -func (c *ClientConnection) handleArticle(args []string) error { - return c.retrieveArticleCommon(args, RetrievalArticle) -} +// stub diff --git a/internal/nntp/nntp-cmd-body.go b/internal/nntp/nntp-cmd-body.go index 9bbf52b5..5010f429 100644 --- a/internal/nntp/nntp-cmd-body.go +++ b/internal/nntp/nntp-cmd-body.go @@ -1,6 +1,3 @@ package nntp -// handleBody handles BODY command -func (c *ClientConnection) handleBody(args []string) error { - return c.retrieveArticleCommon(args, RetrievalBody) -} +// stub diff --git a/internal/nntp/nntp-cmd-group.go b/internal/nntp/nntp-cmd-group.go index 47a2ea93..e77280b6 100644 --- a/internal/nntp/nntp-cmd-group.go +++ b/internal/nntp/nntp-cmd-group.go @@ -46,15 +46,15 @@ func (c *ClientConnection) handleListGroup(args []string) error { } // Get group database - groupDBs, err := c.server.DB.GetGroupDBs(groupName) + groupDB, err := c.server.DB.GetGroupDB(groupName) if err != nil { c.rateLimitOnError() return c.sendResponse(411, "No such newsgroup") } - defer groupDBs.Return(c.server.DB) + defer groupDB.Return() // Get overview data to list article numbers - overviews, err := c.server.DB.GetOverviews(groupDBs) + overviews, err := c.server.DB.GetOverviews(groupDB) if err != nil { c.rateLimitOnError() return c.sendResponse(503, "Failed to retrieve article list") diff --git a/internal/nntp/nntp-cmd-head.go b/internal/nntp/nntp-cmd-head.go index eb4bc811..5010f429 100644 --- a/internal/nntp/nntp-cmd-head.go +++ b/internal/nntp/nntp-cmd-head.go @@ -1,6 +1,3 @@ package nntp -// handleHead handles HEAD command -func (c *ClientConnection) handleHead(args []string) error { - return c.retrieveArticleCommon(args, RetrievalHead) -} +// stub diff --git a/internal/nntp/nntp-cmd-posting.go b/internal/nntp/nntp-cmd-posting.go index 
0bf90a67..d796499b 100644 --- a/internal/nntp/nntp-cmd-posting.go +++ b/internal/nntp/nntp-cmd-posting.go @@ -64,7 +64,7 @@ func (c *ClientConnection) handleIHave(args []string) error { } // Check if we already have this article - response, err := c.server.Processor.Lookup(msgIdItem) + response, _, err := c.server.Processor.Lookup(msgIdItem, true) if err != nil { log.Printf("Error looking up message ID %s in history: %v", msgIdItem.MessageId, err) c.rateLimitOnError() @@ -130,7 +130,7 @@ func (c *ClientConnection) handleTakeThis(args []string) error { } // Check if we already have this article - response, err := c.server.Processor.Lookup(msgIdItem) + response, _, err := c.server.Processor.Lookup(msgIdItem, true) if err != nil { log.Printf("Error looking up message ID %s in history: %v", msgIdItem.MessageId, err) c.rateLimitOnError() diff --git a/internal/nntp/nntp-cmd-stat.go b/internal/nntp/nntp-cmd-stat.go index 34e26143..5010f429 100644 --- a/internal/nntp/nntp-cmd-stat.go +++ b/internal/nntp/nntp-cmd-stat.go @@ -1,6 +1,3 @@ package nntp -// handleStat handles STAT command -func (c *ClientConnection) handleStat(args []string) error { - return c.retrieveArticleCommon(args, RetrievalStat) -} +// stub diff --git a/internal/nntp/nntp-cmd-xhdr.go b/internal/nntp/nntp-cmd-xhdr.go index f916816e..bee15a79 100644 --- a/internal/nntp/nntp-cmd-xhdr.go +++ b/internal/nntp/nntp-cmd-xhdr.go @@ -74,15 +74,15 @@ func (c *ClientConnection) handleXHdr(args []string) error { } // Get group database - groupDBs, err := c.server.DB.GetGroupDBs(c.currentGroup) + groupDB, err := c.server.DB.GetGroupDB(c.currentGroup) if err != nil { c.rateLimitOnError() return c.sendResponse(411, "No such newsgroup") } - defer groupDBs.Return(c.server.DB) + defer groupDB.Return() // Get header field data for the range - headerData, err := c.server.DB.GetHeaderFieldRange(groupDBs, headerField, startNum, endNum) + headerData, err := c.server.DB.GetHeaderFieldRange(groupDB, headerField, startNum, endNum) 
if err != nil { c.rateLimitOnError() return c.sendResponse(503, "Failed to retrieve header data") diff --git a/internal/nntp/nntp-cmd-xover.go b/internal/nntp/nntp-cmd-xover.go index 40e65429..00156387 100644 --- a/internal/nntp/nntp-cmd-xover.go +++ b/internal/nntp/nntp-cmd-xover.go @@ -68,15 +68,15 @@ func (c *ClientConnection) handleXOver(args []string) error { } // Get group database - groupDBs, err := c.server.DB.GetGroupDBs(c.currentGroup) + groupDB, err := c.server.DB.GetGroupDB(c.currentGroup) if err != nil { c.rateLimitOnError() return c.sendResponse(411, "No such newsgroup") } - defer groupDBs.Return(c.server.DB) + defer groupDB.Return() // Get overview data for the range - overviews, err := c.server.DB.GetOverviewsRange(groupDBs, startNum, endNum) + overviews, err := c.server.DB.GetOverviewsRange(groupDB, startNum, endNum) if err != nil { c.rateLimitOnError() return c.sendResponse(503, "Failed to retrieve overview data") diff --git a/internal/nntp/nntp-server.go b/internal/nntp/nntp-server.go index 360b4456..b859b566 100644 --- a/internal/nntp/nntp-server.go +++ b/internal/nntp/nntp-server.go @@ -18,7 +18,7 @@ import ( // Processor interface for article processing type ArticleProcessor interface { ProcessIncomingArticle(article *models.Article) (int, error) - Lookup(msgIdItem *history.MessageIdItem) (int, error) + Lookup(msgIdItem *history.MessageIdItem, quick bool) (response int, newsgroupIDs []int64, err error) } const ( diff --git a/internal/nntp/nntp-transfer-demuxer.go b/internal/nntp/nntp-transfer-demuxer.go new file mode 100644 index 00000000..c99311c0 --- /dev/null +++ b/internal/nntp/nntp-transfer-demuxer.go @@ -0,0 +1,249 @@ +package nntp + +import ( + "log" + "sync" + "time" + + "github.com/go-while/go-pugleaf/internal/common" +) + +// ResponseDemuxer reads all responses from a connection in ONE goroutine +// and dispatches them to the appropriate handler channel (CHECK or TAKETHIS) +// This eliminates race conditions in concurrent 
ReadCodeLine calls +type ResponseDemuxer struct { + conn *BackendConn + cmdIDChan chan *CmdIDinfo + //signalChan chan struct{} + //cmdIDQMux sync.RWMutex + LastID uint + checkResponseChan chan *ResponseData + ttResponseChan chan *ResponseData + errChan chan struct{} + started bool + startedMux sync.Mutex + lastRequest time.Time + lastRequestMux sync.RWMutex +} + +// NewResponseDemuxer creates a new response demultiplexer +func NewResponseDemuxer(conn *BackendConn, errChan chan struct{}) *ResponseDemuxer { + return &ResponseDemuxer{ + conn: conn, + cmdIDChan: make(chan *CmdIDinfo, 64*1024), // Buffer for command IDs + checkResponseChan: make(chan *ResponseData, 1024*1024), // Buffer for CHECK responses + ttResponseChan: make(chan *ResponseData, 1024*1024), // Buffer for TAKETHIS responses + //signalChan: make(chan struct{}, 64*1024), + errChan: errChan, + started: false, + } +} + +// RegisterCommand registers a command ID with its type (CHECK or TAKETHIS) +func (d *ResponseDemuxer) RegisterCommand(cmdID uint, cmdType ResponseType) { + d.lastRequestMux.Lock() + d.lastRequest = time.Now() + d.lastRequestMux.Unlock() + select { + case d.cmdIDChan <- &CmdIDinfo{CmdID: cmdID, RespType: cmdType}: + // Registered successfully + case <-d.errChan: + log.Printf("ResponseDemuxer: got errChan while registering command, exiting") + common.SignalErrChan(d.errChan) + return + } + /* + select { + case d.signalChan <- struct{}{}: + // sent signal + case <-d.errChan: + log.Printf("ResponseDemuxer: got errChan while signaling command, exiting") + common.SignalErrChan(d.errChan) + return + /,* disabled + default: + // no-op + *,/ + } + */ +} + +// PopCommand removes a command ID from the queue +func (d *ResponseDemuxer) PopCommand() *CmdIDinfo { + + //if len(d.cmdIDChan) == 0 { + // return nil + //} + + select { + case cmdIDInfo := <-d.cmdIDChan: + return cmdIDInfo + case <-d.errChan: + log.Printf("ResponseDemuxer: got errChan while popping command, exiting") + 
common.SignalErrChan(d.errChan) + return nil + //default: + // // no-op + } + + //return nil +} + +// GetCheckResponseChan returns the channel for CHECK responses +func (d *ResponseDemuxer) GetCheckResponseChan() chan *ResponseData { + return d.checkResponseChan +} + +// GetTakeThisResponseChan returns the channel for TAKETHIS responses +func (d *ResponseDemuxer) GetTakeThisResponseChan() chan *ResponseData { + return d.ttResponseChan +} + +// Start launches the central response reader goroutine (call once) +func (d *ResponseDemuxer) Start() { + d.startedMux.Lock() + defer d.startedMux.Unlock() + + if d.started { + return // Already started + } + d.started = true + + go d.readAndDispatch() + + /* disabled + go func() { + // keep alive + for { + <-time.After(time.Millisecond * 16) + select { + case d.signalChan <- struct{}{}: + default: + } + } + }() + */ +} + +// readAndDispatch is the SINGLE goroutine that reads ALL responses from the shared connection +func (d *ResponseDemuxer) readAndDispatch() { + /* disabled + defer func() { + if r := recover(); r != nil { + log.Printf("ResponseDemuxer: panic in readAndDispatch: %v", r) + } + common.SignalErrChan(d.errChan) + }() + */ + outoforderBacklog := make(map[uint]*CmdIDinfo, 1024) +loop: + for { + select { + case <-d.errChan: + log.Printf("ResponseDemuxer: got errChan signal, exiting") + common.SignalErrChan(d.errChan) + // exit + return + default: + // pass + } + + if !d.conn.IsConnected() { + common.SignalErrChan(d.errChan) + log.Printf("ResponseDemuxer: connection lost, exiting") + return + } + + var cmdInfo *CmdIDinfo + if len(outoforderBacklog) > 0 { + if cmdInfoBacklog, exists := outoforderBacklog[d.LastID+1]; exists { + log.Printf("ResponseDemuxer: processing out-of-order backlog cmdID=%d d.LastID=%d", cmdInfoBacklog.CmdID, d.LastID) + cmdInfo = cmdInfoBacklog + outoforderBacklog[d.LastID+1] = nil + delete(outoforderBacklog, d.LastID+1) + } else { + log.Printf("ResponseDemuxer: no backlog with cmdID=%d found. 
try PopCommand", d.LastID+1) + cmdInfo = d.PopCommand() + } + } else { + cmdInfo = d.PopCommand() + } + if cmdInfo == nil { + /* disabled + if len(outoforderBacklog) > 0 { + log.Printf("ResponseDemuxer: got no cmdInfo but have outoforderBacklog: %d [%v]", len(outoforderBacklog), outoforderBacklog) + if _, exists := outoforderBacklog[d.LastID+1]; exists { + log.Printf("ResponseDemuxer: pre-processing out-of-order backlog cmdID=%d d.LastID=%d", d.LastID+1, d.LastID) + continue loop + } + } + */ + //log.Printf("ResponseDemuxer: nothing to process, waiting on signalChan") + //<-d.signalChan + continue loop + } + if d.LastID+1 != cmdInfo.CmdID { + log.Printf("ResponseDemuxer: WARNING - out-of-order cmdID received, expected %d got %d", d.LastID+1, cmdInfo.CmdID) + outoforderBacklog[cmdInfo.CmdID] = cmdInfo + continue loop + } else { + d.LastID = cmdInfo.CmdID + } + + //log.Printf("ResponseDemuxer: waiting for response cmdID=%d respType=%d", cmdInfo.CmdID, cmdInfo.RespType) + start := time.Now() + d.conn.TextConn.StartResponse(cmdInfo.CmdID) + code, line, err := d.conn.TextConn.ReadCodeLine(0) // Read any code + d.conn.TextConn.EndResponse(cmdInfo.CmdID) + if time.Since(start) > time.Second { + log.Printf("LongWait ResponseDemuxer: received response cmdID=%d: code=%d line='%s' err='%v' respType=%d (waited %v)", cmdInfo.CmdID, code, line, err, cmdInfo.RespType, time.Since(start)) + } + if err != nil && code == 0 { + common.SignalErrChan(d.errChan) + log.Printf("ResponseDemuxer: error reading response for cmdID=%d: %v", cmdInfo.CmdID, err) + return + } + // Dispatch based on registered type + switch cmdInfo.RespType { + case TYPE_CHECK: + select { + case d.checkResponseChan <- GetResponseData(cmdInfo.CmdID, code, line, err): + // Dispatched successfully + //log.Printf("ResponseDemuxer: dispatched CHECK response cmdID=%d d.checkResponseChan=%d", cmdInfo.CmdID, len(d.checkResponseChan)) + case <-d.errChan: + log.Printf("ResponseDemuxer: got errChan while dispatching CHECK 
response, exiting") + common.SignalErrChan(d.errChan) + return + } + + case TYPE_TAKETHIS: + select { + case d.ttResponseChan <- GetResponseData(cmdInfo.CmdID, code, line, err): + // Dispatched successfully + //log.Printf("ResponseDemuxer: dispatched TAKETHIS response cmdID=%d d.ttResponseChan=%d", cmdInfo.CmdID, len(d.ttResponseChan)) + case <-d.errChan: + common.SignalErrChan(d.errChan) + log.Printf("ResponseDemuxer: got errChan while dispatching TAKETHIS response, exiting") + return + } + + default: + log.Printf("ResponseDemuxer: WARNING - unknown command type for cmdID=%d, signaling ERROR", cmdInfo.CmdID) + common.SignalErrChan(d.errChan) + } + } +} + +// GetStatistics returns current demuxer statistics +func (d *ResponseDemuxer) GetDemuxerStats() (pendingCommands int64, checkResponsesQueued int64, ttResponsesQueued int64, lastRequest time.Time) { + + pendingCommands = int64(len(d.cmdIDChan)) + checkResponsesQueued = int64(len(d.checkResponseChan)) + ttResponsesQueued = int64(len(d.ttResponseChan)) + + d.lastRequestMux.RLock() + lastRequest = d.lastRequest + d.lastRequestMux.RUnlock() + + return pendingCommands, checkResponsesQueued, ttResponsesQueued, lastRequest +} diff --git a/internal/nntp/nntp-transfer.go b/internal/nntp/nntp-transfer.go new file mode 100644 index 00000000..ab30c422 --- /dev/null +++ b/internal/nntp/nntp-transfer.go @@ -0,0 +1,627 @@ +package nntp + +import ( + "log" + "sync" + "time" + + "github.com/go-while/go-pugleaf/internal/common" + "github.com/go-while/go-pugleaf/internal/models" +) + +var NNTPTransferThreads int = 1 + +var JobIDCounter uint64 // Atomic counter for unique job IDs + +var ReturnDelay = time.Millisecond * 16 + +// ResponseType indicates which handler should process a response +type ResponseType int + +const ( + TYPE_CHECK ResponseType = iota + TYPE_TAKETHIS +) + +// Pool of ResponseData structs to reduce allocations +var ResponseDataPool = make(chan *ResponseData, 1024*1024) + +// GetResponseData returns a recycled 
ResponseData struct or makes a new one if none are available +func GetResponseData(cmdID uint, code int, line string, err error) *ResponseData { + select { + case rd := <-ResponseDataPool: + rd.CmdID = cmdID + rd.Code = code + rd.Line = line + rd.Err = err + return rd + default: + return &ResponseData{ + CmdID: cmdID, + Code: code, + Line: line, + Err: err, + } + } +} + +// RecycleResponseData resets a ResponseData struct and recycles it back into the pool +func RecycleResponseData(rd *ResponseData) { + rd.CmdID = 0 + rd.Code = 0 + rd.Line = "" + rd.Err = nil + select { + case ResponseDataPool <- rd: + default: + // pool is full, discard + } +} + +// ResponseData holds a read response from the connection +type ResponseData struct { + CmdID uint + Code int + Line string + Err error +} + +// CmdIDinfo holds information about a command sent to the remote server +type CmdIDinfo struct { + CmdID uint + RespType ResponseType +} + +// used in nntp-transfer/main.go +type TakeThisMode struct { + mux sync.Mutex + Newsgroup *string + TmpSuccessCount uint64 + TmpTTotalsCount uint64 + CheckMode bool +} + +// TTSetup holds the response channel for a batched TAKETHIS job +type TTSetup struct { + ResponseChan chan *TTResponse +} + +// Pool of TTSetup structs to reduce allocations +var TTSetupPool = make(chan *TTSetup, 1024*1024) + +// GetTTSetup returns a recycled TTSetup struct or makes a new one if none are available +// the responseChan parameter is received from processBatch() and is mandatory and will be set on the returned struct +func GetTTSetup(responseChan chan *TTResponse) *TTSetup { + select { + case ch := <-TTSetupPool: + ch.ResponseChan = responseChan + return ch + default: + return &TTSetup{ + ResponseChan: responseChan, + } + } +} + +// RecycleTTSetup recycles a TTSetup struct back into the pool +func RecycleTTSetup(tts *TTSetup) { + tts.ResponseChan = nil + select { + case TTSetupPool <- tts: + default: + // pool is full, discard + } +} + +// OffsetQueue manages 
the number of concurrent batches being processed for a newsgroup +type OffsetQueue struct { + Newsgroup *string + MaxQueuedJobs int + mux sync.RWMutex + queued int + waiter []chan struct{} +} + +// Wait waits until the number of queued batches is less than n +func (o *OffsetQueue) Wait(n int) { + start := time.Now() + lastPrint := start + setWaiting := false + waitChan := common.GetStructChanCap1() + defer common.RecycleStructChanCap1(waitChan) + for { + if common.WantShutdown() { + return + } + o.mux.Lock() + // log.Printf("OffsetQueue: currently queued: %d, waiting for %d batches to finish", o.queued, n) + if o.queued < n { + // enough batches have finished + if time.Since(start).Milliseconds() > 1000 { + log.Printf("Newsgroup: '%s' | OffsetQueue: waited (%d ms) for %d batches. queued: %d", *o.Newsgroup, time.Since(start).Milliseconds(), n, o.queued) + } + o.mux.Unlock() + return + } + if time.Since(lastPrint) > time.Second*5 { + log.Printf("Newsgroup: '%s' | OffsetQueue: waiting for queued batches: %d", *o.Newsgroup, o.queued) + lastPrint = time.Now() + } + if !setWaiting { + o.waiter = append(o.waiter, waitChan) + setWaiting = true + } + o.mux.Unlock() + + // wait for signal or timeout to retry + if setWaiting { + wait: + for { + select { + case <-waitChan: + // got signal, recheck condition + break wait + case <-time.After(time.Second * 6): + // timeout, recheck condition + break wait + } + } + } + } +} + +// OffsetBatchDone signals that a batch has finished processing +func (o *OffsetQueue) OffsetBatchDone() { + o.mux.Lock() + defer o.mux.Unlock() + o.queued-- + if len(o.waiter) > 0 { + // notify one waiter + waitChan := o.waiter[0] + o.waiter = o.waiter[1:] + select { + case waitChan <- struct{}{}: + default: + // if the channel is full, skip sending + } + } + //log.Printf("OffsetQueue: a batch is done, still queued: %d", o.queued) +} + +func (o *OffsetQueue) Add(n int) { + o.mux.Lock() + defer o.mux.Unlock() + o.queued += n + if o.MaxQueuedJobs > 10 && 
o.queued > o.MaxQueuedJobs/100*90 { + // prints only if occupancy is over 90% + log.Printf("Newsgroup: '%s' | OffsetQueue: added %d batches, now queued: %d/%d", *o.Newsgroup, n, o.queued, o.MaxQueuedJobs) + } +} + +// TTResponse holds the response for a batched TAKETHIS job +type TTResponse struct { + Job *CHTTJob + ForceCleanUp bool + Err error +} + +// Pool of TTResponse structs to reduce allocations +var TTResponsePool = make(chan *TTResponse, 1024*1024) + +// GetTTResponse returns a recycled TTResponse struct or makes a new one if none are available +func GetTTResponse(job *CHTTJob, forceCleanup bool, err error) *TTResponse { + select { + case resp := <-TTResponsePool: + resp.Job = job + resp.ForceCleanUp = forceCleanup + resp.Err = err + return resp + default: + return &TTResponse{ + Job: job, + ForceCleanUp: forceCleanup, + Err: err, + } + } +} + +// RecycleTTResponse resets a TTResponse struct and recycles it back into the pool +func RecycleTTResponse(resp *TTResponse) { + resp.Job = nil + resp.ForceCleanUp = false + resp.Err = nil + select { + case TTResponsePool <- resp: + default: + // pool is full, discard + } +} + +// Pool of TTResponse chans to reduce allocations +var TTResponseChans = make(chan chan *TTResponse, 1024*1024) + +// GetTTResponseChan returns a recycled chan *TTResponse or makes a new one with capacity of 1 if none are available +func GetTTResponseChan() chan *TTResponse { + select { + case ch := <-TTResponseChans: + return ch + default: + return make(chan *TTResponse, 1) + } +} + +func RecycleTTResponseChan(ch chan *TTResponse) { + if cap(ch) != 1 { + log.Printf("Warning: Attempt to recycle chan *TTResponse with wrong capacity: %d", cap(ch)) + return + } + // empty out the channel + select { + case <-ch: + // successfully emptied + default: + // is already empty + } + // park + select { + case TTResponseChans <- ch: + // successfully recycled + default: + // channel pool is full, discard + } +} + +type CheckResponse struct { // deprecated 
+ CmdId uint + Article *models.Article +} + +type ReadRequest struct { + CmdID uint + Job *CHTTJob + MsgID *string + N int + Reqs int +} + +// Pool of ReadRequest structs to reduce allocations +var ReadRequestsPool = make(chan *ReadRequest, 1024*1024) + +// ClearReadRequest resets a ReadRequest struct and recycles it back into the pool +func (rr *ReadRequest) ClearReadRequest(respData *ResponseData) { + rr.CmdID = 0 + rr.Job = nil + rr.MsgID = nil + rr.N = 0 + rr.Reqs = 0 + RecycleReadRequest(rr) + if respData != nil { + RecycleResponseData(respData) + } +} + +// GetReadRequest returns a recycled ReadRequest struct or makes a new one if none are available +func GetReadRequest(CmdID uint, Job *CHTTJob, MsgID *string, n int, reqs int) *ReadRequest { + select { + case rr := <-ReadRequestsPool: + rr.CmdID = CmdID + rr.Job = Job + rr.MsgID = MsgID + rr.N = n + rr.Reqs = reqs + return rr + default: + return &ReadRequest{ + CmdID: CmdID, + Job: Job, + MsgID: MsgID, + N: n, + Reqs: reqs, + } + } +} + +func RecycleReadRequest(rr *ReadRequest) { + select { + case ReadRequestsPool <- rr: + default: + // pool is full, discard + } +} + +// batched CHECK/TAKETHIS Job +type CHTTJob struct { + JobID uint64 // Unique job ID for tracing + Newsgroup *string + Mux sync.RWMutex + TTMode *TakeThisMode + ResponseChan chan *TTResponse + responseSent bool // Track if response already sent (prevents double send) + MessageIDs []*string + WantedIDs []*string + PendingResponses sync.WaitGroup // Track pending TAKETHIS responses + CheckSentCount uint64 // Track how many CHECK commands were sent + //checked uint64 + //wanted uint64 + //unwanted uint64 + //rejected uint64 + //retry uint64 + //transferred uint64 + //redisCached uint64 + //TxErrors uint64 + //ConnErrors uint64 + TmpTxBytes uint64 + TTxBytes uint64 + OffsetStart int64 + BatchStart int64 + BatchEnd int64 + OffsetQ *OffsetQueue + NGTProgress *NewsgroupTransferProgress +} + +// GetResponseChan returns the ResponseChan for the job +func 
(job *CHTTJob) GetResponseChan() chan *TTResponse { + job.Mux.RLock() + defer job.Mux.RUnlock() + if job.ResponseChan != nil { + return job.ResponseChan + } + return nil +} + +// QuitResponseChan signals that no more responses will be sent and returns the closed ResponseChan +func (job *CHTTJob) QuitResponseChan() chan *TTResponse { + job.OffsetQ.OffsetBatchDone() + job.Response(true, nil) + job.Mux.RLock() + defer job.Mux.RUnlock() + if job.ResponseChan != nil { + log.Printf("Newsgroup: '%s' | CHTTJob.QuitResponseChan(): returning closed ResponseChan for job #%d", *job.Newsgroup, job.JobID) + return job.ResponseChan + } + return nil +} + +// Response sends the response back to a go routine via the ResponseChan +func (job *CHTTJob) Response(ForceCleanUp bool, Err error) { + if job.ResponseChan == nil { + log.Printf("ERROR CHTTJob.Response(): ResponseChan is nil for job #%d", job.JobID) + return + } + + // Check if response already sent (prevents double send on connection loss) + job.Mux.Lock() + if job.responseSent { + log.Printf("WARNING CHTTJob.Response(): Response already sent for job #%d, skipping", job.JobID) + job.Mux.Unlock() + return + } + job.responseSent = true + job.Mux.Unlock() + + job.ResponseChan <- GetTTResponse(job, ForceCleanUp, Err) + //close(job.ResponseChan) +} + +// NewsgroupTransferProgressMap is protected by ResultsMutex, used in nntp-transfer/main.go +var ResultsMutex sync.RWMutex +var NewsgroupTransferProgressMap = make(map[string]*NewsgroupTransferProgress) + +// NewsgroupProgress tracks the progress of a newsgroup transfer +type NewsgroupTransferProgress struct { + Mux sync.RWMutex + Newsgroup *string + Started time.Time + LastUpdated time.Time + + OffsetStart int64 + //BatchStart int64 + //BatchEnd int64 + TotalArticles int64 + Checked uint64 + Wanted uint64 + Unwanted uint64 + Rejected uint64 + Retry uint64 + Transferred uint64 + TTSentCount uint64 + CheckSentCount uint64 + RedisCached uint64 + TxErrors uint64 + ConnErrors uint64 + 
Skipped uint64 + RedisCachedBeforeCheck uint64 + RedisCachedBeforeTakethis uint64 + ArticlesTT uint64 + ArticlesCH uint64 + Finished bool + TXBytes uint64 + TXBytesTMP uint64 + LastCronTX time.Time + LastSpeedKB uint64 + LastArtPerfC uint64 // check articles per second + LastArtPerfT uint64 // takethis articles per second +} + +// GetSpeed returns the last calculated transfer speed in KB/s +func (ngp *NewsgroupTransferProgress) GetSpeed() uint64 { + ngp.Mux.RLock() + defer ngp.Mux.RUnlock() + return ngp.LastSpeedKB +} + +// CalcSpeed calculates the transfer speed and article performance +func (ngp *NewsgroupTransferProgress) CalcSpeed() { + ngp.Mux.Lock() + if time.Since(ngp.LastCronTX) >= time.Second*3 { + since := uint64(time.Since(ngp.LastCronTX).Seconds()) + if ngp.TXBytesTMP > 0 { + ngp.LastSpeedKB = ngp.TXBytesTMP / since / 1024 + } else { + ngp.LastSpeedKB = 0 + } + if ngp.ArticlesCH > 0 { + ngp.LastArtPerfC = ngp.ArticlesCH / since + } else { + ngp.LastArtPerfC = 0 + } + if ngp.ArticlesTT > 0 { + ngp.LastArtPerfT = ngp.ArticlesTT / since + } else { + ngp.LastArtPerfT = 0 + } + //log.Printf("Newsgroup: '%s' | Transfer Perf: %d KB/s (%d bytes in %v) did: CH=(%d|%d/s) TT=(%d|%d/s)", *ngp.Newsgroup, ngp.LastSpeedKB, ngp.TXBytesTMP, since, ngp.ArticlesCH, ngp.LastArtPerfC, ngp.ArticlesTT, ngp.LastArtPerfT) + + ngp.ArticlesCH = 0 + ngp.ArticlesTT = 0 + ngp.TXBytesTMP = 0 + ngp.LastCronTX = time.Now() + } + ngp.Mux.Unlock() +} + +// AddNGTP adds to the NewsgroupTransferProgress temporary counters to calculate speed +func (ngp *NewsgroupTransferProgress) AddNGTP(articlesCH uint64, articlesTT uint64, txbytes uint64) { + if articlesCH > 0 { + ngp.Mux.Lock() + ngp.ArticlesCH += articlesCH + ngp.Mux.Unlock() + } + if articlesTT > 0 { + ngp.Mux.Lock() + ngp.ArticlesTT += articlesTT + ngp.Mux.Unlock() + } + if txbytes > 0 { + ngp.Mux.Lock() + ngp.TXBytes += txbytes + ngp.TXBytesTMP += txbytes + ngp.Mux.Unlock() + } + if articlesCH > 0 || articlesTT > 0 || txbytes > 0 { + 
ngp.Mux.Lock() + ngp.LastUpdated = time.Now() + ngp.Mux.Unlock() + } + ngp.CalcSpeed() +} + +const IncrFLAG_CHECKED = 1 +const IncrFLAG_WANTED = 2 +const IncrFLAG_UNWANTED = 3 +const IncrFLAG_REJECTED = 4 +const IncrFLAG_RETRY = 5 +const IncrFLAG_TRANSFERRED = 6 +const IncrFLAG_REDIS_CACHED = 7 +const IncrFLAG_TX_ERRORS = 8 +const IncrFLAG_CONN_ERRORS = 9 +const IncrFLAG_SKIPPED = 10 +const IncrFLAG_REDIS_CACHED_BEFORE_CHECK = 11 +const IncrFLAG_REDIS_CACHED_BEFORE_TAKETHIS = 12 +const IncrFLAG_TTSentCount = 13 + +// Increment increments a counter in NewsgroupTransferProgress +func (ntp *NewsgroupTransferProgress) Increment(counter int, n uint64) { + ntp.Mux.Lock() + defer ntp.Mux.Unlock() + switch counter { + case IncrFLAG_CHECKED: + ntp.Checked += n + case IncrFLAG_WANTED: + ntp.Wanted += n + case IncrFLAG_UNWANTED: + ntp.Unwanted += n + case IncrFLAG_REJECTED: + ntp.Rejected += n + case IncrFLAG_RETRY: + ntp.Retry += n + case IncrFLAG_TRANSFERRED: + ntp.Transferred += n + case IncrFLAG_REDIS_CACHED: + ntp.RedisCached += n + case IncrFLAG_TX_ERRORS: + ntp.TxErrors += n + case IncrFLAG_CONN_ERRORS: + ntp.ConnErrors += n + case IncrFLAG_SKIPPED: + ntp.Skipped += n + case IncrFLAG_TTSentCount: + ntp.TTSentCount += n + case IncrFLAG_REDIS_CACHED_BEFORE_CHECK: + ntp.RedisCachedBeforeCheck += n + ntp.RedisCached += n // also increment total + case IncrFLAG_REDIS_CACHED_BEFORE_TAKETHIS: + ntp.RedisCachedBeforeTakethis += n + ntp.RedisCached += n // also increment total + } +} + +// AppendMessageID appends a message ID to the job +func (job *CHTTJob) AppendWantedMessageID(msgID *string) { + job.Mux.Lock() + job.WantedIDs = append(job.WantedIDs, msgID) + job.Mux.Unlock() +} + +// UseCHECK returns true if CHECK mode is active +func (ttMode *TakeThisMode) UseCHECK() bool { + ttMode.mux.Lock() + defer ttMode.mux.Unlock() + if ttMode.CheckMode { + return true + } + return false +} + +// SetForceCHECK forces CHECK mode +func (ttMode *TakeThisMode) SetForceCHECK() { + 
ttMode.mux.Lock() + ttMode.CheckMode = true + ttMode.mux.Unlock() +} + +// IncrementSuccess increments the temporary TAKETHIS success count +func (ttMode *TakeThisMode) IncrementSuccess() { + ttMode.mux.Lock() + ttMode.TmpSuccessCount++ + ttMode.mux.Unlock() +} + +// IncrementTmp increments the temporary TAKETHIS total count +func (ttMode *TakeThisMode) IncrementTmp() { + ttMode.mux.Lock() + ttMode.TmpTTotalsCount++ + ttMode.mux.Unlock() +} + +// SetNoCHECK forces TAKETHIS mode +func (ttMode *TakeThisMode) SetNoCHECK() { + ttMode.mux.Lock() + ttMode.CheckMode = false + ttMode.mux.Unlock() +} + +// FlipMode checks the TAKETHIS success rate and flips between CHECK and TAKETHIS modes +func (ttMode *TakeThisMode) FlipMode(lowerLevel float64, upperLevel float64) { + ttMode.mux.Lock() + defer ttMode.mux.Unlock() + if ttMode.TmpSuccessCount < 100 || ttMode.TmpTTotalsCount < 100 { + ttMode.CheckMode = true + return // Force CHECK mode for this batch + } + successRate := float64(ttMode.TmpSuccessCount) / float64(ttMode.TmpTTotalsCount) * 100.0 + switch ttMode.CheckMode { + case false: // Currently in TAKETHIS mode + if successRate < lowerLevel { + ttMode.CheckMode = true + log.Printf("Newsgroup: '%s' | TAKETHIS success rate %.1f%% < %f%%, switching to CHECK mode (%d/%d)", *ttMode.Newsgroup, successRate, lowerLevel, ttMode.TmpSuccessCount, ttMode.TmpTTotalsCount) + } + case true: // Currently in CHECK mode + if successRate > upperLevel { + ttMode.CheckMode = false + log.Printf("Newsgroup: '%s' | TAKETHIS success rate %.1f%% >= %f%%, switching to TAKETHIS mode (%d/%d)", *ttMode.Newsgroup, successRate, upperLevel, ttMode.TmpSuccessCount, ttMode.TmpTTotalsCount) + } + } + ttMode.TmpSuccessCount = 0 + ttMode.TmpTTotalsCount = 0 +} diff --git a/internal/nntp/transfer-progress-utils.go b/internal/nntp/transfer-progress-utils.go new file mode 100644 index 00000000..29a19d4b --- /dev/null +++ b/internal/nntp/transfer-progress-utils.go @@ -0,0 +1,154 @@ +package nntp + +import ( + 
"fmt" + "log" + "time" +) + +// PrintRecentTransfers prints recent transfer results to the console +func (tpdb *TransferProgressDB) PrintRecentTransfers(limit int) error { + results, err := tpdb.GetRecentTransfers(limit) + if err != nil { + return fmt.Errorf("failed to get recent transfers: %v", err) + } + + if len(results) == 0 { + log.Printf("No transfer records found for remote '%s' (id=%d)", tpdb.remoteName, tpdb.remoteID) + return nil + } + + log.Printf("=== Recent transfers for remote '%s' (showing last %d) ===", tpdb.remoteName, len(results)) + log.Printf("%-30s %-20s %8s %8s %8s %8s %8s %8s %8s %8s", + "Newsgroup", "Timestamp", "Sent", "Unwanted", "Checked", "Rejected", "Retry", "Skipped", "TXErr", "ConnErr") + log.Printf("%s", "-------------------------------------------------------------------------------------------------------------------") + + for _, r := range results { + log.Printf("%-30s %-20s %8d %8d %8d %8d %8d %8d %8d %8d", + truncateString(r.Newsgroup, 30), + r.Timestamp.Format("2006-01-02 15:04:05"), + r.Sent, + r.Unwanted, + r.Checked, + r.Rejected, + r.Retry, + r.Skipped, + r.TXErrors, + r.ConnErrors, + ) + } + + return nil +} + +// GetTransferStatsByNewsgroup returns aggregated statistics for a specific newsgroup +func (tpdb *TransferProgressDB) GetTransferStatsByNewsgroup(newsgroup string) (*TransferResult, error) { + tpdb.mu.RLock() + defer tpdb.mu.RUnlock() + + query := ` + SELECT + remote_id, + newsgroup, + MAX(timestamp) as last_transfer, + SUM(sent) as total_sent, + SUM(unwanted) as total_unwanted, + SUM(checked) as total_checked, + SUM(rejected) as total_rejected, + SUM(retry) as total_retry, + SUM(skipped) as total_skipped, + SUM(tx_errors) as total_tx_errors, + SUM(conn_errors) as total_conn_errors + FROM transfers + WHERE remote_id = ? AND newsgroup = ? 
+ GROUP BY remote_id, newsgroup + ` + + var r TransferResult + var timestampStr string + err := tpdb.db.QueryRow(query, tpdb.remoteID, newsgroup).Scan( + &r.RemoteID, + &r.Newsgroup, + &timestampStr, + &r.Sent, + &r.Unwanted, + &r.Checked, + &r.Rejected, + &r.Retry, + &r.Skipped, + &r.TXErrors, + &r.ConnErrors, + ) + if err != nil { + return nil, err + } + + // Parse timestamp + r.Timestamp, err = time.Parse("2006-01-02 15:04:05", timestampStr) + if err != nil { + return nil, err + } + + return &r, nil +} + +// GetAllTransferStats returns aggregated statistics across all newsgroups +func (tpdb *TransferProgressDB) GetAllTransferStats() (*TransferResult, error) { + tpdb.mu.RLock() + defer tpdb.mu.RUnlock() + + query := ` + SELECT + remote_id, + MAX(timestamp) as last_transfer, + SUM(sent) as total_sent, + SUM(unwanted) as total_unwanted, + SUM(checked) as total_checked, + SUM(rejected) as total_rejected, + SUM(retry) as total_retry, + SUM(skipped) as total_skipped, + SUM(tx_errors) as total_tx_errors, + SUM(conn_errors) as total_conn_errors + FROM transfers + WHERE remote_id = ? + GROUP BY remote_id + ` + + var r TransferResult + var timestampStr string + err := tpdb.db.QueryRow(query, tpdb.remoteID).Scan( + &r.RemoteID, + &timestampStr, + &r.Sent, + &r.Unwanted, + &r.Checked, + &r.Rejected, + &r.Retry, + &r.Skipped, + &r.TXErrors, + &r.ConnErrors, + ) + if err != nil { + return nil, err + } + + // Parse timestamp + r.Timestamp, err = time.Parse("2006-01-02 15:04:05", timestampStr) + if err != nil { + return nil, err + } + + r.Newsgroup = "ALL" + return &r, nil +} + +// truncateString truncates a string to the specified length +func truncateString(s string, maxLen int) string { + if len(s) <= maxLen { + return s + } + if maxLen <= 3 { + return s[:maxLen] + } + return s[:maxLen-3] + "..." 
+} diff --git a/internal/nntp/transfer-progress.go b/internal/nntp/transfer-progress.go new file mode 100644 index 00000000..ef62aea4 --- /dev/null +++ b/internal/nntp/transfer-progress.go @@ -0,0 +1,400 @@ +package nntp + +import ( + "database/sql" + "fmt" + "log" + "os" + "path/filepath" + "sync" + "time" + + "github.com/go-while/go-pugleaf/internal/database" + _ "github.com/mattn/go-sqlite3" +) + +// TransferProgressDB manages the SQLite database for tracking transfer progress +type TransferProgressDB struct { + db *sql.DB + remoteID int64 + remoteName string + mu sync.RWMutex +} + +// TransferResult represents a single transfer result record +type TransferResult struct { + RemoteID int64 + Newsgroup string + Timestamp time.Time + StartDate *time.Time // Can be nil for no start date filter + EndDate *time.Time // Can be nil for no end date filter + Sent int64 + Unwanted int64 + Checked int64 + Rejected int64 + Retry int64 + Skipped int64 + TXErrors int64 + ConnErrors int64 +} + +// OpenTransferProgressDB opens or creates the transfer progress database +func OpenTransferProgressDB(dataDir, remoteName string) (*TransferProgressDB, error) { + // Create data directory if it doesn't exist + progressDir := filepath.Join(dataDir, "transfer-progress") + if err := os.MkdirAll(progressDir, 0755); err != nil { + return nil, fmt.Errorf("failed to create progress directory: %v", err) + } + + dbPath := filepath.Join(progressDir, "transfer-progress.db") + db, err := sql.Open("sqlite3", dbPath) + if err != nil { + return nil, fmt.Errorf("failed to open database: %v", err) + } + + // Enable WAL mode for better concurrent performance + if _, err := db.Exec("PRAGMA journal_mode=WAL"); err != nil { + db.Close() + return nil, fmt.Errorf("failed to set WAL mode: %v", err) + } + + // Create tables if they don't exist + if err := createTables(db); err != nil { + db.Close() + return nil, fmt.Errorf("failed to create tables: %v", err) + } + + tpdb := &TransferProgressDB{ + db: db, + 
remoteName: remoteName, + } + + // Get or create remote ID + remoteID, err := tpdb.getOrCreateRemoteTransferProgress(remoteName) + if err != nil { + db.Close() + return nil, fmt.Errorf("failed to get/create remote: %v", err) + } + tpdb.remoteID = remoteID + + log.Printf("Transfer progress database opened: %s (remote_id=%d, hostname=%s)", dbPath, remoteID, remoteName) + return tpdb, nil +} + +// createTables creates the necessary database tables +func createTables(db *sql.DB) error { + schema := ` + CREATE TABLE IF NOT EXISTS remotes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + hostname TEXT NOT NULL UNIQUE, + created_at TEXT NOT NULL DEFAULT (datetime('now', 'utc')) + ); + + CREATE TABLE IF NOT EXISTS transfers ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + remote_id INTEGER NOT NULL, + newsgroup TEXT NOT NULL, + timestamp TEXT NOT NULL, + start_date TEXT NOT NULL DEFAULT "", + end_date TEXT NOT NULL DEFAULT "", + sent INTEGER NOT NULL DEFAULT 0, + unwanted INTEGER NOT NULL DEFAULT 0, + checked INTEGER NOT NULL DEFAULT 0, + rejected INTEGER NOT NULL DEFAULT 0, + retry INTEGER NOT NULL DEFAULT 0, + skipped INTEGER NOT NULL DEFAULT 0, + tx_errors INTEGER NOT NULL DEFAULT 0, + conn_errors INTEGER NOT NULL DEFAULT 0, + FOREIGN KEY (remote_id) REFERENCES remotes(id) + ); + + CREATE INDEX IF NOT EXISTS idx_transfers_remote_newsgroup + ON transfers(remote_id, newsgroup); + + CREATE INDEX IF NOT EXISTS idx_transfers_remote_ng_dates + ON transfers(remote_id, newsgroup, start_date, end_date); + + CREATE INDEX IF NOT EXISTS idx_transfers_timestamp + ON transfers(timestamp DESC); + ` + + _, err := database.RetryableExec(db, schema) + return err +} + +const query_getOrCreateRemote = "INSERT INTO remotes (hostname, created_at) VALUES (?, datetime('now', 'utc'))" + +// getOrCreateRemote gets or creates a remote server record +func (tpdb *TransferProgressDB) getOrCreateRemoteTransferProgress(hostname string) (int64, error) { + tpdb.mu.Lock() + defer tpdb.mu.Unlock() + + // Try to get 
existing remote + var id int64 + err := database.RetryableQueryRowScan( + tpdb.db, + "SELECT id FROM remotes WHERE hostname = ?", + []interface{}{hostname}, + &id, + ) + if err == nil { + return id, nil + } + if err != sql.ErrNoRows { + return 0, err + } + + // Create new remote + result, err := database.RetryableExec(tpdb.db, query_getOrCreateRemote, hostname) + if err != nil { + return 0, err + } + + return result.LastInsertId() +} + +const query_InsertResult = ` + INSERT INTO transfers ( + remote_id, newsgroup, timestamp, start_date, end_date, sent, unwanted, checked, + rejected, retry, skipped, tx_errors, conn_errors + ) VALUES (?, ?, datetime('now', 'utc'), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ` + +// InsertResult inserts a transfer result into the database +func (tpdb *TransferProgressDB) InsertResult(newsgroup string, startDate, endDate *time.Time, sent, unwanted, checked, rejected, retry, skipped, txErrors, connErrors int64) error { + tpdb.mu.Lock() + defer tpdb.mu.Unlock() + + // Convert time pointers to strings (empty string if nil to match NOT NULL DEFAULT "") + var startDateStr, endDateStr string + if startDate != nil { + startDateStr = startDate.UTC().Format("2006-01-02 15:04:05") + } else { + startDateStr = "" + } + if endDate != nil { + endDateStr = endDate.UTC().Format("2006-01-02 15:04:05") + } else { + endDateStr = "" + } + + _, err := database.RetryableExec( + tpdb.db, + query_InsertResult, + tpdb.remoteID, + newsgroup, + startDateStr, + endDateStr, + sent, + unwanted, + checked, + rejected, + retry, + skipped, + txErrors, + connErrors, + ) + + if err != nil { + return fmt.Errorf("failed to insert transfer result: %v", err) + } + + return nil +} + +// GetRemoteID returns the current remote ID +func (tpdb *TransferProgressDB) GetRemoteID() int64 { + tpdb.mu.RLock() + defer tpdb.mu.RUnlock() + return tpdb.remoteID +} + +// GetRemoteName returns the current remote hostname +func (tpdb *TransferProgressDB) GetRemoteName() string { + tpdb.mu.RLock() + 
defer tpdb.mu.RUnlock() + return tpdb.remoteName +} + +// GetRecentTransfers returns recent transfer records for the current remote +func (tpdb *TransferProgressDB) GetRecentTransfers(limit int) ([]TransferResult, error) { + tpdb.mu.RLock() + defer tpdb.mu.RUnlock() + + query := ` + SELECT remote_id, newsgroup, timestamp, start_date, end_date, sent, unwanted, checked, + rejected, retry, skipped, tx_errors, conn_errors + FROM transfers + WHERE remote_id = ? + ORDER BY timestamp DESC + LIMIT ? + ` + + rows, err := database.RetryableQuery(tpdb.db, query, tpdb.remoteID, limit) + if err != nil { + return nil, err + } + defer rows.Close() + + var results []TransferResult + for rows.Next() { + var r TransferResult + var timestampStr string + var startDateStr, endDateStr sql.NullString + err := rows.Scan( + &r.RemoteID, + &r.Newsgroup, + &timestampStr, + &startDateStr, + &endDateStr, + &r.Sent, + &r.Unwanted, + &r.Checked, + &r.Rejected, + &r.Retry, + &r.Skipped, + &r.TXErrors, + &r.ConnErrors, + ) + if err != nil { + return nil, err + } + + // Parse timestamp + r.Timestamp, err = time.Parse("2006-01-02 15:04:05", timestampStr) + if err != nil { + return nil, err + } + + // Parse start_date if present (schema default is "", meaning no start filter) + if startDateStr.Valid && startDateStr.String != "" { + parsedStart, err := time.Parse("2006-01-02 15:04:05", startDateStr.String) + if err != nil { + return nil, err + } + r.StartDate = &parsedStart + } + + // Parse end_date if present (schema default is "", meaning no end filter) + if endDateStr.Valid && endDateStr.String != "" { + parsedEnd, err := time.Parse("2006-01-02 15:04:05", endDateStr.String) + if err != nil { + return nil, err + } + r.EndDate = &parsedEnd + } + + results = append(results, r) + } + + return results, rows.Err() +} + +// NewsgroupExists checks if a newsgroup already has transfer results for the current remote +// with exactly the same start_date and end_date (empty string represents no filter) +func (tpdb *TransferProgressDB) NewsgroupExists(newsgroup string, startDate, endDate *time.Time) (bool, error) { + tpdb.mu.RLock() + defer tpdb.mu.RUnlock() + + 
// Convert time pointers to strings (empty string if nil to match schema) + var startDateStr, endDateStr string + if startDate != nil { + startDateStr = startDate.UTC().Format("2006-01-02 15:04:05") + } else { + startDateStr = "" + } + if endDate != nil { + endDateStr = endDate.UTC().Format("2006-01-02 15:04:05") + } else { + endDateStr = "" + } + + // Query that checks for exact match including empty strings + query := ` + SELECT COUNT(*) FROM transfers + WHERE remote_id = ? + AND newsgroup = ? + AND start_date = ? + AND end_date = ? + ` + + var count int64 + err := database.RetryableQueryRowScan( + tpdb.db, + query, + []interface{}{ + tpdb.remoteID, + newsgroup, + startDateStr, + endDateStr, + }, + &count, + ) + if err != nil { + return false, fmt.Errorf("failed to check newsgroup existence: %v", err) + } + + return count > 0, nil +} + +// GetAllProgressNewsgroups fetches all newsgroup names that have progress records for the current remote and date range +// Returns a map for O(1) lookup performance +func (tpdb *TransferProgressDB) GetAllProgressNewsgroups(startDate, endDate *time.Time) (map[string]bool, error) { + tpdb.mu.RLock() + defer tpdb.mu.RUnlock() + + // Convert time pointers to strings (empty string if nil to match schema) + var startDateStr, endDateStr string + if startDate != nil { + startDateStr = startDate.UTC().Format("2006-01-02 15:04:05") + } else { + startDateStr = "" + } + if endDate != nil { + endDateStr = endDate.UTC().Format("2006-01-02 15:04:05") + } else { + endDateStr = "" + } + + query := ` + SELECT DISTINCT newsgroup FROM transfers + WHERE remote_id = ? + AND start_date = ? + AND end_date = ? 
+ ` + + rows, err := database.RetryableQuery( + tpdb.db, + query, + tpdb.remoteID, + startDateStr, + endDateStr, + ) + if err != nil { + return nil, fmt.Errorf("failed to fetch progress newsgroups: %v", err) + } + defer rows.Close() + + progressMap := make(map[string]bool) + for rows.Next() { + var newsgroup string + if err := rows.Scan(&newsgroup); err != nil { + return nil, fmt.Errorf("failed to scan newsgroup: %v", err) + } + progressMap[newsgroup] = true + } + + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("error iterating progress newsgroups: %v", err) + } + + return progressMap, nil +} + +// Close closes the database connection +func (tpdb *TransferProgressDB) Close() error { + if tpdb.db != nil { + return tpdb.db.Close() + } + return nil +} diff --git a/internal/postmgr/postmgr.go b/internal/postmgr/postmgr.go index 760324c5..74ff4e17 100644 --- a/internal/postmgr/postmgr.go +++ b/internal/postmgr/postmgr.go @@ -153,14 +153,14 @@ func (pm *PosterManager) processEntry(entry database.PostQueueEntry) error { // getArticleByMessageID retrieves an article from the local database func (pm *PosterManager) getArticleByMessageID(messageID, newsgroup string) (*models.Article, error) { // Get group database connection - groupDBs, err := pm.DB.GetGroupDBs(newsgroup) + groupDB, err := pm.DB.GetGroupDB(newsgroup) if err != nil { return nil, err } - defer groupDBs.Return(pm.DB) + defer groupDB.Return() - // Get article by message ID using the database method (not groupDBs method) - article, err := pm.DB.GetArticleByMessageID(groupDBs, messageID) + // Get article by message ID using the database method (not groupDB method) + article, err := pm.DB.GetArticleByMessageID(groupDB, messageID) if err != nil { return nil, err } diff --git a/internal/preloader/preloader.go b/internal/preloader/preloader.go index b208ba65..679cf065 100644 --- a/internal/preloader/preloader.go +++ b/internal/preloader/preloader.go @@ -275,7 +275,7 @@ func RepairNewsgroupWatermarks(ctx 
context.Context, db *database.Database) error for _, newsgroup := range newsgroups { // Get group database to check actual articles - groupDBs, err := db.GetGroupDBs(newsgroup.Name) + groupDB, err := db.GetGroupDB(newsgroup.Name) if err != nil { log.Printf("PreLoader: Failed to get group DBs for %s: %v", newsgroup.Name, err) errorCount++ @@ -286,33 +286,33 @@ func RepairNewsgroupWatermarks(ctx context.Context, db *database.Database) error var maxArticle, minArticle, articleCount int64 // Get max article number - err = database.RetryableQueryRowScan(groupDBs.DB, "SELECT COALESCE(MAX(article_num), 0) FROM articles", nil, &maxArticle) + err = database.RetryableQueryRowScan(groupDB.DB, "SELECT COALESCE(MAX(article_num), 0) FROM articles", nil, &maxArticle) if err != nil { log.Printf("PreLoader: Failed to get max article for %s: %v", newsgroup.Name, err) - groupDBs.Return(db) + groupDB.Return() errorCount++ continue } // Get min article number - err = database.RetryableQueryRowScan(groupDBs.DB, "SELECT COALESCE(MIN(article_num), 1) FROM articles", nil, &minArticle) + err = database.RetryableQueryRowScan(groupDB.DB, "SELECT COALESCE(MIN(article_num), 1) FROM articles", nil, &minArticle) if err != nil { log.Printf("PreLoader: Failed to get min article for %s: %v", newsgroup.Name, err) - groupDBs.Return(db) + groupDB.Return() errorCount++ continue } // Get article count - err = database.RetryableQueryRowScan(groupDBs.DB, "SELECT COUNT(*) FROM articles", nil, &articleCount) + err = database.RetryableQueryRowScan(groupDB.DB, "SELECT COUNT(*) FROM articles", nil, &articleCount) if err != nil { log.Printf("PreLoader: Failed to get article count for %s: %v", newsgroup.Name, err) - groupDBs.Return(db) + groupDB.Return() errorCount++ continue } - groupDBs.Return(db) + groupDB.Return() // If no articles, set defaults if articleCount == 0 { diff --git a/internal/processor/PostQueue.go b/internal/processor/PostQueue.go index 077c25b4..cf21d3f3 100644 --- 
a/internal/processor/PostQueue.go +++ b/internal/processor/PostQueue.go @@ -140,11 +140,11 @@ func (w *PostQueueWorker) processArticleForNewsgroup(article *models.Article, ne } // Get group database connection - groupDBs, err := w.processor.DB.GetGroupDBs(newsgroup) + groupDB, err := w.processor.DB.GetGroupDB(newsgroup) if err != nil { return err } - defer groupDBs.Return(w.processor.DB) + defer groupDB.Return() // Use the existing threading function to process the article // This will handle all the threading logic, database insertion, etc. diff --git a/internal/processor/analyze.go b/internal/processor/analyze.go index bf8efa86..dd88867e 100644 --- a/internal/processor/analyze.go +++ b/internal/processor/analyze.go @@ -496,7 +496,7 @@ func (proc *Processor) cacheFileExists(cacheFile string) bool { } // GetCachedMessageIDs returns cached message IDs for download optimization -func (proc *Processor) GetCachedMessageIDs(groupName string, startArticle, endArticle int64) ([]*nntp.HeaderLine, error) { +func (proc *Processor) GetCachedMessageIDs(groupName string, startArticle, endArticle int64) ([]nntp.HeaderLine, error) { providerName := "unknown" if proc.Pool.Backend != nil { providerName = proc.Pool.Backend.Provider.Name @@ -518,7 +518,7 @@ func (proc *Processor) GetCachedMessageIDs(groupName string, startArticle, endAr } defer file.Close() - var results []*nntp.HeaderLine + var results []nntp.HeaderLine scanner := bufio.NewScanner(file) for scanner.Scan() { @@ -550,7 +550,7 @@ func (proc *Processor) GetCachedMessageIDs(groupName string, startArticle, endAr continue } - results = append(results, &nntp.HeaderLine{ + results = append(results, nntp.HeaderLine{ ArticleNum: articleNum, Value: messageID, }) diff --git a/internal/processor/interface.go b/internal/processor/interface.go index 5c645ba7..ca7cef1f 100644 --- a/internal/processor/interface.go +++ b/internal/processor/interface.go @@ -4,19 +4,21 @@ import ( "github.com/go-while/go-pugleaf/internal/database" ) -// 
MsgIdExists implements the ThreadingProcessor interface +/* +// MsgIdExists implements the ProcessorInterface interface // Returns true if the message ID exists in the cache for the given group func (proc *Processor) MsgIdExists(group *string, messageID string) bool { item := proc.MsgIdCache.MsgIdExists(group, messageID) return item != nil } +*/ func (proc *Processor) IsNewsGroupInSectionsDB(name *string) bool { return proc.DB.IsNewsGroupInSections(*name) } -// ForceCloseGroupDBs implements the ThreadingProcessor interface +// ForceCloseGroupDB implements the ProcessorInterface interface // Forces closure of group database connections -func (proc *Processor) ForceCloseGroupDBs(groupsDB *database.GroupDBs) error { - return proc.DB.ForceCloseGroupDBs(groupsDB) +func (proc *Processor) ForceCloseGroupDB(groupsDB *database.GroupDB) error { + return proc.DB.ForceCloseGroupDB(groupsDB) } diff --git a/internal/processor/proc-utils.go b/internal/processor/proc-utils.go index 1621b906..22e89354 100644 --- a/internal/processor/proc-utils.go +++ b/internal/processor/proc-utils.go @@ -416,7 +416,7 @@ func (proc *Processor) extractGroupsFromHeaders(msgID, groupsline string) []stri rawGroups := common.SeparatorRegex.Split(groupsline, -1) var validGroups []string - seen := make(map[string]bool) // For deduplication + seen := make(map[string]bool, len(rawGroups)) // For deduplication for _, group := range rawGroups { group = strings.TrimSpace(group) @@ -446,10 +446,8 @@ func (proc *Processor) extractGroupsFromHeaders(msgID, groupsline string) []stri } // Deduplicate using map (faster than slices.Contains) - if !seen[group] { - seen[group] = true - validGroups = append(validGroups, group) - } + seen[group] = true + validGroups = append(validGroups, group) } return validGroups diff --git a/internal/processor/proc_DLArt.go b/internal/processor/proc_DLArt.go index 8da0daac..27dbc5b1 100644 --- a/internal/processor/proc_DLArt.go +++ b/internal/processor/proc_DLArt.go @@ -66,6 +66,9 @@ 
func (bq *BatchQueue) GetOrCreateGroupBatch(newsgroup string) *GroupBatch { // DownloadArticles fetches full articles and stores them in the articles DB. func (proc *Processor) DownloadArticles(newsgroup string, DLParChan chan struct{}, progressDB *database.ProgressDB, start int64, end int64, shutdownChan <-chan struct{}) error { + if common.WantShutdown() { + return fmt.Errorf("DownloadArticles: common.WantShutdown() group '%s'", newsgroup) + } //log.Printf("DEBUG-DownloadArticles: ng='%s' called with start=%d end=%d", newsgroup, start, end) DLParChan <- struct{}{} // aquire lock defer func() { @@ -83,32 +86,29 @@ func (proc *Processor) DownloadArticles(newsgroup string, DLParChan chan struct{ return fmt.Errorf("DownloadArticles: NNTP pool is nil for group '%s'", newsgroup) } //log.Printf("DownloadArticles: ng: '%s' @ (%s)", newsgroup, providerName) - groupDBs, err := proc.DB.GetGroupDBs(newsgroup) + groupDB, err := proc.DB.GetGroupDB(newsgroup) if err != nil { log.Printf("Failed to get group DBs for newsgroup '%s': %v", newsgroup, err) - if groupDBs != nil { - if err := proc.DB.ForceCloseGroupDBs(groupDBs); err != nil { - log.Printf("error in DownloadArticles ForceCloseGroupDBs err='%v'", err) + if groupDB != nil { + if err := proc.DB.ForceCloseGroupDB(groupDB); err != nil { + log.Printf("error in DownloadArticles ForceCloseGroupDB err='%v'", err) } - //groupDBs.Return(proc.DB) // Return connection even on error + //groupDB.Return(proc.DB) // Return connection even on error } log.Printf("DownloadArticles: Failed to get group DBs for newsgroup '%s': %v", newsgroup, err) return fmt.Errorf("error in DownloadArticles: failed to get group DBs err='%v'", err) } - defer proc.DB.ForceCloseGroupDBs(groupDBs) - if proc.DB.IsDBshutdown() { - return fmt.Errorf("DownloadArticles: Database shutdown detected for group '%s'", newsgroup) + defer proc.DB.ForceCloseGroupDB(groupDB) + if common.WantShutdown() { + return fmt.Errorf("DownloadArticles: common.WantShutdown() group 
'%s'", newsgroup) } //remaining := groupInfo.Last - end //log.Printf("DownloadArticles: Fetching XHDR for %s from %d to %d (last known: %d, remaining: %d)", newsgroup, start, end, groupInfo.Last, remaining) var lastGoodEnd int64 = start //toFetch := end - start + 1 // +1 because ranges are inclusive (start=1, end=3 means articles 1,2,3) - xhdrChan := make(chan *nntp.HeaderLine, 1000) + xhdrChan := make(chan nntp.HeaderLine, MaxBatchSize) errChan := make(chan error, 1) //log.Printf("Launch XHdrStreamed: '%s' toFetch=%d start=%d end=%d", newsgroup, toFetch, start, end) - if proc.DB.IsDBshutdown() { - return fmt.Errorf("got shutdown in DownloadArticles: Database shutdown while in group '%s'", newsgroup) - } go func() { errChan <- proc.Pool.XHdrStreamed(newsgroup, "message-id", start, end, xhdrChan, shutdownChan) }() @@ -122,7 +122,7 @@ func (proc *Processor) DownloadArticles(newsgroup string, DLParChan chan struct{ var exists, queued int64 for hdr := range xhdrChan { if common.WantShutdown() { - log.Printf("DownloadArticlesFromDate: Worker received shutdown signal, stopping") + log.Printf("DownloadArticles: xhdrChan common.WantShutdown(): stopping") return } /* @@ -133,7 +133,7 @@ func (proc *Processor) DownloadArticles(newsgroup string, DLParChan chan struct{ } */ //log.Printf("DownloadArticles: Checking if article '%s' exists in group '%s'", msgID.Value, newsgroup) - if groupDBs.ExistsMsgIdInArticlesDB(hdr.Value) { + if groupDB.ExistsMsgIdInArticlesDB(hdr.Value) { exists++ groupBatch.ReturnQ <- &BatchItem{Error: errIsDuplicateError} continue @@ -148,12 +148,11 @@ func (proc *Processor) DownloadArticles(newsgroup string, DLParChan chan struct{ GroupName: proc.DB.Batch.GetNewsgroupPointer(newsgroup), } item.ReturnQ = groupBatch.ReturnQ - Batch.GetQ <- item // send to fetcher/main.go:461: for item := range processor.Batch.GetQ + Batch.GetQ <- item // send to fetcher/main.go:~L495: for item := range processor.Batch.GetQ queued++ //log.Printf("DownloadArticles:
Queued article %d (%s) for group '%s'", hdr.ArticleNum, hdr.Value, *item.GroupName) //hdr.Value = "" //hdr.ArticleNum = 0 - *hdr = nntp.HeaderLine{} } // end for xhdrChan //log.Printf("DownloadArticles: XHdr closed, finished feeding batch queue %d articles for group '%s' (existing: %d) total=%d", queued, newsgroup, exists, queued+exists) if queued == 0 { @@ -164,6 +163,7 @@ func (proc *Processor) DownloadArticles(newsgroup string, DLParChan chan struct{ }() var dups, lastDups, gots, lastGots, notf, lastNotf, errs, lastErrs int64 aliveCheck := 5 * time.Second + maxDeathCounter := 12 // Maximum number of allowed "stuck" checks done every aliveCheck interval ticker := time.NewTicker(100 * time.Millisecond) defer ticker.Stop() startTime := time.Now() @@ -172,7 +172,7 @@ func (proc *Processor) DownloadArticles(newsgroup string, DLParChan chan struct{ bulkmode := true var gotQueued int64 = -1 if common.WantShutdown() { - log.Printf("DownloadArticlesFromDate: Worker received shutdown signal, stopping") + log.Printf("DownloadArticles: common.WantShutdown(): stopping") return fmt.Errorf("shutdown requested") } // Start processing loop @@ -184,6 +184,12 @@ forProcessing: //log.Printf("DownloadArticles: releaseChan triggered '%s'", newsgroup) break forProcessing case <-ticker.C: + + /* disabled + if common.WantShutdown() { + return fmt.Errorf("DownloadArticles: common.WantShutdown() group '%s'", newsgroup) + } + */ // Periodically check if we are done or stuck if gotQueued > 0 && gots+errs+notf == gotQueued { //log.Printf("OK-DA1: '%s' (dups: %d, gots: %d, notf: %d, errs: %d, gotQueued: %d)", newsgroup, dups, gots, notf, errs, gotQueued) @@ -197,13 +203,17 @@ forProcessing: lastErrs = errs deathCounter = 0 // Reset death counter on progress } + if nextCheck.Before(time.Now()) { + if common.WantShutdown() { + return fmt.Errorf("DownloadArticles: got common.WantShutdown() in group '%s'", newsgroup) + } // If we haven't made progress in N seconds, log a warning
log.Printf("DownloadArticles: '%s' Stuck? %d articles processed (%d dups, %d gots, %d notf, %d errs, gotQueued: %d) (since Start=%v)", newsgroup, dups+gots+notf+errs, dups, gots, notf, errs, gotQueued, time.Since(startTime)) nextCheck = time.Now().Add(aliveCheck) // Reset last check time deathCounter++ } - if deathCounter > 3 { // If we are stuck for too long + if deathCounter > maxDeathCounter { // If we are stuck for too long log.Printf("DownloadArticles: '%s' Timeout... stopping import deathCounter=%d", newsgroup, deathCounter) return fmt.Errorf("DownloadArticles: '%s' Timeout... %d articles processed (%d dups, %d got, %d errs)", newsgroup, dups+gots+notf+errs, dups, gots, errs) } @@ -212,6 +222,15 @@ forProcessing: //log.Printf("DEBUG-RETURN: received item: Error=%v, Article=%v", item != nil && item.Error != nil, item != nil && item.Article != nil) if item == nil || item.Error != nil || item.Article == nil { if item != nil { + if item.MessageID != nil { + msgIdItem := history.MsgIdCache.GetORCreate(*item.MessageID) + if msgIdItem != nil && msgIdItem.MessageId == *item.MessageID { + msgIdItem.Mux.Lock() + msgIdItem.CachedEntryExpires = time.Now().Add(5 * time.Second) + msgIdItem.Response = history.CaseError + msgIdItem.Mux.Unlock() + } + } switch item.Error { case errIsDuplicateError: dups++ @@ -232,9 +251,6 @@ forProcessing: errs++ } } else if item.Error == nil && item.Article != nil { - if proc.DB.IsDBshutdown() { - return fmt.Errorf("DownloadArticles: Database shutdown detected for group '%s'", newsgroup) - } //log.Printf("DownloadArticles --> proc.processArticle '%s' in group '%s'", *item.MessageID, newsgroup) response, err := proc.processArticle(item.Article, newsgroup, bulkmode) if err != nil { @@ -259,16 +275,15 @@ forProcessing: } } // end for processing routine (counts only) if common.WantShutdown() { - log.Printf("DownloadArticlesFromDate: Worker received shutdown signal, stopping") - return fmt.Errorf("shutdown requested") - } - if 
proc.DB.IsDBshutdown() { - return fmt.Errorf("DownloadArticles: Database shutdown detected for group '%s'", newsgroup) + return fmt.Errorf("DownloadArticles: common.WantShutdown() group '%s'", newsgroup) } xerr := <-errChan if xerr != nil { end = lastGoodEnd } + if common.WantShutdown() { + return fmt.Errorf("DownloadArticles: common.WantShutdown() group '%s'", newsgroup) + } if gotQueued > 0 || dups > 0 { // only update progress if we actually got something err = progressDB.UpdateProgress(proc.Pool.Backend.Provider.Name, newsgroup, end) @@ -282,8 +297,8 @@ forProcessing: // do another one if we haven't run enough times runtime.GC() - if proc.DB.IsDBshutdown() { - return fmt.Errorf("DownloadArticles: Database shutdown detected for group '%s'", newsgroup) + if common.WantShutdown() { + return fmt.Errorf("DownloadArticles: common.WantShutdown() group '%s'", newsgroup) } return nil } // end func DownloadArticles @@ -341,6 +356,9 @@ func (proc *Processor) FindStartArticleByDate(groupName string, targetDate time. // Binary search using 50% approach for groupInfo.FetchEnd-groupInfo.FetchStart > 1 { + if common.WantShutdown() { + return 0, fmt.Errorf("FindStartArticleByDate: common.WantShutdown() group '%s'", groupName) + } mid := groupInfo.FetchStart + (groupInfo.FetchEnd-groupInfo.FetchStart)/2 // Get XOVER for this article @@ -350,9 +368,7 @@ func (proc *Processor) FindStartArticleByDate(groupName string, targetDate time. groupInfo.FetchStart = mid continue } - if proc.DB.IsDBshutdown() { - return 0, fmt.Errorf("FindStartArticleByDate: Database shutdown detected for group '%s'", groupName) - } + articleDate := ParseNNTPDate(overviews[0].Date) if articleDate.IsZero() { groupInfo.FetchStart = mid @@ -367,6 +383,9 @@ func (proc *Processor) FindStartArticleByDate(groupName string, targetDate time. 
groupInfo.FetchEnd = mid } } + if common.WantShutdown() { + return 0, fmt.Errorf("FindStartArticleByDate: common.WantShutdown() group '%s'", groupName) + } log.Printf("Found start article: %d, ng: %s", groupInfo.FetchEnd, groupName) return groupInfo.FetchEnd, nil } @@ -436,7 +455,7 @@ func (proc *Processor) DownloadArticlesFromDate(groupName string, startDate time downloadEnd = groupInfo.Last } if common.WantShutdown() { - log.Printf("DownloadArticlesFromDate: Worker received shutdown signal, stopping") + log.Printf("DownloadArticlesFromDate: common.WantShutdown(): stopping") return fmt.Errorf("shutdown requested") } //log.Printf("DownloadArticlesFromDate: Downloading range %d-%d for group '%s' (group last: %d)", downloadStart, downloadEnd, groupName, groupInfo.Last) @@ -445,7 +464,7 @@ func (proc *Processor) DownloadArticlesFromDate(groupName string, startDate time err = proc.DownloadArticles(groupName, DLParChan, progressDB, downloadStart, downloadEnd, shutdownChan) if common.WantShutdown() { - log.Printf("DownloadArticlesFromDate: Worker received shutdown signal, stopping") + log.Printf("DownloadArticlesFromDate: common.WantShutdown(): stopping") return fmt.Errorf("shutdown requested") } // If there was an error and we haven't made progress, restore the original progress diff --git a/internal/processor/proc_DLXHDR.go b/internal/processor/proc_DLXHDR.go index 6b0dd584..f042ccb8 100644 --- a/internal/processor/proc_DLXHDR.go +++ b/internal/processor/proc_DLXHDR.go @@ -3,7 +3,7 @@ package processor import "github.com/go-while/go-pugleaf/internal/nntp" // GetXHDR fetches XHDR data for a group -func (proc *Processor) GetXHDR(groupName string, header string, start, end int64) ([]*nntp.HeaderLine, error) { +func (proc *Processor) GetXHDR(groupName string, header string, start, end int64) ([]nntp.HeaderLine, error) { // Fetch XHDR data from NNTP server xhdrData, err := proc.Pool.XHdr(groupName, header, start, end) if err != nil { diff --git 
a/internal/processor/proc_ImportOV.go b/internal/processor/proc_ImportOV.go index 39b6e048..c89edbb8 100644 --- a/internal/processor/proc_ImportOV.go +++ b/internal/processor/proc_ImportOV.go @@ -12,14 +12,14 @@ import ( // ImportOverview fetches XOVER data for a group and stores it in the overview DB. func (proc *Processor) ImportOverview(groupName string) error { - groupDBs, err := proc.DB.GetGroupDBs(groupName) + groupDB, err := proc.DB.GetGroupDB(groupName) if err != nil { return err } - defer groupDBs.Return(proc.DB) + defer groupDB.Return() /* defer func() { - err := proc.DB.CloseGroupDBs() + err := proc.DB.CloseGroupDB() if err != nil { log.Printf("ImportOverview: Failed to close group DBs for %s: %v", groupName, err) } @@ -31,7 +31,7 @@ func (proc *Processor) ImportOverview(groupName string) error { } // Efficiently find the highest article number already in articles table var maxNum sql.NullInt64 - if err := database.RetryableQueryRowScan(groupDBs.DB, "SELECT MAX(article_num) FROM articles", nil, &maxNum); err != nil { + if err := database.RetryableQueryRowScan(groupDB.DB, "SELECT MAX(article_num) FROM articles", nil, &maxNum); err != nil { return err } start := groupInfo.First // Start from the first article in the remote group @@ -82,7 +82,7 @@ func (proc *Processor) ImportOverview(groupName string) error { Lines: int(ov.Lines), ReplyCount: 0, // Initialize to 0, will be updated when replies are found } - if num, err := proc.DB.InsertOverview(groupDBs, o); err != nil || num == 0 { + if num, err := proc.DB.InsertOverview(groupDB, o); err != nil || num == 0 { log.Printf("Failed to insert overview for article %d: %v", num, err) } else { importedCount++ @@ -91,21 +91,21 @@ func (proc *Processor) ImportOverview(groupName string) error { // In ImportOverview after inserting overviews log.Printf("ImportOverview: Inserted %d overviews to newsgroup '%s', forcing commit", importedCount, groupName) - if tx, err := groupDBs.DB.Begin(); err == nil { + if tx, err := 
groupDB.DB.Begin(); err == nil { tx.Commit() // Force any pending transactions to commit log.Printf("ImportOverview: Forced commit completed newsgroup '%s'", groupName) } else { log.Printf("ImportOverview: Could not begin transaction for commit newsgroup '%s': %v", groupName, err) } // After overview inserts, force WAL to sync - _, err = database.RetryableExec(groupDBs.DB, "PRAGMA wal_checkpoint(FULL)") + _, err = database.RetryableExec(groupDB.DB, "PRAGMA wal_checkpoint(FULL)") if err != nil { log.Printf("ImportOverview: WAL checkpoint failed newsgroup '%s': %v", groupName, err) } else { log.Printf("ImportOverview: WAL checkpoint completed newsgroup '%s'", groupName) } // After all overview inserts in ImportOverview - _, err = database.RetryableExec(groupDBs.DB, "PRAGMA synchronous = FULL") + _, err = database.RetryableExec(groupDB.DB, "PRAGMA synchronous = FULL") if err != nil { log.Printf("Warning: Could not set synchronous mode: %v", err) } diff --git a/internal/processor/processor.go b/internal/processor/processor.go index 6b9e1a7f..1706dec1 100644 --- a/internal/processor/processor.go +++ b/internal/processor/processor.go @@ -33,23 +33,25 @@ type Processor struct { } var ( + DownloadMaxPar int = 10 // HARDCODED Maximum number of parallel newsgroups downloads + // these list of ' var ' can be set after importing the lib before starting!! 
- MaxCrossPosts int = 15 // HARDCODED Maximum number of crossposts to allow per article + MaxCrossPosts int = 9 // HARDCODED Maximum number of crossposts to allow per article LocalNNTPHostname string = "" // Hostname must be set before processing articles // MaxBatch defines the maximum number of articles to fetch in a single batch - MaxBatchSize int64 = 128 + MaxBatchSize int64 = 1000 // RunRSLIGHTImport is used to indicate if the importer should run the legacy RockSolid Light importer RunRSLIGHTImport = false // Global Batch Queue (proc_DLArt.go) Batch = &BatchQueue{ - Check: make(chan *string), // check newsgroups - TodoQ: make(chan *nntp.GroupInfo), // todo newsgroups - GetQ: make(chan *BatchItem), // get articles, blocking channel - GroupQueues: make(map[string]*GroupBatch), // per-newsgroup queues + Check: make(chan *string, 1), // check newsgroups + TodoQ: make(chan *nntp.GroupInfo, DownloadMaxPar), // todo newsgroups + GetQ: make(chan *BatchItem, 128), // get articles + GroupQueues: make(map[string]*GroupBatch), // per-newsgroup queues } ) @@ -114,66 +116,14 @@ func (proc *Processor) CheckNoMoreWorkInHistory() bool { } // AddProcessedArticleToHistory adds a successfully processed article to history with correct group and article number -func (proc *Processor) AddProcessedArticleToHistory(msgIdItem *history.MessageIdItem, newsgroupPtr *string, articleNumber int64) { - if msgIdItem == nil || newsgroupPtr == nil { +func (proc *Processor) AddProcessedArticleToHistory(msgIdItem *history.MessageIdItem) bool { + if msgIdItem == nil { //log.Print("ERROR: addProcessedArticleToHistory called with nil MessageIdItem or newsgroupPtr") - return - } - if *newsgroupPtr == "" || articleNumber <= 0 { - //log.Printf("ERROR: addProcessedArticleToHistory called with invalid parameters: newsgroupPtr='%s', articleNumber=%d msgIdItem='%#v'", *newsgroupPtr, articleNumber, msgIdItem) - return - } - - msgIdItem.Mux.Lock() - if msgIdItem.FileOffset > 0 || msgIdItem.ArtNum > 0 || 
msgIdItem.GroupName != nil { - msgIdItem.Response = history.CaseDupes - msgIdItem.CachedEntryExpires = time.Now().Add(15 * time.Second) - //log.Printf("ERROR: addProcessedArticleToHistory called with existing FileOffset %d or ArtNum %d or GroupName '%v', ignoring new values for msgIdItem='%#v'", msgIdItem.FileOffset, msgIdItem.ArtNum, *msgIdItem.GroupName, msgIdItem) - msgIdItem.Mux.Unlock() - return + return false } - if msgIdItem.GroupName == nil && msgIdItem.ArtNum <= 0 { - msgIdItem.GroupName = newsgroupPtr - msgIdItem.ArtNum = articleNumber // Set article number if not already set - //msgIdItem.StorageToken = fmt.Sprintf("%s:%d", *newsgroupPtr, articleNumber) // Set the storage token in the item - } else { - msgIdItem.Response = history.CaseDupes - msgIdItem.CachedEntryExpires = time.Now().Add(15 * time.Second) - //log.Printf("WARNING: addProcessedArticleToHistory called with existing GroupName '%s' or ArtNum %d, ignoring new values for msgIdItem='%#v'", *msgIdItem.GroupName, msgIdItem.ArtNum, msgIdItem) - msgIdItem.Mux.Unlock() - return - } - msgIdItem.Mux.Unlock() // Add to history channel - proc.History.Add(msgIdItem) -} - -// FindThreadRootInCache - public wrapper for the interface -func (proc *Processor) FindThreadRootInCache(newsgroupPtr *string, refs []string) *database.MsgIdTmpCacheItem { - item := proc.MsgIdCache.FindThreadRootInCache(newsgroupPtr, refs) - if item == nil { - return nil - } - - // Convert from history.MessageIdItem to database.MsgIdTmpCacheItem for interface compatibility - item.Mux.RLock() - defer item.Mux.RUnlock() - - // Get group-specific threading information - threadingInfo, exists := item.GroupThreading[newsgroupPtr] - if !exists { - return nil // No threading info for this group - } - - result := &database.MsgIdTmpCacheItem{ - MessageId: item.MessageId, - ArtNum: threadingInfo.ArtNum, - RootArticle: threadingInfo.RootArticle, - IsThreadRoot: threadingInfo.IsThreadRoot, - } - - return result + return proc.History.Add(msgIdItem) 
} // GetHistoryStats returns current history statistics @@ -235,8 +185,8 @@ func (proc *Processor) WaitForBatchCompletion() { // Public methods for NNTP server integration // Lookup looks up a message-ID in history and returns the storage token in the item -func (proc *Processor) Lookup(msgIdItem *history.MessageIdItem) (int, error) { - return proc.History.Lookup(msgIdItem) +func (proc *Processor) Lookup(msgIdItem *history.MessageIdItem, quick bool) (response int, newsgroupIDs []int64, err error) { + return proc.History.Lookup(msgIdItem, quick) } /* diff --git a/internal/processor/rslight.go b/internal/processor/rslight.go index 129abd8d..ec9c0097 100644 --- a/internal/processor/rslight.go +++ b/internal/processor/rslight.go @@ -434,12 +434,12 @@ func (leg *LegacyImporter) ImportAllSQLiteDatabases(sqliteDir string, threads in if newsgroup != anewsgroup { log.Printf("[RSLIGHT-IMPORT] Info: newsgroup '%s' from file '%s' does not match expected newsgroup '%s'", anewsgroup, file, newsgroup) } - if groupDBs, err := leg.proc.DB.GetGroupDBs(newsgroup); err != nil { + if groupDB, err := leg.proc.DB.GetGroupDB(newsgroup); err != nil { log.Printf("[RSLIGHT-IMPORT] Warning: failed to get group DBs for newsgroup '%s': %v", newsgroup, err) // Don't close on error, this is a different type of error } else { - // Return the groupDBs connection immediately - we were just testing if the group exists - groupDBs.Return(leg.proc.DB) + // Return the groupDB connection immediately - we were just testing if the group exists + groupDB.Return() err = leg.insertNewsgroupIfNotExists(newsgroup, "") // Insert newsgroup if it doesn't exist if err != nil { log.Printf("internal/legacy/main.go: Warning: failed to insert newsgroup %s: %v", newsgroup, err) diff --git a/internal/processor/threading.go b/internal/processor/threading.go index 20449bb7..a85bfa06 100644 --- a/internal/processor/threading.go +++ b/internal/processor/threading.go @@ -4,6 +4,7 @@ import ( "crypto/md5" "fmt" "log" + "slices" 
"strings" "time" @@ -92,7 +93,7 @@ func (proc *Processor) processArticle(article *models.Article, legacyNewsgroup s default: // Also check history database for final determination msgIdItem.Mux.Unlock() - response, err := proc.Lookup(msgIdItem) + response, _, err := proc.Lookup(msgIdItem, true) if err != nil { log.Printf("Error looking up message ID %s in history: %v", msgIdItem.MessageId, err) return history.CaseError, err @@ -119,6 +120,32 @@ func (proc *Processor) processArticle(article *models.Article, legacyNewsgroup s msgIdItem.Mux.Unlock() // dont process crossposts if we downloaded articles in bulkmode // Use legacy newsgroup in bulkmode. add article only to single newsgroup db. + + newsgroupsStr := common.GetHeaderFirst(article.Headers, "newsgroups") + if newsgroupsStr == "" { + log.Printf("[SPAM:HDR] Article '%s' no newsgroups header", article.MessageID) + proc.setCaseDupes(msgIdItem, bulkmode) + return history.CaseError, fmt.Errorf("error processArticle: article '%s' has no 'newsgroups' header", article.MessageID) + } + + ngs := proc.extractGroupsFromHeaders(article.MessageID, newsgroupsStr) + if len(ngs) == 0 || len(ngs) > MaxCrossPosts { + log.Printf("[SPAM:EMP] Article '%s' newsgroups=%d", article.MessageID, len(ngs)) + proc.setCaseDupes(msgIdItem, bulkmode) + return history.CaseError, fmt.Errorf("error processArticle: article '%s' crossposts=%d", article.MessageID, len(ngs)) + } + for _, ngName := range ngs { + ngid, err := proc.DB.MainDBGetNewsgroup(ngName) + if err != nil { + log.Printf("processArticle: failed to get newsgroup ID for name '%s': %v", ngName, err) + continue + } + msgIdItem.Mux.Lock() + if !slices.Contains(msgIdItem.NewsgroupIDs, ngid.ID) { + msgIdItem.NewsgroupIDs = append(msgIdItem.NewsgroupIDs, ngid.ID) + } + msgIdItem.Mux.Unlock() + } newsgroups = append(newsgroups, legacyNewsgroup) } else if !RunRSLIGHTImport && !bulkmode { @@ -131,16 +158,28 @@ func (proc *Processor) processArticle(article *models.Article, legacyNewsgroup s } 
newsgroups = proc.extractGroupsFromHeaders(article.MessageID, newsgroupsStr) - if len(newsgroups) > MaxCrossPosts { + if len(newsgroups) == 0 || len(newsgroups) > MaxCrossPosts { log.Printf("[SPAM:EMP] Article '%s' newsgroups=%d", article.MessageID, len(newsgroups)) proc.setCaseDupes(msgIdItem, bulkmode) return history.CaseError, fmt.Errorf("error processArticle: article '%s' crossposts=%d", article.MessageID, len(newsgroups)) } + for _, ngName := range newsgroups { + ngid, err := proc.DB.MainDBGetNewsgroup(ngName) + if err != nil { + log.Printf("processArticle: failed to get newsgroup ID for name '%s': %v", ngName, err) + continue + } + msgIdItem.Mux.Lock() + if !slices.Contains(msgIdItem.NewsgroupIDs, ngid.ID) { + msgIdItem.NewsgroupIDs = append(msgIdItem.NewsgroupIDs, ngid.ID) + } + msgIdItem.Mux.Unlock() + } } else { - log.Printf("ERROR processArticle: article '%s' has no 'newsgroups' header and no legacy newsgroup provided", article.MessageID) + log.Printf("ERROR in processArticle: invalid bulk import flags") proc.setCaseDupes(msgIdItem, bulkmode) - return history.CaseError, fmt.Errorf("error processArticle: article '%s' has no 'newsgroups' header", article.MessageID) + return history.CaseError, fmt.Errorf("error processArticle") } if article.Subject == "" { log.Printf("[HDR-SPAM] Article '%s' empty subject... headers='%#v'", article.MessageID, article.Headers) @@ -154,14 +193,12 @@ func (proc *Processor) processArticle(article *models.Article, legacyNewsgroup s return history.CaseError, fmt.Errorf("error processArticle: article '%s' has no 'from' header", article.MessageID) } + article.DateSent = ParseNNTPDate(article.DateString) if article.DateSent.IsZero() { - article.DateSent = ParseNNTPDate(article.DateString) - if article.DateSent.IsZero() { - log.Printf("[ERROR-HDR] Article '%s' no valid date... 
headerDate='%v' dateString='%s'", article.MessageID, article.DateSent, article.DateString) - proc.setCaseDupes(msgIdItem, bulkmode) - //dateString = time.Now().Format(time.RFC1123Z) // Use current time as fallback - return history.CaseError, fmt.Errorf("error processArticle: article '%s' has no valid 'date' header", article.MessageID) - } + log.Printf("[ERROR-HDR] Article '%s' no valid date... headerDate='%v' dateString='%s'", article.MessageID, article.DateSent, article.DateString) + proc.setCaseDupes(msgIdItem, bulkmode) + //dateString = time.Now().Format(time.RFC1123Z) // Use current time as fallback + return history.CaseError, fmt.Errorf("error processArticle: article '%s' has no valid 'date' header", article.MessageID) } // Check for future posts (more than 25 hours in the future) and skip processing if article.DateSent.After(time.Now().Add(25 * time.Hour)) { @@ -169,10 +206,12 @@ func (proc *Processor) processArticle(article *models.Article, legacyNewsgroup s proc.setCaseDupes(msgIdItem, bulkmode) return history.CaseError, fmt.Errorf("article '%s' posted too far in future: %v", article.MessageID, article.DateSent) } + // TODO: add article cutoff date checks here // part of parsing data moved to nntp-client-commands.go:L~850 (func ParseLegacyArticleLines) - article.ReplyCount = 0 // Will be updated by threading article.MsgIdItem = msgIdItem + proc.AddProcessedArticleToHistory(msgIdItem) + article.ArticleNums = make(map[*string]int64) article.ProcessQueue = make(chan *string, 16) // Initialize process queue @@ -224,36 +263,36 @@ func (proc *Processor) processArticle(article *models.Article, legacyNewsgroup s if !bulkmode { // @AI !!! NO CACHE CHECK for bulk legacy import!! // @AI !!! NO CACHE CHECK for bulk legacy import!! 
// Cache check still provides some throttling while avoiding the expensive DB query - if proc.MsgIdCache.HasMessageIDInGroup(article.MessageID, newsgroupPtr) { // CHECK GLOBAL PROCESSOR CACHE with POINTER - log.Printf("processArticle: article '%s' already exists in cache for newsgroup '%s', skipping crosspost", article.MessageID, *newsgroupPtr) - continue - } + //if proc.MsgIdCache.HasMessageIDInGroup(article.MessageID, newsgroupPtr) { // CHECK GLOBAL PROCESSOR CACHE with POINTER + // log.Printf("processArticle: article '%s' already exists in cache for newsgroup '%s', skipping crosspost", article.MessageID, *newsgroupPtr) + // continue + //} } //log.Printf("Crossposted article '%s' to newsgroup '%s'", article.MessageID, group) - groupDBs, err := proc.DB.GetGroupDBs(newsgroup) + groupDB, err := proc.DB.GetGroupDB(newsgroup) if err != nil { log.Printf("Failed to get group DBs for newsgroup '%s': %v", newsgroup, err) - if groupDBs != nil { - groupDBs.Return(proc.DB) // Return connection even on error + if groupDB != nil { + groupDB.Return() // Return connection even on error } continue // Continue with other groups } - if groupDBs.ExistsMsgIdInArticlesDB(article.MessageID) { - groupDBs.Return(proc.DB) // Return connection before continuing + if groupDB.ExistsMsgIdInArticlesDB(article.MessageID) { + groupDB.Return() // Return connection before continuing continue } /* // Skip database duplicate check for bulk legacy imports if !bulkmode { // check if article exists in articledb - this is the expensive operation - if groupDBs.ExistsMsgIdInArticlesDB(article.MessageID) { - groupDBs.Return(proc.DB) // Return connection before continuing + if groupDB.ExistsMsgIdInArticlesDB(article.MessageID) { + groupDB.Return(proc.DB) // Return connection before continuing continue } } */ - groupDBs.Return(proc.DB) + groupDB.Return() go proc.DB.Batch.BatchCaptureOverviewForLater(newsgroupPtr, article) diff --git a/internal/web/web_admin_newsgroups.go 
b/internal/web/web_admin_newsgroups.go index 06a085e5..e6345eb2 100644 --- a/internal/web/web_admin_newsgroups.go +++ b/internal/web/web_admin_newsgroups.go @@ -138,6 +138,7 @@ func (s *WebServer) adminUpdateNewsgroup(c *gin.Context) { maxArticlesStr := strings.TrimSpace(c.PostForm("max_articles")) maxArtSizeStr := strings.TrimSpace(c.PostForm("max_art_size")) activeStr := c.PostForm("active") + status := strings.TrimSpace(c.PostForm("status")) // Validate input if name == "" { @@ -146,6 +147,24 @@ func (s *WebServer) adminUpdateNewsgroup(c *gin.Context) { return } + // Validate status field + if status != "" { + validStatuses := []string{"y", "m", "n", "j", "x"} + isValid := false + for _, validStatus := range validStatuses { + if status == validStatus { + isValid = true + break + } + } + // Check if it's a redirect status (starts with =) + if !isValid && !strings.HasPrefix(status, "=") { + session.SetError("Invalid status value. Must be y, m, n, j, x, or =group.name") + c.Redirect(http.StatusSeeOther, "/admin?tab=newsgroups") + return + } + } + // Parse expiry days expiryDays := 0 if expiryDaysStr != "" { @@ -218,6 +237,16 @@ func (s *WebServer) adminUpdateNewsgroup(c *gin.Context) { return } + // Update status if provided + if status != "" { + err = s.DB.UpdateNewsgroupStatus(name, status) + if err != nil { + session.SetError("Failed to update newsgroup NNTP status") + c.Redirect(http.StatusSeeOther, "/admin?tab=newsgroups") + return + } + } + session.SetSuccess("Newsgroup updated successfully") c.Redirect(http.StatusSeeOther, buildNewsgroupAdminRedirectURL(c)) } @@ -478,7 +507,7 @@ func (s *WebServer) adminMigrateNewsgroupActivity(c *gin.Context) { } // Get the group database for this newsgroup - groupDBs, err := s.DB.GetGroupDBs(name) + groupDB, err := s.DB.GetGroupDB(name) if err != nil { session.SetError("Failed to access newsgroup database: " + err.Error()) c.Redirect(http.StatusSeeOther, buildNewsgroupAdminRedirectURL(c)) @@ -487,8 +516,8 @@ func (s 
*WebServer) adminMigrateNewsgroupActivity(c *gin.Context) { // Query the latest article date from visible articles only var latestDate sql.NullString - err = database.RetryableQueryRowScan(groupDBs.DB, "SELECT MAX(date_sent) FROM articles WHERE hide = 0", nil, &latestDate) - groupDBs.Return(s.DB) // Always return the database connection + err = database.RetryableQueryRowScan(groupDB.DB, "SELECT MAX(date_sent) FROM articles WHERE hide = 0", nil, &latestDate) + groupDB.Return() // Always return the database connection if err != nil { session.SetError("Failed to query latest article for " + name + ": " + err.Error()) @@ -553,13 +582,13 @@ const query_fixGroupThreadActivity3 = "UPDATE thread_cache SET last_activity = ? // fixGroupThreadActivity implements the same logic as cmd/fix-thread-activity for a single group func (s *WebServer) fixGroupThreadActivity(groupName string) error { - groupDBs, err := s.DB.GetGroupDBs(groupName) + groupDB, err := s.DB.GetGroupDB(groupName) if err != nil { return fmt.Errorf("failed to get group DB: %w", err) } - defer groupDBs.Return(s.DB) + defer groupDB.Return() - rows, err := database.RetryableQuery(groupDBs.DB, query_fixGroupThreadActivity1) + rows, err := database.RetryableQuery(groupDB.DB, query_fixGroupThreadActivity1) if err != nil { return fmt.Errorf("failed to query thread cache: %w", err) } @@ -613,7 +642,7 @@ func (s *WebServer) fixGroupThreadActivity(groupName string) error { var dateSent time.Time var dateStr sql.NullString - err := database.RetryableQueryRowScan(groupDBs.DB, query_fixGroupThreadActivity2, []interface{}{articleNum}, &dateStr) + err := database.RetryableQueryRowScan(groupDB.DB, query_fixGroupThreadActivity2, []interface{}{articleNum}, &dateStr) if err != nil || !dateStr.Valid { log.Printf("Skipping article %d in thread %d: no valid date_sent\n", articleNum, thread.root) @@ -660,7 +689,7 @@ func (s *WebServer) fixGroupThreadActivity(groupName string) error { // Format as UTC string to avoid timezone encoding 
issues utcTimeStr := maxDate.UTC().Format("2006-01-02 15:04:05") - _, err := database.RetryableExec(groupDBs.DB, query_fixGroupThreadActivity3, utcTimeStr, thread.root) + _, err := database.RetryableExec(groupDB.DB, query_fixGroupThreadActivity3, utcTimeStr, thread.root) if err != nil { log.Print("Failed to update thread activity for thread ", thread.root, ": ", err) @@ -706,7 +735,7 @@ func (s *WebServer) adminHideFuturePosts(c *gin.Context) { cutoffTime := time.Now().Add(48 * time.Hour) // Get the group database for this newsgroup - groupDBs, err := s.DB.GetGroupDBs(name) + groupDB, err := s.DB.GetGroupDB(name) if err != nil { session.SetError("Failed to access newsgroup database: " + err.Error()) c.Redirect(http.StatusSeeOther, buildNewsgroupAdminRedirectURL(c)) @@ -714,9 +743,9 @@ func (s *WebServer) adminHideFuturePosts(c *gin.Context) { } // Find articles that are posted more than 48 hours in the future and not already hidden - articleRows, err := groupDBs.DB.Query("SELECT article_num FROM articles WHERE date_sent > ? AND hide = 0", cutoffTime.Format("2006-01-02 15:04:05")) + articleRows, err := groupDB.DB.Query("SELECT article_num FROM articles WHERE date_sent > ? 
AND hide = 0", cutoffTime.Format("2006-01-02 15:04:05")) if err != nil { - groupDBs.Return(s.DB) + groupDB.Return() session.SetError("Failed to query future articles: " + err.Error()) c.Redirect(http.StatusSeeOther, buildNewsgroupAdminRedirectURL(c)) return @@ -731,7 +760,7 @@ func (s *WebServer) adminHideFuturePosts(c *gin.Context) { futureArticles = append(futureArticles, articleNum) } articleRows.Close() - groupDBs.Return(s.DB) + groupDB.Return() if len(futureArticles) == 0 { session.SetSuccess("No future-dated articles found in newsgroup: " + name) @@ -751,12 +780,12 @@ func (s *WebServer) adminHideFuturePosts(c *gin.Context) { } // Also set the hide flag for these future-dated articles - groupDBs, err := s.DB.GetGroupDBs(name) + groupDB, err := s.DB.GetGroupDB(name) if err != nil { continue // Skip if can't get DB connection } - _, err = database.RetryableExec(groupDBs.DB, "UPDATE articles SET hide = 1 WHERE article_num = ?", articleNum) - groupDBs.Return(s.DB) + _, err = database.RetryableExec(groupDB.DB, "UPDATE articles SET hide = 1 WHERE article_num = ?", articleNum) + groupDB.Return() if err != nil { continue // Skip articles that fail hide update diff --git a/internal/web/web_apiHandlers.go b/internal/web/web_apiHandlers.go index 00eba8ee..aa6eeb63 100644 --- a/internal/web/web_apiHandlers.go +++ b/internal/web/web_apiHandlers.go @@ -110,18 +110,18 @@ func (s *WebServer) getGroupOverview(c *gin.Context) { } } - groupDBs, err := s.DB.GetGroupDBs(groupName) + groupDB, err := s.DB.GetGroupDB(groupName) if err != nil { c.JSON(http.StatusNotFound, gin.H{"error": "Group not found"}) return } - defer groupDBs.Return(s.DB) + defer groupDB.Return() // Handle page-based to cursor conversion for compatibility if page > 1 && lastArticleNum == 0 { skipCount := (page - 1) * LIMIT_listGroups var cursorArticleNum int64 - err = database.RetryableQueryRowScan(groupDBs.DB, ` + err = database.RetryableQueryRowScan(groupDB.DB, ` SELECT article_num FROM articles WHERE hide = 
0 ORDER BY article_num DESC @@ -133,7 +133,7 @@ func (s *WebServer) getGroupOverview(c *gin.Context) { } } - overviews, totalCount, hasMore, err := s.DB.GetOverviewsPaginated(groupDBs, lastArticleNum, LIMIT_listGroups) + overviews, totalCount, hasMore, err := s.DB.GetOverviewsPaginated(groupDB, lastArticleNum, LIMIT_listGroups) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return @@ -186,13 +186,13 @@ func (s *WebServer) getArticle(c *gin.Context) { return } - groupDBs, err := s.DB.GetGroupDBs(groupName) + groupDB, err := s.DB.GetGroupDB(groupName) if err != nil { c.JSON(http.StatusNotFound, gin.H{"error": "Group not found"}) return } - defer groupDBs.Return(s.DB) - article, err := s.DB.GetArticleByNum(groupDBs, articleNum) + defer groupDB.Return() + article, err := s.DB.GetArticleByNum(groupDB, articleNum) if err != nil { c.JSON(http.StatusNotFound, gin.H{"error": "Article not found"}) return @@ -210,13 +210,13 @@ func (s *WebServer) getArticleByMessageId(c *gin.Context) { return // Error response already sent by checkGroupAccessAPI } - groupDBs, err := s.DB.GetGroupDBs(groupName) + groupDB, err := s.DB.GetGroupDB(groupName) if err != nil { c.JSON(http.StatusNotFound, gin.H{"error": "Group not found"}) return } - defer groupDBs.Return(s.DB) - article, err := s.DB.GetArticleByMessageID(groupDBs, messageId) + defer groupDB.Return() + article, err := s.DB.GetArticleByMessageID(groupDB, messageId) if err != nil { c.JSON(http.StatusNotFound, gin.H{"error": "Article not found"}) return @@ -233,13 +233,13 @@ func (s *WebServer) getGroupThreads(c *gin.Context) { return // Error response already sent by checkGroupAccessAPI } - groupDBs, err := s.DB.GetGroupDBs(groupName) + groupDB, err := s.DB.GetGroupDB(groupName) if err != nil { c.JSON(http.StatusNotFound, gin.H{"error": "Group not found"}) return } - defer groupDBs.Return(s.DB) - threads, err := s.DB.GetThreads(groupDBs) + defer groupDB.Return() + threads, err := 
s.DB.GetThreads(groupDB) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return @@ -336,22 +336,22 @@ func (s *WebServer) getArticlePreview(c *gin.Context) { } // Get group database - groupDBs, err := s.DB.GetGroupDBs(groupName) + groupDB, err := s.DB.GetGroupDB(groupName) if err != nil { c.JSON(http.StatusNotFound, gin.H{"error": "Group not found"}) return } - defer groupDBs.Return(s.DB) + defer groupDB.Return() // Get article overview for basic info - overview, err := s.DB.GetOverviewByArticleNum(groupDBs, articleNum) + overview, err := s.DB.GetOverviewByArticleNum(groupDB, articleNum) if err != nil { c.JSON(http.StatusNotFound, gin.H{"error": "Article not found"}) return } // Get article content (limited for preview) - article, err := s.DB.GetArticleByNum(groupDBs, articleNum) + article, err := s.DB.GetArticleByNum(groupDB, articleNum) if err != nil { c.JSON(http.StatusNotFound, gin.H{"error": "Article content not found"}) return diff --git a/internal/web/web_articlePage.go b/internal/web/web_articlePage.go index cbecac16..42d6a365 100644 --- a/internal/web/web_articlePage.go +++ b/internal/web/web_articlePage.go @@ -35,22 +35,22 @@ func (s *WebServer) articlePage(c *gin.Context) { return } - groupDBs, err := s.DB.GetGroupDBs(groupName) - if groupDBs == nil || err != nil { + groupDB, err := s.DB.GetGroupDB(groupName) + if groupDB == nil || err != nil { c.String(http.StatusNotFound, "Group not found: %v", err) return } - defer groupDBs.Return(s.DB) - if groupDBs.NewsgroupPtr == nil { + defer groupDB.Return() + if groupDB.NewsgroupPtr == nil { c.String(http.StatusInternalServerError, "Group pointer is nil for group %s", groupName) return } // Get the article - article, err := s.DB.GetArticleByNum(groupDBs, articleNum) + article, err := s.DB.GetArticleByNum(groupDB, articleNum) if err != nil { /* TODO: THIS DOES NOT SCALE WELL! 
// Article not found in articles table, let's check if it exists in overview - overviews, _ := s.DB.GetOverviews(groupDBs) + overviews, _ := s.DB.GetOverviews(groupDB) var foundInOverview bool for _, overview := range overviews { if overview.ArticleNum == articleNum { @@ -81,7 +81,7 @@ func (s *WebServer) articlePage(c *gin.Context) { data := ArticlePageData{ TemplateData: s.getBaseTemplateData(c, subjectText+" - Article "+articleNumStr), GroupName: groupName, - GroupPtr: groupDBs.NewsgroupPtr, + GroupPtr: groupDB.NewsgroupPtr, ArticleNum: articleNum, Article: article, Thread: thread, @@ -108,14 +108,14 @@ func (s *WebServer) articleByMessageIdPage(c *gin.Context) { return // Error response already sent by checkGroupAccess } - groupDBs, err := s.DB.GetGroupDBs(groupName) + groupDB, err := s.DB.GetGroupDB(groupName) if err != nil { c.String(http.StatusNotFound, "Group not found: %v", err) return } - defer groupDBs.Return(s.DB) + defer groupDB.Return() // Get the article by message ID - article, err := s.DB.GetArticleByMessageID(groupDBs, messageId) + article, err := s.DB.GetArticleByMessageID(groupDB, messageId) if err != nil { c.String(http.StatusNotFound, "Article with message ID %s not found in group %s", messageId, groupName) return @@ -134,14 +134,14 @@ func (s *WebServer) articleByMessageIdPage(c *gin.Context) { subjectText := article.GetCleanSubject() data := ArticlePageData{ - TemplateData: s.getBaseTemplateData(c, subjectText+" - Article "+strconv.FormatInt(article.ArticleNums[groupDBs.NewsgroupPtr], 10)), + TemplateData: s.getBaseTemplateData(c, subjectText+" - Article "+strconv.FormatInt(article.ArticleNums[groupDB.NewsgroupPtr], 10)), GroupName: groupName, - GroupPtr: groupDBs.NewsgroupPtr, - ArticleNum: article.ArticleNums[groupDBs.NewsgroupPtr], + GroupPtr: groupDB.NewsgroupPtr, + ArticleNum: article.ArticleNums[groupDB.NewsgroupPtr], Article: article, Thread: thread, - PrevArticle: article.ArticleNums[groupDBs.NewsgroupPtr] - 1, - NextArticle: 
article.ArticleNums[groupDBs.NewsgroupPtr] + 1, + PrevArticle: article.ArticleNums[groupDB.NewsgroupPtr] - 1, + NextArticle: article.ArticleNums[groupDB.NewsgroupPtr] + 1, } // Load template individually to avoid conflicts diff --git a/internal/web/web_groupThreadsPage.go b/internal/web/web_groupThreadsPage.go index 05c1920c..f6c140f7 100644 --- a/internal/web/web_groupThreadsPage.go +++ b/internal/web/web_groupThreadsPage.go @@ -28,16 +28,16 @@ func (s *WebServer) groupThreadsPage(c *gin.Context) { } // Get group database connections - groupDBs, err := s.DB.GetGroupDBs(groupName) + groupDB, err := s.DB.GetGroupDB(groupName) if err != nil { log.Printf("Failed to get group databases for %s: %v", groupName, err) s.renderError(c, http.StatusInternalServerError, "Database error", err.Error()) return } - defer groupDBs.Return(s.DB) + defer groupDB.Return() // Use cached thread data for fast performance - forumThreads, totalThreads, err := s.DB.GetCachedThreads(groupDBs, page, Threads_perPage) + forumThreads, totalThreads, err := s.DB.GetCachedThreads(groupDB, page, Threads_perPage) if err != nil { log.Printf("Failed to get cached threads for %s: %v", groupName, err) s.renderError(c, http.StatusInternalServerError, "Failed to load threads", err.Error()) @@ -53,7 +53,7 @@ func (s *WebServer) groupThreadsPage(c *gin.Context) { ft.RootArticle.ArticleNums = make(map[*string]int64) } // Store the article number for this newsgroup - ft.RootArticle.ArticleNums[groupDBs.NewsgroupPtr] = ft.RootArticle.ArticleNum + ft.RootArticle.ArticleNums[groupDB.NewsgroupPtr] = ft.RootArticle.ArticleNum rootOverviews = append(rootOverviews, ft.RootArticle) } } @@ -89,7 +89,7 @@ func (s *WebServer) groupThreadsPage(c *gin.Context) { "AvailableSections": baseData.AvailableSections, "AvailableAIModels": baseData.AvailableAIModels, "GroupName": groupName, - "GroupPtr": groupDBs.NewsgroupPtr, + "GroupPtr": groupDB.NewsgroupPtr, "ForumThreads": forumThreads, "TotalThreads": totalThreads, 
"TotalMessages": totalMessages, diff --git a/internal/web/web_sectionsPage.go b/internal/web/web_sectionsPage.go index 5f5b68cc..6b905b2d 100644 --- a/internal/web/web_sectionsPage.go +++ b/internal/web/web_sectionsPage.go @@ -184,13 +184,13 @@ func (s *WebServer) sectionGroupPage(c *gin.Context) { } // Get group database - groupDBs, err := s.DB.GetGroupDBs(groupName) + groupDB, err := s.DB.GetGroupDB(groupName) if err != nil { s.renderError(c, http.StatusNotFound, "Group database not found", "The group '"+groupName+"' database does not exist. Try importing data first.") return } - defer groupDBs.Return(s.DB) + defer groupDB.Return() // Get pagination parameters page := 1 var lastArticleNum int64 @@ -213,7 +213,7 @@ func (s *WebServer) sectionGroupPage(c *gin.Context) { if page > 1 && lastArticleNum == 0 { skipCount := (page - 1) * LIMIT_sectionGroupPage var cursorArticleNum int64 - err = database.RetryableQueryRowScan(groupDBs.DB, ` + err = database.RetryableQueryRowScan(groupDB.DB, ` SELECT article_num FROM articles WHERE hide = 0 ORDER BY article_num DESC @@ -226,7 +226,7 @@ func (s *WebServer) sectionGroupPage(c *gin.Context) { } // Get articles (overview data) for this group with pagination - articles, totalCount, hasMore, err := s.DB.GetOverviewsPaginated(groupDBs, lastArticleNum, LIMIT_sectionGroupPage) + articles, totalCount, hasMore, err := s.DB.GetOverviewsPaginated(groupDB, lastArticleNum, LIMIT_sectionGroupPage) if err != nil { s.renderError(c, http.StatusInternalServerError, "Database Error", err.Error()) return @@ -312,15 +312,15 @@ func (s *WebServer) sectionArticlePage(c *gin.Context) { } // Get group database - groupDBs, err := s.DB.GetGroupDBs(groupName) + groupDB, err := s.DB.GetGroupDB(groupName) if err != nil { s.renderError(c, http.StatusNotFound, "Group database not found", "The group '"+groupName+"' database does not exist.") return } - defer groupDBs.Return(s.DB) + defer groupDB.Return() // Get the article - article, err := 
s.DB.GetArticleByNum(groupDBs, articleNum) + article, err := s.DB.GetArticleByNum(groupDB, articleNum) if err != nil { s.renderError(c, http.StatusNotFound, "Article not found", "Article "+articleNumStr+" not found in group '"+groupName+"'. It may not have been imported yet.") @@ -380,15 +380,15 @@ func (s *WebServer) sectionArticleByMessageIdPage(c *gin.Context) { } // Get group database - groupDBs, err := s.DB.GetGroupDBs(groupName) + groupDB, err := s.DB.GetGroupDB(groupName) if err != nil { s.renderError(c, http.StatusNotFound, "Group database not found", "The group '"+groupName+"' database does not exist.") return } - defer groupDBs.Return(s.DB) + defer groupDB.Return() // Get the article by message ID - article, err := s.DB.GetArticleByMessageID(groupDBs, messageId) + article, err := s.DB.GetArticleByMessageID(groupDB, messageId) if err != nil { s.renderError(c, http.StatusNotFound, "Article not found", "Article with message ID '"+messageId+"' not found in group '"+groupName+"'. It may not have been imported yet.") @@ -401,7 +401,7 @@ func (s *WebServer) sectionArticleByMessageIdPage(c *gin.Context) { TemplateData: s.getBaseTemplateData(c, section.DisplayName+" - "+groupName+" - "+subjectText), Section: section, GroupName: groupName, - ArticleNum: article.ArticleNums[groupDBs.NewsgroupPtr], + ArticleNum: article.ArticleNums[groupDB.NewsgroupPtr], Article: article, Thread: []*models.Overview{}, // TODO: Implement threading PrevArticle: 0, // TODO: Implement navigation diff --git a/internal/web/web_sitePostPage.go b/internal/web/web_sitePostPage.go index 4499b9a6..3f83b7fb 100644 --- a/internal/web/web_sitePostPage.go +++ b/internal/web/web_sitePostPage.go @@ -17,7 +17,7 @@ import ( "github.com/go-while/go-pugleaf/internal/utils" ) -var WebPostingBackOff = 42 * time.Second +var WebPostingBackOff = 42 * time.Second // TODO: make configurable // PostPageData represents data for posting page type PostPageData struct { @@ -73,9 +73,9 @@ func (s *WebServer) 
sitePostPage(c *gin.Context) { // Get the original article to extract subject and body for reply if articleNum, err := strconv.ParseInt(replyToArticleNum, 10, 64); err == nil { // Get group database connection - if groupDBs, err := s.DB.GetGroupDBs(prefilledNewsgroup); err == nil { - defer groupDBs.Return(s.DB) - if reply_article, err := s.DB.GetArticleByNum(groupDBs, articleNum); err == nil { + if groupDB, err := s.DB.GetGroupDB(prefilledNewsgroup); err == nil { + defer groupDB.Return() + if reply_article, err := s.DB.GetArticleByNum(groupDB, articleNum); err == nil { // Handle subject with "Re: " prefix if !strings.HasPrefix(strings.ToLower(reply_article.Subject), "re:") { article.Subject = "Re: " + models.ConvertToUTF8(reply_article.Subject) @@ -218,8 +218,8 @@ func (s *WebServer) sitePostSubmit(c *gin.Context) { if subject == "" { errors = append(errors, "Subject is required") } - if len(subject) > 255 { - errors = append(errors, "Subject limited to 255 characters") + if len(subject) > 1000 { + errors = append(errors, "Subject limited to 1000 characters") } if body == "" { errors = append(errors, "Message body is required") @@ -328,26 +328,27 @@ func (s *WebServer) sitePostSubmit(c *gin.Context) { } return } + //displayName := fmt.Sprintf("%s ", session.User.DisplayName) displayName := strings.TrimSpace(session.User.DisplayName) if displayName != "" && !strings.Contains(displayName, "<") && !strings.Contains(displayName, ">") { - displayName = fmt.Sprintf("%s ", session.User.DisplayName) + displayName = fmt.Sprintf("%s ", session.User.DisplayName) } if displayName == "" { // Fallback if display name is empty - displayName = fmt.Sprintf("Lorem Ipsum ", processor.LocalNNTPHostname) + displayName = fmt.Sprintf("Lorem Ipsum ", processor.LocalNNTPHostname) } var headers []string linesCount := strings.Count(body, "\n") + 1 - bytesCount := len(body) + bytesCount := len(body) - linesCount headers = append(headers, "MIME-Version: 1.0") headers = append(headers, 
"Content-Type: text/plain; charset=\"UTF-8\"") headers = append(headers, "Content-Transfer-Encoding: 8bit") - headers = append(headers, "Newsgroups: "+strings.Join(newsgroups, ",")) // Injection-Info / X-Trace header for tracking headers = append(headers, "X-pugleaf-Trace: "+processor.LocalNNTPHostname+";") - headers = append(headers, "\tnonce=\""+nonce+"\"; mail-complaints-to=\""+abuseMail+"\";") - headers = append(headers, "\tposting-account=\""+hashedUser+"\";") + headers = append(headers, " nonce=\""+nonce+"\"; mail-complaints-to=\""+abuseMail+"\";") + headers = append(headers, " posting-account=\""+hashedUser+"\";") headers = append(headers, "From: "+displayName) + headers = append(headers, "Newsgroups: "+strings.Join(newsgroups, ",")) headers = append(headers, "Lines: "+strconv.Itoa(linesCount)) headers = append(headers, "Bytes: "+strconv.Itoa(bytesCount)) @@ -375,14 +376,14 @@ func (s *WebServer) sitePostSubmit(c *gin.Context) { // Try to find the original article to get its References var originalRefs string for _, newsgroup := range newsgroups { - groupDBs, err := s.DB.GetGroupDBs(newsgroup) + groupDB, err := s.DB.GetGroupDB(newsgroup) if err != nil { log.Printf("Warning: Failed to get group DB for %s: %v", newsgroup, err) continue } - defer groupDBs.Return(s.DB) + defer groupDB.Return() - originalArticle, err := s.DB.GetArticleByMessageID(groupDBs, messageID) + originalArticle, err := s.DB.GetArticleByMessageID(groupDB, messageID) if err != nil { log.Printf("Warning: Failed to find original article %s in %s: %v", messageID, newsgroup, err) continue diff --git a/internal/web/web_threadPage.go b/internal/web/web_threadPage.go index 9ea447e0..326ec20f 100644 --- a/internal/web/web_threadPage.go +++ b/internal/web/web_threadPage.go @@ -44,15 +44,15 @@ func (s *WebServer) singleThreadPage(c *gin.Context) { page = 1 } - groupDBs, err := s.DB.GetGroupDBs(groupName) + groupDB, err := s.DB.GetGroupDB(groupName) if err != nil { c.String(http.StatusNotFound, "Group 
not found: %v", err) return } - defer groupDBs.Return(s.DB) + defer groupDB.Return() // Get the thread root overview first - rootOverview, err := s.DB.GetOverviewByArticleNum(groupDBs, threadRoot) + rootOverview, err := s.DB.GetOverviewByArticleNum(groupDB, threadRoot) if err != nil { c.String(http.StatusNotFound, "Thread root article %d not found: %v", threadRoot, err) return @@ -65,7 +65,7 @@ func (s *WebServer) singleThreadPage(c *gin.Context) { } // Load the full ThreadRoot article to get ArticleNums populated - threadRootArticle, err := s.DB.GetArticleByNum(groupDBs, threadRoot) + threadRootArticle, err := s.DB.GetArticleByNum(groupDB, threadRoot) if err != nil { c.String(http.StatusNotFound, "Thread root article %d not found: %v", threadRoot, err) return @@ -75,10 +75,10 @@ func (s *WebServer) singleThreadPage(c *gin.Context) { if threadRootArticle.ArticleNums == nil { threadRootArticle.ArticleNums = make(map[*string]int64) } - threadRootArticle.ArticleNums[groupDBs.NewsgroupPtr] = threadRoot + threadRootArticle.ArticleNums[groupDB.NewsgroupPtr] = threadRoot // Use cached thread replies with pagination - threadReplies, totalReplies, err := s.DB.GetCachedThreadReplies(groupDBs, threadRoot, page, ThreadMessages_perPage) + threadReplies, totalReplies, err := s.DB.GetCachedThreadReplies(groupDB, threadRoot, page, ThreadMessages_perPage) if err != nil { log.Printf("Failed to get cached thread replies for %s/%d: %v", groupName, threadRoot, err) s.renderError(c, http.StatusInternalServerError, "Failed to load thread replies", err.Error()) @@ -115,7 +115,7 @@ func (s *WebServer) singleThreadPage(c *gin.Context) { continue } - article, err := s.DB.GetArticleByNum(groupDBs, overview.ArticleNum) + article, err := s.DB.GetArticleByNum(groupDB, overview.ArticleNum) if err != nil { log.Printf("Warning: Could not load article %d: %v", overview.ArticleNum, err) // If we can't load the full article, we could fall back to overview data @@ -127,7 +127,7 @@ func (s *WebServer) 
singleThreadPage(c *gin.Context) { if article.ArticleNums == nil { article.ArticleNums = make(map[*string]int64) } - article.ArticleNums[groupDBs.NewsgroupPtr] = overview.ArticleNum + article.ArticleNums[groupDB.NewsgroupPtr] = overview.ArticleNum article.Mux.Unlock() threadMessages = append(threadMessages, article) } @@ -153,7 +153,7 @@ func (s *WebServer) singleThreadPage(c *gin.Context) { "AvailableSections": baseData.AvailableSections, "AvailableAIModels": baseData.AvailableAIModels, "GroupName": groupName, - "GroupPtr": groupDBs.NewsgroupPtr, + "GroupPtr": groupDB.NewsgroupPtr, "ThreadRoot": threadRootArticle, "ThreadMessages": threadMessages, "MessageCount": totalMessages, diff --git a/internal/web/web_threadTreePage.go b/internal/web/web_threadTreePage.go index 0f8637b5..478412c7 100644 --- a/internal/web/web_threadTreePage.go +++ b/internal/web/web_threadTreePage.go @@ -45,14 +45,14 @@ func (s *WebServer) handleThreadTreeAPI(c *gin.Context) { } // Get group database - groupDBs, err := s.DB.GetGroupDBs(groupName) + groupDB, err := s.DB.GetGroupDB(groupName) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{ "error": "Failed to get group database: " + err.Error(), }) return } - defer groupDBs.Return(s.DB) + defer groupDB.Return() // Parse options options := database.TreeViewOptions{ @@ -75,7 +75,7 @@ func (s *WebServer) handleThreadTreeAPI(c *gin.Context) { } // Get tree view - response, err := s.DB.GetThreadTreeView(groupDBs, threadRoot, options) + response, err := s.DB.GetThreadTreeView(groupDB, threadRoot, options) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{ "error": "Failed to get tree view: " + err.Error(), @@ -105,12 +105,12 @@ func (s *WebServer) threadTreePage(c *gin.Context) { } // Get group database - groupDBs, err := s.DB.GetGroupDBs(groupName) + groupDB, err := s.DB.GetGroupDB(groupName) if err != nil { s.renderError(c, http.StatusNotFound, "Group Not Found", "Group not found: "+groupName) return } - defer 
groupDBs.Return(s.DB) + defer groupDB.Return() // Get tree view options := database.TreeViewOptions{ @@ -120,14 +120,14 @@ func (s *WebServer) threadTreePage(c *gin.Context) { SortBy: "date", } - treeResponse, err := s.DB.GetThreadTreeView(groupDBs, threadRoot, options) + treeResponse, err := s.DB.GetThreadTreeView(groupDB, threadRoot, options) if err != nil { s.renderError(c, http.StatusInternalServerError, "Tree Error", "Failed to build thread tree: "+err.Error()) return } // Get root article for title - rootOverview, err := s.DB.GetOverviewByArticleNum(groupDBs, threadRoot) + rootOverview, err := s.DB.GetOverviewByArticleNum(groupDB, threadRoot) if err != nil { s.renderError(c, http.StatusNotFound, "Article Not Found", "Root article not found") return @@ -190,12 +190,12 @@ func (s *WebServer) sectionThreadTreePage(c *gin.Context) { } // Get group database - groupDBs, err := s.DB.GetGroupDBs(groupName) + groupDB, err := s.DB.GetGroupDB(groupName) if err != nil { s.renderError(c, http.StatusNotFound, "Group Not Found", "Group not found: "+groupName) return } - defer groupDBs.Return(s.DB) + defer groupDB.Return() // Get tree view options := database.TreeViewOptions{ @@ -205,14 +205,14 @@ func (s *WebServer) sectionThreadTreePage(c *gin.Context) { SortBy: "date", } - treeResponse, err := s.DB.GetThreadTreeView(groupDBs, threadRoot, options) + treeResponse, err := s.DB.GetThreadTreeView(groupDB, threadRoot, options) if err != nil { s.renderError(c, http.StatusInternalServerError, "Tree Error", "Failed to build thread tree: "+err.Error()) return } // Get root article for title - rootOverview, err := s.DB.GetOverviewByArticleNum(groupDBs, threadRoot) + rootOverview, err := s.DB.GetOverviewByArticleNum(groupDB, threadRoot) if err != nil { s.renderError(c, http.StatusNotFound, "Article Not Found", "Root article not found") return diff --git a/internal/web/webgroupPage.go b/internal/web/webgroupPage.go index e09a68b3..f2bcdd1c 100644 --- a/internal/web/webgroupPage.go +++ 
b/internal/web/webgroupPage.go @@ -49,9 +49,9 @@ func (s *WebServer) groupPage(c *gin.Context) { } // Try to get group overview with pagination - groupDBs, err := s.DB.GetGroupDBs(groupName) + groupDB, err := s.DB.GetGroupDB(groupName) if err != nil { - // Handle error case - groupDBs is nil, so don't try to return it + // Handle error case - groupDB is nil, so don't try to return it data := GroupPageData{ TemplateData: s.getBaseTemplateData(c, groupName), GroupName: groupName, @@ -68,7 +68,7 @@ func (s *WebServer) groupPage(c *gin.Context) { } return } - defer groupDBs.Return(s.DB) // Only defer if groupDBs is not nil + defer groupDB.Return() // Only defer if groupDB is not nil var articles []*models.Overview var totalCount int @@ -87,7 +87,7 @@ func (s *WebServer) groupPage(c *gin.Context) { // Get the article_num at the skip position by querying with OFFSET once var cursorArticleNum int64 - err = database.RetryableQueryRowScan(groupDBs.DB, ` + err = database.RetryableQueryRowScan(groupDB.DB, ` SELECT article_num FROM articles WHERE hide = 0 ORDER BY article_num DESC @@ -99,14 +99,14 @@ func (s *WebServer) groupPage(c *gin.Context) { } } - articles, totalCount, hasMore, err = s.DB.GetOverviewsPaginated(groupDBs, lastArticleNum, LIMIT_groupPage) + articles, totalCount, hasMore, err = s.DB.GetOverviewsPaginated(groupDB, lastArticleNum, LIMIT_groupPage) if err == nil { // Initialize ArticleNums for all articles for _, article := range articles { if article.ArticleNums == nil { article.ArticleNums = make(map[*string]int64) } - article.ArticleNums[groupDBs.NewsgroupPtr] = article.ArticleNum + article.ArticleNums[groupDB.NewsgroupPtr] = article.ArticleNum } if page > 0 { @@ -127,7 +127,7 @@ func (s *WebServer) groupPage(c *gin.Context) { data := GroupPageData{ TemplateData: s.getBaseTemplateData(c, groupName), GroupName: groupName, - GroupPtr: groupDBs.NewsgroupPtr, + GroupPtr: groupDB.NewsgroupPtr, Articles: articles, Pagination: pagination, } diff --git 
a/rsync_nntp-transfer_TEST.sh b/rsync_nntp-transfer_TEST.sh new file mode 100755 index 00000000..e67d6536 --- /dev/null +++ b/rsync_nntp-transfer_TEST.sh @@ -0,0 +1 @@ +rsync -vaz --progress scripts/nntp-transfer-loop.sh build/nntp-transfer rocksolid-pugleaf@81-171-22-215:~/ diff --git a/scripts/queries.sh b/scripts/queries.sh index b90d8fe7..2cd5ecf0 100644 --- a/scripts/queries.sh +++ b/scripts/queries.sh @@ -25,6 +25,33 @@ # sqlite3 data/cfg/pugleaf.sq3 "UPDATE newsgroups SET active = 0 WHERE name LIKE '%dateien%';" # sqlite3 data/cfg/pugleaf.sq3 "UPDATE newsgroups SET active = 0 WHERE name LIKE '%warez%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%binar%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%bianr%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%bainar%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%banar%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%binaer%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%bini%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%biyar%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%boneless%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%nairies%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%naries%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%alt.bin%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE 'a.b.%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%alt-bin%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%dvdnordic%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%cd.image%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%files.image%';" +# sqlite3 
data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%music.bin%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%nzb%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '4u%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%sex%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%erotic%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%porno%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%pedo%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%paedo%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%dateien%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%warez%';" + # sqlite3 data/cfg/pugleaf.sq3 "UPDATE newsgroups SET max_articles = 1000, expiry_days = 7 WHERE name LIKE '%.test';" # sqlite3 data/cfg/pugleaf.sq3 "UPDATE newsgroups SET max_articles = 1000, expiry_days = 7 WHERE name LIKE 'alt.test.%';" # sqlite3 data/cfg/pugleaf.sq3 "UPDATE newsgroups SET max_articles = 1000, expiry_days = 7 WHERE name LIKE 'jobs.%';" diff --git a/web/templates/admin_newsgroups.html b/web/templates/admin_newsgroups.html index df24e3e8..55bf158f 100644 --- a/web/templates/admin_newsgroups.html +++ b/web/templates/admin_newsgroups.html @@ -97,12 +97,12 @@
📰 Newsgroup Management
- + x ON + Count
Last Edit Newsgroup - Count
Last Section Expiry Actions @@ -113,7 +113,7 @@
📰 Newsgroup Management
{{range .Newsgroups}} - @@ -129,25 +129,45 @@
📰 Newsgroup Management
{{end}} {{if .Active}} - {{else}} - {{end}} - + + {{if .Status}} + + {{if eq .Status "y"}}y + {{else if eq .Status "m"}}m + {{else if eq .Status "n"}}n + {{else if eq .Status "j"}}j + {{else if eq .Status "x"}}x + {{else}}[{{.Status}}]{{end}} + + {{else}} + + err! + + {{end}} + + + C:{{.MessageCount}} +
+ L#{{if .LastArticle}}{{.LastArticle}}{{else}}-{{end}} -
@@ -158,11 +178,6 @@
📰 Newsgroup Management

{{if .Description}}{{.Description}}{{else}}No description{{end}} - - C:{{.MessageCount}} -
- L#{{if .LastArticle}}{{.LastArticle}}{{else}}-{{end}} - {{/* Find current section assignment */}} {{$currentSection := ""}} @@ -218,7 +233,7 @@
📰 Newsgroup Management
{{end}} @@ -246,7 +261,7 @@
📰 Newsgroup Management
{{end}} @@ -483,6 +498,22 @@ + +
+ + +
+ Controls posting permissions and article handling. + For redirect (=), manually enter "=foo.bar" to file articles in foo.bar instead. +
+