From 2c559de252e18a9f89f593d5caedb35b76d2dcc6 Mon Sep 17 00:00:00 2001 From: go-while Date: Sat, 4 Oct 2025 00:13:14 +0000 Subject: [PATCH 001/100] unfinished --- cmd/nntp-transfer/main.go | 465 +++++++++++++++++++++----- internal/nntp/nntp-article-common.go | 9 +- internal/nntp/nntp-backend-pool.go | 2 +- internal/nntp/nntp-client-commands.go | 226 +++++++------ internal/nntp/nntp-client.go | 34 +- 5 files changed, 542 insertions(+), 194 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 149874e..9caef28 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -83,6 +83,9 @@ var appVersion = "-unset-" var redisCtx = context.Background() var REDIS_TTL time.Duration = 3600 * time.Second // default 1h +var MaxThreads int +var BatchCheck int + func main() { common.VerboseHeaders = false config.AppVersion = appVersion @@ -159,6 +162,7 @@ func main() { if *batchCheck < 1 || *batchCheck > 10000 { log.Fatalf("Error: batch-check must be between 1 and 10000 (got %d)", *batchCheck) } + BatchCheck = *batchCheck // Validate batch size if *batchDB < 100 { @@ -170,6 +174,7 @@ func main() { if *maxThreads < 1 || *maxThreads > 500 { log.Fatalf("Error: max-threads must be between 1 and 500 (got %d)", *maxThreads) } + MaxThreads = *maxThreads // Validate UseShortHashLen if *useShortHashLen < 2 || *useShortHashLen > 7 { @@ -1159,29 +1164,30 @@ func transferNewsgroup(db *database.Database, proc *processor.Processor, pool *n log.Printf("Newsgroup: '%s' | Sleeping %v before retrying batch %d-%d (transferred %d so far)", newsgroup.Name, isleep, i+1, end, transferred) time.Sleep(isleep) } - // Get connection from pool - conn, err := pool.Get(nntp.MODE_STREAM_MV) - if err != nil { - log.Printf("Newsgroup: '%s' | Failed to get connection from pool: %v", newsgroup.Name, err) - isleep = isleep * 2 - continue forever - } - - if conn.ModeReader { - if VERBOSE { - log.Printf("got connection in reader mode, closing and getting a new one") + /* + // Get connection from pool + conn, err := pool.Get(nntp.MODE_STREAM_MV) + if err != nil { + log.Printf("Newsgroup: '%s' | Failed to get connection from pool: %v", newsgroup.Name, err) + isleep = isleep * 2 + continue forever } - conn.ForceCloseConn() - continue forever - } - batchTransferred, batchChecked, TTsuccessRate, rc, berr := processBatch(conn, newsgroup.Name, ttMode, articles[i:end], redisCli) + if conn.ModeReader { + if VERBOSE { + log.Printf("got connection in reader mode, closing and getting a new one") + } + conn.ForceCloseConn() + continue forever + } + */ + batchTransferred, batchChecked, TTsuccessRate, rc, berr := processBatch(&newsgroup.Name, ttMode, articles[i:end], redisCli) transferred += batchTransferred redis_cache_hits += rc checked += batchChecked if berr != nil { log.Printf("Newsgroup: '%s' | Error processing network batch: %v ... 
retry", newsgroup.Name, berr) - conn.ForceCloseConn() + //conn.ForceCloseConn() isleep = isleep * 2 ttMode.CheckMode = true ttMode.TmpSuccessCount = 0 @@ -1191,7 +1197,7 @@ func transferNewsgroup(db *database.Database, proc *processor.Processor, pool *n if VERBOSE || (transferred >= 1000 && transferred%1000 == 0) || (checked >= 1000 && checked%1000 == 0) { log.Printf("Newsgroup: '%s' | BatchDone (offset %d/%d) %d-%d TX:%d check=%t ttRate=%.1f%% checked=%d redis_cache_hits=%d/%d", newsgroup.Name, offset, totalArticles, i+1, end, batchTransferred, ttMode.CheckMode, TTsuccessRate, batchChecked, rc, redis_cache_hits) } - pool.Put(conn) + //pool.Put(conn) break forever } } @@ -1233,7 +1239,7 @@ var upperLevel float64 = 95.0 // processBatch processes a batch of articles using NNTP streaming protocol (RFC 4644) // Uses TAKETHIS primarily, falls back to CHECK when success rate < 95% -func processBatch(conn *nntp.BackendConn, newsgroup string, ttMode *nntp.TakeThisMode, articles []*models.Article, redisCli *redis.Client) (transferred uint64, checked uint64, successRate float64, redis_cache_hits uint64, err error) { +func processBatch(newsgroup *string, ttMode *nntp.TakeThisMode, articles []*models.Article, redisCli *redis.Client) (transferred uint64, checked uint64, successRate float64, redis_cache_hits uint64, err error) { if len(articles) == 0 { return 0, 0, 0, 0, nil @@ -1253,20 +1259,17 @@ func processBatch(conn *nntp.BackendConn, newsgroup string, ttMode *nntp.TakeThi log.Printf("Newsgroup: '%s' | TAKETHIS success rate %.1f%% >= %f%%, switching to TAKETHIS mode", newsgroup, successRate, upperLevel) } - articleMap := make(map[string]*models.Article) - for _, article := range articles { - articleMap[article.MessageID] = article + job := &CHTTJob{ + Newsgroup: newsgroup, + MessageIDs: make([]*string, 0, len(articles)), + ArticleMap: make(map[*string]*models.Article, len(articles)), + ResponseChan: make(chan *TTResponse, 1), + TTMode: ttMode, } - switch ttMode.CheckMode { case true: // ttMode.CheckMode // CHECK mode: verify articles are wanted before sending //log.Printf("Newsgroup: '%s' | CHECK: %d articles (success rate: %.1f%%)", newsgroup, len(articles), successRate) - - checkMessageIds := make([]*string, 0, len(articles)) - checkArticles := make([]*models.Article, 0, len(articles)) - checkArticleMap := make(map[string]*models.Article, len(articles)) - // Batch check Redis cache using pipeline (1 round trip for all keys) if redisCli != nil && len(articles) > 0 { pipe := redisCli.Pipeline() @@ -1296,13 +1299,13 @@ func processBatch(conn *nntp.BackendConn, newsgroup string, ttMode *nntp.TakeThi log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis (skip [CHECK])", newsgroup, article.MessageID) } redis_cache_hits++ + articles[i] = nil // mark as nil to skip later continue } // Not cached - add to valid list - checkMessageIds = append(checkMessageIds, &article.MessageID) - checkArticles = append(checkArticles, article) - checkArticleMap[article.MessageID] = article + job.MessageIDs = append(job.MessageIDs, &article.MessageID) + job.ArticleMap[&article.MessageID] = article } } else { // No Redis - add all non-nil message IDs @@ -1310,28 +1313,29 @@ func processBatch(conn *nntp.BackendConn, newsgroup string, ttMode *nntp.TakeThi if article == nil { continue } - checkMessageIds = append(checkMessageIds, &article.MessageID) - checkArticles = append(checkArticles, article) - checkArticleMap[article.MessageID] = article + job.MessageIDs = append(job.MessageIDs, &article.MessageID) + 
job.ArticleMap[&article.MessageID] = article } } - if len(checkMessageIds) == 0 { + if len(job.MessageIDs) == 0 { log.Printf("Newsgroup: '%s' | No message IDs to check in batch. (redis_cache_hits: %d)", newsgroup, redis_cache_hits) return transferred, checked, successRate, redis_cache_hits, nil } if VERBOSE { - log.Printf("Newsgroup: '%s' | Sending CHECK commands for %d valid articles (filtered from %d)", newsgroup, len(checkMessageIds), len(articles)) + log.Printf("Newsgroup: '%s' | Sending CHECK commands for %d valid articles (filtered from %d)", newsgroup, len(job.MessageIDs), len(articles)) } - + CheckQueue <- job + //log.Printf("Newsgroup: '%s' | CHECK wanted %d articles", newsgroup, len(wantedIds)) // Send CHECK commands for all message IDs - wantedIds, err := conn.CheckMultiple(checkMessageIds, ttMode) - if err != nil { - ttMode.ConnErrors++ - conn.ForceCloseConn() - return transferred, checked, successRate, redis_cache_hits, fmt.Errorf("Newsgroup: '%s' | failed to send CHECK command: %v", newsgroup, err) - } - checked += uint64(len(checkMessageIds)) + /* disabled + wantedIds, err := conn.CheckMultiple(checkMessageIds, ttMode) + if err != nil { + ttMode.ConnErrors++ + conn.ForceCloseConn() + return transferred, checked, successRate, redis_cache_hits, fmt.Errorf("Newsgroup: '%s' | failed to send CHECK command: %v", newsgroup, err) + } + checked += uint64(len(checkMessageIds)) if len(wantedIds) == 0 { //log.Printf("No articles wanted by server in this batch") @@ -1341,7 +1345,6 @@ func processBatch(conn *nntp.BackendConn, newsgroup string, ttMode *nntp.TakeThi } // Send TAKETHIS for wanted articles - wantedArticles := make([]*models.Article, 0, len(wantedIds)) for _, msgId := range wantedIds { if msgId == nil { continue @@ -1351,13 +1354,13 @@ func processBatch(conn *nntp.BackendConn, newsgroup string, ttMode *nntp.TakeThi log.Printf("WARN: Article not found in checkArticleMap for msgId: %s", *msgId) continue } - wantedArticles = append(wantedArticles, article) + ttJob.WantedArticles = append(ttJob.WantedArticles, article) } - log.Printf("Newsgroup: '%s' | CHECK wants: %d/%d message IDs. sending=%d", newsgroup, len(wantedIds), len(checkMessageIds), len(wantedArticles)) - //log.Printf("Newsgroup: '%s' | Calling sendArticlesBatchViaTakeThis with %d articles...", newsgroup, len(wantedArticles)) + log.Printf("Newsgroup: '%s' | CHECK wants: %d/%d message IDs. 
sending=%d", newsgroup, len(wantedIds), len(checkMessageIds), len(ttJob.WantedArticles)) + txcount, rc, err := sendArticlesBatchViaTakeThis(conn, wantedArticles, ttMode, newsgroup, redisCli) - log.Printf("Newsgroup: '%s' | sendArticlesBatchViaTakeThis returned: transferred=%d redis_cached=%d err=%v", newsgroup, txcount, rc, err) + //log.Printf("Newsgroup: '%s' | sendArticlesBatchViaTakeThis returned: transferred=%d redis_cached=%d err=%v", newsgroup, txcount, rc, err) transferred += txcount redis_cache_hits += rc @@ -1366,6 +1369,7 @@ func processBatch(conn *nntp.BackendConn, newsgroup string, ttMode *nntp.TakeThi return transferred, checked, successRate, redis_cache_hits, fmt.Errorf("failed to send CHECKED TAKETHIS batch: %v", err) } return transferred, checked, successRate, redis_cache_hits, nil + */ // end case ttMode.CheckMode // case !ttMode.CheckMode case false: @@ -1373,31 +1377,32 @@ func processBatch(conn *nntp.BackendConn, newsgroup string, ttMode *nntp.TakeThi //log.Printf("Newsgroup: '%s' | TAKETHIS: %d articles (success rate: %.1f%%)", newsgroup, len(articles), successRate) // Validate articles before sending in TAKETHIS mode - wantedArticles := make([]*models.Article, 0, len(articles)) - for i, article := range articles { + + for _, article := range articles { if article == nil { continue } - if strings.Contains(article.MessageID, ">?<") { - log.Printf("ERROR: Invalid message ID contains '>?<' in TAKETHIS mode: '%s' - skipping", article.MessageID) - articles[i] = nil - continue - } - if len(article.MessageID) > 128 { - log.Printf("WARN: Message ID very long in TAKETHIS mode (%d chars): '%.100s...'", len(article.MessageID), article.MessageID) - } - wantedArticles = append(wantedArticles, article) + job.Articles = append(job.Articles, article) } - if len(wantedArticles) == 0 { + if len(job.Articles) == 0 { log.Printf("WARN: No valid articles for TAKETHIS mode, skipping batch") return transferred, checked, successRate, redis_cache_hits, nil } + TakeThisQueue <- job // TODO DEAD END + /* + response := <-ttJob.ResponseChan + transferred += response.txcount + redis_cache_hits += response.rc + successRate = float64(response.txcount) / float64(len(ttJob.WantedArticles)) * 100 + if response.err != nil { + log.Printf("Newsgroup: '%s' | Failed to send TAKETHIS: %v", newsgroup, response.err) + return transferred, checked, successRate, redis_cache_hits, fmt.Errorf("failed to send TAKETHIS batch: %v", response.err) + } + */ + //log.Printf("Newsgroup: '%s' | TAKETHIS sent: transferred=%d redis_cached=%d", newsgroup, response.txcount, response.rc) - if len(wantedArticles) != len(articles) { - log.Printf("Newsgroup: '%s' | Filtered articles for TAKETHIS: %d valid from %d total", newsgroup, len(wantedArticles), len(articles)) - } - + /* TODO HERE txcount, rc, err := sendArticlesBatchViaTakeThis(conn, wantedArticles, ttMode, newsgroup, redisCli) transferred += txcount redis_cache_hits += rc @@ -1412,20 +1417,49 @@ func processBatch(conn *nntp.BackendConn, newsgroup string, ttMode *nntp.TakeThi ttMode.TmpTakeThisCount = 0 } } - return transferred, checked, successRate, redis_cache_hits, nil + */ + } // end case !ttMode.CheckMode // end switch ttMode.CheckMode - return 0, 0, 0, 0, fmt.Errorf("uncatched case in processBatch") + + return transferred, checked, successRate, redis_cache_hits, nil } // end func processBatch -type CheckResponse struct { +var TakeThisQueue = make(chan *CHTTJob, MaxThreads) +var CheckQueue = make(chan *CHTTJob, MaxThreads) + +// CHECK/TAKETHIS Job +type CHTTJob struct { + 
Mux sync.Mutex + Newsgroup *string + MessageIDs []*string + WantedIDs []*string + Wanted uint64 + Unwanted uint64 + Rejected uint64 + Retry uint64 + Transferred uint64 + RedisCached uint64 + TX_Errors uint64 + ResponseChan chan *TTResponse + Articles []*models.Article + ArticleMap map[*string]*models.Article + TTMode *nntp.TakeThisMode +} + +type TTResponse struct { + Job *CHTTJob + err error +} + +type CheckResponse struct { // deprecated CmdId uint Article *models.Article } // sendArticlesBatchViaTakeThis sends multiple articles via TAKETHIS in streaming mode // Sends all TAKETHIS commands first, then reads all responses (true streaming) -func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Article, ttMode *nntp.TakeThisMode, newsgroup string, redisCli *redis.Client) (transferred uint64, redis_cached uint64, err error) { +func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Article, job *CHTTJob, newsgroup string, redisCli *redis.Client) (transferred uint64, redis_cached uint64, err error) { if len(articles) == 0 { return 0, 0, nil } @@ -1507,10 +1541,10 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art // Phase 2: Read all responses in order for cr := range artChan { - ttMode.TmpTakeThisCount++ + job.TTMode.TmpTakeThisCount++ takeThisResponseCode, err := conn.ReadTakeThisResponseStreaming(cr.CmdId) if err != nil || takeThisResponseCode == 0 { - ttMode.ConnErrors++ + job.TTMode.ConnErrors++ conn.ForceCloseConn() log.Printf("ERROR Newsgroup: '%s' | Failed to read TAKETHIS response for %s: %v", newsgroup, cr.Article.MessageID, err) return transferred, redis_cached, fmt.Errorf("failed to read TAKETHIS response for %s: %v", cr.Article.MessageID, err) @@ -1519,10 +1553,10 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art // Update success rate tracking switch takeThisResponseCode { case 239: - ttMode.TmpSuccessCount++ - transferred++ + job.TTMode.TmpSuccessCount++ + job.Transferred++ case 439: - ttMode.Rejected++ + job.Rejected++ rejected++ if VERBOSE { log.Printf("Newsgroup: '%s' | Rejected article '%s': response=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, takeThisResponseCode, countDone, len(articles)) @@ -1532,13 +1566,13 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art } case 400, 480, 500, 501, 502, 503, 504: log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': response=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, takeThisResponseCode, countDone, len(articles)) - ttMode.TX_Errors++ + job.TX_Errors++ conn.ForceCloseConn() return transferred, redis_cached, fmt.Errorf("failed to transfer article '%s': response=%d", cr.Article.MessageID, takeThisResponseCode) default: log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': unknown response=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, takeThisResponseCode, countDone, len(articles)) - ttMode.TX_Errors++ + job.TX_Errors++ continue } if redisCli != nil { @@ -1578,3 +1612,284 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art } return transferred, redis_cached, nil } // end func sendArticlesBatchViaTakeThis + +func BootConnWorkers(pool *nntp.Pool) { + openConns := 0 + defaultSleep := time.Second + isleep := defaultSleep + CHECKrunning := 0 + var mux sync.Mutex +forever: + for { + time.Sleep(defaultSleep) + if common.WantShutdown() { + log.Printf("BootConnWorkers: WantShutdown, exiting") + break forever + } + if openConns == 
MaxThreads { + continue forever + } + sharedConns := make([]*nntp.BackendConn, 0, MaxThreads) + bootN := MaxThreads - openConns - 1 + // get connections from pool + for i := bootN; i < MaxThreads; i++ { + // Get a connection from pool + conn, err := pool.Get(nntp.MODE_STREAM_MV) + if err != nil { + isleep = isleep * 2 + if isleep > time.Minute { + isleep = time.Minute + } + log.Printf("BootConnWorkers failed to get connection from pool: %v ... retry in: %v", err, isleep) + time.Sleep(isleep) + continue forever + } + if conn.ModeReader { + if VERBOSE { + log.Printf("got connection in reader mode, closing and getting a new one") + } + conn.ForceCloseConn() + continue forever + } + + sharedConns = append(sharedConns, conn) + openConns++ + } + isleep = defaultSleep // reset to default + CHECKreturnChans := make([]chan struct{}, 0, len(sharedConns)) + for i, conn := range sharedConns { + CHECKreturnChans[i] = make(chan struct{}, 1) + go CheckWorker(conn, CHECKreturnChans[i]) + } + mux.Lock() + CHECKrunning += len(sharedConns) + mux.Unlock() + // Monitor CHECK workers + go func() { + checkBOOTED := len(sharedConns) + for { + time.Sleep(100 * time.Millisecond) + for i, retChan := range CHECKreturnChans { + if retChan == nil { + continue + } + select { + case <-CHECKreturnChans[i]: + //log.Printf("CheckWorker %d exited, restarting...", i) + mux.Lock() + CHECKrunning-- + mux.Unlock() + checkBOOTED-- + CHECKreturnChans[i] = nil + default: + // Worker still running + } + } + if checkBOOTED == 0 { + return + } + } + }() + } // end forever +} // end func BootConnWorkers + +var DefaultCheckTicker = 5 * time.Second + +func CheckWorker(conn *nntp.BackendConn, returnSignal chan struct{}) { + var mux sync.Mutex + + tmpMessageIDs := make([]*string, 0, BatchCheck) + readResponsesChan := make(chan *string, BatchCheck) + jobMap := make(map[*string]*CHTTJob, BatchCheck) // maps messageIds to Jobs + + lastRun := time.Now() + tickChan := make(chan struct{}, 1) + errChan := make(chan struct{}, 1) + + defer func(conn *nntp.BackendConn) { + conn.ForceCloseConn() + returnSignal <- struct{}{} + close(errChan) + }(conn) + + // launch go routine which sends CHECK commands if threshold exceeds BatchCheck + go func(mux *sync.Mutex) { + // tick every n seconds to check if any CHECKs to do + ticker := time.NewTicker(DefaultCheckTicker) + defer ticker.Stop() + forever: + for { + select { + case <-errChan: + return + case <-tickChan: + if common.WantShutdown() { + log.Printf("CheckWorker: Tick WantShutdown, exiting") + return + } + if common.IsClosedChannel(errChan) { + log.Printf("CheckWorker: Tick errChan closed, exiting") + return + } + // process CHECK commands + mux.Lock() + hasWork := len(tmpMessageIDs) > 0 + mux.Unlock() + if !hasWork { + log.Printf("CheckWorker: Ticked but no work? 
continue...") + continue forever + } + + // copy accumulated message IDs and clear slice + mux.Lock() + checkIds := make([]*string, len(tmpMessageIDs)) + copy(checkIds, tmpMessageIDs) + tmpMessageIDs = tmpMessageIDs[:0] //clear + mux.Unlock() + + log.Printf("CheckWorker: Ticker sending CHECK: %d message IDs", len(checkIds)) + err := conn.SendCheckMultiple(checkIds) + if err != nil { + log.Printf("CheckWorker: SendCheckMultiple error: %v", err) + return + } + for _, msgID := range checkIds { + if msgID != nil { + // pass message ID pointer to channel + // to read the responses from connection + readResponsesChan <- msgID + } + } + + mux.Lock() + lastRun = time.Now() + mux.Unlock() + + case <-ticker.C: + if common.WantShutdown() { + log.Printf("CheckWorker: Ticker WantShutdown, exiting") + return + } + mux.Lock() + hasWork := len(tmpMessageIDs) > 0 && time.Since(lastRun) >= DefaultCheckTicker + mux.Unlock() + if hasWork { + select { + case tickChan <- struct{}{}: + default: + // tickChan full, tickChan will tick + } + } + } // end select + } // end forever + }(&mux) + + // launch a go routine to read CHECK responses from the supplied connection with textproto readline + go func(mux *sync.Mutex) { + forever: + for { + select { + case <-errChan: + log.Printf("CheckWorker: Read CHECK errChan closed, exiting") + return + case msgID := <-readResponsesChan: + if common.WantShutdown() { + log.Printf("CheckWorker: Read CHECK WantShutdown, exiting") + return + } + if common.IsClosedChannel(errChan) { + log.Printf("CheckWorker: Read CHECK errChan closed, exiting") + return + } + code, line, err := conn.TextConn.ReadCodeLine(238) + if code == 0 && err != nil { + log.Printf("Failed to read CHECK response: %v", err) + return + } + // Parse response line + // Format: code [message] + // 238 - article wanted + // 431 - article not wanted + // 438 - article not wanted (already have it) + // ReadCodeLine returns: code=238, message=" article wanted" + parts := strings.Fields(line) + if len(parts) < 1 { + log.Printf("Malformed CHECK response: %s", line) + return + } + if parts[0] != *msgID { + log.Printf("Mismatched CHECK response: expected %s, got %s", *msgID, parts[0]) + return + } + mux.Lock() + checkjob, exists := jobMap[msgID] + mux.Unlock() + if !exists { + log.Printf("ERROR in CheckWorker: ReadCheckResponse msgId did not exist in jobMap: %s", *msgID) + continue forever + } + switch code { + case 238: + //log.Printf("Wanted Article '%s': response=%d", *msgID, code) + + mux.Lock() + delete(jobMap, msgID) + mux.Unlock() + + checkjob.Mux.Lock() + checkjob.WantedIDs = append(checkjob.WantedIDs, msgID) + checkjob.Wanted++ + checkjob.Mux.Unlock() + + case 438: + //log.Printf("Unwanted Article '%s': response=%d", *msgID, code) + checkjob.Mux.Lock() + checkjob.Unwanted++ + checkjob.Mux.Unlock() + + case 431: + checkjob.Mux.Lock() + checkjob.Retry++ + checkjob.Mux.Unlock() + + default: + log.Printf("Unknown CHECK response: line='%s' code=%d expected msgID %s", line, code, *msgID) + return + } + } // end select + } // end forever + }(&mux) + +forever: + for checkjob := range CheckQueue { + if common.WantShutdown() { + log.Printf("CheckWorker: WantShutdown, exiting") + return + } + if checkjob == nil || len(checkjob.MessageIDs) == 0 { + if VERBOSE { + log.Printf("CheckWorker: empty job, skipping") + } + checkjob.ResponseChan <- []*string{} + continue forever + } + for _, msgId := range checkjob.MessageIDs { + if msgId != nil { + mux.Lock() + tmpMessageIDs = append(tmpMessageIDs, msgId) + jobMap[msgId] = checkjob + 
mux.Unlock() + } + } + if len(tmpMessageIDs) >= BatchCheck { + // Signal to process CHECK commands + if VERBOSE { + log.Printf("CheckWorker: processing batch of %d message IDs", len(tmpMessageIDs)) + } + select { + case tickChan <- struct{}{}: + default: + } + } + } // end for forever +} // end func CheckWorker diff --git a/internal/nntp/nntp-article-common.go b/internal/nntp/nntp-article-common.go index d765f9c..ec15c19 100644 --- a/internal/nntp/nntp-article-common.go +++ b/internal/nntp/nntp-article-common.go @@ -33,15 +33,14 @@ type ArticleRetrievalResult struct { // used in nntp-transfer/main.go type TakeThisMode struct { - Wanted uint64 - Unwanted uint64 - Rejected uint64 + //Wanted uint64 + //Unwanted uint64 + //Rejected uint64 TX_Errors uint64 ConnErrors uint64 TmpSuccessCount uint64 TmpTakeThisCount uint64 - - CheckMode bool // Start with TAKETHIS mode (false) + CheckMode bool // Start with TAKETHIS mode (false) } // retrieveArticleCommon handles the common logic for ARTICLE, HEAD, BODY, and STAT commands diff --git a/internal/nntp/nntp-backend-pool.go b/internal/nntp/nntp-backend-pool.go index f967ce8..9654cf2 100644 --- a/internal/nntp/nntp-backend-pool.go +++ b/internal/nntp/nntp-backend-pool.go @@ -332,7 +332,7 @@ func (pool *Pool) Put(conn *BackendConn) error { pool.mux.Unlock() return nil } - conn.writer.Reset(conn.conn) + conn.Writer.Reset(conn.conn) pool.mux.RUnlock() conn.UpdateLastUsed() // set lastused before returning to pool diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 3c17fc0..1e4b352 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -48,15 +48,15 @@ func (c *BackendConn) StatArticle(messageID string) (bool, error) { c.lastUsed = time.Now() - id, err := c.textConn.Cmd("STAT %s", messageID) + id, err := c.TextConn.Cmd("STAT %s", messageID) if err != nil { return false, fmt.Errorf("failed to send STAT command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, _, err := c.textConn.ReadCodeLine(223) + code, _, err := c.TextConn.ReadCodeLine(223) if err != nil { return false, fmt.Errorf("failed to read STAT response: %w", err) } @@ -93,15 +93,15 @@ func (c *BackendConn) GetArticle(messageID *string, bulkmode bool) (*models.Arti } }() */ - id, err := c.textConn.Cmd("ARTICLE %s", *messageID) + id, err := c.TextConn.Cmd("ARTICLE %s", *messageID) if err != nil { return nil, fmt.Errorf("failed to send ARTICLE '%s' command: %w", *messageID, err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, message, err := c.textConn.ReadCodeLine(ArticleFollows) + code, message, err := c.TextConn.ReadCodeLine(ArticleFollows) if err != nil && code == 0 { log.Printf("[ERROR] failed to read ARTICLE '%s' code=%d message='%s' err: %v", *messageID, code, message, err) return nil, fmt.Errorf("failed to read ARTICLE '%s' code=%d message='%s' err: %v", *messageID, code, message, err) @@ -146,15 +146,15 @@ func (c *BackendConn) GetHead(messageID string) (*models.Article, error) { c.lastUsed = time.Now() - id, err := c.textConn.Cmd("HEAD %s", messageID) + id, err := c.TextConn.Cmd("HEAD %s", messageID) if err != nil { return nil, fmt.Errorf("failed to send HEAD command: %w", err) 
} - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, message, err := c.textConn.ReadCodeLine(HeadFollows) + code, message, err := c.TextConn.ReadCodeLine(HeadFollows) if err != nil { return nil, fmt.Errorf("failed to read HEAD response: %w", err) } @@ -202,15 +202,15 @@ func (c *BackendConn) GetBody(messageID string) ([]byte, error) { c.lastUsed = time.Now() - id, err := c.textConn.Cmd("BODY %s", messageID) + id, err := c.TextConn.Cmd("BODY %s", messageID) if err != nil { return nil, fmt.Errorf("failed to send BODY command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, message, err := c.textConn.ReadCodeLine(BodyFollows) + code, message, err := c.TextConn.ReadCodeLine(BodyFollows) if err != nil { return nil, fmt.Errorf("failed to read BODY response: %w", err) } @@ -250,15 +250,15 @@ func (c *BackendConn) ListGroups() ([]GroupInfo, error) { c.lastUsed = time.Now() - id, err := c.textConn.Cmd("LIST") + id, err := c.TextConn.Cmd("LIST") if err != nil { return nil, fmt.Errorf("failed to send LIST command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, message, err := c.textConn.ReadCodeLine(215) + code, message, err := c.TextConn.ReadCodeLine(215) if err != nil { return nil, fmt.Errorf("failed to read LIST response: %w", err) } @@ -297,15 +297,15 @@ func (c *BackendConn) ListGroupsLimited(maxGroups int) ([]GroupInfo, error) { c.lastUsed = time.Now() - id, err := c.textConn.Cmd("LIST") + id, err := c.TextConn.Cmd("LIST") if err != nil { return nil, fmt.Errorf("failed to send LIST command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, message, err := c.textConn.ReadCodeLine(215) + code, message, err := c.TextConn.ReadCodeLine(215) if err != nil { return nil, fmt.Errorf("failed to read LIST response: %w", err) } @@ -326,7 +326,7 @@ func (c *BackendConn) ListGroupsLimited(maxGroups int) ([]GroupInfo, error) { break } - line, err := c.textConn.ReadLine() + line, err := c.TextConn.ReadLine() if err != nil { return nil, fmt.Errorf("failed to read group list: %w", err) } @@ -354,7 +354,7 @@ func (c *BackendConn) ListGroupsLimited(maxGroups int) ([]GroupInfo, error) { // Read remaining lines until end marker if we hit the limit if lineCount >= maxGroups { for { - line, err := c.textConn.ReadLine() + line, err := c.TextConn.ReadLine() if err != nil { break } @@ -378,15 +378,15 @@ func (c *BackendConn) SelectGroup(groupName string) (*GroupInfo, int, error) { c.lastUsed = time.Now() - id, err := c.textConn.Cmd("GROUP %s", groupName) + id, err := c.TextConn.Cmd("GROUP %s", groupName) if err != nil { return nil, 0, err } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, message, err := c.textConn.ReadCodeLine(211) + code, message, err := c.TextConn.ReadCodeLine(211) if err != nil { if 
code != 411 { log.Printf("[ERROR] failed to read GROUP '%s' code=%d message='%s' err: %v", groupName, code, message, err) @@ -454,18 +454,18 @@ func (c *BackendConn) XOver(groupName string, start, end int64, enforceLimit boo var id uint if end > 0 { - id, err = c.textConn.Cmd("XOVER %d-%d", start, end) + id, err = c.TextConn.Cmd("XOVER %d-%d", start, end) } else { - id, err = c.textConn.Cmd("XOVER %d", start) + id, err = c.TextConn.Cmd("XOVER %d", start) } if err != nil { return nil, fmt.Errorf("failed to send XOVER command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, message, err := c.textConn.ReadCodeLine(224) + code, message, err := c.TextConn.ReadCodeLine(224) if err != nil { return nil, fmt.Errorf("failed to read XOVER response: %w", err) } @@ -519,18 +519,18 @@ func (c *BackendConn) XHdr(groupName, field string, start, end int64) ([]*Header log.Printf("XHdr group '%s' field '%s' start=%d end=%d", groupName, field, start, end) var id uint if end > 0 { - id, err = c.textConn.Cmd("XHDR %s %d-%d", field, start, end) + id, err = c.TextConn.Cmd("XHDR %s %d-%d", field, start, end) } else { - id, err = c.textConn.Cmd("XHDR %s %d", field, start) + id, err = c.TextConn.Cmd("XHDR %s %d", field, start) } if err != nil { return nil, fmt.Errorf("failed to send XHDR command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, message, err := c.textConn.ReadCodeLine(221) + code, message, err := c.TextConn.ReadCodeLine(221) if err != nil { return nil, fmt.Errorf("failed to read XHDR response: %w", err) } @@ -656,16 +656,16 @@ func (c *BackendConn) XHdrStreamedBatch(groupName, field string, start, end int6 var id uint if end > 0 { - id, err = c.textConn.Cmd("XHDR %s %d-%d", field, start, end) + id, err = c.TextConn.Cmd("XHDR %s %d-%d", field, start, end) } else { - id, err = c.textConn.Cmd("XHDR %s %d-%d", field, start, start) + id, err = c.TextConn.Cmd("XHDR %s %d-%d", field, start, start) } if err != nil { return fmt.Errorf("failed to send XHDR command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state // Check for shutdown before reading initial response if c.WantShutdown(shutdownChan) { @@ -673,7 +673,7 @@ func (c *BackendConn) XHdrStreamedBatch(groupName, field string, start, end int6 return fmt.Errorf("shutdown requested") } - code, message, err := c.textConn.ReadCodeLine(221) + code, message, err := c.TextConn.ReadCodeLine(221) if err != nil { return fmt.Errorf("failed to read XHDR response: %w", err) } @@ -691,7 +691,7 @@ func (c *BackendConn) XHdrStreamedBatch(groupName, field string, start, end int6 return fmt.Errorf("shutdown requested") } - line, err := c.textConn.ReadLine() + line, err := c.TextConn.ReadLine() if err != nil { log.Printf("[ERROR] XHdrStreamed read error ng: '%s' err='%v'", groupName, err) // EOF or error, finish streaming @@ -738,18 +738,18 @@ func (c *BackendConn) ListGroup(groupName string, start, end int64) ([]int64, er var id uint var err error if start > 0 && end > 0 { - id, err = c.textConn.Cmd("LISTGROUP %s %d-%d", groupName, start, end) + id, err = 
c.TextConn.Cmd("LISTGROUP %s %d-%d", groupName, start, end) } else { - id, err = c.textConn.Cmd("LISTGROUP %s", groupName) + id, err = c.TextConn.Cmd("LISTGROUP %s", groupName) } if err != nil { return nil, fmt.Errorf("failed to send LISTGROUP command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) // Always clean up response state + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Always clean up response state - code, message, err := c.textConn.ReadCodeLine(211) + code, message, err := c.TextConn.ReadCodeLine(211) if err != nil { return nil, fmt.Errorf("failed to read LISTGROUP response: %w", err) } @@ -829,7 +829,7 @@ func (c *BackendConn) readMultilineResponse(src string) ([]string, error) { return nil, fmt.Errorf("too many lines in response (limit: %d)", maxReadLines) } - line, err := c.textConn.ReadLine() + line, err := c.TextConn.ReadLine() if err != nil { return nil, err } @@ -1035,6 +1035,40 @@ func (c *BackendConn) parseHeaderLine(line string) (*HeaderLine, error) { }, nil } +// SendCheckMultiple sends CHECK commands for multiple message IDs without returning responses! +func (c *BackendConn) SendCheckMultiple(messageIDs []*string) error { + c.mux.Lock() + defer c.mux.Unlock() + if !c.connected { + return fmt.Errorf("not connected") + } + + if c.ModeReader { + return fmt.Errorf("cannot check article in reader mode") + } + + if len(messageIDs) == 0 { + return fmt.Errorf("no message IDs provided") + } + + c.lastUsed = time.Now() + for _, msgID := range messageIDs { + if msgID == nil || *msgID == "" { + log.Printf("Skipping empty message ID in CHECK command") + continue + } + _, err := c.Writer.WriteString("CHECK " + *msgID + CRLF) + if err != nil { + return fmt.Errorf("failed to send CHECK command for %s: %w", *msgID, err) + } + } + if err := c.Writer.Flush(); err != nil { + return fmt.Errorf("failed to flush CHECK commands: %w", err) + } + // Responses must be read later using CheckMultiple + return nil +} + // CheckMultiple sends a CHECK command for multiple message IDs and returns responses func (c *BackendConn) CheckMultiple(messageIDs []*string, ttMode *TakeThisMode) ([]*string, error) { c.mux.Lock() @@ -1056,7 +1090,7 @@ func (c *BackendConn) CheckMultiple(messageIDs []*string, ttMode *TakeThisMode) // Send individual CHECK commands for each message ID (pipelining) commandIds := make([]uint, len(messageIDs)) for i, msgID := range messageIDs { - id, err := c.textConn.Cmd("CHECK %s", *msgID) + id, err := c.TextConn.Cmd("CHECK %s", *msgID) if err != nil { return nil, fmt.Errorf("failed to send CHECK command for %s: %w", *msgID, err) } @@ -1068,9 +1102,9 @@ func (c *BackendConn) CheckMultiple(messageIDs []*string, ttMode *TakeThisMode) for i, msgID := range messageIDs { id := commandIds[i] // Read response for this CHECK command - c.textConn.StartResponse(id) - code, line, err := c.textConn.ReadCodeLine(238) - c.textConn.EndResponse(id) + c.TextConn.StartResponse(id) + code, line, err := c.TextConn.ReadCodeLine(238) + c.TextConn.EndResponse(id) if code == 0 && err != nil { log.Printf("Failed to read CHECK response for %s: %v", *msgID, err) return nil, fmt.Errorf("failed to read CHECK response for %s: %w", *msgID, err) @@ -1132,20 +1166,20 @@ func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *str c.lastUsed = time.Now() // Send TAKETHIS command - id, err := c.textConn.Cmd("TAKETHIS %s", article.MessageID) + id, err := c.TextConn.Cmd("TAKETHIS %s", article.MessageID) if err != nil { return 0, 
fmt.Errorf("failed to send TAKETHIS command: %w", err) } // Send headers for _, headerLine := range headers { - if _, err := c.writer.WriteString(headerLine + CRLF); err != nil { + if _, err := c.Writer.WriteString(headerLine + CRLF); err != nil { return 0, fmt.Errorf("failed to write header: %w", err) } } // Send empty line between headers and body - if _, err := c.writer.WriteString(CRLF); err != nil { + if _, err := c.Writer.WriteString(CRLF); err != nil { return 0, fmt.Errorf("failed to write header/body separator: %w", err) } @@ -1166,26 +1200,26 @@ func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *str line = "." + line } - if _, err := c.writer.WriteString(line + CRLF); err != nil { + if _, err := c.Writer.WriteString(line + CRLF); err != nil { return 0, fmt.Errorf("failed to write body line: %w", err) } } // Send termination line (single dot) - if _, err := c.writer.WriteString(DOT + CRLF); err != nil { + if _, err := c.Writer.WriteString(DOT + CRLF); err != nil { return 0, fmt.Errorf("failed to send article terminator: %w", err) } // Flush the writer to ensure all data is sent - if err := c.writer.Flush(); err != nil { + if err := c.Writer.Flush(); err != nil { return 0, fmt.Errorf("failed to flush article data: %w", err) } // Read TAKETHIS response - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) - code, _, err := c.textConn.ReadCodeLine(239) // -1 means any code is acceptable + code, _, err := c.TextConn.ReadCodeLine(239) // -1 means any code is acceptable if code == 0 && err != nil { return 0, fmt.Errorf("failed to read TAKETHIS response: %w", err) } @@ -1219,20 +1253,20 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp } // Send TAKETHIS command - id, err := c.textConn.Cmd("TAKETHIS %s", article.MessageID) + id, err := c.TextConn.Cmd("TAKETHIS %s", article.MessageID) if err != nil { return 0, fmt.Errorf("failed to send TAKETHIS command: %w", err) } // Send headers for _, headerLine := range headers { - if _, err := c.writer.WriteString(headerLine + CRLF); err != nil { + if _, err := c.Writer.WriteString(headerLine + CRLF); err != nil { return 0, fmt.Errorf("failed to write header SendTakeThisArticleStreaming: %w", err) } } // Send empty line between headers and body - if _, err := c.writer.WriteString(CRLF); err != nil { + if _, err := c.Writer.WriteString(CRLF); err != nil { return 0, fmt.Errorf("failed to write header/body separator SendTakeThisArticleStreaming: %w", err) } @@ -1253,18 +1287,18 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp line = "." 
+ line } - if _, err := c.writer.WriteString(line + CRLF); err != nil { + if _, err := c.Writer.WriteString(line + CRLF); err != nil { return 0, fmt.Errorf("failed to write body line SendTakeThisArticleStreaming: %w", err) } } // Send termination line (single dot) - if _, err := c.writer.WriteString(DOT + CRLF); err != nil { + if _, err := c.Writer.WriteString(DOT + CRLF); err != nil { return 0, fmt.Errorf("failed to send article terminator SendTakeThisArticleStreaming: %w", err) } // Flush the writer to ensure all data is sent - if err := c.writer.Flush(); err != nil { + if err := c.Writer.Flush(); err != nil { return 0, fmt.Errorf("failed to flush article data SendTakeThisArticleStreaming: %w", err) } @@ -1279,10 +1313,10 @@ func (c *BackendConn) ReadTakeThisResponseStreaming(id uint) (int, error) { defer c.mux.Unlock() // Read TAKETHIS response - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) - code, _, err := c.textConn.ReadCodeLine(239) + code, _, err := c.TextConn.ReadCodeLine(239) if code == 0 && err != nil { return 0, fmt.Errorf("failed to read TAKETHIS response: %w", err) } @@ -1310,15 +1344,15 @@ func (c *BackendConn) PostArticle(article *models.Article) (int, error) { c.lastUsed = time.Now() // Send POST command - id, err := c.textConn.Cmd("POST") + id, err := c.TextConn.Cmd("POST") if err != nil { return 0, fmt.Errorf("failed to send POST command: %w", err) } - c.textConn.StartResponse(id) + c.TextConn.StartResponse(id) // Read response to POST command - code, line, err := c.textConn.ReadCodeLine(340) - c.textConn.EndResponse(id) + code, line, err := c.TextConn.ReadCodeLine(340) + c.TextConn.EndResponse(id) if err != nil && code == 0 { return code, fmt.Errorf("POST command failed: %s", line) } @@ -1334,14 +1368,14 @@ func (c *BackendConn) PostArticle(article *models.Article) (int, error) { } // Send POST command again - id, err := c.textConn.Cmd("POST") + id, err := c.TextConn.Cmd("POST") if err != nil { return 0, fmt.Errorf("failed to send POST command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) // Read response to POST command - code, line, err = c.textConn.ReadCodeLine(340) + code, line, err = c.TextConn.ReadCodeLine(340) if err != nil { return code, fmt.Errorf("POST command failed: %s", line) } @@ -1355,13 +1389,13 @@ func (c *BackendConn) PostArticle(article *models.Article) (int, error) { // Send headers using writer (not DotWriter) for _, headerLine := range headers { - if _, err := c.writer.WriteString(headerLine + CRLF); err != nil { + if _, err := c.Writer.WriteString(headerLine + CRLF); err != nil { return 0, fmt.Errorf("failed to write header: %w", err) } } // Send empty line between headers and body - if _, err := c.writer.WriteString(CRLF); err != nil { + if _, err := c.Writer.WriteString(CRLF); err != nil { return 0, fmt.Errorf("failed to write header/body separator: %w", err) } @@ -1382,23 +1416,23 @@ func (c *BackendConn) PostArticle(article *models.Article) (int, error) { line = "." 
+ line } - if _, err := c.writer.WriteString(line + CRLF); err != nil { + if _, err := c.Writer.WriteString(line + CRLF); err != nil { return 0, fmt.Errorf("failed to write body line: %w", err) } } // Send termination line (single dot) - if _, err := c.writer.WriteString(DOT + CRLF); err != nil { + if _, err := c.Writer.WriteString(DOT + CRLF); err != nil { return 0, fmt.Errorf("failed to send article terminator: %w", err) } // Flush the writer to ensure all data is sent - if err := c.writer.Flush(); err != nil { + if err := c.Writer.Flush(); err != nil { return 0, fmt.Errorf("failed to flush article data: %w", err) } // Read final response - code, _, err = c.textConn.ReadCodeLine(240) + code, _, err = c.TextConn.ReadCodeLine(240) if err != nil { return code, fmt.Errorf("failed to read POST response: %w", err) } @@ -1433,15 +1467,15 @@ func (c *BackendConn) SwitchToModeReader() error { c.lastUsed = time.Now() // Send MODE READER command - id, err := c.textConn.Cmd("MODE READER") + id, err := c.TextConn.Cmd("MODE READER") if err != nil { return fmt.Errorf("failed to send MODE READER command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) - code, line, err := c.textConn.ReadCodeLine(200) + code, line, err := c.TextConn.ReadCodeLine(200) if err != nil { return fmt.Errorf("failed to read MODE READER response: %w", err) } @@ -1468,15 +1502,15 @@ func (c *BackendConn) SwitchToModeStream() error { c.lastUsed = time.Now() // Send MODE STREAM command - id, err := c.textConn.Cmd("MODE STREAM") + id, err := c.TextConn.Cmd("MODE STREAM") if err != nil { return fmt.Errorf("failed to send MODE STREAM command: %w", err) } - c.textConn.StartResponse(id) - defer c.textConn.EndResponse(id) + c.TextConn.StartResponse(id) + defer c.TextConn.EndResponse(id) - code, line, err := c.textConn.ReadCodeLine(203) + code, line, err := c.TextConn.ReadCodeLine(203) if err != nil { return fmt.Errorf("failed to read MODE STREAM response: %w", err) } diff --git a/internal/nntp/nntp-client.go b/internal/nntp/nntp-client.go index 704c579..1e24370 100644 --- a/internal/nntp/nntp-client.go +++ b/internal/nntp/nntp-client.go @@ -55,8 +55,8 @@ var DefaultBufferTX int = 64 * 1024 // for interacting with the NNTP server. 
type BackendConn struct { conn net.Conn - textConn *textproto.Conn - writer *bufio.Writer + TextConn *textproto.Conn + Writer *bufio.Writer Backend *BackendConfig mux sync.RWMutex Pool *Pool // link to parent pool @@ -200,11 +200,11 @@ func (c *BackendConn) Connect() error { } c.conn = conn - c.textConn = textproto.NewConn(conn) - c.writer = bufio.NewWriterSize(conn, DefaultBufferTX) // bufio writer with defined buffer size + c.TextConn = textproto.NewConn(conn) + c.Writer = bufio.NewWriterSize(conn, DefaultBufferTX) // bufio writer with defined buffer size // Read welcome message - code, message, err := c.textConn.ReadCodeLine(NNTPWelcomeCodeMin) + code, message, err := c.TextConn.ReadCodeLine(NNTPWelcomeCodeMin) if err != nil { log.Printf("[NNTP-CONN] Error reading welcome from %s:%d: %v", c.Backend.Host, c.Backend.Port, err) return err @@ -239,14 +239,14 @@ func (c *BackendConn) Connect() error { // authenticate performs NNTP authentication func (c *BackendConn) authenticate() error { // Send AUTHINFO USER - id, err := c.textConn.Cmd("AUTHINFO USER %s", c.Backend.Username) + id, err := c.TextConn.Cmd("AUTHINFO USER %s", c.Backend.Username) if err != nil { return err } - c.textConn.StartResponse(id) - code, message, err := c.textConn.ReadCodeLine(NNTPMoreInfoCode) - c.textConn.EndResponse(id) + c.TextConn.StartResponse(id) + code, message, err := c.TextConn.ReadCodeLine(NNTPMoreInfoCode) + c.TextConn.EndResponse(id) if err != nil { return err @@ -257,14 +257,14 @@ func (c *BackendConn) authenticate() error { } // Send AUTHINFO PASS - id, err = c.textConn.Cmd("AUTHINFO PASS %s", c.Backend.Password) + id, err = c.TextConn.Cmd("AUTHINFO PASS %s", c.Backend.Password) if err != nil { return err } - c.textConn.StartResponse(id) - code, message, err = c.textConn.ReadCodeLine(NNTPAuthSuccess) - c.textConn.EndResponse(id) + c.TextConn.StartResponse(id) + code, message, err = c.TextConn.ReadCodeLine(NNTPAuthSuccess) + c.TextConn.EndResponse(id) if err != nil { return err @@ -287,8 +287,8 @@ func (c *BackendConn) CloseFromPoolOnly() error { return nil } - if c.textConn != nil { - if err := c.textConn.Close(); err != nil { + if c.TextConn != nil { + if err := c.TextConn.Close(); err != nil { //log.Printf("Error closing text connection: %v", err) } } @@ -301,9 +301,9 @@ func (c *BackendConn) CloseFromPoolOnly() error { c.connected = false c.authenticated = false - c.textConn = nil // CloseFromPoolOnly + c.TextConn = nil // CloseFromPoolOnly c.conn = nil // CloseFromPoolOnly - c.writer = nil + c.Writer = nil //log.Printf("Closed NNTP Connection to %s", c.Backend.Host) return nil } From 40714b31afa23bfff86529d370084305a134b272 Mon Sep 17 00:00:00 2001 From: go-while Date: Sat, 4 Oct 2025 07:25:33 +0000 Subject: [PATCH 002/100] unfinished stuck in &nntp.CHTTJob .. 
requeue --- cmd/nntp-transfer/main.go | 717 ++++++++++++++------------ internal/common/headers.go | 4 +- internal/nntp/nntp-article-common.go | 12 - internal/nntp/nntp-client-commands.go | 167 +++++- 4 files changed, 560 insertions(+), 340 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 9caef28..023f1cc 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -83,7 +83,7 @@ var appVersion = "-unset-" var redisCtx = context.Background() var REDIS_TTL time.Duration = 3600 * time.Second // default 1h -var MaxThreads int +// var MaxThreads int var BatchCheck int func main() { @@ -174,7 +174,7 @@ func main() { if *maxThreads < 1 || *maxThreads > 500 { log.Fatalf("Error: max-threads must be between 1 and 500 (got %d)", *maxThreads) } - MaxThreads = *maxThreads + nntp.NNTPTransferThreads = *maxThreads // Validate UseShortHashLen if *useShortHashLen < 2 || *useShortHashLen > 7 { @@ -382,7 +382,7 @@ func main() { go func(wgP *sync.WaitGroup, redisCli *redis.Client) { defer wgP.Done() resultChan := make(chan error, 1) - resultChan <- runTransfer(db, proc, pool, newsgroups, *batchCheck, *maxThreads, *dryRun, startTime, endTime, *debugCapture, wgP, redisCli) + resultChan <- runTransfer(db, newsgroups, *batchCheck, *maxThreads, *dryRun, startTime, endTime, *debugCapture, wgP, redisCli) result := <-resultChan if !*debugCapture { transferDoneChan <- result @@ -974,21 +974,19 @@ func matchesAnyWildcardPattern(newsgroup string, patterns []string) bool { return false } +var totalTransferred, totalUnwanted, totalRejected, totalRedisCacheHits, totalTXErrors, totalConnErrors, nothingInDateRange uint64 +var transferMutex sync.Mutex + // runTransfer performs the actual article transfer process -func runTransfer(db *database.Database, proc *processor.Processor, pool *nntp.Pool, newsgroups []*models.Newsgroup, batchCheck int, maxThreads int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, wgP *sync.WaitGroup, redisCli *redis.Client) error { +func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchCheck int, maxThreads int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, wgP *sync.WaitGroup, redisCli *redis.Client) error { defer wgP.Done() - var totalTransferred, nothingInDateRange, totalRedisCacheHits uint64 - var totalUnwanted, totalRejected, totalTXErrors, totalConnErrors uint64 - var transferMutex sync.Mutex maxThreadsChan := make(chan struct{}, maxThreads) var wg sync.WaitGroup + log.Printf("Todo: %d newsgroups", len(newsgroups)) // Process each newsgroup - log.Printf("Starting transfer for %d newsgroups", len(newsgroups)) for _, newsgroup := range newsgroups { if common.WantShutdown() { - transferMutex.Lock() - log.Printf("Shutdown requested, stopping transfer. 
Total transferred: %d articles", totalTransferred) - transferMutex.Unlock() + log.Printf("Aborted before next: %s", newsgroup.Name) return nil } maxThreadsChan <- struct{}{} // acquire a thread slot @@ -999,39 +997,41 @@ func runTransfer(db *database.Database, proc *processor.Processor, pool *nntp.Po <-maxThreadsChan // release the thread slot }(wg) if common.WantShutdown() { + log.Printf("Aborted before next: %s", newsgroup.Name) return } - start := time.Now() if VERBOSE { - log.Printf("Starting transfer for newsgroup: %s", newsgroup.Name) + log.Printf("Newsgroup: '%s' | Start", newsgroup.Name) } - transferred, checked, rc, unwanted, rejected, txErrors, connErrors, err := transferNewsgroup(db, proc, pool, newsgroup, batchCheck, dryRun, startTime, endTime, debugCapture, redisCli) - - transferMutex.Lock() - totalTransferred += transferred - totalRedisCacheHits += rc - totalUnwanted += unwanted - totalRejected += rejected - totalTXErrors += txErrors - totalConnErrors += connErrors + /* + transferred, checked, rc, unwanted, rejected, txErrors, connErrors, err := transferNewsgroup(db, proc, pool, newsgroup, batchCheck, dryRun, startTime, endTime, debugCapture, redisCli) + + transferMutex.Lock() + totalTransferred += transferred + totalRedisCacheHits += rc + totalUnwanted += unwanted + totalRejected += rejected + totalTXErrors += txErrors + totalConnErrors += connErrors + transferMutex.Unlock() + */ + err := transferNewsgroup(db, newsgroup, batchCheck, dryRun, startTime, endTime, debugCapture, redisCli) if err == ErrNotInDateRange { + transferMutex.Lock() nothingInDateRange++ + transferMutex.Unlock() err = nil // not a real error } - transferMutex.Unlock() - if err != nil { log.Printf("Error transferring newsgroup %s: %v", newsgroup.Name, err) - } else { - if startTime == nil && endTime == nil { - log.Printf("DONE runTransfer Newsgroup '%s' | transferred %d articles. checked %d. 
took %v", newsgroup.Name, transferred, checked, time.Since(start)) - } } }(newsgroup, &wg, redisCli) } // Wait for all transfers to complete wg.Wait() + transferMutex.Lock() + defer transferMutex.Unlock() if nothingInDateRange > 0 { log.Printf("Note: %d newsgroups had no articles in the specified date range", nothingInDateRange) } @@ -1048,12 +1048,12 @@ var debugMutex sync.Mutex var ErrNotInDateRange = fmt.Errorf("article not in specified date range") // transferNewsgroup transfers articles from a single newsgroup -func transferNewsgroup(db *database.Database, proc *processor.Processor, pool *nntp.Pool, newsgroup *models.Newsgroup, batchCheck int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, redisCli *redis.Client) (uint64, uint64, uint64, uint64, uint64, uint64, uint64, error) { +func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batchCheck int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, redisCli *redis.Client) error { // Get group database groupDBs, err := db.GetGroupDBs(newsgroup.Name) if err != nil { - return 0, 0, 0, 0, 0, 0, 0, fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", newsgroup.Name, err) + return fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", newsgroup.Name, err) } defer func() { if ferr := db.ForceCloseGroupDBs(groupDBs); ferr != nil { @@ -1064,7 +1064,7 @@ func transferNewsgroup(db *database.Database, proc *processor.Processor, pool *n // Get total article count first with date filtering totalArticles, err := getArticleCountWithDateFilter(groupDBs, startTime, endTime) if err != nil { - return 0, 0, 0, 0, 0, 0, 0, fmt.Errorf("failed to get article count for newsgroup '%s': %v", newsgroup.Name, err) + return fmt.Errorf("failed to get article count for newsgroup '%s': %v", newsgroup.Name, err) } if totalArticles == 0 { @@ -1073,12 +1073,12 @@ func transferNewsgroup(db *database.Database, proc *processor.Processor, pool *n if VERBOSE { log.Printf("No articles found in newsgroup: %s (within specified date range)", newsgroup.Name) } - return 0, 0, 0, 0, 0, 0, 0, ErrNotInDateRange + return ErrNotInDateRange } else { log.Printf("No articles found in newsgroup: %s", newsgroup.Name) } - return 0, 0, 0, 0, 0, 0, 0, nil + return nil } if dryRun { @@ -1088,7 +1088,7 @@ func transferNewsgroup(db *database.Database, proc *processor.Processor, pool *n log.Printf("DRY RUN: Would transfer %d articles from newsgroup %s", totalArticles, newsgroup.Name) } if !debugCapture { - return 0, 0, 0, 0, 0, 0, 0, nil + return nil } } @@ -1102,23 +1102,94 @@ func transferNewsgroup(db *database.Database, proc *processor.Processor, pool *n //time.Sleep(3 * time.Second) // debug sleep var ioffset int64 remainingArticles := totalArticles - // Process articles in database batches (much larger than network batches) ttMode := &nntp.TakeThisMode{ - CheckMode: true, // start with CHECK mode + Newsgroup: &newsgroup.Name, + CheckMode: true, } - var transferred, checked, redis_cache_hits uint64 + ttResponses := make(chan chan *nntp.TTResponse, totalArticles/int64(batchCheck)+2) + defer close(ttResponses) start := time.Now() + go func() { + var amux sync.Mutex + var transferred, unwanted, rejected, checked, txErrors, connErrors uint64 + for responseChan := range ttResponses { + if responseChan != nil { + go func(rc chan *nntp.TTResponse) { + for resp := range rc { + if resp == nil { + log.Printf("Newsgroup: '%s' | Warning: nil TT response channel received!?", newsgroup.Name) + return + } + if resp.Err != nil { + 
log.Printf("Newsgroup: '%s' | Error in TT response: err='%v' job='%#v'", newsgroup.Name, resp.Err, resp.Job) + return + } + if resp.Job == nil { + log.Printf("Newsgroup: '%s' | Warning: nil Job in TT response without error!?", newsgroup.Name) + return + } + // get numbers + amux.Lock() + resp.Job.GetUpdateCounters(&transferred, &unwanted, &rejected, &checked, &txErrors, &connErrors) + amux.Unlock() + + // free memory + resp.Job.Mux.Lock() + defer resp.Job.Mux.Unlock() + + for msgid := range resp.Job.ArticleMap { + delete(resp.Job.ArticleMap, msgid) + } + resp.Job.ArticleMap = nil + + for i, _ := range resp.Job.Articles { + resp.Job.Articles[i] = nil + } + resp.Job.Articles = nil + + for i, _ := range resp.Job.MessageIDs { + resp.Job.MessageIDs[i] = nil + } + resp.Job.MessageIDs = nil + + for i, _ := range resp.Job.WantedIDs { + resp.Job.WantedIDs[i] = nil + } + resp.Job.WantedIDs = nil + + } + }(responseChan) + } + } + amux.Lock() + result := fmt.Sprintf("END Newsgroup: '%s' | transferred: %d/%d (unwanted: %d | rejected: %d | checked: %d) TX_Errors: %d, connErrors: %d, took %v", + newsgroup.Name, transferred, totalArticles, unwanted, rejected, checked, txErrors, connErrors, time.Since(start)) + amux.Unlock() + //log.Print(result) + resultsMutex.Lock() + results = append(results, result) + if VERBOSE { + for _, msgId := range rejectedArticles[newsgroup.Name] { + // prints all at the end again + log.Printf("END Newsgroup: '%s' | REJECTED '%s'", newsgroup.Name, msgId) + } + delete(rejectedArticles, newsgroup.Name) // free memory + } + resultsMutex.Unlock() + }() + + // Get articles in database batches (much larger than network batches) for offset := ioffset; offset < totalArticles; offset += dbBatchSize { if common.WantShutdown() { - log.Printf("WantShutdown in newsgroup: %s: Transferred %d articles", newsgroup.Name, transferred) - return transferred, checked, redis_cache_hits, ttMode.Unwanted, ttMode.Rejected, ttMode.TX_Errors, ttMode.ConnErrors, nil + log.Printf("WantShutdown in newsgroup: '%s' offset: %d", newsgroup.Name, offset) + return nil } // Load batch from database with date filtering articles, err := getArticlesBatchWithDateFilter(groupDBs, offset, startTime, endTime) if err != nil { log.Printf("Error loading article batch (offset %d) for newsgroup %s: %v", offset, newsgroup.Name, err) - continue + return fmt.Errorf("failed to load article batch (offset %d) for newsgroup '%s': %v", offset, newsgroup.Name, err) } if len(articles) == 0 { @@ -1129,106 +1200,39 @@ func transferNewsgroup(db *database.Database, proc *processor.Processor, pool *n debugMutex.Lock() debugArticles[newsgroup.Name] = append(debugArticles[newsgroup.Name], articles...) 
debugMutex.Unlock() - return 0, 0, 0, 0, 0, 0, 0, nil + return nil } if VERBOSE { log.Printf("Newsgroup: '%s' | Loaded %d articles from database (offset %d)", newsgroup.Name, len(articles), offset) } - isleep := time.Second // Process articles in network batches for i := 0; i < len(articles); i += batchCheck { if common.WantShutdown() { - log.Printf("WantShutdown in newsgroup: %s: Transferred %d articles", newsgroup.Name, transferred) - return transferred, checked, redis_cache_hits, ttMode.Unwanted, ttMode.Rejected, ttMode.TX_Errors, ttMode.ConnErrors, nil - } - if !ttMode.CheckMode && ttMode.TmpSuccessCount >= 100 { - ttMode.TmpSuccessCount = 0 - ttMode.TmpTakeThisCount = 0 + log.Printf("WantShutdown in newsgroup: '%s' (offset %d)", newsgroup.Name, offset) + return nil } // Determine end index for the batch end := i + batchCheck if end > len(articles) { end = len(articles) } - // forever: will process this batch until successful or shutdown - forever: - for { - if common.WantShutdown() { - log.Printf("WantShutdown in newsgroup: %s: Transferred %d articles", newsgroup.Name, transferred) - return transferred, checked, redis_cache_hits, ttMode.Unwanted, ttMode.Rejected, ttMode.TX_Errors, ttMode.ConnErrors, nil - } - if isleep > time.Minute { - isleep = time.Minute - } - if isleep > time.Second { - log.Printf("Newsgroup: '%s' | Sleeping %v before retrying batch %d-%d (transferred %d so far)", newsgroup.Name, isleep, i+1, end, transferred) - time.Sleep(isleep) - } - /* - // Get connection from pool - conn, err := pool.Get(nntp.MODE_STREAM_MV) - if err != nil { - log.Printf("Newsgroup: '%s' | Failed to get connection from pool: %v", newsgroup.Name, err) - isleep = isleep * 2 - continue forever - } - - if conn.ModeReader { - if VERBOSE { - log.Printf("got connection in reader mode, closing and getting a new one") - } - conn.ForceCloseConn() - continue forever - } - */ - batchTransferred, batchChecked, TTsuccessRate, rc, berr := processBatch(&newsgroup.Name, ttMode, articles[i:end], redisCli) - transferred += batchTransferred - redis_cache_hits += rc - checked += batchChecked - if berr != nil { - log.Printf("Newsgroup: '%s' | Error processing network batch: %v ... 
retry", newsgroup.Name, berr) - //conn.ForceCloseConn() - isleep = isleep * 2 - ttMode.CheckMode = true - ttMode.TmpSuccessCount = 0 - ttMode.TmpTakeThisCount = 0 - continue forever - } - if VERBOSE || (transferred >= 1000 && transferred%1000 == 0) || (checked >= 1000 && checked%1000 == 0) { - log.Printf("Newsgroup: '%s' | BatchDone (offset %d/%d) %d-%d TX:%d check=%t ttRate=%.1f%% checked=%d redis_cache_hits=%d/%d", newsgroup.Name, offset, totalArticles, i+1, end, batchTransferred, ttMode.CheckMode, TTsuccessRate, batchChecked, rc, redis_cache_hits) - } - //pool.Put(conn) - break forever + responseChan, err := processBatch(ttMode, articles[i:end], redisCli) + if err != nil { + log.Printf("Newsgroup: '%s' | Error processing batch %d-%d: %v", newsgroup.Name, i+1, end, err) + return fmt.Errorf("error processing batch %d-%d for newsgroup '%s': %v", i+1, end, newsgroup.Name, err) + } + if responseChan != nil { + // pass the response channel to the collector channel: ttResponses + ttResponses <- responseChan } - } - - // Clear articles slice to free memory - for i := range articles { - articles[i] = nil // free memory } remainingArticles -= int64(len(articles)) - var batchSuccessRate float64 - if transferred > 0 { - batchSuccessRate = float64(transferred) / float64(len(articles)) * 100.0 - } if VERBOSE { - log.Printf("Newsgroup: '%s' | Done (offset %d/%d) total: %d/%d (unw: %d / rej: %d) (Check=%t) ttRate=%.1f%%", newsgroup.Name, offset, totalArticles, transferred, remainingArticles, ttMode.Unwanted, ttMode.Rejected, ttMode.CheckMode, batchSuccessRate) + log.Printf("Newsgroup: '%s' | Pushed to queue (offset %d/%d) remaining: %d (Check=%t)", newsgroup.Name, offset, totalArticles, remainingArticles, ttMode.GetMode()) + //log.Printf("Newsgroup: '%s' | Pushed (offset %d/%d) total: %d/%d (unw: %d / rej: %d) (Check=%t)", newsgroup.Name, offset, totalArticles, transferred, remainingArticles, ttMode.Unwanted, ttMode.Rejected, ttMode.GetMode()) } - articles = nil // free memory } // end for offset range totalArticles - result := fmt.Sprintf("END Newsgroup: '%s' | total transferred: %d articles / total articles: %d (unwanted: %d | rejected: %d | checked: %d) TX_Errors: %d, connErrors: %d, took %v", newsgroup.Name, transferred, totalArticles, ttMode.Unwanted, ttMode.Rejected, checked, ttMode.TX_Errors, ttMode.ConnErrors, time.Since(start)) - //log.Print(result) - resultsMutex.Lock() - results = append(results, result) - if VERBOSE { - for _, msgId := range rejectedArticles[newsgroup.Name] { - // prints all at the end again - log.Printf("END Newsgroup: '%s' | REJECTED '%s'", newsgroup.Name, msgId) - } - delete(rejectedArticles, newsgroup.Name) // free memory - } - resultsMutex.Unlock() - return transferred, checked, redis_cache_hits, ttMode.Unwanted, ttMode.Rejected, ttMode.TX_Errors, ttMode.ConnErrors, nil + return nil } // end func transferNewsgroup var results []string @@ -1239,56 +1243,59 @@ var upperLevel float64 = 95.0 // processBatch processes a batch of articles using NNTP streaming protocol (RFC 4644) // Uses TAKETHIS primarily, falls back to CHECK when success rate < 95% -func processBatch(newsgroup *string, ttMode *nntp.TakeThisMode, articles []*models.Article, redisCli *redis.Client) (transferred uint64, checked uint64, successRate float64, redis_cache_hits uint64, err error) { +func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCli *redis.Client) (chan *nntp.TTResponse, error) { if len(articles) == 0 { - return 0, 0, 0, 0, nil + log.Printf("processBatch: no articles 
in this batch for newsgroup '%s'", *ttMode.Newsgroup) + return nil, nil } + doCheck := ttMode.FlipMode(lowerLevel, upperLevel) + var batchedJob *nntp.CHTTJob - // Calculate success rate to determine whether to use CHECK or TAKETHIS - if ttMode.TmpSuccessCount > 0 { - successRate = float64(ttMode.TmpSuccessCount) / float64(ttMode.TmpTakeThisCount) * 100.0 - } - - // Switch to CHECK mode if TAKETHIS success rate drops below lowerLevel - if !ttMode.CheckMode && successRate < lowerLevel && ttMode.TmpTakeThisCount >= 10 { // Need at least 10 attempts for meaningful stats - ttMode.CheckMode = true - log.Printf("Newsgroup: '%s' | TAKETHIS success rate %.1f%% < %f%%, switching to CHECK mode", newsgroup, successRate, lowerLevel) - } else if ttMode.CheckMode && successRate >= upperLevel && ttMode.TmpTakeThisCount >= 20 { // Switch back when rate improves - ttMode.CheckMode = false - log.Printf("Newsgroup: '%s' | TAKETHIS success rate %.1f%% >= %f%%, switching to TAKETHIS mode", newsgroup, successRate, upperLevel) - } - - job := &CHTTJob{ - Newsgroup: newsgroup, - MessageIDs: make([]*string, 0, len(articles)), - ArticleMap: make(map[*string]*models.Article, len(articles)), - ResponseChan: make(chan *TTResponse, 1), - TTMode: ttMode, - } - switch ttMode.CheckMode { + select { + case job := <-jobRequeue: + batchedJob = job + batchedJob.TTMode = ttMode + articles = batchedJob.Articles + batchedJob.Articles = nil + default: + batchedJob = &nntp.CHTTJob{ + Newsgroup: ttMode.Newsgroup, + MessageIDs: make([]*string, 0, len(articles)), + Articles: make([]*models.Article, 0, len(articles)), + ArticleMap: make(map[*string]*models.Article, len(articles)), + ResponseChan: make(chan *nntp.TTResponse, 1), + TTMode: ttMode, + } + } + + switch doCheck { case true: // ttMode.CheckMode // CHECK mode: verify articles are wanted before sending //log.Printf("Newsgroup: '%s' | CHECK: %d articles (success rate: %.1f%%)", newsgroup, len(articles), successRate) // Batch check Redis cache using pipeline (1 round trip for all keys) + var redis_cache_hits int if redisCli != nil && len(articles) > 0 { pipe := redisCli.Pipeline() cmds := make([]*redis.IntCmd, len(articles)) // Queue all EXISTS commands for i, article := range articles { + if article == nil { + continue + } cmds[i] = pipe.Exists(redisCtx, article.MessageID) } // Execute all in one network round trip _, err := pipe.Exec(redisCtx) if err != nil && VERBOSE { - log.Printf("Newsgroup: '%s' | Redis pipeline error: %v", newsgroup, err) + log.Printf("Newsgroup: '%s' | Redis pipeline error: %v", *ttMode.Newsgroup, err) } // Process results for i, cmd := range cmds { - if articles[i] == nil { + if cmd == nil || articles[i] == nil { continue } article := articles[i] @@ -1296,16 +1303,18 @@ func processBatch(newsgroup *string, ttMode *nntp.TakeThisMode, articles []*mode if cmdErr == nil && exists > 0 { // Cached in Redis - skip this article if VERBOSE { - log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis (skip [CHECK])", newsgroup, article.MessageID) + log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis (skip [CHECK])", *ttMode.Newsgroup, article.MessageID) } + batchedJob.Increment(nntp.IncrFLAG_REDIS_CACHED) redis_cache_hits++ - articles[i] = nil // mark as nil to skip later + articles[i] = nil continue } // Not cached - add to valid list - job.MessageIDs = append(job.MessageIDs, &article.MessageID) - job.ArticleMap[&article.MessageID] = article + batchedJob.MessageIDs = append(batchedJob.MessageIDs, &article.MessageID) + 
batchedJob.ArticleMap[&article.MessageID] = article + batchedJob.Articles = append(batchedJob.Articles, article) } } else { // No Redis - add all non-nil message IDs @@ -1313,19 +1322,21 @@ func processBatch(newsgroup *string, ttMode *nntp.TakeThisMode, articles []*mode if article == nil { continue } - job.MessageIDs = append(job.MessageIDs, &article.MessageID) - job.ArticleMap[&article.MessageID] = article + batchedJob.MessageIDs = append(batchedJob.MessageIDs, &article.MessageID) + batchedJob.ArticleMap[&article.MessageID] = article + batchedJob.Articles = append(batchedJob.Articles, article) } } - if len(job.MessageIDs) == 0 { - log.Printf("Newsgroup: '%s' | No message IDs to check in batch. (redis_cache_hits: %d)", newsgroup, redis_cache_hits) - return transferred, checked, successRate, redis_cache_hits, nil + if len(batchedJob.MessageIDs) == 0 { + log.Printf("Newsgroup: '%s' | No message IDs to check in batch. (redis_cache_hits: %d)", *ttMode.Newsgroup, redis_cache_hits) + return nil, nil } if VERBOSE { - log.Printf("Newsgroup: '%s' | Sending CHECK commands for %d valid articles (filtered from %d)", newsgroup, len(job.MessageIDs), len(articles)) + log.Printf("Newsgroup: '%s' | Sending CHECK commands for %d/%d articles", *ttMode.Newsgroup, len(batchedJob.MessageIDs), len(articles)) } - CheckQueue <- job + nntp.CheckQueue <- batchedJob + return batchedJob.ResponseChan, nil //log.Printf("Newsgroup: '%s' | CHECK wanted %d articles", newsgroup, len(wantedIds)) // Send CHECK commands for all message IDs /* disabled @@ -1382,14 +1393,15 @@ func processBatch(newsgroup *string, ttMode *nntp.TakeThisMode, articles []*mode if article == nil { continue } - job.Articles = append(job.Articles, article) + batchedJob.Articles = append(batchedJob.Articles, article) } - if len(job.Articles) == 0 { + if len(batchedJob.Articles) == 0 { log.Printf("WARN: No valid articles for TAKETHIS mode, skipping batch") - return transferred, checked, successRate, redis_cache_hits, nil + return nil, nil } - TakeThisQueue <- job // TODO DEAD END + nntp.TakeThisQueue <- batchedJob // TODO DEAD END + return batchedJob.ResponseChan, nil /* response := <-ttJob.ResponseChan transferred += response.txcount @@ -1422,44 +1434,12 @@ func processBatch(newsgroup *string, ttMode *nntp.TakeThisMode, articles []*mode } // end case !ttMode.CheckMode // end switch ttMode.CheckMode - return transferred, checked, successRate, redis_cache_hits, nil + return nil, nil } // end func processBatch -var TakeThisQueue = make(chan *CHTTJob, MaxThreads) -var CheckQueue = make(chan *CHTTJob, MaxThreads) - -// CHECK/TAKETHIS Job -type CHTTJob struct { - Mux sync.Mutex - Newsgroup *string - MessageIDs []*string - WantedIDs []*string - Wanted uint64 - Unwanted uint64 - Rejected uint64 - Retry uint64 - Transferred uint64 - RedisCached uint64 - TX_Errors uint64 - ResponseChan chan *TTResponse - Articles []*models.Article - ArticleMap map[*string]*models.Article - TTMode *nntp.TakeThisMode -} - -type TTResponse struct { - Job *CHTTJob - err error -} - -type CheckResponse struct { // deprecated - CmdId uint - Article *models.Article -} - // sendArticlesBatchViaTakeThis sends multiple articles via TAKETHIS in streaming mode // Sends all TAKETHIS commands first, then reads all responses (true streaming) -func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Article, job *CHTTJob, newsgroup string, redisCli *redis.Client) (transferred uint64, redis_cached uint64, err error) { +func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, 
articles []*models.Article, job *nntp.CHTTJob, newsgroup string, redisCli *redis.Client) (transferred uint64, redis_cached uint64, err error) { if len(articles) == 0 { return 0, 0, nil } @@ -1510,7 +1490,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art } // Now send TAKETHIS for non-cached articles - artChan := make(chan *CheckResponse, len(articles)) + artChan := make(chan *nntp.CheckResponse, len(articles)) // ← Also close artChan for _, article := range articles { @@ -1529,7 +1509,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art return 0, redis_cached, fmt.Errorf("failed to send TAKETHIS for %s: %v", article.MessageID, err) } - artChan <- &CheckResponse{ + artChan <- &nntp.CheckResponse{ Article: article, CmdId: cmdID, } @@ -1541,10 +1521,10 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art // Phase 2: Read all responses in order for cr := range artChan { - job.TTMode.TmpTakeThisCount++ + job.TTMode.IncrementTmp() takeThisResponseCode, err := conn.ReadTakeThisResponseStreaming(cr.CmdId) if err != nil || takeThisResponseCode == 0 { - job.TTMode.ConnErrors++ + job.Increment(nntp.IncrFLAG_CONN_ERRORS) conn.ForceCloseConn() log.Printf("ERROR Newsgroup: '%s' | Failed to read TAKETHIS response for %s: %v", newsgroup, cr.Article.MessageID, err) return transferred, redis_cached, fmt.Errorf("failed to read TAKETHIS response for %s: %v", cr.Article.MessageID, err) @@ -1553,12 +1533,12 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art // Update success rate tracking switch takeThisResponseCode { case 239: - job.TTMode.TmpSuccessCount++ - job.Transferred++ + job.TTMode.IncrementSuccess() + job.Increment(nntp.IncrFLAG_TRANSFERRED) case 439: - job.Rejected++ - rejected++ + job.Increment(nntp.IncrFLAG_REJECTED) if VERBOSE { + rejected++ log.Printf("Newsgroup: '%s' | Rejected article '%s': response=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, takeThisResponseCode, countDone, len(articles)) //resultsMutex.Lock() //rejectedArticles[newsgroup] = append(rejectedArticles[newsgroup], article.MessageID) @@ -1566,13 +1546,13 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art } case 400, 480, 500, 501, 502, 503, 504: log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': response=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, takeThisResponseCode, countDone, len(articles)) - job.TX_Errors++ + job.Increment(nntp.IncrFLAG_TX_ERRORS) conn.ForceCloseConn() return transferred, redis_cached, fmt.Errorf("failed to transfer article '%s': response=%d", cr.Article.MessageID, takeThisResponseCode) default: log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': unknown response=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, takeThisResponseCode, countDone, len(articles)) - job.TX_Errors++ + job.Increment(nntp.IncrFLAG_TX_ERRORS) continue } if redisCli != nil { @@ -1613,11 +1593,12 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art return transferred, redis_cached, nil } // end func sendArticlesBatchViaTakeThis +var jobRequeue = make(chan *nntp.CHTTJob, nntp.NNTPTransferThreads) + func BootConnWorkers(pool *nntp.Pool) { openConns := 0 defaultSleep := time.Second isleep := defaultSleep - CHECKrunning := 0 var mux sync.Mutex forever: for { @@ -1626,22 +1607,24 @@ forever: log.Printf("BootConnWorkers: WantShutdown, exiting") break forever } - if openConns == MaxThreads { + if openConns == 
nntp.NNTPTransferThreads { continue forever } - sharedConns := make([]*nntp.BackendConn, 0, MaxThreads) - bootN := MaxThreads - openConns - 1 + var sharedConns []*nntp.BackendConn + bootN := nntp.NNTPTransferThreads - openConns - 1 // get connections from pool - for i := bootN; i < MaxThreads; i++ { + for i := bootN; i < nntp.NNTPTransferThreads; i++ { // Get a connection from pool conn, err := pool.Get(nntp.MODE_STREAM_MV) if err != nil { + log.Printf("BootConnWorkers failed to get connection from pool: %v ... retry in: %v", err, isleep) + if isleep > defaultSleep { + time.Sleep(isleep) + } isleep = isleep * 2 if isleep > time.Minute { isleep = time.Minute } - log.Printf("BootConnWorkers failed to get connection from pool: %v ... retry in: %v", err, isleep) - time.Sleep(isleep) continue forever } if conn.ModeReader { @@ -1651,41 +1634,84 @@ forever: conn.ForceCloseConn() continue forever } - + // got a connection sharedConns = append(sharedConns, conn) openConns++ } + if len(sharedConns) == 0 { + log.Printf("BootConnWorkers: no connections obtained, retry in: %v", isleep) + continue forever + } isleep = defaultSleep // reset to default - CHECKreturnChans := make([]chan struct{}, 0, len(sharedConns)) + returnSignals := make([]*ReturnSignal, 0, len(sharedConns)) + errChan := make(chan struct{}, 1) for i, conn := range sharedConns { - CHECKreturnChans[i] = make(chan struct{}, 1) - go CheckWorker(conn, CHECKreturnChans[i]) + returnSignal := &ReturnSignal{ + errChan: errChan, + Chan: make(chan *ReturnSignal, 1), + tmpMessageIDs: make([]*string, 0, BatchCheck), + jobsQueued: make(map[*nntp.CHTTJob]uint64, BatchCheck), + jobsReadOK: make(map[*nntp.CHTTJob]uint64, BatchCheck), + jobMap: make(map[*string]*nntp.CHTTJob, BatchCheck), + jobs: make([]*nntp.CHTTJob, 0, BatchCheck), + } + returnSignals[i] = returnSignal + go CHTTWorker(i, conn, returnSignal) } - mux.Lock() - CHECKrunning += len(sharedConns) - mux.Unlock() - // Monitor CHECK workers + // Monitor recently launched CHTT workers go func() { - checkBOOTED := len(sharedConns) + monitoring := len(sharedConns) for { time.Sleep(100 * time.Millisecond) - for i, retChan := range CHECKreturnChans { - if retChan == nil { + for i, wait := range returnSignals { + if wait == nil { continue } select { - case <-CHECKreturnChans[i]: - //log.Printf("CheckWorker %d exited, restarting...", i) + case rs := <-wait.Chan: + log.Printf("CHTTWorker (%d) exited", i) + monitoring-- + mux.Lock() - CHECKrunning-- + openConns-- mux.Unlock() - checkBOOTED-- - CHECKreturnChans[i] = nil + + returnSignals[i] = nil + + rs.Mux.Lock() + if len(rs.jobs) > 0 { + log.Printf("CHTTWorker (%d) try requeue %d jobs", i, len(rs.jobs)) + for _, job := range rs.jobs { + if job != nil { + job.Mux.Lock() + rqj := &nntp.CHTTJob{ + Newsgroup: job.Newsgroup, + Articles: job.Articles, + TxErrors: job.TxErrors, + ConnErrors: job.ConnErrors, + ResponseChan: make(chan *nntp.TTResponse, 1), + } + jobRequeue <- rqj // TODO DEAD END + // free memory + job.Newsgroup = nil + job.TTMode = nil + job.ResponseChan = nil + job.Articles = nil + job.ArticleMap = nil + job.MessageIDs = nil + job.WantedIDs = nil + job.Mux.Unlock() + } + } + log.Printf("CHTTWorker (%d) did requeue %d jobs", i, len(rs.jobs)) + } + rs.Mux.Unlock() + // TODO: check remaining work and restart connection default: // Worker still running } } - if checkBOOTED == 0 { + if monitoring == 0 { return } } @@ -1695,62 +1721,73 @@ forever: var DefaultCheckTicker = 5 * time.Second -func CheckWorker(conn *nntp.BackendConn, returnSignal chan 
struct{}) { - var mux sync.Mutex +var JobsToRetry []*nntp.CHTTJob +var JobsToRetryMux sync.Mutex + +type ReturnSignal struct { + Mux sync.Mutex + Chan chan *ReturnSignal + errChan chan struct{} + tmpMessageIDs []*string + jobsQueued map[*nntp.CHTTJob]uint64 + jobsReadOK map[*nntp.CHTTJob]uint64 + jobMap map[*string]*nntp.CHTTJob + jobs []*nntp.CHTTJob +} - tmpMessageIDs := make([]*string, 0, BatchCheck) +func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { readResponsesChan := make(chan *string, BatchCheck) - jobMap := make(map[*string]*CHTTJob, BatchCheck) // maps messageIds to Jobs - - lastRun := time.Now() + errChan := make(chan struct{}, 3) tickChan := make(chan struct{}, 1) - errChan := make(chan struct{}, 1) - defer func(conn *nntp.BackendConn) { + defer func(conn *nntp.BackendConn, rs *ReturnSignal) { conn.ForceCloseConn() - returnSignal <- struct{}{} - close(errChan) - }(conn) + rs.Chan <- rs + errChan <- struct{}{} + }(conn, rs) + lastRun := time.Now() // launch go routine which sends CHECK commands if threshold exceeds BatchCheck - go func(mux *sync.Mutex) { + go func() { // tick every n seconds to check if any CHECKs to do ticker := time.NewTicker(DefaultCheckTicker) defer ticker.Stop() - forever: + defer func() { + errChan <- struct{}{} + }() + loop: for { select { case <-errChan: + errChan <- struct{}{} + log.Printf("CheckWorker (%d): Send CHECK got errChan signal... exiting", id) return + case <-tickChan: if common.WantShutdown() { - log.Printf("CheckWorker: Tick WantShutdown, exiting") - return - } - if common.IsClosedChannel(errChan) { - log.Printf("CheckWorker: Tick errChan closed, exiting") + log.Printf("CheckWorker (%d): Tick WantShutdown, exiting", id) return } - // process CHECK commands - mux.Lock() - hasWork := len(tmpMessageIDs) > 0 - mux.Unlock() + // check if we have work and process CHECK commands + rs.Mux.Lock() + hasWork := len(rs.tmpMessageIDs) > 0 + rs.Mux.Unlock() if !hasWork { - log.Printf("CheckWorker: Ticked but no work? continue...") - continue forever + log.Printf("CheckWorker (%d): Ticked but no work? 
continue...", id) + continue loop } // copy accumulated message IDs and clear slice - mux.Lock() - checkIds := make([]*string, len(tmpMessageIDs)) - copy(checkIds, tmpMessageIDs) - tmpMessageIDs = tmpMessageIDs[:0] //clear - mux.Unlock() + rs.Mux.Lock() + checkIds := make([]*string, len(rs.tmpMessageIDs)) + copy(checkIds, rs.tmpMessageIDs) + rs.tmpMessageIDs = rs.tmpMessageIDs[:0] //clear + rs.Mux.Unlock() - log.Printf("CheckWorker: Ticker sending CHECK: %d message IDs", len(checkIds)) + log.Printf("CheckWorker (%d): SendCheckMultiple %d message IDs", id, len(checkIds)) err := conn.SendCheckMultiple(checkIds) if err != nil { - log.Printf("CheckWorker: SendCheckMultiple error: %v", err) + log.Printf("CheckWorker (%d): SendCheckMultiple error: %v", id, err) return } for _, msgID := range checkIds { @@ -1761,18 +1798,18 @@ func CheckWorker(conn *nntp.BackendConn, returnSignal chan struct{}) { } } - mux.Lock() + rs.Mux.Lock() lastRun = time.Now() - mux.Unlock() + rs.Mux.Unlock() case <-ticker.C: if common.WantShutdown() { - log.Printf("CheckWorker: Ticker WantShutdown, exiting") + log.Printf("CheckWorker (%d): Ticker WantShutdown, exiting", id) return } - mux.Lock() - hasWork := len(tmpMessageIDs) > 0 && time.Since(lastRun) >= DefaultCheckTicker - mux.Unlock() + rs.Mux.Lock() + hasWork := len(rs.tmpMessageIDs) > 0 && time.Since(lastRun) >= DefaultCheckTicker + rs.Mux.Unlock() if hasWork { select { case tickChan <- struct{}{}: @@ -1782,30 +1819,42 @@ func CheckWorker(conn *nntp.BackendConn, returnSignal chan struct{}) { } } // end select } // end forever - }(&mux) + }() // launch a go routine to read CHECK responses from the supplied connection with textproto readline - go func(mux *sync.Mutex) { - forever: + go func() { + var responseCount int64 + var tookTime int64 + defer func() { + errChan <- struct{}{} + }() + loop: for { select { case <-errChan: - log.Printf("CheckWorker: Read CHECK errChan closed, exiting") + errChan <- struct{}{} + log.Printf("CheckWorker (%d): Read CHECK responses got errChan signal... 
exiting", id) return + case msgID := <-readResponsesChan: if common.WantShutdown() { - log.Printf("CheckWorker: Read CHECK WantShutdown, exiting") - return - } - if common.IsClosedChannel(errChan) { - log.Printf("CheckWorker: Read CHECK errChan closed, exiting") + log.Printf("CheckWorker (%d): Read CHECK WantShutdown, exiting", id) return } + start := time.Now() code, line, err := conn.TextConn.ReadCodeLine(238) if code == 0 && err != nil { log.Printf("Failed to read CHECK response: %v", err) return } + tookTime += time.Since(start).Milliseconds() + responseCount++ + if responseCount >= 1000 { + avg := float64(tookTime) / float64(responseCount) + log.Printf("CheckWorker (%d): Read %d CHECK responses, avg latency: %.1f ms", id, responseCount, avg) + responseCount = 0 + tookTime = 0 + } // Parse response line // Format: code [message] // 238 - article wanted @@ -1821,75 +1870,95 @@ func CheckWorker(conn *nntp.BackendConn, returnSignal chan struct{}) { log.Printf("Mismatched CHECK response: expected %s, got %s", *msgID, parts[0]) return } - mux.Lock() - checkjob, exists := jobMap[msgID] - mux.Unlock() + rs.Mux.Lock() + job, exists := rs.jobMap[msgID] + rs.Mux.Unlock() if !exists { log.Printf("ERROR in CheckWorker: ReadCheckResponse msgId did not exist in jobMap: %s", *msgID) - continue forever + continue loop } + rs.Mux.Lock() + delete(rs.jobMap, msgID) + rs.jobsReadOK[job]++ + rs.Mux.Unlock() switch code { case 238: //log.Printf("Wanted Article '%s': response=%d", *msgID, code) - - mux.Lock() - delete(jobMap, msgID) - mux.Unlock() - - checkjob.Mux.Lock() - checkjob.WantedIDs = append(checkjob.WantedIDs, msgID) - checkjob.Wanted++ - checkjob.Mux.Unlock() + job.AppendWantedMessageID(msgID) case 438: //log.Printf("Unwanted Article '%s': response=%d", *msgID, code) - checkjob.Mux.Lock() - checkjob.Unwanted++ - checkjob.Mux.Unlock() + job.Increment(nntp.IncrFLAG_UNWANTED) case 431: - checkjob.Mux.Lock() - checkjob.Retry++ - checkjob.Mux.Unlock() + job.Increment(nntp.IncrFLAG_RETRY) default: log.Printf("Unknown CHECK response: line='%s' code=%d expected msgID %s", line, code, *msgID) - return } + // check if all jobs are done + rs.Mux.Lock() + queuedCount, qexists := rs.jobsQueued[job] + readCount, rexists := rs.jobsReadOK[job] + rs.Mux.Unlock() + if !qexists || !rexists { + log.Printf("ERROR in CheckWorker: queuedCount or readCount did not exist for a job?!") + continue loop + } + if queuedCount == readCount { + rs.Mux.Lock() + log.Printf("CheckWorker (%d): All CHECK responses received for job %v", id, job) + delete(rs.jobsQueued, job) + delete(rs.jobsReadOK, job) + rs.Mux.Unlock() + } + // TODO HERE: pass the job to takethis + // using this connection where we did the CHECK! 
+ // *** } // end select } // end forever - }(&mux) + }() -forever: - for checkjob := range CheckQueue { - if common.WantShutdown() { - log.Printf("CheckWorker: WantShutdown, exiting") + for { + select { + case <-errChan: + errChan <- struct{}{} return - } - if checkjob == nil || len(checkjob.MessageIDs) == 0 { - if VERBOSE { - log.Printf("CheckWorker: empty job, skipping") + case job := <-nntp.CheckQueue: + if common.WantShutdown() { + log.Printf("CheckWorker: WantShutdown, exiting") + return } - checkjob.ResponseChan <- []*string{} - continue forever - } - for _, msgId := range checkjob.MessageIDs { - if msgId != nil { - mux.Lock() - tmpMessageIDs = append(tmpMessageIDs, msgId) - jobMap[msgId] = checkjob - mux.Unlock() + if job == nil || len(job.MessageIDs) == 0 { + if VERBOSE { + log.Printf("CheckWorker: empty job, skipping") + } + if job != nil && job.ResponseChan != nil { + job.ResponseChan <- nil + } + continue } - } - if len(tmpMessageIDs) >= BatchCheck { - // Signal to process CHECK commands - if VERBOSE { - log.Printf("CheckWorker: processing batch of %d message IDs", len(tmpMessageIDs)) + for _, msgId := range job.MessageIDs { + if msgId != nil { + rs.Mux.Lock() + rs.tmpMessageIDs = append(rs.tmpMessageIDs, msgId) + rs.jobMap[msgId] = job + rs.jobsQueued[job]++ + rs.jobs = append(rs.jobs, job) + rs.Mux.Unlock() + } } - select { - case tickChan <- struct{}{}: - default: + rs.Mux.Lock() + hasWork := len(rs.tmpMessageIDs) >= BatchCheck + rs.Mux.Unlock() + if hasWork { + // send signal to process CHECK commands + select { + case tickChan <- struct{}{}: + default: + // tickChan full, tickChan will tick + } } - } - } // end for forever + } // end select + } // end for } // end func CheckWorker diff --git a/internal/common/headers.go b/internal/common/headers.go index d91396d..ebefeb6 100644 --- a/internal/common/headers.go +++ b/internal/common/headers.go @@ -359,12 +359,12 @@ checkHeader: continue checkGroups } */ - var trimmedNG string + trimmedNG := strings.TrimSpace(group) if trimmedNG != strings.ToLower(group) { trimmedNG = strings.ToLower(group) // parse to lowercase badGroups++ } - trimmedNG = strings.TrimSpace(trimmedNG) + // Clean up unwanted characters (remove each character individually) for _, char := range unwantedChars { trimmedNG = strings.ReplaceAll(trimmedNG, string(char), "") diff --git a/internal/nntp/nntp-article-common.go b/internal/nntp/nntp-article-common.go index ec15c19..2c69da9 100644 --- a/internal/nntp/nntp-article-common.go +++ b/internal/nntp/nntp-article-common.go @@ -31,18 +31,6 @@ type ArticleRetrievalResult struct { GroupDBs *database.GroupDBs } -// used in nntp-transfer/main.go -type TakeThisMode struct { - //Wanted uint64 - //Unwanted uint64 - //Rejected uint64 - TX_Errors uint64 - ConnErrors uint64 - TmpSuccessCount uint64 - TmpTakeThisCount uint64 - CheckMode bool // Start with TAKETHIS mode (false) -} - // retrieveArticleCommon handles the common logic for ARTICLE, HEAD, BODY, and STAT commands func (c *ClientConnection) retrieveArticleCommon(args []string, retrievalType ArticleRetrievalType) error { time.Sleep(time.Second / 5) // TODO hardcoded ratelimit diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 1e4b352..65bc25f 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -7,6 +7,7 @@ import ( "log" "strconv" "strings" + "sync" "time" "github.com/go-while/go-pugleaf/internal/common" @@ -28,6 +29,166 @@ var MaxReadLinesXover int64 = 100 // XOVER command 
typically retrieves overview // MaxReadLinesBody Maximum lines for BODY command, which retrieves the body of an article const MaxReadLinesBody = MaxReadLinesArticle - MaxReadLinesHeaders +var NNTPTransferThreads int = 0 +var TakeThisQueue = make(chan *CHTTJob, NNTPTransferThreads) +var CheckQueue = make(chan *CHTTJob, NNTPTransferThreads) + +// used in nntp-transfer/main.go +type TakeThisMode struct { + mux sync.Mutex + Newsgroup *string + //Wanted uint64 + //Unwanted uint64 + //Rejected uint64 + //TX_Errors uint64 + //ConnErrors uint64 + TmpSuccessCount uint64 + TmpTTotalsCount uint64 + CheckMode bool // Start with TAKETHIS mode (false) +} + +type TTResponse struct { + Job *CHTTJob + Err error +} + +type CheckResponse struct { // deprecated + CmdId uint + Article *models.Article +} + +// batched CHECK/TAKETHIS Job +type CHTTJob struct { + Newsgroup *string + Mux sync.Mutex + TTMode *TakeThisMode + ResponseChan chan *TTResponse + Articles []*models.Article + ArticleMap map[*string]*models.Article + MessageIDs []*string + WantedIDs []*string + checked uint64 + wanted uint64 + unwanted uint64 + rejected uint64 + retry uint64 + transferred uint64 + redisCached uint64 + TxErrors uint64 + ConnErrors uint64 +} + +const IncrFLAG_CHECKED = 1 +const IncrFLAG_WANTED = 2 +const IncrFLAG_UNWANTED = 4 +const IncrFLAG_REJECTED = 8 +const IncrFLAG_RETRY = 16 +const IncrFLAG_TRANSFERRED = 32 +const IncrFLAG_REDIS_CACHED = 64 +const IncrFLAG_TX_ERRORS = 128 +const IncrFLAG_CONN_ERRORS = 256 + +func (job *CHTTJob) Increment(counter int) { + job.Mux.Lock() + defer job.Mux.Unlock() + switch counter { + case IncrFLAG_CHECKED: + job.checked++ + case IncrFLAG_WANTED: + job.wanted++ + case IncrFLAG_UNWANTED: + job.unwanted++ + case IncrFLAG_REJECTED: + job.rejected++ + case IncrFLAG_RETRY: + job.retry++ + case IncrFLAG_TRANSFERRED: + job.transferred++ + case IncrFLAG_REDIS_CACHED: + job.redisCached++ + case IncrFLAG_TX_ERRORS: + job.txErrors++ + case IncrFLAG_CONN_ERRORS: + job.connErrors++ + } +} + +func (job *CHTTJob) AppendWantedMessageID(msgID *string) { + job.Mux.Lock() + job.WantedIDs = append(job.WantedIDs, msgID) + job.Mux.Unlock() + job.Increment(IncrFLAG_WANTED) +} + +func (job *CHTTJob) GetUpdateCounters(transferred, unwanted, rejected, checked, txErrors, connErrors *uint64) { + job.Mux.Lock() + *transferred += job.transferred + *unwanted += job.unwanted + *rejected += job.rejected + *checked += job.checked + *txErrors += job.txErrors + *connErrors += job.connErrors + job.Mux.Unlock() +} + +func (ttMode *TakeThisMode) GetMode() bool { + ttMode.mux.Lock() + defer ttMode.mux.Unlock() + if ttMode.CheckMode { + return true + } + return false +} + +func (ttMode *TakeThisMode) SetForceCHECK() { + ttMode.mux.Lock() + ttMode.CheckMode = true + ttMode.mux.Unlock() +} + +func (ttMode *TakeThisMode) IncrementSuccess() { + ttMode.mux.Lock() + ttMode.TmpSuccessCount++ + ttMode.mux.Unlock() +} + +func (ttMode *TakeThisMode) IncrementTmp() { + ttMode.mux.Lock() + ttMode.TmpTTotalsCount++ + ttMode.mux.Unlock() +} + +func (ttMode *TakeThisMode) SetNoCHECK() { + ttMode.mux.Lock() + ttMode.CheckMode = false + ttMode.mux.Unlock() +} + +func (ttMode *TakeThisMode) FlipMode(lowerLevel float64, upperLevel float64) bool { + ttMode.mux.Lock() + defer ttMode.mux.Unlock() + if ttMode.TmpSuccessCount < 10 || ttMode.TmpTTotalsCount < 100 { + return true // Force CHECK mode for this batch + } + successRate := float64(ttMode.TmpSuccessCount) / float64(ttMode.TmpTTotalsCount) * 100.0 + ttMode.TmpSuccessCount = 0 + 
ttMode.TmpTTotalsCount = 0 + switch ttMode.CheckMode { + case false: // Currently in TAKETHIS mode + if successRate < lowerLevel { + ttMode.CheckMode = true + log.Printf("Newsgroup: '%s' | TAKETHIS success rate %.1f%% < %f%%, switching to CHECK mode", *ttMode.Newsgroup, successRate, lowerLevel) + } + case true: // Currently in CHECK mode + if successRate > upperLevel { + ttMode.CheckMode = false + log.Printf("Newsgroup: '%s' | TAKETHIS success rate %.1f%% >= %f%%, switching to TAKETHIS mode", *ttMode.Newsgroup, successRate, upperLevel) + } + } + retval := ttMode.CheckMode + return retval +} + func (c *BackendConn) ForceCloseConn() { c.mux.Lock() defer c.mux.Unlock() @@ -1069,6 +1230,7 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string) error { return nil } +/* // CheckMultiple sends a CHECK command for multiple message IDs and returns responses func (c *BackendConn) CheckMultiple(messageIDs []*string, ttMode *TakeThisMode) ([]*string, error) { c.mux.Lock() @@ -1144,8 +1306,9 @@ func (c *BackendConn) CheckMultiple(messageIDs []*string, ttMode *TakeThisMode) // Return all responses return wantedIds, nil } +*/ -// TakeThisArticle sends an article via TAKETHIS command +// TakeThisArticle sends a single article via TAKETHIS command and returns the response code func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *string) (int, error) { c.mux.Lock() defer c.mux.Unlock() @@ -1219,7 +1382,7 @@ func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *str c.TextConn.StartResponse(id) defer c.TextConn.EndResponse(id) - code, _, err := c.TextConn.ReadCodeLine(239) // -1 means any code is acceptable + code, _, err := c.TextConn.ReadCodeLine(239) if code == 0 && err != nil { return 0, fmt.Errorf("failed to read TAKETHIS response: %w", err) } From 88ede5aaa42d0e9e1d6343e5944e160072e31ba9 Mon Sep 17 00:00:00 2001 From: go-while Date: Sat, 4 Oct 2025 23:13:52 +0000 Subject: [PATCH 003/100] unfinished --- cmd/nntp-transfer/main.go | 82 +++++++++++++++++---------- internal/nntp/nntp-client-commands.go | 8 +-- 2 files changed, 57 insertions(+), 33 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 023f1cc..9108369 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -9,6 +9,7 @@ import ( "log" "os" "os/signal" + "slices" "strconv" "strings" "sync" @@ -1137,16 +1138,16 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch resp.Job.Mux.Lock() defer resp.Job.Mux.Unlock() - for msgid := range resp.Job.ArticleMap { - delete(resp.Job.ArticleMap, msgid) - } - resp.Job.ArticleMap = nil - for i, _ := range resp.Job.Articles { resp.Job.Articles[i] = nil } resp.Job.Articles = nil + for msgid := range resp.Job.ArticleMap { + delete(resp.Job.ArticleMap, msgid) + } + resp.Job.ArticleMap = nil + for i, _ := range resp.Job.MessageIDs { resp.Job.MessageIDs[i] = nil } @@ -1162,7 +1163,7 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch } } amux.Lock() - result := fmt.Sprintf("END Newsgroup: '%s' | transferred: %d/%d (unwanted: %d | rejected: %d | checked: %d) TX_Errors: %d, connErrors: %d, took %v", + result := fmt.Sprintf("END Newsgroup: '%s' | transferred: %d/%d (unwanted: %d | rejected: %d | checked: %d | TX_Errors: %d | connErrors: %d | took %v", newsgroup.Name, transferred, totalArticles, unwanted, rejected, checked, txErrors, connErrors, time.Since(start)) amux.Unlock() //log.Print(result) @@ -1184,6 +1185,37 @@ func transferNewsgroup(db 
*database.Database, newsgroup *models.Newsgroup, batch log.Printf("WantShutdown in newsgroup: '%s' offset: %d", newsgroup.Name, offset) return nil } + // Load articles from requeue first + var queuedJobs []*nntp.CHTTJob + jobRequeueMutex.Lock() + if jobs, exists := jobRequeue[ttMode.Newsgroup]; exists { + queuedJobs = jobs + // clear requeue + delete(jobRequeue, ttMode.Newsgroup) + } + jobRequeueMutex.Unlock() + + if len(queuedJobs) > 0 { + log.Printf("Newsgroup: '%s' | Processing %d requeued jobs first", newsgroup.Name, len(queuedJobs)) + for i, job := range queuedJobs { + log.Printf("Newsgroup: '%s' | Processing requeued job %d/%d with %d articles", newsgroup.Name, i+1, len(queuedJobs), len(job.Articles)) + responseChan, err := processBatch(ttMode, job.Articles, redisCli) + if err != nil { + log.Printf("Newsgroup: '%s' | Error processing requeued batch: %v", newsgroup.Name, err) + jobRequeueMutex.Lock() + // insert remaining jobs back to slot 0 + jobRequeue[ttMode.Newsgroup] = slices.Insert(jobRequeue[ttMode.Newsgroup], 0, queuedJobs[i:]...) + jobRequeueMutex.Unlock() + return fmt.Errorf("error processing requeued batch for newsgroup '%s': %v", newsgroup.Name, err) + } + if responseChan != nil { + // pass the response channel to the collector channel: ttResponses + ttResponses <- responseChan + } + } + offset -= dbBatchSize + continue + } // Load batch from database with date filtering articles, err := getArticlesBatchWithDateFilter(groupDBs, offset, startTime, endTime) @@ -1252,21 +1284,13 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl doCheck := ttMode.FlipMode(lowerLevel, upperLevel) var batchedJob *nntp.CHTTJob - select { - case job := <-jobRequeue: - batchedJob = job - batchedJob.TTMode = ttMode - articles = batchedJob.Articles - batchedJob.Articles = nil - default: - batchedJob = &nntp.CHTTJob{ - Newsgroup: ttMode.Newsgroup, - MessageIDs: make([]*string, 0, len(articles)), - Articles: make([]*models.Article, 0, len(articles)), - ArticleMap: make(map[*string]*models.Article, len(articles)), - ResponseChan: make(chan *nntp.TTResponse, 1), - TTMode: ttMode, - } + batchedJob = &nntp.CHTTJob{ + Newsgroup: ttMode.Newsgroup, + MessageIDs: make([]*string, 0, len(articles)), + Articles: make([]*models.Article, 0, len(articles)), + ArticleMap: make(map[*string]*models.Article, len(articles)), + ResponseChan: make(chan *nntp.TTResponse, 1), + TTMode: ttMode, } switch doCheck { @@ -1593,7 +1617,8 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art return transferred, redis_cached, nil } // end func sendArticlesBatchViaTakeThis -var jobRequeue = make(chan *nntp.CHTTJob, nntp.NNTPTransferThreads) +var jobRequeueMutex sync.RWMutex +var jobRequeue = make(map[*string][]*nntp.CHTTJob) func BootConnWorkers(pool *nntp.Pool) { openConns := 0 @@ -1685,14 +1710,13 @@ forever: if job != nil { job.Mux.Lock() rqj := &nntp.CHTTJob{ - Newsgroup: job.Newsgroup, - Articles: job.Articles, - TxErrors: job.TxErrors, - ConnErrors: job.ConnErrors, - ResponseChan: make(chan *nntp.TTResponse, 1), + Newsgroup: job.Newsgroup, + Articles: job.Articles, } - jobRequeue <- rqj // TODO DEAD END - // free memory + jobRequeueMutex.Lock() + jobRequeue[rqj.Newsgroup] = append(jobRequeue[rqj.Newsgroup], rqj) // TODO DEAD END + jobRequeueMutex.Unlock() + // unlink pointers job.Newsgroup = nil job.TTMode = nil job.ResponseChan = nil diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 65bc25f..f0c73e2 100644 --- 
a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -107,9 +107,9 @@ func (job *CHTTJob) Increment(counter int) { case IncrFLAG_REDIS_CACHED: job.redisCached++ case IncrFLAG_TX_ERRORS: - job.txErrors++ + job.TxErrors++ case IncrFLAG_CONN_ERRORS: - job.connErrors++ + job.ConnErrors++ } } @@ -126,8 +126,8 @@ func (job *CHTTJob) GetUpdateCounters(transferred, unwanted, rejected, checked, *unwanted += job.unwanted *rejected += job.rejected *checked += job.checked - *txErrors += job.txErrors - *connErrors += job.connErrors + *txErrors += job.TxErrors + *connErrors += job.ConnErrors job.Mux.Unlock() } From 72b9ab5ebf0dd9aaf1c2039b6278b811b010ce67 Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 5 Oct 2025 13:59:29 +0000 Subject: [PATCH 004/100] unfinished --- cmd/nntp-transfer/main.go | 480 +++++++++++++++++--------- internal/common/common.go | 10 + internal/nntp/nntp-client-commands.go | 6 +- internal/nntp/nntp-client.go | 4 +- 4 files changed, 336 insertions(+), 164 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 9108369..267a5b5 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -84,7 +84,6 @@ var appVersion = "-unset-" var redisCtx = context.Background() var REDIS_TTL time.Duration = 3600 * time.Second // default 1h -// var MaxThreads int var BatchCheck int func main() { @@ -377,6 +376,12 @@ func main() { log.Printf("Debug capture mode enabled - capturing articles without sending") *dryRun = true } + // Start NNTP worker pool + if !*dryRun { + log.Printf("Starting NNTP connection worker pool...") + go BootConnWorkers(pool, redisCli) + time.Sleep(2 * time.Second) // Give workers time to establish connections + } // Start transfer process var wgP sync.WaitGroup wgP.Add(2) @@ -1036,9 +1041,11 @@ func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchChe if nothingInDateRange > 0 { log.Printf("Note: %d newsgroups had no articles in the specified date range", nothingInDateRange) } + resultsMutex.Lock() for _, result := range results { log.Print(result) } + resultsMutex.Unlock() log.Printf("Summary: transferred: %d | redis_cache_hits: %d | unwanted: %d | rejected: %d | TX_Errors: %d | connErrors: %d", totalTransferred, totalRedisCacheHits, totalUnwanted, totalRejected, totalTXErrors, totalConnErrors) return nil @@ -1048,6 +1055,55 @@ var debugArticles = make(map[string][]*models.Article) var debugMutex sync.Mutex var ErrNotInDateRange = fmt.Errorf("article not in specified date range") +// processRequeuedJobs processes any failed jobs that were requeued for retry +// Returns the number of jobs processed successfully +func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponses chan chan *nntp.TTResponse, redisCli *redis.Client) (int, error) { + var queuedJobs []*nntp.CHTTJob + jobRequeueMutex.Lock() + if jobs, exists := jobRequeue[ttMode.Newsgroup]; exists { + queuedJobs = jobs + // clear requeue + delete(jobRequeue, ttMode.Newsgroup) + } + jobRequeueMutex.Unlock() + + if len(queuedJobs) == 0 { + return 0, nil + } + + log.Printf("Newsgroup: '%s' | Processing %d failed requeued jobs", newsgroup, len(queuedJobs)) + for i, job := range queuedJobs { + if common.WantShutdown() { + log.Printf("WantShutdown while processing requeued jobs for '%s'", newsgroup) + // Put remaining jobs back in queue + if i < len(queuedJobs) { + jobRequeueMutex.Lock() + jobRequeue[ttMode.Newsgroup] = slices.Insert(jobRequeue[ttMode.Newsgroup], 0, queuedJobs[i:]...) 
+ jobRequeueMutex.Unlock() + } + return i, nil + } + + log.Printf("Newsgroup: '%s' | Processing requeued job %d/%d with %d articles", newsgroup, i+1, len(queuedJobs), len(job.Articles)) + responseChan, err := processBatch(ttMode, job.Articles, redisCli) + if err != nil { + log.Printf("Newsgroup: '%s' | Error processing requeued batch: %v", newsgroup, err) + jobRequeueMutex.Lock() + // insert remaining jobs back to slot 0 + jobRequeue[ttMode.Newsgroup] = slices.Insert(jobRequeue[ttMode.Newsgroup], 0, queuedJobs[i:]...) + jobRequeueMutex.Unlock() + return i, fmt.Errorf("error processing requeued batch for newsgroup '%s': %v", newsgroup, err) + } + if responseChan != nil { + // pass the response channel to the collector channel: ttResponses + ttResponses <- responseChan + } + } + + log.Printf("Newsgroup: '%s' | Successfully processed %d requeued jobs", newsgroup, len(queuedJobs)) + return len(queuedJobs), nil +} + // transferNewsgroup transfers articles from a single newsgroup func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batchCheck int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, redisCli *redis.Client) error { @@ -1108,14 +1164,24 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch CheckMode: true, } ttResponses := make(chan chan *nntp.TTResponse, totalArticles/int64(batchCheck)+2) - defer close(ttResponses) start := time.Now() + + // WaitGroup to ensure collector goroutine finishes before returning + var collectorWG sync.WaitGroup + collectorWG.Add(1) + + // WaitGroup to track individual response channel processors + var responseWG sync.WaitGroup + go func() { + defer collectorWG.Done() var amux sync.Mutex var transferred, unwanted, rejected, checked, txErrors, connErrors uint64 for responseChan := range ttResponses { if responseChan != nil { + responseWG.Add(1) go func(rc chan *nntp.TTResponse) { + defer responseWG.Done() for resp := range rc { if resp == nil { log.Printf("Newsgroup: '%s' | Warning: nil TT response channel received!?", newsgroup.Name) @@ -1138,7 +1204,7 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch resp.Job.Mux.Lock() defer resp.Job.Mux.Unlock() - for i, _ := range resp.Job.Articles { + for i := range resp.Job.Articles { resp.Job.Articles[i] = nil } resp.Job.Articles = nil @@ -1148,12 +1214,12 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch } resp.Job.ArticleMap = nil - for i, _ := range resp.Job.MessageIDs { + for i := range resp.Job.MessageIDs { resp.Job.MessageIDs[i] = nil } resp.Job.MessageIDs = nil - for i, _ := range resp.Job.WantedIDs { + for i := range resp.Job.WantedIDs { resp.Job.WantedIDs[i] = nil } resp.Job.WantedIDs = nil @@ -1162,6 +1228,9 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch }(responseChan) } } + // Wait for all response channel processors to finish + responseWG.Wait() + amux.Lock() result := fmt.Sprintf("END Newsgroup: '%s' | transferred: %d/%d (unwanted: %d | rejected: %d | checked: %d | TX_Errors: %d | connErrors: %d | took %v", newsgroup.Name, transferred, totalArticles, unwanted, rejected, checked, txErrors, connErrors, time.Since(start)) @@ -1185,38 +1254,11 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch log.Printf("WantShutdown in newsgroup: '%s' offset: %d", newsgroup.Name, offset) return nil } - // Load articles from requeue first - var queuedJobs []*nntp.CHTTJob - jobRequeueMutex.Lock() - if jobs, exists := 
jobRequeue[ttMode.Newsgroup]; exists { - queuedJobs = jobs - // clear requeue - delete(jobRequeue, ttMode.Newsgroup) - } - jobRequeueMutex.Unlock() - - if len(queuedJobs) > 0 { - log.Printf("Newsgroup: '%s' | Processing %d requeued jobs first", newsgroup.Name, len(queuedJobs)) - for i, job := range queuedJobs { - log.Printf("Newsgroup: '%s' | Processing requeued job %d/%d with %d articles", newsgroup.Name, i+1, len(queuedJobs), len(job.Articles)) - responseChan, err := processBatch(ttMode, job.Articles, redisCli) - if err != nil { - log.Printf("Newsgroup: '%s' | Error processing requeued batch: %v", newsgroup.Name, err) - jobRequeueMutex.Lock() - // insert remaining jobs back to slot 0 - jobRequeue[ttMode.Newsgroup] = slices.Insert(jobRequeue[ttMode.Newsgroup], 0, queuedJobs[i:]...) - jobRequeueMutex.Unlock() - return fmt.Errorf("error processing requeued batch for newsgroup '%s': %v", newsgroup.Name, err) - } - if responseChan != nil { - // pass the response channel to the collector channel: ttResponses - ttResponses <- responseChan - } - } - offset -= dbBatchSize - continue + // Process any requeued jobs first (from previous failed batches) + if _, err := processRequeuedJobs(newsgroup.Name, ttMode, ttResponses, redisCli); err != nil { + return err } - + start := time.Now() // Load batch from database with date filtering articles, err := getArticlesBatchWithDateFilter(groupDBs, offset, startTime, endTime) if err != nil { @@ -1234,9 +1276,9 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch debugMutex.Unlock() return nil } - if VERBOSE { - log.Printf("Newsgroup: '%s' | Loaded %d articles from database (offset %d)", newsgroup.Name, len(articles), offset) - } + //if VERBOSE { + log.Printf("Newsgroup: '%s' | Loaded %d articles from database (offset %d) took %v", newsgroup.Name, len(articles), offset, time.Since(start)) + //} // Process articles in network batches for i := 0; i < len(articles); i += batchCheck { if common.WantShutdown() { @@ -1264,6 +1306,36 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch //log.Printf("Newsgroup: '%s' | Pushed (offset %d/%d) total: %d/%d (unw: %d / rej: %d) (Check=%t)", newsgroup.Name, offset, totalArticles, transferred, remainingArticles, ttMode.Unwanted, ttMode.Rejected, ttMode.GetMode()) } } // end for offset range totalArticles + + // Process any remaining requeued jobs after main loop completes + // This handles failures that occurred in the last batch + for { + if common.WantShutdown() { + log.Printf("WantShutdown during final requeue processing for '%s'", newsgroup.Name) + break + } + processed, err := processRequeuedJobs(newsgroup.Name, ttMode, ttResponses, redisCli) + if err != nil { + log.Printf("Newsgroup: '%s' | Error in final requeue processing: %v", newsgroup.Name, err) + // Don't return error, just log it - we've already processed most articles + break + } + if processed == 0 { + // No more requeued jobs to process + break + } + //log.Printf("Newsgroup: '%s' | Processed %d requeued jobs in final pass", newsgroup.Name, processed) + // Loop again to check if any of those jobs failed and were requeued + } + + // Close the ttResponses channel to signal collector goroutine to finish + close(ttResponses) + + // Wait for collector goroutine to finish processing all responses + collectorWG.Wait() + + log.Printf("Newsgroup: '%s' | All jobs completed and responses collected", newsgroup.Name) + return nil } // end func transferNewsgroup @@ -1336,9 +1408,9 @@ func processBatch(ttMode 
*nntp.TakeThisMode, articles []*models.Article, redisCl } // Not cached - add to valid list - batchedJob.MessageIDs = append(batchedJob.MessageIDs, &article.MessageID) - batchedJob.ArticleMap[&article.MessageID] = article batchedJob.Articles = append(batchedJob.Articles, article) + batchedJob.ArticleMap[&article.MessageID] = article + batchedJob.MessageIDs = append(batchedJob.MessageIDs, &article.MessageID) } } else { // No Redis - add all non-nil message IDs @@ -1346,9 +1418,9 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl if article == nil { continue } - batchedJob.MessageIDs = append(batchedJob.MessageIDs, &article.MessageID) - batchedJob.ArticleMap[&article.MessageID] = article batchedJob.Articles = append(batchedJob.Articles, article) + batchedJob.ArticleMap[&article.MessageID] = article + batchedJob.MessageIDs = append(batchedJob.MessageIDs, &article.MessageID) } } @@ -1361,99 +1433,29 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl } nntp.CheckQueue <- batchedJob return batchedJob.ResponseChan, nil - //log.Printf("Newsgroup: '%s' | CHECK wanted %d articles", newsgroup, len(wantedIds)) - // Send CHECK commands for all message IDs - /* disabled - wantedIds, err := conn.CheckMultiple(checkMessageIds, ttMode) - if err != nil { - ttMode.ConnErrors++ - conn.ForceCloseConn() - return transferred, checked, successRate, redis_cache_hits, fmt.Errorf("Newsgroup: '%s' | failed to send CHECK command: %v", newsgroup, err) - } - checked += uint64(len(checkMessageIds)) - - if len(wantedIds) == 0 { - //log.Printf("No articles wanted by server in this batch") - ttMode.TmpSuccessCount = 0 - ttMode.TmpTakeThisCount = 0 - return transferred, checked, successRate, redis_cache_hits, nil - } - // Send TAKETHIS for wanted articles - for _, msgId := range wantedIds { - if msgId == nil { - continue - } - article, exists := checkArticleMap[*msgId] - if !exists { - log.Printf("WARN: Article not found in checkArticleMap for msgId: %s", *msgId) - continue - } - ttJob.WantedArticles = append(ttJob.WantedArticles, article) - } - - log.Printf("Newsgroup: '%s' | CHECK wants: %d/%d message IDs. 
sending=%d", newsgroup, len(wantedIds), len(checkMessageIds), len(ttJob.WantedArticles)) - - txcount, rc, err := sendArticlesBatchViaTakeThis(conn, wantedArticles, ttMode, newsgroup, redisCli) - //log.Printf("Newsgroup: '%s' | sendArticlesBatchViaTakeThis returned: transferred=%d redis_cached=%d err=%v", newsgroup, txcount, rc, err) - transferred += txcount - redis_cache_hits += rc - - if err != nil { - log.Printf("Newsgroup: '%s' | Failed to send CHECKED TAKETHIS: %v", newsgroup, err) - return transferred, checked, successRate, redis_cache_hits, fmt.Errorf("failed to send CHECKED TAKETHIS batch: %v", err) - } - return transferred, checked, successRate, redis_cache_hits, nil - */ // end case ttMode.CheckMode // case !ttMode.CheckMode case false: - // TAKETHIS mode: send articles directly and track success rate - //log.Printf("Newsgroup: '%s' | TAKETHIS: %d articles (success rate: %.1f%%)", newsgroup, len(articles), successRate) + // TAKETHIS mode: send articles directly without CHECK + //log.Printf("Newsgroup: '%s' | TAKETHIS: %d articles", newsgroup, len(articles)) // Validate articles before sending in TAKETHIS mode - for _, article := range articles { if article == nil { continue } batchedJob.Articles = append(batchedJob.Articles, article) + batchedJob.ArticleMap[&article.MessageID] = article + batchedJob.WantedIDs = append(batchedJob.WantedIDs, &article.MessageID) } if len(batchedJob.Articles) == 0 { log.Printf("WARN: No valid articles for TAKETHIS mode, skipping batch") return nil, nil } - nntp.TakeThisQueue <- batchedJob // TODO DEAD END + nntp.TakeThisQueue <- batchedJob return batchedJob.ResponseChan, nil - /* - response := <-ttJob.ResponseChan - transferred += response.txcount - redis_cache_hits += response.rc - successRate = float64(response.txcount) / float64(len(ttJob.WantedArticles)) * 100 - if response.err != nil { - log.Printf("Newsgroup: '%s' | Failed to send TAKETHIS: %v", newsgroup, response.err) - return transferred, checked, successRate, redis_cache_hits, fmt.Errorf("failed to send TAKETHIS batch: %v", response.err) - } - */ - //log.Printf("Newsgroup: '%s' | TAKETHIS sent: transferred=%d redis_cached=%d", newsgroup, response.txcount, response.rc) - - /* TODO HERE - txcount, rc, err := sendArticlesBatchViaTakeThis(conn, wantedArticles, ttMode, newsgroup, redisCli) - transferred += txcount - redis_cache_hits += rc - - if err != nil { - return transferred, checked, successRate, redis_cache_hits, fmt.Errorf("failed to send TAKETHIS batch: %v", err) - } - if txcount == 0 { - if !ttMode.CheckMode { - ttMode.CheckMode = true - ttMode.TmpSuccessCount = 0 - ttMode.TmpTakeThisCount = 0 - } - } - */ } // end case !ttMode.CheckMode // end switch ttMode.CheckMode @@ -1463,9 +1465,9 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl // sendArticlesBatchViaTakeThis sends multiple articles via TAKETHIS in streaming mode // Sends all TAKETHIS commands first, then reads all responses (true streaming) -func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Article, job *nntp.CHTTJob, newsgroup string, redisCli *redis.Client) (transferred uint64, redis_cached uint64, err error) { +func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Article, job *nntp.CHTTJob, newsgroup string, redisCli *redis.Client) (transferred uint64, rejected uint64, redis_cached uint64, err error) { if len(articles) == 0 { - return 0, 0, nil + return 0, 0, 0, nil } // Phase 1: Send all TAKETHIS commands without waiting for responses @@ 
-1530,7 +1532,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art } conn.ForceCloseConn() log.Printf("ERROR Newsgroup: '%s' | Failed to send TAKETHIS for %s: %v", newsgroup, article.MessageID, err) - return 0, redis_cached, fmt.Errorf("failed to send TAKETHIS for %s: %v", article.MessageID, err) + return 0, 0, redis_cached, fmt.Errorf("failed to send TAKETHIS for %s: %v", article.MessageID, err) } artChan <- &nntp.CheckResponse{ @@ -1541,7 +1543,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art close(artChan) //log.Printf("Sent %d TAKETHIS commands, reading responses...", len(commandIDs)) var done []*string - var countDone, rejected int + var countDone int // Phase 2: Read all responses in order for cr := range artChan { @@ -1551,7 +1553,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art job.Increment(nntp.IncrFLAG_CONN_ERRORS) conn.ForceCloseConn() log.Printf("ERROR Newsgroup: '%s' | Failed to read TAKETHIS response for %s: %v", newsgroup, cr.Article.MessageID, err) - return transferred, redis_cached, fmt.Errorf("failed to read TAKETHIS response for %s: %v", cr.Article.MessageID, err) + return transferred, rejected, redis_cached, fmt.Errorf("failed to read TAKETHIS response for %s: %v", cr.Article.MessageID, err) } countDone++ // Update success rate tracking @@ -1559,10 +1561,11 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art case 239: job.TTMode.IncrementSuccess() job.Increment(nntp.IncrFLAG_TRANSFERRED) + transferred++ case 439: job.Increment(nntp.IncrFLAG_REJECTED) + rejected++ if VERBOSE { - rejected++ log.Printf("Newsgroup: '%s' | Rejected article '%s': response=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, takeThisResponseCode, countDone, len(articles)) //resultsMutex.Lock() //rejectedArticles[newsgroup] = append(rejectedArticles[newsgroup], article.MessageID) @@ -1572,7 +1575,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': response=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, takeThisResponseCode, countDone, len(articles)) job.Increment(nntp.IncrFLAG_TX_ERRORS) conn.ForceCloseConn() - return transferred, redis_cached, fmt.Errorf("failed to transfer article '%s': response=%d", cr.Article.MessageID, takeThisResponseCode) + return transferred, rejected, redis_cached, fmt.Errorf("failed to transfer article '%s': response=%d", cr.Article.MessageID, takeThisResponseCode) default: log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': unknown response=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, takeThisResponseCode, countDone, len(articles)) @@ -1612,15 +1615,15 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art } } if VERBOSE { - log.Printf("Newsgroup: '%s' | Batch transferred: %d/%d articles. redis_cached=%d", newsgroup, transferred, len(articles), redis_cached) + log.Printf("Newsgroup: '%s' | Batch transferred: %d/%d articles. 
rejected=%d redis_cached=%d", newsgroup, transferred, len(articles), rejected, redis_cached) } - return transferred, redis_cached, nil + return transferred, rejected, redis_cached, nil } // end func sendArticlesBatchViaTakeThis var jobRequeueMutex sync.RWMutex var jobRequeue = make(map[*string][]*nntp.CHTTJob) -func BootConnWorkers(pool *nntp.Pool) { +func BootConnWorkers(pool *nntp.Pool, redisCli *redis.Client) { openConns := 0 defaultSleep := time.Second isleep := defaultSleep @@ -1632,9 +1635,13 @@ forever: log.Printf("BootConnWorkers: WantShutdown, exiting") break forever } + mux.Lock() if openConns == nntp.NNTPTransferThreads { + mux.Unlock() continue forever } + mux.Unlock() + var sharedConns []*nntp.BackendConn bootN := nntp.NNTPTransferThreads - openConns - 1 // get connections from pool @@ -1668,11 +1675,12 @@ forever: continue forever } isleep = defaultSleep // reset to default - returnSignals := make([]*ReturnSignal, 0, len(sharedConns)) + returnSignals := make([]*ReturnSignal, len(sharedConns)) errChan := make(chan struct{}, 1) for i, conn := range sharedConns { returnSignal := &ReturnSignal{ errChan: errChan, + redisCli: redisCli, Chan: make(chan *ReturnSignal, 1), tmpMessageIDs: make([]*string, 0, BatchCheck), jobsQueued: make(map[*nntp.CHTTJob]uint64, BatchCheck), @@ -1708,15 +1716,23 @@ forever: log.Printf("CHTTWorker (%d) try requeue %d jobs", i, len(rs.jobs)) for _, job := range rs.jobs { if job != nil { + // copy articles pointer job.Mux.Lock() rqj := &nntp.CHTTJob{ Newsgroup: job.Newsgroup, Articles: job.Articles, } + job.Mux.Unlock() + jobRequeueMutex.Lock() jobRequeue[rqj.Newsgroup] = append(jobRequeue[rqj.Newsgroup], rqj) // TODO DEAD END jobRequeueMutex.Unlock() + // unlink pointers + job.Mux.Lock() + if job.TTMode != nil { + job.TTMode.Newsgroup = nil + } job.Newsgroup = nil job.TTMode = nil job.ResponseChan = nil @@ -1752,6 +1768,7 @@ type ReturnSignal struct { Mux sync.Mutex Chan chan *ReturnSignal errChan chan struct{} + redisCli *redis.Client tmpMessageIDs []*string jobsQueued map[*nntp.CHTTJob]uint64 jobsReadOK map[*nntp.CHTTJob]uint64 @@ -1759,10 +1776,18 @@ type ReturnSignal struct { jobs []*nntp.CHTTJob } +type readRequest struct { + MsgID *string + retChan chan struct{} +} + func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { - readResponsesChan := make(chan *string, BatchCheck) - errChan := make(chan struct{}, 3) + readResponsesChan := make(chan *readRequest, BatchCheck) + takeThisChan := make(chan *nntp.CHTTJob, nntp.NNTPTransferThreads) + errChan := make(chan struct{}, 4) tickChan := make(chan struct{}, 1) + flipflopChan := make(chan struct{}, 1) + rrRetChan := make(chan struct{}, BatchCheck) defer func(conn *nntp.BackendConn, rs *ReturnSignal) { conn.ForceCloseConn() @@ -1808,18 +1833,56 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { rs.tmpMessageIDs = rs.tmpMessageIDs[:0] //clear rs.Mux.Unlock() - log.Printf("CheckWorker (%d): SendCheckMultiple %d message IDs", id, len(checkIds)) - err := conn.SendCheckMultiple(checkIds) - if err != nil { - log.Printf("CheckWorker (%d): SendCheckMultiple error: %v", id, err) - return - } - for _, msgID := range checkIds { - if msgID != nil { - // pass message ID pointer to channel - // to read the responses from connection - readResponsesChan <- msgID + log.Printf("CheckWorker (%d): Checking %d message IDs in batches of %d", id, len(checkIds), BatchCheck) + + // Process checkIds in batches of BatchCheck + for batchStart := 0; batchStart < len(checkIds); batchStart += BatchCheck 
{ + batchEnd := batchStart + BatchCheck + if batchEnd > len(checkIds) { + batchEnd = len(checkIds) + } + batch := checkIds[batchStart:batchEnd] + + // Lock for this batch only + common.ChanLock(flipflopChan) + log.Printf("CheckWorker (%d): Sending CHECK for batch %d-%d (%d messages)", id, batchStart, batchEnd, len(batch)) + + err := conn.SendCheckMultiple(batch) + if err != nil { + common.ChanRelease(flipflopChan) + log.Printf("CheckWorker (%d): SendCheckMultiple error for batch %d-%d: %v", id, batchStart, batchEnd, err) + return } + + // Send all message IDs to read channel + for _, msgID := range batch { + if msgID != nil { + // pass message ID pointer to channel + // to read the responses from connection + readResponsesChan <- &readRequest{MsgID: msgID, retChan: rrRetChan} + } + } + + // Wait for all responses in this batch before releasing lock + for _, msgID := range batch { + if msgID != nil { + <-rrRetChan + /*disabled + select { + case <-rrRetChan: + + case <-time.After(time.Second * 300): + log.Printf("CheckWorker (%d): Timeout waiting for CHECK response for msgID: %s", id, *msgID) + common.ChanRelease(flipflopChan) + return + + } + */ + + } + } + // Release lock after batch is complete, allowing TAKETHIS to run + common.ChanRelease(flipflopChan) } rs.Mux.Lock() @@ -1860,15 +1923,22 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { log.Printf("CheckWorker (%d): Read CHECK responses got errChan signal... exiting", id) return - case msgID := <-readResponsesChan: + case rr := <-readResponsesChan: + if rr == nil || rr.MsgID == nil || rr.retChan == nil { + log.Printf("CheckWorker (%d): Read CHECK got nil readRequest, skipping", id) + rr.retChan <- struct{}{} + continue loop + } if common.WantShutdown() { log.Printf("CheckWorker (%d): Read CHECK WantShutdown, exiting", id) + rr.retChan <- struct{}{} return } start := time.Now() code, line, err := conn.TextConn.ReadCodeLine(238) if code == 0 && err != nil { log.Printf("Failed to read CHECK response: %v", err) + rr.retChan <- struct{}{} return } tookTime += time.Since(start).Milliseconds() @@ -1888,27 +1958,30 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { parts := strings.Fields(line) if len(parts) < 1 { log.Printf("Malformed CHECK response: %s", line) + rr.retChan <- struct{}{} return } - if parts[0] != *msgID { - log.Printf("Mismatched CHECK response: expected %s, got %s", *msgID, parts[0]) + if parts[0] != *rr.MsgID { + log.Printf("Mismatched CHECK response: expected %s, got %s", *rr.MsgID, parts[0]) + rr.retChan <- struct{}{} return } rs.Mux.Lock() - job, exists := rs.jobMap[msgID] + job, exists := rs.jobMap[rr.MsgID] rs.Mux.Unlock() if !exists { - log.Printf("ERROR in CheckWorker: ReadCheckResponse msgId did not exist in jobMap: %s", *msgID) + log.Printf("ERROR in CheckWorker: ReadCheckResponse msgId did not exist in jobMap: %s", *rr.MsgID) + rr.retChan <- struct{}{} continue loop } rs.Mux.Lock() - delete(rs.jobMap, msgID) + delete(rs.jobMap, rr.MsgID) rs.jobsReadOK[job]++ rs.Mux.Unlock() switch code { case 238: //log.Printf("Wanted Article '%s': response=%d", *msgID, code) - job.AppendWantedMessageID(msgID) + job.AppendWantedMessageID(rr.MsgID) case 438: //log.Printf("Unwanted Article '%s': response=%d", *msgID, code) @@ -1918,7 +1991,7 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { job.Increment(nntp.IncrFLAG_RETRY) default: - log.Printf("Unknown CHECK response: line='%s' code=%d expected msgID %s", line, code, *msgID) + log.Printf("Unknown CHECK response: 
line='%s' code=%d expected msgID %s", line, code, *rr.MsgID) } // check if all jobs are done rs.Mux.Lock() @@ -1927,22 +2000,106 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { rs.Mux.Unlock() if !qexists || !rexists { log.Printf("ERROR in CheckWorker: queuedCount or readCount did not exist for a job?!") + rr.retChan <- struct{}{} continue loop } if queuedCount == readCount { rs.Mux.Lock() - log.Printf("CheckWorker (%d): All CHECK responses received for job %v", id, job) delete(rs.jobsQueued, job) delete(rs.jobsReadOK, job) rs.Mux.Unlock() + if len(job.WantedIDs) > 0 { + // Pass job to TAKETHIS worker via channel + takeThisChan <- job // local takethis chan sharing the same connection + log.Printf("Newsgroup '%s' | CheckWorker (%d): Sent job to TAKETHIS worker (wanted: %d/%d)", *job.Newsgroup, id, len(job.WantedIDs), queuedCount) + } else { + log.Printf("Newsgroup '%s' | CheckWorker (%d): got %d CHECK responses but server wants none", *job.Newsgroup, id, queuedCount) + } } - // TODO HERE: pass the job to takethis - // using this connection where we did the CHECK! - // *** + rr.retChan <- struct{}{} } // end select } // end forever }() + // launch a goroutine to process TAKETHIS jobs from local channel sharing the same connection + go func() { + defer func() { + errChan <- struct{}{} + }() + + for { + if common.WantShutdown() { + log.Printf("CheckWorker (%d): TAKETHIS worker WantShutdown, exiting", id) + return + } + var job *nntp.CHTTJob + select { + case job1 := <-nntp.TakeThisQueue: + job = job1 + if job != nil { + log.Printf("CheckWorker (%d): Received TAKETHIS job from global queue (wanted: %d)", id, len(job.WantedIDs)) + } + case job2 := <-takeThisChan: + job = job2 + if job != nil { + log.Printf("CheckWorker (%d): Received TAKETHIS job from local CHECK channel (wanted: %d)", id, len(job.WantedIDs)) + } + } + + // Check for nil job (channel closed) + if job == nil { + log.Printf("CheckWorker (%d): Received nil job, channels may be closing", id) + continue + } + + // Build list of wanted articles + wantedArticles := make([]*models.Article, 0, len(job.WantedIDs)) + for _, wantedID := range job.WantedIDs { + if wantedID != nil { + if article, exists := job.ArticleMap[wantedID]; exists { + wantedArticles = append(wantedArticles, article) + } + } + } + + if len(wantedArticles) == 0 { + log.Printf("CheckWorker (%d): No valid wanted articles found in ArticleMap", id) + if job.ResponseChan != nil { + job.ResponseChan <- &nntp.TTResponse{Job: nil, Err: fmt.Errorf("got job without valid wanted articles")} + close(job.ResponseChan) + } + continue + } + + common.ChanLock(flipflopChan) + log.Printf("CheckWorker (%d): Sending TAKETHIS for %d wanted articles", id, len(wantedArticles)) + // Send TAKETHIS commands using existing function + transferred, rejected, redis_cached, err := sendArticlesBatchViaTakeThis(conn, wantedArticles, job, *job.Newsgroup, rs.redisCli) + common.ChanRelease(flipflopChan) + + if err != nil { + log.Printf("CheckWorker (%d): Error in TAKETHIS: %v", id, err) + if job.ResponseChan != nil { + job.ResponseChan <- &nntp.TTResponse{Job: job, Err: err} + close(job.ResponseChan) + } + continue + } + + log.Printf("CheckWorker (%d): TAKETHIS completed: transferred=%d, rejected=%d, redis_cached=%d", id, transferred, rejected, redis_cached) + + // Send response back + if job.ResponseChan != nil { + log.Printf("CheckWorker (%d): Sending TTresponse back to responseChan len=%d", id, len(job.ResponseChan)) + job.ResponseChan <- &nntp.TTResponse{Job: job, Err: nil} + 
log.Printf("CheckWorker (%d): Sent TTresponse back to job channel, closing response channel", id) + close(job.ResponseChan) + } else { + log.Printf("ERROR CheckWorker (%d): job.ResponseChan is nil, cannot send response back", id) + } + } + }() + for { select { case <-errChan: @@ -1954,26 +2111,31 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { return } if job == nil || len(job.MessageIDs) == 0 { - if VERBOSE { - log.Printf("CheckWorker: empty job, skipping") - } + log.Printf("CheckWorker: empty job, skipping") if job != nil && job.ResponseChan != nil { job.ResponseChan <- nil + close(job.ResponseChan) } continue } + // Track this job and its message IDs + // accumulate message IDs to rs.tmpMessageIDs + // when threshold BatchCheck is reached, send CHECK commands + //log.Printf("CheckWorker (%d): Received job with %d message IDs", id, len(job.MessageIDs)) for _, msgId := range job.MessageIDs { if msgId != nil { rs.Mux.Lock() rs.tmpMessageIDs = append(rs.tmpMessageIDs, msgId) rs.jobMap[msgId] = job rs.jobsQueued[job]++ - rs.jobs = append(rs.jobs, job) rs.Mux.Unlock() } } rs.Mux.Lock() - hasWork := len(rs.tmpMessageIDs) >= BatchCheck + hasWork := len(rs.tmpMessageIDs) >= 0 + if hasWork { + rs.jobs = append(rs.jobs, job) + } rs.Mux.Unlock() if hasWork { // send signal to process CHECK commands diff --git a/internal/common/common.go b/internal/common/common.go index 8d430b6..45f6552 100644 --- a/internal/common/common.go +++ b/internal/common/common.go @@ -35,3 +35,13 @@ func IsClosedChannel(ch chan struct{}) bool { return false } } + +func ChanLock(lockChan chan struct{}) { + // try aquire lock + lockChan <- struct{}{} +} + +func ChanRelease(lockChan chan struct{}) { + // release lock + <-lockChan +} diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index f0c73e2..483c1d2 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -29,7 +29,7 @@ var MaxReadLinesXover int64 = 100 // XOVER command typically retrieves overview // MaxReadLinesBody Maximum lines for BODY command, which retrieves the body of an article const MaxReadLinesBody = MaxReadLinesArticle - MaxReadLinesHeaders -var NNTPTransferThreads int = 0 +var NNTPTransferThreads int = 1 var TakeThisQueue = make(chan *CHTTJob, NNTPTransferThreads) var CheckQueue = make(chan *CHTTJob, NNTPTransferThreads) @@ -191,11 +191,11 @@ func (ttMode *TakeThisMode) FlipMode(lowerLevel float64, upperLevel float64) boo func (c *BackendConn) ForceCloseConn() { c.mux.Lock() - defer c.mux.Unlock() if !c.forceClose { c.forceClose = true - go c.Pool.Put(c) } + c.mux.Unlock() + c.Pool.Put(c) } // StatArticle checks if an article exists on the server diff --git a/internal/nntp/nntp-client.go b/internal/nntp/nntp-client.go index 1e24370..baf8e54 100644 --- a/internal/nntp/nntp-client.go +++ b/internal/nntp/nntp-client.go @@ -301,8 +301,8 @@ func (c *BackendConn) CloseFromPoolOnly() error { c.connected = false c.authenticated = false - c.TextConn = nil // CloseFromPoolOnly - c.conn = nil // CloseFromPoolOnly + //c.TextConn = nil // CloseFromPoolOnly + //c.conn = nil // CloseFromPoolOnly c.Writer = nil //log.Printf("Closed NNTP Connection to %s", c.Backend.Host) return nil From 0fd24a11c3b7fc2e4401ecf73444f3f31f033f85 Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 5 Oct 2025 16:52:21 +0000 Subject: [PATCH 005/100] unfinished --- cmd/nntp-transfer/main.go | 302 ++++++++++++++++---------- internal/nntp/nntp-client-commands.go | 11 + 2 files changed, 195 
insertions(+), 118 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 267a5b5..15c9919 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -13,6 +13,7 @@ import ( "strconv" "strings" "sync" + "sync/atomic" "time" "github.com/go-while/go-pugleaf/internal/common" @@ -1085,6 +1086,7 @@ func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponse } log.Printf("Newsgroup: '%s' | Processing requeued job %d/%d with %d articles", newsgroup, i+1, len(queuedJobs), len(job.Articles)) + // pass articles to CHECK or TAKETHIS queue (async!) responseChan, err := processBatch(ttMode, job.Articles, redisCli) if err != nil { log.Printf("Newsgroup: '%s' | Error processing requeued batch: %v", newsgroup, err) @@ -1177,60 +1179,76 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch defer collectorWG.Done() var amux sync.Mutex var transferred, unwanted, rejected, checked, txErrors, connErrors uint64 + var num uint64 for responseChan := range ttResponses { - if responseChan != nil { - responseWG.Add(1) - go func(rc chan *nntp.TTResponse) { - defer responseWG.Done() - for resp := range rc { - if resp == nil { - log.Printf("Newsgroup: '%s' | Warning: nil TT response channel received!?", newsgroup.Name) - return - } - if resp.Err != nil { - log.Printf("Newsgroup: '%s' | Error in TT response: err='%v' job='%#v'", newsgroup.Name, resp.Err, resp.Job) - return - } - if resp.Job == nil { - log.Printf("Newsgroup: '%s' | Warning: nil Job in TT response without error!?", newsgroup.Name) - return - } - // get numbers - amux.Lock() - resp.Job.GetUpdateCounters(&transferred, &unwanted, &rejected, &checked, &txErrors, &connErrors) - amux.Unlock() - - // free memory - resp.Job.Mux.Lock() - defer resp.Job.Mux.Unlock() - - for i := range resp.Job.Articles { + if responseChan == nil { + log.Printf("Newsgroup: '%s' | Warning: nil TT response channel received in collector!?", newsgroup.Name) + continue + } + num++ + log.Printf("Newsgroup: '%s' | Starting response channel processor num %d", newsgroup.Name, num) + responseWG.Add(1) + go func(rc chan *nntp.TTResponse, num uint64) { + defer responseWG.Done() + defer log.Printf("Newsgroup: '%s' | Ending response channel processor num %d", newsgroup.Name, num) + for resp := range rc { + if resp == nil { + log.Printf("Newsgroup: '%s' | Warning: nil TT response channel received!?", newsgroup.Name) + return + } + if resp.Err != nil { + log.Printf("Newsgroup: '%s' | Error in TT response: err='%v' job='%#v'", newsgroup.Name, resp.Err, resp.Job) + return + } + if resp.Job == nil { + log.Printf("Newsgroup: '%s' | Warning: nil Job in TT response without error!?", newsgroup.Name) + return + } + // get numbers + amux.Lock() + resp.Job.GetUpdateCounters(&transferred, &unwanted, &rejected, &checked, &txErrors, &connErrors) + amux.Unlock() + + // free memory - CRITICAL: Lock and unlock in same scope, not with defer! 
+ resp.Job.Mux.Lock() + + // Clean up Articles and their internal fields + for i := range resp.Job.Articles { + if resp.Job.Articles[i] != nil { + // Clean article internal fields to free memory + resp.Job.Articles[i].RefSlice = nil + resp.Job.Articles[i].NNTPhead = nil + resp.Job.Articles[i].NNTPbody = nil + resp.Job.Articles[i].Headers = nil + resp.Job.Articles[i].ArticleNums = nil + resp.Job.Articles[i].NewsgroupsPtr = nil + resp.Job.Articles[i].ProcessQueue = nil + resp.Job.Articles[i].MsgIdItem = nil resp.Job.Articles[i] = nil } - resp.Job.Articles = nil + } + resp.Job.Articles = nil - for msgid := range resp.Job.ArticleMap { - delete(resp.Job.ArticleMap, msgid) - } - resp.Job.ArticleMap = nil + // Clean up ArticleMap - nil the keys (pointers) before deleting + for msgid := range resp.Job.ArticleMap { + resp.Job.ArticleMap[msgid] = nil + delete(resp.Job.ArticleMap, msgid) + } + resp.Job.ArticleMap = nil - for i := range resp.Job.MessageIDs { - resp.Job.MessageIDs[i] = nil - } - resp.Job.MessageIDs = nil + // NOTE: Do NOT clean up MessageIDs or WantedIDs here! + // The CHECK worker may still be using them (race condition). + // They will be cleaned up in BootConnWorkers when the job is requeued or discarded. - for i := range resp.Job.WantedIDs { - resp.Job.WantedIDs[i] = nil - } - resp.Job.WantedIDs = nil + resp.Job.Mux.Unlock() - } - }(responseChan) - } + } + }(responseChan, num) } + log.Printf("Newsgroup: '%s' | Collector: ttResponses closed, waiting for %d response processors to finish...", newsgroup.Name, num) // Wait for all response channel processors to finish responseWG.Wait() - + log.Printf("Newsgroup: '%s' | Collector: all response processors closed", newsgroup.Name) amux.Lock() result := fmt.Sprintf("END Newsgroup: '%s' | transferred: %d/%d (unwanted: %d | rejected: %d | checked: %d | TX_Errors: %d | connErrors: %d | took %v", newsgroup.Name, transferred, totalArticles, unwanted, rejected, checked, txErrors, connErrors, time.Since(start)) @@ -1290,15 +1308,14 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch if end > len(articles) { end = len(articles) } + // pass articles to CHECK or TAKETHIS queue (async!) 
responseChan, err := processBatch(ttMode, articles[i:end], redisCli) if err != nil { log.Printf("Newsgroup: '%s' | Error processing batch %d-%d: %v", newsgroup.Name, i+1, end, err) return fmt.Errorf("error processing batch %d-%d for newsgroup '%s': %v", i+1, end, newsgroup.Name, err) } - if responseChan != nil { - // pass the response channel to the collector channel: ttResponses - ttResponses <- responseChan - } + // pass the response channel to the collector channel: ttResponses + ttResponses <- responseChan } remainingArticles -= int64(len(articles)) if VERBOSE { @@ -1307,6 +1324,8 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch } } // end for offset range totalArticles + log.Printf("Newsgroup: '%s' | Main article loop completed, checking for requeued jobs...", newsgroup.Name) + // Process any remaining requeued jobs after main loop completes // This handles failures that occurred in the last batch for { @@ -1328,9 +1347,13 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch // Loop again to check if any of those jobs failed and were requeued } + log.Printf("Newsgroup: '%s' | Final requeue processing completed, closing ttResponses channel...", newsgroup.Name) + // Close the ttResponses channel to signal collector goroutine to finish close(ttResponses) + log.Printf("Newsgroup: '%s' | ttResponses channel closed, waiting for collector to finish...", newsgroup.Name) + // Wait for collector goroutine to finish processing all responses collectorWG.Wait() @@ -1354,9 +1377,9 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl return nil, nil } doCheck := ttMode.FlipMode(lowerLevel, upperLevel) - var batchedJob *nntp.CHTTJob - batchedJob = &nntp.CHTTJob{ + batchedJob := &nntp.CHTTJob{ + JobID: atomic.AddUint64(&nntp.JobIDCounter, 1), Newsgroup: ttMode.Newsgroup, MessageIDs: make([]*string, 0, len(articles)), Articles: make([]*models.Article, 0, len(articles)), @@ -1431,7 +1454,9 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl if VERBOSE { log.Printf("Newsgroup: '%s' | Sending CHECK commands for %d/%d articles", *ttMode.Newsgroup, len(batchedJob.MessageIDs), len(articles)) } + log.Printf("Newsgroup: '%s' | Sending job #%d to CheckQueue (global) with %d message IDs", *ttMode.Newsgroup, batchedJob.JobID, len(batchedJob.MessageIDs)) nntp.CheckQueue <- batchedJob + log.Printf("Newsgroup: '%s' | Job #%d sent to CheckQueue successfully", *ttMode.Newsgroup, batchedJob.JobID) return batchedJob.ResponseChan, nil // end case ttMode.CheckMode @@ -1451,10 +1476,12 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl } if len(batchedJob.Articles) == 0 { - log.Printf("WARN: No valid articles for TAKETHIS mode, skipping batch") + log.Printf("Newsgroup: '%s' | WARN: No valid articles for TAKETHIS mode, skipping batch", *ttMode.Newsgroup) return nil, nil } + log.Printf("Newsgroup: '%s' | Sending job #%d to TakeThisQueue with %d articles", *ttMode.Newsgroup, batchedJob.JobID, len(batchedJob.WantedIDs)) nntp.TakeThisQueue <- batchedJob + log.Printf("Newsgroup: '%s' | Job #%d sent to TakeThisQueue successfully", *ttMode.Newsgroup, batchedJob.JobID) return batchedJob.ResponseChan, nil } // end case !ttMode.CheckMode @@ -1719,6 +1746,7 @@ forever: // copy articles pointer job.Mux.Lock() rqj := &nntp.CHTTJob{ + JobID: job.JobID, Newsgroup: job.Newsgroup, Articles: job.Articles, } @@ -1745,6 +1773,43 @@ forever: } log.Printf("CHTTWorker (%d) did requeue %d 
jobs", i, len(rs.jobs)) } + + // Clean up ReturnSignal maps and unlink pointers + // Clean up jobMap - nil all pointers before deleting + for msgID := range rs.jobMap { + rs.jobMap[msgID] = nil + delete(rs.jobMap, msgID) + } + rs.jobMap = nil + + // Clean up jobsQueued + for job := range rs.jobsQueued { + delete(rs.jobsQueued, job) + } + rs.jobsQueued = nil + + // Clean up jobsReadOK + for job := range rs.jobsReadOK { + delete(rs.jobsReadOK, job) + } + rs.jobsReadOK = nil + + // Clean up jobs slice - nil all pointers + for idx := range rs.jobs { + rs.jobs[idx] = nil + } + rs.jobs = nil + + // Clean up tmpMessageIDs (if still present) + for idx := range rs.tmpMessageIDs { + rs.tmpMessageIDs[idx] = nil + } + rs.tmpMessageIDs = nil + + rs.redisCli = nil + rs.Chan = nil + rs.errChan = nil + rs.Mux.Unlock() // TODO: check remaining work and restart connection default: @@ -1817,23 +1882,25 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { log.Printf("CheckWorker (%d): Tick WantShutdown, exiting", id) return } - // check if we have work and process CHECK commands + // Get the next job to process rs.Mux.Lock() - hasWork := len(rs.tmpMessageIDs) > 0 - rs.Mux.Unlock() - if !hasWork { - log.Printf("CheckWorker (%d): Ticked but no work? continue...", id) + if len(rs.jobs) == 0 { + rs.Mux.Unlock() + log.Printf("CheckWorker (%d): Ticked but no jobs in queue, continue...", id) continue loop } - - // copy accumulated message IDs and clear slice - rs.Mux.Lock() - checkIds := make([]*string, len(rs.tmpMessageIDs)) - copy(checkIds, rs.tmpMessageIDs) - rs.tmpMessageIDs = rs.tmpMessageIDs[:0] //clear + currentJob := rs.jobs[0] + rs.jobs = rs.jobs[1:] // Remove first job from queue rs.Mux.Unlock() - log.Printf("CheckWorker (%d): Checking %d message IDs in batches of %d", id, len(checkIds), BatchCheck) + // Use message IDs directly from the job + checkIds := currentJob.MessageIDs + newsgroup := "unknown" + if currentJob != nil && currentJob.Newsgroup != nil { + newsgroup = *currentJob.Newsgroup + } + + log.Printf("Newsgroup: '%s' | CheckWorker (%d): waits to check %d message IDs in batches of %d (job #%d)", newsgroup, id, len(checkIds), BatchCheck, currentJob.JobID) // Process checkIds in batches of BatchCheck for batchStart := 0; batchStart < len(checkIds); batchStart += BatchCheck { @@ -1845,12 +1912,12 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { // Lock for this batch only common.ChanLock(flipflopChan) - log.Printf("CheckWorker (%d): Sending CHECK for batch %d-%d (%d messages)", id, batchStart, batchEnd, len(batch)) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): Sending CHECK for batch %d-%d (%d messages)", newsgroup, id, batchStart, batchEnd, len(batch)) err := conn.SendCheckMultiple(batch) if err != nil { common.ChanRelease(flipflopChan) - log.Printf("CheckWorker (%d): SendCheckMultiple error for batch %d-%d: %v", id, batchStart, batchEnd, err) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): SendCheckMultiple error for batch %d-%d: %v", newsgroup, id, batchStart, batchEnd, err) return } @@ -1887,15 +1954,26 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { rs.Mux.Lock() lastRun = time.Now() + // Check if there are more jobs to process + hasMoreJobs := len(rs.jobs) > 0 rs.Mux.Unlock() + // If there are more jobs waiting, immediately trigger next job processing + if hasMoreJobs { + select { + case tickChan <- struct{}{}: + default: + // Channel full, will be processed on next tick + } + } + case <-ticker.C: if common.WantShutdown() { 
log.Printf("CheckWorker (%d): Ticker WantShutdown, exiting", id) return } rs.Mux.Lock() - hasWork := len(rs.tmpMessageIDs) > 0 && time.Since(lastRun) >= DefaultCheckTicker + hasWork := len(rs.jobs) > 0 && time.Since(lastRun) >= DefaultCheckTicker rs.Mux.Unlock() if hasWork { select { @@ -1910,7 +1988,7 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { // launch a go routine to read CHECK responses from the supplied connection with textproto readline go func() { - var responseCount int64 + var responseCount int var tookTime int64 defer func() { errChan <- struct{}{} @@ -1943,9 +2021,11 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { } tookTime += time.Since(start).Milliseconds() responseCount++ - if responseCount >= 1000 { + if responseCount >= BatchCheck { avg := float64(tookTime) / float64(responseCount) - log.Printf("CheckWorker (%d): Read %d CHECK responses, avg latency: %.1f ms", id, responseCount, avg) + if avg > 1 { + log.Printf("CheckWorker (%d): Read %d CHECK responses, avg latency: %.1f ms", id, responseCount, avg) + } responseCount = 0 tookTime = 0 } @@ -1975,6 +2055,7 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { continue loop } rs.Mux.Lock() + rs.jobMap[rr.MsgID] = nil // Nil the pointer before deleting delete(rs.jobMap, rr.MsgID) rs.jobsReadOK[job]++ rs.Mux.Unlock() @@ -1991,7 +2072,7 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { job.Increment(nntp.IncrFLAG_RETRY) default: - log.Printf("Unknown CHECK response: line='%s' code=%d expected msgID %s", line, code, *rr.MsgID) + log.Printf("Newsgroup: '%s' | Unknown CHECK response: line='%s' code=%d expected msgID %s", *job.Newsgroup, line, code, *rr.MsgID) } // check if all jobs are done rs.Mux.Lock() @@ -1999,7 +2080,7 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { readCount, rexists := rs.jobsReadOK[job] rs.Mux.Unlock() if !qexists || !rexists { - log.Printf("ERROR in CheckWorker: queuedCount or readCount did not exist for a job?!") + log.Printf("Newsgroup: '%s' | ERROR in CheckWorker: queuedCount or readCount did not exist for a job?!", *job.Newsgroup) rr.retChan <- struct{}{} continue loop } @@ -2011,9 +2092,11 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { if len(job.WantedIDs) > 0 { // Pass job to TAKETHIS worker via channel takeThisChan <- job // local takethis chan sharing the same connection - log.Printf("Newsgroup '%s' | CheckWorker (%d): Sent job to TAKETHIS worker (wanted: %d/%d)", *job.Newsgroup, id, len(job.WantedIDs), queuedCount) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): Sent job #%d to TAKETHIS worker (wanted: %d/%d)", *job.Newsgroup, id, job.JobID, len(job.WantedIDs), queuedCount) } else { - log.Printf("Newsgroup '%s' | CheckWorker (%d): got %d CHECK responses but server wants none", *job.Newsgroup, id, queuedCount) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d got %d CHECK responses but server wants none", *job.Newsgroup, id, job.JobID, queuedCount) + // Send response and close channel for jobs with no wanted articles + job.Response(&nntp.TTResponse{Job: job, Err: nil}) } } rr.retChan <- struct{}{} @@ -2037,12 +2120,12 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { case job1 := <-nntp.TakeThisQueue: job = job1 if job != nil { - log.Printf("CheckWorker (%d): Received TAKETHIS job from global queue (wanted: %d)", id, len(job.WantedIDs)) + log.Printf("Newsgroup '%s' | CheckWorker (%d): Received TAKETHIS job #%d from global queue 
(wanted: %d)", *job.Newsgroup, id, job.JobID, len(job.WantedIDs)) } case job2 := <-takeThisChan: job = job2 if job != nil { - log.Printf("CheckWorker (%d): Received TAKETHIS job from local CHECK channel (wanted: %d)", id, len(job.WantedIDs)) + log.Printf("Newsgroup '%s' | CheckWorker (%d): Received TAKETHIS job #%d from local CHECK channel (wanted: %d)", *job.Newsgroup, id, job.JobID, len(job.WantedIDs)) } } @@ -2063,40 +2146,30 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { } if len(wantedArticles) == 0 { - log.Printf("CheckWorker (%d): No valid wanted articles found in ArticleMap", id) - if job.ResponseChan != nil { - job.ResponseChan <- &nntp.TTResponse{Job: nil, Err: fmt.Errorf("got job without valid wanted articles")} - close(job.ResponseChan) - } + log.Printf("Newsgroup '%s' | CheckWorker (%d): No valid wanted articles found in ArticleMap for job #%d", *job.Newsgroup, id, job.JobID) + job.Response(&nntp.TTResponse{Job: nil, Err: fmt.Errorf("got job without valid wanted articles")}) continue } common.ChanLock(flipflopChan) - log.Printf("CheckWorker (%d): Sending TAKETHIS for %d wanted articles", id, len(wantedArticles)) + log.Printf("Newsgroup '%s' | CheckWorker (%d): Sending TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, id, job.JobID, len(wantedArticles)) // Send TAKETHIS commands using existing function transferred, rejected, redis_cached, err := sendArticlesBatchViaTakeThis(conn, wantedArticles, job, *job.Newsgroup, rs.redisCli) common.ChanRelease(flipflopChan) if err != nil { - log.Printf("CheckWorker (%d): Error in TAKETHIS: %v", id, err) - if job.ResponseChan != nil { - job.ResponseChan <- &nntp.TTResponse{Job: job, Err: err} - close(job.ResponseChan) - } + log.Printf("Newsgroup '%s' | CheckWorker (%d): Error in TAKETHIS job #%d: %v", *job.Newsgroup, id, job.JobID, err) + job.Response(&nntp.TTResponse{Job: job, Err: err}) continue } - log.Printf("CheckWorker (%d): TAKETHIS completed: transferred=%d, rejected=%d, redis_cached=%d", id, transferred, rejected, redis_cached) + log.Printf("Newsgroup '%s' | CheckWorker (%d): TAKETHIS job #%d completed: transferred=%d, rejected=%d, redis_cached=%d", *job.Newsgroup, id, job.JobID, transferred, rejected, redis_cached) // Send response back - if job.ResponseChan != nil { - log.Printf("CheckWorker (%d): Sending TTresponse back to responseChan len=%d", id, len(job.ResponseChan)) - job.ResponseChan <- &nntp.TTResponse{Job: job, Err: nil} - log.Printf("CheckWorker (%d): Sent TTresponse back to job channel, closing response channel", id) - close(job.ResponseChan) - } else { - log.Printf("ERROR CheckWorker (%d): job.ResponseChan is nil, cannot send response back", id) - } + log.Printf("Newsgroup '%s' | CheckWorker (%d): Sending TTresponse for job #%d to responseChan len=%d", *job.Newsgroup, id, job.JobID, len(job.ResponseChan)) + job.Response(&nntp.TTResponse{Job: job, Err: nil}) + log.Printf("Newsgroup '%s' | CheckWorker (%d): Sent TTresponse for job #%d to responseChan", *job.Newsgroup, id, job.JobID) + } }() @@ -2112,38 +2185,31 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { } if job == nil || len(job.MessageIDs) == 0 { log.Printf("CheckWorker: empty job, skipping") - if job != nil && job.ResponseChan != nil { - job.ResponseChan <- nil - close(job.ResponseChan) + if job != nil { + job.Response(&nntp.TTResponse{Job: nil, Err: fmt.Errorf("got job without valid wanted articles")}) } continue } - // Track this job and its message IDs - // accumulate message IDs to rs.tmpMessageIDs - 
// when threshold BatchCheck is reached, send CHECK commands - //log.Printf("CheckWorker (%d): Received job with %d message IDs", id, len(job.MessageIDs)) + log.Printf("CheckWorker (%d): Received job #%d from CheckQueue for newsgroup '%s' with %d message IDs", id, job.JobID, *job.Newsgroup, len(job.MessageIDs)) + + // Build jobMap for tracking which message IDs belong to this job + // and count queued messages + rs.Mux.Lock() for _, msgId := range job.MessageIDs { if msgId != nil { - rs.Mux.Lock() - rs.tmpMessageIDs = append(rs.tmpMessageIDs, msgId) rs.jobMap[msgId] = job - rs.jobsQueued[job]++ - rs.Mux.Unlock() + rs.jobsQueued[job]++ // counts message ids to read check later } } - rs.Mux.Lock() - hasWork := len(rs.tmpMessageIDs) >= 0 - if hasWork { - rs.jobs = append(rs.jobs, job) - } + // Add job to processing queue + rs.jobs = append(rs.jobs, job) rs.Mux.Unlock() - if hasWork { - // send signal to process CHECK commands - select { - case tickChan <- struct{}{}: - default: - // tickChan full, tickChan will tick - } + + // Signal ticker to process this job + select { + case tickChan <- struct{}{}: + default: + // tickChan full, will be processed on next tick } } // end select } // end for diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 483c1d2..237d51d 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -32,6 +32,7 @@ const MaxReadLinesBody = MaxReadLinesArticle - MaxReadLinesHeaders var NNTPTransferThreads int = 1 var TakeThisQueue = make(chan *CHTTJob, NNTPTransferThreads) var CheckQueue = make(chan *CHTTJob, NNTPTransferThreads) +var JobIDCounter uint64 // Atomic counter for unique job IDs // used in nntp-transfer/main.go type TakeThisMode struct { @@ -59,6 +60,7 @@ type CheckResponse struct { // deprecated // batched CHECK/TAKETHIS Job type CHTTJob struct { + JobID uint64 // Unique job ID for tracing Newsgroup *string Mux sync.Mutex TTMode *TakeThisMode @@ -78,6 +80,15 @@ type CHTTJob struct { ConnErrors uint64 } +func (job *CHTTJob) Response(response *TTResponse) { + if job.ResponseChan == nil { + log.Printf("ERROR CHTTJob.Response(): ResponseChan is nil for job ID %d response='%v'", job.JobID, response) + return + } + job.ResponseChan <- response + close(job.ResponseChan) +} + const IncrFLAG_CHECKED = 1 const IncrFLAG_WANTED = 2 const IncrFLAG_UNWANTED = 4 From 6d3743f775826e547fee417b20b0ea3be26bc142 Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 5 Oct 2025 22:16:26 +0000 Subject: [PATCH 006/100] almost finished --- cmd/nntp-transfer/main.go | 207 +++++++++++++++++++++----- internal/nntp/nntp-client-commands.go | 1 + 2 files changed, 171 insertions(+), 37 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 15c9919..6c74168 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1454,9 +1454,23 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl if VERBOSE { log.Printf("Newsgroup: '%s' | Sending CHECK commands for %d/%d articles", *ttMode.Newsgroup, len(batchedJob.MessageIDs), len(articles)) } - log.Printf("Newsgroup: '%s' | Sending job #%d to CheckQueue (global) with %d message IDs", *ttMode.Newsgroup, batchedJob.JobID, len(batchedJob.MessageIDs)) - nntp.CheckQueue <- batchedJob - log.Printf("Newsgroup: '%s' | Job #%d sent to CheckQueue successfully", *ttMode.Newsgroup, batchedJob.JobID) + + // Assign job to worker (consistent assignment + load balancing) + if len(CheckQueues) == 0 { + return nil, 
fmt.Errorf("no workers available") + } + + workerID := assignWorkerToNewsgroup(*ttMode.Newsgroup) + checkQueue := CheckQueues[workerID] + + // Track queue length for load balancing + WorkerQueueLengthMux.Lock() + WorkerQueueLength[workerID]++ + WorkerQueueLengthMux.Unlock() + + log.Printf("Newsgroup: '%s' | Sending job #%d to Worker %d queue with %d message IDs", *ttMode.Newsgroup, batchedJob.JobID, workerID, len(batchedJob.MessageIDs)) + checkQueue <- batchedJob + log.Printf("Newsgroup: '%s' | Job #%d sent to Worker %d successfully", *ttMode.Newsgroup, batchedJob.JobID, workerID) return batchedJob.ResponseChan, nil // end case ttMode.CheckMode @@ -1650,11 +1664,103 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art var jobRequeueMutex sync.RWMutex var jobRequeue = make(map[*string][]*nntp.CHTTJob) +// CheckQueues holds per-worker CheckQueue channels for consistent newsgroup routing +var CheckQueues []chan *nntp.CHTTJob + +// NewsgroupWorkerMap tracks which worker is assigned to each newsgroup +var NewsgroupWorkerMap = make(map[string]int) +var NewsgroupWorkerMapMux sync.RWMutex + +// WorkerQueueLength tracks how many jobs are queued per worker (for load balancing) +var WorkerQueueLength []int +var WorkerQueueLengthMux sync.Mutex + +// assignWorkerToNewsgroup finds the best worker for a newsgroup +// If newsgroup already assigned, returns same worker (sequential processing) +// If new newsgroup, assigns to least busy worker (load balancing) +func assignWorkerToNewsgroup(newsgroup string) int { + // Check if already assigned + NewsgroupWorkerMapMux.RLock() + if workerID, exists := NewsgroupWorkerMap[newsgroup]; exists { + NewsgroupWorkerMapMux.RUnlock() + return workerID + } + NewsgroupWorkerMapMux.RUnlock() + + // Find least busy worker + WorkerQueueLengthMux.Lock() + if len(WorkerQueueLength) == 0 { + WorkerQueueLengthMux.Unlock() + return 0 + } + + minLoad := WorkerQueueLength[0] + workerID := 0 + for i := 1; i < len(WorkerQueueLength); i++ { + if WorkerQueueLength[i] < minLoad { + minLoad = WorkerQueueLength[i] + workerID = i + } + } + WorkerQueueLengthMux.Unlock() + + // Assign newsgroup to this worker + NewsgroupWorkerMapMux.Lock() + NewsgroupWorkerMap[newsgroup] = workerID + NewsgroupWorkerMapMux.Unlock() + + return workerID +} + +// hashStringToInt computes a simple hash of a string to an integer (UNUSED - kept for reference) +func hashStringToInt(s string) int { + h := 0 + for i := 0; i < len(s); i++ { + h = 31*h + int(s[i]) + } + if h < 0 { + h = -h + } + return h +} + +// Find first empty slot +func findEmptySlot(openConns *int, workerSlots []bool, mux *sync.Mutex) int { + mux.Lock() + defer mux.Unlock() + *openConns++ + for i := 0; i < len(workerSlots); i++ { + if !workerSlots[i] { + workerSlots[i] = true + return i + } + } + return -1 +} + +func UnsetWorker(openConns *int, slotID int, workerSlots []bool, mux *sync.Mutex) { + mux.Lock() + defer mux.Unlock() + *openConns-- + if slotID >= 0 && slotID < len(workerSlots) { + workerSlots[slotID] = false + } +} + func BootConnWorkers(pool *nntp.Pool, redisCli *redis.Client) { openConns := 0 + workerSlots := make([]bool, nntp.NNTPTransferThreads) defaultSleep := time.Second isleep := defaultSleep var mux sync.Mutex + // Create per-worker queues + CheckQueues = make([]chan *nntp.CHTTJob, nntp.NNTPTransferThreads) + WorkerQueueLength = make([]int, nntp.NNTPTransferThreads) + for i := range CheckQueues { + CheckQueues[i] = make(chan *nntp.CHTTJob, 1) // do not queue more than one (1) job at a time! 
+ WorkerQueueLength[i] = 0 + } + allEstablished := false forever: for { time.Sleep(defaultSleep) @@ -1663,16 +1769,23 @@ forever: break forever } mux.Lock() - if openConns == nntp.NNTPTransferThreads { - mux.Unlock() + allEstablished = openConns == nntp.NNTPTransferThreads + mux.Unlock() + if allEstablished { + continue forever + } + //var sharedConns []*nntp.BackendConn + bootN := nntp.NNTPTransferThreads - openConns + if bootN <= 0 { + log.Printf("BootConnWorkers: all %d/%d connections established", openConns, nntp.NNTPTransferThreads) continue forever } - mux.Unlock() - - var sharedConns []*nntp.BackendConn - bootN := nntp.NNTPTransferThreads - openConns - 1 // get connections from pool - for i := bootN; i < nntp.NNTPTransferThreads; i++ { + log.Printf("BootConnWorkers: need %d connections (have %d), getting from pool...", bootN, openConns) + returnSignals := make([]*ReturnSignal, bootN) + errChan := make(chan struct{}, 1) + newConns := 0 + for i := range bootN { // Get a connection from pool conn, err := pool.Get(nntp.MODE_STREAM_MV) if err != nil { @@ -1694,33 +1807,43 @@ forever: continue forever } // got a connection - sharedConns = append(sharedConns, conn) - openConns++ - } - if len(sharedConns) == 0 { - log.Printf("BootConnWorkers: no connections obtained, retry in: %v", isleep) - continue forever - } - isleep = defaultSleep // reset to default - returnSignals := make([]*ReturnSignal, len(sharedConns)) - errChan := make(chan struct{}, 1) - for i, conn := range sharedConns { + slotID := findEmptySlot(&openConns, workerSlots, &mux) + if slotID < 0 { + log.Printf("BootConnWorkers: no empty worker slot found, closing connection") + conn.ForceCloseConn() + continue forever + } returnSignal := &ReturnSignal{ + slotID: slotID, errChan: errChan, redisCli: redisCli, - Chan: make(chan *ReturnSignal, 1), + ExitChan: make(chan *ReturnSignal, 1), tmpMessageIDs: make([]*string, 0, BatchCheck), jobsQueued: make(map[*nntp.CHTTJob]uint64, BatchCheck), jobsReadOK: make(map[*nntp.CHTTJob]uint64, BatchCheck), jobMap: make(map[*string]*nntp.CHTTJob, BatchCheck), jobs: make([]*nntp.CHTTJob, 0, BatchCheck), } + returnSignals[i] = returnSignal - go CHTTWorker(i, conn, returnSignal) + // assign checkQueue by openConns counter + // so restarted workers get same channels to read from + go CHTTWorker(slotID, conn, returnSignal, CheckQueues[slotID]) + newConns++ } + if newConns == 0 { + log.Printf("BootConnWorkers: no connections obtained, retry in: %v", isleep) + isleep = isleep * 2 + if isleep > time.Minute { + isleep = time.Minute + } + continue forever + } + isleep = defaultSleep // reset to default + log.Printf("BootConnWorkers: launched %d CHTT workers", newConns) // Monitor recently launched CHTT workers go func() { - monitoring := len(sharedConns) + monitoring := newConns for { time.Sleep(100 * time.Millisecond) for i, wait := range returnSignals { @@ -1728,14 +1851,11 @@ forever: continue } select { - case rs := <-wait.Chan: + case rs := <-wait.ExitChan: log.Printf("CHTTWorker (%d) exited", i) monitoring-- - mux.Lock() - openConns-- - mux.Unlock() - + UnsetWorker(&openConns, rs.slotID, workerSlots, &mux) returnSignals[i] = nil rs.Mux.Lock() @@ -1776,6 +1896,7 @@ forever: // Clean up ReturnSignal maps and unlink pointers // Clean up jobMap - nil all pointers before deleting + log.Printf("CHTTWorker (%d) cleaning up jobMap with %d entries", i, len(rs.jobMap)) for msgID := range rs.jobMap { rs.jobMap[msgID] = nil delete(rs.jobMap, msgID) @@ -1783,31 +1904,35 @@ forever: rs.jobMap = nil // Clean up 
jobsQueued + log.Printf("CHTTWorker (%d) cleaning up jobsQueued with %d entries", i, len(rs.jobsQueued)) for job := range rs.jobsQueued { delete(rs.jobsQueued, job) } rs.jobsQueued = nil // Clean up jobsReadOK + log.Printf("CHTTWorker (%d) cleaning up jobsReadOK with %d entries", i, len(rs.jobsReadOK)) for job := range rs.jobsReadOK { delete(rs.jobsReadOK, job) } rs.jobsReadOK = nil // Clean up jobs slice - nil all pointers + log.Printf("CHTTWorker (%d) cleaning up jobs slice with %d entries", i, len(rs.jobs)) for idx := range rs.jobs { rs.jobs[idx] = nil } rs.jobs = nil // Clean up tmpMessageIDs (if still present) + log.Printf("CHTTWorker (%d) cleaning up tmpMessageIDs with %d entries", i, len(rs.tmpMessageIDs)) for idx := range rs.tmpMessageIDs { rs.tmpMessageIDs[idx] = nil } rs.tmpMessageIDs = nil rs.redisCli = nil - rs.Chan = nil + rs.ExitChan = nil rs.errChan = nil rs.Mux.Unlock() @@ -1831,7 +1956,8 @@ var JobsToRetryMux sync.Mutex type ReturnSignal struct { Mux sync.Mutex - Chan chan *ReturnSignal + slotID int + ExitChan chan *ReturnSignal errChan chan struct{} redisCli *redis.Client tmpMessageIDs []*string @@ -1846,7 +1972,7 @@ type readRequest struct { retChan chan struct{} } -func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { +func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue chan *nntp.CHTTJob) { readResponsesChan := make(chan *readRequest, BatchCheck) takeThisChan := make(chan *nntp.CHTTJob, nntp.NNTPTransferThreads) errChan := make(chan struct{}, 4) @@ -1856,10 +1982,10 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { defer func(conn *nntp.BackendConn, rs *ReturnSignal) { conn.ForceCloseConn() - rs.Chan <- rs + rs.ExitChan <- rs errChan <- struct{}{} }(conn, rs) - lastRun := time.Now() + //lastRun := time.Now() // launch go routine which sends CHECK commands if threshold exceeds BatchCheck go func() { @@ -1953,10 +2079,17 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { } rs.Mux.Lock() - lastRun = time.Now() + //lastRun = time.Now() // Check if there are more jobs to process hasMoreJobs := len(rs.jobs) > 0 rs.Mux.Unlock() + + // Decrement queue length for this worker (job processing complete) + WorkerQueueLengthMux.Lock() + if id < len(WorkerQueueLength) && WorkerQueueLength[id] > 0 { + WorkerQueueLength[id]-- + } + WorkerQueueLengthMux.Unlock() // If there are more jobs waiting, immediately trigger next job processing if hasMoreJobs { @@ -1973,7 +2106,7 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { return } rs.Mux.Lock() - hasWork := len(rs.jobs) > 0 && time.Since(lastRun) >= DefaultCheckTicker + hasWork := len(rs.jobs) > 0 rs.Mux.Unlock() if hasWork { select { @@ -2178,7 +2311,7 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal) { case <-errChan: errChan <- struct{}{} return - case job := <-nntp.CheckQueue: + case job := <-checkQueue: if common.WantShutdown() { log.Printf("CheckWorker: WantShutdown, exiting") return diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 237d51d..5109a07 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -32,6 +32,7 @@ const MaxReadLinesBody = MaxReadLinesArticle - MaxReadLinesHeaders var NNTPTransferThreads int = 1 var TakeThisQueue = make(chan *CHTTJob, NNTPTransferThreads) var CheckQueue = make(chan *CHTTJob, NNTPTransferThreads) + var JobIDCounter uint64 // Atomic counter for unique job IDs // used in nntp-transfer/main.go 
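Note on the pattern introduced across patches 005-007: the CHECK sender and the TAKETHIS sender share one NNTP connection per worker and alternate access to it through a tiny channel-as-mutex helper (common.ChanLock/ChanRelease over a capacity-1 channel, the "flipflopChan"). The following is a minimal, standalone sketch of that flip-flop locking pattern only; the names chanLock, chanRelease and flipflop are illustrative stand-ins, and the Sleep calls stand in for network I/O. It is not code from the patch itself.

package main

import (
	"fmt"
	"sync"
	"time"
)

// chanLock/chanRelease mirror common.ChanLock/ChanRelease from the patch:
// a buffered channel of capacity 1 acts as a lock that is handed back and
// forth between the CHECK sender and the TAKETHIS sender on one connection.
func chanLock(lockChan chan struct{})    { lockChan <- struct{}{} }
func chanRelease(lockChan chan struct{}) { <-lockChan }

func main() {
	flipflop := make(chan struct{}, 1) // capacity 1: exactly one holder at a time
	var wg sync.WaitGroup

	// CHECK sender: takes the lock per batch, never for the whole job,
	// so TAKETHIS batches can interleave between CHECK batches.
	wg.Add(1)
	go func() {
		defer wg.Done()
		for batch := 1; batch <= 3; batch++ {
			chanLock(flipflop)
			fmt.Printf("CHECK    batch %d on shared conn\n", batch)
			time.Sleep(10 * time.Millisecond) // stand-in for sending/reading CHECK responses
			chanRelease(flipflop)
		}
	}()

	// TAKETHIS sender: waits for the lock, then streams its articles.
	wg.Add(1)
	go func() {
		defer wg.Done()
		for job := 1; job <= 3; job++ {
			chanLock(flipflop)
			fmt.Printf("TAKETHIS job   %d on shared conn\n", job)
			time.Sleep(10 * time.Millisecond) // stand-in for TAKETHIS send/read
			chanRelease(flipflop)
		}
	}()

	wg.Wait()
}

Because the lock is released after every batch rather than after every job, a long CHECK run cannot starve the TAKETHIS path on the same connection; that is the design choice the per-batch ChanLock/ChanRelease calls in CHTTWorker encode.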
From 5b725524d6b41a9fdb565ea9d7b209570a060bea Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 5 Oct 2025 23:07:51 +0000 Subject: [PATCH 007/100] eat my mem --- cmd/nntp-transfer/main.go | 81 +++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 34 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 6c74168..13a211f 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -9,6 +9,7 @@ import ( "log" "os" "os/signal" + "runtime" "slices" "strconv" "strings" @@ -1186,11 +1187,11 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch continue } num++ - log.Printf("Newsgroup: '%s' | Starting response channel processor num %d", newsgroup.Name, num) + log.Printf("Newsgroup: '%s' | Starting response channel processor num %d (goroutines: %d)", newsgroup.Name, num, runtime.NumGoroutine()) responseWG.Add(1) go func(rc chan *nntp.TTResponse, num uint64) { defer responseWG.Done() - defer log.Printf("Newsgroup: '%s' | Ending response channel processor num %d", newsgroup.Name, num) + defer log.Printf("Newsgroup: '%s' | Ending response channel processor num %d (goroutines: %d)", newsgroup.Name, num, runtime.NumGoroutine()) for resp := range rc { if resp == nil { log.Printf("Newsgroup: '%s' | Warning: nil TT response channel received!?", newsgroup.Name) @@ -1454,20 +1455,20 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl if VERBOSE { log.Printf("Newsgroup: '%s' | Sending CHECK commands for %d/%d articles", *ttMode.Newsgroup, len(batchedJob.MessageIDs), len(articles)) } - + // Assign job to worker (consistent assignment + load balancing) if len(CheckQueues) == 0 { return nil, fmt.Errorf("no workers available") } - + workerID := assignWorkerToNewsgroup(*ttMode.Newsgroup) checkQueue := CheckQueues[workerID] - + // Track queue length for load balancing WorkerQueueLengthMux.Lock() WorkerQueueLength[workerID]++ WorkerQueueLengthMux.Unlock() - + log.Printf("Newsgroup: '%s' | Sending job #%d to Worker %d queue with %d message IDs", *ttMode.Newsgroup, batchedJob.JobID, workerID, len(batchedJob.MessageIDs)) checkQueue <- batchedJob log.Printf("Newsgroup: '%s' | Job #%d sent to Worker %d successfully", *ttMode.Newsgroup, batchedJob.JobID, workerID) @@ -1686,14 +1687,14 @@ func assignWorkerToNewsgroup(newsgroup string) int { return workerID } NewsgroupWorkerMapMux.RUnlock() - + // Find least busy worker WorkerQueueLengthMux.Lock() if len(WorkerQueueLength) == 0 { WorkerQueueLengthMux.Unlock() return 0 } - + minLoad := WorkerQueueLength[0] workerID := 0 for i := 1; i < len(WorkerQueueLength); i++ { @@ -1703,25 +1704,13 @@ func assignWorkerToNewsgroup(newsgroup string) int { } } WorkerQueueLengthMux.Unlock() - + // Assign newsgroup to this worker NewsgroupWorkerMapMux.Lock() NewsgroupWorkerMap[newsgroup] = workerID NewsgroupWorkerMapMux.Unlock() - - return workerID -} -// hashStringToInt computes a simple hash of a string to an integer (UNUSED - kept for reference) -func hashStringToInt(s string) int { - h := 0 - for i := 0; i < len(s); i++ { - h = 31*h + int(s[i]) - } - if h < 0 { - h = -h - } - return h + return workerID } // Find first empty slot @@ -1757,7 +1746,7 @@ func BootConnWorkers(pool *nntp.Pool, redisCli *redis.Client) { CheckQueues = make([]chan *nntp.CHTTJob, nntp.NNTPTransferThreads) WorkerQueueLength = make([]int, nntp.NNTPTransferThreads) for i := range CheckQueues { - CheckQueues[i] = make(chan *nntp.CHTTJob, 1) // do not queue more than one (1) job at a time! 
+ CheckQueues[i] = make(chan *nntp.CHTTJob) // no cap! only accepts if there is a reader! WorkerQueueLength[i] = 0 } allEstablished := false @@ -1972,12 +1961,23 @@ type readRequest struct { retChan chan struct{} } +func replyChan(request chan struct{}, reply chan struct{}) { + select { + case <-request: + // got a reply request + reply <- struct{}{} // send back + default: + } +} + func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue chan *nntp.CHTTJob) { readResponsesChan := make(chan *readRequest, BatchCheck) takeThisChan := make(chan *nntp.CHTTJob, nntp.NNTPTransferThreads) errChan := make(chan struct{}, 4) tickChan := make(chan struct{}, 1) flipflopChan := make(chan struct{}, 1) + requestReplyJobDone := make(chan struct{}, 1) + replyJobDone := make(chan struct{}, 1) rrRetChan := make(chan struct{}, BatchCheck) defer func(conn *nntp.BackendConn, rs *ReturnSignal) { @@ -2077,13 +2077,13 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue cha // Release lock after batch is complete, allowing TAKETHIS to run common.ChanRelease(flipflopChan) } - + replyChan(requestReplyJobDone, replyJobDone) // see if anybody is waiting and reply rs.Mux.Lock() //lastRun = time.Now() // Check if there are more jobs to process hasMoreJobs := len(rs.jobs) > 0 rs.Mux.Unlock() - + // Decrement queue length for this worker (job processing complete) WorkerQueueLengthMux.Lock() if id < len(WorkerQueueLength) && WorkerQueueLength[id] > 0 { @@ -2253,12 +2253,12 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue cha case job1 := <-nntp.TakeThisQueue: job = job1 if job != nil { - log.Printf("Newsgroup '%s' | CheckWorker (%d): Received TAKETHIS job #%d from global queue (wanted: %d)", *job.Newsgroup, id, job.JobID, len(job.WantedIDs)) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): Received TAKETHIS job #%d from global queue (wanted: %d)", *job.Newsgroup, id, job.JobID, len(job.WantedIDs)) } case job2 := <-takeThisChan: job = job2 if job != nil { - log.Printf("Newsgroup '%s' | CheckWorker (%d): Received TAKETHIS job #%d from local CHECK channel (wanted: %d)", *job.Newsgroup, id, job.JobID, len(job.WantedIDs)) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): Received TAKETHIS job #%d from local CHECK channel (wanted: %d)", *job.Newsgroup, id, job.JobID, len(job.WantedIDs)) } } @@ -2279,29 +2279,29 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue cha } if len(wantedArticles) == 0 { - log.Printf("Newsgroup '%s' | CheckWorker (%d): No valid wanted articles found in ArticleMap for job #%d", *job.Newsgroup, id, job.JobID) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): No valid wanted articles found in ArticleMap for job #%d", *job.Newsgroup, id, job.JobID) job.Response(&nntp.TTResponse{Job: nil, Err: fmt.Errorf("got job without valid wanted articles")}) continue } common.ChanLock(flipflopChan) - log.Printf("Newsgroup '%s' | CheckWorker (%d): Sending TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, id, job.JobID, len(wantedArticles)) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): Sending TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, id, job.JobID, len(wantedArticles)) // Send TAKETHIS commands using existing function transferred, rejected, redis_cached, err := sendArticlesBatchViaTakeThis(conn, wantedArticles, job, *job.Newsgroup, rs.redisCli) common.ChanRelease(flipflopChan) if err != nil { - log.Printf("Newsgroup '%s' | CheckWorker (%d): Error in TAKETHIS job #%d: %v", 
*job.Newsgroup, id, job.JobID, err) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): Error in TAKETHIS job #%d: %v", *job.Newsgroup, id, job.JobID, err) job.Response(&nntp.TTResponse{Job: job, Err: err}) continue } - log.Printf("Newsgroup '%s' | CheckWorker (%d): TAKETHIS job #%d completed: transferred=%d, rejected=%d, redis_cached=%d", *job.Newsgroup, id, job.JobID, transferred, rejected, redis_cached) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): TAKETHIS job #%d completed: transferred=%d, rejected=%d, redis_cached=%d", *job.Newsgroup, id, job.JobID, transferred, rejected, redis_cached) // Send response back - log.Printf("Newsgroup '%s' | CheckWorker (%d): Sending TTresponse for job #%d to responseChan len=%d", *job.Newsgroup, id, job.JobID, len(job.ResponseChan)) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): Sending TTresponse for job #%d to responseChan len=%d", *job.Newsgroup, id, job.JobID, len(job.ResponseChan)) job.Response(&nntp.TTResponse{Job: job, Err: nil}) - log.Printf("Newsgroup '%s' | CheckWorker (%d): Sent TTresponse for job #%d to responseChan", *job.Newsgroup, id, job.JobID) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): Sent TTresponse for job #%d to responseChan", *job.Newsgroup, id, job.JobID) } }() @@ -2323,17 +2323,30 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue cha } continue } - log.Printf("CheckWorker (%d): Received job #%d from CheckQueue for newsgroup '%s' with %d message IDs", id, job.JobID, *job.Newsgroup, len(job.MessageIDs)) // Build jobMap for tracking which message IDs belong to this job // and count queued messages rs.Mux.Lock() + queueFull := len(rs.jobs) > 0 + if queueFull { + log.Printf("Newsgroup: '%s' | CheckWorker (%d): got job #%d with %d message IDs. queued=%d ... 
waiting...", *job.Newsgroup, id, job.JobID, len(job.MessageIDs), len(rs.jobs)) + requestReplyJobDone <- struct{}{} + } + rs.Mux.Unlock() + if queueFull { + start := time.Now() + <-replyJobDone + log.Printf("Newsgroup: '%s' | CheckWorker (%d): waited %v for previous jobs to clear before queuing job #%d", *job.Newsgroup, id, time.Since(start), job.JobID) + } + rs.Mux.Lock() + job.Mux.Lock() for _, msgId := range job.MessageIDs { if msgId != nil { rs.jobMap[msgId] = job rs.jobsQueued[job]++ // counts message ids to read check later } } + job.Mux.Unlock() // Add job to processing queue rs.jobs = append(rs.jobs, job) rs.Mux.Unlock() From 91304c0e52bf331b48023919b3750f3bcc373f80 Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 5 Oct 2025 23:32:45 +0000 Subject: [PATCH 008/100] Update headers.go --- internal/common/headers.go | 111 +++++++++++++++++++++++++++++++++++-- 1 file changed, 105 insertions(+), 6 deletions(-) diff --git a/internal/common/headers.go b/internal/common/headers.go index ebefeb6..ff5d166 100644 --- a/internal/common/headers.go +++ b/internal/common/headers.go @@ -256,6 +256,100 @@ checkHeader: ignoreLine = false } + if !isSpacedLine { + if len(headerLine) < 4 { // "X: A" + log.Printf("Short header: '%s' line=%d in msgId='%s' (continue)", headerLine, i, article.MessageID) + ignoreLine = true + ignoredLines++ + continue checkHeader + } + // check if first char is lowercase + if unicode.IsLower(rune(headerLine[0])) { + if VerboseHeaders { + log.Printf("Lowercase header: '%s' line=%d in msgId='%s' (rewrite)", headerLine, i, article.MessageID) + } + headerLine = strings.ToUpper(string(headerLine[0])) + headerLine[1:] + } + + // Check for proper header format: "name: value" (colon followed by space) + colonIndex := strings.Index(headerLine, ":") + if colonIndex == -1 { + log.Printf("Invalid header (no colon): '%s' line=%d in msgId='%s' (skip)", headerLine, i, article.MessageID) + ignoreLine = true + ignoredLines++ + continue checkHeader + } + + // Check if header follows RFC format "name: value" (colon-space) + if colonIndex+1 >= len(headerLine) || headerLine[colonIndex+1] != ' ' { + // Malformed header - missing space after colon, skip it + log.Printf("Malformed header (no colon-space): '%s' line=%d in msgId='%s' (skip)", headerLine, i, article.MessageID) + ignoreLine = true + ignoredLines++ + continue checkHeader + } + + header := strings.SplitN(headerLine, ":", 2)[0] + // extracted header key. 
do some checks + if header == "" || strings.Contains(header, " ") { + log.Printf("Invalid header (empty or contains space): '%s' line=%d in msgId='%s' (skip)", headerLine, i, article.MessageID) + ignoreLine = true + ignoredLines++ + continue checkHeader + } + if IgnoreHeadersMap[strings.ToLower(header)] { + ignoreLine = true + continue checkHeader + } + if IgnoreGoogleHeaders && strings.HasPrefix(strings.ToLower(header), "x-goo") { + ignoreLine = true + ignoredLines++ + continue checkHeader + } + if !strings.HasPrefix(header, "X-") { + if headersMap[strings.ToLower(header)] { + log.Printf("Duplicate header: '%s' line=%d in msgId='%s' (rewrite)", headerLine, i, article.MessageID) + headerLine = "X-RW-" + headerLine + } + headersMap[strings.ToLower(header)] = true + } + if header == "Newsgroups" { + // Check if Newsgroups header contains at least one valid newsgroup name + // check if next headerlines are continued lines + getLines: + for { + if i+1 < len(moreHeaders) { + if !strings.HasPrefix(moreHeaders[i+1], " ") && !strings.HasPrefix(moreHeaders[i+1], "\t") { + break getLines + } + headerLine += moreHeaders[i+1] + i++ + } else { + break getLines + } + if len(headerLine) > 1024 { + log.Printf("Newsgroups header too long, exceeds total 1024 chars: '%s' line=%d in msgId='%s' (break)", headerLine, i, article.MessageID) + break getLines + } + } + + // Extract only the newsgroups value (after "Newsgroups: ") + parts := strings.SplitN(headerLine, ":", 2) + if len(parts) != 2 { + log.Printf("Invalid Newsgroups header format: '%s' line=%d in msgId='%s' (continue)", headerLine, i, article.MessageID) + ignoreLine = true + ignoredLines++ + continue checkHeader + } + newsgroupsValue := strings.TrimSpace(parts[1]) + newsgroupsValue = strings.ReplaceAll(newsgroupsValue, ":", " ") + newsgroupsValue = strings.ReplaceAll(newsgroupsValue, ";", " ") + newsgroupsValue = strings.TrimSpace(newsgroupsValue) + if newsgroupsValue == "" { + log.Printf("Invalid Empty Newsgroups header value: '%s' line=%d in msgId='%s' (skip)", headerLine, i, article.MessageID) + return nil, ErrNoNewsgroups + } + newsgroups := SeparatorRegex.Split(newsgroupsValue, -1) if !isSpacedLine { if len(headerLine) < 4 { // "X: A" log.Printf("Short header: '%s' line=%d in msgId='%s' (continue)", headerLine, i, article.MessageID) @@ -362,16 +456,20 @@ checkHeader: trimmedNG := strings.TrimSpace(group) if trimmedNG != strings.ToLower(group) { trimmedNG = strings.ToLower(group) // parse to lowercase - badGroups++ } - // Clean up unwanted characters (remove each character individually) for _, char := range unwantedChars { trimmedNG = strings.ReplaceAll(trimmedNG, string(char), "") } + trimmedNG = MultipleDots.ReplaceAllString(trimmedNG, ".") trimmedNG = strings.TrimSpace(trimmedNG) trimmedNG = strings.TrimLeft(trimmedNG, ".") + trimmedNG = strings.TrimLeft(trimmedNG, ",") + trimmedNG = strings.TrimLeft(trimmedNG, ";") trimmedNG = strings.TrimRight(trimmedNG, ".") + trimmedNG = strings.TrimRight(trimmedNG, ",") + trimmedNG = strings.TrimRight(trimmedNG, ";") + trimmedNG = strings.TrimSpace(trimmedNG) if trimmedNG == "" || strings.Contains(trimmedNG, " ") || !IsValidGroupName(trimmedNG) { if trimmedNG == "" { log.Printf("Invalid newsgroup name: '%s' empty after cleanup in line=%d idx=%d in msgId='%s'", group, i, x, article.MessageID) @@ -381,12 +479,14 @@ checkHeader: badGroups++ continue checkGroups } + if trimmedNG != group { + badGroups++ // but passed after trimming + } validNewsgroups = append(validNewsgroups, trimmedNG) } // end for 
checkGroups if len(validNewsgroups) == 0 { log.Printf("Invalid Newsgroups header: '%s' line=%d in msgId='%s' (return err)", headerLine, i, article.MessageID) - return nil, ErrNoNewsgroups } if badGroups > 0 { @@ -395,9 +495,8 @@ checkHeader: ignoredLines++ continue checkHeader } - - } - } + } // end if header Newsgroups + } // end if !isSpacedLine headers = append(headers, headerLine) } if VerboseHeaders && ignoredLines > 0 { From a4cad6436b3c9a3c16bd8717a669736ac153500b Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 5 Oct 2025 23:36:30 +0000 Subject: [PATCH 009/100] testing --- cmd/nntp-transfer/main.go | 12 +-- internal/common/headers.go | 121 +++++--------------------- internal/nntp/nntp-client-commands.go | 10 +-- 3 files changed, 32 insertions(+), 111 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 13a211f..551c786 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -403,24 +403,24 @@ func main() { // you did create a backup before, right? debugMutex.Lock() defer debugMutex.Unlock() - for groupName, articles := range debugArticles { - fmt.Printf("Debug capture - Newsgroup: %s, Articles: %d\n", groupName, len(articles)) + for newsgroup, articles := range debugArticles { + fmt.Printf("Debug capture - Newsgroup: %s, Articles: %d\n", newsgroup, len(articles)) // Get group database for updates if needed - groupDBs, err := db.GetGroupDBs(groupName) + groupDBs, err := db.GetGroupDBs(newsgroup) if err != nil { - fmt.Printf("! Error getting group database for %s: %v\n", groupName, err) + fmt.Printf("! Error getting group database for %s: %v\n", newsgroup, err) continue } for _, article := range articles { - fmt.Printf("# %s: #%d : '%s' | orgDate='%s' parsed='%#v'\n", groupName, article.DBArtNum, article.MessageID, article.DateString, article.DateSent) + fmt.Printf("# %s: #%d : '%s' | orgDate='%s' parsed='%#v'\n", newsgroup, article.DBArtNum, article.MessageID, article.DateString, article.DateSent) // Track original values to detect changes originalDateSent := article.DateSent originalDateString := article.DateString - headers, err := common.ReconstructHeaders(article, true, &nntphostname) + headers, err := common.ReconstructHeaders(article, true, &nntphostname, newsgroup) fmt.Printf("### ORG HEADER: '%s'\n%s\n", article.MessageID, article.HeadersJSON) if err != nil { fmt.Printf("! Error reconstructing headers for article '%s': %v\n", article.MessageID, err) diff --git a/internal/common/headers.go b/internal/common/headers.go index ff5d166..47293c9 100644 --- a/internal/common/headers.go +++ b/internal/common/headers.go @@ -16,12 +16,13 @@ var VerboseHeaders bool = false var IgnoreGoogleHeaders bool = false var UseStrictGroupValidation bool = false var ErrNoNewsgroups = fmt.Errorf("ErrNoNewsgroups") -var unwantedChars = ";:,<>#*`§()[]{}?!%$§/\\@\"'" +var unwantedChars = "\t\x00;:,<>#*§()[]{}?!%$§/\\@\"'`" var ( // Do NOT change this here! these are needed for runtime ! 
// validGroupNameRegex validates newsgroup names according to RFC standards // Pattern: lowercase alphanumeric start, components separated by dots, no trailing dots/hyphens SeparatorRegex = regexp.MustCompile(`[,;:\s]+`) + MultipleDots = regexp.MustCompile(`\.{2,}`) validGroupNameRegexStrict = regexp.MustCompile(`^[a-z0-9][a-z0-9-]*(?:\.[a-z0-9][a-z0-9-]*)+$`) validGroupNameRegexchar = regexp.MustCompile(`^[a-zA-Z0-9]{1,255}$`) validGroupNameRegexLazy = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9._+&-]*$`) @@ -131,7 +132,7 @@ func parseDateReceivedHeader(dateStr string) time.Time { } // ReconstructHeaders reconstructs the header lines from an article for transmission -func ReconstructHeaders(article *models.Article, withPath bool, nntphostname *string) ([]string, error) { +func ReconstructHeaders(article *models.Article, withPath bool, nntphostname *string, newsgroup string) ([]string, error) { var headers []string // Add basic headers that we know about @@ -256,100 +257,6 @@ checkHeader: ignoreLine = false } - if !isSpacedLine { - if len(headerLine) < 4 { // "X: A" - log.Printf("Short header: '%s' line=%d in msgId='%s' (continue)", headerLine, i, article.MessageID) - ignoreLine = true - ignoredLines++ - continue checkHeader - } - // check if first char is lowercase - if unicode.IsLower(rune(headerLine[0])) { - if VerboseHeaders { - log.Printf("Lowercase header: '%s' line=%d in msgId='%s' (rewrite)", headerLine, i, article.MessageID) - } - headerLine = strings.ToUpper(string(headerLine[0])) + headerLine[1:] - } - - // Check for proper header format: "name: value" (colon followed by space) - colonIndex := strings.Index(headerLine, ":") - if colonIndex == -1 { - log.Printf("Invalid header (no colon): '%s' line=%d in msgId='%s' (skip)", headerLine, i, article.MessageID) - ignoreLine = true - ignoredLines++ - continue checkHeader - } - - // Check if header follows RFC format "name: value" (colon-space) - if colonIndex+1 >= len(headerLine) || headerLine[colonIndex+1] != ' ' { - // Malformed header - missing space after colon, skip it - log.Printf("Malformed header (no colon-space): '%s' line=%d in msgId='%s' (skip)", headerLine, i, article.MessageID) - ignoreLine = true - ignoredLines++ - continue checkHeader - } - - header := strings.SplitN(headerLine, ":", 2)[0] - // extracted header key. 
do some checks - if header == "" || strings.Contains(header, " ") { - log.Printf("Invalid header (empty or contains space): '%s' line=%d in msgId='%s' (skip)", headerLine, i, article.MessageID) - ignoreLine = true - ignoredLines++ - continue checkHeader - } - if IgnoreHeadersMap[strings.ToLower(header)] { - ignoreLine = true - continue checkHeader - } - if IgnoreGoogleHeaders && strings.HasPrefix(strings.ToLower(header), "x-goo") { - ignoreLine = true - ignoredLines++ - continue checkHeader - } - if !strings.HasPrefix(header, "X-") { - if headersMap[strings.ToLower(header)] { - log.Printf("Duplicate header: '%s' line=%d in msgId='%s' (rewrite)", headerLine, i, article.MessageID) - headerLine = "X-RW-" + headerLine - } - headersMap[strings.ToLower(header)] = true - } - if header == "Newsgroups" { - // Check if Newsgroups header contains at least one valid newsgroup name - // check if next headerlines are continued lines - getLines: - for { - if i+1 < len(moreHeaders) { - if !strings.HasPrefix(moreHeaders[i+1], " ") && !strings.HasPrefix(moreHeaders[i+1], "\t") { - break getLines - } - headerLine += moreHeaders[i+1] - i++ - } else { - break getLines - } - if len(headerLine) > 1024 { - log.Printf("Newsgroups header too long, exceeds total 1024 chars: '%s' line=%d in msgId='%s' (break)", headerLine, i, article.MessageID) - break getLines - } - } - - // Extract only the newsgroups value (after "Newsgroups: ") - parts := strings.SplitN(headerLine, ":", 2) - if len(parts) != 2 { - log.Printf("Invalid Newsgroups header format: '%s' line=%d in msgId='%s' (continue)", headerLine, i, article.MessageID) - ignoreLine = true - ignoredLines++ - continue checkHeader - } - newsgroupsValue := strings.TrimSpace(parts[1]) - newsgroupsValue = strings.ReplaceAll(newsgroupsValue, ":", " ") - newsgroupsValue = strings.ReplaceAll(newsgroupsValue, ";", " ") - newsgroupsValue = strings.TrimSpace(newsgroupsValue) - if newsgroupsValue == "" { - log.Printf("Invalid Empty Newsgroups header value: '%s' line=%d in msgId='%s' (skip)", headerLine, i, article.MessageID) - return nil, ErrNoNewsgroups - } - newsgroups := SeparatorRegex.Split(newsgroupsValue, -1) if !isSpacedLine { if len(headerLine) < 4 { // "X: A" log.Printf("Short header: '%s' line=%d in msgId='%s' (continue)", headerLine, i, article.MessageID) @@ -497,15 +404,29 @@ checkHeader: } } // end if header Newsgroups } // end if !isSpacedLine + headers = append(headers, headerLine) - } + } // end for moreHeaders if VerboseHeaders && ignoredLines > 0 { log.Printf("Reconstructed %d header lines, ignored %d: msgId='%s'", len(headers), ignoredLines, article.MessageID) } + fallbackNewsgroup := false if len(validNewsgroups) == 0 { - return nil, ErrNoNewsgroups + if newsgroup == "" { + log.Printf("No valid newsgroups found in article msgId='%s'", article.MessageID) + return nil, ErrNoNewsgroups + } + // No Newsgroups header found, but we have a newsgroup parameter + validNewsgroups = append(validNewsgroups, newsgroup) + if badGroups == 0 { + headers = append(headers, "X-pugleaf-debug: added missing newsgroups header") + } + fallbackNewsgroup = true } - if badGroups > 0 { + if badGroups > 0 || fallbackNewsgroup { + if !fallbackNewsgroup { + headers = append(headers, fmt.Sprintf("X-pugleaf-debug: badGroups=%d valid=%d", badGroups, len(validNewsgroups))) + } // append newsgroups headers with line folding var currentLine string = "Newsgroups: " for i, group := range validNewsgroups { @@ -525,7 +446,7 @@ checkHeader: if strings.TrimSpace(currentLine) != "" { headers = 
append(headers, currentLine) } - headers = append(headers, fmt.Sprintf("X-pugleaf-debug: %d invalid newsgroups removed", badGroups)) + log.Printf("Reconstructed Newsgroups header with %d valid, removed %d. msgId='%s'", len(validNewsgroups), badGroups, article.MessageID) for i := range validNewsgroups { validNewsgroups[i] = "" // free memory diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 5109a07..8eaf7f2 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -1321,7 +1321,7 @@ func (c *BackendConn) CheckMultiple(messageIDs []*string, ttMode *TakeThisMode) */ // TakeThisArticle sends a single article via TAKETHIS command and returns the response code -func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *string) (int, error) { +func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *string, newsgroup string) (int, error) { c.mux.Lock() defer c.mux.Unlock() @@ -1333,7 +1333,7 @@ func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *str } // Prepare article for transfer - headers, err := common.ReconstructHeaders(article, true, nntphostname) + headers, err := common.ReconstructHeaders(article, true, nntphostname, newsgroup) if err != nil { return 0, fmt.Errorf("failed to reconstruct headers: %v", err) } @@ -1409,7 +1409,7 @@ func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *str // SendTakeThisArticleStreaming sends TAKETHIS command and article content without waiting for response // Returns command ID for later response reading - used for streaming mode -func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string) (uint, error) { +func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string, newsgroup string) (uint, error) { c.mux.Lock() defer c.mux.Unlock() @@ -1422,7 +1422,7 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp } // Prepare article for transfer - headers, err := common.ReconstructHeaders(article, true, nntphostname) + headers, err := common.ReconstructHeaders(article, true, nntphostname, newsgroup) if err != nil { return 0, err } @@ -1512,7 +1512,7 @@ func (c *BackendConn) PostArticle(article *models.Article) (int, error) { return 0, fmt.Errorf("not connected") } // Prepare article for posting - headers, err := common.ReconstructHeaders(article, false, nil) + headers, err := common.ReconstructHeaders(article, false, nil, "") if err != nil { return 0, fmt.Errorf("failed to reconstruct headers: %v", err) } From b8009d2d4ceb42da1e818e0c3c104a83ae5b7119 Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 5 Oct 2025 23:40:10 +0000 Subject: [PATCH 010/100] Update main.go --- cmd/nntp-transfer/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 551c786..f16ce88 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1566,7 +1566,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art continue // Skip cached articles } // Send TAKETHIS command with article content (non-blocking) - cmdID, err := conn.SendTakeThisArticleStreaming(article, &processor.LocalNNTPHostname) + cmdID, err := conn.SendTakeThisArticleStreaming(article, &processor.LocalNNTPHostname, newsgroup) if err != nil { if err == common.ErrNoNewsgroups { log.Printf("Newsgroup: '%s' | skipped article '%s': no 
newsgroups header", newsgroup, article.MessageID) From b891f502a83a2bd78ae653deef3144721369d4e7 Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 5 Oct 2025 23:40:15 +0000 Subject: [PATCH 011/100] merge shit --- internal/nntp/nntp-client-commands.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 8eaf7f2..cb0ce7f 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -1320,7 +1320,7 @@ func (c *BackendConn) CheckMultiple(messageIDs []*string, ttMode *TakeThisMode) } */ -// TakeThisArticle sends a single article via TAKETHIS command and returns the response code +// TakeThisArticle sends an article via TAKETHIS command func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *string, newsgroup string) (int, error) { c.mux.Lock() defer c.mux.Unlock() From 3dd23795bd8310aacb077cf10220eb5cc3bc7dfe Mon Sep 17 00:00:00 2001 From: go-while Date: Mon, 6 Oct 2025 00:44:57 +0000 Subject: [PATCH 012/100] Update main.go --- cmd/nntp-transfer/main.go | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index f16ce88..eb54854 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -457,7 +457,6 @@ func main() { } transferDoneChan <- result }(&wgP, redisCli) - wgP.Wait() // Wait for either shutdown signal or transfer completion select { case <-sigChan: @@ -470,7 +469,7 @@ func main() { log.Printf("Transfer completed successfully") } } - + wgP.Wait() pool.ClosePool() // Close processor @@ -565,8 +564,17 @@ const query_getArticlesBatchWithDateFilter_nodatefilter = `SELECT article_num, m const query_getArticlesBatchWithDateFilter_orderby = " ORDER BY date_sent ASC LIMIT ? OFFSET ?" 
// getArticlesBatchWithDateFilter retrieves articles from a group database with optional date filtering -func getArticlesBatchWithDateFilter(groupDBs *database.GroupDBs, offset int64, startTime, endTime *time.Time) ([]*models.Article, error) { - +func getArticlesBatchWithDateFilter(db *database.Database, newsgroup *models.Newsgroup, offset int64, startTime, endTime *time.Time) ([]*models.Article, error) { + // Get group database + groupDBs, err := db.GetGroupDBs(newsgroup.Name) + if err != nil { + return nil, fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", newsgroup.Name, err) + } + defer func() { + if ferr := db.ForceCloseGroupDBs(groupDBs); ferr != nil { + log.Printf("ForceCloseGroupDBs error for '%s': %v", newsgroup.Name, ferr) + } + }() var query string var args []interface{} @@ -1111,21 +1119,19 @@ func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponse func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batchCheck int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, redisCli *redis.Client) error { // Get group database - groupDBs, err := db.GetGroupDBs(newsgroup.Name) + groupDBsA, err := db.GetGroupDBs(newsgroup.Name) if err != nil { return fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", newsgroup.Name, err) } - defer func() { - if ferr := db.ForceCloseGroupDBs(groupDBs); ferr != nil { - log.Printf("ForceCloseGroupDBs error for '%s': %v", newsgroup.Name, ferr) - } - }() // Get total article count first with date filtering - totalArticles, err := getArticleCountWithDateFilter(groupDBs, startTime, endTime) + totalArticles, err := getArticleCountWithDateFilter(groupDBsA, startTime, endTime) if err != nil { return fmt.Errorf("failed to get article count for newsgroup '%s': %v", newsgroup.Name, err) } + if ferr := db.ForceCloseGroupDBs(groupDBsA); ferr != nil { + log.Printf("ForceCloseGroupDBs error for '%s': %v", newsgroup.Name, ferr) + } if totalArticles == 0 { @@ -1279,7 +1285,7 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch } start := time.Now() // Load batch from database with date filtering - articles, err := getArticlesBatchWithDateFilter(groupDBs, offset, startTime, endTime) + articles, err := getArticlesBatchWithDateFilter(db, newsgroup, offset, startTime, endTime) if err != nil { log.Printf("Error loading article batch (offset %d) for newsgroup %s: %v", offset, newsgroup.Name, err) return fmt.Errorf("failed to load article batch (offset %d) for newsgroup '%s': %v", offset, newsgroup.Name, err) @@ -1296,7 +1302,11 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch return nil } //if VERBOSE { - log.Printf("Newsgroup: '%s' | Loaded %d articles from database (offset %d) took %v", newsgroup.Name, len(articles), offset, time.Since(start)) + var size int + for _, a := range articles { + size += a.Bytes + } + log.Printf("Newsgroup: '%s' | Loaded %d articles from database (offset %d) (Bytes=%d) took %v", newsgroup.Name, len(articles), offset, size, time.Since(start)) //} // Process articles in network batches for i := 0; i < len(articles); i += batchCheck { From 436c15e44fd2182c23da481d471c43993c472329 Mon Sep 17 00:00:00 2001 From: go-while Date: Mon, 6 Oct 2025 22:07:35 +0000 Subject: [PATCH 013/100] testing --- cmd/nntp-transfer/main.go | 97 +++++++++++++++++++------------------ rsync_nntp-transfer_TEST.sh | 1 + 2 files changed, 50 insertions(+), 48 deletions(-) create mode 100755 rsync_nntp-transfer_TEST.sh diff --git 
a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index eb54854..fbc5e9f 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1198,58 +1198,59 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch go func(rc chan *nntp.TTResponse, num uint64) { defer responseWG.Done() defer log.Printf("Newsgroup: '%s' | Ending response channel processor num %d (goroutines: %d)", newsgroup.Name, num, runtime.NumGoroutine()) - for resp := range rc { - if resp == nil { - log.Printf("Newsgroup: '%s' | Warning: nil TT response channel received!?", newsgroup.Name) - return - } - if resp.Err != nil { - log.Printf("Newsgroup: '%s' | Error in TT response: err='%v' job='%#v'", newsgroup.Name, resp.Err, resp.Job) - return - } - if resp.Job == nil { - log.Printf("Newsgroup: '%s' | Warning: nil Job in TT response without error!?", newsgroup.Name) - return - } - // get numbers - amux.Lock() - resp.Job.GetUpdateCounters(&transferred, &unwanted, &rejected, &checked, &txErrors, &connErrors) - amux.Unlock() - - // free memory - CRITICAL: Lock and unlock in same scope, not with defer! - resp.Job.Mux.Lock() - - // Clean up Articles and their internal fields - for i := range resp.Job.Articles { - if resp.Job.Articles[i] != nil { - // Clean article internal fields to free memory - resp.Job.Articles[i].RefSlice = nil - resp.Job.Articles[i].NNTPhead = nil - resp.Job.Articles[i].NNTPbody = nil - resp.Job.Articles[i].Headers = nil - resp.Job.Articles[i].ArticleNums = nil - resp.Job.Articles[i].NewsgroupsPtr = nil - resp.Job.Articles[i].ProcessQueue = nil - resp.Job.Articles[i].MsgIdItem = nil - resp.Job.Articles[i] = nil - } - } - resp.Job.Articles = nil - - // Clean up ArticleMap - nil the keys (pointers) before deleting - for msgid := range resp.Job.ArticleMap { - resp.Job.ArticleMap[msgid] = nil - delete(resp.Job.ArticleMap, msgid) + + // Read exactly ONE response from this channel (channel is buffered with cap 1) + resp := <-rc + + if resp == nil { + log.Printf("Newsgroup: '%s' | Warning: nil TT response received!?", newsgroup.Name) + return + } + if resp.Err != nil { + log.Printf("Newsgroup: '%s' | Error in TT response: err='%v' job='%#v'", newsgroup.Name, resp.Err, resp.Job) + return + } + if resp.Job == nil { + log.Printf("Newsgroup: '%s' | Warning: nil Job in TT response without error!?", newsgroup.Name) + return + } + // get numbers + amux.Lock() + resp.Job.GetUpdateCounters(&transferred, &unwanted, &rejected, &checked, &txErrors, &connErrors) + amux.Unlock() + + // free memory - CRITICAL: Lock and unlock in same scope, not with defer! + resp.Job.Mux.Lock() + + // Clean up Articles and their internal fields + for i := range resp.Job.Articles { + if resp.Job.Articles[i] != nil { + // Clean article internal fields to free memory + resp.Job.Articles[i].RefSlice = nil + resp.Job.Articles[i].NNTPhead = nil + resp.Job.Articles[i].NNTPbody = nil + resp.Job.Articles[i].Headers = nil + resp.Job.Articles[i].ArticleNums = nil + resp.Job.Articles[i].NewsgroupsPtr = nil + resp.Job.Articles[i].ProcessQueue = nil + resp.Job.Articles[i].MsgIdItem = nil + resp.Job.Articles[i] = nil } - resp.Job.ArticleMap = nil + } + resp.Job.Articles = nil - // NOTE: Do NOT clean up MessageIDs or WantedIDs here! - // The CHECK worker may still be using them (race condition). - // They will be cleaned up in BootConnWorkers when the job is requeued or discarded. 
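As an aside, a minimal sketch of the one-response-per-job channel pattern the collector relies on here (buffered channel of capacity 1, single send, single receive); the types are simplified stand-ins for CHTTJob and TTResponse, not the real ones.

package sketch

import "fmt"

type ttResponse struct {
	jobID uint64
	err   error
}

type chttJob struct {
	id       uint64
	respChan chan *ttResponse // buffered with cap 1
}

// worker sends exactly one response; with cap 1 the send never blocks,
// even if the collector goroutine has not started reading yet.
func worker(j *chttJob) {
	// ... CHECK/TAKETHIS work would happen here ...
	j.respChan <- &ttResponse{jobID: j.id}
}

// collect reads exactly one response and returns, mirroring the per-job
// goroutine above that does `resp := <-rc` once and then exits.
func collect(j *chttJob) {
	resp := <-j.respChan
	fmt.Printf("job #%d done, err=%v\n", resp.jobID, resp.err)
}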
+ // Clean up ArticleMap - nil the keys (pointers) before deleting + for msgid := range resp.Job.ArticleMap { + resp.Job.ArticleMap[msgid] = nil + delete(resp.Job.ArticleMap, msgid) + } + resp.Job.ArticleMap = nil - resp.Job.Mux.Unlock() + // NOTE: Do NOT clean up MessageIDs or WantedIDs here! + // The CHECK worker may still be using them (race condition). + // They will be cleaned up in BootConnWorkers when the job is requeued or discarded. - } + resp.Job.Mux.Unlock() }(responseChan, num) } log.Printf("Newsgroup: '%s' | Collector: ttResponses closed, waiting for %d response processors to finish...", newsgroup.Name, num) diff --git a/rsync_nntp-transfer_TEST.sh b/rsync_nntp-transfer_TEST.sh new file mode 100755 index 0000000..e67d653 --- /dev/null +++ b/rsync_nntp-transfer_TEST.sh @@ -0,0 +1 @@ +rsync -vaz --progress scripts/nntp-transfer-loop.sh build/nntp-transfer rocksolid-pugleaf@81-171-22-215:~/ From ffc4c5490138238208fa0d87b49b4a885222504a Mon Sep 17 00:00:00 2001 From: go-while Date: Mon, 6 Oct 2025 22:23:10 +0000 Subject: [PATCH 014/100] -tmpMessageIDs --- cmd/nntp-transfer/main.go | 57 +++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index fbc5e9f..ba7bd64 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1198,10 +1198,10 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch go func(rc chan *nntp.TTResponse, num uint64) { defer responseWG.Done() defer log.Printf("Newsgroup: '%s' | Ending response channel processor num %d (goroutines: %d)", newsgroup.Name, num, runtime.NumGoroutine()) - + // Read exactly ONE response from this channel (channel is buffered with cap 1) resp := <-rc - + if resp == nil { log.Printf("Newsgroup: '%s' | Warning: nil TT response received!?", newsgroup.Name) return @@ -1814,15 +1814,14 @@ forever: continue forever } returnSignal := &ReturnSignal{ - slotID: slotID, - errChan: errChan, - redisCli: redisCli, - ExitChan: make(chan *ReturnSignal, 1), - tmpMessageIDs: make([]*string, 0, BatchCheck), - jobsQueued: make(map[*nntp.CHTTJob]uint64, BatchCheck), - jobsReadOK: make(map[*nntp.CHTTJob]uint64, BatchCheck), - jobMap: make(map[*string]*nntp.CHTTJob, BatchCheck), - jobs: make([]*nntp.CHTTJob, 0, BatchCheck), + slotID: slotID, + errChan: errChan, + redisCli: redisCli, + ExitChan: make(chan *ReturnSignal, 1), + jobsQueued: make(map[*nntp.CHTTJob]uint64, BatchCheck), + jobsReadOK: make(map[*nntp.CHTTJob]uint64, BatchCheck), + jobMap: make(map[*string]*nntp.CHTTJob, BatchCheck), + jobs: make([]*nntp.CHTTJob, 0, BatchCheck), } returnSignals[i] = returnSignal @@ -1865,6 +1864,11 @@ forever: if job != nil { // copy articles pointer job.Mux.Lock() + if len(job.Articles) == 0 { + log.Printf("ERROR in CHTTWorker (%d) job %d has no articles, skipping requeue", i, job.JobID) + job.Mux.Unlock() + continue + } rqj := &nntp.CHTTJob{ JobID: job.JobID, Newsgroup: job.Newsgroup, @@ -1923,14 +1927,6 @@ forever: rs.jobs[idx] = nil } rs.jobs = nil - - // Clean up tmpMessageIDs (if still present) - log.Printf("CHTTWorker (%d) cleaning up tmpMessageIDs with %d entries", i, len(rs.tmpMessageIDs)) - for idx := range rs.tmpMessageIDs { - rs.tmpMessageIDs[idx] = nil - } - rs.tmpMessageIDs = nil - rs.redisCli = nil rs.ExitChan = nil rs.errChan = nil @@ -1955,16 +1951,15 @@ var JobsToRetry []*nntp.CHTTJob var JobsToRetryMux sync.Mutex type ReturnSignal struct { - Mux sync.Mutex - slotID int - ExitChan chan *ReturnSignal - 
errChan chan struct{} - redisCli *redis.Client - tmpMessageIDs []*string - jobsQueued map[*nntp.CHTTJob]uint64 - jobsReadOK map[*nntp.CHTTJob]uint64 - jobMap map[*string]*nntp.CHTTJob - jobs []*nntp.CHTTJob + Mux sync.Mutex + slotID int + ExitChan chan *ReturnSignal + errChan chan struct{} + redisCli *redis.Client + jobsQueued map[*nntp.CHTTJob]uint64 + jobsReadOK map[*nntp.CHTTJob]uint64 + jobMap map[*string]*nntp.CHTTJob + jobs []*nntp.CHTTJob } type readRequest struct { @@ -2053,8 +2048,12 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue cha err := conn.SendCheckMultiple(batch) if err != nil { + rs.Mux.Lock() + rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) // requeue at front + rs.Mux.Unlock() common.ChanRelease(flipflopChan) log.Printf("Newsgroup: '%s' | CheckWorker (%d): SendCheckMultiple error for batch %d-%d: %v", newsgroup, id, batchStart, batchEnd, err) + time.Sleep(time.Second) return } From e0acbd76f95791f288abb2611f18d24bbb9052b4 Mon Sep 17 00:00:00 2001 From: go-while Date: Tue, 7 Oct 2025 13:05:16 +0000 Subject: [PATCH 015/100] testing --- cmd/nntp-transfer/main.go | 798 +++++++++++++++++++++----- internal/nntp/nntp-backend-pool.go | 2 +- internal/nntp/nntp-client-commands.go | 86 +-- internal/nntp/nntp-client.go | 27 +- 4 files changed, 733 insertions(+), 180 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index ba7bd64..b0f6b9a 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -6,11 +6,14 @@ import ( "context" "flag" "fmt" + "html/template" "log" + "net/http" "os" "os/signal" "runtime" "slices" + "sort" "strconv" "strings" "sync" @@ -146,6 +149,9 @@ func main() { fileInclude = flag.String("file-include", "", "File containing newsgroup patterns to include (one per line)") fileExclude = flag.String("file-exclude", "", "File containing newsgroup patterns to exclude (one per line)") forceIncludeOnly = flag.Bool("force-include-only", false, "When set, only transfer newsgroups that match patterns in include file (ignores -group pattern)") + + // Web server options + webPort = flag.Int("web-port", 0, "Enable web server on this port to view results (e.g. 8080, default: disabled)") ) flag.Parse() common.IgnoreGoogleHeaders = *ignoreGoogleHeaders @@ -457,6 +463,12 @@ func main() { } transferDoneChan <- result }(&wgP, redisCli) + + // Start web server if port is specified + if *webPort > 0 { + go startWebServer(*webPort) + } + // Wait for either shutdown signal or transfer completion select { case <-sigChan: @@ -1019,6 +1031,16 @@ func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchChe if VERBOSE { log.Printf("Newsgroup: '%s' | Start", newsgroup.Name) } + + // Initialize newsgroup progress tracking + resultsMutex.Lock() + NewsgroupProgressMap[newsgroup.Name] = &NewsgroupProgress{ + Started: time.Now(), + LastUpdated: time.Now(), + Finished: false, + } + resultsMutex.Unlock() + /* transferred, checked, rc, unwanted, rejected, txErrors, connErrors, err := transferNewsgroup(db, proc, pool, newsgroup, batchCheck, dryRun, startTime, endTime, debugCapture, redisCli) @@ -1096,7 +1118,7 @@ func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponse log.Printf("Newsgroup: '%s' | Processing requeued job %d/%d with %d articles", newsgroup, i+1, len(queuedJobs), len(job.Articles)) // pass articles to CHECK or TAKETHIS queue (async!) 
- responseChan, err := processBatch(ttMode, job.Articles, redisCli) + responseChan, err := processBatch(ttMode, job.Articles, redisCli, job.BatchStart, job.BatchEnd, -1) if err != nil { log.Printf("Newsgroup: '%s' | Error processing requeued batch: %v", newsgroup, err) jobRequeueMutex.Lock() @@ -1147,6 +1169,15 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch return nil } + // Update progress with total articles count + resultsMutex.Lock() + if progress, exists := NewsgroupProgressMap[newsgroup.Name]; exists { + progress.Mux.Lock() + progress.TotalArticles = totalArticles + progress.Mux.Unlock() + } + resultsMutex.Unlock() + if dryRun { if startTime != nil || endTime != nil { log.Printf("DRY RUN: Would transfer %d articles from newsgroup %s (within specified date range)", totalArticles, newsgroup.Name) @@ -1197,31 +1228,32 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch responseWG.Add(1) go func(rc chan *nntp.TTResponse, num uint64) { defer responseWG.Done() - defer log.Printf("Newsgroup: '%s' | Ending response channel processor num %d (goroutines: %d)", newsgroup.Name, num, runtime.NumGoroutine()) + defer log.Printf("Newsgroup: '%s' | Quit response channel processor num %d (goroutines: %d)", newsgroup.Name, num, runtime.NumGoroutine()) // Read exactly ONE response from this channel (channel is buffered with cap 1) - resp := <-rc + resp := <-rc // job.Response(ForceCleanUp, err) arrives here if resp == nil { log.Printf("Newsgroup: '%s' | Warning: nil TT response received!?", newsgroup.Name) return } if resp.Err != nil { - log.Printf("Newsgroup: '%s' | Error in TT response: err='%v' job='%#v'", newsgroup.Name, resp.Err, resp.Job) - return + log.Printf("Newsgroup: '%s' | Error in TT response job #%d err='%v' job='%v' ForceCleanUp=%t", newsgroup.Name, resp.Job.JobID, resp.Err, resp.Job, resp.ForceCleanUp) } if resp.Job == nil { - log.Printf("Newsgroup: '%s' | Warning: nil Job in TT response without error!?", newsgroup.Name) + log.Printf("Newsgroup: '%s' | Warning: nil Job in TT response job without error!? ForceCleanUp=%t", newsgroup.Name, resp.ForceCleanUp) return } // get numbers amux.Lock() resp.Job.GetUpdateCounters(&transferred, &unwanted, &rejected, &checked, &txErrors, &connErrors) amux.Unlock() - + if !resp.ForceCleanUp { + return + } // free memory - CRITICAL: Lock and unlock in same scope, not with defer! resp.Job.Mux.Lock() - + log.Printf("Newsgroup: '%s' | Cleaning up TT job #%d with %d articles (ForceCleanUp)", newsgroup.Name, resp.Job.JobID, len(resp.Job.Articles)) // Clean up Articles and their internal fields for i := range resp.Job.Articles { if resp.Job.Articles[i] != nil { @@ -1264,6 +1296,13 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch //log.Print(result) resultsMutex.Lock() results = append(results, result) + // Mark newsgroup as finished + if progress, exists := NewsgroupProgressMap[newsgroup.Name]; exists { + progress.Mux.Lock() + progress.Finished = true + progress.LastUpdated = time.Now() + progress.Mux.Unlock() + } if VERBOSE { for _, msgId := range rejectedArticles[newsgroup.Name] { // prints all at the end again @@ -1321,7 +1360,7 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch end = len(articles) } // pass articles to CHECK or TAKETHIS queue (async!) 
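A minimal sketch of the two-level locking used for the progress tracking above: an RWMutex guards the map itself, and each entry carries its own mutex, so updates for one newsgroup do not serialize against updates for another. Field and function names are trimmed down from the patch and are illustrative only.

package sketch

import (
	"sync"
	"time"
)

type progress struct {
	mux         sync.Mutex
	batchStart  int64
	batchEnd    int64
	lastUpdated time.Time
	finished    bool
}

var (
	progressMapMux sync.RWMutex
	progressMap    = map[string]*progress{}
)

// updateProgress takes the map's read lock only to find the pointer,
// then mutates the entry under the entry's own lock.
func updateProgress(group string, start, end int64) {
	progressMapMux.RLock()
	p, ok := progressMap[group]
	progressMapMux.RUnlock()
	if !ok {
		return
	}
	p.mux.Lock()
	p.batchStart, p.batchEnd = start, end
	p.lastUpdated = time.Now()
	p.mux.Unlock()
}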
- responseChan, err := processBatch(ttMode, articles[i:end], redisCli) + responseChan, err := processBatch(ttMode, articles[i:end], redisCli, int64(i), int64(end), offset) if err != nil { log.Printf("Newsgroup: '%s' | Error processing batch %d-%d: %v", newsgroup.Name, i+1, end, err) return fmt.Errorf("error processing batch %d-%d for newsgroup '%s': %v", i+1, end, newsgroup.Name, err) @@ -1331,7 +1370,7 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch } remainingArticles -= int64(len(articles)) if VERBOSE { - log.Printf("Newsgroup: '%s' | Pushed to queue (offset %d/%d) remaining: %d (Check=%t)", newsgroup.Name, offset, totalArticles, remainingArticles, ttMode.GetMode()) + log.Printf("Newsgroup: '%s' | Pushed to queue (offset %d/%d) remaining: %d (Check=%t)", newsgroup.Name, offset, totalArticles, remainingArticles, ttMode.UseCHECK()) //log.Printf("Newsgroup: '%s' | Pushed (offset %d/%d) total: %d/%d (unw: %d / rej: %d) (Check=%t)", newsgroup.Name, offset, totalArticles, transferred, remainingArticles, ttMode.Unwanted, ttMode.Rejected, ttMode.GetMode()) } } // end for offset range totalArticles @@ -1374,21 +1413,47 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch return nil } // end func transferNewsgroup +// NewsgroupProgress tracks the progress of a newsgroup transfer +type NewsgroupProgress struct { + Mux sync.RWMutex + Started time.Time + LastUpdated time.Time + OffsetStart int64 + BatchStart int64 + BatchEnd int64 + TotalArticles int64 + Finished bool +} + var results []string var rejectedArticles = make(map[string][]string) var resultsMutex sync.RWMutex +var NewsgroupProgressMap = make(map[string]*NewsgroupProgress) var lowerLevel float64 = 90.0 var upperLevel float64 = 95.0 // processBatch processes a batch of articles using NNTP streaming protocol (RFC 4644) // Uses TAKETHIS primarily, falls back to CHECK when success rate < 95% -func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCli *redis.Client) (chan *nntp.TTResponse, error) { +func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCli *redis.Client, batchStart int64, batchEnd int64, dbOffset int64) (chan *nntp.TTResponse, error) { if len(articles) == 0 { log.Printf("processBatch: no articles in this batch for newsgroup '%s'", *ttMode.Newsgroup) return nil, nil } - doCheck := ttMode.FlipMode(lowerLevel, upperLevel) + + // Update newsgroup progress with current offset + resultsMutex.RLock() + if progress, exists := NewsgroupProgressMap[*ttMode.Newsgroup]; exists { + progress.Mux.Lock() + progress.OffsetStart = dbOffset + progress.BatchStart = batchStart + progress.BatchEnd = batchEnd + progress.LastUpdated = time.Now() + progress.Mux.Unlock() + } + resultsMutex.RUnlock() + + ttMode.FlipMode(lowerLevel, upperLevel) batchedJob := &nntp.CHTTJob{ JobID: atomic.AddUint64(&nntp.JobIDCounter, 1), @@ -1398,8 +1463,90 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl ArticleMap: make(map[*string]*models.Article, len(articles)), ResponseChan: make(chan *nntp.TTResponse, 1), TTMode: ttMode, + OffsetStart: dbOffset, + BatchStart: batchStart, + BatchEnd: batchEnd, + } + var redis_cache_hits int + if redisCli != nil && len(articles) > 0 { + pipe := redisCli.Pipeline() + cmds := make([]*redis.IntCmd, len(articles)) + + // Queue all EXISTS commands + for i, article := range articles { + if article == nil { + continue + } + cmds[i] = pipe.Exists(redisCtx, article.MessageID) + } + + // Execute 
all in one network round trip + _, err := pipe.Exec(redisCtx) + if err != nil && VERBOSE { + log.Printf("Newsgroup: '%s' | Redis pipeline error: %v", *ttMode.Newsgroup, err) + } + + // Process results + for i, cmd := range cmds { + if cmd == nil || articles[i] == nil { + continue + } + article := articles[i] + exists, cmdErr := cmd.Result() + if cmdErr == nil && exists > 0 { + // Cached in Redis - skip this article + if VERBOSE { + log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis (skip [CHECK])", *ttMode.Newsgroup, article.MessageID) + } + batchedJob.Increment(nntp.IncrFLAG_REDIS_CACHED) + redis_cache_hits++ + articles[i] = nil + continue + } + + // Not cached - add to valid list + batchedJob.Articles = append(batchedJob.Articles, article) + batchedJob.ArticleMap[&article.MessageID] = article + batchedJob.MessageIDs = append(batchedJob.MessageIDs, &article.MessageID) + } + } else { + // No Redis - add all non-nil message IDs + for _, article := range articles { + if article == nil { + continue + } + batchedJob.Articles = append(batchedJob.Articles, article) + batchedJob.ArticleMap[&article.MessageID] = article + batchedJob.MessageIDs = append(batchedJob.MessageIDs, &article.MessageID) + } + } + + if len(batchedJob.MessageIDs) == 0 { + log.Printf("Newsgroup: '%s' | No message IDs to check in batch. (redis_cache_hits: %d)", *ttMode.Newsgroup, redis_cache_hits) + return nil, nil + } + if VERBOSE { + log.Printf("Newsgroup: '%s' | Sending CHECK commands for %d/%d articles", *ttMode.Newsgroup, len(batchedJob.MessageIDs), len(articles)) } + // Assign job to worker (consistent assignment + load balancing) + if len(CheckQueues) == 0 { + return nil, fmt.Errorf("no workers available") + } + + workerID := assignWorkerToNewsgroup(*ttMode.Newsgroup) + + // Track queue length for load balancing + WorkerQueueLengthMux.Lock() + WorkerQueueLength[workerID]++ + WorkerQueueLengthMux.Unlock() + + log.Printf("Newsgroup: '%s' | CheckWorker (%d) Queue job #%d with %d message IDs", *ttMode.Newsgroup, workerID, batchedJob.JobID, len(batchedJob.MessageIDs)) + CheckQueues[workerID] <- batchedJob // checkQueue <- batchedJob + log.Printf("Newsgroup: '%s' | CheckWorker (%d) Job #%d queued", *ttMode.Newsgroup, workerID, batchedJob.JobID) + return batchedJob.ResponseChan, nil + + /* disabled switch doCheck { case true: // ttMode.CheckMode // CHECK mode: verify articles are wanted before sending @@ -1473,16 +1620,15 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl } workerID := assignWorkerToNewsgroup(*ttMode.Newsgroup) - checkQueue := CheckQueues[workerID] // Track queue length for load balancing WorkerQueueLengthMux.Lock() WorkerQueueLength[workerID]++ WorkerQueueLengthMux.Unlock() - log.Printf("Newsgroup: '%s' | Sending job #%d to Worker %d queue with %d message IDs", *ttMode.Newsgroup, batchedJob.JobID, workerID, len(batchedJob.MessageIDs)) - checkQueue <- batchedJob - log.Printf("Newsgroup: '%s' | Job #%d sent to Worker %d successfully", *ttMode.Newsgroup, batchedJob.JobID, workerID) + log.Printf("Newsgroup: '%s' | CheckWorker (%d) Queue job #%d with %d message IDs", *ttMode.Newsgroup, workerID, batchedJob.JobID, len(batchedJob.MessageIDs)) + CheckQueues[workerID] <- batchedJob // checkQueue <- batchedJob + log.Printf("Newsgroup: '%s' | CheckWorker (%d) Job #%d queued", *ttMode.Newsgroup, workerID, batchedJob.JobID) return batchedJob.ResponseChan, nil // end case ttMode.CheckMode @@ -1505,7 +1651,7 @@ func processBatch(ttMode *nntp.TakeThisMode, articles 
[]*models.Article, redisCl log.Printf("Newsgroup: '%s' | WARN: No valid articles for TAKETHIS mode, skipping batch", *ttMode.Newsgroup) return nil, nil } - log.Printf("Newsgroup: '%s' | Sending job #%d to TakeThisQueue with %d articles", *ttMode.Newsgroup, batchedJob.JobID, len(batchedJob.WantedIDs)) + log.Printf("Newsgroup: '%s' | job #%d Sending to TakeThisQueue with %d articles", *ttMode.Newsgroup, batchedJob.JobID, len(batchedJob.WantedIDs)) nntp.TakeThisQueue <- batchedJob log.Printf("Newsgroup: '%s' | Job #%d sent to TakeThisQueue successfully", *ttMode.Newsgroup, batchedJob.JobID) return batchedJob.ResponseChan, nil @@ -1514,6 +1660,7 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl // end switch ttMode.CheckMode return nil, nil + */ } // end func processBatch // sendArticlesBatchViaTakeThis sends multiple articles via TAKETHIS in streaming mode @@ -1598,6 +1745,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art var done []*string var countDone int // Phase 2: Read all responses in order + log.Printf("Newsgroup: '%s' | Phase 2: Reading TAKETHIS responses for %d sent articles...", newsgroup, len(artChan)) for cr := range artChan { job.TTMode.IncrementTmp() @@ -1962,11 +2110,6 @@ type ReturnSignal struct { jobs []*nntp.CHTTJob } -type readRequest struct { - MsgID *string - retChan chan struct{} -} - func replyChan(request chan struct{}, reply chan struct{}) { select { case <-request: @@ -1976,15 +2119,15 @@ func replyChan(request chan struct{}, reply chan struct{}) { } } -func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue chan *nntp.CHTTJob) { - readResponsesChan := make(chan *readRequest, BatchCheck) +func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue chan *nntp.CHTTJob) { + readResponsesChan := make(chan *nntp.ReadRequest, BatchCheck) + rrRetChan := make(chan struct{}, BatchCheck) takeThisChan := make(chan *nntp.CHTTJob, nntp.NNTPTransferThreads) errChan := make(chan struct{}, 4) tickChan := make(chan struct{}, 1) flipflopChan := make(chan struct{}, 1) requestReplyJobDone := make(chan struct{}, 1) replyJobDone := make(chan struct{}, 1) - rrRetChan := make(chan struct{}, BatchCheck) defer func(conn *nntp.BackendConn, rs *ReturnSignal) { conn.ForceCloseConn() @@ -1993,7 +2136,7 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue cha }(conn, rs) //lastRun := time.Now() - // launch go routine which sends CHECK commands if threshold exceeds BatchCheck + // launch go routine which sends CHECK commands go func() { // tick every n seconds to check if any CHECKs to do ticker := time.NewTicker(DefaultCheckTicker) @@ -2006,98 +2149,147 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue cha select { case <-errChan: errChan <- struct{}{} - log.Printf("CheckWorker (%d): Send CHECK got errChan signal... exiting", id) + log.Printf("CheckWorker (%d): Send CHECK got errChan signal... 
exiting", workerID) return case <-tickChan: if common.WantShutdown() { - log.Printf("CheckWorker (%d): Tick WantShutdown, exiting", id) + log.Printf("CheckWorker (%d): Tick WantShutdown, exiting", workerID) return } // Get the next job to process rs.Mux.Lock() if len(rs.jobs) == 0 { rs.Mux.Unlock() - log.Printf("CheckWorker (%d): Ticked but no jobs in queue, continue...", id) + log.Printf("CheckWorker (%d): Ticked but no jobs in queue, continue...", workerID) continue loop } currentJob := rs.jobs[0] rs.jobs = rs.jobs[1:] // Remove first job from queue rs.Mux.Unlock() - - // Use message IDs directly from the job - checkIds := currentJob.MessageIDs - newsgroup := "unknown" - if currentJob != nil && currentJob.Newsgroup != nil { - newsgroup = *currentJob.Newsgroup + if currentJob == nil { + continue loop } - log.Printf("Newsgroup: '%s' | CheckWorker (%d): waits to check %d message IDs in batches of %d (job #%d)", newsgroup, id, len(checkIds), BatchCheck, currentJob.JobID) - - // Process checkIds in batches of BatchCheck - for batchStart := 0; batchStart < len(checkIds); batchStart += BatchCheck { - batchEnd := batchStart + BatchCheck - if batchEnd > len(checkIds) { - batchEnd = len(checkIds) - } - batch := checkIds[batchStart:batchEnd] - - // Lock for this batch only - common.ChanLock(flipflopChan) - log.Printf("Newsgroup: '%s' | CheckWorker (%d): Sending CHECK for batch %d-%d (%d messages)", newsgroup, id, batchStart, batchEnd, len(batch)) + if currentJob.TTMode.UseCHECK() { + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d waits to check %d message IDs in batches of %d", *currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs), BatchCheck) - err := conn.SendCheckMultiple(batch) - if err != nil { - rs.Mux.Lock() - rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) // requeue at front - rs.Mux.Unlock() - common.ChanRelease(flipflopChan) - log.Printf("Newsgroup: '%s' | CheckWorker (%d): SendCheckMultiple error for batch %d-%d: %v", newsgroup, id, batchStart, batchEnd, err) - time.Sleep(time.Second) - return - } - - // Send all message IDs to read channel - for _, msgID := range batch { - if msgID != nil { - // pass message ID pointer to channel - // to read the responses from connection - readResponsesChan <- &readRequest{MsgID: msgID, retChan: rrRetChan} + // Process checkIds in batches of BatchCheck + for batchStart := 0; batchStart < len(currentJob.MessageIDs); batchStart += BatchCheck { + batchEnd := batchStart + BatchCheck + if batchEnd > len(currentJob.MessageIDs) { + batchEnd = len(currentJob.MessageIDs) } - } - - // Wait for all responses in this batch before releasing lock - for _, msgID := range batch { - if msgID != nil { - <-rrRetChan - /*disabled - select { - case <-rrRetChan: - - case <-time.After(time.Second * 300): - log.Printf("CheckWorker (%d): Timeout waiting for CHECK response for msgID: %s", id, *msgID) - common.ChanRelease(flipflopChan) - return - + // Lock for this batch only + common.ChanLock(flipflopChan) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d Sending CHECK for batch %d-%d (offset %d: %d-%d) (%d messages)", *currentJob.Newsgroup, workerID, currentJob.JobID, batchStart, batchEnd, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, len(currentJob.MessageIDs[batchStart:batchEnd])) + if !conn.IsConnected() { + rs.Mux.Lock() + rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) 
// requeue at front + rs.Mux.Unlock() + common.ChanRelease(flipflopChan) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d connection lost before SendCheckMultiple for batch (offset %d: %d-%d)", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) + time.Sleep(time.Second) + return + } + err := conn.SendCheckMultiple(currentJob.MessageIDs[batchStart:batchEnd], readResponsesChan, rrRetChan) + if err != nil { + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d SendCheckMultiple error for batch (offset %d: %d-%d): %v", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, err) + time.Sleep(time.Second) + rs.Mux.Lock() + rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) // requeue at front + rs.Mux.Unlock() + common.ChanRelease(flipflopChan) + return + } + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d Sent CHECK for batch (offset %d: %d-%d), waiting for responses...", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) + deadline := time.After(time.Minute) + timedOut := false + replies := 0 + /* + // Send all message IDs to read channel + for i, msgID := range currentJob.MessageIDs[batchStart:batchEnd] { + if msgID != nil { + // pass message ID pointer to channel + // to read the responses from connection + select { + case <-deadline: + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d timeout for batch (offset %d: %d-%d) replies=%d readResponsesChan=%d n=%d/%d", *currentJob.Newsgroup, id, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, replies, len(readResponsesChan), i+1, len(currentJob.MessageIDs[batchStart:batchEnd])) + timedOut = true + case readResponsesChan <- &readRequest{id: i, reqs: len(currentJob.MessageIDs[batchStart:batchEnd]), MsgID: msgID, retChan: rrRetChan}: + // sent readRequest to readResponsesChan, wait for reply + } + if timedOut { + break + } + } } - */ + */ + if !timedOut { + for i, msgID := range currentJob.MessageIDs[batchStart:batchEnd] { + if msgID != nil { + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d waiting for rrRetChan (offset %d: %d-%d) replies=%d readResponsesChan=%d n=%d/%d", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, replies, len(readResponsesChan), i+1, len(currentJob.MessageIDs[batchStart:batchEnd])) + select { + case <-deadline: + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d timeout waiting for rrRetChan for batch (offset %d: %d-%d) replies=%d readResponsesChan=%d n=%d/%d", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, replies, len(readResponsesChan), i+1, len(currentJob.MessageIDs[batchStart:batchEnd])) + timedOut = true + case <-rrRetChan: + // got reply + replies++ + } + if timedOut { + break + } + } + } + } + // If timeout occurred, clean up the job + if timedOut { + rs.Mux.Lock() + // requeue to front + rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) 
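A minimal sketch of the bounded-wait loop used above: a single time.After deadline is shared across all expected replies, so the budget applies to the whole batch rather than to each reply; replyCh and want stand in for rrRetChan and the number of CHECK commands sent.

package sketch

import (
	"fmt"
	"time"
)

// waitReplies counts replies until either all expected ones arrive or the
// shared deadline fires, returning how many were seen in either case.
func waitReplies(replyCh <-chan struct{}, want int, budget time.Duration) (int, error) {
	deadline := time.After(budget)
	got := 0
	for got < want {
		select {
		case <-replyCh:
			got++
		case <-deadline:
			return got, fmt.Errorf("timeout after %d/%d replies", got, want)
		}
	}
	return got, nil
}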
+ /* disabled + // Clean up all message IDs from jobMap + for _, msgID := range currentJob.MessageIDs { + if msgID != nil { + if _, exists := rs.jobMap[msgID]; exists { + rs.jobMap[msgID] = nil + delete(rs.jobMap, msgID) + } + } + } + // Clean up job tracking + delete(rs.jobsQueued, currentJob) + delete(rs.jobsReadOK, currentJob) + rs.Mux.Unlock() + // Send failure response + currentJob.Response(false, fmt.Errorf("CHECK response timeout")) + */ + // Release lock + common.ChanRelease(flipflopChan) + return } + // Release lock after batch is complete, allowing TAKETHIS to run + common.ChanRelease(flipflopChan) } - // Release lock after batch is complete, allowing TAKETHIS to run - common.ChanRelease(flipflopChan) + } else { + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d skipping CHECK for %d message IDs (TAKETHIS mode)", *currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs)) + currentJob.WantedIDs = currentJob.MessageIDs + takeThisChan <- currentJob // local takethis chan sharing the same connection + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d sent to local TakeThisChan", *currentJob.Newsgroup, workerID, currentJob.JobID) } replyChan(requestReplyJobDone, replyJobDone) // see if anybody is waiting and reply - rs.Mux.Lock() //lastRun = time.Now() // Check if there are more jobs to process + rs.Mux.Lock() hasMoreJobs := len(rs.jobs) > 0 rs.Mux.Unlock() // Decrement queue length for this worker (job processing complete) WorkerQueueLengthMux.Lock() - if id < len(WorkerQueueLength) && WorkerQueueLength[id] > 0 { - WorkerQueueLength[id]-- + if workerID < len(WorkerQueueLength) && WorkerQueueLength[workerID] > 0 { + WorkerQueueLength[workerID]-- } WorkerQueueLengthMux.Unlock() @@ -2112,7 +2304,7 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue cha case <-ticker.C: if common.WantShutdown() { - log.Printf("CheckWorker (%d): Ticker WantShutdown, exiting", id) + log.Printf("CheckWorker (%d): Ticker WantShutdown, exiting", workerID) return } rs.Mux.Lock() @@ -2141,33 +2333,60 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue cha select { case <-errChan: errChan <- struct{}{} - log.Printf("CheckWorker (%d): Read CHECK responses got errChan signal... exiting", id) + log.Printf("CheckWorker (%d): Read CHECK responses got errChan signal... 
exiting", workerID) return case rr := <-readResponsesChan: - if rr == nil || rr.MsgID == nil || rr.retChan == nil { - log.Printf("CheckWorker (%d): Read CHECK got nil readRequest, skipping", id) - rr.retChan <- struct{}{} + + if rr == nil || rr.MsgID == nil { + log.Printf("CheckWorker (%d): Read CHECK got nil readRequest, skipping", workerID) + nntp.ReturnReadRequest(rrRetChan) continue loop } if common.WantShutdown() { - log.Printf("CheckWorker (%d): Read CHECK WantShutdown, exiting", id) - rr.retChan <- struct{}{} + log.Printf("CheckWorker (%d): Read CHECK WantShutdown, exiting", workerID) + nntp.ReturnReadRequest(rrRetChan) return } + if !conn.IsConnected() { + log.Printf("CheckWorker (%d): Read CHECK connection lost, exiting", workerID) + nntp.ReturnReadRequest(rrRetChan) + return + } + log.Printf("CheckWorker (%d): Reading CHECK response for msgID: %s (cmdId:%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) start := time.Now() + /* disabled + if err := conn.SetReadDeadline(time.Now().Add(1 * time.Minute)); err != nil { + log.Printf("Failed to set read deadline: %v", err) + nntp.ReturnReadRequest(rrRetChan) + return + } + */ + conn.TextConn.StartResponse(rr.CmdID) code, line, err := conn.TextConn.ReadCodeLine(238) + conn.TextConn.EndResponse(rr.CmdID) if code == 0 && err != nil { log.Printf("Failed to read CHECK response: %v", err) - rr.retChan <- struct{}{} + nntp.ReturnReadRequest(rrRetChan) return } + log.Printf("CheckWorker (%d): Got CHECK response line: '%s' for msgID: %s (cmdId:%d MID=%d/%d) took: %v ms", workerID, line, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) + /* disabled + if err := conn.SetReadDeadline(time.Time{}); err != nil { + log.Printf("Failed to set unset read deadline: %v", err) + nntp.ReturnReadRequest(rrRetChan) + return + } + */ tookTime += time.Since(start).Milliseconds() responseCount++ + if rr.N == 1 { + log.Printf("CheckWorker (%d): time to first response for msgID: %s (cmdId:%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) + } if responseCount >= BatchCheck { avg := float64(tookTime) / float64(responseCount) if avg > 1 { - log.Printf("CheckWorker (%d): Read %d CHECK responses, avg latency: %.1f ms", id, responseCount, avg) + log.Printf("CheckWorker (%d): Read %d CHECK responses, avg latency: %.1f ms", workerID, responseCount, avg) } responseCount = 0 tookTime = 0 @@ -2180,23 +2399,25 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue cha // ReadCodeLine returns: code=238, message=" article wanted" parts := strings.Fields(line) if len(parts) < 1 { - log.Printf("Malformed CHECK response: %s", line) - rr.retChan <- struct{}{} + log.Printf("ERROR in CheckWorker: Malformed CHECK response: %s", line) + nntp.ReturnReadRequest(rrRetChan) return } if parts[0] != *rr.MsgID { - log.Printf("Mismatched CHECK response: expected %s, got %s", *rr.MsgID, parts[0]) - rr.retChan <- struct{}{} + log.Printf("ERROR in CheckWorker: Mismatched CHECK response: expected %s, got %s", *rr.MsgID, parts[0]) + nntp.ReturnReadRequest(rrRetChan) return } + log.Printf("CheckWorker (%d): DEBUG1 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) rs.Mux.Lock() job, exists := rs.jobMap[rr.MsgID] rs.Mux.Unlock() if !exists { log.Printf("ERROR in CheckWorker: ReadCheckResponse msgId did not exist in jobMap: %s", *rr.MsgID) - rr.retChan <- struct{}{} + nntp.ReturnReadRequest(rrRetChan) continue loop } 
+ log.Printf("CheckWorker (%d): DEBUG2 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) rs.Mux.Lock() rs.jobMap[rr.MsgID] = nil // Nil the pointer before deleting delete(rs.jobMap, rr.MsgID) @@ -2204,29 +2425,33 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue cha rs.Mux.Unlock() switch code { case 238: - //log.Printf("Wanted Article '%s': response=%d", *msgID, code) + log.Printf("Got Response: Wanted Article '%s': code=%d", *rr.MsgID, code) job.AppendWantedMessageID(rr.MsgID) case 438: - //log.Printf("Unwanted Article '%s': response=%d", *msgID, code) + log.Printf("Got Response: Unwanted Article '%s': code=%d", *rr.MsgID, code) job.Increment(nntp.IncrFLAG_UNWANTED) case 431: + log.Printf("Got Response: Retry Article '%s': code=%d", *rr.MsgID, code) job.Increment(nntp.IncrFLAG_RETRY) default: log.Printf("Newsgroup: '%s' | Unknown CHECK response: line='%s' code=%d expected msgID %s", *job.Newsgroup, line, code, *rr.MsgID) } // check if all jobs are done + log.Printf("CheckWorker (%d): DEBUG3 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) rs.Mux.Lock() queuedCount, qexists := rs.jobsQueued[job] readCount, rexists := rs.jobsReadOK[job] rs.Mux.Unlock() if !qexists || !rexists { log.Printf("Newsgroup: '%s' | ERROR in CheckWorker: queuedCount or readCount did not exist for a job?!", *job.Newsgroup) - rr.retChan <- struct{}{} + nntp.ReturnReadRequest(rrRetChan) continue loop } + nntp.ReturnReadRequest(rrRetChan) + log.Printf("CheckWorker (%d): DEBUG4 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) if queuedCount == readCount { rs.Mux.Lock() delete(rs.jobsQueued, job) @@ -2234,15 +2459,15 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue cha rs.Mux.Unlock() if len(job.WantedIDs) > 0 { // Pass job to TAKETHIS worker via channel + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d got all CHECK responses (%d), passing to TAKETHIS worker takeThisChanLen=%d", *job.Newsgroup, workerID, job.JobID, queuedCount, len(takeThisChan)) takeThisChan <- job // local takethis chan sharing the same connection - log.Printf("Newsgroup: '%s' | CheckWorker (%d): Sent job #%d to TAKETHIS worker (wanted: %d/%d)", *job.Newsgroup, id, job.JobID, len(job.WantedIDs), queuedCount) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): Sent job #%d to TAKETHIS worker (wanted: %d/%d)", *job.Newsgroup, workerID, job.JobID, len(job.WantedIDs), queuedCount) } else { - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d got %d CHECK responses but server wants none", *job.Newsgroup, id, job.JobID, queuedCount) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d got %d CHECK responses but server wants none", *job.Newsgroup, workerID, job.JobID, queuedCount) // Send response and close channel for jobs with no wanted articles - job.Response(&nntp.TTResponse{Job: job, Err: nil}) + job.Response(true, nil) } } - rr.retChan <- struct{}{} } // end select } // end forever }() @@ -2255,26 +2480,20 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue cha for { if common.WantShutdown() { - log.Printf("CheckWorker (%d): TAKETHIS worker WantShutdown, exiting", id) + log.Printf("TTworker (%d): WantShutdown, exiting", workerID) return } var job *nntp.CHTTJob select { - case job1 := <-nntp.TakeThisQueue: - job = job1 - if job != 
nil { - log.Printf("Newsgroup: '%s' | CheckWorker (%d): Received TAKETHIS job #%d from global queue (wanted: %d)", *job.Newsgroup, id, job.JobID, len(job.WantedIDs)) - } - case job2 := <-takeThisChan: - job = job2 - if job != nil { - log.Printf("Newsgroup: '%s' | CheckWorker (%d): Received TAKETHIS job #%d from local CHECK channel (wanted: %d)", *job.Newsgroup, id, job.JobID, len(job.WantedIDs)) - } + case ajob := <-takeThisChan: + job = ajob + case <-errChan: + log.Printf("TTworker (%d): got errChan signal, exiting", workerID) + errChan <- struct{}{} + return } - - // Check for nil job (channel closed) if job == nil { - log.Printf("CheckWorker (%d): Received nil job, channels may be closing", id) + log.Printf("TTworker (%d): Received nil job, channels may be closing", workerID) continue } @@ -2289,29 +2508,34 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue cha } if len(wantedArticles) == 0 { - log.Printf("Newsgroup: '%s' | CheckWorker (%d): No valid wanted articles found in ArticleMap for job #%d", *job.Newsgroup, id, job.JobID) - job.Response(&nntp.TTResponse{Job: nil, Err: fmt.Errorf("got job without valid wanted articles")}) + log.Printf("Newsgroup: '%s' | TTworker (%d): No valid wanted articles found in ArticleMap for job #%d", *job.Newsgroup, workerID, job.JobID) + job.Response(true, nil) continue } + log.Printf("Newsgroup: '%s' | TTworker (%d): Preparing to send TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, workerID, job.JobID, len(wantedArticles)) common.ChanLock(flipflopChan) - log.Printf("Newsgroup: '%s' | CheckWorker (%d): Sending TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, id, job.JobID, len(wantedArticles)) + log.Printf("Newsgroup: '%s' | TTworker (%d): Sending TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, workerID, job.JobID, len(wantedArticles)) // Send TAKETHIS commands using existing function transferred, rejected, redis_cached, err := sendArticlesBatchViaTakeThis(conn, wantedArticles, job, *job.Newsgroup, rs.redisCli) common.ChanRelease(flipflopChan) if err != nil { - log.Printf("Newsgroup: '%s' | CheckWorker (%d): Error in TAKETHIS job #%d: %v", *job.Newsgroup, id, job.JobID, err) - job.Response(&nntp.TTResponse{Job: job, Err: err}) + log.Printf("Newsgroup: '%s' | TTworker (%d): Error in TAKETHIS job #%d: %v", *job.Newsgroup, workerID, job.JobID, err) + job.Response(false, err) + rs.Mux.Lock() + // requeue at front + rs.jobs = append([]*nntp.CHTTJob{job}, rs.jobs...) 
+ rs.Mux.Unlock() continue } - log.Printf("Newsgroup: '%s' | CheckWorker (%d): TAKETHIS job #%d completed: transferred=%d, rejected=%d, redis_cached=%d", *job.Newsgroup, id, job.JobID, transferred, rejected, redis_cached) + log.Printf("Newsgroup: '%s' | TTworker (%d): TAKETHIS job #%d completed: transferred=%d, rejected=%d, redis_cached=%d", *job.Newsgroup, workerID, job.JobID, transferred, rejected, redis_cached) // Send response back - log.Printf("Newsgroup: '%s' | CheckWorker (%d): Sending TTresponse for job #%d to responseChan len=%d", *job.Newsgroup, id, job.JobID, len(job.ResponseChan)) - job.Response(&nntp.TTResponse{Job: job, Err: nil}) - log.Printf("Newsgroup: '%s' | CheckWorker (%d): Sent TTresponse for job #%d to responseChan", *job.Newsgroup, id, job.JobID) + log.Printf("Newsgroup: '%s' | TTworker (%d): Sending TTresponse for job #%d to responseChan len=%d", *job.Newsgroup, workerID, job.JobID, len(job.ResponseChan)) + job.Response(true, nil) + log.Printf("Newsgroup: '%s' | TTworker (%d): Sent TTresponse for job #%d to responseChan", *job.Newsgroup, workerID, job.JobID) } }() @@ -2323,13 +2547,13 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue cha return case job := <-checkQueue: if common.WantShutdown() { - log.Printf("CheckWorker: WantShutdown, exiting") + log.Printf("CHTTworker: WantShutdown, exiting") return } if job == nil || len(job.MessageIDs) == 0 { - log.Printf("CheckWorker: empty job, skipping") + log.Printf("CHTTworker: empty job, skipping") if job != nil { - job.Response(&nntp.TTResponse{Job: nil, Err: fmt.Errorf("got job without valid wanted articles")}) + job.Response(true, fmt.Errorf("got job without valid wanted articles")) } continue } @@ -2338,15 +2562,30 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue cha // and count queued messages rs.Mux.Lock() queueFull := len(rs.jobs) > 0 + rs.Mux.Unlock() if queueFull { - log.Printf("Newsgroup: '%s' | CheckWorker (%d): got job #%d with %d message IDs. queued=%d ... waiting...", *job.Newsgroup, id, job.JobID, len(job.MessageIDs), len(rs.jobs)) + log.Printf("Newsgroup: '%s' | CHTTworker (%d): got job #%d with %d message IDs. queued=%d ... 
waiting...", *job.Newsgroup, workerID, job.JobID, len(job.MessageIDs), len(rs.jobs)) requestReplyJobDone <- struct{}{} } - rs.Mux.Unlock() + if queueFull { start := time.Now() - <-replyJobDone - log.Printf("Newsgroup: '%s' | CheckWorker (%d): waited %v for previous jobs to clear before queuing job #%d", *job.Newsgroup, id, time.Since(start), job.JobID) + waitForReply: + for { + select { + case <-replyJobDone: + // pass + case <-time.After(time.Millisecond * 16): + rs.Mux.Lock() + queueFull = len(rs.jobs) > 0 + rs.Mux.Unlock() + if !queueFull { + break waitForReply + } + } + } + + log.Printf("Newsgroup: '%s' | CHTTworker (%d): waited %v for previous jobs to clear before queuing job #%d", *job.Newsgroup, workerID, time.Since(start), job.JobID) } rs.Mux.Lock() job.Mux.Lock() @@ -2370,3 +2609,288 @@ func CHTTWorker(id int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue cha } // end select } // end for } // end func CheckWorker + +// startWebServer starts a simple HTTP server to display transfer results +func startWebServer(port int) { + http.HandleFunc("/", handleIndex) + addr := fmt.Sprintf(":%d", port) + log.Printf("Starting web server on http://ANY_ADDR:%s", addr) + if err := http.ListenAndServe(addr, nil); err != nil { + log.Printf("Web server error: %v", err) + } +} + +// handleIndex serves the main page with transfer results +func handleIndex(w http.ResponseWriter, r *http.Request) { + resultsMutex.RLock() + defer resultsMutex.RUnlock() + + // HTML template for displaying results + const htmlTemplate = ` + + + + + NNTP Transfer Results + + + + +

🚀 NNTP Transfer Results

+ +
+ {{if eq .Started 0}} + Status: Waiting for transfers to start...
+ {{else}} + Transfer Progress: {{.Finished}}/{{.Started}} newsgroups completed + {{if eq .Finished .Started}} + ✅ All complete! + {{else}} + ({{subtract .Started .Finished}} in progress) + {{end}} +
+ {{end}} + + (Auto-refresh every 15 seconds) +
+ + {{if .Progress}} +

⏳ In Progress

+ + + + + + + + + + + {{range .Progress}} + + + + + + + {{end}} + +
NewsgroupProgressStartedDuration
{{.Name}} + {{if gt .TotalArticles 0}} +
+
+
+
+ {{.OffsetStart}}/{{.TotalArticles}} +
+ {{else}} + Initializing... + {{end}} +
{{.Started}}{{.Duration}}
+ {{end}} + + {{if .Results}} +

Completed Results

+ {{range .Results}} +
{{.}}
+ {{end}} + {{else}} +
No transfer results yet. Waiting for transfers to complete...
+ {{end}} + +
Last updated: {{.Timestamp}}
+ +` + + tmpl, err := template.New("index").Funcs(template.FuncMap{ + "subtract": func(a, b int) int { return a - b }, + "eq": func(a, b int) bool { return a == b }, + "gt": func(a, b int64) bool { return a > b }, + "divide": func(a, b int64) float64 { + if b == 0 { + return 0 + } + return float64(a) / float64(b) + }, + "multiply": func(a float64, b int) int { return int(a * float64(b)) }, + }).Parse(htmlTemplate) + if err != nil { + http.Error(w, "Template error", http.StatusInternalServerError) + return + } + + // Calculate started and finished counts and collect progress details + type ProgressInfo struct { + Name string + OffsetStart int64 + BatchStart int64 + BatchEnd int64 + TotalArticles int64 + Started string + LastUpdated string + Finished bool + Duration string + } + + started := len(NewsgroupProgressMap) + finished := 0 + var progressList []ProgressInfo + + for name, progress := range NewsgroupProgressMap { + progress.Mux.RLock() + if progress.Finished { + finished++ + progress.Mux.RUnlock() + continue // Skip finished newsgroups - they're already in results + } + + duration := time.Since(progress.Started).Round(time.Second).String() + + progressList = append(progressList, ProgressInfo{ + Name: name, + OffsetStart: progress.OffsetStart, + BatchStart: progress.BatchStart, + BatchEnd: progress.BatchEnd, + TotalArticles: progress.TotalArticles, + Started: progress.Started.Format("15:04:05"), + LastUpdated: progress.LastUpdated.Format("15:04:05"), + Finished: false, + Duration: duration, + }) + progress.Mux.RUnlock() + } + + // Sort progress list by newsgroup name for consistent display + sort.Slice(progressList, func(i, j int) bool { + return progressList[i].Name < progressList[j].Name + }) + + data := struct { + Results []string + Started int + Finished int + Progress []ProgressInfo + Timestamp string + }{ + Results: results, + Started: started, + Finished: finished, + Progress: progressList, + Timestamp: time.Now().Format("2006-01-02 15:04:05"), + } + + w.Header().Set("Content-Type", "text/html; charset=utf-8") + if err := tmpl.Execute(w, data); err != nil { + log.Printf("Template execution error: %v", err) + } +} diff --git a/internal/nntp/nntp-backend-pool.go b/internal/nntp/nntp-backend-pool.go index 9654cf2..41a5558 100644 --- a/internal/nntp/nntp-backend-pool.go +++ b/internal/nntp/nntp-backend-pool.go @@ -332,7 +332,7 @@ func (pool *Pool) Put(conn *BackendConn) error { pool.mux.Unlock() return nil } - conn.Writer.Reset(conn.conn) + //conn.Writer.Reset(conn.conn) pool.mux.RUnlock() conn.UpdateLastUsed() // set lastused before returning to pool diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index cb0ce7f..e282776 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -3,6 +3,7 @@ package nntp // Package nntp provides NNTP command implementations for go-pugleaf. 
import ( + "bufio" "fmt" "log" "strconv" @@ -30,8 +31,9 @@ var MaxReadLinesXover int64 = 100 // XOVER command typically retrieves overview const MaxReadLinesBody = MaxReadLinesArticle - MaxReadLinesHeaders var NNTPTransferThreads int = 1 -var TakeThisQueue = make(chan *CHTTJob, NNTPTransferThreads) -var CheckQueue = make(chan *CHTTJob, NNTPTransferThreads) + +// var TakeThisQueue = make(chan *CHTTJob, NNTPTransferThreads) +//var CheckQueue = make(chan *CHTTJob, NNTPTransferThreads) var JobIDCounter uint64 // Atomic counter for unique job IDs @@ -50,8 +52,9 @@ type TakeThisMode struct { } type TTResponse struct { - Job *CHTTJob - Err error + Job *CHTTJob + ForceCleanUp bool + Err error } type CheckResponse struct { // deprecated @@ -59,6 +62,20 @@ type CheckResponse struct { // deprecated Article *models.Article } +type ReadRequest struct { + CmdID uint + N int + Reqs int + MsgID *string +} + +func ReturnReadRequest(channel chan struct{}) { + select { + case channel <- struct{}{}: + default: + } +} + // batched CHECK/TAKETHIS Job type CHTTJob struct { JobID uint64 // Unique job ID for tracing @@ -79,14 +96,17 @@ type CHTTJob struct { redisCached uint64 TxErrors uint64 ConnErrors uint64 + OffsetStart int64 + BatchStart int64 + BatchEnd int64 } -func (job *CHTTJob) Response(response *TTResponse) { +func (job *CHTTJob) Response(ForceCleanUp bool, Err error) { if job.ResponseChan == nil { - log.Printf("ERROR CHTTJob.Response(): ResponseChan is nil for job ID %d response='%v'", job.JobID, response) + log.Printf("ERROR CHTTJob.Response(): ResponseChan is nil for job #%d", job.JobID) return } - job.ResponseChan <- response + job.ResponseChan <- &TTResponse{Job: job, ForceCleanUp: ForceCleanUp, Err: Err} close(job.ResponseChan) } @@ -143,7 +163,7 @@ func (job *CHTTJob) GetUpdateCounters(transferred, unwanted, rejected, checked, job.Mux.Unlock() } -func (ttMode *TakeThisMode) GetMode() bool { +func (ttMode *TakeThisMode) UseCHECK() bool { ttMode.mux.Lock() defer ttMode.mux.Unlock() if ttMode.CheckMode { @@ -1209,7 +1229,7 @@ func (c *BackendConn) parseHeaderLine(line string) (*HeaderLine, error) { } // SendCheckMultiple sends CHECK commands for multiple message IDs without returning responses! 
-func (c *BackendConn) SendCheckMultiple(messageIDs []*string) error { +func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readResponsesChan chan *ReadRequest, retChan chan struct{}) error { c.mux.Lock() defer c.mux.Unlock() if !c.connected { @@ -1223,22 +1243,20 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string) error { if len(messageIDs) == 0 { return fmt.Errorf("no message IDs provided") } - + //writer := bufio.NewWriter(c.conn) c.lastUsed = time.Now() - for _, msgID := range messageIDs { + + for n, msgID := range messageIDs { if msgID == nil || *msgID == "" { log.Printf("Skipping empty message ID in CHECK command") continue } - _, err := c.Writer.WriteString("CHECK " + *msgID + CRLF) + id, err := c.TextConn.Cmd("CHECK %s", *msgID) if err != nil { return fmt.Errorf("failed to send CHECK command for %s: %w", *msgID, err) } + readResponsesChan <- &ReadRequest{CmdID: id, Reqs: len(messageIDs), MsgID: msgID, N: n + 1} } - if err := c.Writer.Flush(); err != nil { - return fmt.Errorf("failed to flush CHECK commands: %w", err) - } - // Responses must be read later using CheckMultiple return nil } @@ -1339,7 +1357,7 @@ func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *str } c.lastUsed = time.Now() - + writer := bufio.NewWriter(c.conn) // Send TAKETHIS command id, err := c.TextConn.Cmd("TAKETHIS %s", article.MessageID) if err != nil { @@ -1348,13 +1366,13 @@ func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *str // Send headers for _, headerLine := range headers { - if _, err := c.Writer.WriteString(headerLine + CRLF); err != nil { + if _, err := writer.WriteString(headerLine + CRLF); err != nil { return 0, fmt.Errorf("failed to write header: %w", err) } } // Send empty line between headers and body - if _, err := c.Writer.WriteString(CRLF); err != nil { + if _, err := writer.WriteString(CRLF); err != nil { return 0, fmt.Errorf("failed to write header/body separator: %w", err) } @@ -1375,18 +1393,18 @@ func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *str line = "." 
+ line } - if _, err := c.Writer.WriteString(line + CRLF); err != nil { + if _, err := writer.WriteString(line + CRLF); err != nil { return 0, fmt.Errorf("failed to write body line: %w", err) } } // Send termination line (single dot) - if _, err := c.Writer.WriteString(DOT + CRLF); err != nil { + if _, err := writer.WriteString(DOT + CRLF); err != nil { return 0, fmt.Errorf("failed to send article terminator: %w", err) } // Flush the writer to ensure all data is sent - if err := c.Writer.Flush(); err != nil { + if err := writer.Flush(); err != nil { return 0, fmt.Errorf("failed to flush article data: %w", err) } @@ -1426,7 +1444,7 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp if err != nil { return 0, err } - + writer := bufio.NewWriter(c.conn) // Send TAKETHIS command id, err := c.TextConn.Cmd("TAKETHIS %s", article.MessageID) if err != nil { @@ -1435,13 +1453,13 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp // Send headers for _, headerLine := range headers { - if _, err := c.Writer.WriteString(headerLine + CRLF); err != nil { + if _, err := writer.WriteString(headerLine + CRLF); err != nil { return 0, fmt.Errorf("failed to write header SendTakeThisArticleStreaming: %w", err) } } // Send empty line between headers and body - if _, err := c.Writer.WriteString(CRLF); err != nil { + if _, err := writer.WriteString(CRLF); err != nil { return 0, fmt.Errorf("failed to write header/body separator SendTakeThisArticleStreaming: %w", err) } @@ -1462,18 +1480,18 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp line = "." + line } - if _, err := c.Writer.WriteString(line + CRLF); err != nil { + if _, err := writer.WriteString(line + CRLF); err != nil { return 0, fmt.Errorf("failed to write body line SendTakeThisArticleStreaming: %w", err) } } // Send termination line (single dot) - if _, err := c.Writer.WriteString(DOT + CRLF); err != nil { + if _, err := writer.WriteString(DOT + CRLF); err != nil { return 0, fmt.Errorf("failed to send article terminator SendTakeThisArticleStreaming: %w", err) } // Flush the writer to ensure all data is sent - if err := c.Writer.Flush(); err != nil { + if err := writer.Flush(); err != nil { return 0, fmt.Errorf("failed to flush article data SendTakeThisArticleStreaming: %w", err) } @@ -1531,7 +1549,7 @@ func (c *BackendConn) PostArticle(article *models.Article) (int, error) { if err != nil && code == 0 { return code, fmt.Errorf("POST command failed: %s", line) } - + writer := bufio.NewWriter(c.conn) switch code { case 340: // pass, posted @@ -1564,13 +1582,13 @@ func (c *BackendConn) PostArticle(article *models.Article) (int, error) { // Send headers using writer (not DotWriter) for _, headerLine := range headers { - if _, err := c.Writer.WriteString(headerLine + CRLF); err != nil { + if _, err := writer.WriteString(headerLine + CRLF); err != nil { return 0, fmt.Errorf("failed to write header: %w", err) } } // Send empty line between headers and body - if _, err := c.Writer.WriteString(CRLF); err != nil { + if _, err := writer.WriteString(CRLF); err != nil { return 0, fmt.Errorf("failed to write header/body separator: %w", err) } @@ -1591,18 +1609,18 @@ func (c *BackendConn) PostArticle(article *models.Article) (int, error) { line = "." 
+ line } - if _, err := c.Writer.WriteString(line + CRLF); err != nil { + if _, err := writer.WriteString(line + CRLF); err != nil { return 0, fmt.Errorf("failed to write body line: %w", err) } } // Send termination line (single dot) - if _, err := c.Writer.WriteString(DOT + CRLF); err != nil { + if _, err := writer.WriteString(DOT + CRLF); err != nil { return 0, fmt.Errorf("failed to send article terminator: %w", err) } // Flush the writer to ensure all data is sent - if err := c.Writer.Flush(); err != nil { + if err := writer.Flush(); err != nil { return 0, fmt.Errorf("failed to flush article data: %w", err) } diff --git a/internal/nntp/nntp-client.go b/internal/nntp/nntp-client.go index baf8e54..cd063ad 100644 --- a/internal/nntp/nntp-client.go +++ b/internal/nntp/nntp-client.go @@ -3,7 +3,6 @@ package nntp // nntp provides NNTP client functionality for go-pugleaf. import ( - "bufio" "crypto/tls" "fmt" "log" @@ -56,10 +55,10 @@ var DefaultBufferTX int = 64 * 1024 type BackendConn struct { conn net.Conn TextConn *textproto.Conn - Writer *bufio.Writer - Backend *BackendConfig - mux sync.RWMutex - Pool *Pool // link to parent pool + //Writer *bufio.Writer + Backend *BackendConfig + mux sync.RWMutex + Pool *Pool // link to parent pool // Connection state connected bool @@ -201,7 +200,7 @@ func (c *BackendConn) Connect() error { c.conn = conn c.TextConn = textproto.NewConn(conn) - c.Writer = bufio.NewWriterSize(conn, DefaultBufferTX) // bufio writer with defined buffer size + //c.Writer = bufio.NewWriterSize(conn, DefaultBufferTX) // bufio writer with defined buffer size // Read welcome message code, message, err := c.TextConn.ReadCodeLine(NNTPWelcomeCodeMin) @@ -303,13 +302,25 @@ func (c *BackendConn) CloseFromPoolOnly() error { c.authenticated = false //c.TextConn = nil // CloseFromPoolOnly //c.conn = nil // CloseFromPoolOnly - c.Writer = nil + //c.Writer = nil //log.Printf("Closed NNTP Connection to %s", c.Backend.Host) return nil } +func (c *BackendConn) IsConnected() bool { + c.mux.Lock() + defer c.mux.Unlock() + if !c.connected { + return false + } + if c.conn == nil { + return false + } + return true +} + // SetReadDeadline sets the read deadline for the connection -func (c *BackendConn) xSetReadDeadline(t time.Time) error { +func (c *BackendConn) SetReadDeadline(t time.Time) error { if c.conn == nil { return fmt.Errorf("connection not established") From a86af64b2ad25d1f06c7304f54a450592684ec7b Mon Sep 17 00:00:00 2001 From: go-while Date: Tue, 7 Oct 2025 13:09:44 +0000 Subject: [PATCH 016/100] takeThisChan unbuffered --- cmd/nntp-transfer/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index b0f6b9a..516ae2e 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -2122,7 +2122,7 @@ func replyChan(request chan struct{}, reply chan struct{}) { func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue chan *nntp.CHTTJob) { readResponsesChan := make(chan *nntp.ReadRequest, BatchCheck) rrRetChan := make(chan struct{}, BatchCheck) - takeThisChan := make(chan *nntp.CHTTJob, nntp.NNTPTransferThreads) + takeThisChan := make(chan *nntp.CHTTJob) // unbuffered errChan := make(chan struct{}, 4) tickChan := make(chan struct{}, 1) flipflopChan := make(chan struct{}, 1) From 9a8d8d1b4557631933ca13596be3d7e102dd6a4a Mon Sep 17 00:00:00 2001 From: go-while Date: Tue, 7 Oct 2025 14:52:38 +0000 Subject: [PATCH 017/100] testing --- cmd/nntp-transfer/main.go | 28 
++++++++++++--------------- internal/nntp/nntp-client-commands.go | 26 ++++++++++++++----------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 516ae2e..681cc31 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1278,10 +1278,6 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch } resp.Job.ArticleMap = nil - // NOTE: Do NOT clean up MessageIDs or WantedIDs here! - // The CHECK worker may still be using them (race condition). - // They will be cleaned up in BootConnWorkers when the job is requeued or discarded. - resp.Job.Mux.Unlock() }(responseChan, num) } @@ -1290,7 +1286,7 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch responseWG.Wait() log.Printf("Newsgroup: '%s' | Collector: all response processors closed", newsgroup.Name) amux.Lock() - result := fmt.Sprintf("END Newsgroup: '%s' | transferred: %d/%d (unwanted: %d | rejected: %d | checked: %d | TX_Errors: %d | connErrors: %d | took %v", + result := fmt.Sprintf("END Newsgroup: '%s' | transferred: %d/%d | unwanted: %d | rejected: %d | checked: %d | TX_Errors: %d | connErrors: %d | took %v", newsgroup.Name, transferred, totalArticles, unwanted, rejected, checked, txErrors, connErrors, time.Since(start)) amux.Unlock() //log.Print(result) @@ -1750,7 +1746,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art job.TTMode.IncrementTmp() takeThisResponseCode, err := conn.ReadTakeThisResponseStreaming(cr.CmdId) - if err != nil || takeThisResponseCode == 0 { + if err != nil { job.Increment(nntp.IncrFLAG_CONN_ERRORS) conn.ForceCloseConn() log.Printf("ERROR Newsgroup: '%s' | Failed to read TAKETHIS response for %s: %v", newsgroup, cr.Article.MessageID, err) @@ -2192,7 +2188,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue time.Sleep(time.Second) return } - err := conn.SendCheckMultiple(currentJob.MessageIDs[batchStart:batchEnd], readResponsesChan, rrRetChan) + err := conn.SendCheckMultiple(currentJob.MessageIDs[batchStart:batchEnd], readResponsesChan, rrRetChan, currentJob) if err != nil { log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d SendCheckMultiple error for batch (offset %d: %d-%d): %v", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, err) time.Sleep(time.Second) @@ -2340,17 +2336,16 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue if rr == nil || rr.MsgID == nil { log.Printf("CheckWorker (%d): Read CHECK got nil readRequest, skipping", workerID) - nntp.ReturnReadRequest(rrRetChan) continue loop } if common.WantShutdown() { log.Printf("CheckWorker (%d): Read CHECK WantShutdown, exiting", workerID) - nntp.ReturnReadRequest(rrRetChan) + rr.ReturnReadRequest(rrRetChan) return } if !conn.IsConnected() { log.Printf("CheckWorker (%d): Read CHECK connection lost, exiting", workerID) - nntp.ReturnReadRequest(rrRetChan) + rr.ReturnReadRequest(rrRetChan) return } log.Printf("CheckWorker (%d): Reading CHECK response for msgID: %s (cmdId:%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) @@ -2367,7 +2362,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue conn.TextConn.EndResponse(rr.CmdID) if code == 0 && err != nil { log.Printf("Failed to read CHECK response: %v", err) - nntp.ReturnReadRequest(rrRetChan) + rr.ReturnReadRequest(rrRetChan) return } 
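// Why the guard above is `code == 0 && err != nil`: textproto.ReadCodeLine(238) returns
// a non-nil *textproto.Error for any reply whose code is not 238 (e.g. 438 "unwanted"
// or 431 "try again later") but still reports the parsed code, so those replies are
// handled afterwards by code. Only code == 0 means the read itself failed, which is the
// fatal case. Roughly:
//
//	code, line, err := conn.TextConn.ReadCodeLine(238)
//	// 238 reply -> code=238, err=nil
//	// 438 reply -> code=438, err=*textproto.Error (expected; handled by the 238/438/431 handling)
//	// I/O error -> code=0,   err!=nil             (abort the reader)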
log.Printf("CheckWorker (%d): Got CHECK response line: '%s' for msgID: %s (cmdId:%d MID=%d/%d) took: %v ms", workerID, line, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) @@ -2380,6 +2375,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue */ tookTime += time.Since(start).Milliseconds() responseCount++ + rr.Job.Increment(nntp.IncrFLAG_CHECKED) if rr.N == 1 { log.Printf("CheckWorker (%d): time to first response for msgID: %s (cmdId:%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) } @@ -2400,12 +2396,12 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue parts := strings.Fields(line) if len(parts) < 1 { log.Printf("ERROR in CheckWorker: Malformed CHECK response: %s", line) - nntp.ReturnReadRequest(rrRetChan) + rr.ReturnReadRequest(rrRetChan) return } if parts[0] != *rr.MsgID { log.Printf("ERROR in CheckWorker: Mismatched CHECK response: expected %s, got %s", *rr.MsgID, parts[0]) - nntp.ReturnReadRequest(rrRetChan) + rr.ReturnReadRequest(rrRetChan) return } log.Printf("CheckWorker (%d): DEBUG1 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) @@ -2414,7 +2410,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Unlock() if !exists { log.Printf("ERROR in CheckWorker: ReadCheckResponse msgId did not exist in jobMap: %s", *rr.MsgID) - nntp.ReturnReadRequest(rrRetChan) + rr.ReturnReadRequest(rrRetChan) continue loop } log.Printf("CheckWorker (%d): DEBUG2 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) @@ -2447,11 +2443,11 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Unlock() if !qexists || !rexists { log.Printf("Newsgroup: '%s' | ERROR in CheckWorker: queuedCount or readCount did not exist for a job?!", *job.Newsgroup) - nntp.ReturnReadRequest(rrRetChan) + rr.ReturnReadRequest(rrRetChan) continue loop } - nntp.ReturnReadRequest(rrRetChan) log.Printf("CheckWorker (%d): DEBUG4 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) + rr.ReturnReadRequest(rrRetChan) if queuedCount == readCount { rs.Mux.Lock() delete(rs.jobsQueued, job) diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index e282776..04202fa 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -64,16 +64,20 @@ type CheckResponse struct { // deprecated type ReadRequest struct { CmdID uint + Job *CHTTJob N int Reqs int MsgID *string } -func ReturnReadRequest(channel chan struct{}) { +func (rr *ReadRequest) ReturnReadRequest(channel chan struct{}) { select { case channel <- struct{}{}: default: } + rr.Job = nil + rr.MsgID = nil + rr = nil } // batched CHECK/TAKETHIS Job @@ -112,13 +116,13 @@ func (job *CHTTJob) Response(ForceCleanUp bool, Err error) { const IncrFLAG_CHECKED = 1 const IncrFLAG_WANTED = 2 -const IncrFLAG_UNWANTED = 4 -const IncrFLAG_REJECTED = 8 -const IncrFLAG_RETRY = 16 -const IncrFLAG_TRANSFERRED = 32 -const IncrFLAG_REDIS_CACHED = 64 -const IncrFLAG_TX_ERRORS = 128 -const IncrFLAG_CONN_ERRORS = 256 +const IncrFLAG_UNWANTED = 3 +const IncrFLAG_REJECTED = 4 +const IncrFLAG_RETRY = 5 +const IncrFLAG_TRANSFERRED = 6 +const IncrFLAG_REDIS_CACHED = 7 +const IncrFLAG_TX_ERRORS = 8 +const 
IncrFLAG_CONN_ERRORS = 9 func (job *CHTTJob) Increment(counter int) { job.Mux.Lock() @@ -1229,7 +1233,7 @@ func (c *BackendConn) parseHeaderLine(line string) (*HeaderLine, error) { } // SendCheckMultiple sends CHECK commands for multiple message IDs without returning responses! -func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readResponsesChan chan *ReadRequest, retChan chan struct{}) error { +func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readResponsesChan chan *ReadRequest, retChan chan struct{}, job *CHTTJob) error { c.mux.Lock() defer c.mux.Unlock() if !c.connected { @@ -1255,7 +1259,7 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readResponsesChan if err != nil { return fmt.Errorf("failed to send CHECK command for %s: %w", *msgID, err) } - readResponsesChan <- &ReadRequest{CmdID: id, Reqs: len(messageIDs), MsgID: msgID, N: n + 1} + readResponsesChan <- &ReadRequest{CmdID: id, Job: job, Reqs: len(messageIDs), MsgID: msgID, N: n + 1} } return nil } @@ -1444,7 +1448,7 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp if err != nil { return 0, err } - writer := bufio.NewWriter(c.conn) + writer := bufio.NewWriterSize(c.conn, article.Bytes+2048) // Slightly larger buffer than article size for headers // Send TAKETHIS command id, err := c.TextConn.Cmd("TAKETHIS %s", article.MessageID) if err != nil { From 01baf4828f0a287c88136c4e2215adf75bf6dc65 Mon Sep 17 00:00:00 2001 From: go-while Date: Tue, 7 Oct 2025 15:07:37 +0000 Subject: [PATCH 018/100] Update main.go --- cmd/nntp-transfer/main.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 681cc31..4fd3007 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -2258,10 +2258,10 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue delete(rs.jobsQueued, currentJob) delete(rs.jobsReadOK, currentJob) rs.Mux.Unlock() - + */ // Send failure response currentJob.Response(false, fmt.Errorf("CHECK response timeout")) - */ + // Release lock common.ChanRelease(flipflopChan) return @@ -2737,7 +2737,7 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { function autoRefresh() { setTimeout(function() { location.reload(); - }, 15000); + }, 3000); } @@ -2757,7 +2757,7 @@ func handleIndex(w http.ResponseWriter, r *http.Request) {
{{end}} - (Auto-refresh every 15 seconds) + (Auto-refresh every 3 seconds) {{if .Progress}} @@ -2854,7 +2854,7 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { progressList = append(progressList, ProgressInfo{ Name: name, - OffsetStart: progress.OffsetStart, + OffsetStart: progress.OffsetStart + progress.BatchStart, BatchStart: progress.BatchStart, BatchEnd: progress.BatchEnd, TotalArticles: progress.TotalArticles, From 09dafab0e476678684606cc884ec39e8f9e4ea76 Mon Sep 17 00:00:00 2001 From: go-while Date: Tue, 7 Oct 2025 15:16:12 +0000 Subject: [PATCH 019/100] q1 --- cmd/nntp-transfer/main.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 4fd3007..73970e9 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -2557,7 +2557,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // Build jobMap for tracking which message IDs belong to this job // and count queued messages rs.Mux.Lock() - queueFull := len(rs.jobs) > 0 + queueFull := len(rs.jobs) > 1 rs.Mux.Unlock() if queueFull { log.Printf("Newsgroup: '%s' | CHTTworker (%d): got job #%d with %d message IDs. queued=%d ... waiting...", *job.Newsgroup, workerID, job.JobID, len(job.MessageIDs), len(rs.jobs)) @@ -2573,7 +2573,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // pass case <-time.After(time.Millisecond * 16): rs.Mux.Lock() - queueFull = len(rs.jobs) > 0 + queueFull = len(rs.jobs) > 1 rs.Mux.Unlock() if !queueFull { break waitForReply From 4197d36d5479a30ae07dcc7eb83d99700194af3b Mon Sep 17 00:00:00 2001 From: go-while Date: Wed, 8 Oct 2025 14:45:55 +0000 Subject: [PATCH 020/100] testing --- cmd/nntp-transfer/main.go | 189 +++++++++++++++++--------- internal/nntp/nntp-client-commands.go | 12 ++ 2 files changed, 136 insertions(+), 65 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 73970e9..cbe15c5 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -9,9 +9,11 @@ import ( "html/template" "log" "net/http" + _ "net/http/pprof" // Memory profiling "os" "os/signal" "runtime" + "runtime/debug" "slices" "sort" "strconv" @@ -78,6 +80,16 @@ func showUsageExamples() { fmt.Println(" ./nntp-transfer -host news.server.local -group alt.test -redis-clear-cache") fmt.Println(" # Use -redis-clear-cache to start fresh (clears all cached message IDs)") fmt.Println() + fmt.Println("Memory Profiling & Monitoring:") + fmt.Println(" ./nntp-transfer -host news.server.local -group alt.* -mem-stats") + fmt.Println(" ./nntp-transfer -host news.server.local -group alt.* -pprof-port 6060") + fmt.Println(" ./nntp-transfer -host news.server.local -group alt.* -gc-percent 50") + fmt.Println(" # -mem-stats: Log memory stats every 30 seconds") + fmt.Println(" # -pprof-port: Enable pprof at http://localhost:6060/debug/pprof/") + fmt.Println(" # -gc-percent: Lower values = more GC, less memory (default 100)") + fmt.Println(" # Get heap profile: curl http://localhost:6060/debug/pprof/heap > heap.prof") + fmt.Println(" # Analyze: go tool pprof heap.prof") + fmt.Println() fmt.Println("Show ALL command line flags:") fmt.Println(" ./nntp-transfer -h") @@ -150,12 +162,21 @@ func main() { fileExclude = flag.String("file-exclude", "", "File containing newsgroup patterns to exclude (one per line)") forceIncludeOnly = flag.Bool("force-include-only", false, "When set, only transfer newsgroups that match patterns in include file (ignores 
-group pattern)") - // Web server options - webPort = flag.Int("web-port", 0, "Enable web server on this port to view results (e.g. 8080, default: disabled)") + // Web server and profiling options + webPort = flag.Int("web-port", 0, "Enable web server on this port to view results (e.g. 8080, default: disabled)") + pprofPort = flag.Int("pprof-port", 0, "Enable pprof profiling server on this port (e.g., 6060). Access at http://localhost:PORT/debug/pprof/") + memStats = flag.Bool("mem-stats", false, "Log memory statistics every 30 seconds") + gcPercent = flag.Int("gc-percent", 100, "Set GOGC percentage (default 100). Lower values = more frequent GC, less memory") ) flag.Parse() common.IgnoreGoogleHeaders = *ignoreGoogleHeaders + // Configure garbage collector + if *gcPercent != 100 { + old := debug.SetGCPercent(*gcPercent) + log.Printf("Set GOGC from %d to %d (lower = more GC, less memory)", old, *gcPercent) + } + // Show help if requested if *showHelp { showUsageExamples() @@ -469,6 +490,25 @@ func main() { go startWebServer(*webPort) } + // Start pprof server if port is specified + if *pprofPort > 0 { + go func() { + addr := fmt.Sprintf("localhost:%d", *pprofPort) + log.Printf("Starting pprof server on http://%s/debug/pprof/", addr) + log.Printf(" Heap profile: http://%s/debug/pprof/heap", addr) + log.Printf(" Goroutines: http://%s/debug/pprof/goroutine", addr) + log.Printf(" Allocs: http://%s/debug/pprof/allocs", addr) + if err := http.ListenAndServe(addr, nil); err != nil { + log.Printf("pprof server error: %v", err) + } + }() + } + + // Start memory stats monitoring if enabled + if *memStats { + go monitorMemoryStats() + } + // Wait for either shutdown signal or transfer completion select { case <-sigChan: @@ -666,8 +706,15 @@ func getArticleCountWithDateFilter(groupDBs *database.GroupDBs, startTime, endTi query = "SELECT COUNT(*) FROM articles" } + start := time.Now() var count int64 err := groupDBs.DB.QueryRow(query, args...).Scan(&count) + elapsed := time.Since(start) + + if elapsed > 5*time.Second { + log.Printf("WARNING: Slow COUNT query for group '%s' took %v (count=%d)", groupDBs.Newsgroup, elapsed, count) + } + if err != nil { return 0, err } @@ -1031,28 +1078,6 @@ func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchChe if VERBOSE { log.Printf("Newsgroup: '%s' | Start", newsgroup.Name) } - - // Initialize newsgroup progress tracking - resultsMutex.Lock() - NewsgroupProgressMap[newsgroup.Name] = &NewsgroupProgress{ - Started: time.Now(), - LastUpdated: time.Now(), - Finished: false, - } - resultsMutex.Unlock() - - /* - transferred, checked, rc, unwanted, rejected, txErrors, connErrors, err := transferNewsgroup(db, proc, pool, newsgroup, batchCheck, dryRun, startTime, endTime, debugCapture, redisCli) - - transferMutex.Lock() - totalTransferred += transferred - totalRedisCacheHits += rc - totalUnwanted += unwanted - totalRejected += rejected - totalTXErrors += txErrors - totalConnErrors += connErrors - transferMutex.Unlock() - */ err := transferNewsgroup(db, newsgroup, batchCheck, dryRun, startTime, endTime, debugCapture, redisCli) if err == ErrNotInDateRange { transferMutex.Lock() @@ -1140,21 +1165,30 @@ func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponse // transferNewsgroup transfers articles from a single newsgroup func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batchCheck int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, redisCli *redis.Client) error { + log.Printf("Newsgroup: 
'%s' | transferNewsgroup: Starting (getting group DBs)...", newsgroup.Name) + // Get group database groupDBsA, err := db.GetGroupDBs(newsgroup.Name) if err != nil { return fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", newsgroup.Name, err) } + log.Printf("Newsgroup: '%s' | transferNewsgroup: Got group DBs, querying article count...", newsgroup.Name) + // Get total article count first with date filtering totalArticles, err := getArticleCountWithDateFilter(groupDBsA, startTime, endTime) if err != nil { return fmt.Errorf("failed to get article count for newsgroup '%s': %v", newsgroup.Name, err) } + + log.Printf("Newsgroup: '%s' | transferNewsgroup: Got article count (%d), closing group DBs...", newsgroup.Name, totalArticles) + if ferr := db.ForceCloseGroupDBs(groupDBsA); ferr != nil { log.Printf("ForceCloseGroupDBs error for '%s': %v", newsgroup.Name, ferr) } + log.Printf("Newsgroup: '%s' | transferNewsgroup: Closed group DBs, checking if articles exist...", newsgroup.Name) + if totalArticles == 0 { if startTime != nil || endTime != nil { @@ -1169,12 +1203,15 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch return nil } - // Update progress with total articles count + // Initialize newsgroup progress tracking resultsMutex.Lock() - if progress, exists := NewsgroupProgressMap[newsgroup.Name]; exists { - progress.Mux.Lock() - progress.TotalArticles = totalArticles - progress.Mux.Unlock() + if _, exists := NewsgroupProgressMap[newsgroup.Name]; !exists { + NewsgroupProgressMap[newsgroup.Name] = &NewsgroupProgress{ + Started: time.Now(), + LastUpdated: time.Now(), + Finished: false, + TotalArticles: totalArticles, + } } resultsMutex.Unlock() @@ -2202,40 +2239,20 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue deadline := time.After(time.Minute) timedOut := false replies := 0 - /* - // Send all message IDs to read channel - for i, msgID := range currentJob.MessageIDs[batchStart:batchEnd] { - if msgID != nil { - // pass message ID pointer to channel - // to read the responses from connection - select { - case <-deadline: - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d timeout for batch (offset %d: %d-%d) replies=%d readResponsesChan=%d n=%d/%d", *currentJob.Newsgroup, id, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, replies, len(readResponsesChan), i+1, len(currentJob.MessageIDs[batchStart:batchEnd])) - timedOut = true - case readResponsesChan <- &readRequest{id: i, reqs: len(currentJob.MessageIDs[batchStart:batchEnd]), MsgID: msgID, retChan: rrRetChan}: - // sent readRequest to readResponsesChan, wait for reply - } - if timedOut { - break - } + + for i, msgID := range currentJob.MessageIDs[batchStart:batchEnd] { + if msgID != nil { + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d waiting for rrRetChan (offset %d: %d-%d) replies=%d readResponsesChan=%d n=%d/%d", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, replies, len(readResponsesChan), i+1, len(currentJob.MessageIDs[batchStart:batchEnd])) + select { + case <-deadline: + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d timeout waiting for rrRetChan for batch (offset %d: %d-%d) replies=%d readResponsesChan=%d n=%d/%d", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, replies, len(readResponsesChan), i+1, len(currentJob.MessageIDs[batchStart:batchEnd])) + 
timedOut = true + case <-rrRetChan: + // got reply + replies++ } - } - */ - if !timedOut { - for i, msgID := range currentJob.MessageIDs[batchStart:batchEnd] { - if msgID != nil { - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d waiting for rrRetChan (offset %d: %d-%d) replies=%d readResponsesChan=%d n=%d/%d", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, replies, len(readResponsesChan), i+1, len(currentJob.MessageIDs[batchStart:batchEnd])) - select { - case <-deadline: - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d timeout waiting for rrRetChan for batch (offset %d: %d-%d) replies=%d readResponsesChan=%d n=%d/%d", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, replies, len(readResponsesChan), i+1, len(currentJob.MessageIDs[batchStart:batchEnd])) - timedOut = true - case <-rrRetChan: - // got reply - replies++ - } - if timedOut { - break - } + if timedOut { + break } } } @@ -2378,8 +2395,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rr.Job.Increment(nntp.IncrFLAG_CHECKED) if rr.N == 1 { log.Printf("CheckWorker (%d): time to first response for msgID: %s (cmdId:%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) - } - if responseCount >= BatchCheck { + } else if responseCount >= BatchCheck/2 { avg := float64(tookTime) / float64(responseCount) if avg > 1 { log.Printf("CheckWorker (%d): Read %d CHECK responses, avg latency: %.1f ms", workerID, responseCount, avg) @@ -2523,7 +2539,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // requeue at front rs.jobs = append([]*nntp.CHTTJob{job}, rs.jobs...) 
rs.Mux.Unlock() - continue + return } log.Printf("Newsgroup: '%s' | TTworker (%d): TAKETHIS job #%d completed: transferred=%d, rejected=%d, redis_cached=%d", *job.Newsgroup, workerID, job.JobID, transferred, rejected, redis_cached) @@ -2606,6 +2622,49 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } // end for } // end func CheckWorker +// monitorMemoryStats logs memory statistics periodically +func monitorMemoryStats() { + ticker := time.NewTicker(30 * time.Second) + defer ticker.Stop() + + var m runtime.MemStats + startTime := time.Now() + + for { + <-ticker.C + runtime.ReadMemStats(&m) + + // Convert bytes to MB for readability + allocMB := float64(m.Alloc) / 1024 / 1024 + totalAllocMB := float64(m.TotalAlloc) / 1024 / 1024 + sysMB := float64(m.Sys) / 1024 / 1024 + heapAllocMB := float64(m.HeapAlloc) / 1024 / 1024 + heapSysMB := float64(m.HeapSys) / 1024 / 1024 + heapIdleMB := float64(m.HeapIdle) / 1024 / 1024 + heapInuseMB := float64(m.HeapInuse) / 1024 / 1024 + + log.Printf("=== MEMORY STATS (uptime: %v) ===", time.Since(startTime).Round(time.Second)) + log.Printf(" Alloc = %.2f MB (currently allocated)", allocMB) + log.Printf(" TotalAlloc = %.2f MB (cumulative allocated)", totalAllocMB) + log.Printf(" Sys = %.2f MB (obtained from system)", sysMB) + log.Printf(" HeapAlloc = %.2f MB (heap allocated)", heapAllocMB) + log.Printf(" HeapSys = %.2f MB (heap from system)", heapSysMB) + log.Printf(" HeapIdle = %.2f MB (heap idle)", heapIdleMB) + log.Printf(" HeapInuse = %.2f MB (heap in use)", heapInuseMB) + log.Printf(" NumGC = %d (garbage collections)", m.NumGC) + log.Printf(" Goroutines = %d", runtime.NumGoroutine()) + log.Printf(" GCCPUFract = %.4f%% (GC CPU fraction)", m.GCCPUFraction*100) + + // Warning if memory usage is high + if allocMB > 1000 { + log.Printf(" ⚠️ WARNING: High memory usage (%.2f MB)! Consider lowering -batch-check or -batch-db", allocMB) + } + if runtime.NumGoroutine() > 1000 { + log.Printf(" ⚠️ WARNING: High goroutine count (%d)! 
Possible goroutine leak", runtime.NumGoroutine()) + } + } +} + // startWebServer starts a simple HTTP server to display transfer results func startWebServer(port int) { http.HandleFunc("/", handleIndex) diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 04202fa..87ba177 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -87,6 +87,7 @@ type CHTTJob struct { Mux sync.Mutex TTMode *TakeThisMode ResponseChan chan *TTResponse + responseSent bool // Track if response already sent (prevents double send) Articles []*models.Article ArticleMap map[*string]*models.Article MessageIDs []*string @@ -110,6 +111,17 @@ func (job *CHTTJob) Response(ForceCleanUp bool, Err error) { log.Printf("ERROR CHTTJob.Response(): ResponseChan is nil for job #%d", job.JobID) return } + + // Check if response already sent (prevents double send on connection loss) + job.Mux.Lock() + if job.responseSent { + log.Printf("WARNING CHTTJob.Response(): Response already sent for job #%d, skipping", job.JobID) + job.Mux.Unlock() + return + } + job.responseSent = true + job.Mux.Unlock() + job.ResponseChan <- &TTResponse{Job: job, ForceCleanUp: ForceCleanUp, Err: Err} close(job.ResponseChan) } From b58831b449432599b69de845a43042d05817cb63 Mon Sep 17 00:00:00 2001 From: go-while Date: Wed, 8 Oct 2025 15:02:44 +0000 Subject: [PATCH 021/100] testing --- cmd/nntp-transfer/main.go | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index cbe15c5..2fd7441 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1174,7 +1174,17 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch } log.Printf("Newsgroup: '%s' | transferNewsgroup: Got group DBs, querying article count...", newsgroup.Name) - + // Initialize newsgroup progress tracking + resultsMutex.Lock() + if _, exists := NewsgroupProgressMap[newsgroup.Name]; !exists { + NewsgroupProgressMap[newsgroup.Name] = &NewsgroupProgress{ + Started: time.Now(), + LastUpdated: time.Now(), + Finished: false, + TotalArticles: 0, + } + } + resultsMutex.Unlock() // Get total article count first with date filtering totalArticles, err := getArticleCountWithDateFilter(groupDBsA, startTime, endTime) if err != nil { @@ -1190,7 +1200,12 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch log.Printf("Newsgroup: '%s' | transferNewsgroup: Closed group DBs, checking if articles exist...", newsgroup.Name) if totalArticles == 0 { - + resultsMutex.Lock() + NewsgroupProgressMap[newsgroup.Name].Finished = true + NewsgroupProgressMap[newsgroup.Name].LastUpdated = time.Now() + results = append(results, fmt.Sprintf("END Newsgroup: '%s' | No articles to process", newsgroup.Name)) + resultsMutex.Unlock() + // No articles to process if startTime != nil || endTime != nil { if VERBOSE { log.Printf("No articles found in newsgroup: %s (within specified date range)", newsgroup.Name) @@ -1205,14 +1220,9 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch // Initialize newsgroup progress tracking resultsMutex.Lock() - if _, exists := NewsgroupProgressMap[newsgroup.Name]; !exists { - NewsgroupProgressMap[newsgroup.Name] = &NewsgroupProgress{ - Started: time.Now(), - LastUpdated: time.Now(), - Finished: false, - TotalArticles: totalArticles, - } - } + progress := NewsgroupProgressMap[newsgroup.Name] + progress.TotalArticles = 
totalArticles + progress.LastUpdated = time.Now() resultsMutex.Unlock() if dryRun { From cb4f21488df27dcafbc93f94038991192057fc6d Mon Sep 17 00:00:00 2001 From: go-while Date: Wed, 8 Oct 2025 22:30:35 +0000 Subject: [PATCH 022/100] testing --- cmd/nntp-transfer/main.go | 324 ++++++++++++++++++-------- internal/nntp/nntp-client-commands.go | 99 +++++++- internal/nntp/nntp-client.go | 10 +- 3 files changed, 320 insertions(+), 113 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 2fd7441..dca8c71 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1114,7 +1114,7 @@ var ErrNotInDateRange = fmt.Errorf("article not in specified date range") // processRequeuedJobs processes any failed jobs that were requeued for retry // Returns the number of jobs processed successfully -func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponses chan chan *nntp.TTResponse, redisCli *redis.Client) (int, error) { +func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponses chan *nntp.TTSetup, redisCli *redis.Client) (int, error) { var queuedJobs []*nntp.CHTTJob jobRequeueMutex.Lock() if jobs, exists := jobRequeue[ttMode.Newsgroup]; exists { @@ -1143,7 +1143,7 @@ func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponse log.Printf("Newsgroup: '%s' | Processing requeued job %d/%d with %d articles", newsgroup, i+1, len(queuedJobs), len(job.Articles)) // pass articles to CHECK or TAKETHIS queue (async!) - responseChan, err := processBatch(ttMode, job.Articles, redisCli, job.BatchStart, job.BatchEnd, -1) + responseChan, err := processBatch(ttMode, job.Articles, redisCli, job.BatchStart, job.BatchEnd, -1, job.OffsetQ) if err != nil { log.Printf("Newsgroup: '%s' | Error processing requeued batch: %v", newsgroup, err) jobRequeueMutex.Lock() @@ -1154,7 +1154,9 @@ func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponse } if responseChan != nil { // pass the response channel to the collector channel: ttResponses - ttResponses <- responseChan + ttResponses <- &nntp.TTSetup{ + ResponseChan: responseChan, + } } } @@ -1165,7 +1167,7 @@ func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponse // transferNewsgroup transfers articles from a single newsgroup func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batchCheck int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, redisCli *redis.Client) error { - log.Printf("Newsgroup: '%s' | transferNewsgroup: Starting (getting group DBs)...", newsgroup.Name) + //log.Printf("Newsgroup: '%s' | transferNewsgroup: Starting (getting group DBs)...", newsgroup.Name) // Get group database groupDBsA, err := db.GetGroupDBs(newsgroup.Name) @@ -1173,7 +1175,7 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch return fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", newsgroup.Name, err) } - log.Printf("Newsgroup: '%s' | transferNewsgroup: Got group DBs, querying article count...", newsgroup.Name) + //log.Printf("Newsgroup: '%s' | transferNewsgroup: Got group DBs, querying article count...", newsgroup.Name) // Initialize newsgroup progress tracking resultsMutex.Lock() if _, exists := NewsgroupProgressMap[newsgroup.Name]; !exists { @@ -1191,13 +1193,13 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch return fmt.Errorf("failed to get article count for newsgroup '%s': %v", newsgroup.Name, err) } - 
log.Printf("Newsgroup: '%s' | transferNewsgroup: Got article count (%d), closing group DBs...", newsgroup.Name, totalArticles) + //log.Printf("Newsgroup: '%s' | transferNewsgroup: Got article count (%d), closing group DBs...", newsgroup.Name, totalArticles) if ferr := db.ForceCloseGroupDBs(groupDBsA); ferr != nil { log.Printf("ForceCloseGroupDBs error for '%s': %v", newsgroup.Name, ferr) } - log.Printf("Newsgroup: '%s' | transferNewsgroup: Closed group DBs, checking if articles exist...", newsgroup.Name) + //log.Printf("Newsgroup: '%s' | transferNewsgroup: Closed group DBs, checking if articles exist...", newsgroup.Name) if totalArticles == 0 { resultsMutex.Lock() @@ -1220,9 +1222,8 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch // Initialize newsgroup progress tracking resultsMutex.Lock() - progress := NewsgroupProgressMap[newsgroup.Name] - progress.TotalArticles = totalArticles - progress.LastUpdated = time.Now() + NewsgroupProgressMap[newsgroup.Name].TotalArticles = totalArticles + NewsgroupProgressMap[newsgroup.Name].LastUpdated = time.Now() resultsMutex.Unlock() if dryRun { @@ -1250,7 +1251,7 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch Newsgroup: &newsgroup.Name, CheckMode: true, } - ttResponses := make(chan chan *nntp.TTResponse, totalArticles/int64(batchCheck)+2) + ttResponses := make(chan *nntp.TTSetup, totalArticles/int64(batchCheck)+2) start := time.Now() // WaitGroup to ensure collector goroutine finishes before returning @@ -1265,8 +1266,8 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch var amux sync.Mutex var transferred, unwanted, rejected, checked, txErrors, connErrors uint64 var num uint64 - for responseChan := range ttResponses { - if responseChan == nil { + for setup := range ttResponses { + if setup == nil || setup.ResponseChan == nil { log.Printf("Newsgroup: '%s' | Warning: nil TT response channel received in collector!?", newsgroup.Name) continue } @@ -1300,7 +1301,7 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch } // free memory - CRITICAL: Lock and unlock in same scope, not with defer! 
resp.Job.Mux.Lock() - log.Printf("Newsgroup: '%s' | Cleaning up TT job #%d with %d articles (ForceCleanUp)", newsgroup.Name, resp.Job.JobID, len(resp.Job.Articles)) + //log.Printf("Newsgroup: '%s' | Cleaning up TT job #%d with %d articles (ForceCleanUp)", newsgroup.Name, resp.Job.JobID, len(resp.Job.Articles)) // Clean up Articles and their internal fields for i := range resp.Job.Articles { if resp.Job.Articles[i] != nil { @@ -1324,9 +1325,9 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch delete(resp.Job.ArticleMap, msgid) } resp.Job.ArticleMap = nil - resp.Job.Mux.Unlock() - }(responseChan, num) + resp.Job = nil + }(setup.ResponseChan, num) } log.Printf("Newsgroup: '%s' | Collector: ttResponses closed, waiting for %d response processors to finish...", newsgroup.Name, num) // Wait for all response channel processors to finish @@ -1355,7 +1356,7 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch } resultsMutex.Unlock() }() - + OffsetQueue := &nntp.OffsetQueue{} // Get articles in database batches (much larger than network batches) for offset := ioffset; offset < totalArticles; offset += dbBatchSize { if common.WantShutdown() { @@ -1393,6 +1394,7 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch //} // Process articles in network batches for i := 0; i < len(articles); i += batchCheck { + OffsetQueue.Add(1) if common.WantShutdown() { log.Printf("WantShutdown in newsgroup: '%s' (offset %d)", newsgroup.Name, offset) return nil @@ -1403,22 +1405,26 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch end = len(articles) } // pass articles to CHECK or TAKETHIS queue (async!) - responseChan, err := processBatch(ttMode, articles[i:end], redisCli, int64(i), int64(end), offset) + responseChan, err := processBatch(ttMode, articles[i:end], redisCli, int64(i), int64(end), offset, OffsetQueue) if err != nil { log.Printf("Newsgroup: '%s' | Error processing batch %d-%d: %v", newsgroup.Name, i+1, end, err) return fmt.Errorf("error processing batch %d-%d for newsgroup '%s': %v", i+1, end, newsgroup.Name, err) } // pass the response channel to the collector channel: ttResponses - ttResponses <- responseChan + ttResponses <- &nntp.TTSetup{ + ResponseChan: responseChan, + } + OffsetQueue.Wait(2) // wait for offset batches to finish, less than 2 in flight } remainingArticles -= int64(len(articles)) if VERBOSE { log.Printf("Newsgroup: '%s' | Pushed to queue (offset %d/%d) remaining: %d (Check=%t)", newsgroup.Name, offset, totalArticles, remainingArticles, ttMode.UseCHECK()) //log.Printf("Newsgroup: '%s' | Pushed (offset %d/%d) total: %d/%d (unw: %d / rej: %d) (Check=%t)", newsgroup.Name, offset, totalArticles, transferred, remainingArticles, ttMode.Unwanted, ttMode.Rejected, ttMode.GetMode()) } + } // end for offset range totalArticles - log.Printf("Newsgroup: '%s' | Main article loop completed, checking for requeued jobs...", newsgroup.Name) + //log.Printf("Newsgroup: '%s' | Main article loop completed, checking for requeued jobs...", newsgroup.Name) // Process any remaining requeued jobs after main loop completes // This handles failures that occurred in the last batch @@ -1441,17 +1447,17 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch // Loop again to check if any of those jobs failed and were requeued } - log.Printf("Newsgroup: '%s' | Final requeue processing completed, closing ttResponses channel...", newsgroup.Name) + 
//log.Printf("Newsgroup: '%s' | Final requeue processing completed, closing ttResponses channel...", newsgroup.Name) // Close the ttResponses channel to signal collector goroutine to finish close(ttResponses) - log.Printf("Newsgroup: '%s' | ttResponses channel closed, waiting for collector to finish...", newsgroup.Name) + //log.Printf("Newsgroup: '%s' | ttResponses channel closed, waiting for collector to finish...", newsgroup.Name) // Wait for collector goroutine to finish processing all responses collectorWG.Wait() - log.Printf("Newsgroup: '%s' | All jobs completed and responses collected", newsgroup.Name) + //log.Printf("Newsgroup: '%s' | All jobs completed and responses collected", newsgroup.Name) return nil } // end func transferNewsgroup @@ -1477,7 +1483,7 @@ var upperLevel float64 = 95.0 // processBatch processes a batch of articles using NNTP streaming protocol (RFC 4644) // Uses TAKETHIS primarily, falls back to CHECK when success rate < 95% -func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCli *redis.Client, batchStart int64, batchEnd int64, dbOffset int64) (chan *nntp.TTResponse, error) { +func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCli *redis.Client, batchStart int64, batchEnd int64, dbOffset int64, offsetQ *nntp.OffsetQueue) (chan *nntp.TTResponse, error) { if len(articles) == 0 { log.Printf("processBatch: no articles in this batch for newsgroup '%s'", *ttMode.Newsgroup) @@ -1509,6 +1515,7 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl OffsetStart: dbOffset, BatchStart: batchStart, BatchEnd: batchEnd, + OffsetQ: offsetQ, } var redis_cache_hits int if redisCli != nil && len(articles) > 0 { @@ -1584,9 +1591,9 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl WorkerQueueLength[workerID]++ WorkerQueueLengthMux.Unlock() - log.Printf("Newsgroup: '%s' | CheckWorker (%d) Queue job #%d with %d message IDs", *ttMode.Newsgroup, workerID, batchedJob.JobID, len(batchedJob.MessageIDs)) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queue job #%d with %d message IDs. 
CheckQ=%d", *ttMode.Newsgroup, workerID, batchedJob.JobID, len(batchedJob.MessageIDs), len(CheckQueues[workerID])) CheckQueues[workerID] <- batchedJob // checkQueue <- batchedJob - log.Printf("Newsgroup: '%s' | CheckWorker (%d) Job #%d queued", *ttMode.Newsgroup, workerID, batchedJob.JobID) + log.Printf("Newsgroup: '%s' | CheckWorker (%d) queued Job #%d", *ttMode.Newsgroup, workerID, batchedJob.JobID) return batchedJob.ResponseChan, nil /* disabled @@ -1790,7 +1797,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art // Phase 2: Read all responses in order log.Printf("Newsgroup: '%s' | Phase 2: Reading TAKETHIS responses for %d sent articles...", newsgroup, len(artChan)) for cr := range artChan { - + log.Printf("Newsgroup: '%s' | Reading TAKETHIS response for article '%s' (CmdID=%d) (i=%d/%d)", newsgroup, cr.Article.MessageID, cr.CmdId, countDone+1, len(articles)) job.TTMode.IncrementTmp() takeThisResponseCode, err := conn.ReadTakeThisResponseStreaming(cr.CmdId) if err != nil { @@ -1799,6 +1806,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art log.Printf("ERROR Newsgroup: '%s' | Failed to read TAKETHIS response for %s: %v", newsgroup, cr.Article.MessageID, err) return transferred, rejected, redis_cached, fmt.Errorf("failed to read TAKETHIS response for %s: %v", cr.Article.MessageID, err) } + log.Printf("Newsgroup: '%s' | TAKETHIS response '%s': %d", newsgroup, cr.Article.MessageID, takeThisResponseCode) countDone++ // Update success rate tracking switch takeThisResponseCode { @@ -2064,6 +2072,7 @@ forever: JobID: job.JobID, Newsgroup: job.Newsgroup, Articles: job.Articles, + OffsetQ: job.OffsetQ, } job.Mux.Unlock() @@ -2083,6 +2092,7 @@ forever: job.ArticleMap = nil job.MessageIDs = nil job.WantedIDs = nil + job.OffsetQ = nil job.Mux.Unlock() } } @@ -2091,7 +2101,7 @@ forever: // Clean up ReturnSignal maps and unlink pointers // Clean up jobMap - nil all pointers before deleting - log.Printf("CHTTWorker (%d) cleaning up jobMap with %d entries", i, len(rs.jobMap)) + //log.Printf("CHTTWorker (%d) cleaning up jobMap with %d entries", i, len(rs.jobMap)) for msgID := range rs.jobMap { rs.jobMap[msgID] = nil delete(rs.jobMap, msgID) @@ -2099,21 +2109,21 @@ forever: rs.jobMap = nil // Clean up jobsQueued - log.Printf("CHTTWorker (%d) cleaning up jobsQueued with %d entries", i, len(rs.jobsQueued)) + //log.Printf("CHTTWorker (%d) cleaning up jobsQueued with %d entries", i, len(rs.jobsQueued)) for job := range rs.jobsQueued { delete(rs.jobsQueued, job) } rs.jobsQueued = nil // Clean up jobsReadOK - log.Printf("CHTTWorker (%d) cleaning up jobsReadOK with %d entries", i, len(rs.jobsReadOK)) + //log.Printf("CHTTWorker (%d) cleaning up jobsReadOK with %d entries", i, len(rs.jobsReadOK)) for job := range rs.jobsReadOK { delete(rs.jobsReadOK, job) } rs.jobsReadOK = nil // Clean up jobs slice - nil all pointers - log.Printf("CHTTWorker (%d) cleaning up jobs slice with %d entries", i, len(rs.jobs)) + //log.Printf("CHTTWorker (%d) cleaning up jobs slice with %d entries", i, len(rs.jobs)) for idx := range rs.jobs { rs.jobs[idx] = nil } @@ -2143,6 +2153,8 @@ var JobsToRetryMux sync.Mutex type ReturnSignal struct { Mux sync.Mutex + CHECK bool + RunTT bool slotID int ExitChan chan *ReturnSignal errChan chan struct{} @@ -2153,22 +2165,92 @@ type ReturnSignal struct { jobs []*nntp.CHTTJob } +func (rs *ReturnSignal) UnlockTT() { + rs.Mux.Lock() + rs.RunTT = false + rs.Mux.Unlock() +} + +func (rs *ReturnSignal) GetLockTT() { + for { + rs.Mux.Lock() 
+ if !rs.RunTT && !rs.CHECK { + rs.RunTT = true + rs.Mux.Unlock() + return + } + if rs.RunTT { + rs.Mux.Unlock() + return + } + rs.Mux.Unlock() + log.Printf("GetLockTT: waiting for RunTT to be true...") + time.Sleep(time.Millisecond * 5000) + } +} + +func (rs *ReturnSignal) UnlockCHECKforTTwithWait() { + for { + rs.Mux.Lock() + if !rs.RunTT { + rs.CHECK = false + rs.RunTT = true + rs.Mux.Unlock() + return + } + rs.Mux.Unlock() + log.Printf("UnlockCHECKforTTwithWait: waiting for RunTT to be false...") + time.Sleep(time.Millisecond * 5000) + } +} +func (rs *ReturnSignal) UnlockCHECKforTT() { + rs.Mux.Lock() + rs.CHECK = false + rs.RunTT = true + rs.Mux.Unlock() +} + +func (rs *ReturnSignal) BlockCHECK() { + rs.Mux.Lock() + rs.CHECK = false + rs.Mux.Unlock() +} + +func (rs *ReturnSignal) LockCHECK() { + for { + rs.Mux.Lock() + if !rs.RunTT { + rs.CHECK = true + rs.Mux.Unlock() + return + } + log.Printf("LockCHECK: waiting for RunTT to be false... rs.CHECK=%t rs.RunTT=%t", rs.CHECK, rs.RunTT) + rs.Mux.Unlock() + time.Sleep(time.Millisecond * 5000) + } +} + func replyChan(request chan struct{}, reply chan struct{}) { select { case <-request: // got a reply request - reply <- struct{}{} // send back + select { + case reply <- struct{}{}: // send back + default: + // pass, is full + } default: + // pass, no request } } func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue chan *nntp.CHTTJob) { readResponsesChan := make(chan *nntp.ReadRequest, BatchCheck) - rrRetChan := make(chan struct{}, BatchCheck) - takeThisChan := make(chan *nntp.CHTTJob) // unbuffered + //rrRetChan := make(chan struct{}, BatchCheck) + takeThisChan := make(chan *nntp.CHTTJob, 2) // buffer 2 errChan := make(chan struct{}, 4) tickChan := make(chan struct{}, 1) - flipflopChan := make(chan struct{}, 1) + //flipflopChan := make(chan struct{}, 1) requestReplyJobDone := make(chan struct{}, 1) replyJobDone := make(chan struct{}, 1) @@ -2200,6 +2282,16 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue log.Printf("CheckWorker (%d): Tick WantShutdown, exiting", workerID) return } + waiting: + for { + if len(takeThisChan) > 1 { + log.Printf("CheckWorker (%d): waiting takeThisChan full (%d)", workerID, len(takeThisChan)) + time.Sleep(time.Millisecond * 16) + continue waiting + } + break + } + // Get the next job to process rs.Mux.Lock() if len(rs.jobs) == 0 { @@ -2207,15 +2299,17 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue log.Printf("CheckWorker (%d): Ticked but no jobs in queue, continue...", workerID) continue loop } + log.Printf("CheckWorker (%d): Ticked and found %d jobs in queue", workerID, len(rs.jobs)) currentJob := rs.jobs[0] rs.jobs = rs.jobs[1:] // Remove first job from queue rs.Mux.Unlock() + if currentJob == nil { continue loop } - + currentJob.OffsetQ.Done() if currentJob.TTMode.UseCHECK() { - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d waits to check %d message IDs in batches of %d", *currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs), BatchCheck) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d waits to check %d message IDs in batches of %d", *currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs), BatchCheck) // Process checkIds in batches of BatchCheck for batchStart := 0; batchStart < len(currentJob.MessageIDs); batchStart += BatchCheck { @@ -2224,35 +2318,42 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue 
batchEnd = len(currentJob.MessageIDs) } // Lock for this batch only - common.ChanLock(flipflopChan) - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d Sending CHECK for batch %d-%d (offset %d: %d-%d) (%d messages)", *currentJob.Newsgroup, workerID, currentJob.JobID, batchStart, batchEnd, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, len(currentJob.MessageIDs[batchStart:batchEnd])) + + //common.ChanLock(flipflopChan) if !conn.IsConnected() { rs.Mux.Lock() rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) // requeue at front rs.Mux.Unlock() - common.ChanRelease(flipflopChan) + //common.ChanRelease(flipflopChan) log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d connection lost before SendCheckMultiple for batch (offset %d: %d-%d)", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) time.Sleep(time.Second) return } - err := conn.SendCheckMultiple(currentJob.MessageIDs[batchStart:batchEnd], readResponsesChan, rrRetChan, currentJob) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquire LOCK CHECK for batch (offset %d: %d-%d) (%d messages)", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, len(currentJob.MessageIDs[batchStart:batchEnd])) + rs.LockCHECK() + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquired CHECK lock for batch (offset %d: %d-%d)", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) + err := conn.SendCheckMultiple(currentJob.MessageIDs[batchStart:batchEnd], readResponsesChan, currentJob) if err != nil { log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d SendCheckMultiple error for batch (offset %d: %d-%d): %v", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, err) time.Sleep(time.Second) rs.Mux.Lock() rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) // requeue at front rs.Mux.Unlock() - common.ChanRelease(flipflopChan) + rs.BlockCHECK() + //common.ChanRelease(flipflopChan) return } - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d Sent CHECK for batch (offset %d: %d-%d), waiting for responses...", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d Sent CHECK for batch (offset %d: %d-%d), responses will be read asynchronously...", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) + // NOTE: CHECK lock remains locked! 
It will be unlocked by the response reader + // when all responses are processed (see rs.UnlockCHECKforTT() in response reader) + /* disabled deadline := time.After(time.Minute) timedOut := false replies := 0 for i, msgID := range currentJob.MessageIDs[batchStart:batchEnd] { if msgID != nil { - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d waiting for rrRetChan (offset %d: %d-%d) replies=%d readResponsesChan=%d n=%d/%d", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, replies, len(readResponsesChan), i+1, len(currentJob.MessageIDs[batchStart:batchEnd])) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d waiting for rrRetChan (offset %d: %d-%d) replies=%d readResponsesChan=%d n=%d/%d", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, replies, len(readResponsesChan), i+1, len(currentJob.MessageIDs[batchStart:batchEnd])) select { case <-deadline: log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d timeout waiting for rrRetChan for batch (offset %d: %d-%d) replies=%d readResponsesChan=%d n=%d/%d", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, replies, len(readResponsesChan), i+1, len(currentJob.MessageIDs[batchStart:batchEnd])) @@ -2271,21 +2372,6 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Lock() // requeue to front rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) - /* disabled - // Clean up all message IDs from jobMap - for _, msgID := range currentJob.MessageIDs { - if msgID != nil { - if _, exists := rs.jobMap[msgID]; exists { - rs.jobMap[msgID] = nil - delete(rs.jobMap, msgID) - } - } - } - // Clean up job tracking - delete(rs.jobsQueued, currentJob) - delete(rs.jobsReadOK, currentJob) - rs.Mux.Unlock() - */ // Send failure response currentJob.Response(false, fmt.Errorf("CHECK response timeout")) @@ -2293,22 +2379,25 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue common.ChanRelease(flipflopChan) return } + */ // Release lock after batch is complete, allowing TAKETHIS to run - common.ChanRelease(flipflopChan) + //common.ChanRelease(flipflopChan) } } else { log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d skipping CHECK for %d message IDs (TAKETHIS mode)", *currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs)) currentJob.WantedIDs = currentJob.MessageIDs + rs.UnlockCHECKforTTwithWait() takeThisChan <- currentJob // local takethis chan sharing the same connection log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d sent to local TakeThisChan", *currentJob.Newsgroup, workerID, currentJob.JobID) } - replyChan(requestReplyJobDone, replyJobDone) // see if anybody is waiting and reply //lastRun = time.Now() // Check if there are more jobs to process + //log.Printf("CheckWorker (%d): job #%d CHECK done, checking for more jobs...", workerID, currentJob.JobID) rs.Mux.Lock() hasMoreJobs := len(rs.jobs) > 0 rs.Mux.Unlock() - + replyChan(requestReplyJobDone, replyJobDone) // see if anybody is waiting and reply + //log.Printf("CheckWorker (%d): job #%d CHECK done, hasMoreJobs=%v", workerID, currentJob.JobID, hasMoreJobs) // Decrement queue length for this worker (job processing complete) WorkerQueueLengthMux.Lock() if workerID < len(WorkerQueueLength) && WorkerQueueLength[workerID] > 0 { @@ -2324,6 +2413,7 @@ func CHTTWorker(workerID int, conn 
*nntp.BackendConn, rs *ReturnSignal, checkQue // Channel full, will be processed on next tick } } + log.Printf("CheckWorker (%d): job #%d CHECKs sent, loop to next job", workerID, currentJob.JobID) case <-ticker.C: if common.WantShutdown() { @@ -2336,6 +2426,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue if hasWork { select { case tickChan <- struct{}{}: + log.Printf("CheckWorker (%d): Ticker ticked, sent tickChan signal", workerID) default: // tickChan full, tickChan will tick } @@ -2355,24 +2446,28 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue for { select { case <-errChan: + log.Printf("CheckWorker (%d): Read CHECK responses got errChan signal...", workerID) errChan <- struct{}{} - log.Printf("CheckWorker (%d): Read CHECK responses got errChan signal... exiting", workerID) + log.Printf("CheckWorker (%d): Read CHECK responses exiting", workerID) return case rr := <-readResponsesChan: - + log.Printf("CheckWorker (%d): Read CHECK got readRequest for rr: '%v'", workerID, rr) if rr == nil || rr.MsgID == nil { log.Printf("CheckWorker (%d): Read CHECK got nil readRequest, skipping", workerID) continue loop } if common.WantShutdown() { log.Printf("CheckWorker (%d): Read CHECK WantShutdown, exiting", workerID) - rr.ReturnReadRequest(rrRetChan) + //rr.ReturnReadRequest(rrRetChan) + rr.ClearReadRequest() return } + log.Printf("CheckWorker (%d): Read CHECK response (do conn check) for msgID: %s (cmdId:%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) if !conn.IsConnected() { log.Printf("CheckWorker (%d): Read CHECK connection lost, exiting", workerID) - rr.ReturnReadRequest(rrRetChan) + //rr.ReturnReadRequest(rrRetChan) + rr.ClearReadRequest() return } log.Printf("CheckWorker (%d): Reading CHECK response for msgID: %s (cmdId:%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) @@ -2381,6 +2476,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue if err := conn.SetReadDeadline(time.Now().Add(1 * time.Minute)); err != nil { log.Printf("Failed to set read deadline: %v", err) nntp.ReturnReadRequest(rrRetChan) + rr.ClearReadRequest() return } */ @@ -2389,7 +2485,8 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue conn.TextConn.EndResponse(rr.CmdID) if code == 0 && err != nil { log.Printf("Failed to read CHECK response: %v", err) - rr.ReturnReadRequest(rrRetChan) + //rr.ReturnReadRequest(rrRetChan) + rr.ClearReadRequest() return } log.Printf("CheckWorker (%d): Got CHECK response line: '%s' for msgID: %s (cmdId:%d MID=%d/%d) took: %v ms", workerID, line, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) @@ -2397,6 +2494,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue if err := conn.SetReadDeadline(time.Time{}); err != nil { log.Printf("Failed to set unset read deadline: %v", err) nntp.ReturnReadRequest(rrRetChan) + rr.ClearReadRequest() return } */ @@ -2405,7 +2503,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rr.Job.Increment(nntp.IncrFLAG_CHECKED) if rr.N == 1 { log.Printf("CheckWorker (%d): time to first response for msgID: %s (cmdId:%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) - } else if responseCount >= BatchCheck/2 { + } else if responseCount >= 10 { avg := float64(tookTime) / float64(responseCount) if avg > 1 { log.Printf("CheckWorker (%d): Read %d CHECK responses, avg latency: 
%.1f ms", workerID, responseCount, avg) @@ -2422,24 +2520,27 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue parts := strings.Fields(line) if len(parts) < 1 { log.Printf("ERROR in CheckWorker: Malformed CHECK response: %s", line) - rr.ReturnReadRequest(rrRetChan) + //rr.ReturnReadRequest(rrRetChan) + rr.ClearReadRequest() return } if parts[0] != *rr.MsgID { log.Printf("ERROR in CheckWorker: Mismatched CHECK response: expected %s, got %s", *rr.MsgID, parts[0]) - rr.ReturnReadRequest(rrRetChan) + //rr.ReturnReadRequest(rrRetChan) + rr.ClearReadRequest() return } - log.Printf("CheckWorker (%d): DEBUG1 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) rs.Mux.Lock() job, exists := rs.jobMap[rr.MsgID] rs.Mux.Unlock() + log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG1 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) if !exists { - log.Printf("ERROR in CheckWorker: ReadCheckResponse msgId did not exist in jobMap: %s", *rr.MsgID) - rr.ReturnReadRequest(rrRetChan) + log.Printf("Newsgroup: '%s' | ERROR in CheckWorker: ReadCheckResponse msgId did not exist in jobMap: %s", *job.Newsgroup, *rr.MsgID) + //rr.ReturnReadRequest(rrRetChan) + rr.ClearReadRequest() continue loop } - log.Printf("CheckWorker (%d): DEBUG2 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG2 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) rs.Mux.Lock() rs.jobMap[rr.MsgID] = nil // Nil the pointer before deleting delete(rs.jobMap, rr.MsgID) @@ -2447,33 +2548,35 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Unlock() switch code { case 238: - log.Printf("Got Response: Wanted Article '%s': code=%d", *rr.MsgID, code) + log.Printf("Newsgroup: '%s' | Got Response: Wanted Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) job.AppendWantedMessageID(rr.MsgID) case 438: - log.Printf("Got Response: Unwanted Article '%s': code=%d", *rr.MsgID, code) + log.Printf("Newsgroup: '%s' | Got Response: Unwanted Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) job.Increment(nntp.IncrFLAG_UNWANTED) case 431: - log.Printf("Got Response: Retry Article '%s': code=%d", *rr.MsgID, code) + log.Printf("Newsgroup: '%s' | Got Response: Retry Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) job.Increment(nntp.IncrFLAG_RETRY) default: log.Printf("Newsgroup: '%s' | Unknown CHECK response: line='%s' code=%d expected msgID %s", *job.Newsgroup, line, code, *rr.MsgID) } // check if all jobs are done - log.Printf("CheckWorker (%d): DEBUG3 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG3 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) rs.Mux.Lock() queuedCount, qexists := rs.jobsQueued[job] readCount, rexists := rs.jobsReadOK[job] rs.Mux.Unlock() if !qexists || !rexists { log.Printf("Newsgroup: '%s' | ERROR in CheckWorker: queuedCount or readCount did not exist for a job?!", *job.Newsgroup) - rr.ReturnReadRequest(rrRetChan) + //rr.ReturnReadRequest(rrRetChan) + 
rr.ClearReadRequest() continue loop } - log.Printf("CheckWorker (%d): DEBUG4 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) - rr.ReturnReadRequest(rrRetChan) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG4 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) + //rr.ReturnReadRequest(rrRetChan) + rr.ClearReadRequest() if queuedCount == readCount { rs.Mux.Lock() delete(rs.jobsQueued, job) @@ -2481,15 +2584,34 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Unlock() if len(job.WantedIDs) > 0 { // Pass job to TAKETHIS worker via channel - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d got all CHECK responses (%d), passing to TAKETHIS worker takeThisChanLen=%d", *job.Newsgroup, workerID, job.JobID, queuedCount, len(takeThisChan)) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5 job #%d got all %d CHECK responses, passing to TAKETHIS worker (wanted: %d articles)", *job.Newsgroup, workerID, job.JobID, queuedCount, len(job.WantedIDs)) + if len(readResponsesChan) == 0 { + rs.UnlockCHECKforTT() // Unlock CHECK, lock for TAKETHIS + } else { + rs.BlockCHECK() + log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5a WARNING: readResponsesChan not empty (%d), delaying TAKETHIS unlock", *job.Newsgroup, workerID, len(readResponsesChan)) + go func() { + for { + time.Sleep(time.Millisecond) + if len(readResponsesChan) == 0 { + rs.UnlockCHECKforTT() + return + } + } + }() + } takeThisChan <- job // local takethis chan sharing the same connection - log.Printf("Newsgroup: '%s' | CheckWorker (%d): Sent job #%d to TAKETHIS worker (wanted: %d/%d)", *job.Newsgroup, workerID, job.JobID, len(job.WantedIDs), queuedCount) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5c Sent job #%d to TAKETHIS worker (wanted: %d/%d)", *job.Newsgroup, workerID, job.JobID, len(job.WantedIDs), queuedCount) + } else { - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d got %d CHECK responses but server wants none", *job.Newsgroup, workerID, job.JobID, queuedCount) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG6 job #%d got %d CHECK responses but server wants none", *job.Newsgroup, workerID, job.JobID, queuedCount) // Send response and close channel for jobs with no wanted articles job.Response(true, nil) } + } else { + log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG6 job #%d CHECK responses so far: %d/%d readResponsesChan=%d", *job.Newsgroup, workerID, job.JobID, readCount, queuedCount, len(readResponsesChan)) } + continue loop } // end select } // end forever }() @@ -2535,13 +2657,14 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue continue } - log.Printf("Newsgroup: '%s' | TTworker (%d): Preparing to send TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, workerID, job.JobID, len(wantedArticles)) - common.ChanLock(flipflopChan) + log.Printf("Newsgroup: '%s' | TTworker (%d): Prepare locking to send TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, workerID, job.JobID, len(wantedArticles)) + rs.GetLockTT() + //common.ChanLock(flipflopChan) log.Printf("Newsgroup: '%s' | TTworker (%d): Sending TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, workerID, job.JobID, len(wantedArticles)) // Send TAKETHIS commands using existing function transferred, rejected, redis_cached, err := 
sendArticlesBatchViaTakeThis(conn, wantedArticles, job, *job.Newsgroup, rs.redisCli)
-			common.ChanRelease(flipflopChan)
-
+			//common.ChanRelease(flipflopChan)
+			rs.UnlockTT()
 			if err != nil {
 				log.Printf("Newsgroup: '%s' | TTworker (%d): Error in TAKETHIS job #%d: %v", *job.Newsgroup, workerID, job.JobID, err)
 				job.Response(false, err)
@@ -2583,15 +2706,20 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue
 			// Build jobMap for tracking which message IDs belong to this job
 			// and count queued messages
 			rs.Mux.Lock()
-			queueFull := len(rs.jobs) > 1
-			rs.Mux.Unlock()
+			queueFull := len(rs.jobs) > 1 || len(takeThisChan) > 1
 			if queueFull {
 				log.Printf("Newsgroup: '%s' | CHTTworker (%d): got job #%d with %d message IDs. queued=%d ... waiting...", *job.Newsgroup, workerID, job.JobID, len(job.MessageIDs), len(rs.jobs))
-				requestReplyJobDone <- struct{}{}
+				select {
+				case requestReplyJobDone <- struct{}{}:
+				default:
+					log.Printf("ERROR Newsgroup: '%s' | CHTTworker (%d): job #%d could not signal requestReplyJobDone, channel full", *job.Newsgroup, workerID, job.JobID)
+					// pass
+				}
 			}
-
+			rs.Mux.Unlock()
 			if queueFull {
 				start := time.Now()
+				wait := start
 			waitForReply:
 				for {
 					select {
@@ -2599,14 +2727,18 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue
 						// pass
 					case <-time.After(time.Millisecond * 16):
 						rs.Mux.Lock()
-						queueFull = len(rs.jobs) > 1
+						queueFull = len(rs.jobs) > 1 || len(takeThisChan) > 1
 						rs.Mux.Unlock()
 						if !queueFull {
 							break waitForReply
 						}
+						// log at most once per second
+						if time.Since(wait) > time.Second {
+							log.Printf("Newsgroup: '%s' | CHTTworker (%d): pre append job #%d waiting since %v rs.jobs=%d takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, time.Since(start), len(rs.jobs), len(takeThisChan))
+							wait = time.Now()
+						}
 					}
 				}
-				log.Printf("Newsgroup: '%s' | CHTTworker (%d): waited %v for previous jobs to clear before queuing job #%d", *job.Newsgroup, workerID, time.Since(start), job.JobID)
 			}
 
 			rs.Mux.Lock()
@@ -2620,14 +2752,14 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue
 			job.Mux.Unlock()
 			// Add job to processing queue
 			rs.jobs = append(rs.jobs, job)
-			rs.Mux.Unlock()
-
 			// Signal ticker to process this job
 			select {
 			case tickChan <- struct{}{}:
+				log.Printf("Newsgroup: '%s' | CHTTworker (%d): signal ticker start job #%d with %d message IDs. 
queued=%d", *job.Newsgroup, workerID, job.JobID, len(job.MessageIDs), len(rs.jobs)) default: // tickChan full, will be processed on next tick } + rs.Mux.Unlock() } // end select } // end for } // end func CheckWorker diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 87ba177..207f125 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -51,6 +51,57 @@ type TakeThisMode struct { CheckMode bool // Start with TAKETHIS mode (false) } +type TTSetup struct { + ResponseChan chan *TTResponse + OffsetQ *OffsetQueue +} + +type OffsetQueue struct { + mux sync.RWMutex + isleep time.Duration + queued int +} + +func (o *OffsetQueue) Wait(n int) { + start := time.Now() + for { + o.mux.RLock() + if o.queued < n { + o.mux.RUnlock() + o.mux.Lock() + o.isleep = o.isleep / 2 + if o.isleep < time.Millisecond { + o.isleep = time.Millisecond + } + o.mux.Unlock() + log.Printf("OffsetQueue: waited (%d ms) for %d batches to finish, currently queued: %d", time.Since(start).Milliseconds(), n, o.queued) + return + } + log.Printf("OffsetQueue: waiting for %d batches to finish, currently queued: %d", n, o.queued) + o.mux.RUnlock() + o.mux.Lock() + o.isleep += time.Millisecond + if o.isleep > time.Millisecond*5000 { + o.isleep = time.Millisecond * 5000 + } + time.Sleep(o.isleep) + o.mux.Unlock() + } +} + +func (o *OffsetQueue) Done() { + o.mux.Lock() + defer o.mux.Unlock() + o.queued-- + log.Printf("OffsetQueue: a batch is done, still queued: %d", o.queued) +} +func (o *OffsetQueue) Add(n int) { + o.mux.Lock() + defer o.mux.Unlock() + o.queued += n + log.Printf("OffsetQueue: added %d batches, now queued: %d", n, o.queued) +} + type TTResponse struct { Job *CHTTJob ForceCleanUp bool @@ -70,14 +121,18 @@ type ReadRequest struct { MsgID *string } +func (rr *ReadRequest) ClearReadRequest() { + rr.Job = nil + rr.MsgID = nil + rr = nil +} + func (rr *ReadRequest) ReturnReadRequest(channel chan struct{}) { select { case channel <- struct{}{}: default: } - rr.Job = nil - rr.MsgID = nil - rr = nil + rr.ClearReadRequest() } // batched CHECK/TAKETHIS Job @@ -104,6 +159,7 @@ type CHTTJob struct { OffsetStart int64 BatchStart int64 BatchEnd int64 + OffsetQ *OffsetQueue } func (job *CHTTJob) Response(ForceCleanUp bool, Err error) { @@ -1245,33 +1301,45 @@ func (c *BackendConn) parseHeaderLine(line string) (*HeaderLine, error) { } // SendCheckMultiple sends CHECK commands for multiple message IDs without returning responses! 
-func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readResponsesChan chan *ReadRequest, retChan chan struct{}, job *CHTTJob) error {
+func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readResponsesChan chan *ReadRequest, job *CHTTJob) error {
 	c.mux.Lock()
-	defer c.mux.Unlock()
+
 	if !c.connected {
+		c.mux.Unlock()
 		return fmt.Errorf("not connected")
 	}
 	if c.ModeReader {
+		c.mux.Unlock()
 		return fmt.Errorf("cannot check article in reader mode")
 	}
+	c.lastUsed = time.Now()
+	c.mux.Unlock()
 	if len(messageIDs) == 0 {
 		return fmt.Errorf("no message IDs provided")
 	}
-	//writer := bufio.NewWriter(c.conn)
-	c.lastUsed = time.Now()
+
+	//writer := bufio.NewWriter(c.conn)
+	//defer writer.Flush()
+	log.Printf("SendCheckMultiple commands for %d message IDs", len(messageIDs))
 	for n, msgID := range messageIDs {
 		if msgID == nil || *msgID == "" {
 			log.Printf("Skipping empty message ID in CHECK command")
 			continue
 		}
+		log.Printf("Newsgroup: '%s' | Preparing c.mux.Lock() 'CHECK %s' (%d/%d)", *job.Newsgroup, *msgID, n+1, len(messageIDs))
+		c.mux.Lock()
 		id, err := c.TextConn.Cmd("CHECK %s", *msgID)
+		//_, err := fmt.Fprintf(c.conn, "CHECK %s%s", *msgID, CRLF)
+		c.mux.Unlock()
 		if err != nil {
 			return fmt.Errorf("failed to send CHECK command for %s: %w", *msgID, err)
 		}
+		log.Printf("Newsgroup: '%s' | Sent CHECK command for %s (CmdID=%d) notify readResponsesChan=%d", *job.Newsgroup, *msgID, id, len(readResponsesChan))
 		readResponsesChan <- &ReadRequest{CmdID: id, Job: job, Reqs: len(messageIDs), MsgID: msgID, N: n + 1}
+		log.Printf("Newsgroup: '%s' | Notify reader done for %s (CmdID=%d) readResponsesChan=%d", *job.Newsgroup, *msgID, id, len(readResponsesChan))
+		id++
 	}
 	return nil
 }
@@ -1441,6 +1509,13 @@ func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *str
 	return code, nil
 }
 
+func (c *BackendConn) GetBufSize(size int) int {
+	if size+2048 <= 16*1024 {
+		return size + 2048
+	}
+	return 16 * 1024 // hardcoded default 16KB max buffer size
+}
+
 // SendTakeThisArticleStreaming sends TAKETHIS command and article content without waiting for response
 // Returns command ID for later response reading - used for streaming mode
 func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string, newsgroup string) (uint, error) {
@@ -1460,7 +1535,7 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp
 	if err != nil {
 		return 0, err
 	}
-	writer := bufio.NewWriterSize(c.conn, article.Bytes+2048) // Slightly larger buffer than article size for headers
+	writer := bufio.NewWriterSize(c.conn, c.GetBufSize(article.Bytes)) // Slightly larger buffer than article size for headers
 	// Send TAKETHIS command
 	id, err := c.TextConn.Cmd("TAKETHIS %s", article.MessageID)
 	if err != nil {
@@ -1518,17 +1593,17 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp
 // ReadTakeThisResponseStreaming reads a TAKETHIS response using the command ID
 // Used in streaming mode after all articles have been sent
 func (c *BackendConn) ReadTakeThisResponseStreaming(id uint) (int, error) {
-	c.mux.Lock()
-	defer c.mux.Unlock()
-
+	log.Printf("*BackendConn.ReadTakeThisResponseStreaming: wait command ID %d Response", id)
 	// Read TAKETHIS response
 	c.TextConn.StartResponse(id)
 	defer c.TextConn.EndResponse(id)
-
+	c.mux.Lock()
+	defer c.mux.Unlock()
 	code, _, err := c.TextConn.ReadCodeLine(239)
 	if code == 0 && err != nil {
 		return 0, fmt.Errorf("failed to read TAKETHIS response: %w", err)
 	}
+	log.Printf("got 
*BackendConn.ReadTakeThisResponseStreaming: command ID %d: code=%d", id, code) // Parse response // Format: code [message] diff --git a/internal/nntp/nntp-client.go b/internal/nntp/nntp-client.go index cd063ad..04c0973 100644 --- a/internal/nntp/nntp-client.go +++ b/internal/nntp/nntp-client.go @@ -55,10 +55,9 @@ var DefaultBufferTX int = 64 * 1024 type BackendConn struct { conn net.Conn TextConn *textproto.Conn - //Writer *bufio.Writer - Backend *BackendConfig - mux sync.RWMutex - Pool *Pool // link to parent pool + Backend *BackendConfig + mux sync.RWMutex + Pool *Pool // link to parent pool // Connection state connected bool @@ -309,6 +308,7 @@ func (c *BackendConn) CloseFromPoolOnly() error { func (c *BackendConn) IsConnected() bool { c.mux.Lock() + log.Printf("IsConnected check: connected=%v conn=%v", c.connected, c.conn) defer c.mux.Unlock() if !c.connected { return false @@ -320,7 +320,7 @@ func (c *BackendConn) IsConnected() bool { } // SetReadDeadline sets the read deadline for the connection -func (c *BackendConn) SetReadDeadline(t time.Time) error { +func (c *BackendConn) xSetReadDeadline(t time.Time) error { if c.conn == nil { return fmt.Errorf("connection not established") From e940a1e6adcd23270aafa3e76ce24b636e3a8eb6 Mon Sep 17 00:00:00 2001 From: go-while Date: Wed, 8 Oct 2025 23:55:33 +0000 Subject: [PATCH 023/100] testing --- cmd/nntp-transfer/main.go | 72 +++++++++++++++++---------- internal/nntp/nntp-backend-pool.go | 1 - internal/nntp/nntp-client-commands.go | 39 +++++++++------ internal/nntp/nntp-client.go | 2 +- 4 files changed, 73 insertions(+), 41 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index dca8c71..0dd34ef 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -2169,23 +2169,25 @@ func (rs *ReturnSignal) UnlockTT() { rs.Mux.Lock() rs.RunTT = false rs.Mux.Unlock() + log.Printf("UnlockTT: released RunTT lock") } func (rs *ReturnSignal) GetLockTT() { for { rs.Mux.Lock() - if !rs.RunTT && !rs.CHECK { - rs.RunTT = true + if rs.RunTT { rs.Mux.Unlock() return } - if rs.RunTT { + if !rs.RunTT && !rs.CHECK { + rs.RunTT = true rs.Mux.Unlock() + log.Printf("GetLockTT: acquired RunTT lock") return } rs.Mux.Unlock() log.Printf("GetLockTT: waiting for RunTT to be true...") - time.Sleep(time.Millisecond * 5000) + time.Sleep(nntp.ReturnDelay) } } @@ -2196,37 +2198,50 @@ func (rs *ReturnSignal) UnlockCHECKforTTwithWait() { rs.CHECK = false rs.RunTT = true rs.Mux.Unlock() + log.Printf("UnlockCHECKforTTwithWait: switched CHECK to RunTT") return } rs.Mux.Unlock() log.Printf("UnlockCHECKforTTwithWait: waiting for RunTT to be false...") - time.Sleep(time.Millisecond * 5000) + time.Sleep(nntp.ReturnDelay) } } + func (rs *ReturnSignal) UnlockCHECKforTT() { rs.Mux.Lock() + defer rs.Mux.Unlock() + if !rs.CHECK || rs.RunTT { + log.Printf("UnlockCHECKforTT: cannot switch to RunTT, CHECK=%t RunTT=%t", rs.CHECK, rs.RunTT) + return + } rs.CHECK = false rs.RunTT = true - rs.Mux.Unlock() } func (rs *ReturnSignal) BlockCHECK() { rs.Mux.Lock() rs.CHECK = false + log.Printf("BlockCHECK: set CHECK to false (runTT=%t)", rs.RunTT) rs.Mux.Unlock() } func (rs *ReturnSignal) LockCHECK() { + start := time.Now() + printLast := start for { rs.Mux.Lock() if !rs.RunTT { rs.CHECK = true + log.Printf("LockCHECK: acquired CHECK lock (runTT=%t) waited %v", rs.RunTT, time.Since(start)) rs.Mux.Unlock() return } - log.Printf("LockCHECK: waiting for RunTT to be false... 
rs.CHECK=%t rs.RunTT=%t", rs.CHECK, rs.RunTT) + if time.Since(printLast) > time.Second { + log.Printf("LockCHECK: waiting for RunTT to be false... rs.CHECK=%t rs.RunTT=%t", rs.CHECK, rs.RunTT) + printLast = time.Now() + } rs.Mux.Unlock() - time.Sleep(time.Millisecond * 5000) + time.Sleep(nntp.ReturnDelay) } } @@ -2245,7 +2260,7 @@ func replyChan(request chan struct{}, reply chan struct{}) { } func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue chan *nntp.CHTTJob) { - readResponsesChan := make(chan *nntp.ReadRequest, BatchCheck) + readResponsesChan := make(chan *nntp.ReadRequest, BatchCheck*2) //rrRetChan := make(chan struct{}, BatchCheck) takeThisChan := make(chan *nntp.CHTTJob, 2) // buffer 2 errChan := make(chan struct{}, 4) @@ -2331,7 +2346,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquire LOCK CHECK for batch (offset %d: %d-%d) (%d messages)", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, len(currentJob.MessageIDs[batchStart:batchEnd])) rs.LockCHECK() - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquired CHECK lock for batch (offset %d: %d-%d)", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquired CHECK lock for batch (offset %d: %d-%d) -> SendCheckMultiple", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) err := conn.SendCheckMultiple(currentJob.MessageIDs[batchStart:batchEnd], readResponsesChan, currentJob) if err != nil { log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d SendCheckMultiple error for batch (offset %d: %d-%d): %v", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, err) @@ -2452,7 +2467,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue return case rr := <-readResponsesChan: - log.Printf("CheckWorker (%d): Read CHECK got readRequest for rr: '%v'", workerID, rr) + //log.Printf("CheckWorker (%d): Read CHECK got readRequest for rr: '%v'", workerID, rr) if rr == nil || rr.MsgID == nil { log.Printf("CheckWorker (%d): Read CHECK got nil readRequest, skipping", workerID) continue loop @@ -2463,14 +2478,14 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rr.ClearReadRequest() return } - log.Printf("CheckWorker (%d): Read CHECK response (do conn check) for msgID: %s (cmdId:%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) + //log.Printf("CheckWorker (%d): Read CHECK response (do conn check) for msgID: %s (cmdId:%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) if !conn.IsConnected() { log.Printf("CheckWorker (%d): Read CHECK connection lost, exiting", workerID) //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() return } - log.Printf("CheckWorker (%d): Reading CHECK response for msgID: %s (cmdId:%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) + //log.Printf("CheckWorker (%d): Reading CHECK response for msgID: %s (cmdId:%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) start := time.Now() /* disabled if err := conn.SetReadDeadline(time.Now().Add(1 * time.Minute)); err != nil { @@ -2489,7 +2504,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, 
checkQue rr.ClearReadRequest() return } - log.Printf("CheckWorker (%d): Got CHECK response line: '%s' for msgID: %s (cmdId:%d MID=%d/%d) took: %v ms", workerID, line, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) + //log.Printf("CheckWorker (%d): Got CHECK response line: '%s' for msgID: %s (cmdId:%d MID=%d/%d) took: %v ms", workerID, line, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) /* disabled if err := conn.SetReadDeadline(time.Time{}); err != nil { log.Printf("Failed to set unset read deadline: %v", err) @@ -2498,11 +2513,13 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue return } */ - tookTime += time.Since(start).Milliseconds() + took := time.Since(start).Milliseconds() + tookTime += took responseCount++ rr.Job.Increment(nntp.IncrFLAG_CHECKED) if rr.N == 1 { log.Printf("CheckWorker (%d): time to first response for msgID: %s (cmdId:%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) + tookTime = 0 } else if responseCount >= 10 { avg := float64(tookTime) / float64(responseCount) if avg > 1 { @@ -2533,7 +2550,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Lock() job, exists := rs.jobMap[rr.MsgID] rs.Mux.Unlock() - log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG1 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG1 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) if !exists { log.Printf("Newsgroup: '%s' | ERROR in CheckWorker: ReadCheckResponse msgId did not exist in jobMap: %s", *job.Newsgroup, *rr.MsgID) //rr.ReturnReadRequest(rrRetChan) @@ -2548,15 +2565,15 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Unlock() switch code { case 238: - log.Printf("Newsgroup: '%s' | Got Response: Wanted Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) + //log.Printf("Newsgroup: '%s' | Got Response: Wanted Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) job.AppendWantedMessageID(rr.MsgID) case 438: - log.Printf("Newsgroup: '%s' | Got Response: Unwanted Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) + //log.Printf("Newsgroup: '%s' | Got Response: Unwanted Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) job.Increment(nntp.IncrFLAG_UNWANTED) case 431: - log.Printf("Newsgroup: '%s' | Got Response: Retry Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) + //log.Printf("Newsgroup: '%s' | Got Response: Retry Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) job.Increment(nntp.IncrFLAG_RETRY) default: @@ -2584,7 +2601,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Unlock() if len(job.WantedIDs) > 0 { // Pass job to TAKETHIS worker via channel - log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5 job #%d got all %d CHECK responses, passing to TAKETHIS worker (wanted: %d articles)", *job.Newsgroup, workerID, job.JobID, queuedCount, len(job.WantedIDs)) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5 job #%d got all %d CHECK responses, passing to TAKETHIS worker (wanted: %d articles) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, queuedCount, len(job.WantedIDs), len(takeThisChan)) if len(readResponsesChan) == 0 { rs.UnlockCHECKforTT() 
// Unlock CHECK, lock for TAKETHIS } else { @@ -2601,7 +2618,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue }() } takeThisChan <- job // local takethis chan sharing the same connection - log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5c Sent job #%d to TAKETHIS worker (wanted: %d/%d)", *job.Newsgroup, workerID, job.JobID, len(job.WantedIDs), queuedCount) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5c Sent job #%d to TAKETHIS worker (wanted: %d/%d) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, len(job.WantedIDs), queuedCount, len(takeThisChan)) } else { log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG6 job #%d got %d CHECK responses but server wants none", *job.Newsgroup, workerID, job.JobID, queuedCount) @@ -2609,7 +2626,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue job.Response(true, nil) } } else { - log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG6 job #%d CHECK responses so far: %d/%d readResponsesChan=%d", *job.Newsgroup, workerID, job.JobID, readCount, queuedCount, len(readResponsesChan)) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG6 job #%d CHECK responses so far: %d/%d readResponsesChan=%d", *job.Newsgroup, workerID, job.JobID, readCount, queuedCount, len(readResponsesChan)) } continue loop } // end select @@ -2621,16 +2638,17 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue defer func() { errChan <- struct{}{} }() - + var job *nntp.CHTTJob for { if common.WantShutdown() { log.Printf("TTworker (%d): WantShutdown, exiting", workerID) return } - var job *nntp.CHTTJob + select { case ajob := <-takeThisChan: job = ajob + case <-errChan: log.Printf("TTworker (%d): got errChan signal, exiting", workerID) errChan <- struct{}{} @@ -2640,7 +2658,11 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue log.Printf("TTworker (%d): Received nil job, channels may be closing", workerID) continue } - + if len(job.WantedIDs) == 0 { + log.Printf("Newsgroup: '%s' | TTworker (%d): job #%d has no wanted articles, skipping TAKETHIS", *job.Newsgroup, workerID, job.JobID) + job.Response(true, nil) + continue + } // Build list of wanted articles wantedArticles := make([]*models.Article, 0, len(job.WantedIDs)) for _, wantedID := range job.WantedIDs { diff --git a/internal/nntp/nntp-backend-pool.go b/internal/nntp/nntp-backend-pool.go index 41a5558..2b38dea 100644 --- a/internal/nntp/nntp-backend-pool.go +++ b/internal/nntp/nntp-backend-pool.go @@ -332,7 +332,6 @@ func (pool *Pool) Put(conn *BackendConn) error { pool.mux.Unlock() return nil } - //conn.Writer.Reset(conn.conn) pool.mux.RUnlock() conn.UpdateLastUsed() // set lastused before returning to pool diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 207f125..1dd21a7 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -62,27 +62,35 @@ type OffsetQueue struct { queued int } +var ReturnDelay = time.Millisecond * 16 + func (o *OffsetQueue) Wait(n int) { start := time.Now() + lastPrint := start for { o.mux.RLock() if o.queued < n { o.mux.RUnlock() + o.mux.Lock() + log.Printf("OffsetQueue: waited (%d ms) for %d batches to finish, currently queued: %d", time.Since(start).Milliseconds(), n, o.queued) o.isleep = o.isleep / 2 if o.isleep < time.Millisecond { - o.isleep = time.Millisecond + o.isleep = 0 } o.mux.Unlock() - log.Printf("OffsetQueue: waited (%d ms) for %d batches to 
finish, currently queued: %d", time.Since(start).Milliseconds(), n, o.queued) + return } - log.Printf("OffsetQueue: waiting for %d batches to finish, currently queued: %d", n, o.queued) + if time.Since(lastPrint) > time.Second { + log.Printf("OffsetQueue: waiting for batches to finish, currently queued: %d", o.queued) + lastPrint = time.Now() + } o.mux.RUnlock() o.mux.Lock() o.isleep += time.Millisecond - if o.isleep > time.Millisecond*5000 { - o.isleep = time.Millisecond * 5000 + if o.isleep > ReturnDelay { + o.isleep = ReturnDelay } time.Sleep(o.isleep) o.mux.Unlock() @@ -1322,13 +1330,13 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readResponsesChan //writer := bufio.NewWriter(c.conn) //defer writer.Flush() - log.Printf("SendCheckMultiple commands for %d message IDs", len(messageIDs)) + //log.Printf("Newsgroup: '%s' | SendCheckMultiple commands for %d message IDs", *job.Newsgroup, len(messageIDs)) for n, msgID := range messageIDs { if msgID == nil || *msgID == "" { - log.Printf("Skipping empty message ID in CHECK command") + log.Printf("Newsgroup: '%s' | Skipping empty message ID in CHECK command", *job.Newsgroup) continue } - log.Printf("Newsgroup: '%s' | Preparing c.mux.Lock() 'CHECK %s' (%d/%d)", *job.Newsgroup, *msgID, n+1, len(messageIDs)) + //log.Printf("Newsgroup: '%s' | Preparing c.mux.Lock() 'CHECK %s' (%d/%d)", *job.Newsgroup, *msgID, n+1, len(messageIDs)) c.mux.Lock() id, err := c.TextConn.Cmd("CHECK %s", *msgID) //_, err := fmt.Fprintf(c.conn, "CHECK %s%s", *msgID, CRLF) @@ -1336,9 +1344,12 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readResponsesChan if err != nil { return fmt.Errorf("failed to send CHECK command for %s: %w", *msgID, err) } - log.Printf("Newsgroup: '%s' | Sent CHECK command for %s (CmdID=%d) notify readResponsesChan=%d", *job.Newsgroup, *msgID, id, len(readResponsesChan)) + //log.Printf("Newsgroup: '%s' | Sent CHECK command for %s (CmdID=%d) notify readResponsesChan=%d", *job.Newsgroup, *msgID, id, len(readResponsesChan)) + if len(readResponsesChan) == cap(readResponsesChan) { + log.Printf("Newsgroup: '%s' | WARNING: readResponsesChan is full (%d/%d)", *job.Newsgroup, len(readResponsesChan), cap(readResponsesChan)) + } readResponsesChan <- &ReadRequest{CmdID: id, Job: job, Reqs: len(messageIDs), MsgID: msgID, N: n + 1} - log.Printf("Newsgroup: '%s' | Notify reader done for %s (CmdID=%d) readResponsesChan=%d", *job.Newsgroup, *msgID, id, len(readResponsesChan)) + //log.Printf("Newsgroup: '%s' | Notify reader done for %s (CmdID=%d) readResponsesChan=%d", *job.Newsgroup, *msgID, id, len(readResponsesChan)) id++ } return nil @@ -1593,17 +1604,17 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp // ReadTakeThisResponseStreaming reads a TAKETHIS response using the command ID // Used in streaming mode after all articles have been sent func (c *BackendConn) ReadTakeThisResponseStreaming(id uint) (int, error) { - log.Printf("*BackendConn.ReadTakeThisResponseStreaming: wait command ID %d Response", id) + log.Printf("TAKETHIS wait Response command ID %d ", id) // Read TAKETHIS response c.TextConn.StartResponse(id) defer c.TextConn.EndResponse(id) - c.mux.Lock() - defer c.mux.Unlock() + //c.mux.Lock() + //defer c.mux.Unlock() code, _, err := c.TextConn.ReadCodeLine(239) if code == 0 && err != nil { return 0, fmt.Errorf("failed to read TAKETHIS response: %w", err) } - log.Printf("got *BackendConn.ReadTakeThisResponseStreaming: command ID %d: code=%d", id, code) + log.Printf("TAKETHIS got 
*BackendConn.ReadTakeThisResponseStreaming: command ID %d: code=%d", id, code) // Parse response // Format: code [message] diff --git a/internal/nntp/nntp-client.go b/internal/nntp/nntp-client.go index 04c0973..990ce2e 100644 --- a/internal/nntp/nntp-client.go +++ b/internal/nntp/nntp-client.go @@ -308,7 +308,7 @@ func (c *BackendConn) CloseFromPoolOnly() error { func (c *BackendConn) IsConnected() bool { c.mux.Lock() - log.Printf("IsConnected check: connected=%v conn=%v", c.connected, c.conn) + //log.Printf("IsConnected check: connected=%v conn=%v", c.connected, c.conn) defer c.mux.Unlock() if !c.connected { return false From 432ab5889994957ace2de5fbd8c6e32587ed5437 Mon Sep 17 00:00:00 2001 From: go-while Date: Thu, 9 Oct 2025 00:24:59 +0000 Subject: [PATCH 024/100] testing --- cmd/nntp-transfer/main.go | 10 ++++++---- internal/nntp/nntp-client-commands.go | 6 +++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 0dd34ef..699e68f 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -2176,6 +2176,7 @@ func (rs *ReturnSignal) GetLockTT() { for { rs.Mux.Lock() if rs.RunTT { + log.Printf("GetLockTT: RunTT already true") rs.Mux.Unlock() return } @@ -2214,6 +2215,7 @@ func (rs *ReturnSignal) UnlockCHECKforTT() { log.Printf("UnlockCHECKforTT: cannot switch to RunTT, CHECK=%t RunTT=%t", rs.CHECK, rs.RunTT) return } + log.Printf("UnlockCHECKforTT: switched CHECK to RunTT") rs.CHECK = false rs.RunTT = true } @@ -2221,7 +2223,7 @@ func (rs *ReturnSignal) UnlockCHECKforTT() { func (rs *ReturnSignal) BlockCHECK() { rs.Mux.Lock() rs.CHECK = false - log.Printf("BlockCHECK: set CHECK to false (runTT=%t)", rs.RunTT) + log.Printf("BlockCHECK: set CHECK to false (RunTT=%t)", rs.RunTT) rs.Mux.Unlock() } @@ -2232,12 +2234,12 @@ func (rs *ReturnSignal) LockCHECK() { rs.Mux.Lock() if !rs.RunTT { rs.CHECK = true - log.Printf("LockCHECK: acquired CHECK lock (runTT=%t) waited %v", rs.RunTT, time.Since(start)) + log.Printf("LockCHECK: acquired CHECK lock (RunTT=%t) waited %v", rs.RunTT, time.Since(start)) rs.Mux.Unlock() return } if time.Since(printLast) > time.Second { - log.Printf("LockCHECK: waiting for RunTT to be false... rs.CHECK=%t rs.RunTT=%t", rs.CHECK, rs.RunTT) + log.Printf("LockCHECK: waiting for RunTT to be false... 
CHECK=%t RunTT=%t", rs.CHECK, rs.RunTT) printLast = time.Now() } rs.Mux.Unlock() @@ -2260,7 +2262,7 @@ func replyChan(request chan struct{}, reply chan struct{}) { } func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue chan *nntp.CHTTJob) { - readResponsesChan := make(chan *nntp.ReadRequest, BatchCheck*2) + readResponsesChan := make(chan *nntp.ReadRequest, BatchCheck) //rrRetChan := make(chan struct{}, BatchCheck) takeThisChan := make(chan *nntp.CHTTJob, 2) // buffer 2 errChan := make(chan struct{}, 4) diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 1dd21a7..b0a3fad 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -1345,9 +1345,9 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readResponsesChan return fmt.Errorf("failed to send CHECK command for %s: %w", *msgID, err) } //log.Printf("Newsgroup: '%s' | Sent CHECK command for %s (CmdID=%d) notify readResponsesChan=%d", *job.Newsgroup, *msgID, id, len(readResponsesChan)) - if len(readResponsesChan) == cap(readResponsesChan) { - log.Printf("Newsgroup: '%s' | WARNING: readResponsesChan is full (%d/%d)", *job.Newsgroup, len(readResponsesChan), cap(readResponsesChan)) - } + //if len(readResponsesChan) == cap(readResponsesChan) { + // log.Printf("Newsgroup: '%s' | WARNING: readResponsesChan is full (%d/%d)", *job.Newsgroup, len(readResponsesChan), cap(readResponsesChan)) + //} readResponsesChan <- &ReadRequest{CmdID: id, Job: job, Reqs: len(messageIDs), MsgID: msgID, N: n + 1} //log.Printf("Newsgroup: '%s' | Notify reader done for %s (CmdID=%d) readResponsesChan=%d", *job.Newsgroup, *msgID, id, len(readResponsesChan)) id++ From b2fdbe1ec2e9ded0d5316d92dccb42c4aa7e25a9 Mon Sep 17 00:00:00 2001 From: go-while Date: Thu, 9 Oct 2025 00:29:26 +0000 Subject: [PATCH 025/100] testing Q*2 --- cmd/nntp-transfer/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 699e68f..c636144 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -2262,7 +2262,7 @@ func replyChan(request chan struct{}, reply chan struct{}) { } func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue chan *nntp.CHTTJob) { - readResponsesChan := make(chan *nntp.ReadRequest, BatchCheck) + readResponsesChan := make(chan *nntp.ReadRequest, BatchCheck*2) //rrRetChan := make(chan struct{}, BatchCheck) takeThisChan := make(chan *nntp.CHTTJob, 2) // buffer 2 errChan := make(chan struct{}, 4) From 64d8bb6961782179dfef724540dca56f61ed05ab Mon Sep 17 00:00:00 2001 From: go-while Date: Thu, 9 Oct 2025 01:06:00 +0000 Subject: [PATCH 026/100] testing --- cmd/nntp-transfer/main.go | 51 ++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index c636144..0984a2e 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1877,6 +1877,7 @@ var jobRequeue = make(map[*string][]*nntp.CHTTJob) // CheckQueues holds per-worker CheckQueue channels for consistent newsgroup routing var CheckQueues []chan *nntp.CHTTJob +var TakeThisQueues []chan *nntp.CHTTJob // NewsgroupWorkerMap tracks which worker is assigned to each newsgroup var NewsgroupWorkerMap = make(map[string]int) @@ -1891,12 +1892,11 @@ var WorkerQueueLengthMux sync.Mutex // If new newsgroup, assigns to least busy worker (load balancing) func 
assignWorkerToNewsgroup(newsgroup string) int { // Check if already assigned - NewsgroupWorkerMapMux.RLock() + NewsgroupWorkerMapMux.Lock() + defer NewsgroupWorkerMapMux.Unlock() if workerID, exists := NewsgroupWorkerMap[newsgroup]; exists { - NewsgroupWorkerMapMux.RUnlock() return workerID } - NewsgroupWorkerMapMux.RUnlock() // Find least busy worker WorkerQueueLengthMux.Lock() @@ -1916,9 +1916,7 @@ func assignWorkerToNewsgroup(newsgroup string) int { WorkerQueueLengthMux.Unlock() // Assign newsgroup to this worker - NewsgroupWorkerMapMux.Lock() NewsgroupWorkerMap[newsgroup] = workerID - NewsgroupWorkerMapMux.Unlock() return workerID } @@ -1954,9 +1952,11 @@ func BootConnWorkers(pool *nntp.Pool, redisCli *redis.Client) { var mux sync.Mutex // Create per-worker queues CheckQueues = make([]chan *nntp.CHTTJob, nntp.NNTPTransferThreads) + TakeThisQueues = make([]chan *nntp.CHTTJob, nntp.NNTPTransferThreads) WorkerQueueLength = make([]int, nntp.NNTPTransferThreads) for i := range CheckQueues { - CheckQueues[i] = make(chan *nntp.CHTTJob) // no cap! only accepts if there is a reader! + CheckQueues[i] = make(chan *nntp.CHTTJob) // no cap! only accepts if there is a reader! + TakeThisQueues[i] = make(chan *nntp.CHTTJob, 2) // allows max 2 queued TT jobs WorkerQueueLength[i] = 0 } allEstablished := false @@ -2223,6 +2223,7 @@ func (rs *ReturnSignal) UnlockCHECKforTT() { func (rs *ReturnSignal) BlockCHECK() { rs.Mux.Lock() rs.CHECK = false + rs.RunTT = true log.Printf("BlockCHECK: set CHECK to false (RunTT=%t)", rs.RunTT) rs.Mux.Unlock() } @@ -2264,7 +2265,7 @@ func replyChan(request chan struct{}, reply chan struct{}) { func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue chan *nntp.CHTTJob) { readResponsesChan := make(chan *nntp.ReadRequest, BatchCheck*2) //rrRetChan := make(chan struct{}, BatchCheck) - takeThisChan := make(chan *nntp.CHTTJob, 2) // buffer 2 + //takeThisChan := make(chan *nntp.CHTTJob, 2) // buffer 2 errChan := make(chan struct{}, 4) tickChan := make(chan struct{}, 1) //flipflopChan := make(chan struct{}, 1) @@ -2299,15 +2300,6 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue log.Printf("CheckWorker (%d): Tick WantShutdown, exiting", workerID) return } - waiting: - for { - if len(takeThisChan) > 1 { - log.Printf("CheckWorker (%d): waiting takeThisChan full (%d)", workerID, len(takeThisChan)) - time.Sleep(time.Millisecond * 16) - continue waiting - } - break - } // Get the next job to process rs.Mux.Lock() @@ -2320,10 +2312,19 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue currentJob := rs.jobs[0] rs.jobs = rs.jobs[1:] // Remove first job from queue rs.Mux.Unlock() - if currentJob == nil { continue loop } + workerID := assignWorkerToNewsgroup(*currentJob.Newsgroup) + waiting: + for { + if len(TakeThisQueues[workerID]) > 1 { + log.Printf("CheckWorker (%d): waiting shared takeThisChan full (%d)", workerID, len(TakeThisQueues[workerID])) + time.Sleep(time.Millisecond * 16) + continue waiting + } + break + } currentJob.OffsetQ.Done() if currentJob.TTMode.UseCHECK() { //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d waits to check %d message IDs in batches of %d", *currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs), BatchCheck) @@ -2404,7 +2405,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d skipping CHECK for %d message IDs (TAKETHIS mode)", 
*currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs)) currentJob.WantedIDs = currentJob.MessageIDs rs.UnlockCHECKforTTwithWait() - takeThisChan <- currentJob // local takethis chan sharing the same connection + TakeThisQueues[workerID] <- currentJob // local takethis chan sharing the same connection log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d sent to local TakeThisChan", *currentJob.Newsgroup, workerID, currentJob.JobID) } //lastRun = time.Now() @@ -2603,7 +2604,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Unlock() if len(job.WantedIDs) > 0 { // Pass job to TAKETHIS worker via channel - log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5 job #%d got all %d CHECK responses, passing to TAKETHIS worker (wanted: %d articles) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, queuedCount, len(job.WantedIDs), len(takeThisChan)) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5 job #%d got all %d CHECK responses, passing to TAKETHIS worker (wanted: %d articles) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, queuedCount, len(job.WantedIDs), len(TakeThisQueues[workerID])) if len(readResponsesChan) == 0 { rs.UnlockCHECKforTT() // Unlock CHECK, lock for TAKETHIS } else { @@ -2619,8 +2620,8 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } }() } - takeThisChan <- job // local takethis chan sharing the same connection - log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5c Sent job #%d to TAKETHIS worker (wanted: %d/%d) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, len(job.WantedIDs), queuedCount, len(takeThisChan)) + TakeThisQueues[workerID] <- job // local takethis chan sharing the same connection + log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5c Sent job #%d to TAKETHIS worker (wanted: %d/%d) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, len(job.WantedIDs), queuedCount, len(TakeThisQueues[workerID])) } else { log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG6 job #%d got %d CHECK responses but server wants none", *job.Newsgroup, workerID, job.JobID, queuedCount) @@ -2648,7 +2649,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } select { - case ajob := <-takeThisChan: + case ajob := <-TakeThisQueues[workerID]: job = ajob case <-errChan: @@ -2730,7 +2731,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // Build jobMap for tracking which message IDs belong to this job // and count queued messages rs.Mux.Lock() - queueFull := len(rs.jobs) > 1 || len(takeThisChan) > 1 + queueFull := len(rs.jobs) > 1 || len(TakeThisQueues[workerID]) > 1 if queueFull { log.Printf("Newsgroup: '%s' | CHTTworker (%d): got job #%d with %d message IDs. queued=%d ... 
waiting...", *job.Newsgroup, workerID, job.JobID, len(job.MessageIDs), len(rs.jobs)) select { @@ -2751,14 +2752,14 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // pass case <-time.After(time.Millisecond * 16): rs.Mux.Lock() - queueFull = len(rs.jobs) > 1 || len(takeThisChan) > 1 + queueFull = len(rs.jobs) > 1 || len(TakeThisQueues[workerID]) > 1 rs.Mux.Unlock() if !queueFull { break waitForReply } // log every 5s if time.Since(wait) > time.Second { - log.Printf("Newsgroup: '%s' | CHTTworker (%d): pre append job #%d waiting since %v rs.jobs=%d takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, time.Since(start), len(rs.jobs), len(takeThisChan)) + log.Printf("Newsgroup: '%s' | CHTTworker (%d): pre append job #%d waiting since %v rs.jobs=%d takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, time.Since(start), len(rs.jobs), len(TakeThisQueues[workerID])) wait = time.Now() } } From 9c5f068da966885c0cff3fe1f3dd58a387aff2ec Mon Sep 17 00:00:00 2001 From: go-while Date: Thu, 9 Oct 2025 03:28:05 +0000 Subject: [PATCH 027/100] testing --- cmd/nntp-transfer/main.go | 19 +++++++++++++------ internal/nntp/nntp-client-commands.go | 3 ++- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 0984a2e..f921302 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -2456,7 +2456,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // launch a go routine to read CHECK responses from the supplied connection with textproto readline go func() { var responseCount int - var tookTime int64 + var tookTime time.Duration defer func() { errChan <- struct{}{} }() @@ -2516,17 +2516,17 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue return } */ - took := time.Since(start).Milliseconds() + took := time.Since(start) tookTime += took responseCount++ rr.Job.Increment(nntp.IncrFLAG_CHECKED) if rr.N == 1 { log.Printf("CheckWorker (%d): time to first response for msgID: %s (cmdId:%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) tookTime = 0 - } else if responseCount >= 10 { - avg := float64(tookTime) / float64(responseCount) + } else if responseCount >= 100 { + avg := time.Duration(float64(tookTime) / float64(responseCount)) if avg > 1 { - log.Printf("CheckWorker (%d): Read %d CHECK responses, avg latency: %.1f ms", workerID, responseCount, avg) + log.Printf("CheckWorker (%d): Read %d CHECK responses, avg latency: %v, last: %v (cmdId:%d MID=%d/%d)", workerID, responseCount, avg, took, rr.CmdID, rr.N, rr.Reqs) } responseCount = 0 tookTime = 0 @@ -2605,6 +2605,8 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue if len(job.WantedIDs) > 0 { // Pass job to TAKETHIS worker via channel log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5 job #%d got all %d CHECK responses, passing to TAKETHIS worker (wanted: %d articles) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, queuedCount, len(job.WantedIDs), len(TakeThisQueues[workerID])) + rs.UnlockCHECKforTT() + /* disabled if len(readResponsesChan) == 0 { rs.UnlockCHECKforTT() // Unlock CHECK, lock for TAKETHIS } else { @@ -2619,7 +2621,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } } }() - } + }*/ TakeThisQueues[workerID] <- job // local takethis chan sharing the same connection log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5c Sent job #%d 
to TAKETHIS worker (wanted: %d/%d) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, len(job.WantedIDs), queuedCount, len(TakeThisQueues[workerID])) @@ -2627,6 +2629,11 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG6 job #%d got %d CHECK responses but server wants none", *job.Newsgroup, workerID, job.JobID, queuedCount) // Send response and close channel for jobs with no wanted articles job.Response(true, nil) + if len(TakeThisQueues[workerID]) > 0 { + rs.UnlockCHECKforTT() + } else { + rs.UnlockTT() + } } } else { //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG6 job #%d CHECK responses so far: %d/%d readResponsesChan=%d", *job.Newsgroup, workerID, job.JobID, readCount, queuedCount, len(readResponsesChan)) diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index b0a3fad..8fb37d7 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -103,6 +103,7 @@ func (o *OffsetQueue) Done() { o.queued-- log.Printf("OffsetQueue: a batch is done, still queued: %d", o.queued) } + func (o *OffsetQueue) Add(n int) { o.mux.Lock() defer o.mux.Unlock() @@ -1604,7 +1605,7 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp // ReadTakeThisResponseStreaming reads a TAKETHIS response using the command ID // Used in streaming mode after all articles have been sent func (c *BackendConn) ReadTakeThisResponseStreaming(id uint) (int, error) { - log.Printf("TAKETHIS wait Response command ID %d ", id) + log.Printf("TAKETHIS wait Response command ID %d", id) // Read TAKETHIS response c.TextConn.StartResponse(id) defer c.TextConn.EndResponse(id) From eee87659830ed5985e293f1aef13811d100da08c Mon Sep 17 00:00:00 2001 From: go-while Date: Thu, 9 Oct 2025 20:02:07 +0000 Subject: [PATCH 028/100] debugs --- cmd/nntp-transfer/main.go | 13 +++++++------ internal/nntp/nntp-client-commands.go | 19 ++++++++----------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index f921302..7208a1d 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1774,17 +1774,18 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art continue // Skip cached articles } // Send TAKETHIS command with article content (non-blocking) + log.Printf("Newsgroup: '%s' | Pre-Send TAKETHIS '%s'", newsgroup, article.MessageID) cmdID, err := conn.SendTakeThisArticleStreaming(article, &processor.LocalNNTPHostname, newsgroup) if err != nil { if err == common.ErrNoNewsgroups { - log.Printf("Newsgroup: '%s' | skipped article '%s': no newsgroups header", newsgroup, article.MessageID) + log.Printf("Newsgroup: '%s' | skipped TAKETHIS '%s': no newsgroups header", newsgroup, article.MessageID) continue } conn.ForceCloseConn() log.Printf("ERROR Newsgroup: '%s' | Failed to send TAKETHIS for %s: %v", newsgroup, article.MessageID, err) return 0, 0, redis_cached, fmt.Errorf("failed to send TAKETHIS for %s: %v", article.MessageID, err) } - + log.Printf("Newsgroup: '%s' | Sent TAKETHIS '%s' CmdID=%d", newsgroup, article.MessageID, cmdID) artChan <- &nntp.CheckResponse{ Article: article, CmdId: cmdID, @@ -1795,18 +1796,18 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art var done []*string var countDone int // Phase 2: Read all responses in order - log.Printf("Newsgroup: '%s' | Phase 2: Reading TAKETHIS responses for %d 
sent articles...", newsgroup, len(artChan)) + log.Printf("Newsgroup: '%s' | Reading TAKETHIS responses for %d sent articles...", newsgroup, len(artChan)) for cr := range artChan { - log.Printf("Newsgroup: '%s' | Reading TAKETHIS response for article '%s' (CmdID=%d) (i=%d/%d)", newsgroup, cr.Article.MessageID, cr.CmdId, countDone+1, len(articles)) + log.Printf("Newsgroup: '%s' | Pre-Read TAKETHIS response for article '%s' CmdID=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, cr.CmdId, countDone+1, len(articles)) job.TTMode.IncrementTmp() - takeThisResponseCode, err := conn.ReadTakeThisResponseStreaming(cr.CmdId) + takeThisResponseCode, err := conn.ReadTakeThisResponseStreaming(cr) if err != nil { job.Increment(nntp.IncrFLAG_CONN_ERRORS) conn.ForceCloseConn() log.Printf("ERROR Newsgroup: '%s' | Failed to read TAKETHIS response for %s: %v", newsgroup, cr.Article.MessageID, err) return transferred, rejected, redis_cached, fmt.Errorf("failed to read TAKETHIS response for %s: %v", cr.Article.MessageID, err) } - log.Printf("Newsgroup: '%s' | TAKETHIS response '%s': %d", newsgroup, cr.Article.MessageID, takeThisResponseCode) + log.Printf("Newsgroup: '%s' | GOT TAKETHIS response '%s': %d CmdID=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, takeThisResponseCode, cr.CmdId, countDone+1, len(articles)) countDone++ // Update success rate tracking switch takeThisResponseCode { diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 8fb37d7..6cd48f1 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -62,7 +62,7 @@ type OffsetQueue struct { queued int } -var ReturnDelay = time.Millisecond * 16 +var ReturnDelay = time.Millisecond * 8 func (o *OffsetQueue) Wait(n int) { start := time.Now() @@ -1548,6 +1548,7 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp return 0, err } writer := bufio.NewWriterSize(c.conn, c.GetBufSize(article.Bytes)) // Slightly larger buffer than article size for headers + defer writer.Flush() // Send TAKETHIS command id, err := c.TextConn.Cmd("TAKETHIS %s", article.MessageID) if err != nil { @@ -1593,29 +1594,25 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp return 0, fmt.Errorf("failed to send article terminator SendTakeThisArticleStreaming: %w", err) } - // Flush the writer to ensure all data is sent - if err := writer.Flush(); err != nil { - return 0, fmt.Errorf("failed to flush article data SendTakeThisArticleStreaming: %w", err) - } - // Return command ID without reading response (streaming mode) return id, nil } // ReadTakeThisResponseStreaming reads a TAKETHIS response using the command ID // Used in streaming mode after all articles have been sent -func (c *BackendConn) ReadTakeThisResponseStreaming(id uint) (int, error) { - log.Printf("TAKETHIS wait Response command ID %d", id) +func (c *BackendConn) ReadTakeThisResponseStreaming(cr *CheckResponse) (int, error) { + log.Printf("TAKETHIS wait Response CmdID=%d message-id '%s'", cr.CmdId, cr.Article.MessageID) // Read TAKETHIS response - c.TextConn.StartResponse(id) - defer c.TextConn.EndResponse(id) + c.TextConn.StartResponse(cr.CmdId) + defer c.TextConn.EndResponse(cr.CmdId) + log.Printf("TAKETHIS got *BackendConn.ReadTakeThisResponseStreaming: passed StartResponse CmdID=%d message-id '%s'", cr.CmdId, cr.Article.MessageID) //c.mux.Lock() //defer c.mux.Unlock() code, _, err := c.TextConn.ReadCodeLine(239) if code == 0 && err != nil { return 0, fmt.Errorf("failed to read 
TAKETHIS response: %w", err) } - log.Printf("TAKETHIS got *BackendConn.ReadTakeThisResponseStreaming: command ID %d: code=%d", id, code) + log.Printf("TAKETHIS got *BackendConn.ReadTakeThisResponseStreaming: passed ReadCodeLine CmdID=%d: code=%d message-id '%s'", cr.CmdId, code, cr.Article.MessageID) // Parse response // Format: code [message] From 456fdde5ccf4131892a669eb05845dfd31346e20 Mon Sep 17 00:00:00 2001 From: go-while Date: Thu, 9 Oct 2025 20:04:36 +0000 Subject: [PATCH 029/100] testing --- cmd/nntp-transfer/main.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 7208a1d..7651147 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -2405,7 +2405,8 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } else { log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d skipping CHECK for %d message IDs (TAKETHIS mode)", *currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs)) currentJob.WantedIDs = currentJob.MessageIDs - rs.UnlockCHECKforTTwithWait() + //rs.UnlockCHECKforTTwithWait() + rs.BlockCHECK() TakeThisQueues[workerID] <- currentJob // local takethis chan sharing the same connection log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d sent to local TakeThisChan", *currentJob.Newsgroup, workerID, currentJob.JobID) } From 011896709b5dc97f15d471580776aae0449031b8 Mon Sep 17 00:00:00 2001 From: go-while Date: Thu, 9 Oct 2025 20:43:00 +0000 Subject: [PATCH 030/100] debugs --- cmd/nntp-transfer/main.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 7651147..58c0405 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1800,7 +1800,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art for cr := range artChan { log.Printf("Newsgroup: '%s' | Pre-Read TAKETHIS response for article '%s' CmdID=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, cr.CmdId, countDone+1, len(articles)) job.TTMode.IncrementTmp() - takeThisResponseCode, err := conn.ReadTakeThisResponseStreaming(cr) + takeThisResponseCode, err := conn.ReadTakeThisResponseStreaming(newsgroup, cr) if err != nil { job.Increment(nntp.IncrFLAG_CONN_ERRORS) conn.ForceCloseConn() @@ -2541,13 +2541,13 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // ReadCodeLine returns: code=238, message=" article wanted" parts := strings.Fields(line) if len(parts) < 1 { - log.Printf("ERROR in CheckWorker: Malformed CHECK response: %s", line) + log.Printf("ERROR in CheckWorker: Malformed CHECK response code=%d line: '%s' (cmdId:%d MID=%d/%d)", code, line, rr.CmdID, rr.N, rr.Reqs) //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() return } if parts[0] != *rr.MsgID { - log.Printf("ERROR in CheckWorker: Mismatched CHECK response: expected %s, got %s", *rr.MsgID, parts[0]) + log.Printf("ERROR in CheckWorker: Mismatched CHECK response: expected '%s', got '%s' code=%d (cmdId:%d MID=%d/%d)", *rr.MsgID, parts[0], code, rr.CmdID, rr.N, rr.Reqs) //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() return @@ -2557,7 +2557,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Unlock() //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG1 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) if !exists { - 
log.Printf("Newsgroup: '%s' | ERROR in CheckWorker: ReadCheckResponse msgId did not exist in jobMap: %s", *job.Newsgroup, *rr.MsgID) + log.Printf("Newsgroup: '%s' | ERROR in CheckWorker: ReadCheckResponse msgId '%s' did not exist in jobMap.", *job.Newsgroup, *rr.MsgID) //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() continue loop From 7210d228a7981b8d83120d557022892dcd80303d Mon Sep 17 00:00:00 2001 From: go-while Date: Thu, 9 Oct 2025 20:43:13 +0000 Subject: [PATCH 031/100] testing mutex --- internal/nntp/nntp-client-commands.go | 44 +++++++++++++++++---------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 6cd48f1..622109d 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -62,7 +62,7 @@ type OffsetQueue struct { queued int } -var ReturnDelay = time.Millisecond * 8 +var ReturnDelay = time.Millisecond * 256 func (o *OffsetQueue) Wait(n int) { start := time.Now() @@ -1337,20 +1337,16 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readResponsesChan log.Printf("Newsgroup: '%s' | Skipping empty message ID in CHECK command", *job.Newsgroup) continue } - //log.Printf("Newsgroup: '%s' | Preparing c.mux.Lock() 'CHECK %s' (%d/%d)", *job.Newsgroup, *msgID, n+1, len(messageIDs)) + log.Printf("Newsgroup: '%s' | CHECK '%s' acquire c.mux.Lock() (%d/%d)", *job.Newsgroup, *msgID, n+1, len(messageIDs)) c.mux.Lock() id, err := c.TextConn.Cmd("CHECK %s", *msgID) - //_, err := fmt.Fprintf(c.conn, "CHECK %s%s", *msgID, CRLF) c.mux.Unlock() if err != nil { - return fmt.Errorf("failed to send CHECK command for %s: %w", *msgID, err) + return fmt.Errorf("failed to send CHECK '%s': %w", *msgID, err) } - //log.Printf("Newsgroup: '%s' | Sent CHECK command for %s (CmdID=%d) notify readResponsesChan=%d", *job.Newsgroup, *msgID, id, len(readResponsesChan)) - //if len(readResponsesChan) == cap(readResponsesChan) { - // log.Printf("Newsgroup: '%s' | WARNING: readResponsesChan is full (%d/%d)", *job.Newsgroup, len(readResponsesChan), cap(readResponsesChan)) - //} + log.Printf("Newsgroup: '%s' | CHECK sent '%s' (CmdID=%d) pass notify to readResponsesChan=%d", *job.Newsgroup, *msgID, id, len(readResponsesChan)) readResponsesChan <- &ReadRequest{CmdID: id, Job: job, Reqs: len(messageIDs), MsgID: msgID, N: n + 1} - //log.Printf("Newsgroup: '%s' | Notify reader done for %s (CmdID=%d) readResponsesChan=%d", *job.Newsgroup, *msgID, id, len(readResponsesChan)) + log.Printf("Newsgroup: '%s' | CHECK notified response reader '%s' (CmdID=%d) readResponsesChan=%d", *job.Newsgroup, *msgID, id, len(readResponsesChan)) id++ } return nil @@ -1434,8 +1430,9 @@ func (c *BackendConn) CheckMultiple(messageIDs []*string, ttMode *TakeThisMode) } */ +/* unused // TakeThisArticle sends an article via TAKETHIS command -func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *string, newsgroup string) (int, error) { +func (c *BackendConn) xxTakeThisArticle(article *models.Article, nntphostname *string, newsgroup string) (int, error) { c.mux.Lock() defer c.mux.Unlock() @@ -1454,6 +1451,7 @@ func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *str c.lastUsed = time.Now() writer := bufio.NewWriter(c.conn) + defer writer.Flush() // Send TAKETHIS command id, err := c.TextConn.Cmd("TAKETHIS %s", article.MessageID) if err != nil { @@ -1520,6 +1518,7 @@ func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *str return 
code, nil } +*/ func (c *BackendConn) GetBufSize(size int) int { if size+2048 <= 16*1024 { @@ -1532,15 +1531,19 @@ func (c *BackendConn) GetBufSize(size int) int { // Returns command ID for later response reading - used for streaming mode func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string, newsgroup string) (uint, error) { c.mux.Lock() - defer c.mux.Unlock() + //defer c.mux.Unlock() if !c.connected { + c.mux.Unlock() return 0, fmt.Errorf("not connected") } if c.ModeReader { + c.mux.Unlock() return 0, fmt.Errorf("cannot send article in reader mode") } + c.lastUsed = time.Now() + c.mux.Unlock() // Prepare article for transfer headers, err := common.ReconstructHeaders(article, true, nntphostname, newsgroup) @@ -1549,10 +1552,14 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp } writer := bufio.NewWriterSize(c.conn, c.GetBufSize(article.Bytes)) // Slightly larger buffer than article size for headers defer writer.Flush() + + c.mux.Lock() + defer c.mux.Unlock() + // Send TAKETHIS command id, err := c.TextConn.Cmd("TAKETHIS %s", article.MessageID) if err != nil { - return 0, fmt.Errorf("failed to send TAKETHIS command: %w", err) + return 0, fmt.Errorf("failed SendTakeThisArticleStreaming command: %w", err) } // Send headers @@ -1600,19 +1607,23 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp // ReadTakeThisResponseStreaming reads a TAKETHIS response using the command ID // Used in streaming mode after all articles have been sent -func (c *BackendConn) ReadTakeThisResponseStreaming(cr *CheckResponse) (int, error) { - log.Printf("TAKETHIS wait Response CmdID=%d message-id '%s'", cr.CmdId, cr.Article.MessageID) +func (c *BackendConn) ReadTakeThisResponseStreaming(newsgroup string, cr *CheckResponse) (int, error) { + //log.Printf("Newsgroup: '%s' | TAKETHIS acquire c.mux.Lock(): CmdID=%d message-id '%s'", newsgroup, cr.CmdId, cr.Article.MessageID) + //c.mux.Lock() + //defer c.mux.Unlock() + //log.Printf("Newsgroup: '%s' | TAKETHIS acquired Lock(): wait Response CmdID=%d message-id '%s'", newsgroup, cr.CmdId, cr.Article.MessageID) + log.Printf("Newsgroup: '%s' | TAKETHIS wait for response CmdID=%d message-id '%s'", newsgroup, cr.CmdId, cr.Article.MessageID) // Read TAKETHIS response c.TextConn.StartResponse(cr.CmdId) defer c.TextConn.EndResponse(cr.CmdId) - log.Printf("TAKETHIS got *BackendConn.ReadTakeThisResponseStreaming: passed StartResponse CmdID=%d message-id '%s'", cr.CmdId, cr.Article.MessageID) + log.Printf("Newsgroup: '%s' | TAKETHIS got *BackendConn.ReadTakeThisResponseStreaming: passed StartResponse CmdID=%d message-id '%s'", newsgroup, cr.CmdId, cr.Article.MessageID) //c.mux.Lock() //defer c.mux.Unlock() code, _, err := c.TextConn.ReadCodeLine(239) if code == 0 && err != nil { return 0, fmt.Errorf("failed to read TAKETHIS response: %w", err) } - log.Printf("TAKETHIS got *BackendConn.ReadTakeThisResponseStreaming: passed ReadCodeLine CmdID=%d: code=%d message-id '%s'", cr.CmdId, code, cr.Article.MessageID) + log.Printf("Newsgroup: '%s' | TAKETHIS got *BackendConn.ReadTakeThisResponseStreaming: passed ReadCodeLine CmdID=%d: code=%d message-id '%s'", newsgroup, cr.CmdId, code, cr.Article.MessageID) // Parse response // Format: code [message] @@ -1650,6 +1661,7 @@ func (c *BackendConn) PostArticle(article *models.Article) (int, error) { return code, fmt.Errorf("POST command failed: %s", line) } writer := bufio.NewWriter(c.conn) + defer writer.Flush() switch code { case 340: // 
pass, posted From ef2fd9780d29c4580b134e57cf78f7dd5049f726 Mon Sep 17 00:00:00 2001 From: go-while Date: Thu, 9 Oct 2025 22:17:40 +0000 Subject: [PATCH 032/100] testing --- cmd/nntp-transfer/main.go | 195 +++++++++++++------------- internal/nntp/nntp-client-commands.go | 100 ++++++++++--- 2 files changed, 183 insertions(+), 112 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 58c0405..a3a5e3c 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -616,15 +616,15 @@ const query_getArticlesBatchWithDateFilter_nodatefilter = `SELECT article_num, m const query_getArticlesBatchWithDateFilter_orderby = " ORDER BY date_sent ASC LIMIT ? OFFSET ?" // getArticlesBatchWithDateFilter retrieves articles from a group database with optional date filtering -func getArticlesBatchWithDateFilter(db *database.Database, newsgroup *models.Newsgroup, offset int64, startTime, endTime *time.Time) ([]*models.Article, error) { +func getArticlesBatchWithDateFilter(db *database.Database, ng *models.Newsgroup, offset int64, startTime, endTime *time.Time) ([]*models.Article, error) { // Get group database - groupDBs, err := db.GetGroupDBs(newsgroup.Name) + groupDBs, err := db.GetGroupDBs(ng.Name) if err != nil { - return nil, fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", newsgroup.Name, err) + return nil, fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", ng.Name, err) } defer func() { if ferr := db.ForceCloseGroupDBs(groupDBs); ferr != nil { - log.Printf("ForceCloseGroupDBs error for '%s': %v", newsgroup.Name, ferr) + log.Printf("ForceCloseGroupDBs error for '%s': %v", ng.Name, ferr) } }() var query string @@ -1059,9 +1059,9 @@ func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchChe var wg sync.WaitGroup log.Printf("Todo: %d newsgroups", len(newsgroups)) // Process each newsgroup - for _, newsgroup := range newsgroups { + for _, ng := range newsgroups { if common.WantShutdown() { - log.Printf("Aborted before next: %s", newsgroup.Name) + log.Printf("Aborted before next: %s", ng.Name) return nil } maxThreadsChan <- struct{}{} // acquire a thread slot @@ -1072,13 +1072,13 @@ func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchChe <-maxThreadsChan // release the thread slot }(wg) if common.WantShutdown() { - log.Printf("Aborted before next: %s", newsgroup.Name) + log.Printf("Aborted before next: %s", ng.Name) return } if VERBOSE { - log.Printf("Newsgroup: '%s' | Start", newsgroup.Name) + log.Printf("Newsgroup: '%s' | Start", ng.Name) } - err := transferNewsgroup(db, newsgroup, batchCheck, dryRun, startTime, endTime, debugCapture, redisCli) + err := transferNewsgroup(db, ng, batchCheck, dryRun, startTime, endTime, debugCapture, redisCli) if err == ErrNotInDateRange { transferMutex.Lock() nothingInDateRange++ @@ -1086,9 +1086,9 @@ func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchChe err = nil // not a real error } if err != nil { - log.Printf("Error transferring newsgroup %s: %v", newsgroup.Name, err) + log.Printf("Error transferring newsgroup %s: %v", ng.Name, err) } - }(newsgroup, &wg, redisCli) + }(ng, &wg, redisCli) } // Wait for all transfers to complete @@ -1143,7 +1143,7 @@ func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponse log.Printf("Newsgroup: '%s' | Processing requeued job %d/%d with %d articles", newsgroup, i+1, len(queuedJobs), len(job.Articles)) // pass articles to CHECK or TAKETHIS queue (async!) 
- responseChan, err := processBatch(ttMode, job.Articles, redisCli, job.BatchStart, job.BatchEnd, -1, job.OffsetQ) + responseChan, err := processBatch(ttMode, job.Articles, redisCli, job.BatchStart, job.BatchEnd, -1, job.OffsetQ, job.NGTProgress) if err != nil { log.Printf("Newsgroup: '%s' | Error processing requeued batch: %v", newsgroup, err) jobRequeueMutex.Lock() @@ -1165,23 +1165,25 @@ func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponse } // transferNewsgroup transfers articles from a single newsgroup -func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batchCheck int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, redisCli *redis.Client) error { +func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, redisCli *redis.Client) error { - //log.Printf("Newsgroup: '%s' | transferNewsgroup: Starting (getting group DBs)...", newsgroup.Name) + //log.Printf("Newsgroup: '%s' | transferNewsgroup: Starting (getting group DBs)...", ng.Name) // Get group database - groupDBsA, err := db.GetGroupDBs(newsgroup.Name) + groupDBsA, err := db.GetGroupDBs(ng.Name) if err != nil { - return fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", newsgroup.Name, err) + return fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", ng.Name, err) } - //log.Printf("Newsgroup: '%s' | transferNewsgroup: Got group DBs, querying article count...", newsgroup.Name) + //log.Printf("Newsgroup: '%s' | transferNewsgroup: Got group DBs, querying article count...", ng.Name) // Initialize newsgroup progress tracking resultsMutex.Lock() - if _, exists := NewsgroupProgressMap[newsgroup.Name]; !exists { - NewsgroupProgressMap[newsgroup.Name] = &NewsgroupProgress{ + if _, exists := nntp.NewsgroupTransferProgressMap[ng.Name]; !exists { + nntp.NewsgroupTransferProgressMap[ng.Name] = &nntp.NewsgroupTransferProgress{ + Newsgroup: &ng.Name, Started: time.Now(), LastUpdated: time.Now(), + LastCronTX: time.Now(), Finished: false, TotalArticles: 0, } @@ -1190,31 +1192,31 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch // Get total article count first with date filtering totalArticles, err := getArticleCountWithDateFilter(groupDBsA, startTime, endTime) if err != nil { - return fmt.Errorf("failed to get article count for newsgroup '%s': %v", newsgroup.Name, err) + return fmt.Errorf("failed to get article count for newsgroup '%s': %v", ng.Name, err) } - //log.Printf("Newsgroup: '%s' | transferNewsgroup: Got article count (%d), closing group DBs...", newsgroup.Name, totalArticles) + //log.Printf("Newsgroup: '%s' | transferNewsgroup: Got article count (%d), closing group DBs...", ng.Name, totalArticles) if ferr := db.ForceCloseGroupDBs(groupDBsA); ferr != nil { - log.Printf("ForceCloseGroupDBs error for '%s': %v", newsgroup.Name, ferr) + log.Printf("ForceCloseGroupDBs error for '%s': %v", ng.Name, ferr) } - //log.Printf("Newsgroup: '%s' | transferNewsgroup: Closed group DBs, checking if articles exist...", newsgroup.Name) + //log.Printf("Newsgroup: '%s' | transferNewsgroup: Closed group DBs, checking if articles exist...", ng.Name) if totalArticles == 0 { resultsMutex.Lock() - NewsgroupProgressMap[newsgroup.Name].Finished = true - NewsgroupProgressMap[newsgroup.Name].LastUpdated = time.Now() - results = append(results, fmt.Sprintf("END Newsgroup: '%s' | No articles to process", newsgroup.Name)) + 
nntp.NewsgroupTransferProgressMap[ng.Name].Finished = true + nntp.NewsgroupTransferProgressMap[ng.Name].LastUpdated = time.Now() + results = append(results, fmt.Sprintf("END Newsgroup: '%s' | No articles to process", ng.Name)) resultsMutex.Unlock() // No articles to process if startTime != nil || endTime != nil { if VERBOSE { - log.Printf("No articles found in newsgroup: %s (within specified date range)", newsgroup.Name) + log.Printf("No articles found in newsgroup: %s (within specified date range)", ng.Name) } return ErrNotInDateRange } else { - log.Printf("No articles found in newsgroup: %s", newsgroup.Name) + log.Printf("No articles found in newsgroup: %s", ng.Name) } return nil @@ -1222,15 +1224,16 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch // Initialize newsgroup progress tracking resultsMutex.Lock() - NewsgroupProgressMap[newsgroup.Name].TotalArticles = totalArticles - NewsgroupProgressMap[newsgroup.Name].LastUpdated = time.Now() + nntp.NewsgroupTransferProgressMap[ng.Name].TotalArticles = totalArticles + nntp.NewsgroupTransferProgressMap[ng.Name].LastUpdated = time.Now() + ngtprogress := nntp.NewsgroupTransferProgressMap[ng.Name] resultsMutex.Unlock() if dryRun { if startTime != nil || endTime != nil { - log.Printf("DRY RUN: Would transfer %d articles from newsgroup %s (within specified date range)", totalArticles, newsgroup.Name) + log.Printf("DRY RUN: Would transfer %d articles from newsgroup %s (within specified date range)", totalArticles, ng.Name) } else { - log.Printf("DRY RUN: Would transfer %d articles from newsgroup %s", totalArticles, newsgroup.Name) + log.Printf("DRY RUN: Would transfer %d articles from newsgroup %s", totalArticles, ng.Name) } if !debugCapture { return nil @@ -1239,16 +1242,16 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch if !dryRun && !debugCapture { if startTime != nil || endTime != nil { - log.Printf("Found %d articles in newsgroup %s (within specified date range) - processing in batches", totalArticles, newsgroup.Name) + log.Printf("Found %d articles in newsgroup %s (within specified date range) - processing in batches", totalArticles, ng.Name) } else { - log.Printf("Found %d articles in newsgroup %s - processing in batches", totalArticles, newsgroup.Name) + log.Printf("Found %d articles in newsgroup %s - processing in batches", totalArticles, ng.Name) } } //time.Sleep(3 * time.Second) // debug sleep var ioffset int64 remainingArticles := totalArticles ttMode := &nntp.TakeThisMode{ - Newsgroup: &newsgroup.Name, + Newsgroup: &ng.Name, CheckMode: true, } ttResponses := make(chan *nntp.TTSetup, totalArticles/int64(batchCheck)+2) @@ -1268,28 +1271,28 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch var num uint64 for setup := range ttResponses { if setup == nil || setup.ResponseChan == nil { - log.Printf("Newsgroup: '%s' | Warning: nil TT response channel received in collector!?", newsgroup.Name) + log.Printf("Newsgroup: '%s' | Warning: nil TT response channel received in collector!?", ng.Name) continue } num++ - log.Printf("Newsgroup: '%s' | Starting response channel processor num %d (goroutines: %d)", newsgroup.Name, num, runtime.NumGoroutine()) + log.Printf("Newsgroup: '%s' | Starting response channel processor num %d (goroutines: %d)", ng.Name, num, runtime.NumGoroutine()) responseWG.Add(1) go func(rc chan *nntp.TTResponse, num uint64) { defer responseWG.Done() - defer log.Printf("Newsgroup: '%s' | Quit response channel processor num %d 
(goroutines: %d)", newsgroup.Name, num, runtime.NumGoroutine()) + defer log.Printf("Newsgroup: '%s' | Quit response channel processor num %d (goroutines: %d)", ng.Name, num, runtime.NumGoroutine()) // Read exactly ONE response from this channel (channel is buffered with cap 1) resp := <-rc // job.Response(ForceCleanUp, err) arrives here if resp == nil { - log.Printf("Newsgroup: '%s' | Warning: nil TT response received!?", newsgroup.Name) + log.Printf("Newsgroup: '%s' | Warning: nil TT response received!?", ng.Name) return } if resp.Err != nil { - log.Printf("Newsgroup: '%s' | Error in TT response job #%d err='%v' job='%v' ForceCleanUp=%t", newsgroup.Name, resp.Job.JobID, resp.Err, resp.Job, resp.ForceCleanUp) + log.Printf("Newsgroup: '%s' | Error in TT response job #%d err='%v' job='%v' ForceCleanUp=%t", ng.Name, resp.Job.JobID, resp.Err, resp.Job, resp.ForceCleanUp) } if resp.Job == nil { - log.Printf("Newsgroup: '%s' | Warning: nil Job in TT response job without error!? ForceCleanUp=%t", newsgroup.Name, resp.ForceCleanUp) + log.Printf("Newsgroup: '%s' | Warning: nil Job in TT response job without error!? ForceCleanUp=%t", ng.Name, resp.ForceCleanUp) return } // get numbers @@ -1301,7 +1304,7 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch } // free memory - CRITICAL: Lock and unlock in same scope, not with defer! resp.Job.Mux.Lock() - //log.Printf("Newsgroup: '%s' | Cleaning up TT job #%d with %d articles (ForceCleanUp)", newsgroup.Name, resp.Job.JobID, len(resp.Job.Articles)) + //log.Printf("Newsgroup: '%s' | Cleaning up TT job #%d with %d articles (ForceCleanUp)", ng.Name, resp.Job.JobID, len(resp.Job.Articles)) // Clean up Articles and their internal fields for i := range resp.Job.Articles { if resp.Job.Articles[i] != nil { @@ -1329,30 +1332,33 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch resp.Job = nil }(setup.ResponseChan, num) } - log.Printf("Newsgroup: '%s' | Collector: ttResponses closed, waiting for %d response processors to finish...", newsgroup.Name, num) + log.Printf("Newsgroup: '%s' | Collector: ttResponses closed, waiting for %d response processors to finish...", ng.Name, num) // Wait for all response channel processors to finish responseWG.Wait() - log.Printf("Newsgroup: '%s' | Collector: all response processors closed", newsgroup.Name) + log.Printf("Newsgroup: '%s' | Collector: all response processors closed", ng.Name) amux.Lock() result := fmt.Sprintf("END Newsgroup: '%s' | transferred: %d/%d | unwanted: %d | rejected: %d | checked: %d | TX_Errors: %d | connErrors: %d | took %v", - newsgroup.Name, transferred, totalArticles, unwanted, rejected, checked, txErrors, connErrors, time.Since(start)) + ng.Name, transferred, totalArticles, unwanted, rejected, checked, txErrors, connErrors, time.Since(start)) amux.Unlock() //log.Print(result) resultsMutex.Lock() results = append(results, result) // Mark newsgroup as finished - if progress, exists := NewsgroupProgressMap[newsgroup.Name]; exists { + if progress, exists := nntp.NewsgroupTransferProgressMap[ng.Name]; exists { progress.Mux.Lock() progress.Finished = true progress.LastUpdated = time.Now() + progress.TXBytes += progress.TXBytesTMP + progress.TXBytesTMP = 0 + progress.LastCronTX = progress.LastUpdated progress.Mux.Unlock() } if VERBOSE { - for _, msgId := range rejectedArticles[newsgroup.Name] { + for _, msgId := range rejectedArticles[ng.Name] { // prints all at the end again - log.Printf("END Newsgroup: '%s' | REJECTED '%s'", newsgroup.Name, 
msgId) + log.Printf("END Newsgroup: '%s' | REJECTED '%s'", ng.Name, msgId) } - delete(rejectedArticles, newsgroup.Name) // free memory + delete(rejectedArticles, ng.Name) // free memory } resultsMutex.Unlock() }() @@ -1360,28 +1366,28 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch // Get articles in database batches (much larger than network batches) for offset := ioffset; offset < totalArticles; offset += dbBatchSize { if common.WantShutdown() { - log.Printf("WantShutdown in newsgroup: '%s' offset: %d", newsgroup.Name, offset) + log.Printf("WantShutdown in newsgroup: '%s' offset: %d", ng.Name, offset) return nil } // Process any requeued jobs first (from previous failed batches) - if _, err := processRequeuedJobs(newsgroup.Name, ttMode, ttResponses, redisCli); err != nil { + if _, err := processRequeuedJobs(ng.Name, ttMode, ttResponses, redisCli); err != nil { return err } start := time.Now() // Load batch from database with date filtering - articles, err := getArticlesBatchWithDateFilter(db, newsgroup, offset, startTime, endTime) + articles, err := getArticlesBatchWithDateFilter(db, ng, offset, startTime, endTime) if err != nil { - log.Printf("Error loading article batch (offset %d) for newsgroup %s: %v", offset, newsgroup.Name, err) - return fmt.Errorf("failed to load article batch (offset %d) for newsgroup '%s': %v", offset, newsgroup.Name, err) + log.Printf("Error loading article batch (offset %d) for newsgroup %s: %v", offset, ng.Name, err) + return fmt.Errorf("failed to load article batch (offset %d) for newsgroup '%s': %v", offset, ng.Name, err) } if len(articles) == 0 { - //log.Printf("No more articles in newsgroup %s (offset %d)", newsgroup.Name, offset) + //log.Printf("No more articles in newsgroup %s (offset %d)", ng.Name, offset) break } if dryRun && debugCapture { debugMutex.Lock() - debugArticles[newsgroup.Name] = append(debugArticles[newsgroup.Name], articles...) + debugArticles[ng.Name] = append(debugArticles[ng.Name], articles...) debugMutex.Unlock() return nil } @@ -1390,13 +1396,13 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch for _, a := range articles { size += a.Bytes } - log.Printf("Newsgroup: '%s' | Loaded %d articles from database (offset %d) (Bytes=%d) took %v", newsgroup.Name, len(articles), offset, size, time.Since(start)) + log.Printf("Newsgroup: '%s' | Loaded %d articles from database (offset %d) (Bytes=%d) took %v", ng.Name, len(articles), offset, size, time.Since(start)) //} // Process articles in network batches for i := 0; i < len(articles); i += batchCheck { OffsetQueue.Add(1) if common.WantShutdown() { - log.Printf("WantShutdown in newsgroup: '%s' (offset %d)", newsgroup.Name, offset) + log.Printf("WantShutdown in newsgroup: '%s' (offset %d)", ng.Name, offset) return nil } // Determine end index for the batch @@ -1405,10 +1411,10 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch end = len(articles) } // pass articles to CHECK or TAKETHIS queue (async!) 
- responseChan, err := processBatch(ttMode, articles[i:end], redisCli, int64(i), int64(end), offset, OffsetQueue) + responseChan, err := processBatch(ttMode, articles[i:end], redisCli, int64(i), int64(end), offset, OffsetQueue, ngtprogress) if err != nil { - log.Printf("Newsgroup: '%s' | Error processing batch %d-%d: %v", newsgroup.Name, i+1, end, err) - return fmt.Errorf("error processing batch %d-%d for newsgroup '%s': %v", i+1, end, newsgroup.Name, err) + log.Printf("Newsgroup: '%s' | Error processing batch %d-%d: %v", ng.Name, i+1, end, err) + return fmt.Errorf("error processing batch %d-%d for newsgroup '%s': %v", i+1, end, ng.Name, err) } // pass the response channel to the collector channel: ttResponses ttResponses <- &nntp.TTSetup{ @@ -1418,24 +1424,24 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch } remainingArticles -= int64(len(articles)) if VERBOSE { - log.Printf("Newsgroup: '%s' | Pushed to queue (offset %d/%d) remaining: %d (Check=%t)", newsgroup.Name, offset, totalArticles, remainingArticles, ttMode.UseCHECK()) - //log.Printf("Newsgroup: '%s' | Pushed (offset %d/%d) total: %d/%d (unw: %d / rej: %d) (Check=%t)", newsgroup.Name, offset, totalArticles, transferred, remainingArticles, ttMode.Unwanted, ttMode.Rejected, ttMode.GetMode()) + log.Printf("Newsgroup: '%s' | Pushed to queue (offset %d/%d) remaining: %d (Check=%t)", ng.Name, offset, totalArticles, remainingArticles, ttMode.UseCHECK()) + //log.Printf("Newsgroup: '%s' | Pushed (offset %d/%d) total: %d/%d (unw: %d / rej: %d) (Check=%t)", ng.Name, offset, totalArticles, transferred, remainingArticles, ttMode.Unwanted, ttMode.Rejected, ttMode.GetMode()) } } // end for offset range totalArticles - //log.Printf("Newsgroup: '%s' | Main article loop completed, checking for requeued jobs...", newsgroup.Name) + //log.Printf("Newsgroup: '%s' | Main article loop completed, checking for requeued jobs...", ng.Name) // Process any remaining requeued jobs after main loop completes // This handles failures that occurred in the last batch for { if common.WantShutdown() { - log.Printf("WantShutdown during final requeue processing for '%s'", newsgroup.Name) + log.Printf("WantShutdown during final requeue processing for '%s'", ng.Name) break } - processed, err := processRequeuedJobs(newsgroup.Name, ttMode, ttResponses, redisCli) + processed, err := processRequeuedJobs(ng.Name, ttMode, ttResponses, redisCli) if err != nil { - log.Printf("Newsgroup: '%s' | Error in final requeue processing: %v", newsgroup.Name, err) + log.Printf("Newsgroup: '%s' | Error in final requeue processing: %v", ng.Name, err) // Don't return error, just log it - we've already processed most articles break } @@ -1443,47 +1449,34 @@ func transferNewsgroup(db *database.Database, newsgroup *models.Newsgroup, batch // No more requeued jobs to process break } - //log.Printf("Newsgroup: '%s' | Processed %d requeued jobs in final pass", newsgroup.Name, processed) + //log.Printf("Newsgroup: '%s' | Processed %d requeued jobs in final pass", ng.Name, processed) // Loop again to check if any of those jobs failed and were requeued } - //log.Printf("Newsgroup: '%s' | Final requeue processing completed, closing ttResponses channel...", newsgroup.Name) + //log.Printf("Newsgroup: '%s' | Final requeue processing completed, closing ttResponses channel...", ng.Name) // Close the ttResponses channel to signal collector goroutine to finish close(ttResponses) - //log.Printf("Newsgroup: '%s' | ttResponses channel closed, waiting for collector to finish...", 
newsgroup.Name) + //log.Printf("Newsgroup: '%s' | ttResponses channel closed, waiting for collector to finish...", ng.Name) // Wait for collector goroutine to finish processing all responses collectorWG.Wait() - //log.Printf("Newsgroup: '%s' | All jobs completed and responses collected", newsgroup.Name) + //log.Printf("Newsgroup: '%s' | All jobs completed and responses collected", ng.Name) return nil } // end func transferNewsgroup -// NewsgroupProgress tracks the progress of a newsgroup transfer -type NewsgroupProgress struct { - Mux sync.RWMutex - Started time.Time - LastUpdated time.Time - OffsetStart int64 - BatchStart int64 - BatchEnd int64 - TotalArticles int64 - Finished bool -} - var results []string var rejectedArticles = make(map[string][]string) var resultsMutex sync.RWMutex -var NewsgroupProgressMap = make(map[string]*NewsgroupProgress) var lowerLevel float64 = 90.0 var upperLevel float64 = 95.0 // processBatch processes a batch of articles using NNTP streaming protocol (RFC 4644) // Uses TAKETHIS primarily, falls back to CHECK when success rate < 95% -func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCli *redis.Client, batchStart int64, batchEnd int64, dbOffset int64, offsetQ *nntp.OffsetQueue) (chan *nntp.TTResponse, error) { +func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCli *redis.Client, batchStart int64, batchEnd int64, dbOffset int64, offsetQ *nntp.OffsetQueue, ngtprogress *nntp.NewsgroupTransferProgress) (chan *nntp.TTResponse, error) { if len(articles) == 0 { log.Printf("processBatch: no articles in this batch for newsgroup '%s'", *ttMode.Newsgroup) @@ -1492,7 +1485,7 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl // Update newsgroup progress with current offset resultsMutex.RLock() - if progress, exists := NewsgroupProgressMap[*ttMode.Newsgroup]; exists { + if progress, exists := nntp.NewsgroupTransferProgressMap[*ttMode.Newsgroup]; exists { progress.Mux.Lock() progress.OffsetStart = dbOffset progress.BatchStart = batchStart @@ -1516,6 +1509,7 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl BatchStart: batchStart, BatchEnd: batchEnd, OffsetQ: offsetQ, + NGTProgress: ngtprogress, } var redis_cache_hits int if redisCli != nil && len(articles) > 0 { @@ -1775,7 +1769,8 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art } // Send TAKETHIS command with article content (non-blocking) log.Printf("Newsgroup: '%s' | Pre-Send TAKETHIS '%s'", newsgroup, article.MessageID) - cmdID, err := conn.SendTakeThisArticleStreaming(article, &processor.LocalNNTPHostname, newsgroup) + cmdID, txBytes, err := conn.SendTakeThisArticleStreaming(article, &processor.LocalNNTPHostname, newsgroup) + job.NGTProgress.AddTXBytes(txBytes) if err != nil { if err == common.ErrNoNewsgroups { log.Printf("Newsgroup: '%s' | skipped TAKETHIS '%s': no newsgroups header", newsgroup, article.MessageID) @@ -2070,10 +2065,11 @@ forever: continue } rqj := &nntp.CHTTJob{ - JobID: job.JobID, - Newsgroup: job.Newsgroup, - Articles: job.Articles, - OffsetQ: job.OffsetQ, + JobID: job.JobID, + Newsgroup: job.Newsgroup, + Articles: job.Articles, + OffsetQ: job.OffsetQ, + NGTProgress: job.NGTProgress, } job.Mux.Unlock() @@ -2094,6 +2090,7 @@ forever: job.MessageIDs = nil job.WantedIDs = nil job.OffsetQ = nil + job.NGTProgress = nil job.Mux.Unlock() } } @@ -2490,7 +2487,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue 
rr.ClearReadRequest() return } - //log.Printf("CheckWorker (%d): Reading CHECK response for msgID: %s (cmdId:%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) + log.Printf("CheckWorker (%d): Pre-Read CHECK response for msgID: %s (cmdId:%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) start := time.Now() /* disabled if err := conn.SetReadDeadline(time.Now().Add(1 * time.Minute)); err != nil { @@ -2501,6 +2498,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } */ conn.TextConn.StartResponse(rr.CmdID) + log.Printf("CheckWorker (%d): Reading CHECK response for msgID: %s (cmdId:%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) code, line, err := conn.TextConn.ReadCodeLine(238) conn.TextConn.EndResponse(rr.CmdID) if code == 0 && err != nil { @@ -2509,7 +2507,6 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rr.ClearReadRequest() return } - //log.Printf("CheckWorker (%d): Got CHECK response line: '%s' for msgID: %s (cmdId:%d MID=%d/%d) took: %v ms", workerID, line, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) /* disabled if err := conn.SetReadDeadline(time.Time{}); err != nil { log.Printf("Failed to set unset read deadline: %v", err) @@ -2552,6 +2549,8 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rr.ClearReadRequest() return } + log.Printf("CheckWorker (%d): Got CHECK response: '%s' (cmdId:%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) + rs.Mux.Lock() job, exists := rs.jobMap[rr.MsgID] rs.Mux.Unlock() @@ -3002,6 +3001,7 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { Newsgroup Progress + Speed Started Duration @@ -3022,8 +3022,10 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { Initializing... 
{{end}} + {{.SpeedKB}} KByte/s {{.Started}} {{.Duration}} + {{end}} @@ -3070,14 +3072,16 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { Started string LastUpdated string Finished bool + SpeedKB int64 Duration string } - started := len(NewsgroupProgressMap) + started := len(nntp.NewsgroupTransferProgressMap) finished := 0 var progressList []ProgressInfo - for name, progress := range NewsgroupProgressMap { + for name, progress := range nntp.NewsgroupTransferProgressMap { + progress.CalcSpeed() progress.Mux.RLock() if progress.Finished { finished++ @@ -3095,6 +3099,7 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { TotalArticles: progress.TotalArticles, Started: progress.Started.Format("15:04:05"), LastUpdated: progress.LastUpdated.Format("15:04:05"), + SpeedKB: progress.LastSpeedKB, Finished: false, Duration: duration, }) diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 622109d..2e652d9 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -169,6 +169,7 @@ type CHTTJob struct { BatchStart int64 BatchEnd int64 OffsetQ *OffsetQueue + NGTProgress *NewsgroupTransferProgress } func (job *CHTTJob) Response(ForceCleanUp bool, Err error) { @@ -191,6 +192,52 @@ func (job *CHTTJob) Response(ForceCleanUp bool, Err error) { close(job.ResponseChan) } +// NewsgroupTransferProgressMap is protected by resultsMutex in nntp-transfer/main.go +var NewsgroupTransferProgressMap = make(map[string]*NewsgroupTransferProgress) + +// NewsgroupProgress tracks the progress of a newsgroup transfer +type NewsgroupTransferProgress struct { + Mux sync.RWMutex + Newsgroup *string + Started time.Time + LastUpdated time.Time + OffsetStart int64 + BatchStart int64 + BatchEnd int64 + TotalArticles int64 + Finished bool + TXBytes int64 + TXBytesTMP int64 + LastCronTX time.Time + LastSpeedKB int64 +} + +func (ngp *NewsgroupTransferProgress) CalcSpeed() (speed int64) { + ngp.Mux.Lock() + if time.Since(ngp.LastCronTX) >= time.Second*5 { + since := time.Since(ngp.LastCronTX) + ngp.LastSpeedKB = int64(float64(ngp.TXBytesTMP) / since.Seconds() / 1024) + speed = ngp.LastSpeedKB + log.Printf("Newsgroup: '%s' | Transfer speed: %d KB/s (%d bytes in %v)", *ngp.Newsgroup, ngp.LastSpeedKB, ngp.TXBytesTMP, since) + ngp.TXBytesTMP = 0 + ngp.LastCronTX = time.Now() + } else { + speed = ngp.LastSpeedKB + } + ngp.Mux.Unlock() + return speed +} + +func (ngp *NewsgroupTransferProgress) AddTXBytes(n int) { + if n > 0 { + ngp.Mux.Lock() + ngp.TXBytes += int64(n) + ngp.TXBytesTMP += int64(n) + ngp.Mux.Unlock() + } + ngp.CalcSpeed() +} + const IncrFLAG_CHECKED = 1 const IncrFLAG_WANTED = 2 const IncrFLAG_UNWANTED = 3 @@ -1345,7 +1392,7 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readResponsesChan return fmt.Errorf("failed to send CHECK '%s': %w", *msgID, err) } log.Printf("Newsgroup: '%s' | CHECK sent '%s' (CmdID=%d) pass notify to readResponsesChan=%d", *job.Newsgroup, *msgID, id, len(readResponsesChan)) - readResponsesChan <- &ReadRequest{CmdID: id, Job: job, Reqs: len(messageIDs), MsgID: msgID, N: n + 1} + readResponsesChan <- &ReadRequest{CmdID: id, Job: job, MsgID: msgID, N: n + 1, Reqs: len(messageIDs)} log.Printf("Newsgroup: '%s' | CHECK notified response reader '%s' (CmdID=%d) readResponsesChan=%d", *job.Newsgroup, *msgID, id, len(readResponsesChan)) id++ } @@ -1529,18 +1576,18 @@ func (c *BackendConn) GetBufSize(size int) int { // SendTakeThisArticleStreaming sends TAKETHIS command and article content 
without waiting for response // Returns command ID for later response reading - used for streaming mode -func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string, newsgroup string) (uint, error) { +func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string, newsgroup string) (cmdID uint, txBytes int, err error) { c.mux.Lock() //defer c.mux.Unlock() if !c.connected { c.mux.Unlock() - return 0, fmt.Errorf("not connected") + return 0, 0, fmt.Errorf("not connected") } if c.ModeReader { c.mux.Unlock() - return 0, fmt.Errorf("cannot send article in reader mode") + return 0, 0, fmt.Errorf("cannot send article in reader mode") } c.lastUsed = time.Now() c.mux.Unlock() @@ -1548,7 +1595,7 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp // Prepare article for transfer headers, err := common.ReconstructHeaders(article, true, nntphostname, newsgroup) if err != nil { - return 0, err + return 0, 0, err } writer := bufio.NewWriterSize(c.conn, c.GetBufSize(article.Bytes)) // Slightly larger buffer than article size for headers defer writer.Flush() @@ -1557,21 +1604,25 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp defer c.mux.Unlock() // Send TAKETHIS command - id, err := c.TextConn.Cmd("TAKETHIS %s", article.MessageID) + cmdID, err = c.TextConn.Cmd("TAKETHIS %s", article.MessageID) if err != nil { - return 0, fmt.Errorf("failed SendTakeThisArticleStreaming command: %w", err) + return 0, 0, fmt.Errorf("failed SendTakeThisArticleStreaming command: %w", err) } // Send headers for _, headerLine := range headers { - if _, err := writer.WriteString(headerLine + CRLF); err != nil { - return 0, fmt.Errorf("failed to write header SendTakeThisArticleStreaming: %w", err) + if tx, err := writer.WriteString(headerLine + CRLF); err != nil { + return 0, txBytes, fmt.Errorf("failed to write header SendTakeThisArticleStreaming: %w", err) + } else { + txBytes += tx } } // Send empty line between headers and body - if _, err := writer.WriteString(CRLF); err != nil { - return 0, fmt.Errorf("failed to write header/body separator SendTakeThisArticleStreaming: %w", err) + if tx, err := writer.WriteString(CRLF); err != nil { + return 0, txBytes, fmt.Errorf("failed to write header/body separator SendTakeThisArticleStreaming: %w", err) + } else { + txBytes += tx } // Send body with proper dot-stuffing @@ -1591,18 +1642,22 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp line = "." 
+ line } - if _, err := writer.WriteString(line + CRLF); err != nil { - return 0, fmt.Errorf("failed to write body line SendTakeThisArticleStreaming: %w", err) + if tx, err := writer.WriteString(line + CRLF); err != nil { + return 0, txBytes, fmt.Errorf("failed to write body line SendTakeThisArticleStreaming: %w", err) + } else { + txBytes += tx } } // Send termination line (single dot) - if _, err := writer.WriteString(DOT + CRLF); err != nil { - return 0, fmt.Errorf("failed to send article terminator SendTakeThisArticleStreaming: %w", err) + if tx, err := writer.WriteString(DOT + CRLF); err != nil { + return 0, txBytes, fmt.Errorf("failed to send article terminator SendTakeThisArticleStreaming: %w", err) + } else { + txBytes += tx } // Return command ID without reading response (streaming mode) - return id, nil + return cmdID, txBytes, nil } // ReadTakeThisResponseStreaming reads a TAKETHIS response using the command ID @@ -1619,10 +1674,21 @@ func (c *BackendConn) ReadTakeThisResponseStreaming(newsgroup string, cr *CheckR log.Printf("Newsgroup: '%s' | TAKETHIS got *BackendConn.ReadTakeThisResponseStreaming: passed StartResponse CmdID=%d message-id '%s'", newsgroup, cr.CmdId, cr.Article.MessageID) //c.mux.Lock() //defer c.mux.Unlock() - code, _, err := c.TextConn.ReadCodeLine(239) + code, line, err := c.TextConn.ReadCodeLine(239) if code == 0 && err != nil { return 0, fmt.Errorf("failed to read TAKETHIS response: %w", err) } + parts := strings.Fields(line) + if len(parts) < 1 { + log.Printf("ERROR in ReadTakeThisResponseStreaming: Malformed response code=%d line: '%s' CmdID=%d message-id '%s')", code, line, cr.CmdId, cr.Article.MessageID) + //rr.ReturnReadRequest(rrRetChan) + return 0, fmt.Errorf("malformed TAKETHIS response: %s", line) + } + + if parts[0] != cr.Article.MessageID { + log.Printf("ERROR in ReadTakeThisResponseStreaming: Mismatched response code=%d line: '%s' (expected msgID '%s') CmdID=%d", code, line, cr.Article.MessageID, cr.CmdId) + return 0, fmt.Errorf("out of order TAKETHIS response: expected %s, got %s", cr.Article.MessageID, parts[0]) + } log.Printf("Newsgroup: '%s' | TAKETHIS got *BackendConn.ReadTakeThisResponseStreaming: passed ReadCodeLine CmdID=%d: code=%d message-id '%s'", newsgroup, cr.CmdId, code, cr.Article.MessageID) // Parse response From 30dea3a464e0caf708d3f281f096531aaa5d2728 Mon Sep 17 00:00:00 2001 From: go-while Date: Thu, 9 Oct 2025 23:19:54 +0000 Subject: [PATCH 033/100] testing --- cmd/nntp-transfer/main.go | 15 +- internal/nntp/nntp-client-commands.go | 228 +++++--------------------- 2 files changed, 57 insertions(+), 186 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index a3a5e3c..94b6137 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1770,7 +1770,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art // Send TAKETHIS command with article content (non-blocking) log.Printf("Newsgroup: '%s' | Pre-Send TAKETHIS '%s'", newsgroup, article.MessageID) cmdID, txBytes, err := conn.SendTakeThisArticleStreaming(article, &processor.LocalNNTPHostname, newsgroup) - job.NGTProgress.AddTXBytes(txBytes) + job.NGTProgress.AddNGTP(0, 1, int64(txBytes)) if err != nil { if err == common.ErrNoNewsgroups { log.Printf("Newsgroup: '%s' | skipped TAKETHIS '%s': no newsgroups header", newsgroup, article.MessageID) @@ -2554,6 +2554,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Lock() job, exists := rs.jobMap[rr.MsgID] rs.Mux.Unlock() 
+ job.NGTProgress.AddNGTP(1, 0, 0) //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG1 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) if !exists { log.Printf("Newsgroup: '%s' | ERROR in CheckWorker: ReadCheckResponse msgId '%s' did not exist in jobMap.", *job.Newsgroup, *rr.MsgID) @@ -3002,6 +3003,9 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { Newsgroup Progress Speed + CH/s + TT/s + Active Started Duration @@ -3023,6 +3027,9 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { {{end}} {{.SpeedKB}} KByte/s + {{.LastArtPerfC}}/s + {{.LastArtPerfT}}/s + {{.TimeSince}} ago {{.Started}} {{.Duration}} @@ -3074,6 +3081,9 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { Finished bool SpeedKB int64 Duration string + TimeSince string + LastArtPerfC int64 + LastArtPerfT int64 } started := len(nntp.NewsgroupTransferProgressMap) @@ -3099,7 +3109,10 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { TotalArticles: progress.TotalArticles, Started: progress.Started.Format("15:04:05"), LastUpdated: progress.LastUpdated.Format("15:04:05"), + TimeSince: time.Since(progress.LastUpdated).Round(time.Second).String(), SpeedKB: progress.LastSpeedKB, + LastArtPerfC: progress.LastArtPerfC, + LastArtPerfT: progress.LastArtPerfT, Finished: false, Duration: duration, }) diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 2e652d9..3ea7dfc 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -62,7 +62,7 @@ type OffsetQueue struct { queued int } -var ReturnDelay = time.Millisecond * 256 +var ReturnDelay = time.Millisecond * 16 func (o *OffsetQueue) Wait(n int) { start := time.Now() @@ -79,7 +79,6 @@ func (o *OffsetQueue) Wait(n int) { o.isleep = 0 } o.mux.Unlock() - return } if time.Since(lastPrint) > time.Second { @@ -205,34 +204,61 @@ type NewsgroupTransferProgress struct { BatchStart int64 BatchEnd int64 TotalArticles int64 + ArticlesTT int64 + ArticlesCH int64 Finished bool TXBytes int64 TXBytesTMP int64 LastCronTX time.Time LastSpeedKB int64 + LastArtPerfC int64 // check articles per second + LastArtPerfT int64 // takethis articles per second } -func (ngp *NewsgroupTransferProgress) CalcSpeed() (speed int64) { +func (ngp *NewsgroupTransferProgress) CalcSpeed() { ngp.Mux.Lock() + defer ngp.Mux.Unlock() if time.Since(ngp.LastCronTX) >= time.Second*5 { - since := time.Since(ngp.LastCronTX) - ngp.LastSpeedKB = int64(float64(ngp.TXBytesTMP) / since.Seconds() / 1024) - speed = ngp.LastSpeedKB - log.Printf("Newsgroup: '%s' | Transfer speed: %d KB/s (%d bytes in %v)", *ngp.Newsgroup, ngp.LastSpeedKB, ngp.TXBytesTMP, since) + since := int64(time.Since(ngp.LastCronTX).Seconds()) + if ngp.TXBytesTMP > 0 { + ngp.LastSpeedKB = ngp.TXBytesTMP / since / 1024 + } + if ngp.ArticlesCH > 0 { + ngp.LastArtPerfC = ngp.ArticlesCH / since + } + if ngp.ArticlesTT > 0 { + ngp.LastArtPerfT = ngp.ArticlesTT / since + } + log.Printf("Newsgroup: '%s' | Transfer Perf: %d KB/s (%d bytes in %v) did: CH=(%d|%d/s) TT=(%d|%d/s)", *ngp.Newsgroup, ngp.LastSpeedKB, ngp.TXBytesTMP, since, ngp.ArticlesCH, ngp.LastArtPerfC, ngp.ArticlesTT, ngp.LastArtPerfT) + + ngp.ArticlesCH = 0 + ngp.ArticlesTT = 0 ngp.TXBytesTMP = 0 ngp.LastCronTX = time.Now() - } else { - speed = ngp.LastSpeedKB + } - ngp.Mux.Unlock() - return speed } -func (ngp *NewsgroupTransferProgress) AddTXBytes(n int) { - if n > 0 { +func (ngp 
*NewsgroupTransferProgress) AddNGTP(articlesCH int64, articlesTT int64, txbytes int64) { + if articlesCH > 0 { + ngp.Mux.Lock() + ngp.ArticlesCH += articlesCH + ngp.Mux.Unlock() + } + if articlesTT > 0 { + ngp.Mux.Lock() + ngp.ArticlesTT += articlesTT + ngp.Mux.Unlock() + } + if txbytes > 0 { + ngp.Mux.Lock() + ngp.TXBytes += txbytes + ngp.TXBytesTMP += txbytes + ngp.Mux.Unlock() + } + if articlesCH > 0 || articlesTT > 0 || txbytes > 0 { ngp.Mux.Lock() - ngp.TXBytes += int64(n) - ngp.TXBytesTMP += int64(n) + ngp.LastUpdated = time.Now() ngp.Mux.Unlock() } ngp.CalcSpeed() @@ -1399,179 +1425,11 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readResponsesChan return nil } -/* -// CheckMultiple sends a CHECK command for multiple message IDs and returns responses -func (c *BackendConn) CheckMultiple(messageIDs []*string, ttMode *TakeThisMode) ([]*string, error) { - c.mux.Lock() - defer c.mux.Unlock() - if !c.connected { - return nil, fmt.Errorf("not connected") - } - - if c.ModeReader { - return nil, fmt.Errorf("cannot check article in reader mode") - } - - if len(messageIDs) == 0 { - return nil, fmt.Errorf("no message IDs provided") - } - - c.lastUsed = time.Now() - - // Send individual CHECK commands for each message ID (pipelining) - commandIds := make([]uint, len(messageIDs)) - for i, msgID := range messageIDs { - id, err := c.TextConn.Cmd("CHECK %s", *msgID) - if err != nil { - return nil, fmt.Errorf("failed to send CHECK command for %s: %w", *msgID, err) - } - commandIds[i] = id - } - - // Read responses for each CHECK command - wantedIds := make([]*string, 0, len(messageIDs)) - for i, msgID := range messageIDs { - id := commandIds[i] - // Read response for this CHECK command - c.TextConn.StartResponse(id) - code, line, err := c.TextConn.ReadCodeLine(238) - c.TextConn.EndResponse(id) - if code == 0 && err != nil { - log.Printf("Failed to read CHECK response for %s: %v", *msgID, err) - return nil, fmt.Errorf("failed to read CHECK response for %s: %w", *msgID, err) - } - - // Parse response line - // Format: code [message] - // 238 - article wanted - // 431 - article not wanted - // 438 - article not wanted (already have it) - // ReadCodeLine returns: code=238, message=" article wanted" - parts := strings.Fields(line) - if len(parts) < 1 { - log.Printf("Malformed CHECK response: %s", line) - return nil, fmt.Errorf("malformed CHECK response: %s", line) - } - if parts[0] != *msgID { - log.Printf("Mismatched CHECK response: expected %s, got %s", *msgID, parts[0]) - return nil, fmt.Errorf("out of order CHECK response: expected %s, got %s", *msgID, parts[0]) - } - switch code { - case 238: - //log.Printf("Wanted Article '%s': response=%d", *msgID, code) - wantedIds = append(wantedIds, msgID) - ttMode.Wanted++ - case 438: - //log.Printf("Unwanted Article '%s': response=%d", *msgID, code) - ttMode.Unwanted++ - case 431: - continue - default: - log.Printf("Unknown CHECK response: line='%s' code=%d expected msgID %s", line, code, *msgID) - return nil, fmt.Errorf("unknown check response line='%s' code=%d", line, code) - - } - } - // Return all responses - return wantedIds, nil -} -*/ - -/* unused -// TakeThisArticle sends an article via TAKETHIS command -func (c *BackendConn) xxTakeThisArticle(article *models.Article, nntphostname *string, newsgroup string) (int, error) { - c.mux.Lock() - defer c.mux.Unlock() - - if !c.connected { - return 0, fmt.Errorf("not connected") - } - if c.ModeReader { - return 0, fmt.Errorf("cannot send article in reader mode") - } - - // Prepare article 
for transfer - headers, err := common.ReconstructHeaders(article, true, nntphostname, newsgroup) - if err != nil { - return 0, fmt.Errorf("failed to reconstruct headers: %v", err) - } - - c.lastUsed = time.Now() - writer := bufio.NewWriter(c.conn) - defer writer.Flush() - // Send TAKETHIS command - id, err := c.TextConn.Cmd("TAKETHIS %s", article.MessageID) - if err != nil { - return 0, fmt.Errorf("failed to send TAKETHIS command: %w", err) - } - - // Send headers - for _, headerLine := range headers { - if _, err := writer.WriteString(headerLine + CRLF); err != nil { - return 0, fmt.Errorf("failed to write header: %w", err) - } - } - - // Send empty line between headers and body - if _, err := writer.WriteString(CRLF); err != nil { - return 0, fmt.Errorf("failed to write header/body separator: %w", err) - } - - // Send body with proper dot-stuffing - // Split body preserving line endings - bodyLines := strings.Split(article.BodyText, "\n") - for i, line := range bodyLines { - // Skip empty last element from trailing \n - if i == len(bodyLines)-1 && line == "" { - break - } - - // Remove trailing \r if present (will add CRLF) - line = strings.TrimSuffix(line, "\r") - - // Dot-stuff lines that start with a dot (RFC 977) - if strings.HasPrefix(line, ".") { - line = "." + line - } - - if _, err := writer.WriteString(line + CRLF); err != nil { - return 0, fmt.Errorf("failed to write body line: %w", err) - } - } - - // Send termination line (single dot) - if _, err := writer.WriteString(DOT + CRLF); err != nil { - return 0, fmt.Errorf("failed to send article terminator: %w", err) - } - - // Flush the writer to ensure all data is sent - if err := writer.Flush(); err != nil { - return 0, fmt.Errorf("failed to flush article data: %w", err) - } - - // Read TAKETHIS response - c.TextConn.StartResponse(id) - defer c.TextConn.EndResponse(id) - - code, _, err := c.TextConn.ReadCodeLine(239) - if code == 0 && err != nil { - return 0, fmt.Errorf("failed to read TAKETHIS response: %w", err) - } - - // Parse response - // Format: code [message] - // 239 - article transferred successfully - // 439 - article transfer failed - - return code, nil -} -*/ - func (c *BackendConn) GetBufSize(size int) int { - if size+2048 <= 16*1024 { + if size+2048 <= 16384 { return size + 2048 } - return 16 * 1024 // hardcoded default 32KB max buffer size + return 16384 // hardcoded default max buffer size } // SendTakeThisArticleStreaming sends TAKETHIS command and article content without waiting for response From afa9db6785f0e9ed74359e08290e3378f8aa013e Mon Sep 17 00:00:00 2001 From: go-while Date: Thu, 9 Oct 2025 23:40:47 +0000 Subject: [PATCH 034/100] takethis conn mutex --- cmd/nntp-transfer/main.go | 7 +++++-- internal/nntp/nntp-client-commands.go | 20 ++++++++++++++------ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 94b6137..081965f 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1762,7 +1762,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art // Now send TAKETHIS for non-cached articles artChan := make(chan *nntp.CheckResponse, len(articles)) // ← Also close artChan - + conn.Lock() for _, article := range articles { if article == nil { continue // Skip cached articles @@ -1770,12 +1770,13 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art // Send TAKETHIS command with article content (non-blocking) log.Printf("Newsgroup: '%s' | Pre-Send TAKETHIS '%s'", 
newsgroup, article.MessageID) cmdID, txBytes, err := conn.SendTakeThisArticleStreaming(article, &processor.LocalNNTPHostname, newsgroup) - job.NGTProgress.AddNGTP(0, 1, int64(txBytes)) + job.NGTProgress.AddNGTP(0, 0, int64(txBytes)) if err != nil { if err == common.ErrNoNewsgroups { log.Printf("Newsgroup: '%s' | skipped TAKETHIS '%s': no newsgroups header", newsgroup, article.MessageID) continue } + conn.Unlock() conn.ForceCloseConn() log.Printf("ERROR Newsgroup: '%s' | Failed to send TAKETHIS for %s: %v", newsgroup, article.MessageID, err) return 0, 0, redis_cached, fmt.Errorf("failed to send TAKETHIS for %s: %v", article.MessageID, err) @@ -1786,6 +1787,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art CmdId: cmdID, } } + conn.Unlock() close(artChan) //log.Printf("Sent %d TAKETHIS commands, reading responses...", len(commandIDs)) var done []*string @@ -1804,6 +1806,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art } log.Printf("Newsgroup: '%s' | GOT TAKETHIS response '%s': %d CmdID=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, takeThisResponseCode, cr.CmdId, countDone+1, len(articles)) countDone++ + job.NGTProgress.AddNGTP(0, 1, 0) // Update success rate tracking switch takeThisResponseCode { case 239: diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 3ea7dfc..1c9459b 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -1405,6 +1405,7 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readResponsesChan //writer := bufio.NewWriter(c.conn) //defer writer.Flush() //log.Printf("Newsgroup: '%s' | SendCheckMultiple commands for %d message IDs", *job.Newsgroup, len(messageIDs)) + for n, msgID := range messageIDs { if msgID == nil || *msgID == "" { log.Printf("Newsgroup: '%s' | Skipping empty message ID in CHECK command", *job.Newsgroup) @@ -1432,23 +1433,30 @@ func (c *BackendConn) GetBufSize(size int) int { return 16384 // hardcoded default max buffer size } +func (c *BackendConn) Lock() { + c.mux.Lock() +} +func (c *BackendConn) Unlock() { + c.mux.Unlock() +} + // SendTakeThisArticleStreaming sends TAKETHIS command and article content without waiting for response // Returns command ID for later response reading - used for streaming mode func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string, newsgroup string) (cmdID uint, txBytes int, err error) { - c.mux.Lock() + //c.mux.Lock() //defer c.mux.Unlock() if !c.connected { - c.mux.Unlock() + //c.mux.Unlock() return 0, 0, fmt.Errorf("not connected") } if c.ModeReader { - c.mux.Unlock() + //c.mux.Unlock() return 0, 0, fmt.Errorf("cannot send article in reader mode") } c.lastUsed = time.Now() - c.mux.Unlock() + //c.mux.Unlock() // Prepare article for transfer headers, err := common.ReconstructHeaders(article, true, nntphostname, newsgroup) @@ -1458,8 +1466,8 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp writer := bufio.NewWriterSize(c.conn, c.GetBufSize(article.Bytes)) // Slightly larger buffer than article size for headers defer writer.Flush() - c.mux.Lock() - defer c.mux.Unlock() + //c.mux.Lock() + //defer c.mux.Unlock() // Send TAKETHIS command cmdID, err = c.TextConn.Cmd("TAKETHIS %s", article.MessageID) From 4c3885ef928546936e31ad58f230ef4e1cb2d1c5 Mon Sep 17 00:00:00 2001 From: go-while Date: Fri, 10 Oct 2025 00:06:23 +0000 Subject: [PATCH 035/100] cherry-pick: 
updateNewsgroupsExpiryFromFile --- cmd/web/main.go | 16 +++++++++ cmd/web/main_functions.go | 71 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) diff --git a/cmd/web/main.go b/cmd/web/main.go index 5bee454..f35959c 100644 --- a/cmd/web/main.go +++ b/cmd/web/main.go @@ -67,6 +67,9 @@ var ( compareActiveFile string compareActiveMinArticles int64 + // Expiry update flag + updateNewsgroupsExpiryFile string + // Bridge flags (disabled by default) /* code path disabled (not tested) enableFediverse bool @@ -123,6 +126,7 @@ func main() { flag.BoolVar(&findOrphanDBs, "find-orphan-dbs", false, "Find orphaned database folders in data/db that don't correspond to any newsgroup in main database") flag.StringVar(&compareActiveFile, "compare-active", "", "Compare active file with database and show missing groups (format: groupname highwater lowwater status)") flag.Int64Var(&compareActiveMinArticles, "compare-active-min-articles", 0, "use with -compare-active: only show groups with more than N articles (calculated as high-low)") + flag.StringVar(&updateNewsgroupsExpiryFile, "update-newsgroups-expiry-from-file", "", "Update newsgroup expiry_days from file (format: newsgroup:days, one per line)") flag.BoolVar(&verbose, "verbose", false, "print more (debug) output") flag.StringVar(&dataDir, "data", "./data", "path to database and config directory") @@ -306,6 +310,18 @@ func main() { } } + // updateNewsgroupsExpiryFile + if updateNewsgroupsExpiryFile != "" { + log.Printf("[WEB]: Updating newsgroup expiry from file: %s", updateNewsgroupsExpiryFile) + if err := updateNewsgroupsExpiryFromFile(db, updateNewsgroupsExpiryFile); err != nil { + log.Printf("[WEB]: Error: Failed to update newsgroup expiry: %v", err) + os.Exit(1) + } else { + log.Printf("[WEB]: Newsgroup expiry update completed successfully") + os.Exit(0) + } + } + // Get or set history UseShortHashLen configuration finalUseShortHashLen, isLocked, err := db.GetHistoryUseShortHashLen(useShortHashLen) if err != nil { diff --git a/cmd/web/main_functions.go b/cmd/web/main_functions.go index e4d8d65..f3516ea 100644 --- a/cmd/web/main_functions.go +++ b/cmd/web/main_functions.go @@ -934,3 +934,74 @@ func formatBytes(bytes int64) string { } return fmt.Sprintf("%.1f %cB", float64(bytes)/float64(div), "KMGTPE"[exp]) } + +// updateNewsgroupsExpiryFromFile reads a file with newsgroup:days format and updates expiry_days +func updateNewsgroupsExpiryFromFile(db *database.Database, filename string) error { + file, err := os.Open(filename) + if err != nil { + return fmt.Errorf("failed to open file: %w", err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + lineNum := 0 + updated := 0 + errors := 0 + + for scanner.Scan() { + lineNum++ + line := strings.TrimSpace(scanner.Text()) + + // Skip empty lines and comments + if line == "" || strings.HasPrefix(line, "#") { + continue + } + + // Parse line: newsgroup:days + parts := strings.SplitN(line, ":", 2) + if len(parts) != 2 { + log.Printf("[EXPIRY] Line %d: invalid format (expected 'newsgroup:days'): %s", lineNum, line) + errors++ + continue + } + + newsgroup := strings.TrimSpace(parts[0]) + daysStr := strings.TrimSpace(parts[1]) + + // Strip optional 'd' suffix (e.g., "30d" -> "30") + daysStr = strings.TrimSuffix(daysStr, "d") + daysStr = strings.TrimSuffix(daysStr, "D") + + // Parse days as integer + days, err := strconv.Atoi(daysStr) + if err != nil { + log.Printf("[EXPIRY] Line %d: invalid days value '%s' for newsgroup '%s': %v", lineNum, daysStr, newsgroup, err) + errors++ + 
continue + } + + // Update database + if err := db.UpdateNewsgroupExpiry(newsgroup, days); err != nil { + log.Printf("[EXPIRY] Line %d: failed to update newsgroup '%s': %v", lineNum, newsgroup, err) + errors++ + continue + } + + updated++ + if verbose { + log.Printf("[EXPIRY] Updated '%s' to %d days", newsgroup, days) + } + } + + if err := scanner.Err(); err != nil { + return fmt.Errorf("error reading file: %w", err) + } + + log.Printf("[EXPIRY] Processed %d lines: %d updated, %d errors", lineNum, updated, errors) + + if errors > 0 { + return fmt.Errorf("completed with %d errors", errors) + } + + return nil +} From 5c3c145c63951d57bc0b91bfd038041f0bb776d8 Mon Sep 17 00:00:00 2001 From: go-while Date: Fri, 10 Oct 2025 00:38:16 +0000 Subject: [PATCH 036/100] debugs --- cmd/nntp-transfer/main.go | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 081965f..a201b87 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -2483,14 +2483,14 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rr.ClearReadRequest() return } - //log.Printf("CheckWorker (%d): Read CHECK response (do conn check) for msgID: %s (cmdId:%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) + //log.Printf("CheckWorker (%d): Read CHECK response (do conn check) for msgID: %s (cmdID=%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) if !conn.IsConnected() { log.Printf("CheckWorker (%d): Read CHECK connection lost, exiting", workerID) //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() return } - log.Printf("CheckWorker (%d): Pre-Read CHECK response for msgID: %s (cmdId:%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) + log.Printf("CheckWorker (%d): Pre-Read CHECK response for msgID: %s (cmdID=%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) start := time.Now() /* disabled if err := conn.SetReadDeadline(time.Now().Add(1 * time.Minute)); err != nil { @@ -2501,7 +2501,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } */ conn.TextConn.StartResponse(rr.CmdID) - log.Printf("CheckWorker (%d): Reading CHECK response for msgID: %s (cmdId:%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) + //log.Printf("CheckWorker (%d): Reading CHECK response for msgID: %s (cmdID=%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) code, line, err := conn.TextConn.ReadCodeLine(238) conn.TextConn.EndResponse(rr.CmdID) if code == 0 && err != nil { @@ -2523,12 +2523,12 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue responseCount++ rr.Job.Increment(nntp.IncrFLAG_CHECKED) if rr.N == 1 { - log.Printf("CheckWorker (%d): time to first response for msgID: %s (cmdId:%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) + log.Printf("CheckWorker (%d): time to first response for msgID: %s (cmdID=%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) tookTime = 0 } else if responseCount >= 100 { avg := time.Duration(float64(tookTime) / float64(responseCount)) if avg > 1 { - log.Printf("CheckWorker (%d): Read %d CHECK responses, avg latency: %v, last: %v (cmdId:%d MID=%d/%d)", workerID, responseCount, avg, took, rr.CmdID, rr.N, rr.Reqs) + log.Printf("CheckWorker (%d): Read %d CHECK responses, avg latency: %v, last: %v (cmdID=%d MID=%d/%d)", workerID, responseCount, avg, 
took, rr.CmdID, rr.N, rr.Reqs) } responseCount = 0 tookTime = 0 @@ -2541,31 +2541,31 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // ReadCodeLine returns: code=238, message=" article wanted" parts := strings.Fields(line) if len(parts) < 1 { - log.Printf("ERROR in CheckWorker: Malformed CHECK response code=%d line: '%s' (cmdId:%d MID=%d/%d)", code, line, rr.CmdID, rr.N, rr.Reqs) + log.Printf("ERROR in CheckWorker: Malformed CHECK response code=%d line: '%s' (cmdID=%d MID=%d/%d)", code, line, rr.CmdID, rr.N, rr.Reqs) //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() return } if parts[0] != *rr.MsgID { - log.Printf("ERROR in CheckWorker: Mismatched CHECK response: expected '%s', got '%s' code=%d (cmdId:%d MID=%d/%d)", *rr.MsgID, parts[0], code, rr.CmdID, rr.N, rr.Reqs) + log.Printf("ERROR in CheckWorker: Mismatched CHECK response: expected '%s', got '%s' code=%d (cmdID=%d MID=%d/%d)", *rr.MsgID, parts[0], code, rr.CmdID, rr.N, rr.Reqs) //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() return } - log.Printf("CheckWorker (%d): Got CHECK response: '%s' (cmdId:%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) + log.Printf("CheckWorker (%d): Got CHECK response: '%s' (cmdID=%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) rs.Mux.Lock() job, exists := rs.jobMap[rr.MsgID] rs.Mux.Unlock() job.NGTProgress.AddNGTP(1, 0, 0) - //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG1 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG1 Processing CHECK response for msgID: %s (cmdID=%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) if !exists { log.Printf("Newsgroup: '%s' | ERROR in CheckWorker: ReadCheckResponse msgId '%s' did not exist in jobMap.", *job.Newsgroup, *rr.MsgID) //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() continue loop } - //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG2 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG2 Processing CHECK response for msgID: %s (cmdID=%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) rs.Mux.Lock() rs.jobMap[rr.MsgID] = nil // Nil the pointer before deleting delete(rs.jobMap, rr.MsgID) @@ -2588,7 +2588,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue log.Printf("Newsgroup: '%s' | Unknown CHECK response: line='%s' code=%d expected msgID %s", *job.Newsgroup, line, code, *rr.MsgID) } // check if all jobs are done - //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG3 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG3 Processing CHECK response for msgID: %s (cmdID=%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) rs.Mux.Lock() queuedCount, qexists := rs.jobsQueued[job] readCount, rexists := rs.jobsReadOK[job] @@ -2599,7 +2599,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rr.ClearReadRequest() continue loop } - //log.Printf("Newsgroup: '%s' | 
CheckWorker (%d): DEBUG4 Processing CHECK response for msgID: %s (cmdId:%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG4 Processing CHECK response for msgID: %s (cmdID=%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() if queuedCount == readCount { From fc826a07ec4d54390a95990958f042ba041b6a64 Mon Sep 17 00:00:00 2001 From: go-while Date: Fri, 10 Oct 2025 00:38:35 +0000 Subject: [PATCH 037/100] debugs --- internal/nntp/nntp-client-commands.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 1c9459b..d72d781 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -229,7 +229,7 @@ func (ngp *NewsgroupTransferProgress) CalcSpeed() { if ngp.ArticlesTT > 0 { ngp.LastArtPerfT = ngp.ArticlesTT / since } - log.Printf("Newsgroup: '%s' | Transfer Perf: %d KB/s (%d bytes in %v) did: CH=(%d|%d/s) TT=(%d|%d/s)", *ngp.Newsgroup, ngp.LastSpeedKB, ngp.TXBytesTMP, since, ngp.ArticlesCH, ngp.LastArtPerfC, ngp.ArticlesTT, ngp.LastArtPerfT) + //log.Printf("Newsgroup: '%s' | Transfer Perf: %d KB/s (%d bytes in %v) did: CH=(%d|%d/s) TT=(%d|%d/s)", *ngp.Newsgroup, ngp.LastSpeedKB, ngp.TXBytesTMP, since, ngp.ArticlesCH, ngp.LastArtPerfC, ngp.ArticlesTT, ngp.LastArtPerfT) ngp.ArticlesCH = 0 ngp.ArticlesTT = 0 @@ -1536,11 +1536,11 @@ func (c *BackendConn) ReadTakeThisResponseStreaming(newsgroup string, cr *CheckR log.Printf("Newsgroup: '%s' | TAKETHIS wait for response CmdID=%d message-id '%s'", newsgroup, cr.CmdId, cr.Article.MessageID) // Read TAKETHIS response c.TextConn.StartResponse(cr.CmdId) - defer c.TextConn.EndResponse(cr.CmdId) - log.Printf("Newsgroup: '%s' | TAKETHIS got *BackendConn.ReadTakeThisResponseStreaming: passed StartResponse CmdID=%d message-id '%s'", newsgroup, cr.CmdId, cr.Article.MessageID) + //log.Printf("Newsgroup: '%s' | TAKETHIS got *BackendConn.ReadTakeThisResponseStreaming: passed StartResponse CmdID=%d message-id '%s'", newsgroup, cr.CmdId, cr.Article.MessageID) //c.mux.Lock() //defer c.mux.Unlock() code, line, err := c.TextConn.ReadCodeLine(239) + c.TextConn.EndResponse(cr.CmdId) if code == 0 && err != nil { return 0, fmt.Errorf("failed to read TAKETHIS response: %w", err) } From edb299229123b5477f0169b71f9b57d06aeab0ca Mon Sep 17 00:00:00 2001 From: go-while Date: Fri, 10 Oct 2025 19:51:25 +0000 Subject: [PATCH 038/100] AI did it! => testing I didn't do it, nobody saw me do it, there's no way you can prove anything! 
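
Rough idea of the new ResponseDemuxer, as a standalone sketch (the names below are illustrative only, not the real API in internal/nntp): exactly one goroutine owns all reads from the connection and hands each response to whichever consumer (CHECK or TAKETHIS) queued the matching command, so ReadCodeLine is never called from two goroutines at once.

package main

import "log"

type respKind int

const (
	kindCheck respKind = iota
	kindTakeThis
)

type response struct {
	kind respKind
	code int
	line string
	err  error
}

type demuxer struct {
	pending  chan respKind               // order in which commands were sent
	check    chan response               // consumer channel for CHECK responses
	takethis chan response               // consumer channel for TAKETHIS responses
	read     func() (int, string, error) // single place that reads from the wire
}

// run is the single reader: it pops the kind of the next expected response,
// performs exactly one read, and forwards the result to the matching channel.
func (d *demuxer) run() {
	for kind := range d.pending {
		code, line, err := d.read()
		r := response{kind: kind, code: code, line: line, err: err}
		switch kind {
		case kindCheck:
			d.check <- r
		case kindTakeThis:
			d.takethis <- r
		}
	}
}

func main() {
	// toy reader that always reports "article wanted"
	d := &demuxer{
		pending:  make(chan respKind, 16),
		check:    make(chan response, 16),
		takethis: make(chan response, 16),
		read:     func() (int, string, error) { return 238, "<x@y> article wanted", nil },
	}
	go d.run()
	d.pending <- kindCheck
	log.Printf("%+v", <-d.check)
}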
--- cmd/nntp-transfer/main.go | 468 ++++++++++---------------- internal/common/common.go | 9 +- internal/nntp/nntp-backend-pool.go | 7 +- internal/nntp/nntp-client-commands.go | 312 ++++++++++++++--- internal/nntp/nntp-client.go | 74 ++-- 5 files changed, 497 insertions(+), 373 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index a201b87..59ac853 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1589,129 +1589,13 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl CheckQueues[workerID] <- batchedJob // checkQueue <- batchedJob log.Printf("Newsgroup: '%s' | CheckWorker (%d) queued Job #%d", *ttMode.Newsgroup, workerID, batchedJob.JobID) return batchedJob.ResponseChan, nil - - /* disabled - switch doCheck { - case true: // ttMode.CheckMode - // CHECK mode: verify articles are wanted before sending - //log.Printf("Newsgroup: '%s' | CHECK: %d articles (success rate: %.1f%%)", newsgroup, len(articles), successRate) - // Batch check Redis cache using pipeline (1 round trip for all keys) - var redis_cache_hits int - if redisCli != nil && len(articles) > 0 { - pipe := redisCli.Pipeline() - cmds := make([]*redis.IntCmd, len(articles)) - - // Queue all EXISTS commands - for i, article := range articles { - if article == nil { - continue - } - cmds[i] = pipe.Exists(redisCtx, article.MessageID) - } - - // Execute all in one network round trip - _, err := pipe.Exec(redisCtx) - if err != nil && VERBOSE { - log.Printf("Newsgroup: '%s' | Redis pipeline error: %v", *ttMode.Newsgroup, err) - } - - // Process results - for i, cmd := range cmds { - if cmd == nil || articles[i] == nil { - continue - } - article := articles[i] - exists, cmdErr := cmd.Result() - if cmdErr == nil && exists > 0 { - // Cached in Redis - skip this article - if VERBOSE { - log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis (skip [CHECK])", *ttMode.Newsgroup, article.MessageID) - } - batchedJob.Increment(nntp.IncrFLAG_REDIS_CACHED) - redis_cache_hits++ - articles[i] = nil - continue - } - - // Not cached - add to valid list - batchedJob.Articles = append(batchedJob.Articles, article) - batchedJob.ArticleMap[&article.MessageID] = article - batchedJob.MessageIDs = append(batchedJob.MessageIDs, &article.MessageID) - } - } else { - // No Redis - add all non-nil message IDs - for _, article := range articles { - if article == nil { - continue - } - batchedJob.Articles = append(batchedJob.Articles, article) - batchedJob.ArticleMap[&article.MessageID] = article - batchedJob.MessageIDs = append(batchedJob.MessageIDs, &article.MessageID) - } - } - - if len(batchedJob.MessageIDs) == 0 { - log.Printf("Newsgroup: '%s' | No message IDs to check in batch. 
(redis_cache_hits: %d)", *ttMode.Newsgroup, redis_cache_hits) - return nil, nil - } - if VERBOSE { - log.Printf("Newsgroup: '%s' | Sending CHECK commands for %d/%d articles", *ttMode.Newsgroup, len(batchedJob.MessageIDs), len(articles)) - } - - // Assign job to worker (consistent assignment + load balancing) - if len(CheckQueues) == 0 { - return nil, fmt.Errorf("no workers available") - } - - workerID := assignWorkerToNewsgroup(*ttMode.Newsgroup) - - // Track queue length for load balancing - WorkerQueueLengthMux.Lock() - WorkerQueueLength[workerID]++ - WorkerQueueLengthMux.Unlock() - - log.Printf("Newsgroup: '%s' | CheckWorker (%d) Queue job #%d with %d message IDs", *ttMode.Newsgroup, workerID, batchedJob.JobID, len(batchedJob.MessageIDs)) - CheckQueues[workerID] <- batchedJob // checkQueue <- batchedJob - log.Printf("Newsgroup: '%s' | CheckWorker (%d) Job #%d queued", *ttMode.Newsgroup, workerID, batchedJob.JobID) - return batchedJob.ResponseChan, nil - - // end case ttMode.CheckMode - // case !ttMode.CheckMode - case false: - // TAKETHIS mode: send articles directly without CHECK - //log.Printf("Newsgroup: '%s' | TAKETHIS: %d articles", newsgroup, len(articles)) - - // Validate articles before sending in TAKETHIS mode - for _, article := range articles { - if article == nil { - continue - } - batchedJob.Articles = append(batchedJob.Articles, article) - batchedJob.ArticleMap[&article.MessageID] = article - batchedJob.WantedIDs = append(batchedJob.WantedIDs, &article.MessageID) - } - - if len(batchedJob.Articles) == 0 { - log.Printf("Newsgroup: '%s' | WARN: No valid articles for TAKETHIS mode, skipping batch", *ttMode.Newsgroup) - return nil, nil - } - log.Printf("Newsgroup: '%s' | job #%d Sending to TakeThisQueue with %d articles", *ttMode.Newsgroup, batchedJob.JobID, len(batchedJob.WantedIDs)) - nntp.TakeThisQueue <- batchedJob - log.Printf("Newsgroup: '%s' | Job #%d sent to TakeThisQueue successfully", *ttMode.Newsgroup, batchedJob.JobID) - return batchedJob.ResponseChan, nil - - } // end case !ttMode.CheckMode - // end switch ttMode.CheckMode - - return nil, nil - */ } // end func processBatch // sendArticlesBatchViaTakeThis sends multiple articles via TAKETHIS in streaming mode -// Sends all TAKETHIS commands first, then reads all responses (true streaming) -func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Article, job *nntp.CHTTJob, newsgroup string, redisCli *redis.Client) (transferred uint64, rejected uint64, redis_cached uint64, err error) { +// Sends all TAKETHIS commands and queues ReadRequests for concurrent processing +func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Article, job *nntp.CHTTJob, newsgroup string, redisCli *redis.Client, demuxer *nntp.ResponseDemuxer, readTAKETHISResponsesChan chan *nntp.ReadRequest) (redis_cached uint64, err error) { if len(articles) == 0 { - return 0, 0, 0, nil + return 0, nil } // Phase 1: Send all TAKETHIS commands without waiting for responses @@ -1760,17 +1644,23 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art } // Now send TAKETHIS for non-cached articles - artChan := make(chan *nntp.CheckResponse, len(articles)) - // ← Also close artChan + // Tracker registration happens inside SendTakeThisArticleStreaming (like SendCheckMultiple) + var sentCount int conn.Lock() + var ttxBytes uint64 + start := time.Now() for _, article := range articles { if article == nil { continue // Skip cached articles } + astart := time.Now() // Send TAKETHIS command with 
article content (non-blocking) - log.Printf("Newsgroup: '%s' | Pre-Send TAKETHIS '%s'", newsgroup, article.MessageID) - cmdID, txBytes, err := conn.SendTakeThisArticleStreaming(article, &processor.LocalNNTPHostname, newsgroup) + // This also queues the ReadRequest to readTAKETHISResponsesChan BEFORE returning + log.Printf("Newsgroup: '%s' | ++Pre-Send TAKETHIS '%s'", newsgroup, article.MessageID) + cmdID, txBytes, err := conn.SendTakeThisArticleStreaming(article, &processor.LocalNNTPHostname, newsgroup, demuxer, readTAKETHISResponsesChan, job) + astart2 := time.Now() job.NGTProgress.AddNGTP(0, 0, int64(txBytes)) + ttxBytes += uint64(txBytes) if err != nil { if err == common.ErrNoNewsgroups { log.Printf("Newsgroup: '%s' | skipped TAKETHIS '%s': no newsgroups header", newsgroup, article.MessageID) @@ -1779,96 +1669,14 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art conn.Unlock() conn.ForceCloseConn() log.Printf("ERROR Newsgroup: '%s' | Failed to send TAKETHIS for %s: %v", newsgroup, article.MessageID, err) - return 0, 0, redis_cached, fmt.Errorf("failed to send TAKETHIS for %s: %v", article.MessageID, err) - } - log.Printf("Newsgroup: '%s' | Sent TAKETHIS '%s' CmdID=%d", newsgroup, article.MessageID, cmdID) - artChan <- &nntp.CheckResponse{ - Article: article, - CmdId: cmdID, + return redis_cached, fmt.Errorf("failed to send TAKETHIS for %s: %v", article.MessageID, err) } + sentCount++ + log.Printf("Newsgroup: '%s' | DONE TAKETHIS '%s' CmdID=%d (%d/%d sent) in %v awaiting responses astart2='%v'", newsgroup, article.MessageID, cmdID, sentCount, len(articles), time.Since(astart), time.Since(astart2)) } conn.Unlock() - close(artChan) - //log.Printf("Sent %d TAKETHIS commands, reading responses...", len(commandIDs)) - var done []*string - var countDone int - // Phase 2: Read all responses in order - log.Printf("Newsgroup: '%s' | Reading TAKETHIS responses for %d sent articles...", newsgroup, len(artChan)) - for cr := range artChan { - log.Printf("Newsgroup: '%s' | Pre-Read TAKETHIS response for article '%s' CmdID=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, cr.CmdId, countDone+1, len(articles)) - job.TTMode.IncrementTmp() - takeThisResponseCode, err := conn.ReadTakeThisResponseStreaming(newsgroup, cr) - if err != nil { - job.Increment(nntp.IncrFLAG_CONN_ERRORS) - conn.ForceCloseConn() - log.Printf("ERROR Newsgroup: '%s' | Failed to read TAKETHIS response for %s: %v", newsgroup, cr.Article.MessageID, err) - return transferred, rejected, redis_cached, fmt.Errorf("failed to read TAKETHIS response for %s: %v", cr.Article.MessageID, err) - } - log.Printf("Newsgroup: '%s' | GOT TAKETHIS response '%s': %d CmdID=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, takeThisResponseCode, cr.CmdId, countDone+1, len(articles)) - countDone++ - job.NGTProgress.AddNGTP(0, 1, 0) - // Update success rate tracking - switch takeThisResponseCode { - case 239: - job.TTMode.IncrementSuccess() - job.Increment(nntp.IncrFLAG_TRANSFERRED) - transferred++ - case 439: - job.Increment(nntp.IncrFLAG_REJECTED) - rejected++ - if VERBOSE { - log.Printf("Newsgroup: '%s' | Rejected article '%s': response=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, takeThisResponseCode, countDone, len(articles)) - //resultsMutex.Lock() - //rejectedArticles[newsgroup] = append(rejectedArticles[newsgroup], article.MessageID) - //resultsMutex.Unlock() - } - case 400, 480, 500, 501, 502, 503, 504: - log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': response=%d (i=%d/%d)", newsgroup, 
cr.Article.MessageID, takeThisResponseCode, countDone, len(articles)) - job.Increment(nntp.IncrFLAG_TX_ERRORS) - conn.ForceCloseConn() - return transferred, rejected, redis_cached, fmt.Errorf("failed to transfer article '%s': response=%d", cr.Article.MessageID, takeThisResponseCode) - - default: - log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': unknown response=%d (i=%d/%d)", newsgroup, cr.Article.MessageID, takeThisResponseCode, countDone, len(articles)) - job.Increment(nntp.IncrFLAG_TX_ERRORS) - continue - } - if redisCli != nil { - done = append(done, &cr.Article.MessageID) - } - /* - if countDone > 100 && rejected > 10 { - failRate := float64(rejected) / float64(countDone) * 100 - if failRate > 10 { - ttMode.CheckMode = true - breakChan <- struct{}{} - return transferred, redis_cached, fmt.Errorf("Newsgroup: '%s' | ABORT streamed takethis batch. failRate: %.1f%%. transferred=%d rejected=%d", newsgroup, failRate, transferred, rejected) - } - } - */ - } // end for cmdChan - - if redisCli != nil && len(done) > 0 { - // Cache transferred or rejected message IDs in Redis using pipeline (1 round trip) - pipe := redisCli.Pipeline() - - // Queue all SET commands - for _, msgID := range done { - pipe.Set(redisCtx, *msgID, "1", REDIS_TTL) - } - - // Execute all SET commands in one network round trip - _, err := pipe.Exec(redisCtx) - if err != nil { - log.Printf("Newsgroup: '%s' | Failed to cache %d message IDs in Redis: %v", newsgroup, len(done), err) - } else if VERBOSE { - log.Printf("Newsgroup: '%s' | Cached %d message IDs in Redis", newsgroup, len(done)) - } - } - if VERBOSE { - log.Printf("Newsgroup: '%s' | Batch transferred: %d/%d articles. rejected=%d redis_cached=%d", newsgroup, transferred, len(articles), rejected, redis_cached) - } - return transferred, rejected, redis_cached, nil + log.Printf("Newsgroup: '%s' | DONE TAKETHIS BATCH sent: %d commands. 
ttxBytes: %d in %v", newsgroup, sentCount, ttxBytes, time.Since(start)) + return redis_cached, nil } // end func sendArticlesBatchViaTakeThis var jobRequeueMutex sync.RWMutex @@ -2166,11 +1974,11 @@ type ReturnSignal struct { jobs []*nntp.CHTTJob } -func (rs *ReturnSignal) UnlockTT() { +func (rs *ReturnSignal) BlockTT() { rs.Mux.Lock() rs.RunTT = false rs.Mux.Unlock() - log.Printf("UnlockTT: released RunTT lock") + log.Printf("BlockTT: released RunTT lock") } func (rs *ReturnSignal) GetLockTT() { @@ -2264,15 +2072,16 @@ func replyChan(request chan struct{}, reply chan struct{}) { } func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue chan *nntp.CHTTJob) { - readResponsesChan := make(chan *nntp.ReadRequest, BatchCheck*2) - //rrRetChan := make(chan struct{}, BatchCheck) - //takeThisChan := make(chan *nntp.CHTTJob, 2) // buffer 2 + readCHECKResponsesChan := make(chan *nntp.ReadRequest, 128000) + readTAKETHISResponsesChan := make(chan *nntp.ReadRequest, 128000) errChan := make(chan struct{}, 4) tickChan := make(chan struct{}, 1) - //flipflopChan := make(chan struct{}, 1) requestReplyJobDone := make(chan struct{}, 1) replyJobDone := make(chan struct{}, 1) + // Create ResponseDemuxer to eliminate race conditions in ReadCodeLine + demuxer := nntp.NewResponseDemuxer(conn, errChan, BatchCheck) + defer func(conn *nntp.BackendConn, rs *ReturnSignal) { conn.ForceCloseConn() rs.ExitChan <- rs @@ -2280,6 +2089,10 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue }(conn, rs) //lastRun := time.Now() + // Start the central response reader (CRITICAL: only ONE goroutine reads from connection) + demuxer.Start() + log.Printf("CheckWorker (%d): Started ResponseDemuxer", workerID) + // launch go routine which sends CHECK commands go func() { // tick every n seconds to check if any CHECKs to do @@ -2320,8 +2133,9 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue waiting: for { if len(TakeThisQueues[workerID]) > 1 { + rs.BlockCHECK() log.Printf("CheckWorker (%d): waiting shared takeThisChan full (%d)", workerID, len(TakeThisQueues[workerID])) - time.Sleep(time.Millisecond * 16) + time.Sleep(time.Second / 4) continue waiting } break @@ -2351,7 +2165,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquire LOCK CHECK for batch (offset %d: %d-%d) (%d messages)", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, len(currentJob.MessageIDs[batchStart:batchEnd])) rs.LockCHECK() log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquired CHECK lock for batch (offset %d: %d-%d) -> SendCheckMultiple", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) - err := conn.SendCheckMultiple(currentJob.MessageIDs[batchStart:batchEnd], readResponsesChan, currentJob) + err := conn.SendCheckMultiple(currentJob.MessageIDs[batchStart:batchEnd], readCHECKResponsesChan, currentJob, demuxer) if err != nil { log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d SendCheckMultiple error for batch (offset %d: %d-%d): %v", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, err) time.Sleep(time.Second) @@ -2363,44 +2177,6 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue return } log.Printf("Newsgroup: '%s' | 
CheckWorker (%d): job #%d Sent CHECK for batch (offset %d: %d-%d), responses will be read asynchronously...", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) - // NOTE: CHECK lock remains locked! It will be unlocked by the response reader - // when all responses are processed (see rs.UnlockCHECKforTT() in response reader) - /* disabled - deadline := time.After(time.Minute) - timedOut := false - replies := 0 - - for i, msgID := range currentJob.MessageIDs[batchStart:batchEnd] { - if msgID != nil { - //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d waiting for rrRetChan (offset %d: %d-%d) replies=%d readResponsesChan=%d n=%d/%d", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, replies, len(readResponsesChan), i+1, len(currentJob.MessageIDs[batchStart:batchEnd])) - select { - case <-deadline: - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d timeout waiting for rrRetChan for batch (offset %d: %d-%d) replies=%d readResponsesChan=%d n=%d/%d", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, replies, len(readResponsesChan), i+1, len(currentJob.MessageIDs[batchStart:batchEnd])) - timedOut = true - case <-rrRetChan: - // got reply - replies++ - } - if timedOut { - break - } - } - } - // If timeout occurred, clean up the job - if timedOut { - rs.Mux.Lock() - // requeue to front - rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) - // Send failure response - currentJob.Response(false, fmt.Errorf("CHECK response timeout")) - - // Release lock - common.ChanRelease(flipflopChan) - return - } - */ - // Release lock after batch is complete, allowing TAKETHIS to run - //common.ChanRelease(flipflopChan) } } else { log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d skipping CHECK for %d message IDs (TAKETHIS mode)", *currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs)) @@ -2471,7 +2247,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue log.Printf("CheckWorker (%d): Read CHECK responses exiting", workerID) return - case rr := <-readResponsesChan: + case rr := <-readCHECKResponsesChan: //log.Printf("CheckWorker (%d): Read CHECK got readRequest for rr: '%v'", workerID, rr) if rr == nil || rr.MsgID == nil { log.Printf("CheckWorker (%d): Read CHECK got nil readRequest, skipping", workerID) @@ -2492,20 +2268,31 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } log.Printf("CheckWorker (%d): Pre-Read CHECK response for msgID: %s (cmdID=%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) start := time.Now() - /* disabled - if err := conn.SetReadDeadline(time.Now().Add(1 * time.Minute)); err != nil { - log.Printf("Failed to set read deadline: %v", err) - nntp.ReturnReadRequest(rrRetChan) + + // NEW: Get pre-read response from demuxer (eliminates race condition) + var respData *nntp.ResponseData + select { + case respData = <-demuxer.GetCheckResponseChan(): + // Got response from demuxer + case <-errChan: + log.Printf("CheckWorker (%d): Read CHECK got errChan while waiting for response", workerID) rr.ClearReadRequest() return } - */ - conn.TextConn.StartResponse(rr.CmdID) - //log.Printf("CheckWorker (%d): Reading CHECK response for msgID: %s (cmdID=%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) - code, line, err := conn.TextConn.ReadCodeLine(238) - 
conn.TextConn.EndResponse(rr.CmdID) - if code == 0 && err != nil { - log.Printf("Failed to read CHECK response: %v", err) + + // Verify we got the expected command ID + if respData.CmdID != rr.CmdID { + log.Printf("ERROR CheckWorker (%d): Command ID mismatch! Expected %d, got %d", workerID, rr.CmdID, respData.CmdID) + rr.ClearReadRequest() + return + } + + //code := respData.Code + //line := respData.Line + //err := respData.Err + + if respData.Code == 0 && respData.Err != nil { + log.Printf("Failed to read CHECK response: %v", respData.Err) //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() return @@ -2539,15 +2326,15 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // 431 - article not wanted // 438 - article not wanted (already have it) // ReadCodeLine returns: code=238, message=" article wanted" - parts := strings.Fields(line) + parts := strings.Fields(respData.Line) if len(parts) < 1 { - log.Printf("ERROR in CheckWorker: Malformed CHECK response code=%d line: '%s' (cmdID=%d MID=%d/%d)", code, line, rr.CmdID, rr.N, rr.Reqs) + log.Printf("ERROR in CheckWorker: Malformed CHECK response code=%d line: '%s' (cmdID=%d MID=%d/%d)", respData.Code, respData.Line, rr.CmdID, rr.N, rr.Reqs) //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() return } if parts[0] != *rr.MsgID { - log.Printf("ERROR in CheckWorker: Mismatched CHECK response: expected '%s', got '%s' code=%d (cmdID=%d MID=%d/%d)", *rr.MsgID, parts[0], code, rr.CmdID, rr.N, rr.Reqs) + log.Printf("ERROR in CheckWorker: Mismatched CHECK response: expected '%s', got '%s' code=%d (cmdID=%d MID=%d/%d)", *rr.MsgID, parts[0], respData.Code, rr.CmdID, rr.N, rr.Reqs) //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() return @@ -2571,7 +2358,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue delete(rs.jobMap, rr.MsgID) rs.jobsReadOK[job]++ rs.Mux.Unlock() - switch code { + switch respData.Code { case 238: //log.Printf("Newsgroup: '%s' | Got Response: Wanted Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) job.AppendWantedMessageID(rr.MsgID) @@ -2585,7 +2372,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue job.Increment(nntp.IncrFLAG_RETRY) default: - log.Printf("Newsgroup: '%s' | Unknown CHECK response: line='%s' code=%d expected msgID %s", *job.Newsgroup, line, code, *rr.MsgID) + log.Printf("Newsgroup: '%s' | Unknown CHECK response: line='%s' code=%d expected msgID %s", *job.Newsgroup, respData.Line, respData.Code, *rr.MsgID) } // check if all jobs are done //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG3 Processing CHECK response for msgID: %s (cmdID=%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) @@ -2611,22 +2398,6 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // Pass job to TAKETHIS worker via channel log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5 job #%d got all %d CHECK responses, passing to TAKETHIS worker (wanted: %d articles) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, queuedCount, len(job.WantedIDs), len(TakeThisQueues[workerID])) rs.UnlockCHECKforTT() - /* disabled - if len(readResponsesChan) == 0 { - rs.UnlockCHECKforTT() // Unlock CHECK, lock for TAKETHIS - } else { - rs.BlockCHECK() - log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5a WARNING: readResponsesChan not empty (%d), delaying TAKETHIS unlock", *job.Newsgroup, workerID, len(readResponsesChan)) - go func() { - for { - 
time.Sleep(time.Millisecond) - if len(readResponsesChan) == 0 { - rs.UnlockCHECKforTT() - return - } - } - }() - }*/ TakeThisQueues[workerID] <- job // local takethis chan sharing the same connection log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5c Sent job #%d to TAKETHIS worker (wanted: %d/%d) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, len(job.WantedIDs), queuedCount, len(TakeThisQueues[workerID])) @@ -2637,7 +2408,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue if len(TakeThisQueues[workerID]) > 0 { rs.UnlockCHECKforTT() } else { - rs.UnlockTT() + rs.BlockTT() } } } else { @@ -2648,6 +2419,115 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } // end forever }() + // launch a goroutine to process TAKETHIS responses concurrently + // This follows the EXACT pattern as CHECK response reader (lines 2366-2552) + go func() { + defer func() { + errChan <- struct{}{} + }() + + ttloop: + for { + select { + case <-errChan: + log.Printf("TTResponseWorker (%d): got errChan signal, exiting", workerID) + errChan <- struct{}{} + return + + case rr := <-readTAKETHISResponsesChan: + if rr == nil || rr.MsgID == nil { + log.Printf("TTResponseWorker (%d): got nil readRequest, skipping", workerID) + continue ttloop + } + if common.WantShutdown() { + log.Printf("TTResponseWorker (%d): WantShutdown, exiting", workerID) + rr.ClearReadRequest() + return + } + if !conn.IsConnected() { + log.Printf("TTResponseWorker (%d): connection lost, exiting", workerID) + rr.ClearReadRequest() + return + } + + log.Printf("TTResponseWorker (%d): Pre-Read TAKETHIS response for msgID: %s (cmdID=%d)", workerID, *rr.MsgID, rr.CmdID) + + // Get pre-read response from demuxer (same pattern as CHECK) + var respData *nntp.ResponseData + select { + case respData = <-demuxer.GetTakeThisResponseChan(): + // Got response from demuxer + case <-errChan: + log.Printf("TTResponseWorker (%d): got errChan while waiting for response", workerID) + rr.ClearReadRequest() + return + } + + // Verify we got the expected command ID + if respData.CmdID != rr.CmdID { + log.Printf("ERROR TTResponseWorker (%d): Command ID mismatch! 
Expected %d, got %d", workerID, rr.CmdID, respData.CmdID) + rr.ClearReadRequest() + return + } + + if respData.Err != nil { + log.Printf("ERROR TTResponseWorker (%d): Failed to read TAKETHIS response for %s: %v", + workerID, *rr.MsgID, respData.Err) + rr.Job.Increment(nntp.IncrFLAG_CONN_ERRORS) + rr.ClearReadRequest() + conn.ForceCloseConn() + return + } + + rr.Job.TTMode.IncrementTmp() + rr.Job.NGTProgress.AddNGTP(0, 1, 0) + + // Handle response codes + switch respData.Code { + case 239: + rr.Job.TTMode.IncrementSuccess() + rr.Job.Increment(nntp.IncrFLAG_TRANSFERRED) + // Cache in Redis if enabled (inline, no separate tracker struct needed) + if rs.redisCli != nil { + err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err() + if err != nil && VERBOSE { + log.Printf("Newsgroup: '%s' | Failed to cache message ID in Redis: %v", *rr.Job.Newsgroup, err) + } + } + + case 439: + rr.Job.Increment(nntp.IncrFLAG_REJECTED) + // Cache rejection in Redis if enabled + if rs.redisCli != nil { + err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err() + if err != nil && VERBOSE { + log.Printf("Newsgroup: '%s' | Failed to cache rejected message ID in Redis: %v", *rr.Job.Newsgroup, err) + } + } + if VERBOSE { + log.Printf("Newsgroup: '%s' | Rejected article '%s': response=%d", + *rr.Job.Newsgroup, *rr.MsgID, respData.Code) + } + + case 400, 480, 500, 501, 502, 503, 504: + log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': response=%d", + *rr.Job.Newsgroup, *rr.MsgID, respData.Code) + rr.Job.Increment(nntp.IncrFLAG_TX_ERRORS) + rr.ClearReadRequest() + conn.ForceCloseConn() + return + + default: + log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': unknown response=%d", + *rr.Job.Newsgroup, *rr.MsgID, respData.Code) + rr.Job.Increment(nntp.IncrFLAG_TX_ERRORS) + } + + rr.ClearReadRequest() + } // end select + } // end for + }() + // launch a goroutine to process TAKETHIS jobs from local channel sharing the same connection go func() { defer func() { @@ -2699,9 +2579,9 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue //common.ChanLock(flipflopChan) log.Printf("Newsgroup: '%s' | TTworker (%d): Sending TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, workerID, job.JobID, len(wantedArticles)) // Send TAKETHIS commands using existing function - transferred, rejected, redis_cached, err := sendArticlesBatchViaTakeThis(conn, wantedArticles, job, *job.Newsgroup, rs.redisCli) + redis_cached, err := sendArticlesBatchViaTakeThis(conn, wantedArticles, job, *job.Newsgroup, rs.redisCli, demuxer, readTAKETHISResponsesChan) //common.ChanRelease(flipflopChan) - rs.UnlockTT() + rs.BlockTT() if err != nil { log.Printf("Newsgroup: '%s' | TTworker (%d): Error in TAKETHIS job #%d: %v", *job.Newsgroup, workerID, job.JobID, err) job.Response(false, err) @@ -2711,14 +2591,12 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Unlock() return } - - log.Printf("Newsgroup: '%s' | TTworker (%d): TAKETHIS job #%d completed: transferred=%d, rejected=%d, redis_cached=%d", *job.Newsgroup, workerID, job.JobID, transferred, rejected, redis_cached) + log.Printf("Newsgroup: '%s' | TTworker (%d): TAKETHIS job #%d sent, redis_cached=%d", *job.Newsgroup, workerID, job.JobID, redis_cached) // Send response back log.Printf("Newsgroup: '%s' | TTworker (%d): Sending TTresponse for job #%d to responseChan len=%d", *job.Newsgroup, workerID, job.JobID, len(job.ResponseChan)) job.Response(true, nil) 
log.Printf("Newsgroup: '%s' | TTworker (%d): Sent TTresponse for job #%d to responseChan", *job.Newsgroup, workerID, job.JobID) - } }() diff --git a/internal/common/common.go b/internal/common/common.go index 45f6552..305408d 100644 --- a/internal/common/common.go +++ b/internal/common/common.go @@ -29,11 +29,14 @@ func WantShutdown() bool { func IsClosedChannel(ch chan struct{}) bool { select { - case <-ch: - return true + case _, ok := <-ch: + if !ok { + // channel is closed + return true + } default: - return false } + return false } func ChanLock(lockChan chan struct{}) { diff --git a/internal/nntp/nntp-backend-pool.go b/internal/nntp/nntp-backend-pool.go index 2b38dea..b7213f7 100644 --- a/internal/nntp/nntp-backend-pool.go +++ b/internal/nntp/nntp-backend-pool.go @@ -248,6 +248,7 @@ newConn: pool.activeConns-- pool.failedConns++ pool.mux.Unlock() + log.Printf("[NNTP-POOL] Failed to create new connection: provider='%s': %v", pool.Backend.Provider.Name, err) return nil, err } err = pconn.SwitchMode(wantMode) @@ -312,7 +313,7 @@ func (pool *Pool) Put(conn *BackendConn) error { // Check if connection should be closed if conn != nil { conn.mux.Lock() - if conn.forceClose || !conn.connected { + if conn.forceClose || !conn.IsConnected() { forceClose = true } conn.mux.Unlock() @@ -433,7 +434,7 @@ func (pool *Pool) createConnection() (*BackendConn, error) { log.Printf("[NNTP-POOL] Failed to create connection to %s:%d: %v", pool.Backend.Host, pool.Backend.Port, err) return nil, fmt.Errorf("failed to create connection: %w", err) } - //log.Printf("[NNTP-POOL] Successfully created connection to %s:%d", pool.Backend.Host, pool.Backend.Port) + log.Printf("[NNTP-POOL] Successfully created connection to %s:%d", pool.Backend.Host, pool.Backend.Port) return client, nil } @@ -447,7 +448,7 @@ func (pool *Pool) isConnectionValid(client *BackendConn) bool { client.mux.Lock() defer client.mux.Unlock() - if client.forceClose || !client.connected { + if client.forceClose || !client.IsConnected() { return false } diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index d72d781..02dd7c8 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -14,6 +14,7 @@ import ( "github.com/go-while/go-pugleaf/internal/common" "github.com/go-while/go-pugleaf/internal/models" "github.com/go-while/go-pugleaf/internal/utils" + "github.com/redis/go-redis/v9" ) // Constants for maximum lines to read in various commands @@ -37,15 +38,214 @@ var NNTPTransferThreads int = 1 var JobIDCounter uint64 // Atomic counter for unique job IDs +// ResponseType indicates which handler should process a response +type ResponseType int + +const ( + TYPE_CHECK ResponseType = iota + TYPE_TAKETHIS +) + +// ResponseData holds a pre-read response from the connection +type ResponseData struct { + CmdID uint + Code int + Line string + Err error +} + +type CmdIDinfo struct { + CmdID uint + RespType ResponseType +} + +// ResponseDemuxer reads all responses from a connection in ONE goroutine +// and dispatches them to the appropriate handler channel (CHECK or TAKETHIS) +// This eliminates race conditions in concurrent ReadCodeLine calls +type ResponseDemuxer struct { + conn *BackendConn + cmdIDQ []*CmdIDinfo + signalChan chan struct{} + cmdIDQMux sync.RWMutex + LastID uint + checkResponseChan chan *ResponseData + ttResponseChan chan *ResponseData + errChan chan struct{} + started bool + startedMux sync.Mutex +} + +// NewResponseDemuxer creates a new response demultiplexer 
+func NewResponseDemuxer(conn *BackendConn, errChan chan struct{}, BatchCheck int) *ResponseDemuxer { + return &ResponseDemuxer{ + conn: conn, + signalChan: make(chan struct{}, 1), + checkResponseChan: make(chan *ResponseData, 128000), // Buffer for CHECK responses + ttResponseChan: make(chan *ResponseData, 128000), // Buffer for TAKETHIS responses + errChan: errChan, + started: false, + } +} + +// RegisterCommand registers a command ID with its type (CHECK or TAKETHIS) +func (d *ResponseDemuxer) RegisterCommand(cmdID uint, cmdType ResponseType) { + d.cmdIDQMux.Lock() + d.cmdIDQ = append(d.cmdIDQ, &CmdIDinfo{CmdID: cmdID, RespType: cmdType}) + d.cmdIDQMux.Unlock() + select { + case d.signalChan <- struct{}{}: + default: + } +} + +// PopCommand removes a command ID from the queue +func (d *ResponseDemuxer) PopCommand() *CmdIDinfo { + d.cmdIDQMux.Lock() + defer d.cmdIDQMux.Unlock() + + if len(d.cmdIDQ) == 0 { + return nil + } + + cmdIDInfo := d.cmdIDQ[0] + d.cmdIDQ = d.cmdIDQ[1:] + return cmdIDInfo +} + +// GetCheckResponseChan returns the channel for CHECK responses +func (d *ResponseDemuxer) GetCheckResponseChan() chan *ResponseData { + return d.checkResponseChan +} + +// GetTakeThisResponseChan returns the channel for TAKETHIS responses +func (d *ResponseDemuxer) GetTakeThisResponseChan() chan *ResponseData { + return d.ttResponseChan +} + +// Start launches the central response reader goroutine (call once) +func (d *ResponseDemuxer) Start() { + d.startedMux.Lock() + defer d.startedMux.Unlock() + + if d.started { + return // Already started + } + d.started = true + + go d.readAndDispatch() +} + +// readAndDispatch is the SINGLE goroutine that reads ALL responses from the shared connection +func (d *ResponseDemuxer) readAndDispatch() { + defer func() { + if r := recover(); r != nil { + log.Printf("ResponseDemuxer: panic in readAndDispatch: %v", r) + } + select { + case d.errChan <- struct{}{}: + default: + } + }() + outoforderBacklog := make(map[uint]*CmdIDinfo, 1024) + for { + select { + case <-d.errChan: + log.Printf("ResponseDemuxer: got errChan signal, exiting") + return + default: + } + + if !d.conn.IsConnected() { + log.Printf("ResponseDemuxer: connection lost, exiting") + return + } + + var cmdInfo *CmdIDinfo + if len(outoforderBacklog) > 0 { + if cmdInfoBacklog, exists := outoforderBacklog[d.LastID+1]; exists { + log.Printf("ResponseDemuxer: processing out-of-order backlog cmdID=%d d.LastID=%d", cmdInfoBacklog.CmdID, d.LastID) + cmdInfo = cmdInfoBacklog + outoforderBacklog[d.LastID+1] = nil + delete(outoforderBacklog, d.LastID+1) + } + } else { + cmdInfo = d.PopCommand() + } + if cmdInfo == nil { + if len(outoforderBacklog) > 0 { + log.Printf("ResponseDemuxer: got no cmdInfo but have outoforderBacklog: %d", len(outoforderBacklog)) + if _, exists := outoforderBacklog[d.LastID+1]; exists { + continue + } + } + log.Printf("ResponseDemuxer: nothing to process, waiting on signalChan") + <-d.signalChan + continue + } + if d.LastID+1 != cmdInfo.CmdID { + log.Printf("ResponseDemuxer: WARNING - out of order cmdID received, expected %d got %d", d.LastID+1, cmdInfo.CmdID) + outoforderBacklog[cmdInfo.CmdID] = cmdInfo + continue + } else { + d.LastID = cmdInfo.CmdID + } + + log.Printf("ResponseDemuxer: waiting for response cmdID=%d respType=%d", cmdInfo.CmdID, cmdInfo.RespType) + start := time.Now() + d.conn.TextConn.StartResponse(cmdInfo.CmdID) + code, line, err := d.conn.TextConn.ReadCodeLine(0) // Read any code + d.conn.TextConn.EndResponse(cmdInfo.CmdID) + log.Printf("ResponseDemuxer: 
received response cmdID=%d: code=%d line='%s' err='%v' respType=%d (waited %v)", cmdInfo.CmdID, code, line, err, cmdInfo.RespType, time.Since(start)) + if err != nil && code == 0 { + d.errChan <- struct{}{} + log.Printf("ResponseDemuxer: error reading response for cmdID=%d: %v", cmdInfo.CmdID, err) + return + } + respData := &ResponseData{ + CmdID: cmdInfo.CmdID, + Code: code, + Line: line, + Err: err, + } + // Dispatch based on registered type + + switch cmdInfo.RespType { + case TYPE_CHECK: + select { + case d.checkResponseChan <- respData: + // Dispatched successfully + log.Printf("ResponseDemuxer: dispatched CHECK response cmdID=%d d.checkResponseChan=%d", cmdInfo.CmdID, len(d.checkResponseChan)) + case <-d.errChan: + log.Printf("ResponseDemuxer: got errChan while dispatching CHECK response, exiting") + d.errChan <- struct{}{} + return + } + + case TYPE_TAKETHIS: + select { + case d.ttResponseChan <- respData: + // Dispatched successfully + log.Printf("ResponseDemuxer: dispatched TAKETHIS response cmdID=%d d.ttResponseChan=%d", cmdInfo.CmdID, len(d.ttResponseChan)) + case <-d.errChan: + d.errChan <- struct{}{} + log.Printf("ResponseDemuxer: got errChan while dispatching TAKETHIS response, exiting") + return + } + + default: + log.Printf("ResponseDemuxer: WARNING - unknown command type for cmdID=%d, signaling ERROR", cmdInfo.CmdID) + select { + case d.errChan <- struct{}{}: + default: + } + } + } +} + // used in nntp-transfer/main.go type TakeThisMode struct { - mux sync.Mutex - Newsgroup *string - //Wanted uint64 - //Unwanted uint64 - //Rejected uint64 - //TX_Errors uint64 - //ConnErrors uint64 + mux sync.Mutex + Newsgroup *string TmpSuccessCount uint64 TmpTTotalsCount uint64 CheckMode bool // Start with TAKETHIS mode (false) @@ -143,6 +343,14 @@ func (rr *ReadRequest) ReturnReadRequest(channel chan struct{}) { rr.ClearReadRequest() } +// TakeThisTracker tracks metadata for pending TAKETHIS responses +type TakeThisTracker struct { + CmdID uint + Job *CHTTJob + Article *models.Article + RedisCli *redis.Client // Will be *redis.Client in practice +} + // batched CHECK/TAKETHIS Job type CHTTJob struct { JobID uint64 // Unique job ID for tracing @@ -217,8 +425,7 @@ type NewsgroupTransferProgress struct { func (ngp *NewsgroupTransferProgress) CalcSpeed() { ngp.Mux.Lock() - defer ngp.Mux.Unlock() - if time.Since(ngp.LastCronTX) >= time.Second*5 { + if time.Since(ngp.LastCronTX) >= time.Second*3 { since := int64(time.Since(ngp.LastCronTX).Seconds()) if ngp.TXBytesTMP > 0 { ngp.LastSpeedKB = ngp.TXBytesTMP / since / 1024 @@ -235,8 +442,8 @@ func (ngp *NewsgroupTransferProgress) CalcSpeed() { ngp.ArticlesTT = 0 ngp.TXBytesTMP = 0 ngp.LastCronTX = time.Now() - } + ngp.Mux.Unlock() } func (ngp *NewsgroupTransferProgress) AddNGTP(articlesCH int64, articlesTT int64, txbytes int64) { @@ -375,21 +582,12 @@ func (ttMode *TakeThisMode) FlipMode(lowerLevel float64, upperLevel float64) boo return retval } -func (c *BackendConn) ForceCloseConn() { - c.mux.Lock() - if !c.forceClose { - c.forceClose = true - } - c.mux.Unlock() - c.Pool.Put(c) -} - // StatArticle checks if an article exists on the server func (c *BackendConn) StatArticle(messageID string) (bool, error) { c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { return false, fmt.Errorf("not connected") } @@ -423,7 +621,7 @@ func (c *BackendConn) GetArticle(messageID *string, bulkmode bool) (*models.Arti c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { return nil, fmt.Errorf("not connected") } 
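Note on the ResponseDemuxer introduced in this patch: it serializes all ReadCodeLine calls into a single goroutine. Senders register each command ID together with its type (CHECK or TAKETHIS) right after writing the command, and the lone reader pops the registered IDs in order, reads one response per ID from the shared connection, and forwards it to the matching channel. Below is a minimal, self-contained sketch of that pattern for illustration only; it uses generic names (demux, register, run) rather than the repository's API, and a plain string channel stands in for the textproto connection.

package main

import (
	"fmt"
	"sync"
)

type respType int

const (
	typeCheck respType = iota
	typeTakeThis
)

type pendingCmd struct {
	id uint
	t  respType
}

// demux: one goroutine reads the shared response stream and routes each
// response to the channel matching the registered command type.
type demux struct {
	mu       sync.Mutex
	pending  []pendingCmd
	signal   chan struct{}
	checkOut chan string
	ttOut    chan string
}

func newDemux() *demux {
	return &demux{
		signal:   make(chan struct{}, 1),
		checkOut: make(chan string, 16),
		ttOut:    make(chan string, 16),
	}
}

// register is called right after writing CHECK or TAKETHIS on the wire.
func (d *demux) register(id uint, t respType) {
	d.mu.Lock()
	d.pending = append(d.pending, pendingCmd{id, t})
	d.mu.Unlock()
	select {
	case d.signal <- struct{}{}: // wake the reader if it is idle
	default:
	}
}

// run is the single reader; raw stands in for ReadCodeLine on the connection.
func (d *demux) run(raw <-chan string) {
	for line := range raw {
		d.mu.Lock()
		for len(d.pending) == 0 {
			d.mu.Unlock()
			<-d.signal // nothing registered yet, wait for a sender
			d.mu.Lock()
		}
		head := d.pending[0]
		d.pending = d.pending[1:]
		d.mu.Unlock()

		out := d.checkOut
		if head.t == typeTakeThis {
			out = d.ttOut
		}
		out <- fmt.Sprintf("cmd %d: %s", head.id, line)
	}
	close(d.checkOut)
	close(d.ttOut)
}

func main() {
	d := newDemux()
	raw := make(chan string, 2)
	d.register(1, typeCheck)
	d.register(2, typeTakeThis)
	raw <- "238 <a@b> wanted"
	raw <- "239 <a@b> transferred ok"
	close(raw)
	go d.run(raw)
	fmt.Println(<-d.checkOut) // cmd 1: 238 <a@b> wanted
	fmt.Println(<-d.ttOut)    // cmd 2: 239 <a@b> transferred ok
}

The point of the single reader is that responses arrive strictly in the order commands were pipelined, so routing only needs the registration queue; no per-response locking around the connection is required.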
@@ -487,7 +685,7 @@ func (c *BackendConn) GetHead(messageID string) (*models.Article, error) { c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { return nil, fmt.Errorf("not connected") } @@ -543,7 +741,7 @@ func (c *BackendConn) GetBody(messageID string) ([]byte, error) { c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { return nil, fmt.Errorf("not connected") } @@ -591,7 +789,7 @@ func (c *BackendConn) ListGroups() ([]GroupInfo, error) { c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { return nil, fmt.Errorf("not connected") } @@ -638,7 +836,7 @@ func (c *BackendConn) ListGroupsLimited(maxGroups int) ([]GroupInfo, error) { c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { return nil, fmt.Errorf("not connected") } @@ -719,7 +917,7 @@ func (c *BackendConn) SelectGroup(groupName string) (*GroupInfo, int, error) { c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { return nil, 0, fmt.Errorf("not connected") } @@ -784,7 +982,7 @@ func (c *BackendConn) XOver(groupName string, start, end int64, enforceLimit boo return nil, fmt.Errorf("error XOver: group name is required") } //log.Printf("XOver group '%s' start=%d end=%d", groupName, start, end) - if !c.connected { + if !c.IsConnected() { return nil, fmt.Errorf("not connected") } groupInfo, code, err := c.SelectGroup(groupName) @@ -847,7 +1045,7 @@ func (c *BackendConn) XOver(groupName string, start, end int64, enforceLimit boo // Automatically limits to max 1000 articles to prevent SQLite overload func (c *BackendConn) XHdr(groupName, field string, start, end int64) ([]*HeaderLine, error) { c.mux.Lock() - if !c.connected { + if !c.IsConnected() { c.mux.Unlock() return nil, fmt.Errorf("not connected") } @@ -979,7 +1177,7 @@ func (c *BackendConn) XHdrStreamed(groupName, field string, start, end int64, xh // XHdrStreamedBatch performs XHDR command and streams results line by line through a channel func (c *BackendConn) XHdrStreamedBatch(groupName, field string, start, end int64, xhdrChan chan<- *HeaderLine, shutdownChan <-chan struct{}) error { c.mux.Lock() - if !c.connected { + if !c.IsConnected() { c.mux.Unlock() return fmt.Errorf("not connected") } @@ -1076,7 +1274,7 @@ func (c *BackendConn) ListGroup(groupName string, start, end int64) ([]int64, er c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { return nil, fmt.Errorf("not connected") } @@ -1383,10 +1581,11 @@ func (c *BackendConn) parseHeaderLine(line string) (*HeaderLine, error) { } // SendCheckMultiple sends CHECK commands for multiple message IDs without returning responses! 
-func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readResponsesChan chan *ReadRequest, job *CHTTJob) error { +// Registers each command ID with the demuxer for proper response routing +func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readCHECKResponsesChan chan *ReadRequest, job *CHTTJob, demuxer *ResponseDemuxer) error { c.mux.Lock() - if !c.connected { + if !c.IsConnected() { c.mux.Unlock() return fmt.Errorf("not connected") } @@ -1411,26 +1610,29 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readResponsesChan log.Printf("Newsgroup: '%s' | Skipping empty message ID in CHECK command", *job.Newsgroup) continue } - log.Printf("Newsgroup: '%s' | CHECK '%s' acquire c.mux.Lock() (%d/%d)", *job.Newsgroup, *msgID, n+1, len(messageIDs)) + //log.Printf("Newsgroup: '%s' | CHECK '%s' acquire c.mux.Lock() (%d/%d)", *job.Newsgroup, *msgID, n+1, len(messageIDs)) c.mux.Lock() - id, err := c.TextConn.Cmd("CHECK %s", *msgID) + cmdID, err := c.TextConn.Cmd("CHECK %s", *msgID) c.mux.Unlock() if err != nil { return fmt.Errorf("failed to send CHECK '%s': %w", *msgID, err) } - log.Printf("Newsgroup: '%s' | CHECK sent '%s' (CmdID=%d) pass notify to readResponsesChan=%d", *job.Newsgroup, *msgID, id, len(readResponsesChan)) - readResponsesChan <- &ReadRequest{CmdID: id, Job: job, MsgID: msgID, N: n + 1, Reqs: len(messageIDs)} - log.Printf("Newsgroup: '%s' | CHECK notified response reader '%s' (CmdID=%d) readResponsesChan=%d", *job.Newsgroup, *msgID, id, len(readResponsesChan)) - id++ + + // Register command ID with demuxer as TYPE_CHECK + demuxer.RegisterCommand(cmdID, TYPE_CHECK) + + //log.Printf("Newsgroup: '%s' | CHECK sent '%s' (CmdID=%d) pass notify to readResponsesChan=%d", *job.Newsgroup, *msgID, cmdID, len(readCHECKResponsesChan)) + readCHECKResponsesChan <- &ReadRequest{CmdID: cmdID, Job: job, MsgID: msgID, N: n + 1, Reqs: len(messageIDs)} + log.Printf("Newsgroup: '%s' | CHECK notified response reader '%s' (CmdID=%d) readCHECKResponsesChan=%d", *job.Newsgroup, *msgID, cmdID, len(readCHECKResponsesChan)) } return nil } func (c *BackendConn) GetBufSize(size int) int { - if size+2048 <= 16384 { + if size+2048 <= 1024*1024 { return size + 2048 } - return 16384 // hardcoded default max buffer size + return 1024 * 1024 // hardcoded default max buffer size } func (c *BackendConn) Lock() { @@ -1440,13 +1642,16 @@ func (c *BackendConn) Unlock() { c.mux.Unlock() } -// SendTakeThisArticleStreaming sends TAKETHIS command and article content without waiting for response +// SendTakeThisArticleStreaming IS UNSAFE! MUST BE LOCKED AND UNLOCKED OUTSIDE FOR THE WHOLE BATCH!!! 
+// sends TAKETHIS command and article content without waiting for response // Returns command ID for later response reading - used for streaming mode -func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string, newsgroup string) (cmdID uint, txBytes int, err error) { +// Registers the command ID with the demuxer for proper response routing +func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string, newsgroup string, demuxer *ResponseDemuxer, readTAKETHISResponsesChan chan *ReadRequest, job *CHTTJob) (cmdID uint, txBytes int, err error) { + start := time.Now() //c.mux.Lock() //defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { //c.mux.Unlock() return 0, 0, fmt.Errorf("not connected") } @@ -1463,12 +1668,13 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp if err != nil { return 0, 0, err } - writer := bufio.NewWriterSize(c.conn, c.GetBufSize(article.Bytes)) // Slightly larger buffer than article size for headers - defer writer.Flush() + //writer := bufio.NewWriterSize(c.conn, c.GetBufSize(article.Bytes)) // Slightly larger buffer than article size for headers + writer := bufio.NewWriter(c.conn) // Slightly larger buffer than article size for headers //c.mux.Lock() //defer c.mux.Unlock() + startSend := time.Now() // Send TAKETHIS command cmdID, err = c.TextConn.Cmd("TAKETHIS %s", article.MessageID) if err != nil { @@ -1521,7 +1727,21 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp } else { txBytes += tx } + log.Printf("Newsgroup: '%s' | TAKETHIS sent CmdID=%d '%s' txBytes: %d in %v (sending took: %v) readTAKETHISResponsesChanLen=%d/%d", newsgroup, cmdID, article.MessageID, txBytes, time.Since(start), time.Since(startSend), len(readTAKETHISResponsesChan), cap(readTAKETHISResponsesChan)) + + startFlush := time.Now() + if err := writer.Flush(); err != nil { + return 0, txBytes, fmt.Errorf("failed to flush article data SendTakeThisArticleStreaming: %w", err) + } + + chanStart := time.Now() + // Register command ID with demuxer as TYPE_TAKETHIS (CRITICAL: must match CHECK pattern) + demuxer.RegisterCommand(cmdID, TYPE_TAKETHIS) + log.Printf("Newsgroup: '%s' | TAKETHIS flushed CmdID=%d '%s' (flushing took: %v) total time: %v readTAKETHISResponsesChan=%d/%d", newsgroup, cmdID, article.MessageID, time.Since(startFlush), time.Since(start), len(readTAKETHISResponsesChan), cap(readTAKETHISResponsesChan)) + // Queue ReadRequest IMMEDIATELY after command (like SendCheckMultiple does at line 1608) + readTAKETHISResponsesChan <- &ReadRequest{CmdID: cmdID, Job: job, MsgID: &article.MessageID, N: 1, Reqs: 1} + log.Printf("Newsgroup: '%s' | TAKETHIS notified response reader CmdID=%d '%s' waited %v readTAKETHISResponsesChan=%d/%d", newsgroup, cmdID, article.MessageID, time.Since(chanStart), len(readTAKETHISResponsesChan), cap(readTAKETHISResponsesChan)) // Return command ID without reading response (streaming mode) return cmdID, txBytes, nil } @@ -1569,7 +1789,7 @@ func (c *BackendConn) PostArticle(article *models.Article) (int, error) { c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { return 0, fmt.Errorf("not connected") } // Prepare article for posting diff --git a/internal/nntp/nntp-client.go b/internal/nntp/nntp-client.go index 990ce2e..525590a 100644 --- a/internal/nntp/nntp-client.go +++ b/internal/nntp/nntp-client.go @@ -11,6 +11,7 @@ import ( "sync" "time" + "github.com/go-while/go-pugleaf/internal/common" 
"github.com/go-while/go-pugleaf/internal/config" ) @@ -60,13 +61,13 @@ type BackendConn struct { Pool *Pool // link to parent pool // Connection state - connected bool - authenticated bool - ModeReader bool - ModeStream bool - forceClose bool - created time.Time - lastUsed time.Time + isConnected chan struct{} + hasClosed chan struct{} + ModeReader bool + ModeStream bool + forceClose bool + created time.Time + lastUsed time.Time // INN allows switching to mode reader when mode stream is active // but INN does not allow switching to mode stream when mode reader is active // when using check/takethis and we got a mode reader connection from pool @@ -137,8 +138,10 @@ type HeaderLine struct { // NewConn creates a new empty NNTP connection with the provided backend configuration. func NewConn(backend *BackendConfig) *BackendConn { return &BackendConn{ - Backend: backend, - created: time.Now(), + Backend: backend, + created: time.Now(), + isConnected: make(chan struct{}, 1), + hasClosed: make(chan struct{}, 1), } } @@ -149,10 +152,12 @@ func (c *BackendConn) Connect() error { c.Backend.ConnectTimeout = config.DefaultConnectTimeout } c.Backend.Mux.Unlock() + c.mux.Lock() defer c.mux.Unlock() - if c.connected { - return nil + if c.IsConnected() { + log.Printf("[NNTP-CONN] ERROR: Tried Connect() but this conn is already connected to %s:%d hasClosed=%d", c.Backend.Host, c.Backend.Port, len(c.hasClosed)) + return fmt.Errorf("connection already established to %s:%d", c.Backend.Host, c.Backend.Port) } // Check if this is a .onion address and automatically enable Tor if not already configured @@ -215,22 +220,21 @@ func (c *BackendConn) Connect() error { //log.Printf("[NNTP-CONN] Successfully connected to %s:%d with welcome code %d", c.Backend.Host, c.Backend.Port, code) - c.connected = true c.lastUsed = time.Now() // Authenticate if credentials provided if c.Backend.Username != "" { //log.Printf("[NNTP-AUTH] Attempting authentication for user '%s' on %s:%d", c.Backend.Username, c.Backend.Host, c.Backend.Port) if err := c.authenticate(); err != nil { - log.Printf("[NNTP-AUTH] Authentication FAILED for user '%s' on %s:%d err: %v", c.Backend.Username, c.Backend.Host, c.Backend.Port, err) - time.Sleep(time.Second * 5) + log.Printf("[NNTP-AUTH] Authentication FAILED for user '%s' on %s:%d err: %v (sleep 15s)", c.Backend.Username, c.Backend.Host, c.Backend.Port, err) + time.Sleep(time.Second * 15) return err } //log.Printf("[NNTP-AUTH] Authentication SUCCESS for user '%s' on %s:%d", c.Backend.Username, c.Backend.Host, c.Backend.Port) } else { //log.Printf("[NNTP-AUTH] No credentials provided, skipping authentication for %s:%d", c.Backend.Host, c.Backend.Port) } - + c.isConnected <- struct{}{} return nil } @@ -272,7 +276,6 @@ func (c *BackendConn) authenticate() error { return fmt.Errorf("authentication failed: %d %s", code, message) } - c.authenticated = true return nil } @@ -281,7 +284,7 @@ func (c *BackendConn) CloseFromPoolOnly() error { c.mux.Lock() defer c.mux.Unlock() - if !c.connected { + if !c.IsConnected() { return nil } @@ -293,29 +296,48 @@ func (c *BackendConn) CloseFromPoolOnly() error { if c.conn != nil { if err := c.conn.Close(); err != nil { - //log.Printf("xx Error closing connection: %v", err) + //log.Printf("Error closing connection: %v", err) } } - c.connected = false - c.authenticated = false + close(c.hasClosed) + <-c.isConnected //c.TextConn = nil // CloseFromPoolOnly //c.conn = nil // CloseFromPoolOnly //c.Writer = nil - //log.Printf("Closed NNTP Connection to %s", c.Backend.Host) + 
log.Printf("Closed NNTP Connection to %s", c.Backend.Host) return nil } +func (c *BackendConn) ForceCloseConn() { + go func() { + c.mux.Lock() + if !c.forceClose { + c.forceClose = true + } + c.mux.Unlock() + c.Pool.Put(c) + }() +} + func (c *BackendConn) IsConnected() bool { - c.mux.Lock() - //log.Printf("IsConnected check: connected=%v conn=%v", c.connected, c.conn) - defer c.mux.Unlock() - if !c.connected { + if common.IsClosedChannel(c.hasClosed) { return false } - if c.conn == nil { + if len(c.isConnected) == 0 { return false } + /* + c.mux.Lock() + //log.Printf("IsConnected check: connected=%v conn=%v", c.connected, c.conn) + defer c.mux.Unlock() + if !c.connected { + return false + } + if c.conn == nil { + return false + } + */ return true } From 3db753dfc4af3aed4961502f07074cab6107744d Mon Sep 17 00:00:00 2001 From: go-while Date: Sat, 11 Oct 2025 01:19:25 +0000 Subject: [PATCH 039/100] ai ai ai --- cmd/nntp-transfer/main.go | 180 ++++++++++++++++---------- internal/nntp/nntp-client-commands.go | 110 +++++----------- internal/nntp/nntp-client.go | 14 ++ 3 files changed, 161 insertions(+), 143 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 59ac853..2c1508f 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -100,7 +100,7 @@ var appVersion = "-unset-" var redisCtx = context.Background() var REDIS_TTL time.Duration = 3600 * time.Second // default 1h - +var MaxQueuedJobs int = 8 var BatchCheck int func main() { @@ -612,11 +612,13 @@ func parseProxyConfig(address, proxyType, username, password string) (*ProxyConf } const query_getArticlesBatchWithDateFilter_selectPart = `SELECT article_num, message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, reply_count, path, headers_json, body_text, imported_at FROM articles` -const query_getArticlesBatchWithDateFilter_nodatefilter = `SELECT article_num, message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, reply_count, path, headers_json, body_text, imported_at FROM articles ORDER BY date_sent ASC LIMIT ? OFFSET ?` -const query_getArticlesBatchWithDateFilter_orderby = " ORDER BY date_sent ASC LIMIT ? OFFSET ?" +const query_getArticlesBatchWithDateFilter_nodatefilter = `SELECT article_num, message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, reply_count, path, headers_json, body_text, imported_at FROM articles ORDER BY date_sent ASC, article_num ASC LIMIT ?` +const query_getArticlesBatchWithDateFilter_nodatefilter_keyset = `SELECT article_num, message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, reply_count, path, headers_json, body_text, imported_at FROM articles WHERE (date_sent > ? OR (date_sent = ? AND article_num > ?)) ORDER BY date_sent ASC, article_num ASC LIMIT ?` +const query_getArticlesBatchWithDateFilter_orderby = " ORDER BY date_sent ASC LIMIT ?" +const query_getArticlesBatchWithDateFilter_orderby_keyset = " AND (date_sent > ? OR (date_sent = ? AND article_num > ?)) ORDER BY date_sent ASC LIMIT ?" 
// getArticlesBatchWithDateFilter retrieves articles from a group database with optional date filtering -func getArticlesBatchWithDateFilter(db *database.Database, ng *models.Newsgroup, offset int64, startTime, endTime *time.Time) ([]*models.Article, error) { +func getArticlesBatchWithDateFilter(db *database.Database, ng *models.Newsgroup, lastDateSent *time.Time, lastArticleNum int64, startTime, endTime *time.Time) ([]*models.Article, error) { // Get group database groupDBs, err := db.GetGroupDBs(ng.Name) if err != nil { @@ -630,6 +632,9 @@ func getArticlesBatchWithDateFilter(db *database.Database, ng *models.Newsgroup, var query string var args []interface{} + // Use keyset pagination (much faster than OFFSET) + useKeyset := lastDateSent != nil + if startTime != nil || endTime != nil { // Build query with date filtering @@ -650,12 +655,28 @@ func getArticlesBatchWithDateFilter(db *database.Database, ng *models.Newsgroup, whereClause = " WHERE " + strings.Join(whereConditions, " AND ") } - query = query_getArticlesBatchWithDateFilter_selectPart + whereClause + query_getArticlesBatchWithDateFilter_orderby - args = append(args, dbBatchSize, offset) + if useKeyset { + // Keyset pagination with date filter + query = query_getArticlesBatchWithDateFilter_selectPart + whereClause + query_getArticlesBatchWithDateFilter_orderby_keyset + lastDateStr := lastDateSent.UTC().Format("2006-01-02 15:04:05") + args = append(args, lastDateStr, lastDateStr, lastArticleNum, dbBatchSize) + } else { + // First batch with date filter + query = query_getArticlesBatchWithDateFilter_selectPart + whereClause + query_getArticlesBatchWithDateFilter_orderby + args = append(args, dbBatchSize) + } } else { - // No date filtering, use original query but with date_sent ordering - query = query_getArticlesBatchWithDateFilter_nodatefilter - args = []interface{}{dbBatchSize, offset} + // No date filtering + if useKeyset { + // Keyset pagination without date filter + query = query_getArticlesBatchWithDateFilter_nodatefilter_keyset + lastDateStr := lastDateSent.UTC().Format("2006-01-02 15:04:05") + args = []interface{}{lastDateStr, lastDateStr, lastArticleNum, dbBatchSize} + } else { + // First batch without date filter + query = query_getArticlesBatchWithDateFilter_nodatefilter + args = []interface{}{dbBatchSize} + } } rows, err := groupDBs.DB.Query(query, args...) 
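Note on the pagination change above: keyset pagination avoids re-scanning and discarding all previously returned rows on every batch, which is what LIMIT/OFFSET forces SQLite to do. The cursor is the (date_sent, article_num) pair of the last row already processed; because it matches the ORDER BY columns it is unique and monotonic. The following is a minimal, self-contained sketch of the same loop, not the repository's code: the driver import, the database file name, and the empty-string starting cursor are assumptions (the sketch presumes date_sent is stored as a text timestamp, whereas the patch uses a separate first-batch query instead of a sentinel cursor).

package main

import (
	"database/sql"
	"fmt"
	"log"

	_ "github.com/mattn/go-sqlite3" // assumed driver; any database/sql driver works
)

// One keyset "page": WHERE (date_sent, article_num) > (lastDate, lastNum),
// written out explicitly so it works without row-value comparisons.
const pageQuery = `
SELECT article_num, message_id, date_sent
FROM articles
WHERE (date_sent > ? OR (date_sent = ? AND article_num > ?))
ORDER BY date_sent ASC, article_num ASC
LIMIT ?`

func main() {
	db, err := sql.Open("sqlite3", "group.db")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	lastDate := ""      // sentinel: sorts before any text timestamp
	lastNum := int64(0)
	const batch = 1000

	for {
		rows, err := db.Query(pageQuery, lastDate, lastDate, lastNum, batch)
		if err != nil {
			log.Fatal(err)
		}
		n := 0
		for rows.Next() {
			var num int64
			var msgID, dateSent string
			if err := rows.Scan(&num, &msgID, &dateSent); err != nil {
				rows.Close()
				log.Fatal(err)
			}
			// ... hand the article to the transfer pipeline ...
			lastDate, lastNum = dateSent, num // advance the keyset cursor
			n++
		}
		if err := rows.Err(); err != nil {
			rows.Close()
			log.Fatal(err)
		}
		rows.Close()
		if n < batch {
			break // short page means we reached the end
		}
		fmt.Printf("keyset page done, cursor=(%s, %d)\n", lastDate, lastNum)
	}
}

Each page costs an index seek to the cursor plus a scan of at most one batch, so the cost per page stays constant instead of growing with the offset; the only requirement is an index covering (date_sent, article_num).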
@@ -1248,7 +1269,6 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } } //time.Sleep(3 * time.Second) // debug sleep - var ioffset int64 remainingArticles := totalArticles ttMode := &nntp.TakeThisMode{ Newsgroup: &ng.Name, @@ -1264,7 +1284,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i // WaitGroup to track individual response channel processors var responseWG sync.WaitGroup - go func() { + go func(responseWG *sync.WaitGroup) { defer collectorWG.Done() var amux sync.Mutex var transferred, unwanted, rejected, checked, txErrors, connErrors uint64 @@ -1275,11 +1295,11 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i continue } num++ - log.Printf("Newsgroup: '%s' | Starting response channel processor num %d (goroutines: %d)", ng.Name, num, runtime.NumGoroutine()) + //log.Printf("Newsgroup: '%s' | Starting response channel processor num %d (goroutines: %d)", ng.Name, num, runtime.NumGoroutine()) responseWG.Add(1) - go func(rc chan *nntp.TTResponse, num uint64) { + go func(rc chan *nntp.TTResponse, num uint64, responseWG *sync.WaitGroup) { defer responseWG.Done() - defer log.Printf("Newsgroup: '%s' | Quit response channel processor num %d (goroutines: %d)", ng.Name, num, runtime.NumGoroutine()) + //defer log.Printf("Newsgroup: '%s' | Quit response channel processor num %d (goroutines: %d)", ng.Name, num, runtime.NumGoroutine()) // Read exactly ONE response from this channel (channel is buffered with cap 1) resp := <-rc // job.Response(ForceCleanUp, err) arrives here @@ -1304,7 +1324,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } // free memory - CRITICAL: Lock and unlock in same scope, not with defer! resp.Job.Mux.Lock() - //log.Printf("Newsgroup: '%s' | Cleaning up TT job #%d with %d articles (ForceCleanUp)", ng.Name, resp.Job.JobID, len(resp.Job.Articles)) + log.Printf("Newsgroup: '%s' | Cleaning up TT job #%d with %d articles (ForceCleanUp)", ng.Name, resp.Job.JobID, len(resp.Job.Articles)) // Clean up Articles and their internal fields for i := range resp.Job.Articles { if resp.Job.Articles[i] != nil { @@ -1330,7 +1350,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i resp.Job.ArticleMap = nil resp.Job.Mux.Unlock() resp.Job = nil - }(setup.ResponseChan, num) + }(setup.ResponseChan, num, responseWG) } log.Printf("Newsgroup: '%s' | Collector: ttResponses closed, waiting for %d response processors to finish...", ng.Name, num) // Wait for all response channel processors to finish @@ -1361,12 +1381,18 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i delete(rejectedArticles, ng.Name) // free memory } resultsMutex.Unlock() - }() + }(&responseWG) OffsetQueue := &nntp.OffsetQueue{} + + // Use keyset pagination instead of OFFSET for much better performance + var lastDateSent *time.Time + var lastArticleNum int64 + articlesProcessed := int64(0) + // Get articles in database batches (much larger than network batches) - for offset := ioffset; offset < totalArticles; offset += dbBatchSize { + for articlesProcessed < totalArticles { if common.WantShutdown() { - log.Printf("WantShutdown in newsgroup: '%s' offset: %d", ng.Name, offset) + log.Printf("WantShutdown in newsgroup: '%s' (processed %d articles)", ng.Name, articlesProcessed) return nil } // Process any requeued jobs first (from previous failed batches) @@ -1374,17 +1400,24 @@ func transferNewsgroup(db *database.Database, ng 
*models.Newsgroup, batchCheck i return err } start := time.Now() - // Load batch from database with date filtering - articles, err := getArticlesBatchWithDateFilter(db, ng, offset, startTime, endTime) + // Load batch from database using keyset pagination (no OFFSET!) + articles, err := getArticlesBatchWithDateFilter(db, ng, lastDateSent, lastArticleNum, startTime, endTime) if err != nil { - log.Printf("Error loading article batch (offset %d) for newsgroup %s: %v", offset, ng.Name, err) - return fmt.Errorf("failed to load article batch (offset %d) for newsgroup '%s': %v", offset, ng.Name, err) + log.Printf("Error loading article batch (processed %d) for newsgroup %s: %v", articlesProcessed, ng.Name, err) + return fmt.Errorf("failed to load article batch (processed %d) for newsgroup '%s': %v", articlesProcessed, ng.Name, err) } if len(articles) == 0 { - //log.Printf("No more articles in newsgroup %s (offset %d)", ng.Name, offset) + //log.Printf("No more articles in newsgroup %s (processed %d)", ng.Name, articlesProcessed) break } + + // Update keyset for next iteration (last article in current batch) + lastArticle := articles[len(articles)-1] + lastDateSent = &lastArticle.DateSent + lastArticleNum = lastArticle.DBArtNum + articlesProcessed += int64(len(articles)) + if dryRun && debugCapture { debugMutex.Lock() debugArticles[ng.Name] = append(debugArticles[ng.Name], articles...) @@ -1396,13 +1429,13 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i for _, a := range articles { size += a.Bytes } - log.Printf("Newsgroup: '%s' | Loaded %d articles from database (offset %d) (Bytes=%d) took %v", ng.Name, len(articles), offset, size, time.Since(start)) + log.Printf("Newsgroup: '%s' | Loaded %d articles from database (processed %d/%d) (Bytes=%d) took %v", ng.Name, len(articles), articlesProcessed, totalArticles, size, time.Since(start)) //} // Process articles in network batches for i := 0; i < len(articles); i += batchCheck { OffsetQueue.Add(1) if common.WantShutdown() { - log.Printf("WantShutdown in newsgroup: '%s' (offset %d)", ng.Name, offset) + log.Printf("WantShutdown in newsgroup: '%s' (processed %d)", ng.Name, articlesProcessed) return nil } // Determine end index for the batch @@ -1411,24 +1444,26 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i end = len(articles) } // pass articles to CHECK or TAKETHIS queue (async!) 
- responseChan, err := processBatch(ttMode, articles[i:end], redisCli, int64(i), int64(end), offset, OffsetQueue, ngtprogress) + responseChan, err := processBatch(ttMode, articles[i:end], redisCli, int64(i), int64(end), articlesProcessed-int64(len(articles))+int64(i), OffsetQueue, ngtprogress) if err != nil { log.Printf("Newsgroup: '%s' | Error processing batch %d-%d: %v", ng.Name, i+1, end, err) return fmt.Errorf("error processing batch %d-%d for newsgroup '%s': %v", i+1, end, ng.Name, err) } - // pass the response channel to the collector channel: ttResponses - ttResponses <- &nntp.TTSetup{ - ResponseChan: responseChan, + if responseChan != nil { + // pass the response channel to the collector channel: ttResponses + ttResponses <- &nntp.TTSetup{ + ResponseChan: responseChan, + } } - OffsetQueue.Wait(2) // wait for offset batches to finish, less than 2 in flight + OffsetQueue.Wait(MaxQueuedJobs) // wait for offset batches to finish, less than 2 in flight } remainingArticles -= int64(len(articles)) if VERBOSE { - log.Printf("Newsgroup: '%s' | Pushed to queue (offset %d/%d) remaining: %d (Check=%t)", ng.Name, offset, totalArticles, remainingArticles, ttMode.UseCHECK()) - //log.Printf("Newsgroup: '%s' | Pushed (offset %d/%d) total: %d/%d (unw: %d / rej: %d) (Check=%t)", ng.Name, offset, totalArticles, transferred, remainingArticles, ttMode.Unwanted, ttMode.Rejected, ttMode.GetMode()) + log.Printf("Newsgroup: '%s' | Pushed to queue (processed %d/%d) remaining: %d (Check=%t)", ng.Name, articlesProcessed, totalArticles, remainingArticles, ttMode.UseCHECK()) + //log.Printf("Newsgroup: '%s' | Pushed (processed %d/%d) total: %d/%d (unw: %d / rej: %d) (Check=%t)", ng.Name, articlesProcessed, totalArticles, transferred, remainingArticles, ttMode.Unwanted, ttMode.Rejected, ttMode.GetMode()) } - } // end for offset range totalArticles + } // end for keyset pagination loop //log.Printf("Newsgroup: '%s' | Main article loop completed, checking for requeued jobs...", ng.Name) @@ -1587,7 +1622,7 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queue job #%d with %d message IDs. 
CheckQ=%d", *ttMode.Newsgroup, workerID, batchedJob.JobID, len(batchedJob.MessageIDs), len(CheckQueues[workerID])) CheckQueues[workerID] <- batchedJob // checkQueue <- batchedJob - log.Printf("Newsgroup: '%s' | CheckWorker (%d) queued Job #%d", *ttMode.Newsgroup, workerID, batchedJob.JobID) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queued Job #%d", *ttMode.Newsgroup, workerID, batchedJob.JobID) return batchedJob.ResponseChan, nil } // end func processBatch @@ -1659,7 +1694,10 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art log.Printf("Newsgroup: '%s' | ++Pre-Send TAKETHIS '%s'", newsgroup, article.MessageID) cmdID, txBytes, err := conn.SendTakeThisArticleStreaming(article, &processor.LocalNNTPHostname, newsgroup, demuxer, readTAKETHISResponsesChan, job) astart2 := time.Now() - job.NGTProgress.AddNGTP(0, 0, int64(txBytes)) + job.Mux.Lock() + job.TTxBytes += uint64(txBytes) + job.TmpTxBytes += uint64(txBytes) + job.Mux.Unlock() ttxBytes += uint64(txBytes) if err != nil { if err == common.ErrNoNewsgroups { @@ -1978,7 +2016,7 @@ func (rs *ReturnSignal) BlockTT() { rs.Mux.Lock() rs.RunTT = false rs.Mux.Unlock() - log.Printf("BlockTT: released RunTT lock") + //log.Printf("BlockTT: released RunTT lock") } func (rs *ReturnSignal) GetLockTT() { @@ -2044,7 +2082,7 @@ func (rs *ReturnSignal) LockCHECK() { rs.Mux.Lock() if !rs.RunTT { rs.CHECK = true - log.Printf("LockCHECK: acquired CHECK lock (RunTT=%t) waited %v", rs.RunTT, time.Since(start)) + //log.Printf("LockCHECK: acquired CHECK lock (RunTT=%t) waited %v", rs.RunTT, time.Since(start)) rs.Mux.Unlock() return } @@ -2132,9 +2170,9 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue workerID := assignWorkerToNewsgroup(*currentJob.Newsgroup) waiting: for { - if len(TakeThisQueues[workerID]) > 1 { + if len(TakeThisQueues[workerID]) >= MaxQueuedJobs { rs.BlockCHECK() - log.Printf("CheckWorker (%d): waiting shared takeThisChan full (%d)", workerID, len(TakeThisQueues[workerID])) + log.Printf("CheckWorker (%d): waiting... 
shared takeThisChan full (%d)", workerID, len(TakeThisQueues[workerID])) time.Sleep(time.Second / 4) continue waiting } @@ -2162,9 +2200,9 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue time.Sleep(time.Second) return } - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquire LOCK CHECK for batch (offset %d: %d-%d) (%d messages)", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, len(currentJob.MessageIDs[batchStart:batchEnd])) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquire LOCK CHECK for batch (offset %d: %d-%d) (%d messages)", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, len(currentJob.MessageIDs[batchStart:batchEnd])) rs.LockCHECK() - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquired CHECK lock for batch (offset %d: %d-%d) -> SendCheckMultiple", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquired CHECK lock for batch (offset %d: %d-%d) -> SendCheckMultiple", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) err := conn.SendCheckMultiple(currentJob.MessageIDs[batchStart:batchEnd], readCHECKResponsesChan, currentJob, demuxer) if err != nil { log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d SendCheckMultiple error for batch (offset %d: %d-%d): %v", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, err) @@ -2176,15 +2214,15 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue //common.ChanRelease(flipflopChan) return } - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d Sent CHECK for batch (offset %d: %d-%d), responses will be read asynchronously...", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d Sent CHECK for batch (offset %d: %d-%d), responses will be read asynchronously...", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) } } else { - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d skipping CHECK for %d message IDs (TAKETHIS mode)", *currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs)) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d skipping CHECK for %d message IDs (TAKETHIS mode)", *currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs)) currentJob.WantedIDs = currentJob.MessageIDs //rs.UnlockCHECKforTTwithWait() rs.BlockCHECK() TakeThisQueues[workerID] <- currentJob // local takethis chan sharing the same connection - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d sent to local TakeThisChan", *currentJob.Newsgroup, workerID, currentJob.JobID) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d sent to local TakeThisChan", *currentJob.Newsgroup, workerID, currentJob.JobID) } //lastRun = time.Now() // Check if there are more jobs to process @@ -2209,7 +2247,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // Channel full, will be processed on next tick } } - log.Printf("CheckWorker (%d): job #%d CHECKs sent, loop to next job", 
workerID, currentJob.JobID) + //log.Printf("CheckWorker (%d): job #%d CHECKs sent, loop to next job", workerID, currentJob.JobID) case <-ticker.C: if common.WantShutdown() { @@ -2266,7 +2304,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rr.ClearReadRequest() return } - log.Printf("CheckWorker (%d): Pre-Read CHECK response for msgID: %s (cmdID=%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) + //log.Printf("CheckWorker (%d): Pre-Read CHECK response for msgID: %s (cmdID=%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) start := time.Now() // NEW: Get pre-read response from demuxer (eliminates race condition) @@ -2309,12 +2347,12 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue tookTime += took responseCount++ rr.Job.Increment(nntp.IncrFLAG_CHECKED) - if rr.N == 1 { - log.Printf("CheckWorker (%d): time to first response for msgID: %s (cmdID=%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) + if rr.N == 1 && took.Milliseconds() > 100 { + log.Printf("CheckWorker (%d): time to first response for msgID: %s (cmdID=%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, took.Milliseconds()) tookTime = 0 - } else if responseCount >= 100 { + } else if responseCount >= 10000 { avg := time.Duration(float64(tookTime) / float64(responseCount)) - if avg > 1 { + if avg.Milliseconds() > 0 { log.Printf("CheckWorker (%d): Read %d CHECK responses, avg latency: %v, last: %v (cmdID=%d MID=%d/%d)", workerID, responseCount, avg, took, rr.CmdID, rr.N, rr.Reqs) } responseCount = 0 @@ -2339,7 +2377,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rr.ClearReadRequest() return } - log.Printf("CheckWorker (%d): Got CHECK response: '%s' (cmdID=%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) + //log.Printf("CheckWorker (%d): Got CHECK response: '%s' (cmdID=%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) rs.Mux.Lock() job, exists := rs.jobMap[rr.MsgID] @@ -2396,13 +2434,13 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Unlock() if len(job.WantedIDs) > 0 { // Pass job to TAKETHIS worker via channel - log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5 job #%d got all %d CHECK responses, passing to TAKETHIS worker (wanted: %d articles) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, queuedCount, len(job.WantedIDs), len(TakeThisQueues[workerID])) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5 job #%d got all %d CHECK responses, passing to TAKETHIS worker (wanted: %d articles) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, queuedCount, len(job.WantedIDs), len(TakeThisQueues[workerID])) rs.UnlockCHECKforTT() TakeThisQueues[workerID] <- job // local takethis chan sharing the same connection - log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5c Sent job #%d to TAKETHIS worker (wanted: %d/%d) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, len(job.WantedIDs), queuedCount, len(TakeThisQueues[workerID])) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5c Sent job #%d to TAKETHIS worker (wanted: %d/%d) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, len(job.WantedIDs), queuedCount, len(TakeThisQueues[workerID])) } else { - log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG6 job #%d got %d CHECK responses 
but server wants none", *job.Newsgroup, workerID, job.JobID, queuedCount) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG6 job #%d got %d CHECK responses but server wants none", *job.Newsgroup, workerID, job.JobID, queuedCount) // Send response and close channel for jobs with no wanted articles job.Response(true, nil) if len(TakeThisQueues[workerID]) > 0 { @@ -2450,7 +2488,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue return } - log.Printf("TTResponseWorker (%d): Pre-Read TAKETHIS response for msgID: %s (cmdID=%d)", workerID, *rr.MsgID, rr.CmdID) + //log.Printf("TTResponseWorker (%d): Pre-Read TAKETHIS response for msgID: %s (cmdID=%d)", workerID, *rr.MsgID, rr.CmdID) // Get pre-read response from demuxer (same pattern as CHECK) var respData *nntp.ResponseData @@ -2480,7 +2518,11 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } rr.Job.TTMode.IncrementTmp() - rr.Job.NGTProgress.AddNGTP(0, 1, 0) + rr.Job.Mux.Lock() + txbytes := rr.Job.TmpTxBytes + rr.Job.TmpTxBytes = 0 + rr.Job.Mux.Unlock() + rr.Job.NGTProgress.AddNGTP(0, 1, txbytes) // Handle response codes switch respData.Code { @@ -2574,10 +2616,10 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue continue } - log.Printf("Newsgroup: '%s' | TTworker (%d): Prepare locking to send TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, workerID, job.JobID, len(wantedArticles)) + //log.Printf("Newsgroup: '%s' | TTworker (%d): Prepare locking to send TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, workerID, job.JobID, len(wantedArticles)) rs.GetLockTT() //common.ChanLock(flipflopChan) - log.Printf("Newsgroup: '%s' | TTworker (%d): Sending TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, workerID, job.JobID, len(wantedArticles)) + //log.Printf("Newsgroup: '%s' | TTworker (%d): Sending TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, workerID, job.JobID, len(wantedArticles)) // Send TAKETHIS commands using existing function redis_cached, err := sendArticlesBatchViaTakeThis(conn, wantedArticles, job, *job.Newsgroup, rs.redisCli, demuxer, readTAKETHISResponsesChan) //common.ChanRelease(flipflopChan) @@ -2591,12 +2633,14 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Unlock() return } - log.Printf("Newsgroup: '%s' | TTworker (%d): TAKETHIS job #%d sent, redis_cached=%d", *job.Newsgroup, workerID, job.JobID, redis_cached) + if VERBOSE && redis_cached > 0 { + log.Printf("Newsgroup: '%s' | TTworker (%d): TAKETHIS job #%d sent, redis_cached=%d", *job.Newsgroup, workerID, job.JobID, redis_cached) + } // Send response back - log.Printf("Newsgroup: '%s' | TTworker (%d): Sending TTresponse for job #%d to responseChan len=%d", *job.Newsgroup, workerID, job.JobID, len(job.ResponseChan)) + //log.Printf("Newsgroup: '%s' | TTworker (%d): Sending TTresponse for job #%d to responseChan len=%d", *job.Newsgroup, workerID, job.JobID, len(job.ResponseChan)) job.Response(true, nil) - log.Printf("Newsgroup: '%s' | TTworker (%d): Sent TTresponse for job #%d to responseChan", *job.Newsgroup, workerID, job.JobID) + //log.Printf("Newsgroup: '%s' | TTworker (%d): Sent TTresponse for job #%d to responseChan", *job.Newsgroup, workerID, job.JobID) } }() @@ -2621,7 +2665,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // Build jobMap for tracking which message IDs belong to this job // and count queued messages rs.Mux.Lock() - 
queueFull := len(rs.jobs) > 1 || len(TakeThisQueues[workerID]) > 1 + queueFull := len(rs.jobs) >= MaxQueuedJobs || len(TakeThisQueues[workerID]) >= MaxQueuedJobs if queueFull { log.Printf("Newsgroup: '%s' | CHTTworker (%d): got job #%d with %d message IDs. queued=%d ... waiting...", *job.Newsgroup, workerID, job.JobID, len(job.MessageIDs), len(rs.jobs)) select { @@ -2642,7 +2686,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // pass case <-time.After(time.Millisecond * 16): rs.Mux.Lock() - queueFull = len(rs.jobs) > 1 || len(TakeThisQueues[workerID]) > 1 + queueFull = len(rs.jobs) >= MaxQueuedJobs || len(TakeThisQueues[workerID]) >= MaxQueuedJobs rs.Mux.Unlock() if !queueFull { break waitForReply @@ -2670,7 +2714,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // Signal ticker to process this job select { case tickChan <- struct{}{}: - log.Printf("Newsgroup: '%s' | CHTTworker (%d): signal ticker start job #%d with %d message IDs. queued=%d", *job.Newsgroup, workerID, job.JobID, len(job.MessageIDs), len(rs.jobs)) + //log.Printf("Newsgroup: '%s' | CHTTworker (%d): signal ticker start job #%d with %d message IDs. queued=%d", *job.Newsgroup, workerID, job.JobID, len(job.MessageIDs), len(rs.jobs)) default: // tickChan full, will be processed on next tick } @@ -2960,11 +3004,11 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { Started string LastUpdated string Finished bool - SpeedKB int64 + SpeedKB uint64 Duration string TimeSince string - LastArtPerfC int64 - LastArtPerfT int64 + LastArtPerfC uint64 + LastArtPerfT uint64 } started := len(nntp.NewsgroupTransferProgressMap) diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 02dd7c8..2c62e21 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -178,7 +178,7 @@ func (d *ResponseDemuxer) readAndDispatch() { continue } } - log.Printf("ResponseDemuxer: nothing to process, waiting on signalChan") + //log.Printf("ResponseDemuxer: nothing to process, waiting on signalChan") <-d.signalChan continue } @@ -190,12 +190,14 @@ func (d *ResponseDemuxer) readAndDispatch() { d.LastID = cmdInfo.CmdID } - log.Printf("ResponseDemuxer: waiting for response cmdID=%d respType=%d", cmdInfo.CmdID, cmdInfo.RespType) + //log.Printf("ResponseDemuxer: waiting for response cmdID=%d respType=%d", cmdInfo.CmdID, cmdInfo.RespType) start := time.Now() d.conn.TextConn.StartResponse(cmdInfo.CmdID) code, line, err := d.conn.TextConn.ReadCodeLine(0) // Read any code d.conn.TextConn.EndResponse(cmdInfo.CmdID) - log.Printf("ResponseDemuxer: received response cmdID=%d: code=%d line='%s' err='%v' respType=%d (waited %v)", cmdInfo.CmdID, code, line, err, cmdInfo.RespType, time.Since(start)) + if time.Since(start) > time.Second { + log.Printf("LongWait ResponseDemuxer: received response cmdID=%d: code=%d line='%s' err='%v' respType=%d (waited %v)", cmdInfo.CmdID, code, line, err, cmdInfo.RespType, time.Since(start)) + } if err != nil && code == 0 { d.errChan <- struct{}{} log.Printf("ResponseDemuxer: error reading response for cmdID=%d: %v", cmdInfo.CmdID, err) @@ -214,7 +216,7 @@ func (d *ResponseDemuxer) readAndDispatch() { select { case d.checkResponseChan <- respData: // Dispatched successfully - log.Printf("ResponseDemuxer: dispatched CHECK response cmdID=%d d.checkResponseChan=%d", cmdInfo.CmdID, len(d.checkResponseChan)) + //log.Printf("ResponseDemuxer: dispatched CHECK response cmdID=%d 
d.checkResponseChan=%d", cmdInfo.CmdID, len(d.checkResponseChan)) case <-d.errChan: log.Printf("ResponseDemuxer: got errChan while dispatching CHECK response, exiting") d.errChan <- struct{}{} @@ -225,7 +227,7 @@ func (d *ResponseDemuxer) readAndDispatch() { select { case d.ttResponseChan <- respData: // Dispatched successfully - log.Printf("ResponseDemuxer: dispatched TAKETHIS response cmdID=%d d.ttResponseChan=%d", cmdInfo.CmdID, len(d.ttResponseChan)) + //log.Printf("ResponseDemuxer: dispatched TAKETHIS response cmdID=%d d.ttResponseChan=%d", cmdInfo.CmdID, len(d.ttResponseChan)) case <-d.errChan: d.errChan <- struct{}{} log.Printf("ResponseDemuxer: got errChan while dispatching TAKETHIS response, exiting") @@ -273,7 +275,9 @@ func (o *OffsetQueue) Wait(n int) { o.mux.RUnlock() o.mux.Lock() - log.Printf("OffsetQueue: waited (%d ms) for %d batches to finish, currently queued: %d", time.Since(start).Milliseconds(), n, o.queued) + if time.Since(start).Milliseconds() > 1000 { + log.Printf("OffsetQueue: waited (%d ms) for %d batches to finish, currently queued: %d", time.Since(start).Milliseconds(), n, o.queued) + } o.isleep = o.isleep / 2 if o.isleep < time.Millisecond { o.isleep = 0 @@ -371,6 +375,8 @@ type CHTTJob struct { transferred uint64 redisCached uint64 TxErrors uint64 + TmpTxBytes uint64 + TTxBytes uint64 ConnErrors uint64 OffsetStart int64 BatchStart int64 @@ -412,29 +418,35 @@ type NewsgroupTransferProgress struct { BatchStart int64 BatchEnd int64 TotalArticles int64 - ArticlesTT int64 - ArticlesCH int64 + ArticlesTT uint64 + ArticlesCH uint64 Finished bool - TXBytes int64 - TXBytesTMP int64 + TXBytes uint64 + TXBytesTMP uint64 LastCronTX time.Time - LastSpeedKB int64 - LastArtPerfC int64 // check articles per second - LastArtPerfT int64 // takethis articles per second + LastSpeedKB uint64 + LastArtPerfC uint64 // check articles per second + LastArtPerfT uint64 // takethis articles per second } func (ngp *NewsgroupTransferProgress) CalcSpeed() { ngp.Mux.Lock() if time.Since(ngp.LastCronTX) >= time.Second*3 { - since := int64(time.Since(ngp.LastCronTX).Seconds()) + since := uint64(time.Since(ngp.LastCronTX).Seconds()) if ngp.TXBytesTMP > 0 { ngp.LastSpeedKB = ngp.TXBytesTMP / since / 1024 + } else { + ngp.LastSpeedKB = 0 } if ngp.ArticlesCH > 0 { ngp.LastArtPerfC = ngp.ArticlesCH / since + } else { + ngp.LastArtPerfC = 0 } if ngp.ArticlesTT > 0 { ngp.LastArtPerfT = ngp.ArticlesTT / since + } else { + ngp.LastArtPerfT = 0 } //log.Printf("Newsgroup: '%s' | Transfer Perf: %d KB/s (%d bytes in %v) did: CH=(%d|%d/s) TT=(%d|%d/s)", *ngp.Newsgroup, ngp.LastSpeedKB, ngp.TXBytesTMP, since, ngp.ArticlesCH, ngp.LastArtPerfC, ngp.ArticlesTT, ngp.LastArtPerfT) @@ -446,7 +458,7 @@ func (ngp *NewsgroupTransferProgress) CalcSpeed() { ngp.Mux.Unlock() } -func (ngp *NewsgroupTransferProgress) AddNGTP(articlesCH int64, articlesTT int64, txbytes int64) { +func (ngp *NewsgroupTransferProgress) AddNGTP(articlesCH uint64, articlesTT uint64, txbytes uint64) { if articlesCH > 0 { ngp.Mux.Lock() ngp.ArticlesCH += articlesCH @@ -1623,31 +1635,17 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readCHECKResponses //log.Printf("Newsgroup: '%s' | CHECK sent '%s' (CmdID=%d) pass notify to readResponsesChan=%d", *job.Newsgroup, *msgID, cmdID, len(readCHECKResponsesChan)) readCHECKResponsesChan <- &ReadRequest{CmdID: cmdID, Job: job, MsgID: msgID, N: n + 1, Reqs: len(messageIDs)} - log.Printf("Newsgroup: '%s' | CHECK notified response reader '%s' (CmdID=%d) readCHECKResponsesChan=%d", 
*job.Newsgroup, *msgID, cmdID, len(readCHECKResponsesChan)) + //log.Printf("Newsgroup: '%s' | CHECK notified response reader '%s' (CmdID=%d) readCHECKResponsesChan=%d", *job.Newsgroup, *msgID, cmdID, len(readCHECKResponsesChan)) } return nil } -func (c *BackendConn) GetBufSize(size int) int { - if size+2048 <= 1024*1024 { - return size + 2048 - } - return 1024 * 1024 // hardcoded default max buffer size -} - -func (c *BackendConn) Lock() { - c.mux.Lock() -} -func (c *BackendConn) Unlock() { - c.mux.Unlock() -} - // SendTakeThisArticleStreaming IS UNSAFE! MUST BE LOCKED AND UNLOCKED OUTSIDE FOR THE WHOLE BATCH!!! // sends TAKETHIS command and article content without waiting for response // Returns command ID for later response reading - used for streaming mode // Registers the command ID with the demuxer for proper response routing func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string, newsgroup string, demuxer *ResponseDemuxer, readTAKETHISResponsesChan chan *ReadRequest, job *CHTTJob) (cmdID uint, txBytes int, err error) { - start := time.Now() + //start := time.Now() //c.mux.Lock() //defer c.mux.Unlock() @@ -1674,7 +1672,7 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp //c.mux.Lock() //defer c.mux.Unlock() - startSend := time.Now() + //startSend := time.Now() // Send TAKETHIS command cmdID, err = c.TextConn.Cmd("TAKETHIS %s", article.MessageID) if err != nil { @@ -1727,63 +1725,25 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp } else { txBytes += tx } - log.Printf("Newsgroup: '%s' | TAKETHIS sent CmdID=%d '%s' txBytes: %d in %v (sending took: %v) readTAKETHISResponsesChanLen=%d/%d", newsgroup, cmdID, article.MessageID, txBytes, time.Since(start), time.Since(startSend), len(readTAKETHISResponsesChan), cap(readTAKETHISResponsesChan)) + //log.Printf("Newsgroup: '%s' | TAKETHIS sent CmdID=%d '%s' txBytes: %d in %v (sending took: %v) readTAKETHISResponsesChanLen=%d/%d", newsgroup, cmdID, article.MessageID, txBytes, time.Since(start), time.Since(startSend), len(readTAKETHISResponsesChan), cap(readTAKETHISResponsesChan)) - startFlush := time.Now() + //startFlush := time.Now() if err := writer.Flush(); err != nil { return 0, txBytes, fmt.Errorf("failed to flush article data SendTakeThisArticleStreaming: %w", err) } - chanStart := time.Now() + //chanStart := time.Now() // Register command ID with demuxer as TYPE_TAKETHIS (CRITICAL: must match CHECK pattern) demuxer.RegisterCommand(cmdID, TYPE_TAKETHIS) - log.Printf("Newsgroup: '%s' | TAKETHIS flushed CmdID=%d '%s' (flushing took: %v) total time: %v readTAKETHISResponsesChan=%d/%d", newsgroup, cmdID, article.MessageID, time.Since(startFlush), time.Since(start), len(readTAKETHISResponsesChan), cap(readTAKETHISResponsesChan)) + //log.Printf("Newsgroup: '%s' | TAKETHIS flushed CmdID=%d '%s' (flushing took: %v) total time: %v readTAKETHISResponsesChan=%d/%d", newsgroup, cmdID, article.MessageID, time.Since(startFlush), time.Since(start), len(readTAKETHISResponsesChan), cap(readTAKETHISResponsesChan)) // Queue ReadRequest IMMEDIATELY after command (like SendCheckMultiple does at line 1608) readTAKETHISResponsesChan <- &ReadRequest{CmdID: cmdID, Job: job, MsgID: &article.MessageID, N: 1, Reqs: 1} - log.Printf("Newsgroup: '%s' | TAKETHIS notified response reader CmdID=%d '%s' waited %v readTAKETHISResponsesChan=%d/%d", newsgroup, cmdID, article.MessageID, time.Since(chanStart), len(readTAKETHISResponsesChan), 
cap(readTAKETHISResponsesChan)) + //log.Printf("Newsgroup: '%s' | TAKETHIS notified response reader CmdID=%d '%s' waited %v readTAKETHISResponsesChan=%d/%d", newsgroup, cmdID, article.MessageID, time.Since(chanStart), len(readTAKETHISResponsesChan), cap(readTAKETHISResponsesChan)) // Return command ID without reading response (streaming mode) return cmdID, txBytes, nil } -// ReadTakeThisResponseStreaming reads a TAKETHIS response using the command ID -// Used in streaming mode after all articles have been sent -func (c *BackendConn) ReadTakeThisResponseStreaming(newsgroup string, cr *CheckResponse) (int, error) { - //log.Printf("Newsgroup: '%s' | TAKETHIS acquire c.mux.Lock(): CmdID=%d message-id '%s'", newsgroup, cr.CmdId, cr.Article.MessageID) - //c.mux.Lock() - //defer c.mux.Unlock() - //log.Printf("Newsgroup: '%s' | TAKETHIS acquired Lock(): wait Response CmdID=%d message-id '%s'", newsgroup, cr.CmdId, cr.Article.MessageID) - log.Printf("Newsgroup: '%s' | TAKETHIS wait for response CmdID=%d message-id '%s'", newsgroup, cr.CmdId, cr.Article.MessageID) - // Read TAKETHIS response - c.TextConn.StartResponse(cr.CmdId) - //log.Printf("Newsgroup: '%s' | TAKETHIS got *BackendConn.ReadTakeThisResponseStreaming: passed StartResponse CmdID=%d message-id '%s'", newsgroup, cr.CmdId, cr.Article.MessageID) - //c.mux.Lock() - //defer c.mux.Unlock() - code, line, err := c.TextConn.ReadCodeLine(239) - c.TextConn.EndResponse(cr.CmdId) - if code == 0 && err != nil { - return 0, fmt.Errorf("failed to read TAKETHIS response: %w", err) - } - parts := strings.Fields(line) - if len(parts) < 1 { - log.Printf("ERROR in ReadTakeThisResponseStreaming: Malformed response code=%d line: '%s' CmdID=%d message-id '%s')", code, line, cr.CmdId, cr.Article.MessageID) - //rr.ReturnReadRequest(rrRetChan) - return 0, fmt.Errorf("malformed TAKETHIS response: %s", line) - } - - if parts[0] != cr.Article.MessageID { - log.Printf("ERROR in ReadTakeThisResponseStreaming: Mismatched response code=%d line: '%s' (expected msgID '%s') CmdID=%d", code, line, cr.Article.MessageID, cr.CmdId) - return 0, fmt.Errorf("out of order TAKETHIS response: expected %s, got %s", cr.Article.MessageID, parts[0]) - } - log.Printf("Newsgroup: '%s' | TAKETHIS got *BackendConn.ReadTakeThisResponseStreaming: passed ReadCodeLine CmdID=%d: code=%d message-id '%s'", newsgroup, cr.CmdId, code, cr.Article.MessageID) - - // Parse response - // Format: code [message] - // 239 - article transferred successfully - // 439 - article transfer failed - return code, nil -} - // PostArticle posts an article using the POST command func (c *BackendConn) PostArticle(article *models.Article) (int, error) { c.mux.Lock() diff --git a/internal/nntp/nntp-client.go b/internal/nntp/nntp-client.go index 525590a..f702426 100644 --- a/internal/nntp/nntp-client.go +++ b/internal/nntp/nntp-client.go @@ -368,3 +368,17 @@ func (c *BackendConn) UpdateLastUsed() { //c.SetReadDeadline(time.Now().Add(c.Backend.ReadTimeout)) //c.SetWriteDeadline(time.Now().Add(c.Backend.WriteTimeout)) } + +func (c *BackendConn) GetBufSize(size int) int { + if size+4096 <= 1024*1024 { + return size + 4096 + } + return 1024 * 1024 // hardcoded default max buffer size +} + +func (c *BackendConn) Lock() { + c.mux.Lock() +} +func (c *BackendConn) Unlock() { + c.mux.Unlock() +} From 5ee2002c8bdc5f2b1266b06fd14b023cd370cb09 Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 12 Oct 2025 03:28:15 +0000 Subject: [PATCH 040/100] testing --- cmd/nntp-transfer/main.go | 107 +++++++++++--------------- 
internal/nntp/nntp-client-commands.go | 8 +- 2 files changed, 49 insertions(+), 66 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 2c1508f..29fa5b2 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -612,32 +612,22 @@ func parseProxyConfig(address, proxyType, username, password string) (*ProxyConf } const query_getArticlesBatchWithDateFilter_selectPart = `SELECT article_num, message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, reply_count, path, headers_json, body_text, imported_at FROM articles` -const query_getArticlesBatchWithDateFilter_nodatefilter = `SELECT article_num, message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, reply_count, path, headers_json, body_text, imported_at FROM articles ORDER BY date_sent ASC, article_num ASC LIMIT ?` -const query_getArticlesBatchWithDateFilter_nodatefilter_keyset = `SELECT article_num, message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, reply_count, path, headers_json, body_text, imported_at FROM articles WHERE (date_sent > ? OR (date_sent = ? AND article_num > ?)) ORDER BY date_sent ASC, article_num ASC LIMIT ?` -const query_getArticlesBatchWithDateFilter_orderby = " ORDER BY date_sent ASC LIMIT ?" -const query_getArticlesBatchWithDateFilter_orderby_keyset = " AND (date_sent > ? OR (date_sent = ? AND article_num > ?)) ORDER BY date_sent ASC LIMIT ?" +const query_getArticlesBatchWithDateFilter_orderby = " ORDER BY date_sent ASC LIMIT ? OFFSET ?" // getArticlesBatchWithDateFilter retrieves articles from a group database with optional date filtering -func getArticlesBatchWithDateFilter(db *database.Database, ng *models.Newsgroup, lastDateSent *time.Time, lastArticleNum int64, startTime, endTime *time.Time) ([]*models.Article, error) { +func getArticlesBatchWithDateFilter(db *database.Database, ng *models.Newsgroup, offset int64, startTime, endTime *time.Time) ([]*models.Article, error) { // Get group database groupDBs, err := db.GetGroupDBs(ng.Name) if err != nil { return nil, fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", ng.Name, err) } - defer func() { - if ferr := db.ForceCloseGroupDBs(groupDBs); ferr != nil { - log.Printf("ForceCloseGroupDBs error for '%s': %v", ng.Name, ferr) - } - }() + defer groupDBs.Return(db) + var query string var args []interface{} - // Use keyset pagination (much faster than OFFSET) - useKeyset := lastDateSent != nil - if startTime != nil || endTime != nil { // Build query with date filtering - var whereConditions []string if startTime != nil { @@ -655,28 +645,12 @@ func getArticlesBatchWithDateFilter(db *database.Database, ng *models.Newsgroup, whereClause = " WHERE " + strings.Join(whereConditions, " AND ") } - if useKeyset { - // Keyset pagination with date filter - query = query_getArticlesBatchWithDateFilter_selectPart + whereClause + query_getArticlesBatchWithDateFilter_orderby_keyset - lastDateStr := lastDateSent.UTC().Format("2006-01-02 15:04:05") - args = append(args, lastDateStr, lastDateStr, lastArticleNum, dbBatchSize) - } else { - // First batch with date filter - query = query_getArticlesBatchWithDateFilter_selectPart + whereClause + query_getArticlesBatchWithDateFilter_orderby - args = append(args, dbBatchSize) - } + query = query_getArticlesBatchWithDateFilter_selectPart + whereClause + query_getArticlesBatchWithDateFilter_orderby + args = append(args, dbBatchSize, offset) } else { - // No date filtering - if useKeyset { - // 
Keyset pagination without date filter - query = query_getArticlesBatchWithDateFilter_nodatefilter_keyset - lastDateStr := lastDateSent.UTC().Format("2006-01-02 15:04:05") - args = []interface{}{lastDateStr, lastDateStr, lastArticleNum, dbBatchSize} - } else { - // First batch without date filter - query = query_getArticlesBatchWithDateFilter_nodatefilter - args = []interface{}{dbBatchSize} - } + // No date filtering - simple OFFSET pagination + query = query_getArticlesBatchWithDateFilter_selectPart + query_getArticlesBatchWithDateFilter_orderby + args = []interface{}{dbBatchSize, offset} } rows, err := groupDBs.DB.Query(query, args...) @@ -698,12 +672,15 @@ func getArticlesBatchWithDateFilter(db *database.Database, ng *models.Newsgroup, } // getArticleCountWithDateFilter gets the total count of articles with optional date filtering -func getArticleCountWithDateFilter(groupDBs *database.GroupDBs, startTime, endTime *time.Time) (int64, error) { +// When no date filter is specified, uses cached message_count from newsgroups table in main DB +func getArticleCountWithDateFilter(db *database.Database, groupDBs *database.GroupDBs, startTime, endTime *time.Time) (int64, error) { var query string var args []interface{} + var count int64 + start := time.Now() if startTime != nil || endTime != nil { - // Build count query with date filtering + // Build count query with date filtering - must use live COUNT(*) query var whereConditions []string if startTime != nil { @@ -722,24 +699,31 @@ func getArticleCountWithDateFilter(groupDBs *database.GroupDBs, startTime, endTi } query = "SELECT COUNT(*) FROM articles" + whereClause + err := groupDBs.DB.QueryRow(query, args...).Scan(&count) + if err != nil { + return 0, err + } } else { - // No date filtering - query = "SELECT COUNT(*) FROM articles" + // No date filtering - use cached message_count from newsgroups table in main DB + // This is MUCH faster than COUNT(*) on large tables (O(1) vs O(N)) + query = "SELECT COALESCE(message_count, 0) FROM newsgroups WHERE name = ?" 
+ err := db.GetMainDB().QueryRow(query, groupDBs.Newsgroup).Scan(&count) + if err != nil { + // Fallback to direct COUNT if newsgroups table doesn't have the entry + log.Printf("WARNING: Could not get message_count from newsgroups table for '%s', falling back to COUNT(*): %v", groupDBs.Newsgroup, err) + query = "SELECT COUNT(*) FROM articles" + err = groupDBs.DB.QueryRow(query).Scan(&count) + if err != nil { + return 0, err + } + } } - start := time.Now() - var count int64 - err := groupDBs.DB.QueryRow(query, args...).Scan(&count) elapsed := time.Since(start) - if elapsed > 5*time.Second { log.Printf("WARNING: Slow COUNT query for group '%s' took %v (count=%d)", groupDBs.Newsgroup, elapsed, count) } - if err != nil { - return 0, err - } - return count, nil } @@ -1195,7 +1179,6 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i if err != nil { return fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", ng.Name, err) } - //log.Printf("Newsgroup: '%s' | transferNewsgroup: Got group DBs, querying article count...", ng.Name) // Initialize newsgroup progress tracking resultsMutex.Lock() @@ -1210,18 +1193,18 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } } resultsMutex.Unlock() + // Get total article count first with date filtering - totalArticles, err := getArticleCountWithDateFilter(groupDBsA, startTime, endTime) + totalArticles, err := getArticleCountWithDateFilter(db, groupDBsA, startTime, endTime) if err != nil { + if ferr := db.ForceCloseGroupDBs(groupDBsA); ferr != nil { + log.Printf("ForceCloseGroupDBs error for '%s': %v", ng.Name, ferr) + } return fmt.Errorf("failed to get article count for newsgroup '%s': %v", ng.Name, err) } - + groupDBsA.Return(db) //log.Printf("Newsgroup: '%s' | transferNewsgroup: Got article count (%d), closing group DBs...", ng.Name, totalArticles) - if ferr := db.ForceCloseGroupDBs(groupDBsA); ferr != nil { - log.Printf("ForceCloseGroupDBs error for '%s': %v", ng.Name, ferr) - } - //log.Printf("Newsgroup: '%s' | transferNewsgroup: Closed group DBs, checking if articles exist...", ng.Name) if totalArticles == 0 { @@ -1384,10 +1367,9 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i }(&responseWG) OffsetQueue := &nntp.OffsetQueue{} - // Use keyset pagination instead of OFFSET for much better performance - var lastDateSent *time.Time - var lastArticleNum int64 - articlesProcessed := int64(0) + // Use simple OFFSET pagination + var articlesProcessed int64 + var offset int64 // Get articles in database batches (much larger than network batches) for articlesProcessed < totalArticles { @@ -1400,8 +1382,8 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i return err } start := time.Now() - // Load batch from database using keyset pagination (no OFFSET!) 
- articles, err := getArticlesBatchWithDateFilter(db, ng, lastDateSent, lastArticleNum, startTime, endTime) + // Load batch from database using OFFSET pagination + articles, err := getArticlesBatchWithDateFilter(db, ng, offset, startTime, endTime) if err != nil { log.Printf("Error loading article batch (processed %d) for newsgroup %s: %v", articlesProcessed, ng.Name, err) return fmt.Errorf("failed to load article batch (processed %d) for newsgroup '%s': %v", articlesProcessed, ng.Name, err) @@ -1412,10 +1394,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i break } - // Update keyset for next iteration (last article in current batch) - lastArticle := articles[len(articles)-1] - lastDateSent = &lastArticle.DateSent - lastArticleNum = lastArticle.DBArtNum + offset += int64(len(articles)) articlesProcessed += int64(len(articles)) if dryRun && debugCapture { diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 2c62e21..1e04869 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -167,14 +167,18 @@ func (d *ResponseDemuxer) readAndDispatch() { cmdInfo = cmdInfoBacklog outoforderBacklog[d.LastID+1] = nil delete(outoforderBacklog, d.LastID+1) + } else { + log.Printf("ResponseDemuxer: no backlog with cmdID=%d found. try PopCommand", d.LastID+1) + cmdInfo = d.PopCommand() } } else { cmdInfo = d.PopCommand() } if cmdInfo == nil { if len(outoforderBacklog) > 0 { - log.Printf("ResponseDemuxer: got no cmdInfo but have outoforderBacklog: %d", len(outoforderBacklog)) + log.Printf("ResponseDemuxer: got no cmdInfo but have outoforderBacklog: %d [%v]", len(outoforderBacklog), outoforderBacklog) if _, exists := outoforderBacklog[d.LastID+1]; exists { + log.Printf("ResponseDemuxer: pre-processing out-of-order backlog cmdID=%d d.LastID=%d", d.LastID+1, d.LastID) continue } } @@ -183,7 +187,7 @@ func (d *ResponseDemuxer) readAndDispatch() { continue } if d.LastID+1 != cmdInfo.CmdID { - log.Printf("ResponseDemuxer: WARNING - out of order cmdID received, expected %d got %d", d.LastID+1, cmdInfo.CmdID) + log.Printf("ResponseDemuxer: WARNING - out-of-order cmdID received, expected %d got %d", d.LastID+1, cmdInfo.CmdID) outoforderBacklog[cmdInfo.CmdID] = cmdInfo continue } else { From ccde975d61f8434047dcf55b895d49bf78d6e246 Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 12 Oct 2025 03:28:40 +0000 Subject: [PATCH 041/100] cherry-pick: scan-out-of-order-overview --- cmd/recover-db/main.go | 187 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 178 insertions(+), 9 deletions(-) diff --git a/cmd/recover-db/main.go b/cmd/recover-db/main.go index d99b578..ac2ed7a 100644 --- a/cmd/recover-db/main.go +++ b/cmd/recover-db/main.go @@ -33,15 +33,16 @@ func main() { database.NO_CACHE_BOOT = true // prevents booting caches log.Printf("go-pugleaf Database Recovery Tool (version: %s)", config.AppVersion) var ( - dbPath = flag.String("db", "data", "Data Path to main data directory (required)") - newsgroup = flag.String("group", "$all", "Newsgroup name to check (required) (\\$all to check for all or news.* to check for all in that hierarchy)") - verbose = flag.Bool("v", true, "Verbose output") - repair = flag.Bool("repair", false, "Attempt to repair detected inconsistencies") - parseDates = flag.Bool("parsedates", false, "Check and log date parsing differences between date_string and date_sent") - rewriteDates = flag.Bool("rewritedates", false, "Rewrite incorrect dates (requires -parsedates)") - 
rebuildThreads = flag.Bool("rebuild-threads", false, "Rebuild all thread relationships from scratch (destructive)") - maxPar = flag.Int("max-par", 1, "use with -rebuild-threads to process N newsgroups") - dataDir = flag.String("data", "./data", "Directory to store database files") + dbPath = flag.String("db", "data", "Data Path to main data directory (required)") + newsgroup = flag.String("group", "$all", "Newsgroup name to check (required) (\\$all to check for all or news.* to check for all in that hierarchy)") + verbose = flag.Bool("v", true, "Verbose output") + repair = flag.Bool("repair", false, "Attempt to repair detected inconsistencies") + parseDates = flag.Bool("parsedates", false, "Check and log date parsing differences between date_string and date_sent") + rewriteDates = flag.Bool("rewritedates", false, "Rewrite incorrect dates (requires -parsedates)") + rebuildThreads = flag.Bool("rebuild-threads", false, "Rebuild all thread relationships from scratch (destructive)") + maxPar = flag.Int("max-par", 1, "use with -rebuild-threads to process N newsgroups") + dataDir = flag.String("data", "./data", "Directory to store database files") + scanOutOfOrderCheck = flag.Bool("scan-out-of-order-overview", false, "Scan newsgroups for articles with out-of-order date_sent values") ) flag.Parse() @@ -184,6 +185,17 @@ func main() { os.Exit(0) } + // If only out-of-order scan is requested, run that and exit + if *scanOutOfOrderCheck { + fmt.Printf("🔍 Starting out-of-order article scan...\n") + fmt.Printf("=====================================\n") + err := scanOutOfOrderOverview(db, newsgroups, *verbose) + if err != nil { + log.Fatalf("Out-of-order scan failed: %v", err) + } + os.Exit(0) + } + // Initialize tracking variables var results []GroupResult var ( @@ -988,3 +1000,160 @@ func printDateProblemsSummary(problems []DateProblem, rewriteDates bool) { } fmt.Printf("=====================================\n") } + +// OutOfOrderArticle represents an article that has an out-of-order date_sent +type OutOfOrderArticle struct { + Newsgroup string + ArticleNum int64 + DateSent time.Time + PreviousArtNum int64 + PreviousDate time.Time +} + +// scanOutOfOrderOverview scans newsgroups for articles with out-of-order date_sent values +func scanOutOfOrderOverview(db *database.Database, newsgroups []*models.Newsgroup, verbose bool) error { + const batchSize = 10000 + var totalArticles int64 + var totalOutOfOrder int64 + var allProblems []OutOfOrderArticle + + for _, newsgroup := range newsgroups { + fmt.Printf("\n📊 Scanning newsgroup: %s\n", newsgroup.Name) + + // Get newsgroup database + groupDBs, err := db.GetGroupDBs(newsgroup.Name) + if err != nil { + fmt.Printf("❌ Failed to get database for '%s': %v\n", newsgroup.Name, err) + continue + } + if groupDBs == nil || groupDBs.DB == nil { + fmt.Printf("⚠️ No database found for '%s', skipping...\n", newsgroup.Name) + continue + } + + // Get total article count + var count int64 + err = groupDBs.DB.QueryRow("SELECT COUNT(*) FROM articles").Scan(&count) + if err != nil { + fmt.Printf("❌ Failed to count articles: %v\n", err) + continue + } + + if count == 0 { + fmt.Printf("📭 Empty newsgroup, skipping...\n") + continue + } + + fmt.Printf(" Total articles: %d\n", count) + totalArticles += count + + var groupProblems []OutOfOrderArticle + var lastArticleNum int64 + var lastDateSent time.Time + offset := int64(0) + query := ` + SELECT article_num, date_sent + FROM articles + ORDER BY article_num ASC + LIMIT ? OFFSET ? 
+ ` + for { + // Query articles ordered by date_sent ASC using OFFSET pagination + + rows, err := groupDBs.DB.Query(query, batchSize, offset) + if err != nil { + return fmt.Errorf("failed to query articles at offset %d: %v", offset, err) + } + + articlesInBatch := 0 + for rows.Next() { + var articleNum int64 + var dateSentStr string + + if err := rows.Scan(&articleNum, &dateSentStr); err != nil { + rows.Close() + return fmt.Errorf("failed to scan article: %v", err) + } + + // Parse date_sent + dateSent, err := time.Parse("2006-01-02 15:04:05", dateSentStr) + if err != nil { + rows.Close() + return fmt.Errorf("failed to parse date_sent '%s': %v", dateSentStr, err) + } + + // Check if this article's date is before the previous one + if offset > 0 || articlesInBatch > 0 { + if dateSent.Before(lastDateSent) { + problem := OutOfOrderArticle{ + Newsgroup: newsgroup.Name, + ArticleNum: articleNum, + DateSent: dateSent, + PreviousArtNum: lastArticleNum, + PreviousDate: lastDateSent, + } + groupProblems = append(groupProblems, problem) + allProblems = append(allProblems, problem) + } + } + + lastArticleNum = articleNum + lastDateSent = dateSent + articlesInBatch++ + } + rows.Close() + + if articlesInBatch == 0 { + break + } + + offset += int64(articlesInBatch) + + if verbose && offset%100000 == 0 { + fmt.Printf(" Progress: %d/%d articles scanned...\n", offset, count) + } + } + + if len(groupProblems) > 0 { + fmt.Printf("⚠️ Found %d out-of-order articles in %s\n", len(groupProblems), newsgroup.Name) + totalOutOfOrder += int64(len(groupProblems)) + + // Show first few problems + showCount := len(groupProblems) + if showCount > 10 { + showCount = 10 + } + for i := 0; i < showCount; i++ { + p := groupProblems[i] + fmt.Printf(" • Article %d (%s) comes AFTER article %d (%s)\n", + p.ArticleNum, p.DateSent.Format("2006-01-02 15:04:05"), + p.PreviousArtNum, p.PreviousDate.Format("2006-01-02 15:04:05")) + } + if len(groupProblems) > 10 { + fmt.Printf(" ... 
and %d more out-of-order articles\n", len(groupProblems)-10) + } + } else { + fmt.Printf("✅ All articles are in chronological order by date_sent\n") + } + } + + // Print summary + fmt.Printf("\n=====================================\n") + fmt.Printf("📊 OUT-OF-ORDER SCAN SUMMARY\n") + fmt.Printf("=====================================\n") + fmt.Printf("Total articles scanned: %d\n", totalArticles) + fmt.Printf("Out-of-order articles: %d\n", totalOutOfOrder) + if totalOutOfOrder > 0 { + fmt.Printf("\n⚠️ WARNING: Found articles with dates that don't match their chronological position!\n") + fmt.Printf("This can cause issues with keyset pagination in nntp-transfer.\n") + fmt.Printf("\n💡 RECOMMENDATIONS:\n") + fmt.Printf(" • Review the import process for late-arriving articles\n") + fmt.Printf(" • Consider using article_num-based pagination for affected newsgroups\n") + fmt.Printf(" • The stuck article detection in nntp-transfer should handle these cases\n") + } else { + fmt.Printf("\n✅ All articles are properly ordered by date_sent across all newsgroups!\n") + } + fmt.Printf("=====================================\n") + + return nil +} From 5c88ddd7072dc85a8a6824ccdc086830dc60569d Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 12 Oct 2025 03:29:11 +0000 Subject: [PATCH 042/100] Update FuncStructList.txt --- FuncStructList.txt | 442 ++++++++++++++++++++++++--------------------- 1 file changed, 239 insertions(+), 203 deletions(-) diff --git a/FuncStructList.txt b/FuncStructList.txt index 77238e4..fbfa250 100644 --- a/FuncStructList.txt +++ b/FuncStructList.txt @@ -1,7 +1,7 @@ ### Struct Signatures ./active_files/hierarchies/organize_by_theme.go:15:type ThemeCategory struct { ./cmd/extract_hierarchies/extract_hierarchies.go:138: type hierarchyStat struct { -./cmd/fix-thread-activity/main.go:73: type threadInfo struct { +./cmd/fix-thread-activity/main.go:80: type threadInfo struct { ./cmd/history-rebuild/main.go:26:type RebuildStats struct { ./cmd/history-rebuild/main.go:36:type HistoryAnalysisStats struct { ./cmd/import-flat-files/main.go:193:type FileScanner struct { @@ -10,9 +10,11 @@ ./cmd/merge-active/main.go:21:type ActiveEntry struct { ./cmd/merge-active/main.go:31:type OverviewEntry struct { ./cmd/nntp-server/processor_adapter.go:10:type ProcessorAdapter struct { -./cmd/nntp-transfer/main.go:503:type ProxyConfig struct { +./cmd/nntp-transfer/main.go:2152:type ReturnSignal struct { +./cmd/nntp-transfer/main.go:3066: type ProgressInfo struct { +./cmd/nntp-transfer/main.go:570:type ProxyConfig struct { ./cmd/recover-db/main.go:20:type GroupResult struct { -./cmd/recover-db/main.go:676:type DateProblem struct { +./cmd/recover-db/main.go:675:type DateProblem struct { ./cmd/tcp2tor/main.go:28:type ProxyConfig struct { ./cmd/tcp2tor/main.go:35:type ProxyTarget struct { ./cmd/tcp2tor/main.go:43:type ConfigEntry struct { @@ -24,11 +26,11 @@ ./internal/cache/newsgroup_cache.go:38:type NewsgroupCache struct { ./internal/cache/sanitized_cache.go:12:type SanitizedArticle struct { ./internal/cache/sanitized_cache.go:30:type SanitizedCache struct { -./internal/config/config.go:113:type ServerConfig struct { -./internal/config/config.go:129:type DatabaseConfig struct { -./internal/config/config.go:136:type WebConfig struct { -./internal/config/config.go:68:type MainConfig struct { -./internal/config/config.go:90:type Provider struct { +./internal/config/config.go:115:type ServerConfig struct { +./internal/config/config.go:131:type DatabaseConfig struct { +./internal/config/config.go:138:type WebConfig 
struct { +./internal/config/config.go:70:type MainConfig struct { +./internal/config/config.go:92:type Provider struct { ./internal/database/article_cache.go:13:type ArticleCacheEntry struct { ./internal/database/article_cache.go:22:type ArticleCache struct { ./internal/database/config_cache.go:10:type ConfigCache struct { @@ -131,20 +133,27 @@ ./internal/models/models.go:72:type Overview struct { ./internal/models/models.go:91:type User struct { ./internal/nntp/nntp-article-common.go:26:type ArticleRetrievalResult struct { -./internal/nntp/nntp-article-common.go:35:type TakeThisMode struct { ./internal/nntp/nntp-auth-manager.go:12:type AuthManager struct { ./internal/nntp/nntp-backend-pool.go:15:type Pool struct { -./internal/nntp/nntp-backend-pool.go:406:type PoolStats struct { +./internal/nntp/nntp-backend-pool.go:417:type PoolStats struct { ./internal/nntp/nntp-cache-local.go:11:type Local430 struct { ./internal/nntp/nntp-cache-local.go:57:type CacheMessageIDNumtoGroup struct { ./internal/nntp/nntp-cache-local.go:62:type ItemCMIDNG struct { -./internal/nntp/nntp-client.go:107:type GroupInfo struct { -./internal/nntp/nntp-client.go:118:type OverviewLine struct { -./internal/nntp/nntp-client.go:130:type HeaderLine struct { -./internal/nntp/nntp-client.go:52:type BackendConn struct { -./internal/nntp/nntp-client.go:75:type BackendConfig struct { -./internal/nntp/nntp-client.go:98:type Article struct { -./internal/nntp/nntp-cmd-posting.go:163:type ArticleData struct { +./internal/nntp/nntp-client-commands.go:114:type TTResponse struct { +./internal/nntp/nntp-client-commands.go:120:type CheckResponse struct { // deprecated +./internal/nntp/nntp-client-commands.go:125:type ReadRequest struct { +./internal/nntp/nntp-client-commands.go:148:type CHTTJob struct { +./internal/nntp/nntp-client-commands.go:199:type NewsgroupTransferProgress struct { +./internal/nntp/nntp-client-commands.go:41:type TakeThisMode struct { +./internal/nntp/nntp-client-commands.go:54:type TTSetup struct { +./internal/nntp/nntp-client-commands.go:59:type OffsetQueue struct { +./internal/nntp/nntp-client.go:100:type Article struct { +./internal/nntp/nntp-client.go:109:type GroupInfo struct { +./internal/nntp/nntp-client.go:120:type OverviewLine struct { +./internal/nntp/nntp-client.go:132:type HeaderLine struct { +./internal/nntp/nntp-client.go:55:type BackendConn struct { +./internal/nntp/nntp-client.go:77:type BackendConfig struct { +./internal/nntp/nntp-cmd-posting.go:164:type ArticleData struct { ./internal/nntp/nntp-peering.go:153:type PeeringStats struct { ./internal/nntp/nntp-peering.go:47:type PeeringManager struct { ./internal/nntp/nntp-peering.go:70:type PeeringConfig struct { @@ -163,9 +172,9 @@ ./internal/processor/bridges.go:24:type BridgeManager struct { ./internal/processor/counter.go:12:type Counter struct { ./internal/processor/PostQueue.go:12:type PostQueueWorker struct { -./internal/processor/proc_DLArt.go:18:type BatchQueue struct { -./internal/processor/proc_DLArt.go:26:type GroupBatch struct { -./internal/processor/proc_DLArt.go:30:type BatchItem struct { +./internal/processor/proc_DLArt.go:19:type BatchQueue struct { +./internal/processor/proc_DLArt.go:27:type GroupBatch struct { +./internal/processor/proc_DLArt.go:31:type BatchItem struct { ./internal/processor/processor.go:23:type Processor struct { ./internal/processor/proc_MsgIDtmpCache.go:14://type MsgTmpCache struct { ./internal/processor/proc_MsgIDtmpCache.go:57:type MsgIdTmpCacheItem struct { @@ -174,12 +183,12 @@ 
./internal/processor/rslight.go:45:type GroupsEntry struct { ./internal/processor/rslight.go:52:type LegacyArticle struct { ./internal/processor/rslight.go:65:type LegacyThread struct { -./internal/web/cronjobs.go:20:type CronJobManager struct { -./internal/web/cronjobs.go:29:type CronJob struct { +./internal/web/cronjobs.go:21:type CronJobManager struct { +./internal/web/cronjobs.go:30:type CronJob struct { ./internal/web/web_admin.go:11:type FlashMessage struct { ./internal/web/web_admin.go:17:type SpamArticleInfo struct { ./internal/web/web_admin.go:23:type AdminPageData struct { -./internal/web/web_admin_newsgroups.go:565: type threadInfo struct { +./internal/web/web_admin_newsgroups.go:568: type threadInfo struct { ./internal/web/web_admin_ollama.go:18:type ProxyModelResponse struct { ./internal/web/web_admin_ollama.go:22:type ProxyModel struct { ./internal/web/web_admin_settings_unified.go:17:type SettingConfig struct { @@ -192,26 +201,26 @@ ./internal/web/web_newsPage.go:12:type NewsPageData struct { ./internal/web/web_profile.go:17:type ProfilePageData struct { ./internal/web/web_registerPage.go:15:type RegisterPageData struct { -./internal/web/webserver_core_routes.go:108:type HierarchiesPageData struct { -./internal/web/webserver_core_routes.go:116:type HierarchyGroupsPageData struct { -./internal/web/webserver_core_routes.go:125:type HierarchyTreePageData struct { -./internal/web/webserver_core_routes.go:143:type HierarchyBreadcrumb struct { -./internal/web/webserver_core_routes.go:150:type HierarchyNode struct { -./internal/web/webserver_core_routes.go:158:type SectionPageData struct { -./internal/web/webserver_core_routes.go:169:type SectionGroupPageData struct { -./internal/web/webserver_core_routes.go:179:type SectionArticlePageData struct { -./internal/web/webserver_core_routes.go:191:type SearchPageData struct { +./internal/web/webserver_core_routes.go:109:type HierarchiesPageData struct { +./internal/web/webserver_core_routes.go:117:type HierarchyGroupsPageData struct { +./internal/web/webserver_core_routes.go:126:type HierarchyTreePageData struct { +./internal/web/webserver_core_routes.go:144:type HierarchyBreadcrumb struct { +./internal/web/webserver_core_routes.go:151:type HierarchyNode struct { +./internal/web/webserver_core_routes.go:159:type SectionPageData struct { +./internal/web/webserver_core_routes.go:170:type SectionGroupPageData struct { +./internal/web/webserver_core_routes.go:180:type SectionArticlePageData struct { +./internal/web/webserver_core_routes.go:192:type SearchPageData struct { ./internal/web/webserver_core_routes.go:25:type WebServer struct { ./internal/web/webserver_core_routes.go:39:type TemplateData struct { ./internal/web/webserver_core_routes.go:56:type GroupPageData struct { ./internal/web/webserver_core_routes.go:65:type ArticlePageData struct { ./internal/web/webserver_core_routes.go:77:type StatsPageData struct { -./internal/web/webserver_core_routes.go:84:type GroupsPageData struct { -./internal/web/webserver_core_routes.go:92:type GroupThreadsPageData struct { +./internal/web/webserver_core_routes.go:85:type GroupsPageData struct { +./internal/web/webserver_core_routes.go:93:type GroupThreadsPageData struct { ./internal/web/web_sitePostPage.go:23:type PostPageData struct { ### Function Signatures -./cmd/history-rebuild/main.go:297:func (s *RebuildStats) PrintProgress() { -./cmd/history-rebuild/main.go:328:func (s *RebuildStats) PrintFinal() { +./cmd/history-rebuild/main.go:301:func (s *RebuildStats) PrintProgress() { 
+./cmd/history-rebuild/main.go:332:func (s *RebuildStats) PrintFinal() { ./cmd/import-flat-files/main.go:162:func (dm *DBManager) ArticleExists(article *Article) (bool, error) { ./cmd/import-flat-files/main.go:181:func (dm *DBManager) Close() { ./cmd/import-flat-files/main.go:206:func (fs *FileScanner) ScanFiles() <-chan *Article { @@ -220,6 +229,12 @@ ./cmd/nntp-server/processor_adapter.go:20:func (pa *ProcessorAdapter) ProcessIncomingArticle(article *models.Article) (int, error) { ./cmd/nntp-server/processor_adapter.go:27:func (pa *ProcessorAdapter) Lookup(msgIdItem *history.MessageIdItem) (int, error) { ./cmd/nntp-server/processor_adapter.go:32:func (pa *ProcessorAdapter) CheckNoMoreWorkInHistory() bool { +./cmd/nntp-transfer/main.go:2166:func (rs *ReturnSignal) UnlockTT() { +./cmd/nntp-transfer/main.go:2173:func (rs *ReturnSignal) GetLockTT() { +./cmd/nntp-transfer/main.go:2193:func (rs *ReturnSignal) UnlockCHECKforTTwithWait() { +./cmd/nntp-transfer/main.go:2209:func (rs *ReturnSignal) UnlockCHECKforTT() { +./cmd/nntp-transfer/main.go:2221:func (rs *ReturnSignal) BlockCHECK() { +./cmd/nntp-transfer/main.go:2229:func (rs *ReturnSignal) LockCHECK() { ./cmd/tcp2tor/main.go:558:func (s *ProxyServer) Start() error { ./cmd/tcp2tor/main.go:601:func (s *ProxyServer) Stop() { ./cmd/tcp2tor/main.go:611:func (s *ProxyServer) handleConnection(clientConn net.Conn) { @@ -309,10 +324,10 @@ ./internal/database/db_batch.go:1084:func (c *SQ3batch) findThreadRoot(groupDBs *GroupDBs, refs []string) (int64, error) { ./internal/database/db_batch.go:1111:func (sq *SQ3batch) batchUpdateThreadCache(groupDBs *GroupDBs, threadUpdates map[int64][]threadCacheUpdateData) error { ./internal/database/db_batch.go:1236:func (o *BatchOrchestrator) StartOrch() { -./internal/database/db_batch.go:1274:func (o *BatchOrchestrator) StartOrchestrator() { -./internal/database/db_batch.go:1333:func (o *BatchOrchestrator) checkThresholds() (haswork bool) { +./internal/database/db_batch.go:1277:func (o *BatchOrchestrator) StartOrchestrator() { +./internal/database/db_batch.go:1338:func (o *BatchOrchestrator) checkThresholds() (haswork bool) { ./internal/database/db_batch.go:139:func (sq *SQ3batch) BatchCaptureOverviewForLater(newsgroupPtr *string, article *models.Article) { -./internal/database/db_batch.go:1418:func (sq *SQ3batch) BatchDivider() { +./internal/database/db_batch.go:1423:func (sq *SQ3batch) BatchDivider() { ./internal/database/db_batch.go:149:func (sq *SQ3batch) ExpireCache() { ./internal/database/db_batch.go:170:func (sq *SQ3batch) GetNewsgroupPointer(newsgroup string) *string { ./internal/database/db_batch.go:196:func (sq *SQ3batch) GetChan(newsgroup *string) chan *models.Article { @@ -356,13 +371,13 @@ ./internal/database/db_groupdbs.go:172:func (db *GroupDBs) ExistsMsgIdInArticlesDB(messageID string) bool { ./internal/database/db_groupdbs.go:181:func (dbs *GroupDBs) Close(who string) error { ./internal/database/db_groupdbs.go:28:func (db *Database) GetGroupDBs(groupName string) (*GroupDBs, error) { -./internal/database/db_init.go:233:func (db *Database) IsDBshutdown() bool { -./internal/database/db_init.go:249:func (db *Database) initMainDB() error { -./internal/database/db_init.go:290:func (db *Database) applySQLitePragmas(conn *sql.DB) error { -./internal/database/db_init.go:318:func (db *Database) applySQLitePragmasGroupDB(conn *sql.DB) error { -./internal/database/db_init.go:358:func (db *Database) LoadDefaultProviders() error { -./internal/database/db_init.go:399:func (db *Database) loadBotConfiguration() { 
-./internal/database/db_init.go:443:func (db *Database) loadIPBlockingConfiguration() { +./internal/database/db_init.go:251:func (db *Database) IsDBshutdown() bool { +./internal/database/db_init.go:267:func (db *Database) initMainDB() error { +./internal/database/db_init.go:308:func (db *Database) applySQLitePragmas(conn *sql.DB) error { +./internal/database/db_init.go:336:func (db *Database) applySQLitePragmasGroupDB(conn *sql.DB) error { +./internal/database/db_init.go:376:func (db *Database) LoadDefaultProviders() error { +./internal/database/db_init.go:417:func (db *Database) loadBotConfiguration() { +./internal/database/db_init.go:441:func (db *Database) loadIPBlockingConfiguration() { ./internal/database/db_migrate.go:245:func (db *Database) migrateMainDB() error { ./internal/database/db_migrate.go:281:func (db *Database) MigrateGroup(groupName string) error { ./internal/database/db_migrate.go:293:func (db *Database) migrateGroupDB(groupDBs *GroupDBs) error { @@ -631,28 +646,28 @@ ./internal/history/history_config.go:293:func (h *History) xxxGetHashPrefix(hash string) string { ./internal/history/history_config.go:302:func (h *History) initDatabase() error { ./internal/history/history_config.go:324:func (h *History) openHistoryFile() error { -./internal/history/history.go:1004:func (h *History) xxLookupStorageToken(msgIdItem *MessageIdItem) int { -./internal/history/history.go:1059:func (h *History) CheckNoMoreWorkInHistory() bool { -./internal/history/history.go:1079:func (h *History) SetDatabaseWorkChecker(checker DatabaseWorkChecker) { -./internal/history/history.go:160:func (h *History) bootLookupWorkers() { -./internal/history/history.go:168:func (h *History) LookupWorker(wid int) { -./internal/history/history.go:214:func (h *History) Lookup(msgIdItem *MessageIdItem) (int, error) { -./internal/history/history.go:235:func (h *History) lookupInDatabase(msgIdItem *MessageIdItem) (bool, error) { -./internal/history/history.go:334:func (h *History) GetStats() HistoryStats { -./internal/history/history.go:348:func (h *History) updateStats(fn func(*HistoryStats)) { -./internal/history/history.go:355:func (h *History) Close() error { -./internal/history/history.go:372:func (h *History) writerWorker() { -./internal/history/history.go:466:func (h *History) ServerShutdown() bool { -./internal/history/history.go:479:func (h *History) readHistoryEntryAtOffset(offset int64, msgIdItem *MessageIdItem) (int, error) { -./internal/history/history.go:567:func (h *History) routeHash(msgId string) (int, string, string, error) { -./internal/history/history.go:604:func (h *History) flushPendingBatch() { -./internal/history/history.go:623:func (h *History) processBatch() { -./internal/history/history.go:666:func (h *History) writeBatchToFile() error { -./internal/history/history.go:765:func (h *History) writeBatchToDatabase() error { -./internal/history/history.go:834:func (h *History) writeBatchToHashDB(dbIndex int, entries []*MessageIdItem) error { -./internal/history/history.go:861:func (h *History) executeDBTransaction(dbIndex int, entries []*MessageIdItem) error { -./internal/history/history.go:92:func (h *History) Add(msgIdItem *MessageIdItem) { -./internal/history/history.go:938:func (h *History) processTableInTransaction(tx *sql.Tx, tableName string, hashGroups map[string][]*MessageIdItem) error { +./internal/history/history.go:1007:func (h *History) xxLookupStorageToken(msgIdItem *MessageIdItem) int { +./internal/history/history.go:1062:func (h *History) CheckNoMoreWorkInHistory() bool { 
+./internal/history/history.go:1082:func (h *History) SetDatabaseWorkChecker(checker DatabaseWorkChecker) { +./internal/history/history.go:161:func (h *History) bootLookupWorkers() { +./internal/history/history.go:169:func (h *History) LookupWorker(wid int) { +./internal/history/history.go:215:func (h *History) Lookup(msgIdItem *MessageIdItem) (int, error) { +./internal/history/history.go:236:func (h *History) lookupInDatabase(msgIdItem *MessageIdItem) (bool, error) { +./internal/history/history.go:335:func (h *History) GetStats() HistoryStats { +./internal/history/history.go:349:func (h *History) updateStats(fn func(*HistoryStats)) { +./internal/history/history.go:356:func (h *History) Close() error { +./internal/history/history.go:373:func (h *History) writerWorker() { +./internal/history/history.go:469:func (h *History) ServerShutdown() bool { +./internal/history/history.go:482:func (h *History) readHistoryEntryAtOffset(offset int64, msgIdItem *MessageIdItem) (int, error) { +./internal/history/history.go:570:func (h *History) routeHash(msgId string) (int, string, string, error) { +./internal/history/history.go:607:func (h *History) flushPendingBatch() { +./internal/history/history.go:626:func (h *History) processBatch() { +./internal/history/history.go:669:func (h *History) writeBatchToFile() error { +./internal/history/history.go:768:func (h *History) writeBatchToDatabase() error { +./internal/history/history.go:837:func (h *History) writeBatchToHashDB(dbIndex int, entries []*MessageIdItem) error { +./internal/history/history.go:864:func (h *History) executeDBTransaction(dbIndex int, entries []*MessageIdItem) error { +./internal/history/history.go:93:func (h *History) Add(msgIdItem *MessageIdItem) { +./internal/history/history.go:941:func (h *History) processTableInTransaction(tx *sql.Tx, tableName string, hashGroups map[string][]*MessageIdItem) error { ./internal/history/history_L1-cache.go:113:func (c *L1Cache) Close() { ./internal/history/history_L1-cache.go:205:func (l1 *L1CACHE) BootL1Cache() { ./internal/history/history_L1-cache.go:248:func (l1 *L1CACHE) LockL1Cache(hash string, value int) int { @@ -716,31 +731,31 @@ ./internal/models/sanitizing.go:481:func (o *Overview) PrintSanitized(field string, groupName ...string) template.HTML { ./internal/models/sanitizing.go:547:func (a *Article) GetCleanSubject() string { ./internal/models/sanitizing.go:567:func (o *Overview) GetCleanSubject() string { -./internal/nntp/nntp-article-common.go:320:func (c *ClientConnection) sendArticleContent(result *ArticleRetrievalResult) error { -./internal/nntp/nntp-article-common.go:355:func (c *ClientConnection) sendHeadContent(result *ArticleRetrievalResult) error { -./internal/nntp/nntp-article-common.go:376:func (c *ClientConnection) sendBodyContent(result *ArticleRetrievalResult) error { -./internal/nntp/nntp-article-common.go:397:func (c *ClientConnection) sendStatContent(result *ArticleRetrievalResult) error { -./internal/nntp/nntp-article-common.go:46:func (c *ClientConnection) retrieveArticleCommon(args []string, retrievalType ArticleRetrievalType) error { -./internal/nntp/nntp-article-common.go:89:func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResult, error) { +./internal/nntp/nntp-article-common.go:309:func (c *ClientConnection) sendArticleContent(result *ArticleRetrievalResult) error { +./internal/nntp/nntp-article-common.go:344:func (c *ClientConnection) sendHeadContent(result *ArticleRetrievalResult) error { +./internal/nntp/nntp-article-common.go:35:func (c 
*ClientConnection) retrieveArticleCommon(args []string, retrievalType ArticleRetrievalType) error { +./internal/nntp/nntp-article-common.go:365:func (c *ClientConnection) sendBodyContent(result *ArticleRetrievalResult) error { +./internal/nntp/nntp-article-common.go:386:func (c *ClientConnection) sendStatContent(result *ArticleRetrievalResult) error { +./internal/nntp/nntp-article-common.go:78:func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResult, error) { ./internal/nntp/nntp-auth-manager.go:24:func (am *AuthManager) AuthenticateUser(username, password string) (*models.NNTPUser, error) { ./internal/nntp/nntp-auth-manager.go:47:func (am *AuthManager) CheckGroupAccess(user *models.NNTPUser, groupName string) bool { ./internal/nntp/nntp-auth-manager.go:58:func (am *AuthManager) CanPost(user *models.NNTPUser) bool { ./internal/nntp/nntp-auth-manager.go:66:func (am *AuthManager) IsAdmin(user *models.NNTPUser) bool { ./internal/nntp/nntp-auth-manager.go:77:func (am *AuthManager) CheckConnectionLimit(user *models.NNTPUser) bool { -./internal/nntp/nntp-backend-pool.go:119:func (pool *Pool) GetArticle(messageID *string, bulkmode bool) (*models.Article, error) { -./internal/nntp/nntp-backend-pool.go:151:func (pool *Pool) SelectGroup(group string) (*GroupInfo, error) { -./internal/nntp/nntp-backend-pool.go:185:func (pool *Pool) Get(mode int) (*BackendConn, error) { -./internal/nntp/nntp-backend-pool.go:299:func (pool *Pool) Put(client *BackendConn) error { -./internal/nntp/nntp-backend-pool.go:342:func (pool *Pool) CloseConn(client *BackendConn, lock bool) error { -./internal/nntp/nntp-backend-pool.go:363:func (pool *Pool) ClosePool() error { -./internal/nntp/nntp-backend-pool.go:391:func (pool *Pool) Stats() PoolStats { -./internal/nntp/nntp-backend-pool.go:416:func (pool *Pool) createConnection() (*BackendConn, error) { -./internal/nntp/nntp-backend-pool.go:430:func (pool *Pool) isConnectionValid(client *BackendConn) bool { -./internal/nntp/nntp-backend-pool.go:449:func (pool *Pool) Cleanup() { -./internal/nntp/nntp-backend-pool.go:45:func (pool *Pool) XOver(group string, start, end int64, enforceLimit bool) ([]OverviewLine, error) { -./internal/nntp/nntp-backend-pool.go:498:func (pool *Pool) startCleanupWorker() { -./internal/nntp/nntp-backend-pool.go:72:func (pool *Pool) XHdr(group string, header string, start, end int64) ([]*HeaderLine, error) { -./internal/nntp/nntp-backend-pool.go:94:func (pool *Pool) XHdrStreamed(group string, header string, start, end int64, xhdrChan chan<- *HeaderLine, shutdownChan <-chan struct{}) error { +./internal/nntp/nntp-backend-pool.go:118:func (pool *Pool) GetArticle(messageID *string, bulkmode bool) (*models.Article, error) { +./internal/nntp/nntp-backend-pool.go:150:func (pool *Pool) SelectGroup(group string) (*GroupInfo, error) { +./internal/nntp/nntp-backend-pool.go:184:func (pool *Pool) Get(wantMode int) (*BackendConn, error) { +./internal/nntp/nntp-backend-pool.go:310:func (pool *Pool) Put(conn *BackendConn) error { +./internal/nntp/nntp-backend-pool.go:351:func (pool *Pool) closeConn(client *BackendConn, lock bool) error { +./internal/nntp/nntp-backend-pool.go:372:func (pool *Pool) ClosePool() error { +./internal/nntp/nntp-backend-pool.go:402:func (pool *Pool) Stats() PoolStats { +./internal/nntp/nntp-backend-pool.go:427:func (pool *Pool) createConnection() (*BackendConn, error) { +./internal/nntp/nntp-backend-pool.go:441:func (pool *Pool) isConnectionValid(client *BackendConn) bool { 
+./internal/nntp/nntp-backend-pool.go:463:func (pool *Pool) Cleanup() { +./internal/nntp/nntp-backend-pool.go:47:func (pool *Pool) XOver(group string, start, end int64, enforceLimit bool) ([]OverviewLine, error) { +./internal/nntp/nntp-backend-pool.go:504:func (pool *Pool) startCleanupWorker() { +./internal/nntp/nntp-backend-pool.go:74:func (pool *Pool) XHdr(group string, header string, start, end int64) ([]*HeaderLine, error) { +./internal/nntp/nntp-backend-pool.go:96:func (pool *Pool) XHdrStreamed(group string, header string, start, end int64, xhdrChan chan<- *HeaderLine, shutdownChan <-chan struct{}) error { ./internal/nntp/nntp-cache-local.go:102:func (c *CacheMessageIDNumtoGroup) Del(messageID, group string) { ./internal/nntp/nntp-cache-local.go:115:func (c *CacheMessageIDNumtoGroup) Clear(messageID string) { ./internal/nntp/nntp-cache-local.go:122:func (c *CacheMessageIDNumtoGroup) CleanupCron() { @@ -751,37 +766,58 @@ ./internal/nntp/nntp-cache-local.go:43:func (lc *Local430) Cleanup() { ./internal/nntp/nntp-cache-local.go:75:func (c *CacheMessageIDNumtoGroup) Get(messageID, group string) (int64, bool) { ./internal/nntp/nntp-cache-local.go:88:func (c *CacheMessageIDNumtoGroup) Set(messageID, group string, articleNum int64) { -./internal/nntp/nntp-client-commands.go:1016:func (c *BackendConn) parseHeaderLine(line string) (*HeaderLine, error) { -./internal/nntp/nntp-client-commands.go:1035:func (c *BackendConn) CheckMultiple(messageIDs []*string, ttMode *TakeThisMode) (chan *string, error) { -./internal/nntp/nntp-client-commands.go:1119:func (c *BackendConn) TakeThisArticle(article *models.Article, nntphostname *string) (int, error) { -./internal/nntp/nntp-client-commands.go:1211:func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string) (uint, error) { -./internal/nntp/nntp-client-commands.go:1290:func (c *BackendConn) ReadTakeThisResponseStreaming(id uint) (int, error) { -./internal/nntp/nntp-client-commands.go:130:func (c *BackendConn) GetHead(messageID string) (*models.Article, error) { -./internal/nntp/nntp-client-commands.go:1311:func (c *BackendConn) PostArticle(article *models.Article) (int, error) { -./internal/nntp/nntp-client-commands.go:1431:func (c *BackendConn) SwitchMode(mode int) error { -./internal/nntp/nntp-client-commands.go:1443:func (c *BackendConn) SwitchToModeReader() error { -./internal/nntp/nntp-client-commands.go:1475:func (c *BackendConn) SwitchToModeStream() error { -./internal/nntp/nntp-client-commands.go:186:func (c *BackendConn) GetBody(messageID string) ([]byte, error) { -./internal/nntp/nntp-client-commands.go:234:func (c *BackendConn) ListGroups() ([]GroupInfo, error) { -./internal/nntp/nntp-client-commands.go:281:func (c *BackendConn) ListGroupsLimited(maxGroups int) ([]GroupInfo, error) { -./internal/nntp/nntp-client-commands.go:32:func (c *BackendConn) StatArticle(messageID string) (bool, error) { -./internal/nntp/nntp-client-commands.go:362:func (c *BackendConn) SelectGroup(groupName string) (*GroupInfo, int, error) { -./internal/nntp/nntp-client-commands.go:430:func (c *BackendConn) XOver(groupName string, start, end int64, enforceLimit bool) ([]OverviewLine, error) { -./internal/nntp/nntp-client-commands.go:496:func (c *BackendConn) XHdr(groupName, field string, start, end int64) ([]*HeaderLine, error) { -./internal/nntp/nntp-client-commands.go:558:func (c *BackendConn) WantShutdown(shutdownChan <-chan struct{}) bool { -./internal/nntp/nntp-client-commands.go:572:func (c *BackendConn) 
XHdrStreamed(groupName, field string, start, end int64, xhdrChan chan<- *HeaderLine, shutdownChan <-chan struct{}) error { -./internal/nntp/nntp-client-commands.go:628:func (c *BackendConn) XHdrStreamedBatch(groupName, field string, start, end int64, xhdrChan chan<- *HeaderLine, shutdownChan <-chan struct{}) error { -./internal/nntp/nntp-client-commands.go:66:func (c *BackendConn) GetArticle(messageID *string, bulkmode bool) (*models.Article, error) { -./internal/nntp/nntp-client-commands.go:724:func (c *BackendConn) ListGroup(groupName string, start, end int64) ([]int64, error) { -./internal/nntp/nntp-client-commands.go:781:func (c *BackendConn) readMultilineResponse(src string) ([]string, error) { -./internal/nntp/nntp-client-commands.go:956:func (c *BackendConn) parseGroupLine(line string) (GroupInfo, error) { -./internal/nntp/nntp-client-commands.go:989:func (c *BackendConn) parseOverviewLine(line string) (OverviewLine, error) { -./internal/nntp/nntp-client.go:144:func (c *BackendConn) Connect() error { -./internal/nntp/nntp-client.go:247:func (c *BackendConn) authenticate() error { -./internal/nntp/nntp-client.go:289:func (c *BackendConn) CloseFromPoolOnly() error { -./internal/nntp/nntp-client.go:319:func (c *BackendConn) xSetReadDeadline(t time.Time) error { -./internal/nntp/nntp-client.go:329:func (c *BackendConn) xSetWriteDeadline(t time.Time) error { -./internal/nntp/nntp-client.go:338:func (c *BackendConn) UpdateLastUsed() { +./internal/nntp/nntp-client-commands.go:100:func (o *OffsetQueue) Done() { +./internal/nntp/nntp-client-commands.go:1049:func (c *BackendConn) ListGroup(groupName string, start, end int64) ([]int64, error) { +./internal/nntp/nntp-client-commands.go:107:func (o *OffsetQueue) Add(n int) { +./internal/nntp/nntp-client-commands.go:1106:func (c *BackendConn) readMultilineResponse(src string) ([]string, error) { +./internal/nntp/nntp-client-commands.go:1281:func (c *BackendConn) parseGroupLine(line string) (GroupInfo, error) { +./internal/nntp/nntp-client-commands.go:1314:func (c *BackendConn) parseOverviewLine(line string) (OverviewLine, error) { +./internal/nntp/nntp-client-commands.go:133:func (rr *ReadRequest) ClearReadRequest() { +./internal/nntp/nntp-client-commands.go:1341:func (c *BackendConn) parseHeaderLine(line string) (*HeaderLine, error) { +./internal/nntp/nntp-client-commands.go:1360:func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readResponsesChan chan *ReadRequest, job *CHTTJob) error { +./internal/nntp/nntp-client-commands.go:139:func (rr *ReadRequest) ReturnReadRequest(channel chan struct{}) { +./internal/nntp/nntp-client-commands.go:1404:func (c *BackendConn) CheckMultiple(messageIDs []*string, ttMode *TakeThisMode) ([]*string, error) { +./internal/nntp/nntp-client-commands.go:1482:func (c *BackendConn) xxTakeThisArticle(article *models.Article, nntphostname *string, newsgroup string) (int, error) { +./internal/nntp/nntp-client-commands.go:1570:func (c *BackendConn) GetBufSize(size int) int { +./internal/nntp/nntp-client-commands.go:1579:func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string, newsgroup string) (cmdID uint, txBytes int, err error) { +./internal/nntp/nntp-client-commands.go:1665:func (c *BackendConn) ReadTakeThisResponseStreaming(newsgroup string, cr *CheckResponse) (int, error) { +./internal/nntp/nntp-client-commands.go:1702:func (c *BackendConn) PostArticle(article *models.Article) (int, error) { +./internal/nntp/nntp-client-commands.go:175:func (job *CHTTJob) 
Response(ForceCleanUp bool, Err error) { +./internal/nntp/nntp-client-commands.go:1819:func (c *BackendConn) SwitchMode(mode int) error { +./internal/nntp/nntp-client-commands.go:1831:func (c *BackendConn) SwitchToModeReader() error { +./internal/nntp/nntp-client-commands.go:1863:func (c *BackendConn) SwitchToModeStream() error { +./internal/nntp/nntp-client-commands.go:215:func (ngp *NewsgroupTransferProgress) CalcSpeed() (speed int64) { +./internal/nntp/nntp-client-commands.go:231:func (ngp *NewsgroupTransferProgress) AddTXBytes(n int) { +./internal/nntp/nntp-client-commands.go:251:func (job *CHTTJob) Increment(counter int) { +./internal/nntp/nntp-client-commands.go:276:func (job *CHTTJob) AppendWantedMessageID(msgID *string) { +./internal/nntp/nntp-client-commands.go:283:func (job *CHTTJob) GetUpdateCounters(transferred, unwanted, rejected, checked, txErrors, connErrors *uint64) { +./internal/nntp/nntp-client-commands.go:294:func (ttMode *TakeThisMode) UseCHECK() bool { +./internal/nntp/nntp-client-commands.go:303:func (ttMode *TakeThisMode) SetForceCHECK() { +./internal/nntp/nntp-client-commands.go:309:func (ttMode *TakeThisMode) IncrementSuccess() { +./internal/nntp/nntp-client-commands.go:315:func (ttMode *TakeThisMode) IncrementTmp() { +./internal/nntp/nntp-client-commands.go:321:func (ttMode *TakeThisMode) SetNoCHECK() { +./internal/nntp/nntp-client-commands.go:327:func (ttMode *TakeThisMode) FlipMode(lowerLevel float64, upperLevel float64) bool { +./internal/nntp/nntp-client-commands.go:352:func (c *BackendConn) ForceCloseConn() { +./internal/nntp/nntp-client-commands.go:362:func (c *BackendConn) StatArticle(messageID string) (bool, error) { +./internal/nntp/nntp-client-commands.go:396:func (c *BackendConn) GetArticle(messageID *string, bulkmode bool) (*models.Article, error) { +./internal/nntp/nntp-client-commands.go:460:func (c *BackendConn) GetHead(messageID string) (*models.Article, error) { +./internal/nntp/nntp-client-commands.go:516:func (c *BackendConn) GetBody(messageID string) ([]byte, error) { +./internal/nntp/nntp-client-commands.go:564:func (c *BackendConn) ListGroups() ([]GroupInfo, error) { +./internal/nntp/nntp-client-commands.go:611:func (c *BackendConn) ListGroupsLimited(maxGroups int) ([]GroupInfo, error) { +./internal/nntp/nntp-client-commands.go:67:func (o *OffsetQueue) Wait(n int) { +./internal/nntp/nntp-client-commands.go:692:func (c *BackendConn) SelectGroup(groupName string) (*GroupInfo, int, error) { +./internal/nntp/nntp-client-commands.go:756:func (c *BackendConn) XOver(groupName string, start, end int64, enforceLimit bool) ([]OverviewLine, error) { +./internal/nntp/nntp-client-commands.go:822:func (c *BackendConn) XHdr(groupName, field string, start, end int64) ([]*HeaderLine, error) { +./internal/nntp/nntp-client-commands.go:884:func (c *BackendConn) WantShutdown(shutdownChan <-chan struct{}) bool { +./internal/nntp/nntp-client-commands.go:898:func (c *BackendConn) XHdrStreamed(groupName, field string, start, end int64, xhdrChan chan<- *HeaderLine, shutdownChan <-chan struct{}) error { +./internal/nntp/nntp-client-commands.go:954:func (c *BackendConn) XHdrStreamedBatch(groupName, field string, start, end int64, xhdrChan chan<- *HeaderLine, shutdownChan <-chan struct{}) error { +./internal/nntp/nntp-client.go:146:func (c *BackendConn) Connect() error { +./internal/nntp/nntp-client.go:238:func (c *BackendConn) authenticate() error { +./internal/nntp/nntp-client.go:280:func (c *BackendConn) CloseFromPoolOnly() error { 
+./internal/nntp/nntp-client.go:309:func (c *BackendConn) IsConnected() bool { +./internal/nntp/nntp-client.go:323:func (c *BackendConn) xSetReadDeadline(t time.Time) error { +./internal/nntp/nntp-client.go:333:func (c *BackendConn) xSetWriteDeadline(t time.Time) error { +./internal/nntp/nntp-client.go:342:func (c *BackendConn) UpdateLastUsed() { ./internal/nntp/nntp-cmd-article.go:4:func (c *ClientConnection) handleArticle(args []string) error { ./internal/nntp/nntp-cmd-auth.go:9:func (c *ClientConnection) handleAuthInfo(args []string) error { ./internal/nntp/nntp-cmd-basic.go:15:func (c *ClientConnection) handleMode(args []string) error { @@ -801,10 +837,10 @@ ./internal/nntp/nntp-cmd-list.go:26:func (c *ClientConnection) handleListActive() error { ./internal/nntp/nntp-cmd-list.go:46:func (c *ClientConnection) handleListNewsgroups() error { ./internal/nntp/nntp-cmd-list.go:9:func (c *ClientConnection) handleList(args []string) error { -./internal/nntp/nntp-cmd-posting.go:108:func (c *ClientConnection) handleTakeThis(args []string) error { -./internal/nntp/nntp-cmd-posting.go:13:func (c *ClientConnection) handlePost() error { -./internal/nntp/nntp-cmd-posting.go:173:func (c *ClientConnection) readArticleData() (*models.Article, error) { -./internal/nntp/nntp-cmd-posting.go:49:func (c *ClientConnection) handleIHave(args []string) error { +./internal/nntp/nntp-cmd-posting.go:109:func (c *ClientConnection) handleTakeThis(args []string) error { +./internal/nntp/nntp-cmd-posting.go:14:func (c *ClientConnection) handlePost() error { +./internal/nntp/nntp-cmd-posting.go:174:func (c *ClientConnection) readArticleData() (*models.Article, error) { +./internal/nntp/nntp-cmd-posting.go:50:func (c *ClientConnection) handleIHave(args []string) error { ./internal/nntp/nntp-cmd-stat.go:4:func (c *ClientConnection) handleStat(args []string) error { ./internal/nntp/nntp-cmd-xhdr.go:10:func (c *ClientConnection) handleXHdr(args []string) error { ./internal/nntp/nntp-cmd-xover.go:9:func (c *ClientConnection) handleXOver(args []string) error { @@ -889,17 +925,16 @@ ./internal/processor/interface.go:14:func (proc *Processor) IsNewsGroupInSectionsDB(name *string) bool { ./internal/processor/interface.go:20:func (proc *Processor) ForceCloseGroupDBs(groupsDB *database.GroupDBs) error { ./internal/processor/interface.go:9:func (proc *Processor) MsgIdExists(group *string, messageID string) bool { -./internal/processor/PostQueue.go:112:func (w *PostQueueWorker) processArticleForNewsgroup(article *models.Article, newsgroup string) error { +./internal/processor/PostQueue.go:130:func (w *PostQueueWorker) processArticleForNewsgroup(article *models.Article, newsgroup string) error { ./internal/processor/PostQueue.go:18:func (processor *Processor) NewPostQueueWorker() *PostQueueWorker { ./internal/processor/PostQueue.go:26:func (w *PostQueueWorker) Start() { ./internal/processor/PostQueue.go:32:func (w *PostQueueWorker) Stop() { ./internal/processor/PostQueue.go:38:func (w *PostQueueWorker) processLoop() { ./internal/processor/PostQueue.go:64:func (w *PostQueueWorker) pre_processArticle(article *models.Article) error { -./internal/processor/proc_DLArt.go:292:func (proc *Processor) FindStartArticleByDate(groupName string, targetDate time.Time, groupInfo *nntp.GroupInfo) (int64, error) { -./internal/processor/proc_DLArt.go:378:func (proc *Processor) DownloadArticlesFromDate(groupName string, startDate time.Time, DLParChan chan struct{}, progressDB *database.ProgressDB, groupInfo *nntp.GroupInfo, shutdownChan <-chan 
struct{}) error { -./internal/processor/proc_DLArt.go:44:func (bq *BatchQueue) GetOrCreateGroupBatch(newsgroup string) *GroupBatch { -./internal/processor/proc_DLArt.go:472:func (proc *Processor) WantShutdown(shutdownChan <-chan struct{}) bool { -./internal/processor/proc_DLArt.go:67:func (proc *Processor) DownloadArticles(newsgroup string, DLParChan chan struct{}, progressDB *database.ProgressDB, start int64, end int64, shutdownChan <-chan struct{}) error { +./internal/processor/proc_DLArt.go:293:func (proc *Processor) FindStartArticleByDate(groupName string, targetDate time.Time, groupInfo *nntp.GroupInfo) (int64, error) { +./internal/processor/proc_DLArt.go:379:func (proc *Processor) DownloadArticlesFromDate(groupName string, startDate time.Time, DLParChan chan struct{}, progressDB *database.ProgressDB, groupInfo *nntp.GroupInfo, shutdownChan <-chan struct{}) error { +./internal/processor/proc_DLArt.go:45:func (bq *BatchQueue) GetOrCreateGroupBatch(newsgroup string) *GroupBatch { +./internal/processor/proc_DLArt.go:68:func (proc *Processor) DownloadArticles(newsgroup string, DLParChan chan struct{}, progressDB *database.ProgressDB, start int64, end int64, shutdownChan <-chan struct{}) error { ./internal/processor/proc_DLXHDR.go:6:func (proc *Processor) GetXHDR(groupName string, header string, start, end int64) ([]*nntp.HeaderLine, error) { ./internal/processor/processor.go:112:func (proc *Processor) CheckNoMoreWorkInHistory() bool { ./internal/processor/processor.go:117:func (proc *Processor) AddProcessedArticleToHistory(msgIdItem *history.MessageIdItem, newsgroupPtr *string, articleNumber int64) { @@ -907,11 +942,11 @@ ./internal/processor/processor.go:180:func (proc *Processor) GetHistoryStats() history.HistoryStats { ./internal/processor/processor.go:188:func (proc *Processor) Close() error { ./internal/processor/processor.go:204:func (proc *Processor) WaitForBatchCompletion() { -./internal/processor/processor.go:239:func (proc *Processor) Lookup(msgIdItem *history.MessageIdItem) (int, error) { -./internal/processor/processor.go:245:func (proc *Processor) AddArticleToHistory(article *nntp.Article, newsgroup string) { -./internal/processor/processor.go:250:func (proc *Processor) ProcessIncomingArticle(article *models.Article) (int, error) { -./internal/processor/processor.go:270:func (proc *Processor) EnableBridges(config *BridgeConfig) { -./internal/processor/processor.go:282:func (proc *Processor) DisableBridges() { +./internal/processor/processor.go:238:func (proc *Processor) Lookup(msgIdItem *history.MessageIdItem) (int, error) { +./internal/processor/processor.go:244:func (proc *Processor) AddArticleToHistory(article *nntp.Article, newsgroup string) { +./internal/processor/processor.go:249:func (proc *Processor) ProcessIncomingArticle(article *models.Article) (int, error) { +./internal/processor/processor.go:269:func (proc *Processor) EnableBridges(config *BridgeConfig) { +./internal/processor/processor.go:281:func (proc *Processor) DisableBridges() { ./internal/processor/proc_ImportOV.go:14:func (proc *Processor) ImportOverview(groupName string) error { ./internal/processor/proc_MsgIDtmpCache.go:105://func (c *MsgTmpCache) Clear() { ./internal/processor/proc_MsgIDtmpCache.go:112://func (c *MsgTmpCache) UpdateThreadRootToTmpCache(group string, messageID string, rootArticle int64, isThreadRoot bool) bool { @@ -933,46 +968,45 @@ ./internal/processor/rslight.go:582:func (leg *LegacyImporter) insertNewsgroupIfNotExists(name, description string) error { 
./internal/processor/rslight.go:86:func (leg *LegacyImporter) Close() error { ./internal/processor/rslight.go:94:func (leg *LegacyImporter) ImportSections() error { -./internal/processor/threading.go:46:func (proc *Processor) setCaseDupes(msgIdItem *history.MessageIdItem, bulkmode bool) { -./internal/processor/threading.go:56:func (proc *Processor) processArticle(article *models.Article, legacyNewsgroup string, bulkmode bool) (int, error) { -./internal/web/cronjobs.go:120:func (cm *CronJobManager) StopCronManager() { -./internal/web/cronjobs.go:212:func (cm *CronJobManager) GetJobOutput(jobID int64) []string { -./internal/web/cronjobs.go:232:func (cm *CronJobManager) StopJob(jobId int64) error { -./internal/web/cronjobs.go:279:func (cm *CronJobManager) startJob(cronJob *models.CronJob) error { -./internal/web/cronjobs.go:307:func (cm *CronJobManager) runJobScheduler(job *CronJob) { -./internal/web/cronjobs.go:372:func (cm *CronJobManager) executeJob(job *CronJob, execWG *sync.WaitGroup) { -./internal/web/cronjobs.go:467:func (job *CronJob) addLogLine(line string) { -./internal/web/cronjobs.go:482:func (job *CronJob) setRunningStatus(running bool) { -./internal/web/cronjobs.go:495:func (cm *CronJobManager) GetJobStatus(jobID int64) (*CronJob, bool) { -./internal/web/cronjobs.go:503:func (cm *CronJobManager) GetJobPID(jobID int64) int { -./internal/web/cronjobs.go:516:func (cm *CronJobManager) GetAllJobStatuses() map[int64]*CronJob { -./internal/web/cronjobs.go:68:func (cm *CronJobManager) StartCronManager() { +./internal/processor/threading.go:47:func (proc *Processor) setCaseDupes(msgIdItem *history.MessageIdItem, bulkmode bool) { +./internal/processor/threading.go:57:func (proc *Processor) processArticle(article *models.Article, legacyNewsgroup string, bulkmode bool) (int, error) { +./internal/web/cronjobs.go:113:func (cm *CronJobManager) StopCronManager() { +./internal/web/cronjobs.go:219:func (cm *CronJobManager) GetJobOutput(jobID int64) []string { +./internal/web/cronjobs.go:238:func (cm *CronJobManager) StopJob(jobId int64) error { +./internal/web/cronjobs.go:285:func (cm *CronJobManager) startJob(cronJob *models.CronJob) error { +./internal/web/cronjobs.go:313:func (cm *CronJobManager) runJobScheduler(job *CronJob) { +./internal/web/cronjobs.go:395:func (cm *CronJobManager) executeJob(job *CronJob, execWG *sync.WaitGroup) { +./internal/web/cronjobs.go:492:func (job *CronJob) addLogLine(line string) { +./internal/web/cronjobs.go:507:func (cm *CronJobManager) GetJobStatus(jobID int64) (*CronJob, bool) { +./internal/web/cronjobs.go:515:func (cm *CronJobManager) GetJobPID(jobID int64) int { +./internal/web/cronjobs.go:529:func (cm *CronJobManager) GetAllJobStatuses() map[int64]*CronJob { +./internal/web/cronjobs.go:70:func (cm *CronJobManager) StartCronManager() { ./internal/web/web_admin_apitokens.go:124:func (s *WebServer) adminDeleteAPIToken(c *gin.Context) { ./internal/web/web_admin_apitokens.go:14:func (s *WebServer) countEnabledAPITokens(tokens []*database.APIToken) (count int64) { ./internal/web/web_admin_apitokens.go:159:func (s *WebServer) adminCleanupExpiredTokens(c *gin.Context) { ./internal/web/web_admin_apitokens.go:24:func (s *WebServer) adminCreateAPIToken(c *gin.Context) { ./internal/web/web_admin_apitokens.go:81:func (s *WebServer) adminToggleAPIToken(c *gin.Context) { ./internal/web/web_admin_cache.go:11:func (s *WebServer) adminClearCache(c *gin.Context) { -./internal/web/web_admin_crons.go:13:func (s *WebServer) adminCreateCronJob(c *gin.Context) { 
-./internal/web/web_admin_crons.go:143:func (s *WebServer) adminToggleCronJob(c *gin.Context) { -./internal/web/web_admin_crons.go:171:func (s *WebServer) adminDeleteCronJob(c *gin.Context) { -./internal/web/web_admin_crons.go:205:func (s *WebServer) adminViewCronJobLog(c *gin.Context) { -./internal/web/web_admin_crons.go:253:func (s *WebServer) adminStopCronJob(c *gin.Context) { -./internal/web/web_admin_crons.go:67:func (s *WebServer) adminUpdateCronJob(c *gin.Context) { -./internal/web/web_admin.go:88:func (s *WebServer) getUptime() string { -./internal/web/web_admin_newsgroups.go:123:func (s *WebServer) adminUpdateNewsgroup(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:223:func (s *WebServer) adminDeleteNewsgroup(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:251:func (s *WebServer) adminAssignNewsgroupSection(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:30:func (s *WebServer) adminCreateNewsgroup(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:334:func (s *WebServer) adminToggleNewsgroup(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:378:func (s *WebServer) adminBulkEnableNewsgroups(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:383:func (s *WebServer) adminBulkDisableNewsgroups(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:388:func (s *WebServer) adminBulkDeleteNewsgroups(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:421:func (s *WebServer) handleBulkNewsgroupAction(c *gin.Context, activeStatus bool, actionName string) { -./internal/web/web_admin_newsgroups.go:454:func (s *WebServer) adminMigrateNewsgroupActivity(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:513:func (s *WebServer) adminFixThreadActivity(c *gin.Context) { -./internal/web/web_admin_newsgroups.go:552:func (s *WebServer) fixGroupThreadActivity(groupName string) error { -./internal/web/web_admin_newsgroups.go:679:func (s *WebServer) adminHideFuturePosts(c *gin.Context) { +./internal/web/web_admin_crons.go:145:func (s *WebServer) adminToggleCronJob(c *gin.Context) { +./internal/web/web_admin_crons.go:15:func (s *WebServer) adminCreateCronJob(c *gin.Context) { +./internal/web/web_admin_crons.go:173:func (s *WebServer) adminDeleteCronJob(c *gin.Context) { +./internal/web/web_admin_crons.go:207:func (s *WebServer) adminViewCronJobLog(c *gin.Context) { +./internal/web/web_admin_crons.go:299:func (s *WebServer) adminStopCronJob(c *gin.Context) { +./internal/web/web_admin_crons.go:69:func (s *WebServer) adminUpdateCronJob(c *gin.Context) { +./internal/web/web_admin.go:91:func (s *WebServer) getUptime() string { +./internal/web/web_admin_newsgroups.go:126:func (s *WebServer) adminUpdateNewsgroup(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:226:func (s *WebServer) adminDeleteNewsgroup(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:254:func (s *WebServer) adminAssignNewsgroupSection(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:337:func (s *WebServer) adminToggleNewsgroup(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:33:func (s *WebServer) adminCreateNewsgroup(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:381:func (s *WebServer) adminBulkEnableNewsgroups(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:386:func (s *WebServer) adminBulkDisableNewsgroups(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:391:func (s *WebServer) adminBulkDeleteNewsgroups(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:424:func (s *WebServer) 
handleBulkNewsgroupAction(c *gin.Context, activeStatus bool, actionName string) { +./internal/web/web_admin_newsgroups.go:457:func (s *WebServer) adminMigrateNewsgroupActivity(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:516:func (s *WebServer) adminFixThreadActivity(c *gin.Context) { +./internal/web/web_admin_newsgroups.go:555:func (s *WebServer) fixGroupThreadActivity(groupName string) error { +./internal/web/web_admin_newsgroups.go:682:func (s *WebServer) adminHideFuturePosts(c *gin.Context) { ./internal/web/web_admin_nntp.go:130:func (s *WebServer) adminUpdateNNTPUser(c *gin.Context) { ./internal/web/web_admin_nntp.go:210:func (s *WebServer) adminDeleteNNTPUser(c *gin.Context) { ./internal/web/web_admin_nntp.go:246:func (s *WebServer) adminToggleNNTPUser(c *gin.Context) { @@ -984,7 +1018,7 @@ ./internal/web/web_admin_ollama.go:196:func (s *WebServer) adminSyncOllamaModels(c *gin.Context) { ./internal/web/web_admin_ollama.go:33:func (s *WebServer) adminCreateAIModel(c *gin.Context) { ./internal/web/web_admin_ollama.go:94:func (s *WebServer) adminUpdateAIModel(c *gin.Context) { -./internal/web/web_adminPage.go:18:func (s *WebServer) adminPage(c *gin.Context) { +./internal/web/web_adminPage.go:19:func (s *WebServer) adminPage(c *gin.Context) { ./internal/web/web_admin_postqueue.go:12:func (s *WebServer) adminDeletePostQueueEntry(c *gin.Context) { ./internal/web/web_admin_provider.go:16:func (s *WebServer) adminCreateProvider(c *gin.Context) { ./internal/web/web_admin_provider.go:186:func (s *WebServer) adminUpdateProvider(c *gin.Context) { @@ -995,17 +1029,18 @@ ./internal/web/web_admin_sections.go:22:func (s *WebServer) CreateSectionHandler(c *gin.Context) { ./internal/web/web_admin_sections.go:349:func (s *WebServer) UnassignNewsgroupHandler(c *gin.Context) { ./internal/web/web_admin_sections.go:93:func (s *WebServer) UpdateSectionHandler(c *gin.Context) { -./internal/web/web_admin_settings_unified.go:238:func (s *WebServer) validateHostname(hostname string) error { -./internal/web/web_admin_settings_unified.go:246:func (s *WebServer) validateWebPostSize(sizeStr string) error { -./internal/web/web_admin_settings_unified.go:263:func (s *WebServer) validateEmail(email string) error { -./internal/web/web_admin_settings_unified.go:272:func (s *WebServer) processHostname(server *WebServer, hostname string) error { -./internal/web/web_admin_settings_unified.go:276:func (s *WebServer) processRegistrationToggle(server *WebServer, value string) error { +./internal/web/web_admin_settings_unified.go:266:func (s *WebServer) validateHostname(hostname string) error { +./internal/web/web_admin_settings_unified.go:274:func (s *WebServer) validateWebPostSize(sizeStr string) error { ./internal/web/web_admin_settings_unified.go:27:func (s *WebServer) adminUpdateSettings(c *gin.Context) { -./internal/web/web_admin_settings_unified.go:301:func (s *WebServer) processBlockBadBotsToggle(server *WebServer, value string) error { -./internal/web/web_admin_settings_unified.go:333:func (s *WebServer) processBlockBadIPsToggle(server *WebServer, value string) error { -./internal/web/web_admin_settings_unified.go:364:func (s *WebServer) validateCIDRList(value string) error { -./internal/web/web_admin_settings_unified.go:391:func (s *WebServer) processBadBotsUpdate(server *WebServer, value string) error { -./internal/web/web_admin_settings_unified.go:412:func (s *WebServer) processBadIPsUpdate(server *WebServer, value string) error { +./internal/web/web_admin_settings_unified.go:291:func (s *WebServer) 
validateEmail(email string) error { +./internal/web/web_admin_settings_unified.go:300:func (s *WebServer) processHostname(server *WebServer, hostname string) error { +./internal/web/web_admin_settings_unified.go:304:func (s *WebServer) processRegistrationToggle(server *WebServer, value string) error { +./internal/web/web_admin_settings_unified.go:329:func (s *WebServer) processBlockBadBotsToggle(server *WebServer, value string) error { +./internal/web/web_admin_settings_unified.go:361:func (s *WebServer) processBlockBadIPsToggle(server *WebServer, value string) error { +./internal/web/web_admin_settings_unified.go:393:func (s *WebServer) processAPIEnabledToggle(server *WebServer, value string) error { +./internal/web/web_admin_settings_unified.go:418:func (s *WebServer) validateCIDRList(value string) error { +./internal/web/web_admin_settings_unified.go:445:func (s *WebServer) processBadBotsUpdate(server *WebServer, value string) error { +./internal/web/web_admin_settings_unified.go:466:func (s *WebServer) processBadIPsUpdate(server *WebServer, value string) error { ./internal/web/web_admin_sitenews.go:136:func (s *WebServer) adminDeleteSiteNews(c *gin.Context) { ./internal/web/web_admin_sitenews.go:166:func (s *WebServer) adminToggleSiteNewsVisibility(c *gin.Context) { ./internal/web/web_admin_sitenews.go:16:func (s *WebServer) adminCreateSiteNews(c *gin.Context) { @@ -1025,19 +1060,20 @@ ./internal/web/web_aichatPage.go:430:func (s *WebServer) aichatGetCounts(c *gin.Context) { ./internal/web/web_aichatPage.go:459:func (s *WebServer) renderChatError(c *gin.Context, title, message string) { ./internal/web/web_aichatPage.go:82:func (s *WebServer) aichatPage(c *gin.Context) { -./internal/web/web_apiHandlers.go:158:func (s *WebServer) getArticle(c *gin.Context) { -./internal/web/web_apiHandlers.go:188:func (s *WebServer) getArticleByMessageId(c *gin.Context) { -./internal/web/web_apiHandlers.go:20:// - func (s *WebServer) listGroups(c *gin.Context) (line ~313) -./internal/web/web_apiHandlers.go:212:func (s *WebServer) getGroupThreads(c *gin.Context) { -./internal/web/web_apiHandlers.go:22:// - func (s *WebServer) getGroupOverview(c *gin.Context) (line ~354) -./internal/web/web_apiHandlers.go:236:func (s *WebServer) getStats(c *gin.Context) { -./internal/web/web_apiHandlers.go:24:// - func (s *WebServer) getArticle(c *gin.Context) (line ~403) -./internal/web/web_apiHandlers.go:26:// - func (s *WebServer) getArticleByMessageId(c *gin.Context) (line ~428) -./internal/web/web_apiHandlers.go:28:// - func (s *WebServer) getGroupThreads(c *gin.Context) (line ~447) -./internal/web/web_apiHandlers.go:307:func (s *WebServer) getArticlePreview(c *gin.Context) { -./internal/web/web_apiHandlers.go:30:// - func (s *WebServer) getStats(c *gin.Context) (line ~466) -./internal/web/web_apiHandlers.go:35:func (s *WebServer) listGroups(c *gin.Context) { -./internal/web/web_apiHandlers.go:70:func (s *WebServer) getGroupOverview(c *gin.Context) { +./internal/web/web_apiHandlers.go:174:func (s *WebServer) getArticle(c *gin.Context) { +./internal/web/web_apiHandlers.go:204:func (s *WebServer) getArticleByMessageId(c *gin.Context) { +./internal/web/web_apiHandlers.go:20:func (s *WebServer) requireAPIEnabled() gin.HandlerFunc { +./internal/web/web_apiHandlers.go:228:func (s *WebServer) getGroupThreads(c *gin.Context) { +./internal/web/web_apiHandlers.go:252:func (s *WebServer) getStats(c *gin.Context) { +./internal/web/web_apiHandlers.go:323:func (s *WebServer) getArticlePreview(c *gin.Context) { 
+./internal/web/web_apiHandlers.go:36:// - func (s *WebServer) listGroups(c *gin.Context) (line ~313) +./internal/web/web_apiHandlers.go:38:// - func (s *WebServer) getGroupOverview(c *gin.Context) (line ~354) +./internal/web/web_apiHandlers.go:40:// - func (s *WebServer) getArticle(c *gin.Context) (line ~403) +./internal/web/web_apiHandlers.go:42:// - func (s *WebServer) getArticleByMessageId(c *gin.Context) (line ~428) +./internal/web/web_apiHandlers.go:44:// - func (s *WebServer) getGroupThreads(c *gin.Context) (line ~447) +./internal/web/web_apiHandlers.go:46:// - func (s *WebServer) getStats(c *gin.Context) (line ~466) +./internal/web/web_apiHandlers.go:51:func (s *WebServer) listGroups(c *gin.Context) { +./internal/web/web_apiHandlers.go:86:func (s *WebServer) getGroupOverview(c *gin.Context) { ./internal/web/web_apitokens.go:120:func (s *WebServer) disableAPITokenHandler(c *gin.Context) { ./internal/web/web_apitokens.go:14:func (s *WebServer) APIAuthRequired() gin.HandlerFunc { ./internal/web/web_apitokens.go:153:func (s *WebServer) enableAPITokenHandler(c *gin.Context) { @@ -1102,20 +1138,20 @@ ./internal/web/web_sectionsPage.go:347:func (s *WebServer) sectionArticleByMessageIdPage(c *gin.Context) { ./internal/web/web_sectionsPage.go:36:func (s *WebServer) sectionsPage(c *gin.Context) { ./internal/web/web_sectionsPage.go:65:func (s *WebServer) sectionPage(c *gin.Context) { -./internal/web/webserver_core_routes.go:282:func (s *WebServer) setupRoutes() { -./internal/web/webserver_core_routes.go:491:func (s *WebServer) Start() error { -./internal/web/webserver_core_routes.go:507:func (s *WebServer) BotDetectionMiddleware() gin.HandlerFunc { -./internal/web/webserver_core_routes.go:553:func (s *WebServer) ReverseProxyMiddleware() gin.HandlerFunc { -./internal/web/webserver_core_routes.go:630:func (s *WebServer) ApacheLogFormat() gin.HandlerFunc { -./internal/web/webserver_core_routes.go:647:func (s *WebServer) loadSectionsCache() { -./internal/web/webserver_core_routes.go:664:func (s *WebServer) refreshSectionsCache() { -./internal/web/webserver_core_routes.go:669:func (s *WebServer) isValidSection(sectionName string) bool { -./internal/web/webserver_core_routes.go:677:func (s *WebServer) sectionValidationMiddleware() gin.HandlerFunc { +./internal/web/webserver_core_routes.go:283:func (s *WebServer) setupRoutes() { +./internal/web/webserver_core_routes.go:494:func (s *WebServer) Start() error { +./internal/web/webserver_core_routes.go:510:func (s *WebServer) BotDetectionMiddleware() gin.HandlerFunc { +./internal/web/webserver_core_routes.go:556:func (s *WebServer) ReverseProxyMiddleware() gin.HandlerFunc { +./internal/web/webserver_core_routes.go:633:func (s *WebServer) ApacheLogFormat() gin.HandlerFunc { +./internal/web/webserver_core_routes.go:650:func (s *WebServer) loadSectionsCache() { +./internal/web/webserver_core_routes.go:667:func (s *WebServer) refreshSectionsCache() { +./internal/web/webserver_core_routes.go:672:func (s *WebServer) isValidSection(sectionName string) bool { +./internal/web/webserver_core_routes.go:680:func (s *WebServer) sectionValidationMiddleware() gin.HandlerFunc { ./internal/web/web_session_cleanup.go:9:func (s *WebServer) StartSessionCleanup() { ./internal/web/web_sitePostPage.go:165:func (s *WebServer) sitePostSubmit(c *gin.Context) { ./internal/web/web_sitePostPage.go:40:func (s *WebServer) sitePostPage(c *gin.Context) { -./internal/web/web_statsPage.go:14:// - func (s *WebServer) statsPage(c *gin.Context) (line ~857) 
-./internal/web/web_statsPage.go:18:func (s *WebServer) statsPage(c *gin.Context) { +./internal/web/web_statsPage.go:15:// - func (s *WebServer) statsPage(c *gin.Context) (line ~857) +./internal/web/web_statsPage.go:19:func (s *WebServer) statsPage(c *gin.Context) { ./internal/web/web_threadPage.go:17:// - func (s *WebServer) singleThreadPage(c *gin.Context) (line ~1394) ./internal/web/web_threadPage.go:25:func (s *WebServer) singleThreadPage(c *gin.Context) { ./internal/web/web_threadTreePage.go:16:// - func (s *WebServer) threadTreePage(c *gin.Context) (line ~1596) From db99aaab1653c8c3771c7f280885b823e184fe37 Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 12 Oct 2025 06:11:58 +0000 Subject: [PATCH 043/100] debugs --- cmd/nntp-transfer/main.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 29fa5b2..217f2a8 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1663,16 +1663,18 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art conn.Lock() var ttxBytes uint64 start := time.Now() + astart := start + astart2 := start for _, article := range articles { if article == nil { continue // Skip cached articles } - astart := time.Now() + astart = time.Now() // Send TAKETHIS command with article content (non-blocking) // This also queues the ReadRequest to readTAKETHISResponsesChan BEFORE returning - log.Printf("Newsgroup: '%s' | ++Pre-Send TAKETHIS '%s'", newsgroup, article.MessageID) + //log.Printf("Newsgroup: '%s' | ++Pre-Send TAKETHIS '%s'", newsgroup, article.MessageID) cmdID, txBytes, err := conn.SendTakeThisArticleStreaming(article, &processor.LocalNNTPHostname, newsgroup, demuxer, readTAKETHISResponsesChan, job) - astart2 := time.Now() + astart2 = time.Now() job.Mux.Lock() job.TTxBytes += uint64(txBytes) job.TmpTxBytes += uint64(txBytes) @@ -1689,7 +1691,9 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art return redis_cached, fmt.Errorf("failed to send TAKETHIS for %s: %v", article.MessageID, err) } sentCount++ - log.Printf("Newsgroup: '%s' | DONE TAKETHIS '%s' CmdID=%d (%d/%d sent) in %v awaiting responses astart2='%v'", newsgroup, article.MessageID, cmdID, sentCount, len(articles), time.Since(astart), time.Since(astart2)) + if VERBOSE { + log.Printf("Newsgroup: '%s' | DONE TAKETHIS '%s' CmdID=%d (%d/%d sent) in %v awaiting responses astart2='%v'", newsgroup, article.MessageID, cmdID, sentCount, len(articles), time.Since(astart), time.Since(astart2)) + } } conn.Unlock() log.Printf("Newsgroup: '%s' | DONE TAKETHIS BATCH sent: %d commands. 
ttxBytes: %d in %v", newsgroup, sentCount, ttxBytes, time.Since(start)) From 3fe900e3e2d85cf1f17d4d7af1367c76bb4e7922 Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 12 Oct 2025 07:58:28 +0000 Subject: [PATCH 044/100] testing --- cmd/nntp-transfer/main.go | 72 ++++++++++++++++++++------- internal/nntp/nntp-backend-pool.go | 13 ++++- internal/nntp/nntp-client-commands.go | 15 ++++-- 3 files changed, 75 insertions(+), 25 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 217f2a8..fa0c92f 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -129,7 +129,7 @@ func main() { proxyPassword = flag.String("proxy-password", "", "Proxy authentication password") // Transfer configuration - batchCheck = flag.Int("batch-check", 100, "Number of message IDs/articles to send in streamed CHECK/TAKETHIS") + batchCheck = flag.Int("batch-check", 1000, "Number of message IDs/articles to send in streamed CHECK/TAKETHIS") batchDB = flag.Int64("batch-db", 1000, "Fetch N articles from DB in a batch") maxThreads = flag.Int("max-threads", 1, "Transfer N newsgroups in concurrent threads. Each thread uses 1 connection.") redisCache = flag.Bool("redis-cache", true, "Use Redis caching for message IDs") @@ -166,7 +166,7 @@ func main() { webPort = flag.Int("web-port", 0, "Enable web server on this port to view results (e.g. 8080, default: disabled)") pprofPort = flag.Int("pprof-port", 0, "Enable pprof profiling server on this port (e.g., 6060). Access at http://localhost:PORT/debug/pprof/") memStats = flag.Bool("mem-stats", false, "Log memory statistics every 30 seconds") - gcPercent = flag.Int("gc-percent", 100, "Set GOGC percentage (default 100). Lower values = more frequent GC, less memory") + gcPercent = flag.Int("gc-percent", 50, "Set GOGC percentage (default 100). 
Lower values = more frequent GC, less memory") ) flag.Parse() common.IgnoreGoogleHeaders = *ignoreGoogleHeaders @@ -311,7 +311,7 @@ func main() { sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, os.Interrupt) // Cross-platform (Ctrl+C on both Windows and Linux) - //db.WG.Add(2) // Adds to wait group for db_batch.go cron jobs + db.WG.Add(2) // Adds to wait group for db_batch.go cron jobs db.WG.Add(1) // Adds for history: one for writer worker // Get UseShortHashLen from database (with safety check) @@ -409,7 +409,6 @@ func main() { if !*dryRun { log.Printf("Starting NNTP connection worker pool...") go BootConnWorkers(pool, redisCli) - time.Sleep(2 * time.Second) // Give workers time to establish connections } // Start transfer process var wgP sync.WaitGroup @@ -523,6 +522,8 @@ func main() { } wgP.Wait() pool.ClosePool() + // Signal background tasks to stop + close(db.StopChan) // Close processor if proc != nil { @@ -1211,7 +1212,9 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i resultsMutex.Lock() nntp.NewsgroupTransferProgressMap[ng.Name].Finished = true nntp.NewsgroupTransferProgressMap[ng.Name].LastUpdated = time.Now() - results = append(results, fmt.Sprintf("END Newsgroup: '%s' | No articles to process", ng.Name)) + if VERBOSE { + results = append(results, fmt.Sprintf("END Newsgroup: '%s' | No articles to process", ng.Name)) + } resultsMutex.Unlock() // No articles to process if startTime != nil || endTime != nil { @@ -1365,7 +1368,10 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } resultsMutex.Unlock() }(&responseWG) - OffsetQueue := &nntp.OffsetQueue{} + OffsetQueue := &nntp.OffsetQueue{ + Newsgroup: &ng.Name, + MaxQueuedJobs: MaxQueuedJobs, + } // Use simple OFFSET pagination var articlesProcessed int64 @@ -1588,19 +1594,26 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl } // Assign job to worker (consistent assignment + load balancing) + QueuesMutex.RLock() if len(CheckQueues) == 0 { + QueuesMutex.RUnlock() + log.Printf("Newsgroup: '%s' | No workers available to process batch job #%d with %d message IDs", *ttMode.Newsgroup, batchedJob.JobID, len(batchedJob.MessageIDs)) return nil, fmt.Errorf("no workers available") } + QueuesMutex.RUnlock() workerID := assignWorkerToNewsgroup(*ttMode.Newsgroup) - + QueuesMutex.RLock() + WorkersCheckChannel := CheckQueues[workerID] + QueuesMutex.RUnlock() // Track queue length for load balancing WorkerQueueLengthMux.Lock() WorkerQueueLength[workerID]++ WorkerQueueLengthMux.Unlock() //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queue job #%d with %d message IDs. 
CheckQ=%d", *ttMode.Newsgroup, workerID, batchedJob.JobID, len(batchedJob.MessageIDs), len(CheckQueues[workerID])) - CheckQueues[workerID] <- batchedJob // checkQueue <- batchedJob + + WorkersCheckChannel <- batchedJob // checkQueue <- batchedJob //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queued Job #%d", *ttMode.Newsgroup, workerID, batchedJob.JobID) return batchedJob.ResponseChan, nil } // end func processBatch @@ -1704,6 +1717,7 @@ var jobRequeueMutex sync.RWMutex var jobRequeue = make(map[*string][]*nntp.CHTTJob) // CheckQueues holds per-worker CheckQueue channels for consistent newsgroup routing +var QueuesMutex sync.RWMutex var CheckQueues []chan *nntp.CHTTJob var TakeThisQueues []chan *nntp.CHTTJob @@ -1779,6 +1793,7 @@ func BootConnWorkers(pool *nntp.Pool, redisCli *redis.Client) { isleep := defaultSleep var mux sync.Mutex // Create per-worker queues + QueuesMutex.Lock() CheckQueues = make([]chan *nntp.CHTTJob, nntp.NNTPTransferThreads) TakeThisQueues = make([]chan *nntp.CHTTJob, nntp.NNTPTransferThreads) WorkerQueueLength = make([]int, nntp.NNTPTransferThreads) @@ -1787,6 +1802,7 @@ func BootConnWorkers(pool *nntp.Pool, redisCli *redis.Client) { TakeThisQueues[i] = make(chan *nntp.CHTTJob, 2) // allows max 2 queued TT jobs WorkerQueueLength[i] = 0 } + QueuesMutex.Unlock() allEstablished := false forever: for { @@ -1906,9 +1922,9 @@ forever: job.Mux.Unlock() jobRequeueMutex.Lock() - jobRequeue[rqj.Newsgroup] = append(jobRequeue[rqj.Newsgroup], rqj) // TODO DEAD END + jobRequeue[rqj.Newsgroup] = append(jobRequeue[rqj.Newsgroup], rqj) jobRequeueMutex.Unlock() - + log.Printf("CHTTWorker (%d) did requeue job %d with %d articles for newsgroup '%s'", i, rqj.JobID, len(rqj.Articles), *rqj.Newsgroup) // unlink pointers job.Mux.Lock() if job.TTMode != nil { @@ -1974,6 +1990,7 @@ forever: } }() } // end forever + log.Printf("BootConnWorkers: quit") } // end func BootConnWorkers var DefaultCheckTicker = 5 * time.Second @@ -2006,14 +2023,14 @@ func (rs *ReturnSignal) GetLockTT() { for { rs.Mux.Lock() if rs.RunTT { - log.Printf("GetLockTT: RunTT already true") + //log.Printf("GetLockTT: RunTT already true") rs.Mux.Unlock() return } if !rs.RunTT && !rs.CHECK { rs.RunTT = true rs.Mux.Unlock() - log.Printf("GetLockTT: acquired RunTT lock") + //log.Printf("GetLockTT: acquired RunTT lock") return } rs.Mux.Unlock() @@ -2054,7 +2071,7 @@ func (rs *ReturnSignal) BlockCHECK() { rs.Mux.Lock() rs.CHECK = false rs.RunTT = true - log.Printf("BlockCHECK: set CHECK to false (RunTT=%t)", rs.RunTT) + //log.Printf("BlockCHECK: set CHECK to false (RunTT=%t)", rs.RunTT) rs.Mux.Unlock() } @@ -2151,9 +2168,12 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue continue loop } workerID := assignWorkerToNewsgroup(*currentJob.Newsgroup) + QueuesMutex.RLock() + WorkersTTChannel := TakeThisQueues[workerID] + QueuesMutex.RUnlock() waiting: for { - if len(TakeThisQueues[workerID]) >= MaxQueuedJobs { + if len(WorkersTTChannel) >= MaxQueuedJobs { rs.BlockCHECK() log.Printf("CheckWorker (%d): waiting... 
shared takeThisChan full (%d)", workerID, len(TakeThisQueues[workerID])) time.Sleep(time.Second / 4) @@ -2204,7 +2224,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue currentJob.WantedIDs = currentJob.MessageIDs //rs.UnlockCHECKforTTwithWait() rs.BlockCHECK() - TakeThisQueues[workerID] <- currentJob // local takethis chan sharing the same connection + WorkersTTChannel <- currentJob // local takethis chan sharing the same connection //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d sent to local TakeThisChan", *currentJob.Newsgroup, workerID, currentJob.JobID) } //lastRun = time.Now() @@ -2752,6 +2772,7 @@ func monitorMemoryStats() { // startWebServer starts a simple HTTP server to display transfer results func startWebServer(port int) { http.HandleFunc("/", handleIndex) + http.HandleFunc("/results", handleResults) addr := fmt.Sprintf(":%d", port) log.Printf("Starting web server on http://ANY_ADDR:%s", addr) if err := http.ListenAndServe(addr, nil); err != nil { @@ -2949,9 +2970,7 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { {{if .Results}}

Completed Results

- {{range .Results}}
- {{.}}
- {{end}}
+ View results at /results
{{else}}
No transfer results yet. Waiting for transfers to complete...
{{end}} @@ -3051,3 +3070,20 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { log.Printf("Template execution error: %v", err) } } + +// handleResults serves the results page as plain text +func handleResults(w http.ResponseWriter, r *http.Request) { + resultsMutex.RLock() + defer resultsMutex.RUnlock() + + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + + if len(results) == 0 { + fmt.Fprintln(w, "No transfer results yet. Waiting for transfers to complete...") + return + } + + for _, result := range results { + fmt.Fprintln(w, result) + } +} diff --git a/internal/nntp/nntp-backend-pool.go b/internal/nntp/nntp-backend-pool.go index b7213f7..76cbdd1 100644 --- a/internal/nntp/nntp-backend-pool.go +++ b/internal/nntp/nntp-backend-pool.go @@ -386,8 +386,17 @@ func (pool *Pool) ClosePool() error { pool.mux.Unlock() // Close all connections in the pool - for client := range pool.connections { // drain channel - client.ForceCloseConn() +closeWait: + for { + select { + case conn := <-pool.connections: + if conn != nil { + conn.ForceCloseConn() + } + default: + // pass + break closeWait + } } pool.mux.Lock() diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 1e04869..c1877ff 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -263,9 +263,11 @@ type TTSetup struct { } type OffsetQueue struct { - mux sync.RWMutex - isleep time.Duration - queued int + Newsgroup *string + MaxQueuedJobs int + mux sync.RWMutex + isleep time.Duration + queued int } var ReturnDelay = time.Millisecond * 16 @@ -308,14 +310,17 @@ func (o *OffsetQueue) Done() { o.mux.Lock() defer o.mux.Unlock() o.queued-- - log.Printf("OffsetQueue: a batch is done, still queued: %d", o.queued) + //log.Printf("OffsetQueue: a batch is done, still queued: %d", o.queued) } func (o *OffsetQueue) Add(n int) { o.mux.Lock() defer o.mux.Unlock() o.queued += n - log.Printf("OffsetQueue: added %d batches, now queued: %d", n, o.queued) + if o.MaxQueuedJobs > 10 && o.queued > o.MaxQueuedJobs/100*90 { + // prints only if occupancy is over 90% + log.Printf("Newsgroup: '%s' | OffsetQueue: added %d batches, now queued: %d/%d", *o.Newsgroup, n, o.queued, o.MaxQueuedJobs) + } } type TTResponse struct { From 5243b70ae8721a9988c5b47a6f2c7d210fad7cac Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 12 Oct 2025 09:28:20 +0000 Subject: [PATCH 045/100] testing --- cmd/nntp-transfer/main.go | 112 +++++++++++++++++++++++++------------- 1 file changed, 73 insertions(+), 39 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index fa0c92f..2805969 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -102,8 +102,10 @@ var redisCtx = context.Background() var REDIS_TTL time.Duration = 3600 * time.Second // default 1h var MaxQueuedJobs int = 8 var BatchCheck int +var START_WITH_CHECK_MODE bool func main() { + bootTime := time.Now() common.VerboseHeaders = false config.AppVersion = appVersion database.NO_CACHE_BOOT = true // prevents booting caches and several other not needed functions @@ -129,7 +131,7 @@ func main() { proxyPassword = flag.String("proxy-password", "", "Proxy authentication password") // Transfer configuration - batchCheck = flag.Int("batch-check", 1000, "Number of message IDs/articles to send in streamed CHECK/TAKETHIS") + batchCheck = flag.Int("batch-check", 100, "Number of message IDs/articles to send in streamed CHECK/TAKETHIS") batchDB = flag.Int64("batch-db", 1000, "Fetch N articles from 
DB in a batch") maxThreads = flag.Int("max-threads", 1, "Transfer N newsgroups in concurrent threads. Each thread uses 1 connection.") redisCache = flag.Bool("redis-cache", true, "Use Redis caching for message IDs") @@ -156,6 +158,7 @@ func main() { // History configuration useShortHashLen = flag.Int("useshorthashlen", 7, "Short hash length for history storage (2-7, default: 7)") + startWithCheck = flag.Bool("start-with-check", false, "Enable 'start with check' mode") // Newsgroup filtering options fileInclude = flag.String("file-include", "", "File containing newsgroup patterns to include (one per line)") @@ -170,6 +173,7 @@ func main() { ) flag.Parse() common.IgnoreGoogleHeaders = *ignoreGoogleHeaders + START_WITH_CHECK_MODE = *startWithCheck // Configure garbage collector if *gcPercent != 100 { @@ -544,8 +548,8 @@ func main() { } else { log.Printf("Database shutdown successfully") } - - log.Printf("Graceful shutdown completed. Exiting.") + time.Sleep(time.Second * 3) // wait for all goroutines to finish + log.Printf("nntp-transfer exit. Runtime: %v", time.Since(bootTime)) } // parseDateTime parses a date string in multiple supported formats @@ -622,7 +626,6 @@ func getArticlesBatchWithDateFilter(db *database.Database, ng *models.Newsgroup, if err != nil { return nil, fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", ng.Name, err) } - defer groupDBs.Return(db) var query string var args []interface{} @@ -656,6 +659,7 @@ func getArticlesBatchWithDateFilter(db *database.Database, ng *models.Newsgroup, rows, err := groupDBs.DB.Query(query, args...) if err != nil { + db.ForceCloseGroupDBs(groupDBs) return nil, err } defer rows.Close() @@ -669,6 +673,11 @@ func getArticlesBatchWithDateFilter(db *database.Database, ng *models.Newsgroup, out = append(out, &a) } + if int64(len(out)) < dbBatchSize { + db.ForceCloseGroupDBs(groupDBs) + } else { + groupDBs.Return(db) + } return out, nil } @@ -1203,12 +1212,15 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } return fmt.Errorf("failed to get article count for newsgroup '%s': %v", ng.Name, err) } - groupDBsA.Return(db) + //log.Printf("Newsgroup: '%s' | transferNewsgroup: Got article count (%d), closing group DBs...", ng.Name, totalArticles) //log.Printf("Newsgroup: '%s' | transferNewsgroup: Closed group DBs, checking if articles exist...", ng.Name) if totalArticles == 0 { + if ferr := db.ForceCloseGroupDBs(groupDBsA); ferr != nil { + log.Printf("ForceCloseGroupDBs error for '%s': %v", ng.Name, ferr) + } resultsMutex.Lock() nntp.NewsgroupTransferProgressMap[ng.Name].Finished = true nntp.NewsgroupTransferProgressMap[ng.Name].LastUpdated = time.Now() @@ -1225,9 +1237,9 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } else { log.Printf("No articles found in newsgroup: %s", ng.Name) } - return nil } + groupDBsA.Return(db) // Initialize newsgroup progress tracking resultsMutex.Lock() @@ -1258,7 +1270,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i remainingArticles := totalArticles ttMode := &nntp.TakeThisMode{ Newsgroup: &ng.Name, - CheckMode: true, + CheckMode: START_WITH_CHECK_MODE, } ttResponses := make(chan *nntp.TTSetup, totalArticles/int64(batchCheck)+2) start := time.Now() @@ -1311,6 +1323,15 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i // free memory - CRITICAL: Lock and unlock in same scope, not with defer! 
resp.Job.Mux.Lock() log.Printf("Newsgroup: '%s' | Cleaning up TT job #%d with %d articles (ForceCleanUp)", ng.Name, resp.Job.JobID, len(resp.Job.Articles)) + /* + // Decrement queue length for this worker (job processing complete) + workerID := assignWorkerToNewsgroup(*ttMode.Newsgroup) + WorkerQueueLengthMux.Lock() + if workerID < len(WorkerQueueLength) && WorkerQueueLength[workerID] > 0 { + WorkerQueueLength[workerID]-- + } + WorkerQueueLengthMux.Unlock() + */ // Clean up Articles and their internal fields for i := range resp.Job.Articles { if resp.Job.Articles[i] != nil { @@ -1338,10 +1359,14 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i resp.Job = nil }(setup.ResponseChan, num, responseWG) } - log.Printf("Newsgroup: '%s' | Collector: ttResponses closed, waiting for %d response processors to finish...", ng.Name, num) + if VERBOSE { + log.Printf("Newsgroup: '%s' | Collector: ttResponses closed, waiting for %d response processors to finish...", ng.Name, num) + } // Wait for all response channel processors to finish responseWG.Wait() - log.Printf("Newsgroup: '%s' | Collector: all response processors closed", ng.Name) + if VERBOSE { + log.Printf("Newsgroup: '%s' | Collector: all response processors closed", ng.Name) + } amux.Lock() result := fmt.Sprintf("END Newsgroup: '%s' | transferred: %d/%d | unwanted: %d | rejected: %d | checked: %d | TX_Errors: %d | connErrors: %d | took %v", ng.Name, transferred, totalArticles, unwanted, rejected, checked, txErrors, connErrors, time.Since(start)) @@ -1440,7 +1465,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i ResponseChan: responseChan, } } - OffsetQueue.Wait(MaxQueuedJobs) // wait for offset batches to finish, less than 2 in flight + OffsetQueue.Wait(MaxQueuedJobs) // wait for offset batches to finish, less than N in flight } remainingArticles -= int64(len(articles)) if VERBOSE { @@ -1606,10 +1631,6 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl QueuesMutex.RLock() WorkersCheckChannel := CheckQueues[workerID] QueuesMutex.RUnlock() - // Track queue length for load balancing - WorkerQueueLengthMux.Lock() - WorkerQueueLength[workerID]++ - WorkerQueueLengthMux.Unlock() //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queue job #%d with %d message IDs. CheckQ=%d", *ttMode.Newsgroup, workerID, batchedJob.JobID, len(batchedJob.MessageIDs), len(CheckQueues[workerID])) @@ -1709,7 +1730,9 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art } } conn.Unlock() - log.Printf("Newsgroup: '%s' | DONE TAKETHIS BATCH sent: %d commands. ttxBytes: %d in %v", newsgroup, sentCount, ttxBytes, time.Since(start)) + if VERBOSE { + log.Printf("Newsgroup: '%s' | DONE TAKETHIS BATCH sent: %d commands. 
ttxBytes: %d in %v", newsgroup, sentCount, ttxBytes, time.Since(start)) + } return redis_cached, nil } // end func sendArticlesBatchViaTakeThis @@ -1744,7 +1767,7 @@ func assignWorkerToNewsgroup(newsgroup string) int { WorkerQueueLengthMux.Lock() if len(WorkerQueueLength) == 0 { WorkerQueueLengthMux.Unlock() - return 0 + log.Fatalf("assignWorkerToNewsgroup: no workers available?") } minLoad := WorkerQueueLength[0] @@ -1753,6 +1776,8 @@ func assignWorkerToNewsgroup(newsgroup string) int { if WorkerQueueLength[i] < minLoad { minLoad = WorkerQueueLength[i] workerID = i + WorkerQueueLength[i]++ + break } } WorkerQueueLengthMux.Unlock() @@ -1828,7 +1853,7 @@ forever: returnSignals := make([]*ReturnSignal, bootN) errChan := make(chan struct{}, 1) newConns := 0 - for i := range bootN { + for workerID := range bootN { // Get a connection from pool conn, err := pool.Get(nntp.MODE_STREAM_MV) if err != nil { @@ -1867,7 +1892,7 @@ forever: jobs: make([]*nntp.CHTTJob, 0, BatchCheck), } - returnSignals[i] = returnSignal + returnSignals[workerID] = returnSignal // assign checkQueue by openConns counter // so restarted workers get same channels to read from go CHTTWorker(slotID, conn, returnSignal, CheckQueues[slotID]) @@ -1888,27 +1913,30 @@ forever: monitoring := newConns for { time.Sleep(100 * time.Millisecond) - for i, wait := range returnSignals { + for workerID, wait := range returnSignals { if wait == nil { continue } select { case rs := <-wait.ExitChan: - log.Printf("CHTTWorker (%d) exited", i) + WorkerQueueLengthMux.Lock() + log.Printf("CHTTWorker (%d) exited. processed jobs: %d", workerID, WorkerQueueLength[workerID]) + WorkerQueueLengthMux.Unlock() + monitoring-- UnsetWorker(&openConns, rs.slotID, workerSlots, &mux) - returnSignals[i] = nil + returnSignals[workerID] = nil rs.Mux.Lock() if len(rs.jobs) > 0 { - log.Printf("CHTTWorker (%d) try requeue %d jobs", i, len(rs.jobs)) + log.Printf("CHTTWorker (%d) try requeue %d jobs", workerID, len(rs.jobs)) for _, job := range rs.jobs { if job != nil { // copy articles pointer job.Mux.Lock() if len(job.Articles) == 0 { - log.Printf("ERROR in CHTTWorker (%d) job %d has no articles, skipping requeue", i, job.JobID) + log.Printf("ERROR in CHTTWorker (%d) job %d has no articles, skipping requeue", workerID, job.JobID) job.Mux.Unlock() continue } @@ -1924,7 +1952,7 @@ forever: jobRequeueMutex.Lock() jobRequeue[rqj.Newsgroup] = append(jobRequeue[rqj.Newsgroup], rqj) jobRequeueMutex.Unlock() - log.Printf("CHTTWorker (%d) did requeue job %d with %d articles for newsgroup '%s'", i, rqj.JobID, len(rqj.Articles), *rqj.Newsgroup) + log.Printf("CHTTWorker (%d) did requeue job %d with %d articles for newsgroup '%s'", workerID, rqj.JobID, len(rqj.Articles), *rqj.Newsgroup) // unlink pointers job.Mux.Lock() if job.TTMode != nil { @@ -1942,7 +1970,7 @@ forever: job.Mux.Unlock() } } - log.Printf("CHTTWorker (%d) did requeue %d jobs", i, len(rs.jobs)) + log.Printf("CHTTWorker (%d) did requeue %d jobs", workerID, len(rs.jobs)) } // Clean up ReturnSignal maps and unlink pointers @@ -2020,6 +2048,8 @@ func (rs *ReturnSignal) BlockTT() { } func (rs *ReturnSignal) GetLockTT() { + start := time.Now() + printLast := start for { rs.Mux.Lock() if rs.RunTT { @@ -2034,12 +2064,17 @@ func (rs *ReturnSignal) GetLockTT() { return } rs.Mux.Unlock() - log.Printf("GetLockTT: waiting for RunTT to be true...") + if time.Since(printLast) > time.Second*30 { + log.Printf("GetLockTT: waiting since %v for RunTT to become true...", time.Since(start)) + printLast = time.Now() + } 
time.Sleep(nntp.ReturnDelay) } } func (rs *ReturnSignal) UnlockCHECKforTTwithWait() { + start := time.Now() + printLast := start for { rs.Mux.Lock() if !rs.RunTT { @@ -2050,8 +2085,11 @@ func (rs *ReturnSignal) UnlockCHECKforTTwithWait() { return } rs.Mux.Unlock() - log.Printf("UnlockCHECKforTTwithWait: waiting for RunTT to be false...") - time.Sleep(nntp.ReturnDelay) + if time.Since(printLast) > time.Second*30 { + log.Printf("UnlockCHECKforTTwithWait: waiting since %v for RunTT to become false...", time.Since(start)) + time.Sleep(nntp.ReturnDelay) + printLast = time.Now() + } } } @@ -2059,10 +2097,10 @@ func (rs *ReturnSignal) UnlockCHECKforTT() { rs.Mux.Lock() defer rs.Mux.Unlock() if !rs.CHECK || rs.RunTT { - log.Printf("UnlockCHECKforTT: cannot switch to RunTT, CHECK=%t RunTT=%t", rs.CHECK, rs.RunTT) + //log.Printf("UnlockCHECKforTT: already set... CHECK=%t RunTT=%t", rs.CHECK, rs.RunTT) return } - log.Printf("UnlockCHECKforTT: switched CHECK to RunTT") + //log.Printf("UnlockCHECKforTT: switched CHECK to RunTT") rs.CHECK = false rs.RunTT = true } @@ -2086,8 +2124,8 @@ func (rs *ReturnSignal) LockCHECK() { rs.Mux.Unlock() return } - if time.Since(printLast) > time.Second { - log.Printf("LockCHECK: waiting for RunTT to be false... CHECK=%t RunTT=%t", rs.CHECK, rs.RunTT) + if time.Since(printLast) > time.Second*30 { + log.Printf("LockCHECK: waiting since %v for RunTT to become false... CHECK=%t RunTT=%t", time.Since(start), rs.CHECK, rs.RunTT) printLast = time.Now() } rs.Mux.Unlock() @@ -2160,7 +2198,9 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue log.Printf("CheckWorker (%d): Ticked but no jobs in queue, continue...", workerID) continue loop } - log.Printf("CheckWorker (%d): Ticked and found %d jobs in queue", workerID, len(rs.jobs)) + if len(rs.jobs) >= MaxQueuedJobs { + log.Printf("CheckWorker (%d): Ticked and found %d jobs in queue (max: %d)", workerID, len(rs.jobs), MaxQueuedJobs) + } currentJob := rs.jobs[0] rs.jobs = rs.jobs[1:] // Remove first job from queue rs.Mux.Unlock() @@ -2235,12 +2275,6 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Unlock() replyChan(requestReplyJobDone, replyJobDone) // see if anybody is waiting and reply //log.Printf("CheckWorker (%d): job #%d CHECK done, hasMoreJobs=%v", workerID, currentJob.JobID, hasMoreJobs) - // Decrement queue length for this worker (job processing complete) - WorkerQueueLengthMux.Lock() - if workerID < len(WorkerQueueLength) && WorkerQueueLength[workerID] > 0 { - WorkerQueueLength[workerID]-- - } - WorkerQueueLengthMux.Unlock() // If there are more jobs waiting, immediately trigger next job processing if hasMoreJobs { From a4ebec3c196d0bb1f848608e75fdcfbacb74619d Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 12 Oct 2025 13:09:37 +0000 Subject: [PATCH 046/100] yayaya TNT --- cmd/nntp-transfer/main.go | 230 +++++++++++++++++--------- internal/nntp/nntp-client-commands.go | 6 + 2 files changed, 162 insertions(+), 74 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 2805969..db6228d 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -104,6 +104,29 @@ var MaxQueuedJobs int = 8 var BatchCheck int var START_WITH_CHECK_MODE bool +// statistics +var TotalNewsgroups int64 +var NewsgroupsToProcess int64 +var ServerHostName string +var StartDate string +var EndDate string + +var GlobalSpeed uint64 + +func CalcGlobalSpeed() { + for { + time.Sleep(time.Second * 3) + var speed uint64 + 
resultsMutex.Lock() + for _, progress := range nntp.NewsgroupTransferProgressMap { + progress.CalcSpeed() + speed += progress.GetSpeed() + } + GlobalSpeed = speed + resultsMutex.Unlock() + } +} + func main() { bootTime := time.Now() common.VerboseHeaders = false @@ -174,6 +197,9 @@ func main() { flag.Parse() common.IgnoreGoogleHeaders = *ignoreGoogleHeaders START_WITH_CHECK_MODE = *startWithCheck + ServerHostName = *host + StartDate = *startDate + EndDate = *endDate // Configure garbage collector if *gcPercent != 100 { @@ -414,6 +440,11 @@ func main() { log.Printf("Starting NNTP connection worker pool...") go BootConnWorkers(pool, redisCli) } + resultsMutex.Lock() + TotalNewsgroups = int64(len(newsgroups)) + NewsgroupsToProcess = TotalNewsgroups + resultsMutex.Unlock() + go CalcGlobalSpeed() // Start transfer process var wgP sync.WaitGroup wgP.Add(2) @@ -1103,6 +1134,9 @@ func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchChe if err != nil { log.Printf("Error transferring newsgroup %s: %v", ng.Name, err) } + resultsMutex.Lock() + NewsgroupsToProcess-- + resultsMutex.Unlock() }(ng, &wg, redisCli) } @@ -1235,7 +1269,9 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } return ErrNotInDateRange } else { - log.Printf("No articles found in newsgroup: %s", ng.Name) + if VERBOSE { + log.Printf("No articles found in newsgroup: %s", ng.Name) + } } return nil } @@ -1323,15 +1359,6 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i // free memory - CRITICAL: Lock and unlock in same scope, not with defer! resp.Job.Mux.Lock() log.Printf("Newsgroup: '%s' | Cleaning up TT job #%d with %d articles (ForceCleanUp)", ng.Name, resp.Job.JobID, len(resp.Job.Articles)) - /* - // Decrement queue length for this worker (job processing complete) - workerID := assignWorkerToNewsgroup(*ttMode.Newsgroup) - WorkerQueueLengthMux.Lock() - if workerID < len(WorkerQueueLength) && WorkerQueueLength[workerID] > 0 { - WorkerQueueLength[workerID]-- - } - WorkerQueueLengthMux.Unlock() - */ // Clean up Articles and their internal fields for i := range resp.Job.Articles { if resp.Job.Articles[i] != nil { @@ -1772,14 +1799,14 @@ func assignWorkerToNewsgroup(newsgroup string) int { minLoad := WorkerQueueLength[0] workerID := 0 - for i := 1; i < len(WorkerQueueLength); i++ { - if WorkerQueueLength[i] < minLoad { - minLoad = WorkerQueueLength[i] - workerID = i - WorkerQueueLength[i]++ + for wid := 1; wid < len(WorkerQueueLength); wid++ { + if WorkerQueueLength[wid] < minLoad { + minLoad = WorkerQueueLength[wid] + workerID = wid break } } + WorkerQueueLength[workerID]++ WorkerQueueLengthMux.Unlock() // Assign newsgroup to this worker @@ -2715,7 +2742,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Unlock() if queueFull { start := time.Now() - wait := start + lastPrint := start waitForReply: for { select { @@ -2729,9 +2756,12 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue break waitForReply } // log every 5s - if time.Since(wait) > time.Second { + if time.Since(lastPrint) > time.Second { + if common.WantShutdown() { + return + } log.Printf("Newsgroup: '%s' | CHTTworker (%d): pre append job #%d waiting since %v rs.jobs=%d takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, time.Since(start), len(rs.jobs), len(TakeThisQueues[workerID])) - wait = time.Now() + lastPrint = time.Now() } } } @@ -2825,10 +2855,10 @@ func handleIndex(w http.ResponseWriter, r 
*http.Request) {
- NNTP Transfer Results
+ {{.NewsgroupsToProcess}}:{{.ServerHostName}} - go-pugleaf nntp-transfer
-

🚀 NNTP Transfer Results

- +

+ 🚀 NNTP Transfer to {{.ServerHostName}} | +

+

+ Start Date: {{.StartDate}} to + End Date: {{.EndDate}} +

{{if eq .Started 0}} - Status: Waiting for transfers to start...
+ Waiting for transfers to start... {{else}} - Transfer Progress: {{.Finished}}/{{.Started}} newsgroups completed - {{if eq .Finished .Started}} + {{if eq 0 .NewsgroupsToProcess}} ✅ All complete! {{else}} - ({{subtract .Started .Finished}} in progress) +
+
+
+ {{subtract .TotalNewsgroups .NewsgroupsToProcess}} / {{.TotalNewsgroups}} {{if gt .TotalNewsgroups 0}} @ {{multiply (divide (subtract .TotalNewsgroups .NewsgroupsToProcess) .TotalNewsgroups) 100}}{{else}}0{{end}}% +
+
{{end}} -
{{end}} - - (Auto-refresh every 3 seconds)
+ {{if .Results}} +

View /results

+ {{else}} +

No transfer results yet. Waiting for transfers to complete...

+ {{end}} + +
Last updated: {{.Timestamp}}
+ {{if .Progress}} -

⏳ In Progress

+ - - - - - - - - + + + + {{range .Progress}} - - + - - - - - - - + + {{end}}
Newsgroup | Progress | Speed | CH/s | TT/s | Active | Started | Duration
NG Workers: ( {{subtract .Started .Finished}} ) | Progress | Speed {{if gtUint64 .GlobalSpeed 0}}
{{.GlobalSpeed}} KByte/s{{end}}
CH/s
TT/s
{{.Name}}
+
+ {{.Name}}
+
+ + Started {{.Duration}} ago at {{.Started}} | idle: {{.TimeSince}} + +
{{if gt .TotalArticles 0}} -
-
-
+
+
+ + {{if gt .OffsetStart 0}}{{multiply (divide .OffsetStart .TotalArticles) 100}}{{else}}0{{end}}% +
- {{.OffsetStart}}/{{.TotalArticles}}
{{else}} Initializing... {{end}}
{{.SpeedKB}} KByte/s | {{.LastArtPerfC}}/s | {{.LastArtPerfT}}/s | {{.TimeSince}} ago | {{.Started}} | {{.Duration}}
+ {{if gt .TotalArticles 0}}
+ {{.OffsetStart}}/{{.TotalArticles}}
+ {{end}}
+
+ {{if gtUint64 .SpeedKB 0}} + {{.SpeedKB}} KByte/s + {{else}} + - + {{end}} +
CH: {{.LastArtPerfC}}/s
TT: {{.LastArtPerfT}}/s
{{end}} - {{if .Results}} -

Completed Results

-
View results at /results
- {{else}} -
No transfer results yet. Waiting for transfers to complete...
- {{end}} - -
Last updated: {{.Timestamp}}
` tmpl, err := template.New("index").Funcs(template.FuncMap{ - "subtract": func(a, b int) int { return a - b }, - "eq": func(a, b int) bool { return a == b }, + "subtract": func(a, b int64) int64 { return a - b }, + "eq": func(a, b int64) bool { return a == b }, "gt": func(a, b int64) bool { return a > b }, + "gtUint64": func(a, b uint64) bool { return a > b }, "divide": func(a, b int64) float64 { if b == 0 { return 0 @@ -3047,8 +3117,8 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { LastArtPerfT uint64 } - started := len(nntp.NewsgroupTransferProgressMap) - finished := 0 + started := int64(len(nntp.NewsgroupTransferProgressMap)) + var finished int64 var progressList []ProgressInfo for name, progress := range nntp.NewsgroupTransferProgressMap { @@ -3086,17 +3156,29 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { }) data := struct { - Results []string - Started int - Finished int - Progress []ProgressInfo - Timestamp string + TotalNewsgroups int64 + NewsgroupsToProcess int64 + Results []string + Started int64 + Finished int64 + Progress []ProgressInfo + Timestamp string + StartDate string + EndDate string + ServerHostName string + GlobalSpeed uint64 }{ - Results: results, - Started: started, - Finished: finished, - Progress: progressList, - Timestamp: time.Now().Format("2006-01-02 15:04:05"), + TotalNewsgroups: TotalNewsgroups, + NewsgroupsToProcess: NewsgroupsToProcess, + Results: results, + Started: started, + Finished: finished, + Progress: progressList, + Timestamp: time.Now().Format("2006-01-02 15:04:05"), + StartDate: StartDate, + EndDate: EndDate, + ServerHostName: ServerHostName, + GlobalSpeed: GlobalSpeed, } w.Header().Set("Content-Type", "text/html; charset=utf-8") diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index c1877ff..3aa699a 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -438,6 +438,12 @@ type NewsgroupTransferProgress struct { LastArtPerfT uint64 // takethis articles per second } +func (ngp *NewsgroupTransferProgress) GetSpeed() uint64 { + ngp.Mux.RLock() + defer ngp.Mux.RUnlock() + return ngp.LastSpeedKB +} + func (ngp *NewsgroupTransferProgress) CalcSpeed() { ngp.Mux.Lock() if time.Since(ngp.LastCronTX) >= time.Second*3 { From c2e527670461e02e3d9de7c43213e64170d7ca1d Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 12 Oct 2025 14:28:02 +0000 Subject: [PATCH 047/100] testing --- cmd/nntp-transfer/main.go | 21 ++++++++------- internal/common/headers.go | 39 +++++++++++++++++++-------- internal/nntp/nntp-client-commands.go | 11 +++++++- 3 files changed, 49 insertions(+), 22 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index db6228d..0f6a27c 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -128,9 +128,10 @@ func CalcGlobalSpeed() { } func main() { - bootTime := time.Now() - common.VerboseHeaders = false config.AppVersion = appVersion + + bootTime := time.Now() + common.VERBOSE_HEADERS = false database.NO_CACHE_BOOT = true // prevents booting caches and several other not needed functions log.Printf("Starting go-pugleaf NNTP Transfer Tool (version %s)", config.AppVersion) @@ -1296,13 +1297,9 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } if !dryRun && !debugCapture { - if startTime != nil || endTime != nil { - log.Printf("Found %d articles in newsgroup %s (within specified date range) - processing in batches", totalArticles, ng.Name) - } else { - 
log.Printf("Found %d articles in newsgroup %s - processing in batches", totalArticles, ng.Name) - } + log.Printf("+ Found %d articles in newsgroup %s", totalArticles, ng.Name) } - //time.Sleep(3 * time.Second) // debug sleep + remainingArticles := totalArticles ttMode := &nntp.TakeThisMode{ Newsgroup: &ng.Name, @@ -1663,7 +1660,7 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl WorkersCheckChannel <- batchedJob // checkQueue <- batchedJob //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queued Job #%d", *ttMode.Newsgroup, workerID, batchedJob.JobID) - return batchedJob.ResponseChan, nil + return batchedJob.ReturnResponseChan(), nil } // end func processBatch // sendArticlesBatchViaTakeThis sends multiple articles via TAKETHIS in streaming mode @@ -1982,12 +1979,16 @@ forever: log.Printf("CHTTWorker (%d) did requeue job %d with %d articles for newsgroup '%s'", workerID, rqj.JobID, len(rqj.Articles), *rqj.Newsgroup) // unlink pointers job.Mux.Lock() + select { + case job.ResponseChan <- nil: + close(job.ResponseChan) + default: + } if job.TTMode != nil { job.TTMode.Newsgroup = nil } job.Newsgroup = nil job.TTMode = nil - job.ResponseChan = nil job.Articles = nil job.ArticleMap = nil job.MessageIDs = nil diff --git a/internal/common/headers.go b/internal/common/headers.go index 47293c9..a921f52 100644 --- a/internal/common/headers.go +++ b/internal/common/headers.go @@ -12,7 +12,7 @@ import ( "github.com/go-while/go-pugleaf/internal/models" ) -var VerboseHeaders bool = false +var VERBOSE_HEADERS bool = false var IgnoreGoogleHeaders bool = false var UseStrictGroupValidation bool = false var ErrNoNewsgroups = fmt.Errorf("ErrNoNewsgroups") @@ -41,6 +41,18 @@ var IgnoreHeadersMap = map[string]bool{ "xref": true, } +// refers to +// https://github.com/InterNetNews/inn/blob/ba39e0ace92aea2f9e59117f1757deaf28416d91/innd/innd.c#L79 +// const ARTHEADER ARTheaders[] ---> HTreq +var RequiredHeadersMap = map[string]bool{ + "date": true, + "from": true, + "message-id": true, + "newsgroups": true, + "path": true, + "subject": true, +} + var formats = []string{ time.RFC1123Z, // "Mon, 02 Jan 2006 15:04:05 -0700" time.RFC1123, // "Mon, 02 Jan 2006 15:04:05 MST" @@ -156,7 +168,7 @@ func ReconstructHeaders(article *models.Article, withPath bool, nntphostname *st // DateString is not RFC compliant, try DateSent first if !article.DateSent.IsZero() && article.DateSent.Year() >= 1979 { dateHeader = article.DateSent.UTC().Format(time.RFC1123Z) - if VerboseHeaders { + if VERBOSE_HEADERS { log.Printf("Using DateSent '%s' instead of DateString '%s' for article %s", dateHeader, article.DateString, article.MessageID) } } else { @@ -166,7 +178,7 @@ func ReconstructHeaders(article *models.Article, withPath bool, nntphostname *st parsedTime := parseDateReceivedHeader(dateReceivedStr) if !parsedTime.IsZero() && parsedTime.Year() >= 1979 { dateHeader = parsedTime.UTC().Format(time.RFC1123Z) - //if VerboseHeaders { + //if VERBOSE_HEADERS { log.Printf("Using Date-Received '%s' (parsed as '%s') instead of invalid DateString '%s' and invalid DateSent (year %d) for article %s", dateReceivedStr, dateHeader, article.DateString, article.DateSent.Year(), article.MessageID) //} article.DateSent = parsedTime // Update article DateSent with corrected time @@ -193,7 +205,7 @@ func ReconstructHeaders(article *models.Article, withPath bool, nntphostname *st parsedTime := parseDateReceivedHeader(dateReceivedStr) if !parsedTime.IsZero() && parsedTime.Year() >= 1979 { dateHeader = 
parsedTime.UTC().Format(time.RFC1123Z) - if VerboseHeaders { + if VERBOSE_HEADERS { log.Printf("Using Date-Received '%s' (parsed as '%s') when DateString is empty and DateSent is invalid (year %d) for article %s", dateReceivedStr, dateHeader, article.DateSent.Year(), article.MessageID) } article.DateSent = parsedTime // Update article DateSent with corrected time @@ -266,7 +278,7 @@ checkHeader: } // check if first char is lowercase if unicode.IsLower(rune(headerLine[0])) { - if VerboseHeaders { + if VERBOSE_HEADERS { log.Printf("Lowercase header: '%s' line=%d in msgId='%s' (rewrite)", headerLine, i, article.MessageID) } headerLine = strings.ToUpper(string(headerLine[0])) + headerLine[1:] @@ -309,8 +321,10 @@ checkHeader: } if !strings.HasPrefix(header, "X-") { if headersMap[strings.ToLower(header)] { - log.Printf("Duplicate header: '%s' line=%d in msgId='%s' (rewrite)", headerLine, i, article.MessageID) - headerLine = "X-RW-" + headerLine + if RequiredHeadersMap[strings.ToLower(header)] { + log.Printf("Duplicate header: '%s' line=%d in msgId='%s' (rewrite)", headerLine, i, article.MessageID) + headerLine = "X-RW-" + headerLine + } } headersMap[strings.ToLower(header)] = true } @@ -379,7 +393,9 @@ checkHeader: trimmedNG = strings.TrimSpace(trimmedNG) if trimmedNG == "" || strings.Contains(trimmedNG, " ") || !IsValidGroupName(trimmedNG) { if trimmedNG == "" { - log.Printf("Invalid newsgroup name: '%s' empty after cleanup in line=%d idx=%d in msgId='%s'", group, i, x, article.MessageID) + if VERBOSE_HEADERS { + log.Printf("Invalid newsgroup name: '%s' empty after cleanup in line=%d idx=%d in msgId='%s'", group, i, x, article.MessageID) + } } else { log.Printf("Invalid newsgroup name: '%s' in line=%d idx=%d in msgId='%s'", group, i, x, article.MessageID) } @@ -392,8 +408,9 @@ checkHeader: validNewsgroups = append(validNewsgroups, trimmedNG) } // end for checkGroups - if len(validNewsgroups) == 0 { - log.Printf("Invalid Newsgroups header: '%s' line=%d in msgId='%s' (return err)", headerLine, i, article.MessageID) + if len(validNewsgroups) == 0 && newsgroup == "" { + log.Printf("Invalid Newsgroups header: '%s' line=%d in msgId='%s'", headerLine, i, article.MessageID) + return nil, ErrNoNewsgroups } if badGroups > 0 { @@ -407,7 +424,7 @@ checkHeader: headers = append(headers, headerLine) } // end for moreHeaders - if VerboseHeaders && ignoredLines > 0 { + if VERBOSE_HEADERS && ignoredLines > 0 { log.Printf("Reconstructed %d header lines, ignored %d: msgId='%s'", len(headers), ignoredLines, article.MessageID) } fallbackNewsgroup := false diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 3aa699a..ffe2eeb 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -368,7 +368,7 @@ type TakeThisTracker struct { type CHTTJob struct { JobID uint64 // Unique job ID for tracing Newsgroup *string - Mux sync.Mutex + Mux sync.RWMutex TTMode *TakeThisMode ResponseChan chan *TTResponse responseSent bool // Track if response already sent (prevents double send) @@ -394,6 +394,15 @@ type CHTTJob struct { NGTProgress *NewsgroupTransferProgress } +func (job *CHTTJob) ReturnResponseChan() chan *TTResponse { + job.Mux.RLock() + defer job.Mux.RUnlock() + if job.ResponseChan != nil { + return job.ResponseChan + } + return nil +} + func (job *CHTTJob) Response(ForceCleanUp bool, Err error) { if job.ResponseChan == nil { log.Printf("ERROR CHTTJob.Response(): ResponseChan is nil for job #%d", job.JobID) From 
c0b0756519558cdafeb3cad9b3e57bc73efce7d5 Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 12 Oct 2025 14:51:18 +0000 Subject: [PATCH 048/100] testing --- cmd/nntp-transfer/main.go | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 0f6a27c..40cb391 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -102,7 +102,7 @@ var redisCtx = context.Background() var REDIS_TTL time.Duration = 3600 * time.Second // default 1h var MaxQueuedJobs int = 8 var BatchCheck int -var START_WITH_CHECK_MODE bool +var CHECK_FIRST bool // statistics var TotalNewsgroups int64 @@ -110,7 +110,6 @@ var NewsgroupsToProcess int64 var ServerHostName string var StartDate string var EndDate string - var GlobalSpeed uint64 func CalcGlobalSpeed() { @@ -182,7 +181,7 @@ func main() { // History configuration useShortHashLen = flag.Int("useshorthashlen", 7, "Short hash length for history storage (2-7, default: 7)") - startWithCheck = flag.Bool("start-with-check", false, "Enable 'start with check' mode") + checkFirst = flag.Bool("check-first", true, "Use CHECK command before TAKETHIS to avoid duplicates (recommended)") // Newsgroup filtering options fileInclude = flag.String("file-include", "", "File containing newsgroup patterns to include (one per line)") @@ -197,7 +196,7 @@ func main() { ) flag.Parse() common.IgnoreGoogleHeaders = *ignoreGoogleHeaders - START_WITH_CHECK_MODE = *startWithCheck + CHECK_FIRST = *checkFirst ServerHostName = *host StartDate = *startDate EndDate = *endDate @@ -1303,7 +1302,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i remainingArticles := totalArticles ttMode := &nntp.TakeThisMode{ Newsgroup: &ng.Name, - CheckMode: START_WITH_CHECK_MODE, + CheckMode: CHECK_FIRST, } ttResponses := make(chan *nntp.TTSetup, totalArticles/int64(batchCheck)+2) start := time.Now() @@ -1328,12 +1327,12 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i num++ //log.Printf("Newsgroup: '%s' | Starting response channel processor num %d (goroutines: %d)", ng.Name, num, runtime.NumGoroutine()) responseWG.Add(1) - go func(rc chan *nntp.TTResponse, num uint64, responseWG *sync.WaitGroup) { + go func(responseChan chan *nntp.TTResponse, num uint64, responseWG *sync.WaitGroup) { defer responseWG.Done() //defer log.Printf("Newsgroup: '%s' | Quit response channel processor num %d (goroutines: %d)", ng.Name, num, runtime.NumGoroutine()) // Read exactly ONE response from this channel (channel is buffered with cap 1) - resp := <-rc // job.Response(ForceCleanUp, err) arrives here + resp := <-responseChan // job.Response(ForceCleanUp, err) arrives here if resp == nil { log.Printf("Newsgroup: '%s' | Warning: nil TT response received!?", ng.Name) @@ -1355,7 +1354,9 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } // free memory - CRITICAL: Lock and unlock in same scope, not with defer! 
resp.Job.Mux.Lock() - log.Printf("Newsgroup: '%s' | Cleaning up TT job #%d with %d articles (ForceCleanUp)", ng.Name, resp.Job.JobID, len(resp.Job.Articles)) + if VERBOSE { + log.Printf("Newsgroup: '%s' | Cleaning up TT job #%d with %d articles (ForceCleanUp)", ng.Name, resp.Job.JobID, len(resp.Job.Articles)) + } // Clean up Articles and their internal fields for i := range resp.Job.Articles { if resp.Job.Articles[i] != nil { @@ -1960,7 +1961,7 @@ forever: // copy articles pointer job.Mux.Lock() if len(job.Articles) == 0 { - log.Printf("ERROR in CHTTWorker (%d) job %d has no articles, skipping requeue", workerID, job.JobID) + log.Printf("ERROR in CHTTWorker (%d) job #%d has no articles, skipping requeue", workerID, job.JobID) job.Mux.Unlock() continue } @@ -1976,12 +1977,11 @@ forever: jobRequeueMutex.Lock() jobRequeue[rqj.Newsgroup] = append(jobRequeue[rqj.Newsgroup], rqj) jobRequeueMutex.Unlock() - log.Printf("CHTTWorker (%d) did requeue job %d with %d articles for newsgroup '%s'", workerID, rqj.JobID, len(rqj.Articles), *rqj.Newsgroup) + log.Printf("CHTTWorker (%d) did requeue job #%d with %d articles for newsgroup '%s'", workerID, rqj.JobID, len(rqj.Articles), *rqj.Newsgroup) // unlink pointers job.Mux.Lock() select { case job.ResponseChan <- nil: - close(job.ResponseChan) default: } if job.TTMode != nil { From 63098f90b62a793c1319eee55279b96b2fdd17d5 Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 12 Oct 2025 21:47:41 +0000 Subject: [PATCH 049/100] cherry-pick: disableNewsgroupsNotInActiveFile --- cmd/web/main.go | 24 ++++++-- cmd/web/main_functions.go | 87 +++++++++++++++++++++++++++++ web/templates/admin_newsgroups.html | 4 +- 3 files changed, 108 insertions(+), 7 deletions(-) diff --git a/cmd/web/main.go b/cmd/web/main.go index f35959c..0cb7bb8 100644 --- a/cmd/web/main.go +++ b/cmd/web/main.go @@ -66,6 +66,7 @@ var ( // Compare flags compareActiveFile string compareActiveMinArticles int64 + disableActiveFile string // Expiry update flag updateNewsgroupsExpiryFile string @@ -110,13 +111,13 @@ func main() { flag.IntVar(&nntptlsport, "nntptlsport", 0, "NNTP TLS port") flag.StringVar(&nntpcertFile, "nntpcertfile", "", "NNTP TLS certificate file (/path/to/fullchain.pem)") flag.StringVar(&nntpkeyFile, "nntpkeyfile", "", "NNTP TLS key file (/path/to/privkey.pem)") - flag.BoolVar(&forceReloadDesc, "update-descr", false, "Updates (overwrites existing!) internal newsgroup descriptions from file preload/newsgroups.descriptions (default: false)") - flag.StringVar(&importActiveFile, "import-active", "", "Import newsgroups from NNTP active file (format: groupname highwater lowwater status)") - flag.StringVar(&importDescFile, "import-desc", "", "Import newsgroups from descriptions file (format: groupname\\tdescription)") - flag.BoolVar(&importCreateMissing, "import-create", false, "Create missing newsgroups when importing from descriptions file (default: false)") + flag.BoolVar(&forceReloadDesc, "update-overwrite-newsgroups-descriptions", false, "Updates (overwrites existing!) 
internal newsgroup descriptions from file preload/newsgroups.descriptions (default: false)") + flag.StringVar(&importDescFile, "update-newsgroups-descriptions-file", "", "Import newsgroups from descriptions file (format: groupname\\tdescription)") + flag.BoolVar(&importCreateMissing, "update-newsgroups-descriptions-import-create-newsgroups", false, "Create missing newsgroups when importing from descriptions file (default: false)") flag.BoolVar(&repairWatermarks, "repair-watermarks", false, "Repair corrupted newsgroup watermarks caused by preloader (default: false)") - flag.IntVar(&updateNGAConcurrent, "update-newsgroups-activity", 0, "Updates newsgroup updated_at timestamps to reflect actual article activity. N = process this many newsgroups concurrently") + flag.IntVar(&updateNGAConcurrent, "update-newsgroups-activity", 0, "Updates newsgroup updated_at timestamps to reflect actual article activity. Set N to process this many newsgroups concurrently") flag.BoolVar(&updateNewsgroupsHideFuture, "update-newsgroups-hide-futureposts", false, "Hide articles posted more than 48 hours in the future (default: false)") + flag.StringVar(&importActiveFile, "import-active", "", "Import newsgroups from NNTP active file (format: groupname highwater lowwater status)") flag.StringVar(&writeActiveFile, "write-active-file", "", "Write NNTP active file from main database newsgroups table to specified path") flag.BoolVar(&writeActiveOnly, "write-active-only", true, "use with -write-active-file (false writes only non active groups!)") flag.StringVar(&rsyncInactiveGroups, "rsync-inactive-groups", "", "path to new data dir, uses rsync to copy all inactive group databases to new data folder.") @@ -125,6 +126,7 @@ func main() { flag.BoolVar(&noCronjobs, "no-cronjobs", false, "use this flag to not run cron jobs") flag.BoolVar(&findOrphanDBs, "find-orphan-dbs", false, "Find orphaned database folders in data/db that don't correspond to any newsgroup in main database") flag.StringVar(&compareActiveFile, "compare-active", "", "Compare active file with database and show missing groups (format: groupname highwater lowwater status)") + flag.StringVar(&disableActiveFile, "disable-active", "", "Disable newsgroups not listed in active file (format: groupname highwater lowwater status)") flag.Int64Var(&compareActiveMinArticles, "compare-active-min-articles", 0, "use with -compare-active: only show groups with more than N articles (calculated as high-low)") flag.StringVar(&updateNewsgroupsExpiryFile, "update-newsgroups-expiry-from-file", "", "Update newsgroup expiry_days from file (format: newsgroup:days, one per line)") flag.BoolVar(&verbose, "verbose", false, "print more (debug) output") @@ -298,6 +300,18 @@ func main() { } } + // disableActiveFile + if disableActiveFile != "" { + log.Printf("[WEB]: Disabling newsgroups not listed in active file: %s", disableActiveFile) + if err := disableNewsgroupsNotInActiveFile(db, disableActiveFile); err != nil { + log.Printf("[WEB]: Error: Failed to disable newsgroups: %v", err) + os.Exit(1) + } else { + log.Printf("[WEB]: Newsgroup disabling completed successfully") + os.Exit(0) + } + } + // findOrphanDBs if findOrphanDBs { log.Printf("[WEB]: Finding orphaned database folders...") diff --git a/cmd/web/main_functions.go b/cmd/web/main_functions.go index f3516ea..6e73afc 100644 --- a/cmd/web/main_functions.go +++ b/cmd/web/main_functions.go @@ -1005,3 +1005,90 @@ func updateNewsgroupsExpiryFromFile(db *database.Database, filename string) erro return nil } + +// 
disableNewsgroupsNotInActiveFile disables newsgroups NOT listed in the active file +func disableNewsgroupsNotInActiveFile(db *database.Database, activeFilePath string) error { + log.Printf("[WEB]: Disabling newsgroups not listed in active file: %s", activeFilePath) + + // Open and read the active file + file, err := os.Open(activeFilePath) + if err != nil { + return fmt.Errorf("failed to open active file '%s': %w", activeFilePath, err) + } + defer file.Close() + + // Parse active file to get list of groups to KEEP active + activeGroups := make(map[string]bool) + scanner := bufio.NewScanner(file) + lineNum := 0 + + for scanner.Scan() { + lineNum++ + line := strings.TrimSpace(scanner.Text()) + + // Skip empty lines and comments + if line == "" || strings.HasPrefix(line, "#") { + continue + } + + // Parse active file format: groupname high low status + fields := strings.Fields(line) + if len(fields) < 4 { + log.Printf("[WEB]: Warning: Skipping malformed line %d in active file: %s", lineNum, line) + continue + } + + groupName := fields[0] + if groupName != "" { + activeGroups[groupName] = true + } + } + + if err := scanner.Err(); err != nil { + return fmt.Errorf("error reading active file: %w", err) + } + + log.Printf("[WEB]: Found %d active groups in file", len(activeGroups)) + + // Get all newsgroups from database + dbGroups, err := db.MainDBGetAllNewsgroups() + if err != nil { + return fmt.Errorf("failed to get newsgroups from database: %w", err) + } + + log.Printf("[WEB]: Found %d total newsgroups in database", len(dbGroups)) + + // Disable groups NOT in active file + disabledCount := 0 + alreadyInactiveCount := 0 + keptActiveCount := 0 + + for _, group := range dbGroups { + if _, exists := activeGroups[group.Name]; !exists { + // Group not in active file - disable it + if group.Active { + if err := db.UpdateNewsgroupActive(group.Name, false); err != nil { + log.Printf("[WEB]: Warning: Failed to disable newsgroup '%s': %v", group.Name, err) + continue + } + disabledCount++ + if disabledCount%1000 == 0 { + log.Printf("[WEB]: Disabled %d newsgroups so far...", disabledCount) + } + } else { + alreadyInactiveCount++ + } + } else { + // Group IS in active file - keep it active + keptActiveCount++ + } + } + + log.Printf("[WEB]: Disable operation completed:") + log.Printf("[WEB]: - Groups kept active (in active file): %d", keptActiveCount) + log.Printf("[WEB]: - Groups newly disabled (not in active file): %d", disabledCount) + log.Printf("[WEB]: - Groups already inactive (not in active file): %d", alreadyInactiveCount) + log.Printf("[WEB]: - Total groups processed: %d", len(dbGroups)) + + return nil +} diff --git a/web/templates/admin_newsgroups.html b/web/templates/admin_newsgroups.html index df24e3e..30431e4 100644 --- a/web/templates/admin_newsgroups.html +++ b/web/templates/admin_newsgroups.html @@ -218,7 +218,7 @@
📰 Newsgroup Management
{{end}} @@ -246,7 +246,7 @@
📰 Newsgroup Management
{{end}} From a01075839d9752a6e9e90efd5d2763c1628e13de Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 12 Oct 2025 23:38:53 +0000 Subject: [PATCH 050/100] testing --- cmd/nntp-transfer/main.go | 129 ++++++++++++++++---------- internal/nntp/nntp-client-commands.go | 36 ++++--- 2 files changed, 105 insertions(+), 60 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 40cb391..1cb9faa 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -116,13 +116,13 @@ func CalcGlobalSpeed() { for { time.Sleep(time.Second * 3) var speed uint64 - resultsMutex.Lock() + nntp.ResultsMutex.Lock() for _, progress := range nntp.NewsgroupTransferProgressMap { progress.CalcSpeed() speed += progress.GetSpeed() } GlobalSpeed = speed - resultsMutex.Unlock() + nntp.ResultsMutex.Unlock() } } @@ -440,10 +440,10 @@ func main() { log.Printf("Starting NNTP connection worker pool...") go BootConnWorkers(pool, redisCli) } - resultsMutex.Lock() + nntp.ResultsMutex.Lock() TotalNewsgroups = int64(len(newsgroups)) NewsgroupsToProcess = TotalNewsgroups - resultsMutex.Unlock() + nntp.ResultsMutex.Unlock() go CalcGlobalSpeed() // Start transfer process var wgP sync.WaitGroup @@ -1095,7 +1095,7 @@ func matchesAnyWildcardPattern(newsgroup string, patterns []string) bool { return false } -var totalTransferred, totalUnwanted, totalRejected, totalRedisCacheHits, totalTXErrors, totalConnErrors, nothingInDateRange uint64 +var totalTransferred, totalUnwanted, totalRejected, totalRedisCacheHits, totalTXErrors, totalConnErrors, globalTotalArticles, nothingInDateRange uint64 var transferMutex sync.Mutex // runTransfer performs the actual article transfer process @@ -1134,9 +1134,9 @@ func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchChe if err != nil { log.Printf("Error transferring newsgroup %s: %v", ng.Name, err) } - resultsMutex.Lock() + nntp.ResultsMutex.Lock() NewsgroupsToProcess-- - resultsMutex.Unlock() + nntp.ResultsMutex.Unlock() }(ng, &wg, redisCli) } @@ -1147,11 +1147,11 @@ func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchChe if nothingInDateRange > 0 { log.Printf("Note: %d newsgroups had no articles in the specified date range", nothingInDateRange) } - resultsMutex.Lock() + nntp.ResultsMutex.Lock() for _, result := range results { log.Print(result) } - resultsMutex.Unlock() + nntp.ResultsMutex.Unlock() log.Printf("Summary: transferred: %d | redis_cache_hits: %d | unwanted: %d | rejected: %d | TX_Errors: %d | connErrors: %d", totalTransferred, totalRedisCacheHits, totalUnwanted, totalRejected, totalTXErrors, totalConnErrors) return nil @@ -1225,7 +1225,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } //log.Printf("Newsgroup: '%s' | transferNewsgroup: Got group DBs, querying article count...", ng.Name) // Initialize newsgroup progress tracking - resultsMutex.Lock() + nntp.ResultsMutex.Lock() if _, exists := nntp.NewsgroupTransferProgressMap[ng.Name]; !exists { nntp.NewsgroupTransferProgressMap[ng.Name] = &nntp.NewsgroupTransferProgress{ Newsgroup: &ng.Name, @@ -1236,7 +1236,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i TotalArticles: 0, } } - resultsMutex.Unlock() + nntp.ResultsMutex.Unlock() // Get total article count first with date filtering totalArticles, err := getArticleCountWithDateFilter(db, groupDBsA, startTime, endTime) @@ -1255,13 +1255,13 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i if ferr := 
db.ForceCloseGroupDBs(groupDBsA); ferr != nil { log.Printf("ForceCloseGroupDBs error for '%s': %v", ng.Name, ferr) } - resultsMutex.Lock() + nntp.ResultsMutex.Lock() nntp.NewsgroupTransferProgressMap[ng.Name].Finished = true nntp.NewsgroupTransferProgressMap[ng.Name].LastUpdated = time.Now() if VERBOSE { results = append(results, fmt.Sprintf("END Newsgroup: '%s' | No articles to process", ng.Name)) } - resultsMutex.Unlock() + nntp.ResultsMutex.Unlock() // No articles to process if startTime != nil || endTime != nil { if VERBOSE { @@ -1278,11 +1278,11 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i groupDBsA.Return(db) // Initialize newsgroup progress tracking - resultsMutex.Lock() + nntp.ResultsMutex.Lock() nntp.NewsgroupTransferProgressMap[ng.Name].TotalArticles = totalArticles nntp.NewsgroupTransferProgressMap[ng.Name].LastUpdated = time.Now() ngtprogress := nntp.NewsgroupTransferProgressMap[ng.Name] - resultsMutex.Unlock() + nntp.ResultsMutex.Unlock() if dryRun { if startTime != nil || endTime != nil { @@ -1395,9 +1395,21 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i amux.Lock() result := fmt.Sprintf("END Newsgroup: '%s' | transferred: %d/%d | unwanted: %d | rejected: %d | checked: %d | TX_Errors: %d | connErrors: %d | took %v", ng.Name, transferred, totalArticles, unwanted, rejected, checked, txErrors, connErrors, time.Since(start)) + amux.Unlock() - //log.Print(result) - resultsMutex.Lock() + + ngtprogress.Mux.Lock() + redis_cached := ngtprogress.RedisCached + ngtprogress.Mux.Unlock() + + nntp.ResultsMutex.Lock() + globalTotalArticles += uint64(totalArticles) + totalTransferred += transferred + totalRedisCacheHits += redis_cached + totalUnwanted += unwanted + totalRejected += rejected + totalTXErrors += txErrors + totalConnErrors += connErrors results = append(results, result) // Mark newsgroup as finished if progress, exists := nntp.NewsgroupTransferProgressMap[ng.Name]; exists { @@ -1416,7 +1428,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } delete(rejectedArticles, ng.Name) // free memory } - resultsMutex.Unlock() + nntp.ResultsMutex.Unlock() }(&responseWG) OffsetQueue := &nntp.OffsetQueue{ Newsgroup: &ng.Name, @@ -1540,7 +1552,6 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i var results []string var rejectedArticles = make(map[string][]string) -var resultsMutex sync.RWMutex var lowerLevel float64 = 90.0 var upperLevel float64 = 95.0 @@ -1554,7 +1565,7 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl } // Update newsgroup progress with current offset - resultsMutex.RLock() + nntp.ResultsMutex.RLock() if progress, exists := nntp.NewsgroupTransferProgressMap[*ttMode.Newsgroup]; exists { progress.Mux.Lock() progress.OffsetStart = dbOffset @@ -1563,7 +1574,7 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl progress.LastUpdated = time.Now() progress.Mux.Unlock() } - resultsMutex.RUnlock() + nntp.ResultsMutex.RUnlock() ttMode.FlipMode(lowerLevel, upperLevel) @@ -1612,7 +1623,7 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl if VERBOSE { log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis (skip [CHECK])", *ttMode.Newsgroup, article.MessageID) } - batchedJob.Increment(nntp.IncrFLAG_REDIS_CACHED) + batchedJob.Increment(nntp.IncrFLAG_REDIS_CACHED, 1) redis_cache_hits++ articles[i] = nil continue @@ -1637,7 +1648,7 @@ func 
processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl if len(batchedJob.MessageIDs) == 0 { log.Printf("Newsgroup: '%s' | No message IDs to check in batch. (redis_cache_hits: %d)", *ttMode.Newsgroup, redis_cache_hits) - return nil, nil + return batchedJob.QuitResponseChan(), nil } if VERBOSE { log.Printf("Newsgroup: '%s' | Sending CHECK commands for %d/%d articles", *ttMode.Newsgroup, len(batchedJob.MessageIDs), len(articles)) @@ -2390,28 +2401,17 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue return } - //code := respData.Code - //line := respData.Line - //err := respData.Err - if respData.Code == 0 && respData.Err != nil { log.Printf("Failed to read CHECK response: %v", respData.Err) //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() return } - /* disabled - if err := conn.SetReadDeadline(time.Time{}); err != nil { - log.Printf("Failed to set unset read deadline: %v", err) - nntp.ReturnReadRequest(rrRetChan) - rr.ClearReadRequest() - return - } - */ + took := time.Since(start) tookTime += took responseCount++ - rr.Job.Increment(nntp.IncrFLAG_CHECKED) + rr.Job.Increment(nntp.IncrFLAG_CHECKED, 1) if rr.N == 1 && took.Milliseconds() > 100 { log.Printf("CheckWorker (%d): time to first response for msgID: %s (cmdID=%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, took.Milliseconds()) tookTime = 0 @@ -2468,11 +2468,11 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue case 438: //log.Printf("Newsgroup: '%s' | Got Response: Unwanted Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) - job.Increment(nntp.IncrFLAG_UNWANTED) + job.Increment(nntp.IncrFLAG_UNWANTED, 1) case 431: //log.Printf("Newsgroup: '%s' | Got Response: Retry Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) - job.Increment(nntp.IncrFLAG_RETRY) + job.Increment(nntp.IncrFLAG_RETRY, 1) default: log.Printf("Newsgroup: '%s' | Unknown CHECK response: line='%s' code=%d expected msgID %s", *job.Newsgroup, respData.Line, respData.Code, *rr.MsgID) @@ -2576,7 +2576,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue if respData.Err != nil { log.Printf("ERROR TTResponseWorker (%d): Failed to read TAKETHIS response for %s: %v", workerID, *rr.MsgID, respData.Err) - rr.Job.Increment(nntp.IncrFLAG_CONN_ERRORS) + rr.Job.Increment(nntp.IncrFLAG_CONN_ERRORS, 1) rr.ClearReadRequest() conn.ForceCloseConn() return @@ -2593,7 +2593,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue switch respData.Code { case 239: rr.Job.TTMode.IncrementSuccess() - rr.Job.Increment(nntp.IncrFLAG_TRANSFERRED) + rr.Job.Increment(nntp.IncrFLAG_TRANSFERRED, 1) // Cache in Redis if enabled (inline, no separate tracker struct needed) if rs.redisCli != nil { err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err() @@ -2603,7 +2603,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } case 439: - rr.Job.Increment(nntp.IncrFLAG_REJECTED) + rr.Job.Increment(nntp.IncrFLAG_REJECTED, 1) // Cache rejection in Redis if enabled if rs.redisCli != nil { err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err() @@ -2619,7 +2619,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue case 400, 480, 500, 501, 502, 503, 504: log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': response=%d", *rr.Job.Newsgroup, *rr.MsgID, respData.Code) - rr.Job.Increment(nntp.IncrFLAG_TX_ERRORS) + 
rr.Job.Increment(nntp.IncrFLAG_TX_ERRORS, 1) rr.ClearReadRequest() conn.ForceCloseConn() return @@ -2627,7 +2627,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue default: log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': unknown response=%d", *rr.Job.Newsgroup, *rr.MsgID, respData.Code) - rr.Job.Increment(nntp.IncrFLAG_TX_ERRORS) + rr.Job.Increment(nntp.IncrFLAG_TX_ERRORS, 1) } rr.ClearReadRequest() @@ -2688,6 +2688,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // Send TAKETHIS commands using existing function redis_cached, err := sendArticlesBatchViaTakeThis(conn, wantedArticles, job, *job.Newsgroup, rs.redisCli, demuxer, readTAKETHISResponsesChan) //common.ChanRelease(flipflopChan) + job.Increment(nntp.IncrFLAG_REDIS_CACHED, redis_cached) rs.BlockTT() if err != nil { log.Printf("Newsgroup: '%s' | TTworker (%d): Error in TAKETHIS job #%d: %v", *job.Newsgroup, workerID, job.JobID, err) @@ -2736,7 +2737,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue select { case requestReplyJobDone <- struct{}{}: default: - log.Printf("ERROR Newsgroup: '%s' | CHTTworker (%d): job #%d could not signal requestReplyJobDone, channel full", *job.Newsgroup, workerID, job.JobID) + log.Printf("Newsgroup: '%s' | Debug: CHTTworker (%d): job #%d could not signal requestReplyJobDone, channel full. pass", *job.Newsgroup, workerID, job.JobID) // pass } } @@ -2847,8 +2848,8 @@ func startWebServer(port int) { // handleIndex serves the main page with transfer results func handleIndex(w http.ResponseWriter, r *http.Request) { - resultsMutex.RLock() - defer resultsMutex.RUnlock() + nntp.ResultsMutex.RLock() + defer nntp.ResultsMutex.RUnlock() // HTML template for displaying results const htmlTemplate = ` @@ -3009,6 +3010,15 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { {{else}} {{if eq 0 .NewsgroupsToProcess}} ✅ All complete! +

+ Summary Statistics:
+ Total Articles: {{.TotalArticles}}
+ Transferred: {{.TotalTransferred}}
+ Redis Cache Hits: {{.TotalRedisCacheHits}}
+ Unwanted: {{.TotalUnwanted}}
+ Rejected: {{.TotalRejected}}
+ TX Errors: {{.TotalTXErrors}}
+ Conn Errors: {{.TotalConnErrors}} {{else}}
@@ -3016,6 +3026,15 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { {{subtract .TotalNewsgroups .NewsgroupsToProcess}} / {{.TotalNewsgroups}} {{if gt .TotalNewsgroups 0}} @ {{multiply (divide (subtract .TotalNewsgroups .NewsgroupsToProcess) .TotalNewsgroups) 100}}{{else}}0{{end}}%
+
+ Live Statistics:
+ Total Articles: {{.TotalArticles}}
+ Transferred: {{.TotalTransferred}}
+ Redis Cache Hits: {{.TotalRedisCacheHits}}
+ Unwanted: {{.TotalUnwanted}}
+ Rejected: {{.TotalRejected}}
+ TX Errors: {{.TotalTXErrors}}
+ Conn Errors: {{.TotalConnErrors}} {{end}} {{end}} @@ -3168,6 +3187,13 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { EndDate string ServerHostName string GlobalSpeed uint64 + TotalArticles uint64 + TotalTransferred uint64 + TotalRedisCacheHits uint64 + TotalUnwanted uint64 + TotalRejected uint64 + TotalTXErrors uint64 + TotalConnErrors uint64 }{ TotalNewsgroups: TotalNewsgroups, NewsgroupsToProcess: NewsgroupsToProcess, @@ -3180,6 +3206,13 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { EndDate: EndDate, ServerHostName: ServerHostName, GlobalSpeed: GlobalSpeed, + TotalArticles: globalTotalArticles, + TotalTransferred: totalTransferred, + TotalRedisCacheHits: totalRedisCacheHits, + TotalUnwanted: totalUnwanted, + TotalRejected: totalRejected, + TotalTXErrors: totalTXErrors, + TotalConnErrors: totalConnErrors, } w.Header().Set("Content-Type", "text/html; charset=utf-8") @@ -3190,8 +3223,8 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { // handleResults serves the results page as plain text func handleResults(w http.ResponseWriter, r *http.Request) { - resultsMutex.RLock() - defer resultsMutex.RUnlock() + nntp.ResultsMutex.RLock() + defer nntp.ResultsMutex.RUnlock() w.Header().Set("Content-Type", "text/plain; charset=utf-8") diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index ffe2eeb..02df6d2 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -403,6 +403,16 @@ func (job *CHTTJob) ReturnResponseChan() chan *TTResponse { return nil } +func (job *CHTTJob) QuitResponseChan() chan *TTResponse { + job.Response(true, nil) + job.Mux.RLock() + defer job.Mux.RUnlock() + if job.ResponseChan != nil { + return job.ResponseChan + } + return nil +} + func (job *CHTTJob) Response(ForceCleanUp bool, Err error) { if job.ResponseChan == nil { log.Printf("ERROR CHTTJob.Response(): ResponseChan is nil for job #%d", job.JobID) @@ -423,7 +433,8 @@ func (job *CHTTJob) Response(ForceCleanUp bool, Err error) { close(job.ResponseChan) } -// NewsgroupTransferProgressMap is protected by resultsMutex in nntp-transfer/main.go +// NewsgroupTransferProgressMap is protected by ResultsMutex, used in nntp-transfer/main.go +var ResultsMutex sync.RWMutex var NewsgroupTransferProgressMap = make(map[string]*NewsgroupTransferProgress) // NewsgroupProgress tracks the progress of a newsgroup transfer @@ -436,6 +447,7 @@ type NewsgroupTransferProgress struct { BatchStart int64 BatchEnd int64 TotalArticles int64 + RedisCached uint64 ArticlesTT uint64 ArticlesCH uint64 Finished bool @@ -517,28 +529,28 @@ const IncrFLAG_REDIS_CACHED = 7 const IncrFLAG_TX_ERRORS = 8 const IncrFLAG_CONN_ERRORS = 9 -func (job *CHTTJob) Increment(counter int) { +func (job *CHTTJob) Increment(counter int, n uint64) { job.Mux.Lock() defer job.Mux.Unlock() switch counter { case IncrFLAG_CHECKED: - job.checked++ + job.checked += n case IncrFLAG_WANTED: - job.wanted++ + job.wanted += n case IncrFLAG_UNWANTED: - job.unwanted++ + job.unwanted += n case IncrFLAG_REJECTED: - job.rejected++ + job.rejected += n case IncrFLAG_RETRY: - job.retry++ + job.retry += n case IncrFLAG_TRANSFERRED: - job.transferred++ + job.transferred += n case IncrFLAG_REDIS_CACHED: - job.redisCached++ + job.redisCached += n case IncrFLAG_TX_ERRORS: - job.TxErrors++ + job.TxErrors += n case IncrFLAG_CONN_ERRORS: - job.ConnErrors++ + job.ConnErrors += n } } @@ -546,7 +558,7 @@ func (job *CHTTJob) AppendWantedMessageID(msgID *string) { job.Mux.Lock() 
job.WantedIDs = append(job.WantedIDs, msgID) job.Mux.Unlock() - job.Increment(IncrFLAG_WANTED) + job.Increment(IncrFLAG_WANTED, 1) } func (job *CHTTJob) GetUpdateCounters(transferred, unwanted, rejected, checked, txErrors, connErrors *uint64) { From 5bbe0e0c2c32c48d9ea17b010b8d16ef2c76e9ac Mon Sep 17 00:00:00 2001 From: go-while Date: Mon, 13 Oct 2025 01:21:06 +0000 Subject: [PATCH 051/100] testing --- cmd/nntp-transfer/main.go | 106 ++++++++++++++++---------- internal/nntp/nntp-client-commands.go | 25 +++--- 2 files changed, 82 insertions(+), 49 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 1cb9faa..8327c52 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1311,13 +1311,13 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i var collectorWG sync.WaitGroup collectorWG.Add(1) - // WaitGroup to track individual response channel processors + // WaitGroup to track individual batched jobs response channel processors var responseWG sync.WaitGroup go func(responseWG *sync.WaitGroup) { defer collectorWG.Done() var amux sync.Mutex - var transferred, unwanted, rejected, checked, txErrors, connErrors uint64 + var transferred, unwanted, rejected, checked, redis_cached, txErrors, connErrors uint64 var num uint64 for setup := range ttResponses { if setup == nil || setup.ResponseChan == nil { @@ -1347,7 +1347,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } // get numbers amux.Lock() - resp.Job.GetUpdateCounters(&transferred, &unwanted, &rejected, &checked, &txErrors, &connErrors) + resp.Job.GetUpdateCounters(&transferred, &unwanted, &rejected, &checked, &redis_cached, &txErrors, &connErrors) amux.Unlock() if !resp.ForceCleanUp { return @@ -1398,9 +1398,11 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i amux.Unlock() - ngtprogress.Mux.Lock() - redis_cached := ngtprogress.RedisCached - ngtprogress.Mux.Unlock() + /* + ngtprogress.Mux.Lock() + redis_cached := ngtprogress.RedisCached + ngtprogress.Mux.Unlock() + */ nntp.ResultsMutex.Lock() globalTotalArticles += uint64(totalArticles) @@ -1578,7 +1580,7 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl ttMode.FlipMode(lowerLevel, upperLevel) - batchedJob := &nntp.CHTTJob{ + job := &nntp.CHTTJob{ JobID: atomic.AddUint64(&nntp.JobIDCounter, 1), Newsgroup: ttMode.Newsgroup, MessageIDs: make([]*string, 0, len(articles)), @@ -1592,7 +1594,7 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl OffsetQ: offsetQ, NGTProgress: ngtprogress, } - var redis_cache_hits int + var redis_cached uint64 if redisCli != nil && len(articles) > 0 { pipe := redisCli.Pipeline() cmds := make([]*redis.IntCmd, len(articles)) @@ -1611,28 +1613,36 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl log.Printf("Newsgroup: '%s' | Redis pipeline error: %v", *ttMode.Newsgroup, err) } - // Process results + // Process results and filter cached articles for i, cmd := range cmds { if cmd == nil || articles[i] == nil { - continue + continue // Skip if command wasn't queued or article is nil } article := articles[i] exists, cmdErr := cmd.Result() if cmdErr == nil && exists > 0 { // Cached in Redis - skip this article if VERBOSE { - log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis (skip [CHECK])", *ttMode.Newsgroup, article.MessageID) + log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis in job #%d (skip 
CHECK)", *ttMode.Newsgroup, article.MessageID, job.JobID) } - batchedJob.Increment(nntp.IncrFLAG_REDIS_CACHED, 1) - redis_cache_hits++ + job.Increment(nntp.IncrFLAG_REDIS_CACHED, 1) + redis_cached++ articles[i] = nil continue } - // Not cached - add to valid list - batchedJob.Articles = append(batchedJob.Articles, article) - batchedJob.ArticleMap[&article.MessageID] = article - batchedJob.MessageIDs = append(batchedJob.MessageIDs, &article.MessageID) + job.Articles = append(job.Articles, article) + job.ArticleMap[&article.MessageID] = article + job.MessageIDs = append(job.MessageIDs, &article.MessageID) + } + if redis_cached == uint64(len(articles)) { + if VERBOSE { + log.Printf("Newsgroup: '%s' | All %d articles in batch are cached in Redis in job #%d (skip CHECK)", *ttMode.Newsgroup, len(articles), job.JobID) + } + return job.QuitResponseChan(), nil + } + if VERBOSE && redis_cached > 0 { + log.Printf("Newsgroup: '%s' | Redis got %d/%d cached articles in job #%d (before CHECK)", *ttMode.Newsgroup, redis_cached, len(articles), job.JobID) } } else { // No Redis - add all non-nil message IDs @@ -1640,25 +1650,24 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl if article == nil { continue } - batchedJob.Articles = append(batchedJob.Articles, article) - batchedJob.ArticleMap[&article.MessageID] = article - batchedJob.MessageIDs = append(batchedJob.MessageIDs, &article.MessageID) + job.Articles = append(job.Articles, article) + job.ArticleMap[&article.MessageID] = article + job.MessageIDs = append(job.MessageIDs, &article.MessageID) } } - - if len(batchedJob.MessageIDs) == 0 { - log.Printf("Newsgroup: '%s' | No message IDs to check in batch. (redis_cache_hits: %d)", *ttMode.Newsgroup, redis_cache_hits) - return batchedJob.QuitResponseChan(), nil + if len(job.MessageIDs) == 0 { + log.Printf("Newsgroup: '%s' | No message IDs to check in batch. (redis_cache_hits: %d)", *ttMode.Newsgroup, redis_cached) + return job.QuitResponseChan(), nil } if VERBOSE { - log.Printf("Newsgroup: '%s' | Sending CHECK commands for %d/%d articles", *ttMode.Newsgroup, len(batchedJob.MessageIDs), len(articles)) + log.Printf("Newsgroup: '%s' | Sending CHECK commands for %d/%d articles", *ttMode.Newsgroup, len(job.MessageIDs), len(articles)) } // Assign job to worker (consistent assignment + load balancing) QueuesMutex.RLock() if len(CheckQueues) == 0 { QueuesMutex.RUnlock() - log.Printf("Newsgroup: '%s' | No workers available to process batch job #%d with %d message IDs", *ttMode.Newsgroup, batchedJob.JobID, len(batchedJob.MessageIDs)) + log.Printf("Newsgroup: '%s' | No workers available to process batch job #%d with %d message IDs", *ttMode.Newsgroup, job.JobID, len(job.MessageIDs)) return nil, fmt.Errorf("no workers available") } QueuesMutex.RUnlock() @@ -1668,11 +1677,11 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl WorkersCheckChannel := CheckQueues[workerID] QueuesMutex.RUnlock() - //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queue job #%d with %d message IDs. CheckQ=%d", *ttMode.Newsgroup, workerID, batchedJob.JobID, len(batchedJob.MessageIDs), len(CheckQueues[workerID])) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queue job #%d with %d message IDs. 
CheckQ=%d", *ttMode.Newsgroup, workerID, job.JobID, len(job.MessageIDs), len(CheckQueues[workerID])) - WorkersCheckChannel <- batchedJob // checkQueue <- batchedJob - //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queued Job #%d", *ttMode.Newsgroup, workerID, batchedJob.JobID) - return batchedJob.ReturnResponseChan(), nil + WorkersCheckChannel <- job // checkQueue <- job + //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queued Job #%d", *ttMode.Newsgroup, workerID, job.JobID) + return job.ReturnResponseChan(), nil } // end func processBatch // sendArticlesBatchViaTakeThis sends multiple articles via TAKETHIS in streaming mode @@ -1685,9 +1694,6 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art // Phase 1: Send all TAKETHIS commands without waiting for responses //log.Printf("Phase 1: Sending %d TAKETHIS commands...", len(articles)) - //commandIDs := make([]uint, 0, len(articles)) - //checkArticles := make([]*models.Article, 0, len(articles)) - // Batch check Redis cache using pipeline before sending TAKETHIS if redisCli != nil { pipe := redisCli.Pipeline() @@ -1717,18 +1723,27 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art if cmdErr == nil && exists > 0 { // Cached in Redis - skip this article if VERBOSE { - log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis (skip [TAKETHIS])", newsgroup, articles[i].MessageID) + log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis in job #%d (skip [TAKETHIS])", newsgroup, articles[i].MessageID, job.JobID) } - articles[i] = nil // free memory + job.Increment(nntp.IncrFLAG_REDIS_CACHED, 1) redis_cached++ + articles[i] = nil // free memory continue } // Not cached - will be sent } + if redis_cached == uint64(len(articles)) { + if VERBOSE { + log.Printf("Newsgroup: '%s' | All %d articles are cached in Redis in job #%d (skip TAKETHIS)", newsgroup, len(articles), job.JobID) + } + return redis_cached, nil + } + if VERBOSE && redis_cached > 0 { + log.Printf("Newsgroup: '%s' | Redis got %d/%d cached articles in job #%d (before TAKETHIS)", newsgroup, redis_cached, len(articles), job.JobID) + } } // Now send TAKETHIS for non-cached articles - // Tracker registration happens inside SendTakeThisArticleStreaming (like SendCheckMultiple) var sentCount int conn.Lock() var ttxBytes uint64 @@ -1737,7 +1752,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art astart2 := start for _, article := range articles { if article == nil { - continue // Skip cached articles + continue // Skip cached article } astart = time.Now() // Send TAKETHIS command with article content (non-blocking) @@ -2260,7 +2275,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } break } - currentJob.OffsetQ.Done() + currentJob.OffsetQ.OffsetBatchDone() if currentJob.TTMode.UseCHECK() { //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d waits to check %d message IDs in batches of %d", *currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs), BatchCheck) @@ -2469,10 +2484,22 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue case 438: //log.Printf("Newsgroup: '%s' | Got Response: Unwanted Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) job.Increment(nntp.IncrFLAG_UNWANTED, 1) + if rs.redisCli != nil { + err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err() + if err != nil && VERBOSE { + log.Printf("Newsgroup: '%s' | Failed to cache rejected message ID in Redis: 
%v", *rr.Job.Newsgroup, err) + } + } case 431: //log.Printf("Newsgroup: '%s' | Got Response: Retry Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) job.Increment(nntp.IncrFLAG_RETRY, 1) + if rs.redisCli != nil { + err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err() + if err != nil && VERBOSE { + log.Printf("Newsgroup: '%s' | Failed to cache rejected message ID in Redis: %v", *rr.Job.Newsgroup, err) + } + } default: log.Printf("Newsgroup: '%s' | Unknown CHECK response: line='%s' code=%d expected msgID %s", *job.Newsgroup, respData.Line, respData.Code, *rr.MsgID) @@ -2688,7 +2715,6 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // Send TAKETHIS commands using existing function redis_cached, err := sendArticlesBatchViaTakeThis(conn, wantedArticles, job, *job.Newsgroup, rs.redisCli, demuxer, readTAKETHISResponsesChan) //common.ChanRelease(flipflopChan) - job.Increment(nntp.IncrFLAG_REDIS_CACHED, redis_cached) rs.BlockTT() if err != nil { log.Printf("Newsgroup: '%s' | TTworker (%d): Error in TAKETHIS job #%d: %v", *job.Newsgroup, workerID, job.JobID, err) @@ -3154,7 +3180,7 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { progressList = append(progressList, ProgressInfo{ Name: name, - OffsetStart: progress.OffsetStart + progress.BatchStart, + OffsetStart: progress.OffsetStart, BatchStart: progress.BatchStart, BatchEnd: progress.BatchEnd, TotalArticles: progress.TotalArticles, diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 02df6d2..078072e 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -282,7 +282,7 @@ func (o *OffsetQueue) Wait(n int) { o.mux.Lock() if time.Since(start).Milliseconds() > 1000 { - log.Printf("OffsetQueue: waited (%d ms) for %d batches to finish, currently queued: %d", time.Since(start).Milliseconds(), n, o.queued) + log.Printf("Newsgroup: '%s' | OffsetQueue: waited (%d ms) for %d batches to finish, currently queued: %d", *o.Newsgroup, time.Since(start).Milliseconds(), n, o.queued) } o.isleep = o.isleep / 2 if o.isleep < time.Millisecond { @@ -291,8 +291,11 @@ func (o *OffsetQueue) Wait(n int) { o.mux.Unlock() return } - if time.Since(lastPrint) > time.Second { - log.Printf("OffsetQueue: waiting for batches to finish, currently queued: %d", o.queued) + if time.Since(lastPrint) > time.Second*5 { + if common.WantShutdown() { + return + } + log.Printf("Newsgroup: '%s' | OffsetQueue: waiting for batches to finish, currently queued: %d", *o.Newsgroup, o.queued) lastPrint = time.Now() } o.mux.RUnlock() @@ -306,7 +309,7 @@ func (o *OffsetQueue) Wait(n int) { } } -func (o *OffsetQueue) Done() { +func (o *OffsetQueue) OffsetBatchDone() { o.mux.Lock() defer o.mux.Unlock() o.queued-- @@ -404,10 +407,12 @@ func (job *CHTTJob) ReturnResponseChan() chan *TTResponse { } func (job *CHTTJob) QuitResponseChan() chan *TTResponse { + job.OffsetQ.OffsetBatchDone() job.Response(true, nil) job.Mux.RLock() defer job.Mux.RUnlock() if job.ResponseChan != nil { + log.Printf("Newsgroup: '%s' | CHTTJob.QuitResponseChan(): returning closed ResponseChan for job #%d", *job.Newsgroup, job.JobID) return job.ResponseChan } return nil @@ -439,10 +444,11 @@ var NewsgroupTransferProgressMap = make(map[string]*NewsgroupTransferProgress) // NewsgroupProgress tracks the progress of a newsgroup transfer type NewsgroupTransferProgress struct { - Mux sync.RWMutex - Newsgroup *string - Started time.Time - LastUpdated time.Time + Mux sync.RWMutex + 
Newsgroup *string + Started time.Time + LastUpdated time.Time + OffsetStart int64 BatchStart int64 BatchEnd int64 @@ -561,12 +567,13 @@ func (job *CHTTJob) AppendWantedMessageID(msgID *string) { job.Increment(IncrFLAG_WANTED, 1) } -func (job *CHTTJob) GetUpdateCounters(transferred, unwanted, rejected, checked, txErrors, connErrors *uint64) { +func (job *CHTTJob) GetUpdateCounters(transferred, unwanted, rejected, checked, redisCached, txErrors, connErrors *uint64) { job.Mux.Lock() *transferred += job.transferred *unwanted += job.unwanted *rejected += job.rejected *checked += job.checked + *redisCached += job.redisCached *txErrors += job.TxErrors *connErrors += job.ConnErrors job.Mux.Unlock() From e258e80878e0f4e416c7076adc7fc56feb36c868 Mon Sep 17 00:00:00 2001 From: go-while Date: Mon, 13 Oct 2025 04:02:01 +0000 Subject: [PATCH 052/100] testing --- cmd/nntp-transfer/main.go | 66 ++++++++++--------- internal/nntp/nntp-client-commands.go | 94 +++++++++++++++------------ 2 files changed, 88 insertions(+), 72 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 8327c52..335705c 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1316,8 +1316,8 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i go func(responseWG *sync.WaitGroup) { defer collectorWG.Done() - var amux sync.Mutex - var transferred, unwanted, rejected, checked, redis_cached, txErrors, connErrors uint64 + //var amux sync.Mutex + //var transferred, unwanted, rejected, checked, redis_cached, txErrors, connErrors uint64 var num uint64 for setup := range ttResponses { if setup == nil || setup.ResponseChan == nil { @@ -1346,9 +1346,9 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i return } // get numbers - amux.Lock() - resp.Job.GetUpdateCounters(&transferred, &unwanted, &rejected, &checked, &redis_cached, &txErrors, &connErrors) - amux.Unlock() + //amux.Lock() + //resp.Job.GetUpdateCounters(&transferred, &unwanted, &rejected, &checked, &redis_cached, &txErrors, &connErrors) + //amux.Unlock() if !resp.ForceCleanUp { return } @@ -1392,11 +1392,9 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i if VERBOSE { log.Printf("Newsgroup: '%s' | Collector: all response processors closed", ng.Name) } - amux.Lock() - result := fmt.Sprintf("END Newsgroup: '%s' | transferred: %d/%d | unwanted: %d | rejected: %d | checked: %d | TX_Errors: %d | connErrors: %d | took %v", - ng.Name, transferred, totalArticles, unwanted, rejected, checked, txErrors, connErrors, time.Since(start)) + //amux.Lock() - amux.Unlock() + //amux.Unlock() /* ngtprogress.Mux.Lock() @@ -1405,21 +1403,30 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i */ nntp.ResultsMutex.Lock() + result := fmt.Sprintf("END Newsgroup: '%s' | transferred: %d/%d | unwanted: %d | rejected: %d | checked: %d | redis: %d | TX_Errors: %d | connErrors: %d | took %v", + ng.Name, nntp.NewsgroupTransferProgressMap[ng.Name].Transferred, + totalArticles, + nntp.NewsgroupTransferProgressMap[ng.Name].Unwanted, + nntp.NewsgroupTransferProgressMap[ng.Name].Rejected, + nntp.NewsgroupTransferProgressMap[ng.Name].Checked, + nntp.NewsgroupTransferProgressMap[ng.Name].RedisCached, + nntp.NewsgroupTransferProgressMap[ng.Name].TxErrors, + nntp.NewsgroupTransferProgressMap[ng.Name].ConnErrors, + time.Since(start)) + globalTotalArticles += uint64(totalArticles) - totalTransferred += transferred - totalRedisCacheHits += redis_cached - 
totalUnwanted += unwanted - totalRejected += rejected - totalTXErrors += txErrors - totalConnErrors += connErrors + totalTransferred += nntp.NewsgroupTransferProgressMap[ng.Name].Transferred + totalRedisCacheHits += nntp.NewsgroupTransferProgressMap[ng.Name].RedisCached + totalUnwanted += nntp.NewsgroupTransferProgressMap[ng.Name].Unwanted + totalRejected += nntp.NewsgroupTransferProgressMap[ng.Name].Rejected + totalTXErrors += nntp.NewsgroupTransferProgressMap[ng.Name].TxErrors + totalConnErrors += nntp.NewsgroupTransferProgressMap[ng.Name].ConnErrors results = append(results, result) // Mark newsgroup as finished if progress, exists := nntp.NewsgroupTransferProgressMap[ng.Name]; exists { progress.Mux.Lock() progress.Finished = true progress.LastUpdated = time.Now() - progress.TXBytes += progress.TXBytesTMP - progress.TXBytesTMP = 0 progress.LastCronTX = progress.LastUpdated progress.Mux.Unlock() } @@ -1625,7 +1632,7 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl if VERBOSE { log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis in job #%d (skip CHECK)", *ttMode.Newsgroup, article.MessageID, job.JobID) } - job.Increment(nntp.IncrFLAG_REDIS_CACHED, 1) + job.NGTProgress.Increment(nntp.IncrFLAG_REDIS_CACHED, 1) redis_cached++ articles[i] = nil continue @@ -1725,7 +1732,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art if VERBOSE { log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis in job #%d (skip [TAKETHIS])", newsgroup, articles[i].MessageID, job.JobID) } - job.Increment(nntp.IncrFLAG_REDIS_CACHED, 1) + job.NGTProgress.Increment(nntp.IncrFLAG_REDIS_CACHED, 1) redis_cached++ articles[i] = nil // free memory continue @@ -2426,8 +2433,8 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue took := time.Since(start) tookTime += took responseCount++ - rr.Job.Increment(nntp.IncrFLAG_CHECKED, 1) - if rr.N == 1 && took.Milliseconds() > 100 { + rr.Job.NGTProgress.Increment(nntp.IncrFLAG_CHECKED, 1) + if rr.N == 1 && took.Milliseconds() > 1000 { log.Printf("CheckWorker (%d): time to first response for msgID: %s (cmdID=%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, took.Milliseconds()) tookTime = 0 } else if responseCount >= 10000 { @@ -2483,7 +2490,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue case 438: //log.Printf("Newsgroup: '%s' | Got Response: Unwanted Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) - job.Increment(nntp.IncrFLAG_UNWANTED, 1) + job.NGTProgress.Increment(nntp.IncrFLAG_UNWANTED, 1) if rs.redisCli != nil { err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err() if err != nil && VERBOSE { @@ -2493,7 +2500,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue case 431: //log.Printf("Newsgroup: '%s' | Got Response: Retry Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) - job.Increment(nntp.IncrFLAG_RETRY, 1) + job.NGTProgress.Increment(nntp.IncrFLAG_RETRY, 1) if rs.redisCli != nil { err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err() if err != nil && VERBOSE { @@ -2603,7 +2610,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue if respData.Err != nil { log.Printf("ERROR TTResponseWorker (%d): Failed to read TAKETHIS response for %s: %v", workerID, *rr.MsgID, respData.Err) - rr.Job.Increment(nntp.IncrFLAG_CONN_ERRORS, 1) + rr.Job.NGTProgress.Increment(nntp.IncrFLAG_CONN_ERRORS, 1) 
rr.ClearReadRequest() conn.ForceCloseConn() return @@ -2620,7 +2627,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue switch respData.Code { case 239: rr.Job.TTMode.IncrementSuccess() - rr.Job.Increment(nntp.IncrFLAG_TRANSFERRED, 1) + rr.Job.NGTProgress.Increment(nntp.IncrFLAG_TRANSFERRED, 1) // Cache in Redis if enabled (inline, no separate tracker struct needed) if rs.redisCli != nil { err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err() @@ -2630,7 +2637,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } case 439: - rr.Job.Increment(nntp.IncrFLAG_REJECTED, 1) + rr.Job.NGTProgress.Increment(nntp.IncrFLAG_REJECTED, 1) // Cache rejection in Redis if enabled if rs.redisCli != nil { err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err() @@ -2639,14 +2646,13 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } } if VERBOSE { - log.Printf("Newsgroup: '%s' | Rejected article '%s': response=%d", - *rr.Job.Newsgroup, *rr.MsgID, respData.Code) + log.Printf("Newsgroup: '%s' | Rejected article '%s': response=%d", *rr.Job.Newsgroup, *rr.MsgID, respData.Code) } case 400, 480, 500, 501, 502, 503, 504: log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': response=%d", *rr.Job.Newsgroup, *rr.MsgID, respData.Code) - rr.Job.Increment(nntp.IncrFLAG_TX_ERRORS, 1) + rr.Job.NGTProgress.Increment(nntp.IncrFLAG_TX_ERRORS, 1) rr.ClearReadRequest() conn.ForceCloseConn() return @@ -2654,7 +2660,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue default: log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': unknown response=%d", *rr.Job.Newsgroup, *rr.MsgID, respData.Code) - rr.Job.Increment(nntp.IncrFLAG_TX_ERRORS, 1) + rr.Job.NGTProgress.Increment(nntp.IncrFLAG_TX_ERRORS, 1) } rr.ClearReadRequest() diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 078072e..17eeab9 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -379,22 +379,22 @@ type CHTTJob struct { ArticleMap map[*string]*models.Article MessageIDs []*string WantedIDs []*string - checked uint64 - wanted uint64 - unwanted uint64 - rejected uint64 - retry uint64 - transferred uint64 - redisCached uint64 - TxErrors uint64 - TmpTxBytes uint64 - TTxBytes uint64 - ConnErrors uint64 - OffsetStart int64 - BatchStart int64 - BatchEnd int64 - OffsetQ *OffsetQueue - NGTProgress *NewsgroupTransferProgress + //checked uint64 + //wanted uint64 + //unwanted uint64 + //rejected uint64 + //retry uint64 + //transferred uint64 + //redisCached uint64 + //TxErrors uint64 + //ConnErrors uint64 + TmpTxBytes uint64 + TTxBytes uint64 + OffsetStart int64 + BatchStart int64 + BatchEnd int64 + OffsetQ *OffsetQueue + NGTProgress *NewsgroupTransferProgress } func (job *CHTTJob) ReturnResponseChan() chan *TTResponse { @@ -453,7 +453,15 @@ type NewsgroupTransferProgress struct { BatchStart int64 BatchEnd int64 TotalArticles int64 + Checked uint64 + Wanted uint64 + Unwanted uint64 + Rejected uint64 + Retry uint64 + Transferred uint64 RedisCached uint64 + TxErrors uint64 + ConnErrors uint64 ArticlesTT uint64 ArticlesCH uint64 Finished bool @@ -535,28 +543,28 @@ const IncrFLAG_REDIS_CACHED = 7 const IncrFLAG_TX_ERRORS = 8 const IncrFLAG_CONN_ERRORS = 9 -func (job *CHTTJob) Increment(counter int, n uint64) { - job.Mux.Lock() - defer job.Mux.Unlock() +func (ntp *NewsgroupTransferProgress) Increment(counter int, 
n uint64) { + ntp.Mux.Lock() + defer ntp.Mux.Unlock() switch counter { case IncrFLAG_CHECKED: - job.checked += n + ntp.Checked += n case IncrFLAG_WANTED: - job.wanted += n + ntp.Wanted += n case IncrFLAG_UNWANTED: - job.unwanted += n + ntp.Unwanted += n case IncrFLAG_REJECTED: - job.rejected += n + ntp.Rejected += n case IncrFLAG_RETRY: - job.retry += n + ntp.Retry += n case IncrFLAG_TRANSFERRED: - job.transferred += n + ntp.Transferred += n case IncrFLAG_REDIS_CACHED: - job.redisCached += n + ntp.RedisCached += n case IncrFLAG_TX_ERRORS: - job.TxErrors += n + ntp.TxErrors += n case IncrFLAG_CONN_ERRORS: - job.ConnErrors += n + ntp.ConnErrors += n } } @@ -564,20 +572,22 @@ func (job *CHTTJob) AppendWantedMessageID(msgID *string) { job.Mux.Lock() job.WantedIDs = append(job.WantedIDs, msgID) job.Mux.Unlock() - job.Increment(IncrFLAG_WANTED, 1) -} - -func (job *CHTTJob) GetUpdateCounters(transferred, unwanted, rejected, checked, redisCached, txErrors, connErrors *uint64) { - job.Mux.Lock() - *transferred += job.transferred - *unwanted += job.unwanted - *rejected += job.rejected - *checked += job.checked - *redisCached += job.redisCached - *txErrors += job.TxErrors - *connErrors += job.ConnErrors - job.Mux.Unlock() -} + job.NGTProgress.Increment(IncrFLAG_WANTED, 1) +} + +/* +func (job *CHTTJob) xxGetUpdateCounters(transferred, unwanted, rejected, checked, redisCached, txErrors, connErrors *uint64) { + job.NGTProgress.Mux.Lock() + *transferred = job.NGTProgress.transferred + *unwanted = job.NGTProgress.unwanted + *rejected = job.NGTProgress.rejected + *checked = job.NGTProgress.checked + *redisCached = job.NGTProgress.redisCached + *txErrors = job.NGTProgress.TxErrors + *connErrors = job.NGTProgress.ConnErrors + job.NGTProgress.Mux.Unlock() +} +*/ func (ttMode *TakeThisMode) UseCHECK() bool { ttMode.mux.Lock() From bf1ce322b0fac1051a40622a66f7545c871faee2 Mon Sep 17 00:00:00 2001 From: go-while Date: Mon, 13 Oct 2025 12:26:11 +0000 Subject: [PATCH 053/100] testing --- cmd/nntp-transfer/main.go | 125 ++++++++++++++------------ internal/nntp/nntp-client-commands.go | 4 + 2 files changed, 72 insertions(+), 57 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 335705c..6ce22d9 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -112,6 +112,8 @@ var StartDate string var EndDate string var GlobalSpeed uint64 +var totalTransferred, totalChecked, totalWanted, totalUnwanted, totalRejected, totalRetry, totalSkipped, totalRedisCacheHits, totalTXErrors, totalConnErrors, globalTotalArticles, nothingInDateRange uint64 + func CalcGlobalSpeed() { for { time.Sleep(time.Second * 3) @@ -1095,9 +1097,6 @@ func matchesAnyWildcardPattern(newsgroup string, patterns []string) bool { return false } -var totalTransferred, totalUnwanted, totalRejected, totalRedisCacheHits, totalTXErrors, totalConnErrors, globalTotalArticles, nothingInDateRange uint64 -var transferMutex sync.Mutex - // runTransfer performs the actual article transfer process func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchCheck int, maxThreads int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, wgP *sync.WaitGroup, redisCli *redis.Client) error { defer wgP.Done() @@ -1126,9 +1125,9 @@ func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchChe } err := transferNewsgroup(db, ng, batchCheck, dryRun, startTime, endTime, debugCapture, redisCli) if err == ErrNotInDateRange { - transferMutex.Lock() + nntp.ResultsMutex.Lock() 
nothingInDateRange++ - transferMutex.Unlock() + nntp.ResultsMutex.Unlock() err = nil // not a real error } if err != nil { @@ -1139,21 +1138,17 @@ func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchChe nntp.ResultsMutex.Unlock() }(ng, &wg, redisCli) } - + nntp.ResultsMutex.Lock() // Wait for all transfers to complete - wg.Wait() - transferMutex.Lock() - defer transferMutex.Unlock() if nothingInDateRange > 0 { log.Printf("Note: %d newsgroups had no articles in the specified date range", nothingInDateRange) } - nntp.ResultsMutex.Lock() for _, result := range results { log.Print(result) } + log.Printf("Summary: total: %d | transferred: %d | cache_hits: %d | checked: %d | wanted: %d | unwanted: %d | rejected: %d | retry: %d | skipped: %d | TX_Errors: %d | connErrors: %d", + globalTotalArticles, totalTransferred, totalRedisCacheHits, totalChecked, totalWanted, totalUnwanted, totalRejected, totalRetry, totalSkipped, totalTXErrors, totalConnErrors) nntp.ResultsMutex.Unlock() - log.Printf("Summary: transferred: %d | redis_cache_hits: %d | unwanted: %d | rejected: %d | TX_Errors: %d | connErrors: %d", - totalTransferred, totalRedisCacheHits, totalUnwanted, totalRejected, totalTXErrors, totalConnErrors) return nil } @@ -1239,7 +1234,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i nntp.ResultsMutex.Unlock() // Get total article count first with date filtering - totalArticles, err := getArticleCountWithDateFilter(db, groupDBsA, startTime, endTime) + totalNGArticles, err := getArticleCountWithDateFilter(db, groupDBsA, startTime, endTime) if err != nil { if ferr := db.ForceCloseGroupDBs(groupDBsA); ferr != nil { log.Printf("ForceCloseGroupDBs error for '%s': %v", ng.Name, ferr) @@ -1251,7 +1246,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i //log.Printf("Newsgroup: '%s' | transferNewsgroup: Closed group DBs, checking if articles exist...", ng.Name) - if totalArticles == 0 { + if totalNGArticles == 0 { if ferr := db.ForceCloseGroupDBs(groupDBsA); ferr != nil { log.Printf("ForceCloseGroupDBs error for '%s': %v", ng.Name, ferr) } @@ -1279,16 +1274,16 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i // Initialize newsgroup progress tracking nntp.ResultsMutex.Lock() - nntp.NewsgroupTransferProgressMap[ng.Name].TotalArticles = totalArticles + nntp.NewsgroupTransferProgressMap[ng.Name].TotalArticles = totalNGArticles nntp.NewsgroupTransferProgressMap[ng.Name].LastUpdated = time.Now() ngtprogress := nntp.NewsgroupTransferProgressMap[ng.Name] nntp.ResultsMutex.Unlock() if dryRun { if startTime != nil || endTime != nil { - log.Printf("DRY RUN: Would transfer %d articles from newsgroup %s (within specified date range)", totalArticles, ng.Name) + log.Printf("DRY RUN: Would transfer %d articles from newsgroup %s (within specified date range)", totalNGArticles, ng.Name) } else { - log.Printf("DRY RUN: Would transfer %d articles from newsgroup %s", totalArticles, ng.Name) + log.Printf("DRY RUN: Would transfer %d articles from newsgroup %s", totalNGArticles, ng.Name) } if !debugCapture { return nil @@ -1296,15 +1291,15 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } if !dryRun && !debugCapture { - log.Printf("+ Found %d articles in newsgroup %s", totalArticles, ng.Name) + log.Printf("+ Found %d articles in newsgroup %s", totalNGArticles, ng.Name) } - remainingArticles := totalArticles + remainingArticles := totalNGArticles ttMode := 
&nntp.TakeThisMode{ Newsgroup: &ng.Name, CheckMode: CHECK_FIRST, } - ttResponses := make(chan *nntp.TTSetup, totalArticles/int64(batchCheck)+2) + ttResponses := make(chan *nntp.TTSetup, totalNGArticles/int64(batchCheck)+2) start := time.Now() // WaitGroup to ensure collector goroutine finishes before returning @@ -1403,40 +1398,42 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i */ nntp.ResultsMutex.Lock() - result := fmt.Sprintf("END Newsgroup: '%s' | transferred: %d/%d | unwanted: %d | rejected: %d | checked: %d | redis: %d | TX_Errors: %d | connErrors: %d | took %v", - ng.Name, nntp.NewsgroupTransferProgressMap[ng.Name].Transferred, - totalArticles, - nntp.NewsgroupTransferProgressMap[ng.Name].Unwanted, - nntp.NewsgroupTransferProgressMap[ng.Name].Rejected, - nntp.NewsgroupTransferProgressMap[ng.Name].Checked, - nntp.NewsgroupTransferProgressMap[ng.Name].RedisCached, - nntp.NewsgroupTransferProgressMap[ng.Name].TxErrors, - nntp.NewsgroupTransferProgressMap[ng.Name].ConnErrors, + ngtprogress.Mux.Lock() + + result := fmt.Sprintf("END Newsgroup: '%s' total: %d | transferred: %d | cache_hits: %d | checked: %d | wanted: %d | unwanted: %d | rejected: %d | retry: %d | skipped: %d | TX_Errors: %d | connErrors: %d | took %v", + ng.Name, totalNGArticles, + ngtprogress.Transferred, ngtprogress.RedisCached, ngtprogress.Checked, + ngtprogress.Wanted, ngtprogress.Unwanted, ngtprogress.Rejected, + ngtprogress.Retry, ngtprogress.Skipped, + ngtprogress.TxErrors, ngtprogress.ConnErrors, time.Since(start)) - globalTotalArticles += uint64(totalArticles) - totalTransferred += nntp.NewsgroupTransferProgressMap[ng.Name].Transferred - totalRedisCacheHits += nntp.NewsgroupTransferProgressMap[ng.Name].RedisCached - totalUnwanted += nntp.NewsgroupTransferProgressMap[ng.Name].Unwanted - totalRejected += nntp.NewsgroupTransferProgressMap[ng.Name].Rejected - totalTXErrors += nntp.NewsgroupTransferProgressMap[ng.Name].TxErrors - totalConnErrors += nntp.NewsgroupTransferProgressMap[ng.Name].ConnErrors - results = append(results, result) + globalTotalArticles += uint64(totalNGArticles) + totalTransferred += ngtprogress.Transferred + totalRedisCacheHits += ngtprogress.RedisCached + totalWanted += ngtprogress.Wanted + totalUnwanted += ngtprogress.Unwanted + totalChecked += ngtprogress.Checked + totalRejected += ngtprogress.Rejected + totalRetry += ngtprogress.Retry + totalSkipped += ngtprogress.Skipped + totalTXErrors += ngtprogress.TxErrors + totalConnErrors += ngtprogress.ConnErrors + // Mark newsgroup as finished - if progress, exists := nntp.NewsgroupTransferProgressMap[ng.Name]; exists { - progress.Mux.Lock() - progress.Finished = true - progress.LastUpdated = time.Now() - progress.LastCronTX = progress.LastUpdated - progress.Mux.Unlock() - } - if VERBOSE { - for _, msgId := range rejectedArticles[ng.Name] { - // prints all at the end again - log.Printf("END Newsgroup: '%s' | REJECTED '%s'", ng.Name, msgId) - } - delete(rejectedArticles, ng.Name) // free memory + ngtprogress.Finished = true + ngtprogress.LastUpdated = time.Now() + ngtprogress.LastCronTX = ngtprogress.LastUpdated + ngtprogress.Mux.Unlock() + + results = append(results, result) + + for _, msgId := range rejectedArticles[ng.Name] { + // prints all at the end again + log.Printf("END Newsgroup: '%s' | REJECTED '%s'", ng.Name, msgId) } + delete(rejectedArticles, ng.Name) // free memory + nntp.ResultsMutex.Unlock() }(&responseWG) OffsetQueue := &nntp.OffsetQueue{ @@ -1449,7 +1446,7 @@ func transferNewsgroup(db 
*database.Database, ng *models.Newsgroup, batchCheck i var offset int64 // Get articles in database batches (much larger than network batches) - for articlesProcessed < totalArticles { + for articlesProcessed < totalNGArticles { if common.WantShutdown() { log.Printf("WantShutdown in newsgroup: '%s' (processed %d articles)", ng.Name, articlesProcessed) return nil @@ -1485,7 +1482,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i for _, a := range articles { size += a.Bytes } - log.Printf("Newsgroup: '%s' | Loaded %d articles from database (processed %d/%d) (Bytes=%d) took %v", ng.Name, len(articles), articlesProcessed, totalArticles, size, time.Since(start)) + log.Printf("Newsgroup: '%s' | Loaded %d articles from database (processed %d/%d) (Bytes=%d) took %v", ng.Name, len(articles), articlesProcessed, totalNGArticles, size, time.Since(start)) //} // Process articles in network batches for i := 0; i < len(articles); i += batchCheck { @@ -1515,7 +1512,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } remainingArticles -= int64(len(articles)) if VERBOSE { - log.Printf("Newsgroup: '%s' | Pushed to queue (processed %d/%d) remaining: %d (Check=%t)", ng.Name, articlesProcessed, totalArticles, remainingArticles, ttMode.UseCHECK()) + log.Printf("Newsgroup: '%s' | Pushed to queue (processed %d/%d) remaining: %d (Check=%t)", ng.Name, articlesProcessed, totalNGArticles, remainingArticles, ttMode.UseCHECK()) //log.Printf("Newsgroup: '%s' | Pushed (processed %d/%d) total: %d/%d (unw: %d / rej: %d) (Check=%t)", ng.Name, articlesProcessed, totalArticles, transferred, remainingArticles, ttMode.Unwanted, ttMode.Rejected, ttMode.GetMode()) } @@ -1774,11 +1771,13 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art ttxBytes += uint64(txBytes) if err != nil { if err == common.ErrNoNewsgroups { + job.NGTProgress.Increment(nntp.IncrFLAG_SKIPPED, 1) log.Printf("Newsgroup: '%s' | skipped TAKETHIS '%s': no newsgroups header", newsgroup, article.MessageID) continue } conn.Unlock() conn.ForceCloseConn() + job.NGTProgress.Increment(nntp.IncrFLAG_CONN_ERRORS, 1) log.Printf("ERROR Newsgroup: '%s' | Failed to send TAKETHIS for %s: %v", newsgroup, article.MessageID, err) return redis_cached, fmt.Errorf("failed to send TAKETHIS for %s: %v", article.MessageID, err) } @@ -3045,10 +3044,14 @@ func handleIndex(w http.ResponseWriter, r *http.Request) {

Summary Statistics:
Total Articles: {{.TotalArticles}}
- Transferred: {{.TotalTransferred}}
- Redis Cache Hits: {{.TotalRedisCacheHits}}
+ Cache Hits: {{.TotalRedisCacheHits}}
+ Checked: {{.TotalChecked}}
+ Wanted: {{.TotalWanted}}
Unwanted: {{.TotalUnwanted}}
+ Transferred: {{.TotalTransferred}}
Rejected: {{.TotalRejected}}
+ Retry: {{.TotalRetry}}
+ Skipped: {{.TotalSkipped}}
TX Errors: {{.TotalTXErrors}}
Conn Errors: {{.TotalConnErrors}} {{else}} @@ -3061,10 +3064,14 @@ func handleIndex(w http.ResponseWriter, r *http.Request) {
Live Statistics:
Total Articles: {{.TotalArticles}}
- Transferred: {{.TotalTransferred}}
- Redis Cache Hits: {{.TotalRedisCacheHits}}
+ Cache Hits: {{.TotalRedisCacheHits}}
+ Checked: {{.TotalChecked}}
+ Wanted: {{.TotalWanted}}
Unwanted: {{.TotalUnwanted}}
+ Transferred: {{.TotalTransferred}}
Rejected: {{.TotalRejected}}
+ Retry: {{.TotalRetry}}
+ Skipped: {{.TotalSkipped}}
TX Errors: {{.TotalTXErrors}}
Conn Errors: {{.TotalConnErrors}} {{end}} @@ -3223,6 +3230,8 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { TotalTransferred uint64 TotalRedisCacheHits uint64 TotalUnwanted uint64 + TotalWanted uint64 + TotalChecked uint64 TotalRejected uint64 TotalTXErrors uint64 TotalConnErrors uint64 @@ -3242,6 +3251,8 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { TotalTransferred: totalTransferred, TotalRedisCacheHits: totalRedisCacheHits, TotalUnwanted: totalUnwanted, + TotalWanted: totalWanted, + TotalChecked: totalChecked, TotalRejected: totalRejected, TotalTXErrors: totalTXErrors, TotalConnErrors: totalConnErrors, diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 17eeab9..f95105a 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -462,6 +462,7 @@ type NewsgroupTransferProgress struct { RedisCached uint64 TxErrors uint64 ConnErrors uint64 + Skipped uint64 ArticlesTT uint64 ArticlesCH uint64 Finished bool @@ -542,6 +543,7 @@ const IncrFLAG_TRANSFERRED = 6 const IncrFLAG_REDIS_CACHED = 7 const IncrFLAG_TX_ERRORS = 8 const IncrFLAG_CONN_ERRORS = 9 +const IncrFLAG_SKIPPED = 10 func (ntp *NewsgroupTransferProgress) Increment(counter int, n uint64) { ntp.Mux.Lock() @@ -565,6 +567,8 @@ func (ntp *NewsgroupTransferProgress) Increment(counter int, n uint64) { ntp.TxErrors += n case IncrFLAG_CONN_ERRORS: ntp.ConnErrors += n + case IncrFLAG_SKIPPED: + ntp.Skipped += n } } From 13ae93a25d95a63b4326de37b0d8fe8dcce859c2 Mon Sep 17 00:00:00 2001 From: go-while Date: Mon, 13 Oct 2025 12:28:45 +0000 Subject: [PATCH 054/100] testing --- cmd/nntp-transfer/main.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 6ce22d9..63c0099 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -3227,12 +3227,13 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { ServerHostName string GlobalSpeed uint64 TotalArticles uint64 - TotalTransferred uint64 TotalRedisCacheHits uint64 + TotalChecked uint64 TotalUnwanted uint64 TotalWanted uint64 - TotalChecked uint64 + TotalTransferred uint64 TotalRejected uint64 + TotalRetry uint64 TotalTXErrors uint64 TotalConnErrors uint64 }{ @@ -3248,12 +3249,13 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { ServerHostName: ServerHostName, GlobalSpeed: GlobalSpeed, TotalArticles: globalTotalArticles, - TotalTransferred: totalTransferred, TotalRedisCacheHits: totalRedisCacheHits, - TotalUnwanted: totalUnwanted, - TotalWanted: totalWanted, TotalChecked: totalChecked, + TotalWanted: totalWanted, + TotalUnwanted: totalUnwanted, + TotalTransferred: totalTransferred, TotalRejected: totalRejected, + TotalRetry: totalRetry, TotalTXErrors: totalTXErrors, TotalConnErrors: totalConnErrors, } From 84dc35a7f2b20689baf6ce0ae1e15d149157d697 Mon Sep 17 00:00:00 2001 From: go-while Date: Tue, 14 Oct 2025 00:06:36 +0000 Subject: [PATCH 055/100] testing --- cmd/nntp-transfer/main.go | 281 ++++++----- internal/common/common.go | 40 +- internal/nntp/nntp-client-commands.go | 622 ------------------------- internal/nntp/nntp-transfer-demuxer.go | 213 +++++++++ internal/nntp/nntp-transfer.go | 459 ++++++++++++++++++ web/templates/admin_postqueue.html | 4 +- 6 files changed, 870 insertions(+), 749 deletions(-) create mode 100644 internal/nntp/nntp-transfer-demuxer.go create mode 100644 internal/nntp/nntp-transfer.go diff --git a/cmd/nntp-transfer/main.go 
b/cmd/nntp-transfer/main.go index 63c0099..26902f0 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -112,7 +112,7 @@ var StartDate string var EndDate string var GlobalSpeed uint64 -var totalTransferred, totalChecked, totalWanted, totalUnwanted, totalRejected, totalRetry, totalSkipped, totalRedisCacheHits, totalTXErrors, totalConnErrors, globalTotalArticles, nothingInDateRange uint64 +var totalTransferred, totalTTSentCount, totalChecked, totalWanted, totalUnwanted, totalRejected, totalRetry, totalSkipped, totalRedisCacheHits, totalRedisCacheBeforeCheck, totalRedisCacheBeforeTakethis, totalTXErrors, totalConnErrors, globalTotalArticles, nothingInDateRange uint64 func CalcGlobalSpeed() { for { @@ -1146,9 +1146,10 @@ func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchChe for _, result := range results { log.Print(result) } - log.Printf("Summary: total: %d | transferred: %d | cache_hits: %d | checked: %d | wanted: %d | unwanted: %d | rejected: %d | retry: %d | skipped: %d | TX_Errors: %d | connErrors: %d", - globalTotalArticles, totalTransferred, totalRedisCacheHits, totalChecked, totalWanted, totalUnwanted, totalRejected, totalRetry, totalSkipped, totalTXErrors, totalConnErrors) + log.Printf("Summary: total: %d | transferred: %d | cache_hits: %d (before_check: %d, before_takethis: %d) | checked: %d | wanted: %d | unwanted: %d | rejected: %d | retry: %d | skipped: %d | TX_Errors: %d | connErrors: %d", + globalTotalArticles, totalTransferred, totalRedisCacheHits, totalRedisCacheBeforeCheck, totalRedisCacheBeforeTakethis, totalChecked, totalWanted, totalUnwanted, totalRejected, totalRetry, totalSkipped, totalTXErrors, totalConnErrors) nntp.ResultsMutex.Unlock() + log.Printf("Debug: StructChansCap1: %d/%d", len(common.StructChansCap1), cap(common.StructChansCap1)) return nil } @@ -1410,7 +1411,10 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i globalTotalArticles += uint64(totalNGArticles) totalTransferred += ngtprogress.Transferred + totalTTSentCount += ngtprogress.TTSentCount totalRedisCacheHits += ngtprogress.RedisCached + totalRedisCacheBeforeCheck += ngtprogress.RedisCachedBeforeCheck + totalRedisCacheBeforeTakethis += ngtprogress.RedisCachedBeforeTakethis totalWanted += ngtprogress.Wanted totalUnwanted += ngtprogress.Unwanted totalChecked += ngtprogress.Checked @@ -1436,6 +1440,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i nntp.ResultsMutex.Unlock() }(&responseWG) + OffsetQueue := &nntp.OffsetQueue{ Newsgroup: &ng.Name, MaxQueuedJobs: MaxQueuedJobs, @@ -1599,54 +1604,63 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl NGTProgress: ngtprogress, } var redis_cached uint64 - if redisCli != nil && len(articles) > 0 { + + // Batch check Redis cache using pipeline before sending CHECK + if redisCli != nil { pipe := redisCli.Pipeline() cmds := make([]*redis.IntCmd, len(articles)) - + redis2Check := 0 // Queue all EXISTS commands for i, article := range articles { if article == nil { continue } cmds[i] = pipe.Exists(redisCtx, article.MessageID) + redis2Check++ } // Execute all in one network round trip - _, err := pipe.Exec(redisCtx) - if err != nil && VERBOSE { + if _, err := pipe.Exec(redisCtx); err != nil { log.Printf("Newsgroup: '%s' | Redis pipeline error: %v", *ttMode.Newsgroup, err) - } + } else { - // Process results and filter cached articles - for i, cmd := range cmds { - if cmd == nil || articles[i] == nil { - continue // Skip if 
command wasn't queued or article is nil - } - article := articles[i] - exists, cmdErr := cmd.Result() - if cmdErr == nil && exists > 0 { - // Cached in Redis - skip this article - if VERBOSE { - log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis in job #%d (skip CHECK)", *ttMode.Newsgroup, article.MessageID, job.JobID) + // Process results and filter cached articles + for i, cmd := range cmds { + if cmd == nil || articles[i] == nil { + log.Printf("Newsgroup: '%s' | Warning: nil Redis command or nil article in batch for job #%d (skip CHECK)", *ttMode.Newsgroup, job.JobID) + continue // Skip if command wasn't queued or article is nil } - job.NGTProgress.Increment(nntp.IncrFLAG_REDIS_CACHED, 1) - redis_cached++ - articles[i] = nil - continue + article := articles[i] + exists, cmdErr := cmd.Result() + if cmdErr == nil && exists > 0 { + // Cached in Redis - skip this article + if VERBOSE { + log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis in job #%d (skip CHECK)", *ttMode.Newsgroup, article.MessageID, job.JobID) + } + job.NGTProgress.Increment(nntp.IncrFLAG_REDIS_CACHED_BEFORE_CHECK, 1) + redis_cached++ + articles[i] = nil + continue + } + if cmdErr != nil { + log.Printf("Newsgroup: '%s' | Redis cache error for message ID '%s' in job #%d: %v (include in CHECK)", *ttMode.Newsgroup, article.MessageID, job.JobID, cmdErr) + } + // Not cached - add to valid list + job.Articles = append(job.Articles, article) + job.ArticleMap[&article.MessageID] = article + job.MessageIDs = append(job.MessageIDs, &article.MessageID) } - // Not cached - add to valid list - job.Articles = append(job.Articles, article) - job.ArticleMap[&article.MessageID] = article - job.MessageIDs = append(job.MessageIDs, &article.MessageID) } if redis_cached == uint64(len(articles)) { if VERBOSE { log.Printf("Newsgroup: '%s' | All %d articles in batch are cached in Redis in job #%d (skip CHECK)", *ttMode.Newsgroup, len(articles), job.JobID) } return job.QuitResponseChan(), nil - } - if VERBOSE && redis_cached > 0 { - log.Printf("Newsgroup: '%s' | Redis got %d/%d cached articles in job #%d (before CHECK)", *ttMode.Newsgroup, redis_cached, len(articles), job.JobID) + + } else if redis_cached > 0 { + if VERBOSE { + log.Printf("Newsgroup: '%s' | Redis got %d/%d cached articles in job #%d (before CHECK)", *ttMode.Newsgroup, redis_cached, len(articles), job.JobID) + } } } else { // No Redis - add all non-nil message IDs @@ -1702,48 +1716,56 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art if redisCli != nil { pipe := redisCli.Pipeline() cmds := make([]*redis.IntCmd, len(articles)) - + //redis2Check := 0 // Queue all EXISTS commands (only for non-nil articles) for i, article := range articles { if article == nil { continue } cmds[i] = pipe.Exists(redisCtx, article.MessageID) + //redis2Check++ } // Execute all in one network round trip _, err := pipe.Exec(redisCtx) - if err != nil && VERBOSE { + if err != nil { log.Printf("Newsgroup: '%s' | Redis pipeline error in TAKETHIS: %v", newsgroup, err) - } + } else { - // Process results and filter cached articles - for i, cmd := range cmds { - if cmd == nil || articles[i] == nil { - continue // Skip if command wasn't queued or article is nil - } + // Process results and filter cached articles + for i, cmd := range cmds { + if cmd == nil || articles[i] == nil { + log.Printf("Newsgroup: '%s' | Warning: nil Redis command or nil article in TAKETHIS batch for job #%d (skip)", newsgroup, job.JobID) + continue // Skip if command wasn't 
queued or article is nil + } - exists, cmdErr := cmd.Result() - if cmdErr == nil && exists > 0 { - // Cached in Redis - skip this article - if VERBOSE { - log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis in job #%d (skip [TAKETHIS])", newsgroup, articles[i].MessageID, job.JobID) + exists, cmdErr := cmd.Result() + if cmdErr == nil && exists > 0 { + // Cached in Redis - skip this article + if VERBOSE { + log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis in job #%d (skip [TAKETHIS])", newsgroup, articles[i].MessageID, job.JobID) + } + job.NGTProgress.Increment(nntp.IncrFLAG_REDIS_CACHED_BEFORE_TAKETHIS, 1) + redis_cached++ + articles[i] = nil // free memory + continue } - job.NGTProgress.Increment(nntp.IncrFLAG_REDIS_CACHED, 1) - redis_cached++ - articles[i] = nil // free memory - continue + if cmdErr != nil { + log.Printf("Newsgroup: '%s' | Redis cache error for message ID '%s' in job #%d: %v (include in TAKETHIS)", newsgroup, articles[i].MessageID, job.JobID, cmdErr) + } + // Not cached - will be sent } - // Not cached - will be sent } if redis_cached == uint64(len(articles)) { if VERBOSE { log.Printf("Newsgroup: '%s' | All %d articles are cached in Redis in job #%d (skip TAKETHIS)", newsgroup, len(articles), job.JobID) } return redis_cached, nil - } - if VERBOSE && redis_cached > 0 { - log.Printf("Newsgroup: '%s' | Redis got %d/%d cached articles in job #%d (before TAKETHIS)", newsgroup, redis_cached, len(articles), job.JobID) + + } else if redis_cached > 0 { + if VERBOSE { + log.Printf("Newsgroup: '%s' | Redis got %d/%d cached articles in job #%d (before TAKETHIS)", newsgroup, redis_cached, len(articles), job.JobID) + } } } @@ -1782,6 +1804,8 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art return redis_cached, fmt.Errorf("failed to send TAKETHIS for %s: %v", article.MessageID, err) } sentCount++ + job.NGTProgress.Increment(nntp.IncrFLAG_TTSentCount, 1) + if VERBOSE { log.Printf("Newsgroup: '%s' | DONE TAKETHIS '%s' CmdID=%d (%d/%d sent) in %v awaiting responses astart2='%v'", newsgroup, article.MessageID, cmdID, sentCount, len(articles), time.Since(astart), time.Since(astart2)) } @@ -1880,8 +1904,8 @@ func BootConnWorkers(pool *nntp.Pool, redisCli *redis.Client) { TakeThisQueues = make([]chan *nntp.CHTTJob, nntp.NNTPTransferThreads) WorkerQueueLength = make([]int, nntp.NNTPTransferThreads) for i := range CheckQueues { - CheckQueues[i] = make(chan *nntp.CHTTJob) // no cap! only accepts if there is a reader! - TakeThisQueues[i] = make(chan *nntp.CHTTJob, 2) // allows max 2 queued TT jobs + CheckQueues[i] = make(chan *nntp.CHTTJob, MaxQueuedJobs) // no cap! only accepts if there is a reader! 
+ TakeThisQueues[i] = make(chan *nntp.CHTTJob, MaxQueuedJobs) // allows max N queued TT jobs WorkerQueueLength[i] = 0 } QueuesMutex.Unlock() @@ -1908,7 +1932,8 @@ forever: // get connections from pool log.Printf("BootConnWorkers: need %d connections (have %d), getting from pool...", bootN, openConns) returnSignals := make([]*ReturnSignal, bootN) - errChan := make(chan struct{}, 1) + errChan := common.GetStructChanCap1() + defer common.RecycleStructChanCap1(errChan) newConns := 0 for workerID := range bootN { // Get a connection from pool @@ -2211,9 +2236,13 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue readCHECKResponsesChan := make(chan *nntp.ReadRequest, 128000) readTAKETHISResponsesChan := make(chan *nntp.ReadRequest, 128000) errChan := make(chan struct{}, 4) - tickChan := make(chan struct{}, 1) - requestReplyJobDone := make(chan struct{}, 1) - replyJobDone := make(chan struct{}, 1) + + tickChan := common.GetStructChanCap1() + requestReplyJobDone := common.GetStructChanCap1() + replyJobDone := common.GetStructChanCap1() + defer common.RecycleStructChanCap1(tickChan) + defer common.RecycleStructChanCap1(requestReplyJobDone) + defer common.RecycleStructChanCap1(replyJobDone) // Create ResponseDemuxer to eliminate race conditions in ReadCodeLine demuxer := nntp.NewResponseDemuxer(conn, errChan, BatchCheck) @@ -2390,14 +2419,12 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } if common.WantShutdown() { log.Printf("CheckWorker (%d): Read CHECK WantShutdown, exiting", workerID) - //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() return } //log.Printf("CheckWorker (%d): Read CHECK response (do conn check) for msgID: %s (cmdID=%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) if !conn.IsConnected() { log.Printf("CheckWorker (%d): Read CHECK connection lost, exiting", workerID) - //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() return } @@ -2424,7 +2451,6 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue if respData.Code == 0 && respData.Err != nil { log.Printf("Failed to read CHECK response: %v", respData.Err) - //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() return } @@ -2453,13 +2479,11 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue parts := strings.Fields(respData.Line) if len(parts) < 1 { log.Printf("ERROR in CheckWorker: Malformed CHECK response code=%d line: '%s' (cmdID=%d MID=%d/%d)", respData.Code, respData.Line, rr.CmdID, rr.N, rr.Reqs) - //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() return } if parts[0] != *rr.MsgID { log.Printf("ERROR in CheckWorker: Mismatched CHECK response: expected '%s', got '%s' code=%d (cmdID=%d MID=%d/%d)", *rr.MsgID, parts[0], respData.Code, rr.CmdID, rr.N, rr.Reqs) - //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() return } @@ -2472,7 +2496,6 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG1 Processing CHECK response for msgID: %s (cmdID=%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) if !exists { log.Printf("Newsgroup: '%s' | ERROR in CheckWorker: ReadCheckResponse msgId '%s' did not exist in jobMap.", *job.Newsgroup, *rr.MsgID) - //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() continue loop } @@ -2486,13 +2509,14 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue case 238: 
//log.Printf("Newsgroup: '%s' | Got Response: Wanted Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) job.AppendWantedMessageID(rr.MsgID) + job.NGTProgress.Increment(nntp.IncrFLAG_WANTED, 1) case 438: //log.Printf("Newsgroup: '%s' | Got Response: Unwanted Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) job.NGTProgress.Increment(nntp.IncrFLAG_UNWANTED, 1) + // Cache unwanted in Redis if enabled if rs.redisCli != nil { - err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err() - if err != nil && VERBOSE { + if err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err(); err != nil && VERBOSE { log.Printf("Newsgroup: '%s' | Failed to cache rejected message ID in Redis: %v", *rr.Job.Newsgroup, err) } } @@ -2500,12 +2524,13 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue case 431: //log.Printf("Newsgroup: '%s' | Got Response: Retry Article '%s': code=%d", *job.Newsgroup, *rr.MsgID, code) job.NGTProgress.Increment(nntp.IncrFLAG_RETRY, 1) + /* disabled caching of retries in Redis for now if rs.redisCli != nil { - err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err() - if err != nil && VERBOSE { - log.Printf("Newsgroup: '%s' | Failed to cache rejected message ID in Redis: %v", *rr.Job.Newsgroup, err) + if err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err(); err != nil && VERBOSE { + log.Printf("Newsgroup: '%s' | Failed to cache retry message ID in Redis: %v", *rr.Job.Newsgroup, err) } } + */ default: log.Printf("Newsgroup: '%s' | Unknown CHECK response: line='%s' code=%d expected msgID %s", *job.Newsgroup, respData.Line, respData.Code, *rr.MsgID) @@ -2518,12 +2543,10 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Unlock() if !qexists || !rexists { log.Printf("Newsgroup: '%s' | ERROR in CheckWorker: queuedCount or readCount did not exist for a job?!", *job.Newsgroup) - //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() continue loop } //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG4 Processing CHECK response for msgID: %s (cmdID=%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) - //rr.ReturnReadRequest(rrRetChan) rr.ClearReadRequest() if queuedCount == readCount { rs.Mux.Lock() @@ -2627,11 +2650,10 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue case 239: rr.Job.TTMode.IncrementSuccess() rr.Job.NGTProgress.Increment(nntp.IncrFLAG_TRANSFERRED, 1) - // Cache in Redis if enabled (inline, no separate tracker struct needed) + // Cache transferred in Redis if enabled if rs.redisCli != nil { - err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err() - if err != nil && VERBOSE { - log.Printf("Newsgroup: '%s' | Failed to cache message ID in Redis: %v", *rr.Job.Newsgroup, err) + if err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err(); err != nil && VERBOSE { + log.Printf("Newsgroup: '%s' | Failed to cache transferred message ID in Redis: %v", *rr.Job.Newsgroup, err) } } @@ -2639,8 +2661,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rr.Job.NGTProgress.Increment(nntp.IncrFLAG_REJECTED, 1) // Cache rejection in Redis if enabled if rs.redisCli != nil { - err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err() - if err != nil && VERBOSE { + if err := rs.redisCli.Set(redisCtx, *rr.MsgID, "1", REDIS_TTL).Err(); err != nil && VERBOSE { log.Printf("Newsgroup: '%s' | Failed to cache rejected message ID in Redis: %v", 
*rr.Job.Newsgroup, err) } } @@ -2649,8 +2670,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } case 400, 480, 500, 501, 502, 503, 504: - log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': response=%d", - *rr.Job.Newsgroup, *rr.MsgID, respData.Code) + log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': response=%d", *rr.Job.Newsgroup, *rr.MsgID, respData.Code) rr.Job.NGTProgress.Increment(nntp.IncrFLAG_TX_ERRORS, 1) rr.ClearReadRequest() conn.ForceCloseConn() @@ -3001,7 +3021,7 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { } .progress-fill { height: 100%; - background: linear-gradient(90deg, #4CAF50, #45a049); + background: linear-gradient(90deg, #285829ff, #214b23ff); transition: width 0.3s; } .progress-text { @@ -3044,7 +3064,9 @@ func handleIndex(w http.ResponseWriter, r *http.Request) {

Summary Statistics:
Total Articles: {{.TotalArticles}}
- Cache Hits: {{.TotalRedisCacheHits}}
+ Cache Hits (Total): {{.TotalRedisCacheHits}}
+   ├─ Before CHECK: {{.TotalRedisCacheBeforeCheck}}
+   └─ Before TAKETHIS: {{.TotalRedisCacheBeforeTakethis}}
Checked: {{.TotalChecked}}
Wanted: {{.TotalWanted}}
Unwanted: {{.TotalUnwanted}}
@@ -3058,16 +3080,19 @@ func handleIndex(w http.ResponseWriter, r *http.Request) {
- {{subtract .TotalNewsgroups .NewsgroupsToProcess}} / {{.TotalNewsgroups}} {{if gt .TotalNewsgroups 0}} @ {{multiply (divide (subtract .TotalNewsgroups .NewsgroupsToProcess) .TotalNewsgroups) 100}}{{else}}0{{end}}% + {{subtract .TotalNewsgroups .NewsgroupsToProcess}} / {{.TotalNewsgroups}} {{if gt .TotalNewsgroups 0}} @ {{multiply (divide (subtract .TotalNewsgroups .NewsgroupsToProcess) .TotalNewsgroups) 100}}{{else}}0{{end}}%

Live Statistics:
Total Articles: {{.TotalArticles}}
- Cache Hits: {{.TotalRedisCacheHits}}
+ Cache Hits (Total): {{.TotalRedisCacheHits}}
+   ├─ Before CHECK: {{.TotalRedisCacheBeforeCheck}}
+   └─ Before TAKETHIS: {{.TotalRedisCacheBeforeTakethis}}
Checked: {{.TotalChecked}}
Wanted: {{.TotalWanted}}
Unwanted: {{.TotalUnwanted}}
+ TTSentCount: {{.TotalTTSentCount}}
Transferred: {{.TotalTransferred}}
Rejected: {{.TotalRejected}}
Retry: {{.TotalRetry}}
@@ -3215,49 +3240,57 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { }) data := struct { - TotalNewsgroups int64 - NewsgroupsToProcess int64 - Results []string - Started int64 - Finished int64 - Progress []ProgressInfo - Timestamp string - StartDate string - EndDate string - ServerHostName string - GlobalSpeed uint64 - TotalArticles uint64 - TotalRedisCacheHits uint64 - TotalChecked uint64 - TotalUnwanted uint64 - TotalWanted uint64 - TotalTransferred uint64 - TotalRejected uint64 - TotalRetry uint64 - TotalTXErrors uint64 - TotalConnErrors uint64 + TotalNewsgroups int64 + NewsgroupsToProcess int64 + Results []string + Started int64 + Finished int64 + Progress []ProgressInfo + Timestamp string + StartDate string + EndDate string + ServerHostName string + GlobalSpeed uint64 + TotalArticles uint64 + TotalRedisCacheHits uint64 + TotalRedisCacheBeforeCheck uint64 + TotalRedisCacheBeforeTakethis uint64 + TotalChecked uint64 + TotalUnwanted uint64 + TotalWanted uint64 + TotalTransferred uint64 + TotalTTSentCount uint64 + TotalRejected uint64 + TotalRetry uint64 + TotalSkipped uint64 + TotalTXErrors uint64 + TotalConnErrors uint64 }{ - TotalNewsgroups: TotalNewsgroups, - NewsgroupsToProcess: NewsgroupsToProcess, - Results: results, - Started: started, - Finished: finished, - Progress: progressList, - Timestamp: time.Now().Format("2006-01-02 15:04:05"), - StartDate: StartDate, - EndDate: EndDate, - ServerHostName: ServerHostName, - GlobalSpeed: GlobalSpeed, - TotalArticles: globalTotalArticles, - TotalRedisCacheHits: totalRedisCacheHits, - TotalChecked: totalChecked, - TotalWanted: totalWanted, - TotalUnwanted: totalUnwanted, - TotalTransferred: totalTransferred, - TotalRejected: totalRejected, - TotalRetry: totalRetry, - TotalTXErrors: totalTXErrors, - TotalConnErrors: totalConnErrors, + TotalNewsgroups: TotalNewsgroups, + NewsgroupsToProcess: NewsgroupsToProcess, + Results: results, + Started: started, + Finished: finished, + Progress: progressList, + Timestamp: time.Now().Format("2006-01-02 15:04:05"), + StartDate: StartDate, + EndDate: EndDate, + ServerHostName: ServerHostName, + GlobalSpeed: GlobalSpeed, + TotalArticles: globalTotalArticles, + TotalRedisCacheHits: totalRedisCacheHits, + TotalRedisCacheBeforeCheck: totalRedisCacheBeforeCheck, + TotalRedisCacheBeforeTakethis: totalRedisCacheBeforeTakethis, + TotalChecked: totalChecked, + TotalWanted: totalWanted, + TotalUnwanted: totalUnwanted, + TotalTransferred: totalTransferred, + TotalTTSentCount: totalTTSentCount, + TotalRejected: totalRejected, + TotalRetry: totalRetry, + TotalSkipped: totalSkipped, + TotalTXErrors: totalTXErrors, + TotalConnErrors: totalConnErrors, } w.Header().Set("Content-Type", "text/html; charset=utf-8") diff --git a/internal/common/common.go b/internal/common/common.go index 305408d..4af1631 100644 --- a/internal/common/common.go +++ b/internal/common/common.go @@ -1,6 +1,9 @@ package common -import "sync" +import ( + "log" + "sync" +) var shutdownMutex sync.Mutex var closedShutdownChan bool @@ -48,3 +51,38 @@ func ChanRelease(lockChan chan struct{}) { // release lock <-lockChan } + +var StructChansCap1 = make(chan chan struct{}, 16384) + +// GetStructChanCap1 returns a recycled chan struct{} or makes a new one with capacity of 1 if none are available +func GetStructChanCap1() chan struct{} { + select { + case ch := <-StructChansCap1: + return ch + default: + return make(chan struct{}, 1) + } +} + +// RecycleStructChan recycles a chan struct{} for later use +func RecycleStructChanCap1(ch chan struct{}) 
{ + if cap(ch) != 1 { + log.Printf("Warning: Attempt to recycle chan struct{} with wrong capacity: %d", cap(ch)) + return + } + // empty out the channel + select { + case <-ch: + // successfully emptied + default: + // is already empty + } + // recycle it + select { + case StructChansCap1 <- ch: + // successfully recycled + default: + log.Printf("Warning: RecycleStructChan buffer full: %d", len(StructChansCap1)) + // recycle buffer full, let it go + } +} diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index f95105a..c66592f 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -8,13 +8,11 @@ import ( "log" "strconv" "strings" - "sync" "time" "github.com/go-while/go-pugleaf/internal/common" "github.com/go-while/go-pugleaf/internal/models" "github.com/go-while/go-pugleaf/internal/utils" - "github.com/redis/go-redis/v9" ) // Constants for maximum lines to read in various commands @@ -31,626 +29,6 @@ var MaxReadLinesXover int64 = 100 // XOVER command typically retrieves overview // MaxReadLinesBody Maximum lines for BODY command, which retrieves the body of an article const MaxReadLinesBody = MaxReadLinesArticle - MaxReadLinesHeaders -var NNTPTransferThreads int = 1 - -// var TakeThisQueue = make(chan *CHTTJob, NNTPTransferThreads) -//var CheckQueue = make(chan *CHTTJob, NNTPTransferThreads) - -var JobIDCounter uint64 // Atomic counter for unique job IDs - -// ResponseType indicates which handler should process a response -type ResponseType int - -const ( - TYPE_CHECK ResponseType = iota - TYPE_TAKETHIS -) - -// ResponseData holds a pre-read response from the connection -type ResponseData struct { - CmdID uint - Code int - Line string - Err error -} - -type CmdIDinfo struct { - CmdID uint - RespType ResponseType -} - -// ResponseDemuxer reads all responses from a connection in ONE goroutine -// and dispatches them to the appropriate handler channel (CHECK or TAKETHIS) -// This eliminates race conditions in concurrent ReadCodeLine calls -type ResponseDemuxer struct { - conn *BackendConn - cmdIDQ []*CmdIDinfo - signalChan chan struct{} - cmdIDQMux sync.RWMutex - LastID uint - checkResponseChan chan *ResponseData - ttResponseChan chan *ResponseData - errChan chan struct{} - started bool - startedMux sync.Mutex -} - -// NewResponseDemuxer creates a new response demultiplexer -func NewResponseDemuxer(conn *BackendConn, errChan chan struct{}, BatchCheck int) *ResponseDemuxer { - return &ResponseDemuxer{ - conn: conn, - signalChan: make(chan struct{}, 1), - checkResponseChan: make(chan *ResponseData, 128000), // Buffer for CHECK responses - ttResponseChan: make(chan *ResponseData, 128000), // Buffer for TAKETHIS responses - errChan: errChan, - started: false, - } -} - -// RegisterCommand registers a command ID with its type (CHECK or TAKETHIS) -func (d *ResponseDemuxer) RegisterCommand(cmdID uint, cmdType ResponseType) { - d.cmdIDQMux.Lock() - d.cmdIDQ = append(d.cmdIDQ, &CmdIDinfo{CmdID: cmdID, RespType: cmdType}) - d.cmdIDQMux.Unlock() - select { - case d.signalChan <- struct{}{}: - default: - } -} - -// PopCommand removes a command ID from the queue -func (d *ResponseDemuxer) PopCommand() *CmdIDinfo { - d.cmdIDQMux.Lock() - defer d.cmdIDQMux.Unlock() - - if len(d.cmdIDQ) == 0 { - return nil - } - - cmdIDInfo := d.cmdIDQ[0] - d.cmdIDQ = d.cmdIDQ[1:] - return cmdIDInfo -} - -// GetCheckResponseChan returns the channel for CHECK responses -func (d *ResponseDemuxer) GetCheckResponseChan() chan *ResponseData { - 
return d.checkResponseChan -} - -// GetTakeThisResponseChan returns the channel for TAKETHIS responses -func (d *ResponseDemuxer) GetTakeThisResponseChan() chan *ResponseData { - return d.ttResponseChan -} - -// Start launches the central response reader goroutine (call once) -func (d *ResponseDemuxer) Start() { - d.startedMux.Lock() - defer d.startedMux.Unlock() - - if d.started { - return // Already started - } - d.started = true - - go d.readAndDispatch() -} - -// readAndDispatch is the SINGLE goroutine that reads ALL responses from the shared connection -func (d *ResponseDemuxer) readAndDispatch() { - defer func() { - if r := recover(); r != nil { - log.Printf("ResponseDemuxer: panic in readAndDispatch: %v", r) - } - select { - case d.errChan <- struct{}{}: - default: - } - }() - outoforderBacklog := make(map[uint]*CmdIDinfo, 1024) - for { - select { - case <-d.errChan: - log.Printf("ResponseDemuxer: got errChan signal, exiting") - return - default: - } - - if !d.conn.IsConnected() { - log.Printf("ResponseDemuxer: connection lost, exiting") - return - } - - var cmdInfo *CmdIDinfo - if len(outoforderBacklog) > 0 { - if cmdInfoBacklog, exists := outoforderBacklog[d.LastID+1]; exists { - log.Printf("ResponseDemuxer: processing out-of-order backlog cmdID=%d d.LastID=%d", cmdInfoBacklog.CmdID, d.LastID) - cmdInfo = cmdInfoBacklog - outoforderBacklog[d.LastID+1] = nil - delete(outoforderBacklog, d.LastID+1) - } else { - log.Printf("ResponseDemuxer: no backlog with cmdID=%d found. try PopCommand", d.LastID+1) - cmdInfo = d.PopCommand() - } - } else { - cmdInfo = d.PopCommand() - } - if cmdInfo == nil { - if len(outoforderBacklog) > 0 { - log.Printf("ResponseDemuxer: got no cmdInfo but have outoforderBacklog: %d [%v]", len(outoforderBacklog), outoforderBacklog) - if _, exists := outoforderBacklog[d.LastID+1]; exists { - log.Printf("ResponseDemuxer: pre-processing out-of-order backlog cmdID=%d d.LastID=%d", d.LastID+1, d.LastID) - continue - } - } - //log.Printf("ResponseDemuxer: nothing to process, waiting on signalChan") - <-d.signalChan - continue - } - if d.LastID+1 != cmdInfo.CmdID { - log.Printf("ResponseDemuxer: WARNING - out-of-order cmdID received, expected %d got %d", d.LastID+1, cmdInfo.CmdID) - outoforderBacklog[cmdInfo.CmdID] = cmdInfo - continue - } else { - d.LastID = cmdInfo.CmdID - } - - //log.Printf("ResponseDemuxer: waiting for response cmdID=%d respType=%d", cmdInfo.CmdID, cmdInfo.RespType) - start := time.Now() - d.conn.TextConn.StartResponse(cmdInfo.CmdID) - code, line, err := d.conn.TextConn.ReadCodeLine(0) // Read any code - d.conn.TextConn.EndResponse(cmdInfo.CmdID) - if time.Since(start) > time.Second { - log.Printf("LongWait ResponseDemuxer: received response cmdID=%d: code=%d line='%s' err='%v' respType=%d (waited %v)", cmdInfo.CmdID, code, line, err, cmdInfo.RespType, time.Since(start)) - } - if err != nil && code == 0 { - d.errChan <- struct{}{} - log.Printf("ResponseDemuxer: error reading response for cmdID=%d: %v", cmdInfo.CmdID, err) - return - } - respData := &ResponseData{ - CmdID: cmdInfo.CmdID, - Code: code, - Line: line, - Err: err, - } - // Dispatch based on registered type - - switch cmdInfo.RespType { - case TYPE_CHECK: - select { - case d.checkResponseChan <- respData: - // Dispatched successfully - //log.Printf("ResponseDemuxer: dispatched CHECK response cmdID=%d d.checkResponseChan=%d", cmdInfo.CmdID, len(d.checkResponseChan)) - case <-d.errChan: - log.Printf("ResponseDemuxer: got errChan while dispatching CHECK response, exiting") - d.errChan <- 
struct{}{} - return - } - - case TYPE_TAKETHIS: - select { - case d.ttResponseChan <- respData: - // Dispatched successfully - //log.Printf("ResponseDemuxer: dispatched TAKETHIS response cmdID=%d d.ttResponseChan=%d", cmdInfo.CmdID, len(d.ttResponseChan)) - case <-d.errChan: - d.errChan <- struct{}{} - log.Printf("ResponseDemuxer: got errChan while dispatching TAKETHIS response, exiting") - return - } - - default: - log.Printf("ResponseDemuxer: WARNING - unknown command type for cmdID=%d, signaling ERROR", cmdInfo.CmdID) - select { - case d.errChan <- struct{}{}: - default: - } - } - } -} - -// used in nntp-transfer/main.go -type TakeThisMode struct { - mux sync.Mutex - Newsgroup *string - TmpSuccessCount uint64 - TmpTTotalsCount uint64 - CheckMode bool // Start with TAKETHIS mode (false) -} - -type TTSetup struct { - ResponseChan chan *TTResponse - OffsetQ *OffsetQueue -} - -type OffsetQueue struct { - Newsgroup *string - MaxQueuedJobs int - mux sync.RWMutex - isleep time.Duration - queued int -} - -var ReturnDelay = time.Millisecond * 16 - -func (o *OffsetQueue) Wait(n int) { - start := time.Now() - lastPrint := start - for { - o.mux.RLock() - if o.queued < n { - o.mux.RUnlock() - - o.mux.Lock() - if time.Since(start).Milliseconds() > 1000 { - log.Printf("Newsgroup: '%s' | OffsetQueue: waited (%d ms) for %d batches to finish, currently queued: %d", *o.Newsgroup, time.Since(start).Milliseconds(), n, o.queued) - } - o.isleep = o.isleep / 2 - if o.isleep < time.Millisecond { - o.isleep = 0 - } - o.mux.Unlock() - return - } - if time.Since(lastPrint) > time.Second*5 { - if common.WantShutdown() { - return - } - log.Printf("Newsgroup: '%s' | OffsetQueue: waiting for batches to finish, currently queued: %d", *o.Newsgroup, o.queued) - lastPrint = time.Now() - } - o.mux.RUnlock() - o.mux.Lock() - o.isleep += time.Millisecond - if o.isleep > ReturnDelay { - o.isleep = ReturnDelay - } - time.Sleep(o.isleep) - o.mux.Unlock() - } -} - -func (o *OffsetQueue) OffsetBatchDone() { - o.mux.Lock() - defer o.mux.Unlock() - o.queued-- - //log.Printf("OffsetQueue: a batch is done, still queued: %d", o.queued) -} - -func (o *OffsetQueue) Add(n int) { - o.mux.Lock() - defer o.mux.Unlock() - o.queued += n - if o.MaxQueuedJobs > 10 && o.queued > o.MaxQueuedJobs/100*90 { - // prints only if occupancy is over 90% - log.Printf("Newsgroup: '%s' | OffsetQueue: added %d batches, now queued: %d/%d", *o.Newsgroup, n, o.queued, o.MaxQueuedJobs) - } -} - -type TTResponse struct { - Job *CHTTJob - ForceCleanUp bool - Err error -} - -type CheckResponse struct { // deprecated - CmdId uint - Article *models.Article -} - -type ReadRequest struct { - CmdID uint - Job *CHTTJob - N int - Reqs int - MsgID *string -} - -func (rr *ReadRequest) ClearReadRequest() { - rr.Job = nil - rr.MsgID = nil - rr = nil -} - -func (rr *ReadRequest) ReturnReadRequest(channel chan struct{}) { - select { - case channel <- struct{}{}: - default: - } - rr.ClearReadRequest() -} - -// TakeThisTracker tracks metadata for pending TAKETHIS responses -type TakeThisTracker struct { - CmdID uint - Job *CHTTJob - Article *models.Article - RedisCli *redis.Client // Will be *redis.Client in practice -} - -// batched CHECK/TAKETHIS Job -type CHTTJob struct { - JobID uint64 // Unique job ID for tracing - Newsgroup *string - Mux sync.RWMutex - TTMode *TakeThisMode - ResponseChan chan *TTResponse - responseSent bool // Track if response already sent (prevents double send) - Articles []*models.Article - ArticleMap map[*string]*models.Article - MessageIDs []*string - 
WantedIDs []*string - //checked uint64 - //wanted uint64 - //unwanted uint64 - //rejected uint64 - //retry uint64 - //transferred uint64 - //redisCached uint64 - //TxErrors uint64 - //ConnErrors uint64 - TmpTxBytes uint64 - TTxBytes uint64 - OffsetStart int64 - BatchStart int64 - BatchEnd int64 - OffsetQ *OffsetQueue - NGTProgress *NewsgroupTransferProgress -} - -func (job *CHTTJob) ReturnResponseChan() chan *TTResponse { - job.Mux.RLock() - defer job.Mux.RUnlock() - if job.ResponseChan != nil { - return job.ResponseChan - } - return nil -} - -func (job *CHTTJob) QuitResponseChan() chan *TTResponse { - job.OffsetQ.OffsetBatchDone() - job.Response(true, nil) - job.Mux.RLock() - defer job.Mux.RUnlock() - if job.ResponseChan != nil { - log.Printf("Newsgroup: '%s' | CHTTJob.QuitResponseChan(): returning closed ResponseChan for job #%d", *job.Newsgroup, job.JobID) - return job.ResponseChan - } - return nil -} - -func (job *CHTTJob) Response(ForceCleanUp bool, Err error) { - if job.ResponseChan == nil { - log.Printf("ERROR CHTTJob.Response(): ResponseChan is nil for job #%d", job.JobID) - return - } - - // Check if response already sent (prevents double send on connection loss) - job.Mux.Lock() - if job.responseSent { - log.Printf("WARNING CHTTJob.Response(): Response already sent for job #%d, skipping", job.JobID) - job.Mux.Unlock() - return - } - job.responseSent = true - job.Mux.Unlock() - - job.ResponseChan <- &TTResponse{Job: job, ForceCleanUp: ForceCleanUp, Err: Err} - close(job.ResponseChan) -} - -// NewsgroupTransferProgressMap is protected by ResultsMutex, used in nntp-transfer/main.go -var ResultsMutex sync.RWMutex -var NewsgroupTransferProgressMap = make(map[string]*NewsgroupTransferProgress) - -// NewsgroupProgress tracks the progress of a newsgroup transfer -type NewsgroupTransferProgress struct { - Mux sync.RWMutex - Newsgroup *string - Started time.Time - LastUpdated time.Time - - OffsetStart int64 - BatchStart int64 - BatchEnd int64 - TotalArticles int64 - Checked uint64 - Wanted uint64 - Unwanted uint64 - Rejected uint64 - Retry uint64 - Transferred uint64 - RedisCached uint64 - TxErrors uint64 - ConnErrors uint64 - Skipped uint64 - ArticlesTT uint64 - ArticlesCH uint64 - Finished bool - TXBytes uint64 - TXBytesTMP uint64 - LastCronTX time.Time - LastSpeedKB uint64 - LastArtPerfC uint64 // check articles per second - LastArtPerfT uint64 // takethis articles per second -} - -func (ngp *NewsgroupTransferProgress) GetSpeed() uint64 { - ngp.Mux.RLock() - defer ngp.Mux.RUnlock() - return ngp.LastSpeedKB -} - -func (ngp *NewsgroupTransferProgress) CalcSpeed() { - ngp.Mux.Lock() - if time.Since(ngp.LastCronTX) >= time.Second*3 { - since := uint64(time.Since(ngp.LastCronTX).Seconds()) - if ngp.TXBytesTMP > 0 { - ngp.LastSpeedKB = ngp.TXBytesTMP / since / 1024 - } else { - ngp.LastSpeedKB = 0 - } - if ngp.ArticlesCH > 0 { - ngp.LastArtPerfC = ngp.ArticlesCH / since - } else { - ngp.LastArtPerfC = 0 - } - if ngp.ArticlesTT > 0 { - ngp.LastArtPerfT = ngp.ArticlesTT / since - } else { - ngp.LastArtPerfT = 0 - } - //log.Printf("Newsgroup: '%s' | Transfer Perf: %d KB/s (%d bytes in %v) did: CH=(%d|%d/s) TT=(%d|%d/s)", *ngp.Newsgroup, ngp.LastSpeedKB, ngp.TXBytesTMP, since, ngp.ArticlesCH, ngp.LastArtPerfC, ngp.ArticlesTT, ngp.LastArtPerfT) - - ngp.ArticlesCH = 0 - ngp.ArticlesTT = 0 - ngp.TXBytesTMP = 0 - ngp.LastCronTX = time.Now() - } - ngp.Mux.Unlock() -} - -func (ngp *NewsgroupTransferProgress) AddNGTP(articlesCH uint64, articlesTT uint64, txbytes uint64) { - if articlesCH > 0 { - 
ngp.Mux.Lock() - ngp.ArticlesCH += articlesCH - ngp.Mux.Unlock() - } - if articlesTT > 0 { - ngp.Mux.Lock() - ngp.ArticlesTT += articlesTT - ngp.Mux.Unlock() - } - if txbytes > 0 { - ngp.Mux.Lock() - ngp.TXBytes += txbytes - ngp.TXBytesTMP += txbytes - ngp.Mux.Unlock() - } - if articlesCH > 0 || articlesTT > 0 || txbytes > 0 { - ngp.Mux.Lock() - ngp.LastUpdated = time.Now() - ngp.Mux.Unlock() - } - ngp.CalcSpeed() -} - -const IncrFLAG_CHECKED = 1 -const IncrFLAG_WANTED = 2 -const IncrFLAG_UNWANTED = 3 -const IncrFLAG_REJECTED = 4 -const IncrFLAG_RETRY = 5 -const IncrFLAG_TRANSFERRED = 6 -const IncrFLAG_REDIS_CACHED = 7 -const IncrFLAG_TX_ERRORS = 8 -const IncrFLAG_CONN_ERRORS = 9 -const IncrFLAG_SKIPPED = 10 - -func (ntp *NewsgroupTransferProgress) Increment(counter int, n uint64) { - ntp.Mux.Lock() - defer ntp.Mux.Unlock() - switch counter { - case IncrFLAG_CHECKED: - ntp.Checked += n - case IncrFLAG_WANTED: - ntp.Wanted += n - case IncrFLAG_UNWANTED: - ntp.Unwanted += n - case IncrFLAG_REJECTED: - ntp.Rejected += n - case IncrFLAG_RETRY: - ntp.Retry += n - case IncrFLAG_TRANSFERRED: - ntp.Transferred += n - case IncrFLAG_REDIS_CACHED: - ntp.RedisCached += n - case IncrFLAG_TX_ERRORS: - ntp.TxErrors += n - case IncrFLAG_CONN_ERRORS: - ntp.ConnErrors += n - case IncrFLAG_SKIPPED: - ntp.Skipped += n - } -} - -func (job *CHTTJob) AppendWantedMessageID(msgID *string) { - job.Mux.Lock() - job.WantedIDs = append(job.WantedIDs, msgID) - job.Mux.Unlock() - job.NGTProgress.Increment(IncrFLAG_WANTED, 1) -} - -/* -func (job *CHTTJob) xxGetUpdateCounters(transferred, unwanted, rejected, checked, redisCached, txErrors, connErrors *uint64) { - job.NGTProgress.Mux.Lock() - *transferred = job.NGTProgress.transferred - *unwanted = job.NGTProgress.unwanted - *rejected = job.NGTProgress.rejected - *checked = job.NGTProgress.checked - *redisCached = job.NGTProgress.redisCached - *txErrors = job.NGTProgress.TxErrors - *connErrors = job.NGTProgress.ConnErrors - job.NGTProgress.Mux.Unlock() -} -*/ - -func (ttMode *TakeThisMode) UseCHECK() bool { - ttMode.mux.Lock() - defer ttMode.mux.Unlock() - if ttMode.CheckMode { - return true - } - return false -} - -func (ttMode *TakeThisMode) SetForceCHECK() { - ttMode.mux.Lock() - ttMode.CheckMode = true - ttMode.mux.Unlock() -} - -func (ttMode *TakeThisMode) IncrementSuccess() { - ttMode.mux.Lock() - ttMode.TmpSuccessCount++ - ttMode.mux.Unlock() -} - -func (ttMode *TakeThisMode) IncrementTmp() { - ttMode.mux.Lock() - ttMode.TmpTTotalsCount++ - ttMode.mux.Unlock() -} - -func (ttMode *TakeThisMode) SetNoCHECK() { - ttMode.mux.Lock() - ttMode.CheckMode = false - ttMode.mux.Unlock() -} - -func (ttMode *TakeThisMode) FlipMode(lowerLevel float64, upperLevel float64) bool { - ttMode.mux.Lock() - defer ttMode.mux.Unlock() - if ttMode.TmpSuccessCount < 10 || ttMode.TmpTTotalsCount < 100 { - return true // Force CHECK mode for this batch - } - successRate := float64(ttMode.TmpSuccessCount) / float64(ttMode.TmpTTotalsCount) * 100.0 - ttMode.TmpSuccessCount = 0 - ttMode.TmpTTotalsCount = 0 - switch ttMode.CheckMode { - case false: // Currently in TAKETHIS mode - if successRate < lowerLevel { - ttMode.CheckMode = true - log.Printf("Newsgroup: '%s' | TAKETHIS success rate %.1f%% < %f%%, switching to CHECK mode", *ttMode.Newsgroup, successRate, lowerLevel) - } - case true: // Currently in CHECK mode - if successRate > upperLevel { - ttMode.CheckMode = false - log.Printf("Newsgroup: '%s' | TAKETHIS success rate %.1f%% >= %f%%, switching to TAKETHIS mode", *ttMode.Newsgroup, 
successRate, upperLevel) - } - } - retval := ttMode.CheckMode - return retval -} - // StatArticle checks if an article exists on the server func (c *BackendConn) StatArticle(messageID string) (bool, error) { c.mux.Lock() diff --git a/internal/nntp/nntp-transfer-demuxer.go b/internal/nntp/nntp-transfer-demuxer.go new file mode 100644 index 0000000..3b879b9 --- /dev/null +++ b/internal/nntp/nntp-transfer-demuxer.go @@ -0,0 +1,213 @@ +package nntp + +import ( + "log" + "sync" + "time" +) + +// ResponseDemuxer reads all responses from a connection in ONE goroutine +// and dispatches them to the appropriate handler channel (CHECK or TAKETHIS) +// This eliminates race conditions in concurrent ReadCodeLine calls +type ResponseDemuxer struct { + conn *BackendConn + cmdIDQ []*CmdIDinfo + signalChan chan struct{} + cmdIDQMux sync.RWMutex + LastID uint + checkResponseChan chan *ResponseData + ttResponseChan chan *ResponseData + errChan chan struct{} + started bool + startedMux sync.Mutex +} + +// NewResponseDemuxer creates a new response demultiplexer +func NewResponseDemuxer(conn *BackendConn, errChan chan struct{}, BatchCheck int) *ResponseDemuxer { + return &ResponseDemuxer{ + conn: conn, + signalChan: make(chan struct{}, 1), + checkResponseChan: make(chan *ResponseData, 128000), // Buffer for CHECK responses + ttResponseChan: make(chan *ResponseData, 128000), // Buffer for TAKETHIS responses + errChan: errChan, + started: false, + } +} + +// RegisterCommand registers a command ID with its type (CHECK or TAKETHIS) +func (d *ResponseDemuxer) RegisterCommand(cmdID uint, cmdType ResponseType) { + d.cmdIDQMux.Lock() + d.cmdIDQ = append(d.cmdIDQ, &CmdIDinfo{CmdID: cmdID, RespType: cmdType}) + d.cmdIDQMux.Unlock() + select { + case d.signalChan <- struct{}{}: + default: + } +} + +// PopCommand removes a command ID from the queue +func (d *ResponseDemuxer) PopCommand() *CmdIDinfo { + d.cmdIDQMux.Lock() + defer d.cmdIDQMux.Unlock() + + if len(d.cmdIDQ) == 0 { + return nil + } + + cmdIDInfo := d.cmdIDQ[0] + d.cmdIDQ = d.cmdIDQ[1:] + return cmdIDInfo +} + +// GetCheckResponseChan returns the channel for CHECK responses +func (d *ResponseDemuxer) GetCheckResponseChan() chan *ResponseData { + return d.checkResponseChan +} + +// GetTakeThisResponseChan returns the channel for TAKETHIS responses +func (d *ResponseDemuxer) GetTakeThisResponseChan() chan *ResponseData { + return d.ttResponseChan +} + +// Start launches the central response reader goroutine (call once) +func (d *ResponseDemuxer) Start() { + d.startedMux.Lock() + defer d.startedMux.Unlock() + + if d.started { + return // Already started + } + d.started = true + + go d.readAndDispatch() + + go func() { + // keep alive + for { + time.Sleep(1 * time.Second) + select { + case d.signalChan <- struct{}{}: + default: + } + } + }() +} + +// readAndDispatch is the SINGLE goroutine that reads ALL responses from the shared connection +func (d *ResponseDemuxer) readAndDispatch() { + defer func() { + if r := recover(); r != nil { + log.Printf("ResponseDemuxer: panic in readAndDispatch: %v", r) + } + select { + case d.errChan <- struct{}{}: + default: + } + }() + outoforderBacklog := make(map[uint]*CmdIDinfo, 1024) + for { + select { + case <-d.errChan: + log.Printf("ResponseDemuxer: got errChan signal, exiting") + select { + case d.errChan <- struct{}{}: + default: + } + // exit + return + default: + // pass + } + + if !d.conn.IsConnected() { + log.Printf("ResponseDemuxer: connection lost, exiting") + return + } + + var cmdInfo *CmdIDinfo + if 
len(outoforderBacklog) > 0 { + if cmdInfoBacklog, exists := outoforderBacklog[d.LastID+1]; exists { + log.Printf("ResponseDemuxer: processing out-of-order backlog cmdID=%d d.LastID=%d", cmdInfoBacklog.CmdID, d.LastID) + cmdInfo = cmdInfoBacklog + outoforderBacklog[d.LastID+1] = nil + delete(outoforderBacklog, d.LastID+1) + } else { + log.Printf("ResponseDemuxer: no backlog with cmdID=%d found. try PopCommand", d.LastID+1) + cmdInfo = d.PopCommand() + } + } else { + cmdInfo = d.PopCommand() + } + if cmdInfo == nil { + if len(outoforderBacklog) > 0 { + log.Printf("ResponseDemuxer: got no cmdInfo but have outoforderBacklog: %d [%v]", len(outoforderBacklog), outoforderBacklog) + if _, exists := outoforderBacklog[d.LastID+1]; exists { + log.Printf("ResponseDemuxer: pre-processing out-of-order backlog cmdID=%d d.LastID=%d", d.LastID+1, d.LastID) + continue + } + } + //log.Printf("ResponseDemuxer: nothing to process, waiting on signalChan") + <-d.signalChan + continue + } + if d.LastID+1 != cmdInfo.CmdID { + log.Printf("ResponseDemuxer: WARNING - out-of-order cmdID received, expected %d got %d", d.LastID+1, cmdInfo.CmdID) + outoforderBacklog[cmdInfo.CmdID] = cmdInfo + continue + } else { + d.LastID = cmdInfo.CmdID + } + + //log.Printf("ResponseDemuxer: waiting for response cmdID=%d respType=%d", cmdInfo.CmdID, cmdInfo.RespType) + start := time.Now() + d.conn.TextConn.StartResponse(cmdInfo.CmdID) + code, line, err := d.conn.TextConn.ReadCodeLine(0) // Read any code + d.conn.TextConn.EndResponse(cmdInfo.CmdID) + if time.Since(start) > time.Second { + log.Printf("LongWait ResponseDemuxer: received response cmdID=%d: code=%d line='%s' err='%v' respType=%d (waited %v)", cmdInfo.CmdID, code, line, err, cmdInfo.RespType, time.Since(start)) + } + if err != nil && code == 0 { + d.errChan <- struct{}{} + log.Printf("ResponseDemuxer: error reading response for cmdID=%d: %v", cmdInfo.CmdID, err) + return + } + respData := &ResponseData{ + CmdID: cmdInfo.CmdID, + Code: code, + Line: line, + Err: err, + } + // Dispatch based on registered type + + switch cmdInfo.RespType { + case TYPE_CHECK: + select { + case d.checkResponseChan <- respData: + // Dispatched successfully + //log.Printf("ResponseDemuxer: dispatched CHECK response cmdID=%d d.checkResponseChan=%d", cmdInfo.CmdID, len(d.checkResponseChan)) + case <-d.errChan: + log.Printf("ResponseDemuxer: got errChan while dispatching CHECK response, exiting") + d.errChan <- struct{}{} + return + } + + case TYPE_TAKETHIS: + select { + case d.ttResponseChan <- respData: + // Dispatched successfully + //log.Printf("ResponseDemuxer: dispatched TAKETHIS response cmdID=%d d.ttResponseChan=%d", cmdInfo.CmdID, len(d.ttResponseChan)) + case <-d.errChan: + d.errChan <- struct{}{} + log.Printf("ResponseDemuxer: got errChan while dispatching TAKETHIS response, exiting") + return + } + + default: + log.Printf("ResponseDemuxer: WARNING - unknown command type for cmdID=%d, signaling ERROR", cmdInfo.CmdID) + select { + case d.errChan <- struct{}{}: + default: + } + } + } +} diff --git a/internal/nntp/nntp-transfer.go b/internal/nntp/nntp-transfer.go new file mode 100644 index 0000000..646832c --- /dev/null +++ b/internal/nntp/nntp-transfer.go @@ -0,0 +1,459 @@ +package nntp + +import ( + "log" + "sync" + "time" + + "github.com/go-while/go-pugleaf/internal/common" + "github.com/go-while/go-pugleaf/internal/models" +) + +var NNTPTransferThreads int = 1 + +var JobIDCounter uint64 // Atomic counter for unique job IDs + +var ReturnDelay = time.Millisecond * 16 + +// ResponseType 
indicates which handler should process a response +type ResponseType int + +const ( + TYPE_CHECK ResponseType = iota + TYPE_TAKETHIS +) + +// ResponseData holds a read response from the connection +type ResponseData struct { + CmdID uint + Code int + Line string + Err error +} + +// CmdIDinfo holds information about a command sent to the remote server +type CmdIDinfo struct { + CmdID uint + RespType ResponseType +} + +// used in nntp-transfer/main.go +type TakeThisMode struct { + mux sync.Mutex + Newsgroup *string + TmpSuccessCount uint64 + TmpTTotalsCount uint64 + CheckMode bool +} + +type TTSetup struct { + ResponseChan chan *TTResponse + OffsetQ *OffsetQueue +} + +// OffsetQueue manages the number of concurrent batches being processed for a newsgroup +type OffsetQueue struct { + Newsgroup *string + MaxQueuedJobs int + mux sync.RWMutex + queued int + waiter []chan struct{} +} + +// Wait waits until the number of queued batches is less than n +func (o *OffsetQueue) Wait(n int) { + start := time.Now() + lastPrint := start + setWaiting := false + waitChan := common.GetStructChanCap1() + defer common.RecycleStructChanCap1(waitChan) + for { + if common.WantShutdown() { + return + } + o.mux.Lock() + // log.Printf("OffsetQueue: currently queued: %d, waiting for %d batches to finish", o.queued, n) + if o.queued < n { + // enough batches have finished + if time.Since(start).Milliseconds() > 1000 { + log.Printf("Newsgroup: '%s' | OffsetQueue: waited (%d ms) for %d batches. queued: %d", *o.Newsgroup, time.Since(start).Milliseconds(), n, o.queued) + } + o.mux.Unlock() + return + } + if time.Since(lastPrint) > time.Second*5 { + log.Printf("Newsgroup: '%s' | OffsetQueue: waiting for queued batches: %d", *o.Newsgroup, o.queued) + lastPrint = time.Now() + } + if !setWaiting { + o.waiter = append(o.waiter, waitChan) + setWaiting = true + } + o.mux.Unlock() + + // wait for signal or timeout to retry + if setWaiting { + wait: + for { + select { + case <-waitChan: + // got signal, recheck condition + break wait + case <-time.After(time.Second * 6): + // timeout, recheck condition + break wait + } + } + } + } +} + +// OffsetBatchDone signals that a batch has finished processing +func (o *OffsetQueue) OffsetBatchDone() { + o.mux.Lock() + defer o.mux.Unlock() + o.queued-- + if len(o.waiter) > 0 { + // notify one waiter + waitChan := o.waiter[0] + o.waiter = o.waiter[1:] + select { + case waitChan <- struct{}{}: + default: + // if the channel is full, skip sending + } + } + //log.Printf("OffsetQueue: a batch is done, still queued: %d", o.queued) +} + +func (o *OffsetQueue) Add(n int) { + o.mux.Lock() + defer o.mux.Unlock() + o.queued += n + if o.MaxQueuedJobs > 10 && o.queued > o.MaxQueuedJobs/100*90 { + // prints only if occupancy is over 90% + log.Printf("Newsgroup: '%s' | OffsetQueue: added %d batches, now queued: %d/%d", *o.Newsgroup, n, o.queued, o.MaxQueuedJobs) + } +} + +type TTResponse struct { + Job *CHTTJob + ForceCleanUp bool + Err error +} + +type CheckResponse struct { // deprecated + CmdId uint + Article *models.Article +} + +type ReadRequest struct { + CmdID uint + Job *CHTTJob + N int + Reqs int + MsgID *string +} + +func (rr *ReadRequest) ClearReadRequest() { + rr.Job = nil + rr.MsgID = nil + rr = nil +} + +// batched CHECK/TAKETHIS Job +type CHTTJob struct { + JobID uint64 // Unique job ID for tracing + Newsgroup *string + Mux sync.RWMutex + TTMode *TakeThisMode + ResponseChan chan *TTResponse + responseSent bool // Track if response already sent (prevents double send) + Articles 
[]*models.Article + ArticleMap map[*string]*models.Article + MessageIDs []*string + WantedIDs []*string + //checked uint64 + //wanted uint64 + //unwanted uint64 + //rejected uint64 + //retry uint64 + //transferred uint64 + //redisCached uint64 + //TxErrors uint64 + //ConnErrors uint64 + TmpTxBytes uint64 + TTxBytes uint64 + OffsetStart int64 + BatchStart int64 + BatchEnd int64 + OffsetQ *OffsetQueue + NGTProgress *NewsgroupTransferProgress +} + +// ReturnResponseChan returns the ResponseChan for the job +func (job *CHTTJob) ReturnResponseChan() chan *TTResponse { + job.Mux.RLock() + defer job.Mux.RUnlock() + if job.ResponseChan != nil { + return job.ResponseChan + } + return nil +} + +// QuitResponseChan signals that no more responses will be sent and returns the closed ResponseChan +func (job *CHTTJob) QuitResponseChan() chan *TTResponse { + job.OffsetQ.OffsetBatchDone() + job.Response(true, nil) + job.Mux.RLock() + defer job.Mux.RUnlock() + if job.ResponseChan != nil { + log.Printf("Newsgroup: '%s' | CHTTJob.QuitResponseChan(): returning closed ResponseChan for job #%d", *job.Newsgroup, job.JobID) + return job.ResponseChan + } + return nil +} + +// Response sends the response back to a go routine via the ResponseChan +func (job *CHTTJob) Response(ForceCleanUp bool, Err error) { + if job.ResponseChan == nil { + log.Printf("ERROR CHTTJob.Response(): ResponseChan is nil for job #%d", job.JobID) + return + } + + // Check if response already sent (prevents double send on connection loss) + job.Mux.Lock() + if job.responseSent { + log.Printf("WARNING CHTTJob.Response(): Response already sent for job #%d, skipping", job.JobID) + job.Mux.Unlock() + return + } + job.responseSent = true + job.Mux.Unlock() + + job.ResponseChan <- &TTResponse{Job: job, ForceCleanUp: ForceCleanUp, Err: Err} + close(job.ResponseChan) +} + +// NewsgroupTransferProgressMap is protected by ResultsMutex, used in nntp-transfer/main.go +var ResultsMutex sync.RWMutex +var NewsgroupTransferProgressMap = make(map[string]*NewsgroupTransferProgress) + +// NewsgroupProgress tracks the progress of a newsgroup transfer +type NewsgroupTransferProgress struct { + Mux sync.RWMutex + Newsgroup *string + Started time.Time + LastUpdated time.Time + + OffsetStart int64 + BatchStart int64 + BatchEnd int64 + TotalArticles int64 + Checked uint64 + Wanted uint64 + Unwanted uint64 + Rejected uint64 + Retry uint64 + Transferred uint64 + TTSentCount uint64 + RedisCached uint64 + TxErrors uint64 + ConnErrors uint64 + Skipped uint64 + RedisCachedBeforeCheck uint64 + RedisCachedBeforeTakethis uint64 + ArticlesTT uint64 + ArticlesCH uint64 + Finished bool + TXBytes uint64 + TXBytesTMP uint64 + LastCronTX time.Time + LastSpeedKB uint64 + LastArtPerfC uint64 // check articles per second + LastArtPerfT uint64 // takethis articles per second +} + +// GetSpeed returns the last calculated transfer speed in KB/s +func (ngp *NewsgroupTransferProgress) GetSpeed() uint64 { + ngp.Mux.RLock() + defer ngp.Mux.RUnlock() + return ngp.LastSpeedKB +} + +// CalcSpeed calculates the transfer speed and article performance +func (ngp *NewsgroupTransferProgress) CalcSpeed() { + ngp.Mux.Lock() + if time.Since(ngp.LastCronTX) >= time.Second*3 { + since := uint64(time.Since(ngp.LastCronTX).Seconds()) + if ngp.TXBytesTMP > 0 { + ngp.LastSpeedKB = ngp.TXBytesTMP / since / 1024 + } else { + ngp.LastSpeedKB = 0 + } + if ngp.ArticlesCH > 0 { + ngp.LastArtPerfC = ngp.ArticlesCH / since + } else { + ngp.LastArtPerfC = 0 + } + if ngp.ArticlesTT > 0 { + ngp.LastArtPerfT = 
ngp.ArticlesTT / since + } else { + ngp.LastArtPerfT = 0 + } + //log.Printf("Newsgroup: '%s' | Transfer Perf: %d KB/s (%d bytes in %v) did: CH=(%d|%d/s) TT=(%d|%d/s)", *ngp.Newsgroup, ngp.LastSpeedKB, ngp.TXBytesTMP, since, ngp.ArticlesCH, ngp.LastArtPerfC, ngp.ArticlesTT, ngp.LastArtPerfT) + + ngp.ArticlesCH = 0 + ngp.ArticlesTT = 0 + ngp.TXBytesTMP = 0 + ngp.LastCronTX = time.Now() + } + ngp.Mux.Unlock() +} + +// AddNGTP adds to the NewsgroupTransferProgress temporary counters to calculate speed +func (ngp *NewsgroupTransferProgress) AddNGTP(articlesCH uint64, articlesTT uint64, txbytes uint64) { + if articlesCH > 0 { + ngp.Mux.Lock() + ngp.ArticlesCH += articlesCH + ngp.Mux.Unlock() + } + if articlesTT > 0 { + ngp.Mux.Lock() + ngp.ArticlesTT += articlesTT + ngp.Mux.Unlock() + } + if txbytes > 0 { + ngp.Mux.Lock() + ngp.TXBytes += txbytes + ngp.TXBytesTMP += txbytes + ngp.Mux.Unlock() + } + if articlesCH > 0 || articlesTT > 0 || txbytes > 0 { + ngp.Mux.Lock() + ngp.LastUpdated = time.Now() + ngp.Mux.Unlock() + } + ngp.CalcSpeed() +} + +const IncrFLAG_CHECKED = 1 +const IncrFLAG_WANTED = 2 +const IncrFLAG_UNWANTED = 3 +const IncrFLAG_REJECTED = 4 +const IncrFLAG_RETRY = 5 +const IncrFLAG_TRANSFERRED = 6 +const IncrFLAG_REDIS_CACHED = 7 +const IncrFLAG_TX_ERRORS = 8 +const IncrFLAG_CONN_ERRORS = 9 +const IncrFLAG_SKIPPED = 10 +const IncrFLAG_REDIS_CACHED_BEFORE_CHECK = 11 +const IncrFLAG_REDIS_CACHED_BEFORE_TAKETHIS = 12 +const IncrFLAG_TTSentCount = 13 + +// Increment increments a counter in NewsgroupTransferProgress +func (ntp *NewsgroupTransferProgress) Increment(counter int, n uint64) { + ntp.Mux.Lock() + defer ntp.Mux.Unlock() + switch counter { + case IncrFLAG_CHECKED: + ntp.Checked += n + case IncrFLAG_WANTED: + ntp.Wanted += n + case IncrFLAG_UNWANTED: + ntp.Unwanted += n + case IncrFLAG_REJECTED: + ntp.Rejected += n + case IncrFLAG_RETRY: + ntp.Retry += n + case IncrFLAG_TRANSFERRED: + ntp.Transferred += n + case IncrFLAG_REDIS_CACHED: + ntp.RedisCached += n + case IncrFLAG_TX_ERRORS: + ntp.TxErrors += n + case IncrFLAG_CONN_ERRORS: + ntp.ConnErrors += n + case IncrFLAG_SKIPPED: + ntp.Skipped += n + case IncrFLAG_TTSentCount: + ntp.TTSentCount += n + case IncrFLAG_REDIS_CACHED_BEFORE_CHECK: + ntp.RedisCachedBeforeCheck += n + ntp.RedisCached += n // also increment total + case IncrFLAG_REDIS_CACHED_BEFORE_TAKETHIS: + ntp.RedisCachedBeforeTakethis += n + ntp.RedisCached += n // also increment total + } +} + +// AppendMessageID appends a message ID to the job +func (job *CHTTJob) AppendWantedMessageID(msgID *string) { + job.Mux.Lock() + job.WantedIDs = append(job.WantedIDs, msgID) + job.Mux.Unlock() +} + +// UseCHECK returns true if CHECK mode is active +func (ttMode *TakeThisMode) UseCHECK() bool { + ttMode.mux.Lock() + defer ttMode.mux.Unlock() + if ttMode.CheckMode { + return true + } + return false +} + +// SetForceCHECK forces CHECK mode +func (ttMode *TakeThisMode) SetForceCHECK() { + ttMode.mux.Lock() + ttMode.CheckMode = true + ttMode.mux.Unlock() +} + +// IncrementSuccess increments the temporary TAKETHIS success count +func (ttMode *TakeThisMode) IncrementSuccess() { + ttMode.mux.Lock() + ttMode.TmpSuccessCount++ + ttMode.mux.Unlock() +} + +// IncrementTmp increments the temporary TAKETHIS total count +func (ttMode *TakeThisMode) IncrementTmp() { + ttMode.mux.Lock() + ttMode.TmpTTotalsCount++ + ttMode.mux.Unlock() +} + +// SetNoCHECK forces TAKETHIS mode +func (ttMode *TakeThisMode) SetNoCHECK() { + ttMode.mux.Lock() + ttMode.CheckMode = false + ttMode.mux.Unlock() +} + 
+// FlipMode checks the TAKETHIS success rate and flips between CHECK and TAKETHIS modes +func (ttMode *TakeThisMode) FlipMode(lowerLevel float64, upperLevel float64) bool { + ttMode.mux.Lock() + defer ttMode.mux.Unlock() + if ttMode.TmpSuccessCount < 10 || ttMode.TmpTTotalsCount < 100 { + return true // Force CHECK mode for this batch + } + successRate := float64(ttMode.TmpSuccessCount) / float64(ttMode.TmpTTotalsCount) * 100.0 + ttMode.TmpSuccessCount = 0 + ttMode.TmpTTotalsCount = 0 + switch ttMode.CheckMode { + case false: // Currently in TAKETHIS mode + if successRate < lowerLevel { + ttMode.CheckMode = true + log.Printf("Newsgroup: '%s' | TAKETHIS success rate %.1f%% < %f%%, switching to CHECK mode", *ttMode.Newsgroup, successRate, lowerLevel) + } + case true: // Currently in CHECK mode + if successRate > upperLevel { + ttMode.CheckMode = false + log.Printf("Newsgroup: '%s' | TAKETHIS success rate %.1f%% >= %f%%, switching to TAKETHIS mode", *ttMode.Newsgroup, successRate, upperLevel) + } + } + retval := ttMode.CheckMode + return retval +} diff --git a/web/templates/admin_postqueue.html b/web/templates/admin_postqueue.html index 1f4298f..311bce6 100644 --- a/web/templates/admin_postqueue.html +++ b/web/templates/admin_postqueue.html @@ -60,10 +60,10 @@
📮 Post Queue Management
{{.Created.Format "2006-01-02 15:04"}} - {{.Newsgroup}} + {{.Newsgroup}} - {{.MessageID}} + {{.MessageID}} {{if eq .Status "pending"}} From 2d51bffa724684c69ea942d1dcf462bbff7edcd1 Mon Sep 17 00:00:00 2001 From: go-while Date: Tue, 14 Oct 2025 18:15:37 +0000 Subject: [PATCH 056/100] testing --- cmd/nntp-transfer/main.go | 213 ++++++++++++++++--------- internal/nntp/nntp-client-commands.go | 8 +- internal/nntp/nntp-transfer-demuxer.go | 15 +- internal/nntp/nntp-transfer.go | 205 +++++++++++++++++++++--- 4 files changed, 333 insertions(+), 108 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 26902f0..f64bde5 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1159,7 +1159,7 @@ var ErrNotInDateRange = fmt.Errorf("article not in specified date range") // processRequeuedJobs processes any failed jobs that were requeued for retry // Returns the number of jobs processed successfully -func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponses chan *nntp.TTSetup, redisCli *redis.Client) (int, error) { +func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponsesSetupChan chan *nntp.TTSetup, redisCli *redis.Client) (int, error) { var queuedJobs []*nntp.CHTTJob jobRequeueMutex.Lock() if jobs, exists := jobRequeue[ttMode.Newsgroup]; exists { @@ -1199,9 +1199,7 @@ func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponse } if responseChan != nil { // pass the response channel to the collector channel: ttResponses - ttResponses <- &nntp.TTSetup{ - ResponseChan: responseChan, - } + ttResponsesSetupChan <- nntp.GetTTSetup(responseChan) } } @@ -1300,7 +1298,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i Newsgroup: &ng.Name, CheckMode: CHECK_FIRST, } - ttResponses := make(chan *nntp.TTSetup, totalNGArticles/int64(batchCheck)+2) + ttResponsesSetupChan := make(chan *nntp.TTSetup, totalNGArticles/int64(batchCheck)+2) start := time.Now() // WaitGroup to ensure collector goroutine finishes before returning @@ -1312,10 +1310,8 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i go func(responseWG *sync.WaitGroup) { defer collectorWG.Done() - //var amux sync.Mutex - //var transferred, unwanted, rejected, checked, redis_cached, txErrors, connErrors uint64 var num uint64 - for setup := range ttResponses { + for setup := range ttResponsesSetupChan { if setup == nil || setup.ResponseChan == nil { log.Printf("Newsgroup: '%s' | Warning: nil TT response channel received in collector!?", ng.Name) continue @@ -1341,10 +1337,6 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i log.Printf("Newsgroup: '%s' | Warning: nil Job in TT response job without error!? 
ForceCleanUp=%t", ng.Name, resp.ForceCleanUp) return } - // get numbers - //amux.Lock() - //resp.Job.GetUpdateCounters(&transferred, &unwanted, &rejected, &checked, &redis_cached, &txErrors, &connErrors) - //amux.Unlock() if !resp.ForceCleanUp { return } @@ -1377,8 +1369,10 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } resp.Job.ArticleMap = nil resp.Job.Mux.Unlock() - resp.Job = nil + nntp.RecycleTTResponseChan(responseChan) + nntp.RecycleTTResponse(resp) }(setup.ResponseChan, num, responseWG) + nntp.RecycleTTSetup(setup) } if VERBOSE { log.Printf("Newsgroup: '%s' | Collector: ttResponses closed, waiting for %d response processors to finish...", ng.Name, num) @@ -1388,15 +1382,6 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i if VERBOSE { log.Printf("Newsgroup: '%s' | Collector: all response processors closed", ng.Name) } - //amux.Lock() - - //amux.Unlock() - - /* - ngtprogress.Mux.Lock() - redis_cached := ngtprogress.RedisCached - ngtprogress.Mux.Unlock() - */ nntp.ResultsMutex.Lock() ngtprogress.Mux.Lock() @@ -1457,7 +1442,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i return nil } // Process any requeued jobs first (from previous failed batches) - if _, err := processRequeuedJobs(ng.Name, ttMode, ttResponses, redisCli); err != nil { + if _, err := processRequeuedJobs(ng.Name, ttMode, ttResponsesSetupChan, redisCli); err != nil { return err } start := time.Now() @@ -1509,12 +1494,13 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } if responseChan != nil { // pass the response channel to the collector channel: ttResponses - ttResponses <- &nntp.TTSetup{ - ResponseChan: responseChan, - } + ttResponsesSetupChan <- nntp.GetTTSetup(responseChan) + } + OffsetQueue.Wait(MaxQueuedJobs) // wait for offset batches to finish, less than N in flight } + articlesProcessed += int64(len(articles)) remainingArticles -= int64(len(articles)) if VERBOSE { log.Printf("Newsgroup: '%s' | Pushed to queue (processed %d/%d) remaining: %d (Check=%t)", ng.Name, articlesProcessed, totalNGArticles, remainingArticles, ttMode.UseCHECK()) @@ -1532,7 +1518,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i log.Printf("WantShutdown during final requeue processing for '%s'", ng.Name) break } - processed, err := processRequeuedJobs(ng.Name, ttMode, ttResponses, redisCli) + processed, err := processRequeuedJobs(ng.Name, ttMode, ttResponsesSetupChan, redisCli) if err != nil { log.Printf("Newsgroup: '%s' | Error in final requeue processing: %v", ng.Name, err) // Don't return error, just log it - we've already processed most articles @@ -1549,7 +1535,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i //log.Printf("Newsgroup: '%s' | Final requeue processing completed, closing ttResponses channel...", ng.Name) // Close the ttResponses channel to signal collector goroutine to finish - close(ttResponses) + close(ttResponsesSetupChan) //log.Printf("Newsgroup: '%s' | ttResponses channel closed, waiting for collector to finish...", ng.Name) @@ -1595,7 +1581,7 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl MessageIDs: make([]*string, 0, len(articles)), Articles: make([]*models.Article, 0, len(articles)), ArticleMap: make(map[*string]*models.Article, len(articles)), - ResponseChan: make(chan *nntp.TTResponse, 1), + ResponseChan: nntp.GetTTResponseChan(), TTMode: ttMode, OffsetStart: 
dbOffset, BatchStart: batchStart, @@ -1699,7 +1685,7 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl WorkersCheckChannel <- job // checkQueue <- job //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queued Job #%d", *ttMode.Newsgroup, workerID, job.JobID) - return job.ReturnResponseChan(), nil + return job.GetResponseChan(), nil } // end func processBatch // sendArticlesBatchViaTakeThis sends multiple articles via TAKETHIS in streaming mode @@ -1776,15 +1762,21 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art start := time.Now() astart := start astart2 := start - for _, article := range articles { + skipped := 0 + for n, article := range articles { if article == nil { + skipped++ continue // Skip cached article } astart = time.Now() // Send TAKETHIS command with article content (non-blocking) // This also queues the ReadRequest to readTAKETHISResponsesChan BEFORE returning //log.Printf("Newsgroup: '%s' | ++Pre-Send TAKETHIS '%s'", newsgroup, article.MessageID) - cmdID, txBytes, err := conn.SendTakeThisArticleStreaming(article, &processor.LocalNNTPHostname, newsgroup, demuxer, readTAKETHISResponsesChan, job) + + // Increment pending responses counter before sending + job.PendingResponses.Add(1) + + cmdID, txBytes, err := conn.SendTakeThisArticleStreaming(article, &processor.LocalNNTPHostname, newsgroup, demuxer, readTAKETHISResponsesChan, job, n, len(articles)-skipped) astart2 = time.Now() job.Mux.Lock() job.TTxBytes += uint64(txBytes) @@ -1792,6 +1784,8 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art job.Mux.Unlock() ttxBytes += uint64(txBytes) if err != nil { + // Decrement on error since no response will come + job.PendingResponses.Done() if err == common.ErrNoNewsgroups { job.NGTProgress.Increment(nntp.IncrFLAG_SKIPPED, 1) log.Printf("Newsgroup: '%s' | skipped TAKETHIS '%s': no newsgroups header", newsgroup, article.MessageID) @@ -2233,9 +2227,11 @@ func replyChan(request chan struct{}, reply chan struct{}) { } func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue chan *nntp.CHTTJob) { + var mux sync.Mutex + var workerWG sync.WaitGroup readCHECKResponsesChan := make(chan *nntp.ReadRequest, 128000) readTAKETHISResponsesChan := make(chan *nntp.ReadRequest, 128000) - errChan := make(chan struct{}, 4) + errChan := make(chan struct{}, 9) tickChan := common.GetStructChanCap1() requestReplyJobDone := common.GetStructChanCap1() @@ -2250,7 +2246,10 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue defer func(conn *nntp.BackendConn, rs *ReturnSignal) { conn.ForceCloseConn() rs.ExitChan <- rs - errChan <- struct{}{} + select { + case errChan <- struct{}{}: + default: + } }(conn, rs) //lastRun := time.Now() @@ -2259,7 +2258,9 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue log.Printf("CheckWorker (%d): Started ResponseDemuxer", workerID) // launch go routine which sends CHECK commands - go func() { + workerWG.Add(1) + go func(workerWG *sync.WaitGroup) { + defer workerWG.Done() // tick every n seconds to check if any CHECKs to do ticker := time.NewTicker(DefaultCheckTicker) defer ticker.Stop() @@ -2393,14 +2394,19 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } } // end select } // end forever - }() + }(&workerWG) // launch a go routine to read CHECK responses from the supplied connection with textproto readline - go func() { + workerWG.Add(1) + go 
func(workerWG *sync.WaitGroup) { + defer workerWG.Done() var responseCount int var tookTime time.Duration defer func() { - errChan <- struct{}{} + select { + case errChan <- struct{}{}: + default: + } }() loop: for { @@ -2419,13 +2425,13 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } if common.WantShutdown() { log.Printf("CheckWorker (%d): Read CHECK WantShutdown, exiting", workerID) - rr.ClearReadRequest() + rr.ClearReadRequest(nil) return } //log.Printf("CheckWorker (%d): Read CHECK response (do conn check) for msgID: %s (cmdID=%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) if !conn.IsConnected() { log.Printf("CheckWorker (%d): Read CHECK connection lost, exiting", workerID) - rr.ClearReadRequest() + rr.ClearReadRequest(nil) return } //log.Printf("CheckWorker (%d): Pre-Read CHECK response for msgID: %s (cmdID=%d MID=%d/%d)", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs) @@ -2438,20 +2444,20 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // Got response from demuxer case <-errChan: log.Printf("CheckWorker (%d): Read CHECK got errChan while waiting for response", workerID) - rr.ClearReadRequest() + rr.ClearReadRequest(respData) return } // Verify we got the expected command ID if respData.CmdID != rr.CmdID { log.Printf("ERROR CheckWorker (%d): Command ID mismatch! Expected %d, got %d", workerID, rr.CmdID, respData.CmdID) - rr.ClearReadRequest() + rr.ClearReadRequest(respData) return } if respData.Code == 0 && respData.Err != nil { log.Printf("Failed to read CHECK response: %v", respData.Err) - rr.ClearReadRequest() + rr.ClearReadRequest(respData) return } @@ -2479,12 +2485,12 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue parts := strings.Fields(respData.Line) if len(parts) < 1 { log.Printf("ERROR in CheckWorker: Malformed CHECK response code=%d line: '%s' (cmdID=%d MID=%d/%d)", respData.Code, respData.Line, rr.CmdID, rr.N, rr.Reqs) - rr.ClearReadRequest() + rr.ClearReadRequest(respData) return } if parts[0] != *rr.MsgID { log.Printf("ERROR in CheckWorker: Mismatched CHECK response: expected '%s', got '%s' code=%d (cmdID=%d MID=%d/%d)", *rr.MsgID, parts[0], respData.Code, rr.CmdID, rr.N, rr.Reqs) - rr.ClearReadRequest() + rr.ClearReadRequest(respData) return } //log.Printf("CheckWorker (%d): Got CHECK response: '%s' (cmdID=%d MID=%d/%d) took: %v ms", workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, time.Since(start).Milliseconds()) @@ -2496,7 +2502,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG1 Processing CHECK response for msgID: %s (cmdID=%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) if !exists { log.Printf("Newsgroup: '%s' | ERROR in CheckWorker: ReadCheckResponse msgId '%s' did not exist in jobMap.", *job.Newsgroup, *rr.MsgID) - rr.ClearReadRequest() + rr.ClearReadRequest(respData) continue loop } //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG2 Processing CHECK response for msgID: %s (cmdID=%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) @@ -2543,11 +2549,12 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Unlock() if !qexists || !rexists { log.Printf("Newsgroup: '%s' | ERROR in CheckWorker: queuedCount or readCount did not exist for a job?!", *job.Newsgroup) - rr.ClearReadRequest() + rr.ClearReadRequest(respData) continue loop } 
//log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG4 Processing CHECK response for msgID: %s (cmdID=%d MID=%d/%d) code=%d", *job.Newsgroup, workerID, *rr.MsgID, rr.CmdID, rr.N, rr.Reqs, code) - rr.ClearReadRequest() + rr.ClearReadRequest(respData) + if queuedCount == readCount { rs.Mux.Lock() delete(rs.jobsQueued, job) @@ -2576,13 +2583,18 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue continue loop } // end select } // end forever - }() + }(&workerWG) // launch a goroutine to process TAKETHIS responses concurrently // This follows the EXACT pattern as CHECK response reader (lines 2366-2552) - go func() { + workerWG.Add(1) + go func(workerWG *sync.WaitGroup) { + defer workerWG.Done() defer func() { - errChan <- struct{}{} + select { + case errChan <- struct{}{}: + default: + } }() ttloop: @@ -2596,16 +2608,21 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue case rr := <-readTAKETHISResponsesChan: if rr == nil || rr.MsgID == nil { log.Printf("TTResponseWorker (%d): got nil readRequest, skipping", workerID) + if rr.Job != nil { + rr.Job.PendingResponses.Done() // Mark response as handled (error case) + } continue ttloop } if common.WantShutdown() { log.Printf("TTResponseWorker (%d): WantShutdown, exiting", workerID) - rr.ClearReadRequest() + rr.Job.PendingResponses.Done() // Mark response as handled (error case) + rr.ClearReadRequest(nil) return } if !conn.IsConnected() { log.Printf("TTResponseWorker (%d): connection lost, exiting", workerID) - rr.ClearReadRequest() + rr.Job.PendingResponses.Done() // Mark response as handled (error case) + rr.ClearReadRequest(nil) return } @@ -2618,14 +2635,16 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // Got response from demuxer case <-errChan: log.Printf("TTResponseWorker (%d): got errChan while waiting for response", workerID) - rr.ClearReadRequest() + rr.Job.PendingResponses.Done() // Mark response as handled (error case) + rr.ClearReadRequest(respData) return } // Verify we got the expected command ID if respData.CmdID != rr.CmdID { log.Printf("ERROR TTResponseWorker (%d): Command ID mismatch! 
Expected %d, got %d", workerID, rr.CmdID, respData.CmdID) - rr.ClearReadRequest() + rr.Job.PendingResponses.Done() // Mark response as handled (error case) + rr.ClearReadRequest(respData) return } @@ -2633,7 +2652,8 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue log.Printf("ERROR TTResponseWorker (%d): Failed to read TAKETHIS response for %s: %v", workerID, *rr.MsgID, respData.Err) rr.Job.NGTProgress.Increment(nntp.IncrFLAG_CONN_ERRORS, 1) - rr.ClearReadRequest() + rr.Job.PendingResponses.Done() // Mark response as handled (error case) + rr.ClearReadRequest(respData) conn.ForceCloseConn() return } @@ -2672,7 +2692,8 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue case 400, 480, 500, 501, 502, 503, 504: log.Printf("ERROR Newsgroup: '%s' | Failed to transfer article '%s': response=%d", *rr.Job.Newsgroup, *rr.MsgID, respData.Code) rr.Job.NGTProgress.Increment(nntp.IncrFLAG_TX_ERRORS, 1) - rr.ClearReadRequest() + rr.Job.PendingResponses.Done() // Mark response handled (error case) + rr.ClearReadRequest(respData) conn.ForceCloseConn() return @@ -2681,16 +2702,23 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue *rr.Job.Newsgroup, *rr.MsgID, respData.Code) rr.Job.NGTProgress.Increment(nntp.IncrFLAG_TX_ERRORS, 1) } - - rr.ClearReadRequest() + rr.Job.PendingResponses.Done() + // Mark this response as processed + rr.ClearReadRequest(respData) } // end select } // end for - }() + }(&workerWG) + var runningTTJobs int // protected by local mux // launch a goroutine to process TAKETHIS jobs from local channel sharing the same connection - go func() { + workerWG.Add(1) + go func(workerWG *sync.WaitGroup) { + defer workerWG.Done() defer func() { - errChan <- struct{}{} + select { + case errChan <- struct{}{}: + default: + } }() var job *nntp.CHTTJob for { @@ -2732,7 +2760,9 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue job.Response(true, nil) continue } - + mux.Lock() + runningTTJobs++ + mux.Unlock() //log.Printf("Newsgroup: '%s' | TTworker (%d): Prepare locking to send TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, workerID, job.JobID, len(wantedArticles)) rs.GetLockTT() //common.ChanLock(flipflopChan) @@ -2750,26 +2780,41 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Unlock() return } - if VERBOSE && redis_cached > 0 { - log.Printf("Newsgroup: '%s' | TTworker (%d): TAKETHIS job #%d sent, redis_cached=%d", *job.Newsgroup, workerID, job.JobID, redis_cached) + if redis_cached > 0 { + if VERBOSE { + log.Printf("Newsgroup: '%s' | TTworker (%d): TAKETHIS job #%d sent, redis_cached=%d", *job.Newsgroup, workerID, job.JobID, redis_cached) + } } - - // Send response back - //log.Printf("Newsgroup: '%s' | TTworker (%d): Sending TTresponse for job #%d to responseChan len=%d", *job.Newsgroup, workerID, job.JobID, len(job.ResponseChan)) - job.Response(true, nil) - //log.Printf("Newsgroup: '%s' | TTworker (%d): Sent TTresponse for job #%d to responseChan", *job.Newsgroup, workerID, job.JobID) + go func(job *nntp.CHTTJob) { + // Wait for all TAKETHIS responses to be processed before completing job + //log.Printf("Newsgroup: '%s' | TTworker (%d): Waiting for all TAKETHIS responses for job #%d", *job.Newsgroup, workerID, job.JobID) + job.PendingResponses.Wait() + //log.Printf("Newsgroup: '%s' | TTworker (%d): All TAKETHIS responses received for job #%d", *job.Newsgroup, workerID, job.JobID) + // Send response back + 
//log.Printf("Newsgroup: '%s' | TTworker (%d): Sending TTresponse for job #%d to responseChan len=%d", *job.Newsgroup, workerID, job.JobID, len(job.ResponseChan)) + job.Response(true, nil) + //log.Printf("Newsgroup: '%s' | TTworker (%d): Sent TTresponse for job #%d to responseChan", *job.Newsgroup, workerID, job.JobID) + mux.Lock() + runningTTJobs-- + mux.Unlock() + }(job) } - }() + }(&workerWG) +forever: for { select { case <-errChan: - errChan <- struct{}{} - return + select { + case errChan <- struct{}{}: + default: + } + break forever + case job := <-checkQueue: if common.WantShutdown() { log.Printf("CHTTworker: WantShutdown, exiting") - return + break forever } if job == nil || len(job.MessageIDs) == 0 { log.Printf("CHTTworker: empty job, skipping") @@ -2811,7 +2856,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // log every 5s if time.Since(lastPrint) > time.Second { if common.WantShutdown() { - return + break forever } log.Printf("Newsgroup: '%s' | CHTTworker (%d): pre append job #%d waiting since %v rs.jobs=%d takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, time.Since(start), len(rs.jobs), len(TakeThisQueues[workerID])) lastPrint = time.Now() @@ -2841,7 +2886,23 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Unlock() } // end select } // end for -} // end func CheckWorker + workerWG.Wait() + startWait := time.Now() + lastPrint := startWait + for { + mux.Lock() + if runningTTJobs == 0 { + mux.Unlock() + break + } + mux.Unlock() + time.Sleep(time.Millisecond * 50) + if time.Since(lastPrint) > time.Second*5 { + log.Printf("CHTTworker (%d): waiting since %v for %d running TAKETHIS jobs to complete before exiting...", workerID, time.Since(startWait), runningTTJobs) + lastPrint = time.Now() + } + } +} // end func CHTTWorker // monitorMemoryStats logs memory statistics periodically func monitorMemoryStats() { diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index c66592f..c5a2bda 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -1069,7 +1069,8 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readCHECKResponses demuxer.RegisterCommand(cmdID, TYPE_CHECK) //log.Printf("Newsgroup: '%s' | CHECK sent '%s' (CmdID=%d) pass notify to readResponsesChan=%d", *job.Newsgroup, *msgID, cmdID, len(readCHECKResponsesChan)) - readCHECKResponsesChan <- &ReadRequest{CmdID: cmdID, Job: job, MsgID: msgID, N: n + 1, Reqs: len(messageIDs)} + //readCHECKResponsesChan <- &ReadRequest{CmdID: cmdID, Job: job, MsgID: msgID, N: n + 1, Reqs: len(messageIDs)} + readCHECKResponsesChan <- GetReadRequest(cmdID, job, msgID, n+1, len(messageIDs)) //log.Printf("Newsgroup: '%s' | CHECK notified response reader '%s' (CmdID=%d) readCHECKResponsesChan=%d", *job.Newsgroup, *msgID, cmdID, len(readCHECKResponsesChan)) } return nil @@ -1079,7 +1080,7 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readCHECKResponses // sends TAKETHIS command and article content without waiting for response // Returns command ID for later response reading - used for streaming mode // Registers the command ID with the demuxer for proper response routing -func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string, newsgroup string, demuxer *ResponseDemuxer, readTAKETHISResponsesChan chan *ReadRequest, job *CHTTJob) (cmdID uint, txBytes int, err error) { +func (c *BackendConn) 
SendTakeThisArticleStreaming(article *models.Article, nntphostname *string, newsgroup string, demuxer *ResponseDemuxer, readTAKETHISResponsesChan chan *ReadRequest, job *CHTTJob, n int, reqs int) (cmdID uint, txBytes int, err error) { //start := time.Now() //c.mux.Lock() //defer c.mux.Unlock() @@ -1173,7 +1174,8 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp //log.Printf("Newsgroup: '%s' | TAKETHIS flushed CmdID=%d '%s' (flushing took: %v) total time: %v readTAKETHISResponsesChan=%d/%d", newsgroup, cmdID, article.MessageID, time.Since(startFlush), time.Since(start), len(readTAKETHISResponsesChan), cap(readTAKETHISResponsesChan)) // Queue ReadRequest IMMEDIATELY after command (like SendCheckMultiple does at line 1608) - readTAKETHISResponsesChan <- &ReadRequest{CmdID: cmdID, Job: job, MsgID: &article.MessageID, N: 1, Reqs: 1} + //readTAKETHISResponsesChan <- &ReadRequest{CmdID: cmdID, Job: job, MsgID: &article.MessageID, N: 1, Reqs: 1} + readTAKETHISResponsesChan <- GetReadRequest(cmdID, job, &article.MessageID, n+1, reqs) // reuse global struct to reduce GC pressure //log.Printf("Newsgroup: '%s' | TAKETHIS notified response reader CmdID=%d '%s' waited %v readTAKETHISResponsesChan=%d/%d", newsgroup, cmdID, article.MessageID, time.Since(chanStart), len(readTAKETHISResponsesChan), cap(readTAKETHISResponsesChan)) // Return command ID without reading response (streaming mode) return cmdID, txBytes, nil diff --git a/internal/nntp/nntp-transfer-demuxer.go b/internal/nntp/nntp-transfer-demuxer.go index 3b879b9..61646dd 100644 --- a/internal/nntp/nntp-transfer-demuxer.go +++ b/internal/nntp/nntp-transfer-demuxer.go @@ -27,8 +27,8 @@ func NewResponseDemuxer(conn *BackendConn, errChan chan struct{}, BatchCheck int return &ResponseDemuxer{ conn: conn, signalChan: make(chan struct{}, 1), - checkResponseChan: make(chan *ResponseData, 128000), // Buffer for CHECK responses - ttResponseChan: make(chan *ResponseData, 128000), // Buffer for TAKETHIS responses + checkResponseChan: make(chan *ResponseData, 64*1024), // Buffer for CHECK responses + ttResponseChan: make(chan *ResponseData, 64*1024), // Buffer for TAKETHIS responses errChan: errChan, started: false, } @@ -171,18 +171,11 @@ func (d *ResponseDemuxer) readAndDispatch() { log.Printf("ResponseDemuxer: error reading response for cmdID=%d: %v", cmdInfo.CmdID, err) return } - respData := &ResponseData{ - CmdID: cmdInfo.CmdID, - Code: code, - Line: line, - Err: err, - } // Dispatch based on registered type - switch cmdInfo.RespType { case TYPE_CHECK: select { - case d.checkResponseChan <- respData: + case d.checkResponseChan <- GetResponseData(cmdInfo.CmdID, code, line, err): // Dispatched successfully //log.Printf("ResponseDemuxer: dispatched CHECK response cmdID=%d d.checkResponseChan=%d", cmdInfo.CmdID, len(d.checkResponseChan)) case <-d.errChan: @@ -193,7 +186,7 @@ func (d *ResponseDemuxer) readAndDispatch() { case TYPE_TAKETHIS: select { - case d.ttResponseChan <- respData: + case d.ttResponseChan <- GetResponseData(cmdInfo.CmdID, code, line, err): // Dispatched successfully //log.Printf("ResponseDemuxer: dispatched TAKETHIS response cmdID=%d d.ttResponseChan=%d", cmdInfo.CmdID, len(d.ttResponseChan)) case <-d.errChan: diff --git a/internal/nntp/nntp-transfer.go b/internal/nntp/nntp-transfer.go index 646832c..fae95e1 100644 --- a/internal/nntp/nntp-transfer.go +++ b/internal/nntp/nntp-transfer.go @@ -23,6 +23,41 @@ const ( TYPE_TAKETHIS ) +// Pool of ResponseData structs to reduce allocations +var 
ResponseDataPool = make(chan *ResponseData, 64*1024) + +// GetResponseData returns a recycled ResponseData struct or makes a new one if none are available +func GetResponseData(cmdID uint, code int, line string, err error) *ResponseData { + select { + case rd := <-ResponseDataPool: + rd.CmdID = cmdID + rd.Code = code + rd.Line = line + rd.Err = err + return rd + default: + return &ResponseData{ + CmdID: cmdID, + Code: code, + Line: line, + Err: err, + } + } +} + +// RecycleResponseData resets a ResponseData struct and recycles it back into the pool +func RecycleResponseData(rd *ResponseData) { + rd.CmdID = 0 + rd.Code = 0 + rd.Line = "" + rd.Err = nil + select { + case ResponseDataPool <- rd: + default: + // pool is full, discard + } +} + // ResponseData holds a read response from the connection type ResponseData struct { CmdID uint @@ -46,9 +81,36 @@ type TakeThisMode struct { CheckMode bool } +// TTSetup holds the response channel for a batched TAKETHIS job type TTSetup struct { ResponseChan chan *TTResponse - OffsetQ *OffsetQueue +} + +// Pool of TTSetup structs to reduce allocations +var TTSetupPool = make(chan *TTSetup, 64*1024) + +// GetTTSetup returns a recycled TTSetup struct or makes a new one if none are available +// the responseChan parameter is received from processBatch() and is mandatory and will be set on the returned struct +func GetTTSetup(responseChan chan *TTResponse) *TTSetup { + select { + case ch := <-TTSetupPool: + ch.ResponseChan = responseChan + return ch + default: + return &TTSetup{ + ResponseChan: responseChan, + } + } +} + +// RecycleTTSetup recycles a TTSetup struct back into the pool +func RecycleTTSetup(tts *TTSetup) { + tts.ResponseChan = nil + select { + case TTSetupPool <- tts: + default: + // pool is full, discard + } } // OffsetQueue manages the number of concurrent batches being processed for a newsgroup @@ -136,12 +198,79 @@ func (o *OffsetQueue) Add(n int) { } } +// TTResponse holds the response for a batched TAKETHIS job type TTResponse struct { Job *CHTTJob ForceCleanUp bool Err error } +// Pool of TTResponse structs to reduce allocations +var TTResponsePool = make(chan *TTResponse, 64*1024) + +// GetTTResponse returns a recycled TTResponse struct or makes a new one if none are available +func GetTTResponse(job *CHTTJob, forceCleanup bool, err error) *TTResponse { + select { + case resp := <-TTResponsePool: + resp.Job = job + resp.ForceCleanUp = forceCleanup + resp.Err = err + return resp + default: + return &TTResponse{ + Job: job, + ForceCleanUp: forceCleanup, + Err: err, + } + } +} + +// RecycleTTResponse resets a TTResponse struct and recycles it back into the pool +func RecycleTTResponse(resp *TTResponse) { + resp.Job = nil + resp.ForceCleanUp = false + resp.Err = nil + select { + case TTResponsePool <- resp: + default: + // pool is full, discard + } +} + +// Pool of TTResponse chans to reduce allocations +var TTResponseChans = make(chan chan *TTResponse, 16384) + +// GetTTResponseChan returns a recycled chan *TTResponse or makes a new one with capacity of 1 if none are available +func GetTTResponseChan() chan *TTResponse { + select { + case ch := <-TTResponseChans: + return ch + default: + return make(chan *TTResponse, 1) + } +} + +func RecycleTTResponseChan(ch chan *TTResponse) { + if cap(ch) != 1 { + log.Printf("Warning: Attempt to recycle chan *TTResponse with wrong capacity: %d", cap(ch)) + return + } + // empty out the channel + select { + case <-ch: + // successfully emptied + default: + // is already empty + } + // park + select { + 
case TTResponseChans <- ch: + // successfully recycled + default: + // channel pool is full, discard + } +} + type CheckResponse struct { // deprecated CmdId uint Article *models.Article @@ -150,29 +279,69 @@ type CheckResponse struct { // deprecated type ReadRequest struct { CmdID uint Job *CHTTJob + MsgID *string N int Reqs int - MsgID *string } -func (rr *ReadRequest) ClearReadRequest() { +// Pool of ReadRequest structs to reduce allocations +var ReadRequestsPool = make(chan *ReadRequest, 65536) + +// ClearReadRequest resets a ReadRequest struct and recycles it back into the pool +func (rr *ReadRequest) ClearReadRequest(respData *ResponseData) { + rr.CmdID = 0 rr.Job = nil rr.MsgID = nil - rr = nil + rr.N = 0 + rr.Reqs = 0 + RecycleReadRequest(rr) + if respData != nil { + RecycleResponseData(respData) + } +} + +// GetReadRequest returns a recycled ReadRequest struct or makes a new one if none are available +func GetReadRequest(CmdID uint, Job *CHTTJob, MsgID *string, n int, reqs int) *ReadRequest { + select { + case rr := <-ReadRequestsPool: + rr.CmdID = CmdID + rr.Job = Job + rr.MsgID = MsgID + rr.N = n + rr.Reqs = reqs + return rr + default: + return &ReadRequest{ + CmdID: CmdID, + Job: Job, + MsgID: MsgID, + N: n, + Reqs: reqs, + } + } +} + +func RecycleReadRequest(rr *ReadRequest) { + select { + case ReadRequestsPool <- rr: + default: + // pool is full, discard + } } // batched CHECK/TAKETHIS Job type CHTTJob struct { - JobID uint64 // Unique job ID for tracing - Newsgroup *string - Mux sync.RWMutex - TTMode *TakeThisMode - ResponseChan chan *TTResponse - responseSent bool // Track if response already sent (prevents double send) - Articles []*models.Article - ArticleMap map[*string]*models.Article - MessageIDs []*string - WantedIDs []*string + JobID uint64 // Unique job ID for tracing + Newsgroup *string + Mux sync.RWMutex + TTMode *TakeThisMode + ResponseChan chan *TTResponse + responseSent bool // Track if response already sent (prevents double send) + Articles []*models.Article + ArticleMap map[*string]*models.Article + MessageIDs []*string + WantedIDs []*string + PendingResponses sync.WaitGroup // Track pending TAKETHIS responses //checked uint64 //wanted uint64 //unwanted uint64 @@ -191,8 +360,8 @@ type CHTTJob struct { NGTProgress *NewsgroupTransferProgress } -// ReturnResponseChan returns the ResponseChan for the job -func (job *CHTTJob) ReturnResponseChan() chan *TTResponse { +// GetResponseChan returns the ResponseChan for the job +func (job *CHTTJob) GetResponseChan() chan *TTResponse { job.Mux.RLock() defer job.Mux.RUnlock() if job.ResponseChan != nil { @@ -231,8 +400,8 @@ func (job *CHTTJob) Response(ForceCleanUp bool, Err error) { job.responseSent = true job.Mux.Unlock() - job.ResponseChan <- &TTResponse{Job: job, ForceCleanUp: ForceCleanUp, Err: Err} - close(job.ResponseChan) + job.ResponseChan <- GetTTResponse(job, ForceCleanUp, Err) + //close(job.ResponseChan) } // NewsgroupTransferProgressMap is protected by ResultsMutex, used in nntp-transfer/main.go From 65527cb6b4e2a9d5b708d6ca2d90add9e5176a5b Mon Sep 17 00:00:00 2001 From: go-while Date: Tue, 14 Oct 2025 19:01:46 +0000 Subject: [PATCH 057/100] go1.25.3 --- .github/workflows/release.yml | 2 +- go.mod | 22 +++++++++---------- go.sum | 40 +++++++++++++++++------------------ 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2dccebc..3a85439 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -32,7 +32,7 @@ 
jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: '1.25.x' + go-version: '1.25.3' - name: Install dependencies run: go mod download diff --git a/go.mod b/go.mod index 51d3817..a99ca7a 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/go-while/go-pugleaf -go 1.25.1 +go 1.25.3 require ( github.com/gin-contrib/secure v1.1.2 @@ -8,10 +8,10 @@ require ( github.com/go-while/go-cpu-mem-profiler v0.0.0-20240612221627-856954a5fc83 github.com/mattn/go-sqlite3 v1.14.32 github.com/redis/go-redis/v9 v9.14.0 - golang.org/x/crypto v0.42.0 - golang.org/x/net v0.44.0 - golang.org/x/term v0.35.0 - golang.org/x/text v0.29.0 + golang.org/x/crypto v0.43.0 + golang.org/x/net v0.46.0 + golang.org/x/term v0.36.0 + golang.org/x/text v0.30.0 ) require ( @@ -25,7 +25,7 @@ require ( github.com/gin-contrib/sse v1.1.0 // indirect github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect - github.com/go-playground/validator/v10 v10.27.0 // indirect + github.com/go-playground/validator/v10 v10.28.0 // indirect github.com/goccy/go-json v0.10.5 // indirect github.com/goccy/go-yaml v1.18.0 // indirect github.com/gorilla/mux v1.8.1 // indirect @@ -37,14 +37,14 @@ require ( github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pelletier/go-toml/v2 v2.2.4 // indirect github.com/quic-go/qpack v0.5.1 // indirect - github.com/quic-go/quic-go v0.54.1 // indirect + github.com/quic-go/quic-go v0.55.0 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.3.0 // indirect go.uber.org/mock v0.6.0 // indirect - golang.org/x/arch v0.21.0 // indirect - golang.org/x/mod v0.28.0 // indirect + golang.org/x/arch v0.22.0 // indirect + golang.org/x/mod v0.29.0 // indirect golang.org/x/sync v0.17.0 // indirect - golang.org/x/sys v0.36.0 // indirect - golang.org/x/tools v0.37.0 // indirect + golang.org/x/sys v0.37.0 // indirect + golang.org/x/tools v0.38.0 // indirect google.golang.org/protobuf v1.36.10 // indirect ) diff --git a/go.sum b/go.sum index 5ec1223..56d665f 100644 --- a/go.sum +++ b/go.sum @@ -31,8 +31,8 @@ github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/o github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= -github.com/go-playground/validator/v10 v10.27.0 h1:w8+XrWVMhGkxOaaowyKH35gFydVHOvC0/uWoy2Fzwn4= -github.com/go-playground/validator/v10 v10.27.0/go.mod h1:I5QpIEbmr8On7W0TktmJAumgzX4CA1XNl4ZmDuVHKKo= +github.com/go-playground/validator/v10 v10.28.0 h1:Q7ibns33JjyW48gHkuFT91qX48KG0ktULL6FgHdG688= +github.com/go-playground/validator/v10 v10.28.0/go.mod h1:GoI6I1SjPBh9p7ykNE/yj3fFYbyDOpwMn5KXd+m2hUU= github.com/go-while/go-cpu-mem-profiler v0.0.0-20240612221627-856954a5fc83 h1:vehfiL7LsK8bJQZdVuJxcMem4AP2HwPEQ8orUUnIA+E= github.com/go-while/go-cpu-mem-profiler v0.0.0-20240612221627-856954a5fc83/go.mod h1:31AY7BccUb7qvKi4LQCwLOYsvcza9TdQtl/zHEIOcRE= github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= @@ -65,8 +65,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/quic-go/qpack v0.5.1 h1:giqksBPnT/HDtZ6VhtFKgoLOWmlyo9Ei6u9PqzIMbhI= 
github.com/quic-go/qpack v0.5.1/go.mod h1:+PC4XFrEskIVkcLzpEkbLqq1uCoxPhQuvK5rH1ZgaEg= -github.com/quic-go/quic-go v0.54.1 h1:4ZAWm0AhCb6+hE+l5Q1NAL0iRn/ZrMwqHRGQiFwj2eg= -github.com/quic-go/quic-go v0.54.1/go.mod h1:e68ZEaCdyviluZmy44P6Iey98v/Wfz6HCjQEm+l8zTY= +github.com/quic-go/quic-go v0.55.0 h1:zccPQIqYCXDt5NmcEabyYvOnomjs8Tlwl7tISjJh9Mk= +github.com/quic-go/quic-go v0.55.0/go.mod h1:DR51ilwU1uE164KuWXhinFcKWGlEjzys2l8zUl5Ss1U= github.com/redis/go-redis/v9 v9.14.0 h1:u4tNCjXOyzfgeLN+vAZaW1xUooqWDqVEsZN0U01jfAE= github.com/redis/go-redis/v9 v9.14.0/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -84,25 +84,25 @@ github.com/ugorji/go/codec v1.3.0 h1:Qd2W2sQawAfG8XSvzwhBeoGq71zXOC/Q1E9y/wUcsUA github.com/ugorji/go/codec v1.3.0/go.mod h1:pRBVtBSKl77K30Bv8R2P+cLSGaTtex6fsA2Wjqmfxj4= go.uber.org/mock v0.6.0 h1:hyF9dfmbgIX5EfOdasqLsWD6xqpNZlXblLB/Dbnwv3Y= go.uber.org/mock v0.6.0/go.mod h1:KiVJ4BqZJaMj4svdfmHM0AUx4NJYO8ZNpPnZn1Z+BBU= -golang.org/x/arch v0.21.0 h1:iTC9o7+wP6cPWpDWkivCvQFGAHDQ59SrSxsLPcnkArw= -golang.org/x/arch v0.21.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A= -golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI= -golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8= -golang.org/x/mod v0.28.0 h1:gQBtGhjxykdjY9YhZpSlZIsbnaE2+PgjfLWUQTnoZ1U= -golang.org/x/mod v0.28.0/go.mod h1:yfB/L0NOf/kmEbXjzCPOx1iK1fRutOydrCMsqRhEBxI= -golang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I= -golang.org/x/net v0.44.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY= +golang.org/x/arch v0.22.0 h1:c/Zle32i5ttqRXjdLyyHZESLD/bB90DCU1g9l/0YBDI= +golang.org/x/arch v0.22.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A= +golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= +golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= +golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA= +golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= +golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4= +golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= -golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= -golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= -golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= -golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= -golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= -golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= +golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= +golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q= +golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss= +golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= 
+golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= +golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= +golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= From 3549a37906fb601b1ce73e7c1f05b4c86dc5fa36 Mon Sep 17 00:00:00 2001 From: go-while Date: Tue, 14 Oct 2025 19:02:48 +0000 Subject: [PATCH 058/100] v0.4.7.4 --- appVersion.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appVersion.txt b/appVersion.txt index 87b18a5..b48b2de 100644 --- a/appVersion.txt +++ b/appVersion.txt @@ -1 +1 @@ -4.7.3 +4.7.4 From 528f4854c2e7acef5a541a0aa526ae4d69f6f495 Mon Sep 17 00:00:00 2001 From: go-while Date: Tue, 14 Oct 2025 19:05:57 +0000 Subject: [PATCH 059/100] Update common.go --- internal/common/common.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/common/common.go b/internal/common/common.go index 4af1631..19e8b2c 100644 --- a/internal/common/common.go +++ b/internal/common/common.go @@ -43,7 +43,7 @@ func IsClosedChannel(ch chan struct{}) bool { } func ChanLock(lockChan chan struct{}) { - // try aquire lock + // try acquire lock lockChan <- struct{}{} } From 8c26054003a790b31543c0038f8ee46aae7445d1 Mon Sep 17 00:00:00 2001 From: go-while Date: Tue, 14 Oct 2025 19:06:01 +0000 Subject: [PATCH 060/100] Update headers.go --- internal/common/headers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/common/headers.go b/internal/common/headers.go index a921f52..9d706f1 100644 --- a/internal/common/headers.go +++ b/internal/common/headers.go @@ -16,7 +16,7 @@ var VERBOSE_HEADERS bool = false var IgnoreGoogleHeaders bool = false var UseStrictGroupValidation bool = false var ErrNoNewsgroups = fmt.Errorf("ErrNoNewsgroups") -var unwantedChars = "\t\x00;:,<>#*§()[]{}?!%$§/\\@\"'`" +var unwantedChars = "\t\x00;:,<>#*()[]{}?!%$§/\\@\"'`" var ( // Do NOT change this here! these are needed for runtime ! 
// validGroupNameRegex validates newsgroup names according to RFC standards From bd422a463bd71ed502ce0c70e39d6c97533f0820 Mon Sep 17 00:00:00 2001 From: go-while Date: Tue, 14 Oct 2025 22:00:29 +0000 Subject: [PATCH 061/100] Update nntp-backend-pool.go --- internal/nntp/nntp-backend-pool.go | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/internal/nntp/nntp-backend-pool.go b/internal/nntp/nntp-backend-pool.go index 76cbdd1..1e87e61 100644 --- a/internal/nntp/nntp-backend-pool.go +++ b/internal/nntp/nntp-backend-pool.go @@ -383,19 +383,25 @@ func (pool *Pool) ClosePool() error { close(pool.connections) } log.Printf("[NNTP-POOL] Closing (%s:%d) active=%d", pool.Backend.Host, pool.Backend.Port, pool.activeConns) + allClosed := pool.activeConns == 0 pool.mux.Unlock() - // Close all connections in the pool -closeWait: - for { - select { - case conn := <-pool.connections: - if conn != nil { - conn.ForceCloseConn() + if !allClosed { + // Close all connections in the pool + closeWait: + for { + select { + case conn, ok := <-pool.connections: + if !ok { + break closeWait + } + if conn != nil { + conn.ForceCloseConn() + } + default: + // pass + break closeWait } - default: - // pass - break closeWait } } From ddea50555ee74929227519eb77ed499cad445be9 Mon Sep 17 00:00:00 2001 From: go-while Date: Thu, 16 Oct 2025 01:20:35 +0000 Subject: [PATCH 062/100] testing --- cmd/nntp-transfer/main.go | 87 +++++++++++++++++++++++--- internal/nntp/nntp-article-common.go | 83 +++++++++++++----------- internal/nntp/nntp-backend-pool.go | 6 +- internal/nntp/nntp-client-commands.go | 32 ++++++---- internal/nntp/nntp-transfer-demuxer.go | 12 ++++ internal/nntp/nntp-transfer.go | 2 + internal/processor/analyze.go | 6 +- internal/processor/proc_DLArt.go | 3 +- internal/processor/proc_DLXHDR.go | 2 +- 9 files changed, 168 insertions(+), 65 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index f64bde5..3c9f770 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -112,7 +112,7 @@ var StartDate string var EndDate string var GlobalSpeed uint64 -var totalTransferred, totalTTSentCount, totalChecked, totalWanted, totalUnwanted, totalRejected, totalRetry, totalSkipped, totalRedisCacheHits, totalRedisCacheBeforeCheck, totalRedisCacheBeforeTakethis, totalTXErrors, totalConnErrors, globalTotalArticles, nothingInDateRange uint64 +var totalTransferred, totalTTSentCount, totalCheckSentCount, totalChecked, totalWanted, totalUnwanted, totalRejected, totalRetry, totalSkipped, totalRedisCacheHits, totalRedisCacheBeforeCheck, totalRedisCacheBeforeTakethis, totalTXErrors, totalConnErrors, globalTotalArticles, nothingInDateRange uint64 func CalcGlobalSpeed() { for { @@ -667,6 +667,9 @@ func getArticlesBatchWithDateFilter(db *database.Database, ng *models.Newsgroup, // Build query with date filtering var whereConditions []string + // Always exclude NULL date_sent values when using ORDER BY date_sent + whereConditions = append(whereConditions, "date_sent IS NOT NULL") + if startTime != nil { whereConditions = append(whereConditions, "date_sent >= ?") args = append(args, startTime.UTC().Format("2006-01-02 15:04:05")) @@ -726,6 +729,9 @@ func getArticleCountWithDateFilter(db *database.Database, groupDBs *database.Gro // Build count query with date filtering - must use live COUNT(*) query var whereConditions []string + // Always exclude NULL date_sent values to match SELECT query behavior + whereConditions = append(whereConditions, "date_sent IS NOT 
NULL") + if startTime != nil { whereConditions = append(whereConditions, "date_sent >= ?") args = append(args, startTime.UTC().Format("2006-01-02 15:04:05")) @@ -1146,8 +1152,8 @@ func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchChe for _, result := range results { log.Print(result) } - log.Printf("Summary: total: %d | transferred: %d | cache_hits: %d (before_check: %d, before_takethis: %d) | checked: %d | wanted: %d | unwanted: %d | rejected: %d | retry: %d | skipped: %d | TX_Errors: %d | connErrors: %d", - globalTotalArticles, totalTransferred, totalRedisCacheHits, totalRedisCacheBeforeCheck, totalRedisCacheBeforeTakethis, totalChecked, totalWanted, totalUnwanted, totalRejected, totalRetry, totalSkipped, totalTXErrors, totalConnErrors) + log.Printf("Summary: total: %d | transferred: %d | cache_hits: %d (before_check: %d, before_takethis: %d) | checked: %d/%d | wanted: %d | unwanted: %d | rejected: %d | retry: %d | skipped: %d | TX_Errors: %d | connErrors: %d", + globalTotalArticles, totalTransferred, totalRedisCacheHits, totalRedisCacheBeforeCheck, totalRedisCacheBeforeTakethis, totalChecked, totalCheckSentCount, totalWanted, totalUnwanted, totalRejected, totalRetry, totalSkipped, totalTXErrors, totalConnErrors) nntp.ResultsMutex.Unlock() log.Printf("Debug: StructChansCap1: %d/%d", len(common.StructChansCap1), cap(common.StructChansCap1)) return nil @@ -1386,9 +1392,10 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i nntp.ResultsMutex.Lock() ngtprogress.Mux.Lock() - result := fmt.Sprintf("END Newsgroup: '%s' total: %d | transferred: %d | cache_hits: %d | checked: %d | wanted: %d | unwanted: %d | rejected: %d | retry: %d | skipped: %d | TX_Errors: %d | connErrors: %d | took %v", + result := fmt.Sprintf("END Newsgroup: '%s' total: %d | CHECK_sent: %d | checked: %d | transferred: %d | cache_hits: %d | wanted: %d | unwanted: %d | rejected: %d | retry: %d | skipped: %d | TX_Errors: %d | connErrors: %d | took %v", ng.Name, totalNGArticles, - ngtprogress.Transferred, ngtprogress.RedisCached, ngtprogress.Checked, + ngtprogress.CheckSentCount, ngtprogress.Checked, + ngtprogress.Transferred, ngtprogress.RedisCached, ngtprogress.Wanted, ngtprogress.Unwanted, ngtprogress.Rejected, ngtprogress.Retry, ngtprogress.Skipped, ngtprogress.TxErrors, ngtprogress.ConnErrors, @@ -1397,6 +1404,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i globalTotalArticles += uint64(totalNGArticles) totalTransferred += ngtprogress.Transferred totalTTSentCount += ngtprogress.TTSentCount + totalCheckSentCount += ngtprogress.CheckSentCount totalRedisCacheHits += ngtprogress.RedisCached totalRedisCacheBeforeCheck += ngtprogress.RedisCachedBeforeCheck totalRedisCacheBeforeTakethis += ngtprogress.RedisCachedBeforeTakethis @@ -1459,7 +1467,6 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } offset += int64(len(articles)) - articlesProcessed += int64(len(articles)) if dryRun && debugCapture { debugMutex.Lock() @@ -1664,7 +1671,7 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl return job.QuitResponseChan(), nil } if VERBOSE { - log.Printf("Newsgroup: '%s' | Sending CHECK commands for %d/%d articles", *ttMode.Newsgroup, len(job.MessageIDs), len(articles)) + log.Printf("Newsgroup: '%s' | processBatch: Received %d articles, queuing %d for CHECK (Redis filtered: %d)", *ttMode.Newsgroup, len(articles), len(job.MessageIDs), redis_cached) } // Assign job to worker 
(consistent assignment + load balancing) @@ -1819,6 +1826,10 @@ var QueuesMutex sync.RWMutex var CheckQueues []chan *nntp.CHTTJob var TakeThisQueues []chan *nntp.CHTTJob +// Demuxers holds per-worker demuxer instances for statistics tracking +var Demuxers []*nntp.ResponseDemuxer +var DemuxersMutex sync.RWMutex + // NewsgroupWorkerMap tracks which worker is assigned to each newsgroup var NewsgroupWorkerMap = make(map[string]int) var NewsgroupWorkerMapMux sync.RWMutex @@ -1897,6 +1908,9 @@ func BootConnWorkers(pool *nntp.Pool, redisCli *redis.Client) { CheckQueues = make([]chan *nntp.CHTTJob, nntp.NNTPTransferThreads) TakeThisQueues = make([]chan *nntp.CHTTJob, nntp.NNTPTransferThreads) WorkerQueueLength = make([]int, nntp.NNTPTransferThreads) + DemuxersMutex.Lock() + Demuxers = make([]*nntp.ResponseDemuxer, nntp.NNTPTransferThreads) + DemuxersMutex.Unlock() for i := range CheckQueues { CheckQueues[i] = make(chan *nntp.CHTTJob, MaxQueuedJobs) // no cap! only accepts if there is a reader! TakeThisQueues[i] = make(chan *nntp.CHTTJob, MaxQueuedJobs) // allows max N queued TT jobs @@ -2243,6 +2257,13 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // Create ResponseDemuxer to eliminate race conditions in ReadCodeLine demuxer := nntp.NewResponseDemuxer(conn, errChan, BatchCheck) + // Store demuxer for statistics tracking + DemuxersMutex.Lock() + if workerID < len(Demuxers) { + Demuxers[workerID] = demuxer + } + DemuxersMutex.Unlock() + defer func(conn *nntp.BackendConn, rs *ReturnSignal) { conn.ForceCloseConn() rs.ExitChan <- rs @@ -2349,6 +2370,10 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d Sent CHECK for batch (offset %d: %d-%d), responses will be read asynchronously...", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) } + // Add CHECK sent count to progress after all batches are sent + currentJob.NGTProgress.Mux.Lock() + currentJob.NGTProgress.CheckSentCount += currentJob.CheckSentCount + currentJob.NGTProgress.Mux.Unlock() } else { //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d skipping CHECK for %d message IDs (TAKETHIS mode)", *currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs)) currentJob.WantedIDs = currentJob.MessageIDs @@ -3224,6 +3249,30 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { {{end}} + {{if .DemuxerStats}} +

+	<h3>Response Demuxer Statistics ({{len .DemuxerStats}} Workers)</h3>
+	<table>
+		<tr>
+			<th>Worker ID</th>
+			<th>Pending Commands</th>
+			<th>CHECK Responses Queued</th>
+			<th>TAKETHIS Responses Queued</th>
+		</tr>
+		{{range .DemuxerStats}}
+		<tr>
+			<td>Worker #{{.WorkerID}}</td>
+			<td>{{.PendingCommands}}</td>
+			<td>{{.CheckResponsesQueued}}</td>
+			<td>{{.TTResponsesQueued}}</td>
+		</tr>
+		{{end}}
+	</table>
+ {{end}} + ` @@ -3300,6 +3349,28 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { return progressList[i].Name < progressList[j].Name }) + // Collect demuxer statistics + type DemuxerStats struct { + WorkerID int + PendingCommands int + CheckResponsesQueued int + TTResponsesQueued int + } + var demuxerStats []DemuxerStats + DemuxersMutex.RLock() + for i, demux := range Demuxers { + if demux != nil { + pending, checkQueued, ttQueued := demux.GetStatistics() + demuxerStats = append(demuxerStats, DemuxerStats{ + WorkerID: i, + PendingCommands: pending, + CheckResponsesQueued: checkQueued, + TTResponsesQueued: ttQueued, + }) + } + } + DemuxersMutex.RUnlock() + data := struct { TotalNewsgroups int64 NewsgroupsToProcess int64 @@ -3307,6 +3378,7 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { Started int64 Finished int64 Progress []ProgressInfo + DemuxerStats []DemuxerStats Timestamp string StartDate string EndDate string @@ -3333,6 +3405,7 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { Started: started, Finished: finished, Progress: progressList, + DemuxerStats: demuxerStats, Timestamp: time.Now().Format("2006-01-02 15:04:05"), StartDate: StartDate, EndDate: EndDate, diff --git a/internal/nntp/nntp-article-common.go b/internal/nntp/nntp-article-common.go index 2c69da9..ca3a888 100644 --- a/internal/nntp/nntp-article-common.go +++ b/internal/nntp/nntp-article-common.go @@ -28,7 +28,7 @@ type ArticleRetrievalResult struct { Overview *models.Overview ArticleNum int64 MsgIdItem *history.MessageIdItem - GroupDBs *database.GroupDBs + //GroupDBs *database.GroupDBs } // retrieveArticleCommon handles the common logic for ARTICLE, HEAD, BODY, and STAT commands @@ -37,15 +37,10 @@ func (c *ClientConnection) retrieveArticleCommon(args []string, retrievalType Ar // Get article data using common logic result, err := c.getArticleData(args) - if result == nil || err != nil { + if result.Article == nil || err != nil { log.Printf("retrieveArticleCommon Error retrieving article data: %v", err) return nil // Error already handled in getArticleData } - defer func() { - if result.GroupDBs != nil { - result.GroupDBs.Return(c.server.DB) - } - }() // Update current article if we have a current group if c.currentGroup != "" { @@ -75,7 +70,7 @@ func (c *ClientConnection) retrieveArticleCommon(args []string, retrievalType Ar } // getArticleData handles the common article lookup logic -func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResult, error) { +func (c *ClientConnection) getArticleData(args []string) (ArticleRetrievalResult, error) { var groupDBs *database.GroupDBs var articleNum int64 var msgIdItem *history.MessageIdItem @@ -86,21 +81,21 @@ func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResul if c.currentGroup == "" { c.rateLimitOnError() c.sendResponse(412, "No newsgroup selected") - return nil, nil + return ArticleRetrievalResult{}, nil } // Use current article articleNum = c.currentArticle if articleNum == 0 { c.rateLimitOnError() c.sendResponse(420, "Current article number is invalid") - return nil, nil + return ArticleRetrievalResult{}, nil } // Get group database groupDBs, err = c.server.DB.GetGroupDBs(c.currentGroup) if err != nil { c.rateLimitOnError() c.sendResponse(411, "No such newsgroup") - return nil, nil + return ArticleRetrievalResult{}, nil } } else { @@ -110,25 +105,25 @@ func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResul if msgIdItem == nil { c.rateLimitOnError() 
c.sendResponse(500, "Error MsgId Cache") - return nil, nil + return ArticleRetrievalResult{}, nil } if c.server.local430.Check(msgIdItem) { c.rateLimitOnError() c.sendResponse(430, "Cache says no!") - return nil, nil + return ArticleRetrievalResult{}, nil } } else { if c.currentGroup == "" { c.rateLimitOnError() c.sendResponse(412, "No newsgroup selected") - return nil, nil + return ArticleRetrievalResult{}, nil } // Article number format articleNum, err = strconv.ParseInt(args[0], 10, 64) if err != nil { c.rateLimitOnError() c.sendResponse(501, "Invalid article number") - return nil, nil + return ArticleRetrievalResult{}, nil } } } @@ -144,7 +139,7 @@ func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResul c.server.local430.Add(msgIdItem) c.rateLimitOnError() c.sendResponse(430, "NotF1") - return nil, nil + return ArticleRetrievalResult{}, nil } found := false @@ -154,14 +149,14 @@ func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResul c.server.local430.Add(msgIdItem) c.rateLimitOnError() c.sendResponse(430, "NotF1") - return nil, nil + return ArticleRetrievalResult{}, nil case history.CasePass: // Not found in history c.rateLimitOnError() log.Printf("MsgIdItem not found in history: '%#v'", msgIdItem) c.sendResponse(430, "NotF2") - return nil, nil + return ArticleRetrievalResult{}, nil case history.CaseDupes: // Found in history - storage token should now be available @@ -174,7 +169,7 @@ func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResul log.Printf("MsgIdItem not found in cache: %#v", msgIdItem) c.rateLimitOnError() c.sendResponse(430, "NotF2") - return nil, nil + return ArticleRetrievalResult{}, nil } // Extract storage token or use cached values @@ -192,7 +187,7 @@ func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResul c.rateLimitOnError() c.sendResponse(430, "NotF3") log.Printf("Invalid storage token format: %#v", msgIdItem) - return nil, nil + return ArticleRetrievalResult{}, nil } task := c.server.DB.Batch.GetOrCreateTasksMapKey(parts[0]) @@ -200,7 +195,7 @@ func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResul c.server.local430.Add(msgIdItem) c.rateLimitOnError() c.sendResponse(430, "NotF4") - return nil, nil + return ArticleRetrievalResult{}, nil } articleNumParsed, err := strconv.ParseInt(parts[1], 10, 64) @@ -208,7 +203,7 @@ func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResul c.server.local430.Add(msgIdItem) c.rateLimitOnError() c.sendResponse(430, "NotF5") - return nil, nil + return ArticleRetrievalResult{}, nil } msgIdItem.Mux.Lock() @@ -220,7 +215,7 @@ func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResul c.server.local430.Add(msgIdItem) c.rateLimitOnError() c.sendResponse(430, "NotF6") - return nil, nil + return ArticleRetrievalResult{}, nil } } @@ -231,7 +226,7 @@ func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResul c.server.local430.Add(msgIdItem) c.rateLimitOnError() c.sendResponse(430, "NotF7") - return nil, nil + return ArticleRetrievalResult{}, nil } } @@ -241,7 +236,7 @@ func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResul c.server.local430.Add(msgIdItem) c.rateLimitOnError() c.sendResponse(430, "NotF8") - return nil, nil + return ArticleRetrievalResult{}, nil } articleNum = article.ArticleNums[groupDBs.NewsgroupPtr] @@ -252,8 +247,9 @@ func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResul if err != 
nil { c.rateLimitOnError() c.sendResponse(411, "No such newsgroup") - return nil, nil + return ArticleRetrievalResult{}, nil } + defer groupDBs.Return(c.server.DB) } // For STAT command, we can use overview instead of full article @@ -261,7 +257,7 @@ func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResul if err != nil { c.rateLimitOnError() c.sendResponse(423, "No such article number") - return nil, nil + return ArticleRetrievalResult{}, nil } // For other commands, get the full article @@ -269,11 +265,11 @@ func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResul if err != nil { c.rateLimitOnError() c.sendResponse(423, "No such article number") - return nil, nil + return ArticleRetrievalResult{}, nil } if article.MessageID == "" { log.Printf("Error in getArticleData: Article with no message-id: %#v", article) - return nil, fmt.Errorf("error in getArticleData: article with no message-id") + return ArticleRetrievalResult{}, fmt.Errorf("error in getArticleData: article with no message-id") } // Create or get msgIdItem messageID := article.MessageID @@ -284,7 +280,7 @@ func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResul if msgIdItem == nil { c.rateLimitOnError() c.sendResponse(500, "Error MsgId Cache") - return nil, fmt.Errorf("error msgid cache") + return ArticleRetrievalResult{}, fmt.Errorf("error msgid cache") } task := c.server.DB.Batch.GetOrCreateTasksMapKey(groupDBs.Newsgroup) @@ -296,24 +292,26 @@ func (c *ClientConnection) getArticleData(args []string) (*ArticleRetrievalResul } } - return &ArticleRetrievalResult{ + return ArticleRetrievalResult{ Article: article, Overview: overview, ArticleNum: articleNum, MsgIdItem: msgIdItem, - GroupDBs: groupDBs, }, nil } // sendArticleContent sends full article (headers + body) for ARTICLE command -func (c *ClientConnection) sendArticleContent(result *ArticleRetrievalResult) error { +func (c *ClientConnection) sendArticleContent(result ArticleRetrievalResult) error { + if c == nil || c.textConn == nil { + return fmt.Errorf("nil connection in sendArticleContent") + } // Parse headers and body from the article - log.Printf("sendArticleContent for result='%#v", result) + //log.Printf("sendArticleContent for result='%#v", result) headers := c.parseArticleHeadersFull(result.Article) bodyLines := c.parseArticleBody(result.Article) // Send response: 220 n message-id Article follows - if err := c.sendResponse(220, fmt.Sprintf("%d %s Article follows", result.ArticleNum, result.MsgIdItem.MessageId)); err != nil { + if err := c.textConn.PrintfLine("220 %d %s Article follows", result.ArticleNum, result.MsgIdItem.MessageId); err != nil { return err } @@ -341,7 +339,10 @@ func (c *ClientConnection) sendArticleContent(result *ArticleRetrievalResult) er } // sendHeadContent sends only headers for HEAD command -func (c *ClientConnection) sendHeadContent(result *ArticleRetrievalResult) error { +func (c *ClientConnection) sendHeadContent(result ArticleRetrievalResult) error { + if c == nil || c.textConn == nil { + return fmt.Errorf("nil connection in sendHeadContent") + } // Parse headers from the article headers := c.parseArticleHeadersFull(result.Article) @@ -362,7 +363,10 @@ func (c *ClientConnection) sendHeadContent(result *ArticleRetrievalResult) error } // sendBodyContent sends only body for BODY command -func (c *ClientConnection) sendBodyContent(result *ArticleRetrievalResult) error { +func (c *ClientConnection) sendBodyContent(result ArticleRetrievalResult) error { + if c == nil 
|| c.textConn == nil { + return fmt.Errorf("nil connection in sendBodyContent") + } // Parse body from the article bodyLines := c.parseArticleBody(result.Article) @@ -383,7 +387,10 @@ func (c *ClientConnection) sendBodyContent(result *ArticleRetrievalResult) error } // sendStatContent sends only status for STAT command -func (c *ClientConnection) sendStatContent(result *ArticleRetrievalResult) error { +func (c *ClientConnection) sendStatContent(result ArticleRetrievalResult) error { + if c == nil || c.textConn == nil { + return fmt.Errorf("nil connection in sendStatContent") + } // Send response: 223 n message-id status return c.sendResponse(223, fmt.Sprintf("%d %s Article exists", result.ArticleNum, result.MsgIdItem.MessageId)) } diff --git a/internal/nntp/nntp-backend-pool.go b/internal/nntp/nntp-backend-pool.go index 1e87e61..82d19fb 100644 --- a/internal/nntp/nntp-backend-pool.go +++ b/internal/nntp/nntp-backend-pool.go @@ -71,7 +71,7 @@ func (pool *Pool) XOver(group string, start, end int64, enforceLimit bool) ([]Ov return result, nil } -func (pool *Pool) XHdr(group string, header string, start, end int64) ([]*HeaderLine, error) { +func (pool *Pool) XHdr(group string, header string, start, end int64) ([]HeaderLine, error) { // Get a connection from the pool client, err := pool.Get(MODE_READER_MV) if err != nil { @@ -93,7 +93,7 @@ func (pool *Pool) XHdr(group string, header string, start, end int64) ([]*Header // XHdrStreamed performs XHDR command and streams results through a channel // The channel will be closed when all results are sent or an error occurs // NOTE: This function takes ownership of the connection and will return it to the pool when done -func (pool *Pool) XHdrStreamed(group string, header string, start, end int64, xhdrChan chan<- *HeaderLine, shutdownChan <-chan struct{}) error { +func (pool *Pool) XHdrStreamed(group string, header string, start, end int64, xhdrChan chan<- HeaderLine, shutdownChan <-chan struct{}) error { // Get a connection from the pool client, err := pool.Get(MODE_READER_MV) if err != nil { @@ -102,7 +102,7 @@ func (pool *Pool) XHdrStreamed(group string, header string, start, end int64, xh } // Handle connection cleanup in a goroutine so the function can return immediately - go func(client *BackendConn, group string, header string, start, end int64, resultChan chan<- *HeaderLine, shutdownChan <-chan struct{}) { + go func(client *BackendConn, group string, header string, start, end int64, resultChan chan<- HeaderLine, shutdownChan <-chan struct{}) { // Use the streaming XHdr function on the client if err := client.XHdrStreamed(group, header, start, end, resultChan, shutdownChan); err != nil { // If there's an error, close the connection instead of returning it diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index c5a2bda..37682c7 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -8,6 +8,7 @@ import ( "log" "strconv" "strings" + "sync/atomic" "time" "github.com/go-while/go-pugleaf/internal/common" @@ -490,7 +491,7 @@ func (c *BackendConn) XOver(groupName string, start, end int64, enforceLimit boo // XHdr retrieves specific header field for a range of articles // Automatically limits to max 1000 articles to prevent SQLite overload -func (c *BackendConn) XHdr(groupName, field string, start, end int64) ([]*HeaderLine, error) { +func (c *BackendConn) XHdr(groupName, field string, start, end int64) ([]HeaderLine, error) { c.mux.Lock() if !c.IsConnected() { 
c.mux.Unlock() @@ -538,13 +539,15 @@ func (c *BackendConn) XHdr(groupName, field string, start, end int64) ([]*Header } // Parse header lines - var headers = make([]*HeaderLine, 0, len(lines)) + var headers = make([]HeaderLine, 0, len(lines)) for _, line := range lines { header, err := c.parseHeaderLine(line) if err != nil { continue // Skip malformed lines } - headers = append(headers, header) + if header.ArticleNum > 0 { + headers = append(headers, header) + } } return headers, nil @@ -566,7 +569,7 @@ func (c *BackendConn) WantShutdown(shutdownChan <-chan struct{}) bool { // XHdrStreamed performs XHDR command and streams results line by line through a channel // Fetches max 1000 hdrs and starts a new fetch if the channel is less than 10% capacity -func (c *BackendConn) XHdrStreamed(groupName, field string, start, end int64, xhdrChan chan<- *HeaderLine, shutdownChan <-chan struct{}) error { +func (c *BackendConn) XHdrStreamed(groupName, field string, start, end int64, xhdrChan chan<- HeaderLine, shutdownChan <-chan struct{}) error { channelCap := cap(xhdrChan) lowWaterMark := channelCap / 10 // 10% threshold if lowWaterMark < 1 { @@ -622,7 +625,7 @@ func (c *BackendConn) XHdrStreamed(groupName, field string, start, end int64, xh } // XHdrStreamedBatch performs XHDR command and streams results line by line through a channel -func (c *BackendConn) XHdrStreamedBatch(groupName, field string, start, end int64, xhdrChan chan<- *HeaderLine, shutdownChan <-chan struct{}) error { +func (c *BackendConn) XHdrStreamedBatch(groupName, field string, start, end int64, xhdrChan chan<- HeaderLine, shutdownChan <-chan struct{}) error { c.mux.Lock() if !c.IsConnected() { c.mux.Unlock() @@ -1009,22 +1012,22 @@ func (c *BackendConn) parseOverviewLine(line string) (OverviewLine, error) { // parseHeaderLine parses a single XHDR response line // Format: articlenumheader-value -func (c *BackendConn) parseHeaderLine(line string) (*HeaderLine, error) { +func (c *BackendConn) parseHeaderLine(line string) (HeaderLine, error) { parts := strings.SplitN(line, " ", 2) if len(parts) < 2 { - return nil, fmt.Errorf("malformed XHDR line: %s", line) + return HeaderLine{}, fmt.Errorf("malformed XHDR line: %s", line) } articleNum, err := strconv.ParseInt(parts[0], 10, 64) if err != nil { log.Printf("Invalid article number in XHDR line: %q", parts[0]) - return nil, fmt.Errorf("invalid article number in XHDR line: %q", parts[0]) + return HeaderLine{}, fmt.Errorf("invalid article number in XHDR line: %q", parts[0]) } - - return &HeaderLine{ + headerline := HeaderLine{ ArticleNum: articleNum, Value: parts[1], - }, nil + } + return headerline, nil } // SendCheckMultiple sends CHECK commands for multiple message IDs without returning responses! 
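The hunk below has SendCheckMultiple count the CHECK commands it actually writes and publish the total once with a single atomic add, rather than touching the shared counter on every iteration. A minimal, self-contained sketch of that pattern follows; the Job type and the sendCheck helper are hypothetical stand-ins, not code from this repository:

// Sketch only: Job and sendCheck are hypothetical stand-ins illustrating the
// "count locally, publish once atomically" pattern used for CheckSentCount.
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

type Job struct {
	CheckSentCount uint64 // shared counter, only ever updated atomically
}

// sendCheck stands in for writing one CHECK command to the connection.
func sendCheck(msgID string) error { return nil }

// sendCheckBatch counts successful sends in a local variable and adds the
// total to the shared counter with a single atomic operation per batch.
func sendCheckBatch(job *Job, msgIDs []string) {
	var sent uint64
	for _, id := range msgIDs {
		if id == "" {
			continue // skipped IDs are not counted as sent
		}
		if err := sendCheck(id); err != nil {
			break
		}
		sent++
	}
	atomic.AddUint64(&job.CheckSentCount, sent)
}

func main() {
	job := &Job{}
	var wg sync.WaitGroup
	for w := 0; w < 4; w++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			sendCheckBatch(job, []string{"<a@example>", "", "<b@example>"})
		}()
	}
	wg.Wait()
	fmt.Println("CHECK commands sent:", atomic.LoadUint64(&job.CheckSentCount))
}

Doing a single atomic.AddUint64 per batch keeps the send loop free of shared-memory traffic while still letting concurrent workers accumulate into one job-level counter.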
@@ -1052,6 +1055,7 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readCHECKResponses //defer writer.Flush() //log.Printf("Newsgroup: '%s' | SendCheckMultiple commands for %d message IDs", *job.Newsgroup, len(messageIDs)) + checksSent := uint64(0) for n, msgID := range messageIDs { if msgID == nil || *msgID == "" { log.Printf("Newsgroup: '%s' | Skipping empty message ID in CHECK command", *job.Newsgroup) @@ -1065,6 +1069,8 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readCHECKResponses return fmt.Errorf("failed to send CHECK '%s': %w", *msgID, err) } + checksSent++ + // Register command ID with demuxer as TYPE_CHECK demuxer.RegisterCommand(cmdID, TYPE_CHECK) @@ -1073,6 +1079,10 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readCHECKResponses readCHECKResponsesChan <- GetReadRequest(cmdID, job, msgID, n+1, len(messageIDs)) //log.Printf("Newsgroup: '%s' | CHECK notified response reader '%s' (CmdID=%d) readCHECKResponsesChan=%d", *job.Newsgroup, *msgID, cmdID, len(readCHECKResponsesChan)) } + + // Update job counter with how many CHECK commands were actually sent + atomic.AddUint64(&job.CheckSentCount, checksSent) + return nil } diff --git a/internal/nntp/nntp-transfer-demuxer.go b/internal/nntp/nntp-transfer-demuxer.go index 61646dd..7a69e68 100644 --- a/internal/nntp/nntp-transfer-demuxer.go +++ b/internal/nntp/nntp-transfer-demuxer.go @@ -204,3 +204,15 @@ func (d *ResponseDemuxer) readAndDispatch() { } } } + +// GetStatistics returns current demuxer statistics +func (d *ResponseDemuxer) GetStatistics() (pendingCommands int, checkResponsesQueued int, ttResponsesQueued int) { + d.cmdIDQMux.RLock() + pendingCommands = len(d.cmdIDQ) + d.cmdIDQMux.RUnlock() + + checkResponsesQueued = len(d.checkResponseChan) + ttResponsesQueued = len(d.ttResponseChan) + + return pendingCommands, checkResponsesQueued, ttResponsesQueued +} diff --git a/internal/nntp/nntp-transfer.go b/internal/nntp/nntp-transfer.go index fae95e1..af4d635 100644 --- a/internal/nntp/nntp-transfer.go +++ b/internal/nntp/nntp-transfer.go @@ -342,6 +342,7 @@ type CHTTJob struct { MessageIDs []*string WantedIDs []*string PendingResponses sync.WaitGroup // Track pending TAKETHIS responses + CheckSentCount uint64 // Track how many CHECK commands were sent //checked uint64 //wanted uint64 //unwanted uint64 @@ -426,6 +427,7 @@ type NewsgroupTransferProgress struct { Retry uint64 Transferred uint64 TTSentCount uint64 + CheckSentCount uint64 RedisCached uint64 TxErrors uint64 ConnErrors uint64 diff --git a/internal/processor/analyze.go b/internal/processor/analyze.go index bf8efa8..dd88867 100644 --- a/internal/processor/analyze.go +++ b/internal/processor/analyze.go @@ -496,7 +496,7 @@ func (proc *Processor) cacheFileExists(cacheFile string) bool { } // GetCachedMessageIDs returns cached message IDs for download optimization -func (proc *Processor) GetCachedMessageIDs(groupName string, startArticle, endArticle int64) ([]*nntp.HeaderLine, error) { +func (proc *Processor) GetCachedMessageIDs(groupName string, startArticle, endArticle int64) ([]nntp.HeaderLine, error) { providerName := "unknown" if proc.Pool.Backend != nil { providerName = proc.Pool.Backend.Provider.Name @@ -518,7 +518,7 @@ func (proc *Processor) GetCachedMessageIDs(groupName string, startArticle, endAr } defer file.Close() - var results []*nntp.HeaderLine + var results []nntp.HeaderLine scanner := bufio.NewScanner(file) for scanner.Scan() { @@ -550,7 +550,7 @@ func (proc *Processor) GetCachedMessageIDs(groupName string, 
startArticle, endAr continue } - results = append(results, &nntp.HeaderLine{ + results = append(results, nntp.HeaderLine{ ArticleNum: articleNum, Value: messageID, }) diff --git a/internal/processor/proc_DLArt.go b/internal/processor/proc_DLArt.go index 8da0daa..8d6fb3d 100644 --- a/internal/processor/proc_DLArt.go +++ b/internal/processor/proc_DLArt.go @@ -103,7 +103,7 @@ func (proc *Processor) DownloadArticles(newsgroup string, DLParChan chan struct{ //log.Printf("DownloadArticles: Fetching XHDR for %s from %d to %d (last known: %d, remaining: %d)", newsgroup, start, end, groupInfo.Last, remaining) var lastGoodEnd int64 = start //toFetch := end - start + 1 // +1 because ranges are inclusive (start=1, end=3 means articles 1,2,3) - xhdrChan := make(chan *nntp.HeaderLine, 1000) + xhdrChan := make(chan nntp.HeaderLine, 1000) errChan := make(chan error, 1) //log.Printf("Launch XHdrStreamed: '%s' toFetch=%d start=%d end=%d", newsgroup, toFetch, start, end) if proc.DB.IsDBshutdown() { @@ -153,7 +153,6 @@ func (proc *Processor) DownloadArticles(newsgroup string, DLParChan chan struct{ //log.Printf("DownloadArticles: Queued article %d (%s) for group '%s'", hdr.ArticleNum, hdr.Value, *item.GroupName) //hdr.Value = "" //hdr.ArticleNum = 0 - *hdr = nntp.HeaderLine{} } // end for xhdrChan //log.Printf("DownloadArticles: XHdr closed, finished feeding batch queue %d articles for group '%s' (existing: %d) total=%d", queued, newsgroup, exists, queued+exists) if queued == 0 { diff --git a/internal/processor/proc_DLXHDR.go b/internal/processor/proc_DLXHDR.go index 6b0dd58..f042ccb 100644 --- a/internal/processor/proc_DLXHDR.go +++ b/internal/processor/proc_DLXHDR.go @@ -3,7 +3,7 @@ package processor import "github.com/go-while/go-pugleaf/internal/nntp" // GetXHDR fetches XHDR data for a group -func (proc *Processor) GetXHDR(groupName string, header string, start, end int64) ([]*nntp.HeaderLine, error) { +func (proc *Processor) GetXHDR(groupName string, header string, start, end int64) ([]nntp.HeaderLine, error) { // Fetch XHDR data from NNTP server xhdrData, err := proc.Pool.XHdr(groupName, header, start, end) if err != nil { From 80ae796d18ea8ff7029c96eb1efff65454cfbf3f Mon Sep 17 00:00:00 2001 From: go-while Date: Thu, 16 Oct 2025 01:53:36 +0000 Subject: [PATCH 063/100] testing --- cmd/nntp-transfer/main.go | 34 ++++++++++++++++++-------- internal/nntp/nntp-transfer-demuxer.go | 14 +++++++++-- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 3c9f770..9618013 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1467,6 +1467,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } offset += int64(len(articles)) + articlesProcessed += int64(len(articles)) if dryRun && debugCapture { debugMutex.Lock() @@ -1507,7 +1508,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i OffsetQueue.Wait(MaxQueuedJobs) // wait for offset batches to finish, less than N in flight } - articlesProcessed += int64(len(articles)) + // articlesProcessed already incremented above after loading from DB remainingArticles -= int64(len(articles)) if VERBOSE { log.Printf("Newsgroup: '%s' | Pushed to queue (processed %d/%d) remaining: %d (Check=%t)", ng.Name, articlesProcessed, totalNGArticles, remainingArticles, ttMode.UseCHECK()) @@ -3251,24 +3252,29 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { {{if .DemuxerStats}}

 	<h3>Response Demuxer Statistics ({{len .DemuxerStats}} Workers)</h3>
+	<p>Workers are listed only if they have non-empty channels or if their last request was more than 15s ago.</p>
 	<table>
 		<tr>
-			<th>Worker ID</th>
-			<th>Pending Commands</th>
-			<th>CHECK Responses Queued</th>
-			<th>TAKETHIS Responses Queued</th>
+			<th>Worker ID</th>
+			<th>Pending Commands</th>
+			<th>CHECK Responses Queued</th>
+			<th>TAKETHIS Responses Queued</th>
+			<th>Idle Time (seconds)</th>
 		</tr>
 		{{range .DemuxerStats}}
+		{{if or (gt .PendingCommands 0) (gt .CheckResponsesQueued 0) (gt .TTResponsesQueued 0) (gt .IdleSeconds 15)}}
 		<tr>
-			<td>Worker #{{.WorkerID}}</td>
-			<td>{{.PendingCommands}}</td>
-			<td>{{.CheckResponsesQueued}}</td>
-			<td>{{.TTResponsesQueued}}</td>
+			<td>Worker #{{.WorkerID}}</td>
+			<td>{{.PendingCommands}}</td>
+			<td>{{.CheckResponsesQueued}}</td>
+			<td>{{.TTResponsesQueued}}</td>
+			<td>{{.IdleSeconds}}</td>
 		</tr>
+		{{end}}
{{end}} @@ -3355,17 +3361,25 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { PendingCommands int CheckResponsesQueued int TTResponsesQueued int + LastRequest time.Time + IdleSeconds int } var demuxerStats []DemuxerStats DemuxersMutex.RLock() for i, demux := range Demuxers { if demux != nil { - pending, checkQueued, ttQueued := demux.GetStatistics() + pending, checkQueued, ttQueued, lastReq := demux.GetStatistics() + idleSeconds := 0 + if !lastReq.IsZero() { + idleSeconds = int(time.Since(lastReq).Seconds()) + } demuxerStats = append(demuxerStats, DemuxerStats{ WorkerID: i, PendingCommands: pending, CheckResponsesQueued: checkQueued, TTResponsesQueued: ttQueued, + LastRequest: lastReq, + IdleSeconds: idleSeconds, }) } } diff --git a/internal/nntp/nntp-transfer-demuxer.go b/internal/nntp/nntp-transfer-demuxer.go index 7a69e68..20e96a0 100644 --- a/internal/nntp/nntp-transfer-demuxer.go +++ b/internal/nntp/nntp-transfer-demuxer.go @@ -20,6 +20,8 @@ type ResponseDemuxer struct { errChan chan struct{} started bool startedMux sync.Mutex + lastRequest time.Time + lastRequestMux sync.RWMutex } // NewResponseDemuxer creates a new response demultiplexer @@ -36,6 +38,10 @@ func NewResponseDemuxer(conn *BackendConn, errChan chan struct{}, BatchCheck int // RegisterCommand registers a command ID with its type (CHECK or TAKETHIS) func (d *ResponseDemuxer) RegisterCommand(cmdID uint, cmdType ResponseType) { + d.lastRequestMux.Lock() + d.lastRequest = time.Now() + d.lastRequestMux.Unlock() + d.cmdIDQMux.Lock() d.cmdIDQ = append(d.cmdIDQ, &CmdIDinfo{CmdID: cmdID, RespType: cmdType}) d.cmdIDQMux.Unlock() @@ -206,13 +212,17 @@ func (d *ResponseDemuxer) readAndDispatch() { } // GetStatistics returns current demuxer statistics -func (d *ResponseDemuxer) GetStatistics() (pendingCommands int, checkResponsesQueued int, ttResponsesQueued int) { +func (d *ResponseDemuxer) GetStatistics() (pendingCommands int, checkResponsesQueued int, ttResponsesQueued int, lastRequest time.Time) { d.cmdIDQMux.RLock() pendingCommands = len(d.cmdIDQ) d.cmdIDQMux.RUnlock() + d.lastRequestMux.RLock() + lastRequest = d.lastRequest + d.lastRequestMux.RUnlock() + checkResponsesQueued = len(d.checkResponseChan) ttResponsesQueued = len(d.ttResponseChan) - return pendingCommands, checkResponsesQueued, ttResponsesQueued + return pendingCommands, checkResponsesQueued, ttResponsesQueued, lastRequest } From 117d748d5a63b379e318f190fb403511423e63cf Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 19 Oct 2025 22:00:07 +0000 Subject: [PATCH 064/100] testing --- cmd/nntp-transfer/main.go | 28 ++++++++++++++------------- internal/database/progress.go | 6 +++++- internal/database/sqlite_retry.go | 4 ++-- internal/nntp/nntp-client-commands.go | 23 +++++++++++----------- internal/nntp/nntp-transfer.go | 10 +++++----- 5 files changed, 39 insertions(+), 32 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 9618013..fb84b72 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1784,7 +1784,7 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art // Increment pending responses counter before sending job.PendingResponses.Add(1) - cmdID, txBytes, err := conn.SendTakeThisArticleStreaming(article, &processor.LocalNNTPHostname, newsgroup, demuxer, readTAKETHISResponsesChan, job, n, len(articles)-skipped) + cmdID, txBytes, err, doContinue := conn.SendTakeThisArticleStreaming(article, &processor.LocalNNTPHostname, newsgroup, demuxer, readTAKETHISResponsesChan, 
job, n, len(articles)-skipped) astart2 = time.Now() job.Mux.Lock() job.TTxBytes += uint64(txBytes) @@ -1794,9 +1794,9 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art if err != nil { // Decrement on error since no response will come job.PendingResponses.Done() - if err == common.ErrNoNewsgroups { + if err == common.ErrNoNewsgroups || doContinue { job.NGTProgress.Increment(nntp.IncrFLAG_SKIPPED, 1) - log.Printf("Newsgroup: '%s' | skipped TAKETHIS '%s': no newsgroups header", newsgroup, article.MessageID) + log.Printf("Newsgroup: '%s' | skipped TAKETHIS '%s': bad newsgroups header doContinue=%t", newsgroup, article.MessageID, doContinue) continue } conn.Unlock() @@ -3176,7 +3176,7 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { Cache Hits (Total): {{.TotalRedisCacheHits}}
  ├─ Before CHECK: {{.TotalRedisCacheBeforeCheck}}
  └─ Before TAKETHIS: {{.TotalRedisCacheBeforeTakethis}}
- Checked: {{.TotalChecked}}
+ Checked: {{.TotalCheckSentCount}} / {{.TotalChecked}}
Wanted: {{.TotalWanted}}
Unwanted: {{.TotalUnwanted}}
TTSentCount: {{.TotalTTSentCount}}
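The demuxer-statistics changes in this patch derive a per-worker idle time from a lastRequest timestamp: RegisterCommand stamps it under a mutex, GetStatistics hands it back, and handleIndex turns it into whole seconds. A compressed sketch of that pattern, using simplified lowercase names in place of ResponseDemuxer and the handleIndex wiring:

// Sketch only: simplified stand-ins for ResponseDemuxer.lastRequest /
// RegisterCommand / GetStatistics and the idle computation in handleIndex.
package main

import (
	"fmt"
	"sync"
	"time"
)

type demuxer struct {
	mux         sync.RWMutex
	lastRequest time.Time
}

// registerCommand stamps the time of the most recent command sent.
func (d *demuxer) registerCommand() {
	d.mux.Lock()
	d.lastRequest = time.Now()
	d.mux.Unlock()
}

// idleSeconds reports how long the worker has been idle, truncated to whole
// seconds; it returns 0 until the first command has been registered.
func (d *demuxer) idleSeconds() int64 {
	d.mux.RLock()
	last := d.lastRequest
	d.mux.RUnlock()
	if last.IsZero() {
		return 0
	}
	return int64(time.Since(last).Seconds())
}

func main() {
	d := &demuxer{}
	fmt.Println("idle before any command:", d.idleSeconds()) // 0 (IsZero guard)
	d.registerCommand()
	time.Sleep(1100 * time.Millisecond)
	fmt.Println("idle after ~1.1s:", d.idleSeconds()) // 1
}

The IsZero guard mirrors handleIndex: a worker that has not yet sent any command reports 0 instead of an arbitrarily large idle value.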
@@ -3358,26 +3358,26 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { // Collect demuxer statistics type DemuxerStats struct { WorkerID int - PendingCommands int - CheckResponsesQueued int - TTResponsesQueued int + PendingCommands int64 + CheckResponsesQueued int64 + TTResponsesQueued int64 LastRequest time.Time - IdleSeconds int + IdleSeconds int64 } var demuxerStats []DemuxerStats DemuxersMutex.RLock() for i, demux := range Demuxers { if demux != nil { pending, checkQueued, ttQueued, lastReq := demux.GetStatistics() - idleSeconds := 0 + var idleSeconds int64 if !lastReq.IsZero() { - idleSeconds = int(time.Since(lastReq).Seconds()) + idleSeconds = int64(time.Since(lastReq).Seconds()) } demuxerStats = append(demuxerStats, DemuxerStats{ WorkerID: i, - PendingCommands: pending, - CheckResponsesQueued: checkQueued, - TTResponsesQueued: ttQueued, + PendingCommands: int64(pending), + CheckResponsesQueued: int64(checkQueued), + TTResponsesQueued: int64(ttQueued), LastRequest: lastReq, IdleSeconds: idleSeconds, }) @@ -3403,6 +3403,7 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { TotalRedisCacheBeforeCheck uint64 TotalRedisCacheBeforeTakethis uint64 TotalChecked uint64 + TotalCheckSentCount uint64 TotalUnwanted uint64 TotalWanted uint64 TotalTransferred uint64 @@ -3430,6 +3431,7 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { TotalRedisCacheBeforeCheck: totalRedisCacheBeforeCheck, TotalRedisCacheBeforeTakethis: totalRedisCacheBeforeTakethis, TotalChecked: totalChecked, + TotalCheckSentCount: totalCheckSentCount, TotalWanted: totalWanted, TotalUnwanted: totalUnwanted, TotalTransferred: totalTransferred, diff --git a/internal/database/progress.go b/internal/database/progress.go index 08e2a5b..4eabc68 100644 --- a/internal/database/progress.go +++ b/internal/database/progress.go @@ -7,6 +7,7 @@ import ( "log" "os" "path/filepath" + "sync" "time" _ "github.com/mattn/go-sqlite3" @@ -14,7 +15,8 @@ import ( // ProgressDB tracks fetching progress for newsgroups per backend type ProgressDB struct { - db *sql.DB + db *sql.DB + mux sync.Mutex } // ProgressEntry represents the fetching progress for a newsgroup on a backend @@ -120,6 +122,8 @@ ON CONFLICT(backend_name, newsgroup_name) DO UPDATE SET // UpdateProgress updates the fetching progress for a newsgroup on a backend func (p *ProgressDB) UpdateProgress(backendName, newsgroupName string, lastArticle int64) error { + p.mux.Lock() + defer p.mux.Unlock() _, err := retryableExec(p.db, query_UpdateProgress, backendName, newsgroupName, lastArticle) if err != nil { return fmt.Errorf("failed to update progress: %w", err) diff --git a/internal/database/sqlite_retry.go b/internal/database/sqlite_retry.go index cc406fe..d0251ea 100644 --- a/internal/database/sqlite_retry.go +++ b/internal/database/sqlite_retry.go @@ -10,8 +10,8 @@ import ( const ( maxRetries = 1000 - baseDelay = 10 * time.Millisecond - maxDelay = 25 * time.Millisecond + baseDelay = 100 * time.Millisecond + maxDelay = 2500 * time.Millisecond ) // isRetryableError checks if the error is a retryable SQLite error diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 37682c7..9941538 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -1090,19 +1090,20 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readCHECKResponses // sends TAKETHIS command and article content without waiting for response // Returns command ID for later response reading - used for 
streaming mode // Registers the command ID with the demuxer for proper response routing -func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string, newsgroup string, demuxer *ResponseDemuxer, readTAKETHISResponsesChan chan *ReadRequest, job *CHTTJob, n int, reqs int) (cmdID uint, txBytes int, err error) { +// return value doContinue indicates whether the caller should continue sending more articles +func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntphostname *string, newsgroup string, demuxer *ResponseDemuxer, readTAKETHISResponsesChan chan *ReadRequest, job *CHTTJob, n int, reqs int) (cmdID uint, txBytes int, err error, doContinue bool) { //start := time.Now() //c.mux.Lock() //defer c.mux.Unlock() if !c.IsConnected() { //c.mux.Unlock() - return 0, 0, fmt.Errorf("not connected") + return 0, 0, fmt.Errorf("not connected"), false } if c.ModeReader { //c.mux.Unlock() - return 0, 0, fmt.Errorf("cannot send article in reader mode") + return 0, 0, fmt.Errorf("cannot send article in reader mode"), false } c.lastUsed = time.Now() //c.mux.Unlock() @@ -1110,7 +1111,7 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp // Prepare article for transfer headers, err := common.ReconstructHeaders(article, true, nntphostname, newsgroup) if err != nil { - return 0, 0, err + return 0, 0, err, true } //writer := bufio.NewWriterSize(c.conn, c.GetBufSize(article.Bytes)) // Slightly larger buffer than article size for headers writer := bufio.NewWriter(c.conn) // Slightly larger buffer than article size for headers @@ -1122,13 +1123,13 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp // Send TAKETHIS command cmdID, err = c.TextConn.Cmd("TAKETHIS %s", article.MessageID) if err != nil { - return 0, 0, fmt.Errorf("failed SendTakeThisArticleStreaming command: %w", err) + return 0, 0, fmt.Errorf("failed SendTakeThisArticleStreaming command: %w", err), false } // Send headers for _, headerLine := range headers { if tx, err := writer.WriteString(headerLine + CRLF); err != nil { - return 0, txBytes, fmt.Errorf("failed to write header SendTakeThisArticleStreaming: %w", err) + return 0, txBytes, fmt.Errorf("failed to write header SendTakeThisArticleStreaming: %w", err), false } else { txBytes += tx } @@ -1136,7 +1137,7 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp // Send empty line between headers and body if tx, err := writer.WriteString(CRLF); err != nil { - return 0, txBytes, fmt.Errorf("failed to write header/body separator SendTakeThisArticleStreaming: %w", err) + return 0, txBytes, fmt.Errorf("failed to write header/body separator SendTakeThisArticleStreaming: %w", err), false } else { txBytes += tx } @@ -1159,7 +1160,7 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp } if tx, err := writer.WriteString(line + CRLF); err != nil { - return 0, txBytes, fmt.Errorf("failed to write body line SendTakeThisArticleStreaming: %w", err) + return 0, txBytes, fmt.Errorf("failed to write body line SendTakeThisArticleStreaming: %w", err), false } else { txBytes += tx } @@ -1167,7 +1168,7 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp // Send termination line (single dot) if tx, err := writer.WriteString(DOT + CRLF); err != nil { - return 0, txBytes, fmt.Errorf("failed to send article terminator SendTakeThisArticleStreaming: %w", err) + return 0, txBytes, fmt.Errorf("failed to send article 
terminator SendTakeThisArticleStreaming: %w", err), false } else { txBytes += tx } @@ -1175,7 +1176,7 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp //startFlush := time.Now() if err := writer.Flush(); err != nil { - return 0, txBytes, fmt.Errorf("failed to flush article data SendTakeThisArticleStreaming: %w", err) + return 0, txBytes, fmt.Errorf("failed to flush article data SendTakeThisArticleStreaming: %w", err), false } //chanStart := time.Now() @@ -1188,7 +1189,7 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp readTAKETHISResponsesChan <- GetReadRequest(cmdID, job, &article.MessageID, n+1, reqs) // reuse global struct to reduce GC pressure //log.Printf("Newsgroup: '%s' | TAKETHIS notified response reader CmdID=%d '%s' waited %v readTAKETHISResponsesChan=%d/%d", newsgroup, cmdID, article.MessageID, time.Since(chanStart), len(readTAKETHISResponsesChan), cap(readTAKETHISResponsesChan)) // Return command ID without reading response (streaming mode) - return cmdID, txBytes, nil + return cmdID, txBytes, nil, true } // PostArticle posts an article using the POST command diff --git a/internal/nntp/nntp-transfer.go b/internal/nntp/nntp-transfer.go index af4d635..3b5b1c1 100644 --- a/internal/nntp/nntp-transfer.go +++ b/internal/nntp/nntp-transfer.go @@ -24,7 +24,7 @@ const ( ) // Pool of ResponseData structs to reduce allocations -var ResponseDataPool = make(chan *ResponseData, 64*1024) +var ResponseDataPool = make(chan *ResponseData, 1024*1024) // GetResponseData returns a recycled ResponseData struct or makes a new one if none are available func GetResponseData(cmdID uint, code int, line string, err error) *ResponseData { @@ -87,7 +87,7 @@ type TTSetup struct { } // Pool of TTSetup structs to reduce allocations -var TTSetupPool = make(chan *TTSetup, 64*1024) +var TTSetupPool = make(chan *TTSetup, 1024*1024) // GetTTSetup returns a recycled TTSetup struct or makes a new one if none are available // the responseChan parameter is received from processBatch() and is mandatory and will be set on the returned struct @@ -206,7 +206,7 @@ type TTResponse struct { } // Pool of TTResponse structs to reduce allocations -var TTResponsePool = make(chan *TTResponse, 64*1024) +var TTResponsePool = make(chan *TTResponse, 1024*1024) // GetTTResponse returns a recycled TTResponse struct or makes a new one if none are available func GetTTResponse(job *CHTTJob, forceCleanup bool, err error) *TTResponse { @@ -238,7 +238,7 @@ func RecycleTTResponse(resp *TTResponse) { } // Pool of TTResponse chans to reduce allocations -var TTResponseChans = make(chan chan *TTResponse, 16384) +var TTResponseChans = make(chan chan *TTResponse, 1024*1024) // GetTTResponseChan returns a recycled chan *TTResponse or makes a new one with capacity of 1 if none are available func GetTTResponseChan() chan *TTResponse { @@ -285,7 +285,7 @@ type ReadRequest struct { } // Pool of ReadRequest structs to reduce allocations -var ReadRequestsPool = make(chan *ReadRequest, 65536) +var ReadRequestsPool = make(chan *ReadRequest, 1024*1024) // ClearReadRequest resets a ReadRequest struct and recycles it back into the pool func (rr *ReadRequest) ClearReadRequest(respData *ResponseData) { From 8bbbd62802f466004ae8d4b5fd184ab2929f30c0 Mon Sep 17 00:00:00 2001 From: go-while Date: Sun, 19 Oct 2025 23:03:13 +0000 Subject: [PATCH 065/100] testing --- cmd/nntp-transfer/main.go | 134 +------------- cmd/recover-db/main.go | 244 ++++++++++++++++++++++++- internal/database/db_groupdbs.go 
| 55 ++++++ internal/database/db_rescan.go | 24 ++- internal/database/queries.go | 127 +++++++++++++ internal/nntp/nntp-transfer-demuxer.go | 4 +- 6 files changed, 448 insertions(+), 140 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index fb84b72..48c7935 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -649,133 +649,6 @@ func parseProxyConfig(address, proxyType, username, password string) (*ProxyConf }, nil } -const query_getArticlesBatchWithDateFilter_selectPart = `SELECT article_num, message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, reply_count, path, headers_json, body_text, imported_at FROM articles` -const query_getArticlesBatchWithDateFilter_orderby = " ORDER BY date_sent ASC LIMIT ? OFFSET ?" - -// getArticlesBatchWithDateFilter retrieves articles from a group database with optional date filtering -func getArticlesBatchWithDateFilter(db *database.Database, ng *models.Newsgroup, offset int64, startTime, endTime *time.Time) ([]*models.Article, error) { - // Get group database - groupDBs, err := db.GetGroupDBs(ng.Name) - if err != nil { - return nil, fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", ng.Name, err) - } - - var query string - var args []interface{} - - if startTime != nil || endTime != nil { - // Build query with date filtering - var whereConditions []string - - // Always exclude NULL date_sent values when using ORDER BY date_sent - whereConditions = append(whereConditions, "date_sent IS NOT NULL") - - if startTime != nil { - whereConditions = append(whereConditions, "date_sent >= ?") - args = append(args, startTime.UTC().Format("2006-01-02 15:04:05")) - } - - if endTime != nil { - whereConditions = append(whereConditions, "date_sent <= ?") - args = append(args, endTime.UTC().Format("2006-01-02 15:04:05")) - } - - whereClause := "" - if len(whereConditions) > 0 { - whereClause = " WHERE " + strings.Join(whereConditions, " AND ") - } - - query = query_getArticlesBatchWithDateFilter_selectPart + whereClause + query_getArticlesBatchWithDateFilter_orderby - args = append(args, dbBatchSize, offset) - } else { - // No date filtering - simple OFFSET pagination - query = query_getArticlesBatchWithDateFilter_selectPart + query_getArticlesBatchWithDateFilter_orderby - args = []interface{}{dbBatchSize, offset} - } - - rows, err := groupDBs.DB.Query(query, args...) 
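// Caller-side sketch (illustrative only; fetchBatch and process are placeholder
// names, not functions from this repo): the helper above is meant to be driven by
// an OFFSET-pagination loop that advances by the number of rows actually returned.
//
//	var offset int64
//	for {
//		batch, err := fetchBatch(db, ng, offset) // e.g. getArticlesBatchWithDateFilter
//		if err != nil || len(batch) == 0 {
//			break
//		}
//		process(batch)
//		offset += int64(len(batch))
//	}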
- if err != nil { - db.ForceCloseGroupDBs(groupDBs) - return nil, err - } - defer rows.Close() - - var out []*models.Article - for rows.Next() { - var a models.Article - if err := rows.Scan(&a.DBArtNum, &a.MessageID, &a.Subject, &a.FromHeader, &a.DateSent, &a.DateString, &a.References, &a.Bytes, &a.Lines, &a.ReplyCount, &a.Path, &a.HeadersJSON, &a.BodyText, &a.ImportedAt); err != nil { - return nil, err - } - out = append(out, &a) - } - - if int64(len(out)) < dbBatchSize { - db.ForceCloseGroupDBs(groupDBs) - } else { - groupDBs.Return(db) - } - return out, nil -} - -// getArticleCountWithDateFilter gets the total count of articles with optional date filtering -// When no date filter is specified, uses cached message_count from newsgroups table in main DB -func getArticleCountWithDateFilter(db *database.Database, groupDBs *database.GroupDBs, startTime, endTime *time.Time) (int64, error) { - var query string - var args []interface{} - var count int64 - start := time.Now() - - if startTime != nil || endTime != nil { - // Build count query with date filtering - must use live COUNT(*) query - var whereConditions []string - - // Always exclude NULL date_sent values to match SELECT query behavior - whereConditions = append(whereConditions, "date_sent IS NOT NULL") - - if startTime != nil { - whereConditions = append(whereConditions, "date_sent >= ?") - args = append(args, startTime.UTC().Format("2006-01-02 15:04:05")) - } - - if endTime != nil { - whereConditions = append(whereConditions, "date_sent <= ?") - args = append(args, endTime.UTC().Format("2006-01-02 15:04:05")) - } - - whereClause := "" - if len(whereConditions) > 0 { - whereClause = " WHERE " + strings.Join(whereConditions, " AND ") - } - - query = "SELECT COUNT(*) FROM articles" + whereClause - err := groupDBs.DB.QueryRow(query, args...).Scan(&count) - if err != nil { - return 0, err - } - } else { - // No date filtering - use cached message_count from newsgroups table in main DB - // This is MUCH faster than COUNT(*) on large tables (O(1) vs O(N)) - query = "SELECT COALESCE(message_count, 0) FROM newsgroups WHERE name = ?" 
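// For reference (derived from this function, nothing new): the two counting
// strategies it chooses between are roughly
//
//	fast path, no date filter:  SELECT COALESCE(message_count, 0) FROM newsgroups WHERE name = ?   (main DB, O(1))
//	filtered or fallback path:  SELECT COUNT(*) FROM articles [WHERE date_sent ...]                (group DB, O(N))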
- err := db.GetMainDB().QueryRow(query, groupDBs.Newsgroup).Scan(&count) - if err != nil { - // Fallback to direct COUNT if newsgroups table doesn't have the entry - log.Printf("WARNING: Could not get message_count from newsgroups table for '%s', falling back to COUNT(*): %v", groupDBs.Newsgroup, err) - query = "SELECT COUNT(*) FROM articles" - err = groupDBs.DB.QueryRow(query).Scan(&count) - if err != nil { - return 0, err - } - } - } - - elapsed := time.Since(start) - if elapsed > 5*time.Second { - log.Printf("WARNING: Slow COUNT query for group '%s' took %v (count=%d)", groupDBs.Newsgroup, elapsed, count) - } - - return count, nil -} - // testConnection tests the connection to the target NNTP server func testConnection(host *string, port *int, username *string, password *string, ssl *bool, timeout *int, proxyConfig *ProxyConfig) error { testProvider := &config.Provider{ @@ -1239,7 +1112,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i nntp.ResultsMutex.Unlock() // Get total article count first with date filtering - totalNGArticles, err := getArticleCountWithDateFilter(db, groupDBsA, startTime, endTime) + totalNGArticles, err := db.GetArticleCountWithDateFilter(groupDBsA, startTime, endTime) if err != nil { if ferr := db.ForceCloseGroupDBs(groupDBsA); ferr != nil { log.Printf("ForceCloseGroupDBs error for '%s': %v", ng.Name, ferr) @@ -1455,7 +1328,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } start := time.Now() // Load batch from database using OFFSET pagination - articles, err := getArticlesBatchWithDateFilter(db, ng, offset, startTime, endTime) + articles, err := db.GetArticlesBatchWithDateFilter(ng, offset, startTime, endTime, dbBatchSize) if err != nil { log.Printf("Error loading article batch (processed %d) for newsgroup %s: %v", articlesProcessed, ng.Name, err) return fmt.Errorf("failed to load article batch (processed %d) for newsgroup '%s': %v", articlesProcessed, ng.Name, err) @@ -2804,6 +2677,9 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // requeue at front rs.jobs = append([]*nntp.CHTTJob{job}, rs.jobs...) 
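// Hedged reading of the counter bump added below: when a job is requeued at the
// front of rs.jobs for retry, runningTTJobs is incremented under mux, presumably
// balancing a decrement elsewhere so the worker's exit wait loop (which waits for
// runningTTJobs == 0) still accounts for the requeued job. A minimal sketch of the
// pattern, with placeholder names:
//
//	mu.Lock()
//	queue = append([]*Job{failedJob}, queue...) // retry this job first
//	inFlight++                                  // balanced by inFlight-- when it completes
//	mu.Unlock()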
rs.Mux.Unlock() + mux.Lock() + runningTTJobs++ + mux.Unlock() return } if redis_cached > 0 { diff --git a/cmd/recover-db/main.go b/cmd/recover-db/main.go index ac2ed7a..c8ca302 100644 --- a/cmd/recover-db/main.go +++ b/cmd/recover-db/main.go @@ -43,6 +43,7 @@ func main() { maxPar = flag.Int("max-par", 1, "use with -rebuild-threads to process N newsgroups") dataDir = flag.String("data", "./data", "Directory to store database files") scanOutOfOrderCheck = flag.Bool("scan-out-of-order-overview", false, "Scan newsgroups for articles with out-of-order date_sent values") + reorderByDateSent = flag.Bool("reorder-by-datesent", false, "Reorder all articles by date_sent and write to new database file with .new extension") ) flag.Parse() @@ -147,7 +148,7 @@ func main() { wg.Done() }(wg) fmt.Printf("🧵 [%d/%d] Rebuilding threads for newsgroup: %s\n", i+1, len(newsgroups), newsgroup.Name) - report, err := db.RebuildThreadsFromScratch(newsgroup.Name, *verbose) + report, err := db.RebuildThreadsFromScratch(newsgroup.Name, *verbose, nil) if err != nil { fmt.Printf("❌ Failed to rebuild threads for '%s': %v\n", newsgroup.Name, err) return @@ -196,6 +197,17 @@ func main() { os.Exit(0) } + // If reorder-by-datesent is requested, run that and exit + if *reorderByDateSent { + fmt.Printf("🔄 Starting article reordering by date_sent...\n") + fmt.Printf("=====================================\n") + err := reorderArticlesByDateSent(db, newsgroups, *verbose, *maxPar) + if err != nil { + log.Fatalf("Article reordering failed: %v", err) + } + os.Exit(0) + } + // Initialize tracking variables var results []GroupResult var ( @@ -270,7 +282,7 @@ func main() { // Optionally rebuild threads after repair if there were thread-related issues if len(report.OrphanedThreads) > 0 { fmt.Printf("🧵 Rebuilding thread relationships after repair...\n") - threadReport, err := db.RebuildThreadsFromScratch(newsgroup.Name, *verbose) + threadReport, err := db.RebuildThreadsFromScratch(newsgroup.Name, *verbose, nil) if err != nil { fmt.Printf("❌ Failed to rebuild threads: %v\n", err) } else { @@ -1157,3 +1169,231 @@ func scanOutOfOrderOverview(db *database.Database, newsgroups []*models.Newsgrou return nil } + +// reorderArticlesByDateSent reorders articles by date_sent and writes to a new database +func reorderArticlesByDateSent(db *database.Database, newsgroups []*models.Newsgroup, verbose bool, maxPar int) error { + fmt.Printf("Processing %d newsgroups with max parallelism: %d\n\n", len(newsgroups), maxPar) + + parChan := make(chan struct{}, maxPar) + var wg sync.WaitGroup + var totalArticles, totalNewsgroups int64 + var mu sync.Mutex + + for i, ng := range newsgroups { + parChan <- struct{}{} // acquire lock + wg.Add(1) + + go func(ng *models.Newsgroup, index int) { + defer func() { + <-parChan // release lock + wg.Done() + }() + + fmt.Printf("🔄 [%d/%d] Processing: %s\n", index+1, len(newsgroups), ng.Name) + + articleCount, err := reorderSingleNewsgroup(db, ng.Name, verbose) + if err != nil { + fmt.Printf("❌ [%d/%d] Failed to reorder %s: %v\n", index+1, len(newsgroups), ng.Name, err) + return + } + + mu.Lock() + totalArticles += articleCount + if articleCount > 0 { + totalNewsgroups++ + } + mu.Unlock() + + if articleCount > 0 { + fmt.Printf("✅ [%d/%d] Completed: %s (%d articles)\n", index+1, len(newsgroups), ng.Name, articleCount) + } else { + fmt.Printf("⚠️ [%d/%d] Skipped: %s (no articles)\n", index+1, len(newsgroups), ng.Name) + } + }(ng, i) + } + + wg.Wait() + + fmt.Printf("\n=====================================\n") + fmt.Printf("📊 
REORDER SUMMARY\n") + fmt.Printf("=====================================\n") + fmt.Printf("Newsgroups processed: %d\n", totalNewsgroups) + fmt.Printf("Total articles reordered: %d\n", totalArticles) + fmt.Printf("=====================================\n") + + return nil +} + +// reorderSingleNewsgroup reorders articles in a single newsgroup by date_sent +func reorderSingleNewsgroup(db *database.Database, newsgroupName string, verbose bool) (int64, error) { + // Open source database + sourceDB, err := db.GetGroupDBs(newsgroupName) + if err != nil { + return 0, fmt.Errorf("failed to open source database: %w", err) + } + defer db.ForceCloseGroupDBs(sourceDB) + + // Count articles in source database + var totalArticles int64 + err = sourceDB.DB.QueryRow("SELECT COUNT(*) FROM articles WHERE date_sent IS NOT NULL").Scan(&totalArticles) + if err != nil { + return 0, fmt.Errorf("failed to count articles: %w", err) + } + + if totalArticles == 0 { + return 0, nil // No articles to process + } + + // Open destination database with .new suffix + destDB, destPath, err := db.GetGroupDBsWithSuffix(newsgroupName, ".new") + if err != nil { + return 0, fmt.Errorf("failed to create destination database: %w", err) + } + defer database.CloseGroupDBDirectly(destDB) + + if verbose { + fmt.Printf(" Source: %s\n", sourceDB.Newsgroup) + fmt.Printf(" Dest: %s\n", destPath) + fmt.Printf(" Articles to reorder: %d\n", totalArticles) + } + + // Process articles in batches to avoid memory issues with large newsgroups + const batchSize = 1000 + var newArticleNum int64 = 1 + var processed int64 + var offset int64 = 0 + + // Create a temporary newsgroup model for GetArticlesBatchWithDateFilter + tempNG := &models.Newsgroup{ + Name: newsgroupName, + } + + // Prepare insert statement for reuse across batches + insertSQL := `INSERT INTO articles (article_num, message_id, subject, from_header, date_sent, date_string, + "references", bytes, lines, reply_count, path, headers_json, body_text, + imported_at, spam, hide) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)` + + for offset < totalArticles { + // Use existing GetArticlesBatchWithDateFilter function to fetch articles + // Pass nil for startTime/endTime to get all articles, ordered by date_sent + articles, err := db.GetArticlesBatchWithDateFilter(tempNG, offset, nil, nil, batchSize) + if err != nil { + return processed, fmt.Errorf("failed to query source database at offset %d: %w", offset, err) + } + + // Break if no articles returned + if len(articles) == 0 { + break + } + + // Begin transaction for this batch + tx, err := destDB.Begin() + if err != nil { + return processed, fmt.Errorf("failed to begin transaction at offset %d: %w", offset, err) + } + + stmt, err := tx.Prepare(insertSQL) + if err != nil { + tx.Rollback() + return processed, fmt.Errorf("failed to prepare insert statement: %w", err) + } + + // Process batch + batchCount := 0 + for _, article := range articles { + if article == nil { + continue + } + + // Determine spam/hide flags (these are stored as integers in DB) + spam := 0 + hide := 0 + // Note: Article model doesn't have Spam/Hide fields, so we default to 0 + + // Insert with new sequential article_num + _, err = stmt.Exec( + newArticleNum, // new sequential article_num + article.MessageID, + article.Subject, + article.FromHeader, + article.DateSent, + article.DateString, + article.References, + article.Bytes, + article.Lines, + article.ReplyCount, + article.Path, + article.HeadersJSON, + article.BodyText, + article.ImportedAt, + spam, + hide, + 
) + if err != nil { + stmt.Close() + tx.Rollback() + return processed, fmt.Errorf("failed to insert article %d: %w", newArticleNum, err) + } + + newArticleNum++ + processed++ + batchCount++ + } + + stmt.Close() + + // Commit batch transaction + if err = tx.Commit(); err != nil { + return processed, fmt.Errorf("failed to commit transaction at offset %d: %w", offset, err) + } + + if verbose { + fmt.Printf(" Progress: %d/%d articles (batch: %d)\n", processed, totalArticles, batchCount) + } + + // Break if we got fewer articles than batch size (end of data) + if batchCount < int(batchSize) { + break + } + + offset += int64(batchCount) + } + + // Close destination database before rebuilding threads + if err := database.CloseGroupDBDirectly(destDB); err != nil { + log.Printf("Warning: failed to close destination database: %v", err) + } + + // Rebuild threads in the new database + if verbose { + fmt.Printf(" Rebuilding thread relationships...\n") + } + + // Reopen the destination database for thread rebuild + destDB, err = sql.Open("sqlite3", destPath) + if err != nil { + return processed, fmt.Errorf("failed to reopen destination database for thread rebuild: %w", err) + } + defer destDB.Close() + + // Create a temporary GroupDBs wrapper for the new database + tempGroupDBs := &database.GroupDBs{ + Newsgroup: newsgroupName, + DB: destDB, + } + + // Rebuild threads using the existing RebuildThreadsFromScratch function + var report *database.ThreadRebuildReport + report, err = db.RebuildThreadsFromScratch(newsgroupName, verbose, tempGroupDBs) + if err != nil { + return processed, fmt.Errorf("failed to rebuild threads: %w", err) + } + + if verbose { + fmt.Printf(" Thread rebuild complete: %d threads from %d articles\n", + report.ThreadsRebuilt, report.TotalArticles) + } + + return processed, nil +} diff --git a/internal/database/db_groupdbs.go b/internal/database/db_groupdbs.go index b55596b..2af740f 100644 --- a/internal/database/db_groupdbs.go +++ b/internal/database/db_groupdbs.go @@ -192,3 +192,58 @@ func (dbs *GroupDBs) Close(who string) error { } return nil } + +// GetGroupDBsWithSuffix opens a group database with a custom suffix (e.g., ".new") +// Returns the database connection, the full file path, and any error +func (db *Database) GetGroupDBsWithSuffix(groupName, suffix string) (*sql.DB, string, error) { + if db.dbconfig == nil { + return nil, "", fmt.Errorf("database configuration is not set") + } + + groupsHash := GroupHashMap.GroupToHash(groupName) + baseGroupDBdir := filepath.Join(db.dbconfig.DataDir, "/db/"+groupsHash) + + if err := createDirIfNotExists(baseGroupDBdir); err != nil { + return nil, "", fmt.Errorf("failed to create group database directory: %w", err) + } + + groupDBfile := filepath.Join(baseGroupDBdir + "/" + SanitizeGroupName(groupName) + ".db" + suffix) + + // Open database + groupDB, err := sql.Open("sqlite3", groupDBfile) + if err != nil { + return nil, "", fmt.Errorf("failed to open database: %w", err) + } + + // Apply pragmas for new database + if err := db.applySQLitePragmasGroupDB(groupDB); err != nil { + if cerr := groupDB.Close(); cerr != nil { + log.Printf("Failed to close groupDB during pragma error: %v", cerr) + } + return nil, "", fmt.Errorf("failed to apply pragmas: %w", err) + } + + // Apply schema/migrations + tempGroupDBs := &GroupDBs{ + Newsgroup: groupName, + DB: groupDB, + Idle: time.Now(), + } + + if err := db.migrateGroupDB(tempGroupDBs); err != nil { + if cerr := groupDB.Close(); cerr != nil { + log.Printf("Failed to close groupDB during migration 
error: %v", cerr) + } + return nil, "", fmt.Errorf("failed to migrate group database: %w", err) + } + + return groupDB, groupDBfile, nil +} + +// CloseGroupDBDirectly closes a database connection directly +func CloseGroupDBDirectly(db *sql.DB) error { + if db == nil { + return nil + } + return db.Close() +} diff --git a/internal/database/db_rescan.go b/internal/database/db_rescan.go index 47d7a68..73a0e89 100644 --- a/internal/database/db_rescan.go +++ b/internal/database/db_rescan.go @@ -379,7 +379,8 @@ const query_RebuildThreadsFromScratch5 = "SELECT article_num, message_id FROM ar // RebuildThreadsFromScratch completely rebuilds all thread relationships for a newsgroup // This function deletes all existing threads and rebuilds them from article 1 based on message references -func (db *Database) RebuildThreadsFromScratch(newsgroup string, verbose bool) (*ThreadRebuildReport, error) { +// If groupDB is provided (not nil), it will use that instead of opening the newsgroup's database +func (db *Database) RebuildThreadsFromScratch(newsgroup string, verbose bool, groupDB *GroupDBs) (*ThreadRebuildReport, error) { report := &ThreadRebuildReport{ Newsgroup: newsgroup, StartTime: time.Now(), @@ -390,15 +391,24 @@ func (db *Database) RebuildThreadsFromScratch(newsgroup string, verbose bool) (* log.Printf("RebuildThreadsFromScratch: Starting complete thread rebuild for newsgroup '%s'", newsgroup) } - // Get group database - groupDB, err := db.GetGroupDBs(newsgroup) - if err != nil { - report.Errors = append(report.Errors, fmt.Sprintf("Failed to get group database: %v", err)) - return report, err + // Get group database if not provided + var shouldCloseDB bool + if groupDB == nil { + var err error + groupDB, err = db.GetGroupDBs(newsgroup) + if err != nil { + report.Errors = append(report.Errors, fmt.Sprintf("Failed to get group database: %v", err)) + return report, err + } + shouldCloseDB = true + } + + if shouldCloseDB { + defer groupDB.Return(db) } - defer groupDB.Return(db) // Get total article count + var err error err = retryableQueryRowScan(groupDB.DB, query_RebuildThreadsFromScratch1, []interface{}{}, &report.TotalArticles) if err != nil { report.Errors = append(report.Errors, fmt.Sprintf("Failed to get article count: %v", err)) diff --git a/internal/database/queries.go b/internal/database/queries.go index 3cc9b59..ef68796 100644 --- a/internal/database/queries.go +++ b/internal/database/queries.go @@ -3280,3 +3280,130 @@ func (db *Database) SearchUserByComputedHash(targetHash string, nonce string) (* // No matching user found return nil, nil } + +const query_getArticlesBatchWithDateFilter_selectPart = `SELECT article_num, message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, reply_count, path, headers_json, body_text, imported_at FROM articles` +const query_getArticlesBatchWithDateFilter_orderby = " ORDER BY date_sent ASC LIMIT ? OFFSET ?" 
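// For reference, with both bounds supplied the statement assembled from these two
// constants by the builder below becomes:
//
//	SELECT article_num, message_id, ..., imported_at FROM articles
//	WHERE date_sent IS NOT NULL AND date_sent >= ? AND date_sent <= ?
//	ORDER BY date_sent ASC LIMIT ? OFFSET ?
//
// with args appended in the same order: startTime, endTime, dbBatchSize, offset.
// Without bounds the WHERE clause is dropped and args are just (dbBatchSize, offset).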
+ +// GetArticlesBatchWithDateFilter retrieves articles from a group database with optional date filtering +func (db *Database) GetArticlesBatchWithDateFilter(ng *models.Newsgroup, offset int64, startTime, endTime *time.Time, dbBatchSize int64) ([]*models.Article, error) { + // Get group database + groupDBs, err := db.GetGroupDBs(ng.Name) + if err != nil { + return nil, fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", ng.Name, err) + } + + var query string + var args []interface{} + + if startTime != nil || endTime != nil { + // Build query with date filtering + var whereConditions []string + + // Always exclude NULL date_sent values when using ORDER BY date_sent + whereConditions = append(whereConditions, "date_sent IS NOT NULL") + + if startTime != nil { + whereConditions = append(whereConditions, "date_sent >= ?") + args = append(args, startTime.UTC().Format("2006-01-02 15:04:05")) + } + + if endTime != nil { + whereConditions = append(whereConditions, "date_sent <= ?") + args = append(args, endTime.UTC().Format("2006-01-02 15:04:05")) + } + + whereClause := "" + if len(whereConditions) > 0 { + whereClause = " WHERE " + strings.Join(whereConditions, " AND ") + } + + query = query_getArticlesBatchWithDateFilter_selectPart + whereClause + query_getArticlesBatchWithDateFilter_orderby + args = append(args, dbBatchSize, offset) + } else { + // No date filtering - simple OFFSET pagination + query = query_getArticlesBatchWithDateFilter_selectPart + query_getArticlesBatchWithDateFilter_orderby + args = []interface{}{dbBatchSize, offset} + } + + rows, err := groupDBs.DB.Query(query, args...) + if err != nil { + db.ForceCloseGroupDBs(groupDBs) + return nil, err + } + defer rows.Close() + i := 0 + out := make([]*models.Article, dbBatchSize) + for rows.Next() { + out[i] = &models.Article{} + if err := rows.Scan(&out[i].DBArtNum, &out[i].MessageID, &out[i].Subject, &out[i].FromHeader, &out[i].DateSent, &out[i].DateString, &out[i].References, &out[i].Bytes, &out[i].Lines, &out[i].ReplyCount, &out[i].Path, &out[i].HeadersJSON, &out[i].BodyText, &out[i].ImportedAt); err != nil { + return nil, err + } + i++ + } + + if int64(len(out)) < dbBatchSize { + db.ForceCloseGroupDBs(groupDBs) + } else { + groupDBs.Return(db) + } + return out, nil +} + +// GetArticleCountWithDateFilter gets the total count of articles with optional date filtering +// When no date filter is specified, uses cached message_count from newsgroups table in main DB +func (db *Database) GetArticleCountWithDateFilter(groupDBs *GroupDBs, startTime, endTime *time.Time) (int64, error) { + var query string + var args []interface{} + var count int64 + start := time.Now() + + if startTime != nil || endTime != nil { + // Build count query with date filtering - must use live COUNT(*) query + var whereConditions []string + + // Always exclude NULL date_sent values to match SELECT query behavior + whereConditions = append(whereConditions, "date_sent IS NOT NULL") + + if startTime != nil { + whereConditions = append(whereConditions, "date_sent >= ?") + args = append(args, startTime.UTC().Format("2006-01-02 15:04:05")) + } + + if endTime != nil { + whereConditions = append(whereConditions, "date_sent <= ?") + args = append(args, endTime.UTC().Format("2006-01-02 15:04:05")) + } + + whereClause := "" + if len(whereConditions) > 0 { + whereClause = " WHERE " + strings.Join(whereConditions, " AND ") + } + + query = "SELECT COUNT(*) FROM articles" + whereClause + err := groupDBs.DB.QueryRow(query, args...).Scan(&count) + if err != nil { + 
return 0, err + } + } else { + // No date filtering - use cached message_count from newsgroups table in main DB + // This is MUCH faster than COUNT(*) on large tables (O(1) vs O(N)) + query = "SELECT COALESCE(message_count, 0) FROM newsgroups WHERE name = ?" + err := db.GetMainDB().QueryRow(query, groupDBs.Newsgroup).Scan(&count) + if err != nil { + // Fallback to direct COUNT if newsgroups table doesn't have the entry + log.Printf("WARNING: Could not get message_count from newsgroups table for '%s', falling back to COUNT(*): %v", groupDBs.Newsgroup, err) + query = "SELECT COUNT(*) FROM articles" + err = groupDBs.DB.QueryRow(query).Scan(&count) + if err != nil { + return 0, err + } + } + } + + elapsed := time.Since(start) + if elapsed > 5*time.Second { + log.Printf("WARNING: Slow COUNT query for group '%s' took %v (count=%d)", groupDBs.Newsgroup, elapsed, count) + } + + return count, nil +} diff --git a/internal/nntp/nntp-transfer-demuxer.go b/internal/nntp/nntp-transfer-demuxer.go index 20e96a0..5727aec 100644 --- a/internal/nntp/nntp-transfer-demuxer.go +++ b/internal/nntp/nntp-transfer-demuxer.go @@ -29,8 +29,8 @@ func NewResponseDemuxer(conn *BackendConn, errChan chan struct{}, BatchCheck int return &ResponseDemuxer{ conn: conn, signalChan: make(chan struct{}, 1), - checkResponseChan: make(chan *ResponseData, 64*1024), // Buffer for CHECK responses - ttResponseChan: make(chan *ResponseData, 64*1024), // Buffer for TAKETHIS responses + checkResponseChan: make(chan *ResponseData, 1024*1024), // Buffer for CHECK responses + ttResponseChan: make(chan *ResponseData, 1024*1024), // Buffer for TAKETHIS responses errChan: errChan, started: false, } From fa41c491a3e0430112048d9e99b0a45c947ea019 Mon Sep 17 00:00:00 2001 From: go-while Date: Mon, 20 Oct 2025 00:09:21 +0000 Subject: [PATCH 066/100] testing --- cmd/nntp-transfer/main.go | 50 ++++++++++++++------------- internal/database/queries.go | 10 +++--- internal/database/sqlite_retry.go | 4 +-- internal/models/models.go | 57 +++++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 30 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 48c7935..852b666 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1034,7 +1034,7 @@ func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchChe var debugArticles = make(map[string][]*models.Article) var debugMutex sync.Mutex -var ErrNotInDateRange = fmt.Errorf("article not in specified date range") +var ErrNotInDateRange = fmt.Errorf("notinrange") // processRequeuedJobs processes any failed jobs that were requeued for retry // Returns the number of jobs processed successfully @@ -1224,21 +1224,7 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i if VERBOSE { log.Printf("Newsgroup: '%s' | Cleaning up TT job #%d with %d articles (ForceCleanUp)", ng.Name, resp.Job.JobID, len(resp.Job.Articles)) } - // Clean up Articles and their internal fields - for i := range resp.Job.Articles { - if resp.Job.Articles[i] != nil { - // Clean article internal fields to free memory - resp.Job.Articles[i].RefSlice = nil - resp.Job.Articles[i].NNTPhead = nil - resp.Job.Articles[i].NNTPbody = nil - resp.Job.Articles[i].Headers = nil - resp.Job.Articles[i].ArticleNums = nil - resp.Job.Articles[i].NewsgroupsPtr = nil - resp.Job.Articles[i].ProcessQueue = nil - resp.Job.Articles[i].MsgIdItem = nil - resp.Job.Articles[i] = nil - } - } + models.RecycleArticles(resp.Job.Articles) resp.Job.Articles = nil // Clean up 
ArticleMap - nil the keys (pointers) before deleting @@ -1350,12 +1336,21 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } //if VERBOSE { var size int + var skipped int for _, a := range articles { + if a == nil { + skipped++ + continue + } size += a.Bytes } - log.Printf("Newsgroup: '%s' | Loaded %d articles from database (processed %d/%d) (Bytes=%d) took %v", ng.Name, len(articles), articlesProcessed, totalNGArticles, size, time.Since(start)) //} + if skipped == len(articles) { + return fmt.Errorf("all articles in batch are nil for newsgroup '%s'", ng.Name) + } + log.Printf("Newsgroup: '%s' | Loaded %d articles (Bytes=%d) took %v (skipped: %d)", ng.Name, len(articles), size, time.Since(start), skipped) // Process articles in network batches + start2 := time.Now() for i := 0; i < len(articles); i += batchCheck { OffsetQueue.Add(1) if common.WantShutdown() { @@ -1367,6 +1362,18 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i if end > len(articles) { end = len(articles) } + skipped := 0 + for _, a := range articles[i:end] { + if a == nil { + //log.Printf("Warning: nil article in batch for newsgroup '%s' (articles %d-%d)", ng.Name, i+1, end) + skipped++ + } + } + if skipped == (end - i) { + log.Printf("Error: all articles in batch are nil for newsgroup '%s' (articles %d-%d)", ng.Name, i+1, end) + OffsetQueue.OffsetBatchDone() + continue + } // pass articles to CHECK or TAKETHIS queue (async!) responseChan, err := processBatch(ttMode, articles[i:end], redisCli, int64(i), int64(end), articlesProcessed-int64(len(articles))+int64(i), OffsetQueue, ngtprogress) if err != nil { @@ -1376,17 +1383,14 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i if responseChan != nil { // pass the response channel to the collector channel: ttResponses ttResponsesSetupChan <- nntp.GetTTSetup(responseChan) - } - OffsetQueue.Wait(MaxQueuedJobs) // wait for offset batches to finish, less than N in flight } // articlesProcessed already incremented above after loading from DB remainingArticles -= int64(len(articles)) - if VERBOSE { - log.Printf("Newsgroup: '%s' | Pushed to queue (processed %d/%d) remaining: %d (Check=%t)", ng.Name, articlesProcessed, totalNGArticles, remainingArticles, ttMode.UseCHECK()) - //log.Printf("Newsgroup: '%s' | Pushed (processed %d/%d) total: %d/%d (unw: %d / rej: %d) (Check=%t)", ng.Name, articlesProcessed, totalArticles, transferred, remainingArticles, ttMode.Unwanted, ttMode.Rejected, ttMode.GetMode()) - } + + log.Printf("Newsgroup: '%s' | Pushed to queue (processed %d/%d) remaining: %d (Check=%t) (Bytes=%d) took: %v", ng.Name, articlesProcessed, totalNGArticles, remainingArticles, ttMode.UseCHECK(), size, time.Since(start2)) + //log.Printf("Newsgroup: '%s' | Pushed (processed %d/%d) total: %d/%d (unw: %d / rej: %d) (Check=%t)", ng.Name, articlesProcessed, totalArticles, transferred, remainingArticles, ttMode.Unwanted, ttMode.Rejected, ttMode.GetMode()) } // end for keyset pagination loop diff --git a/internal/database/queries.go b/internal/database/queries.go index ef68796..ef49ae9 100644 --- a/internal/database/queries.go +++ b/internal/database/queries.go @@ -3331,14 +3331,14 @@ func (db *Database) GetArticlesBatchWithDateFilter(ng *models.Newsgroup, offset return nil, err } defer rows.Close() - i := 0 - out := make([]*models.Article, dbBatchSize) + //out := make([]*models.Article, dbBatchSize) + var out []*models.Article for rows.Next() { - out[i] = &models.Article{} - if err := 
rows.Scan(&out[i].DBArtNum, &out[i].MessageID, &out[i].Subject, &out[i].FromHeader, &out[i].DateSent, &out[i].DateString, &out[i].References, &out[i].Bytes, &out[i].Lines, &out[i].ReplyCount, &out[i].Path, &out[i].HeadersJSON, &out[i].BodyText, &out[i].ImportedAt); err != nil { + a := models.NewArticle() + if err := rows.Scan(&a.DBArtNum, &a.MessageID, &a.Subject, &a.FromHeader, &a.DateSent, &a.DateString, &a.References, &a.Bytes, &a.Lines, &a.ReplyCount, &a.Path, &a.HeadersJSON, &a.BodyText, &a.ImportedAt); err != nil { return nil, err } - i++ + out = append(out, a) } if int64(len(out)) < dbBatchSize { diff --git a/internal/database/sqlite_retry.go b/internal/database/sqlite_retry.go index d0251ea..839acf1 100644 --- a/internal/database/sqlite_retry.go +++ b/internal/database/sqlite_retry.go @@ -9,8 +9,8 @@ import ( ) const ( - maxRetries = 1000 - baseDelay = 100 * time.Millisecond + maxRetries = 10000 + baseDelay = 10 * time.Millisecond maxDelay = 2500 * time.Millisecond ) diff --git a/internal/models/models.go b/internal/models/models.go index 8d85885..233a820 100644 --- a/internal/models/models.go +++ b/internal/models/models.go @@ -158,6 +158,63 @@ type Article struct { ProcessQueue chan *string `json:"-" db:"-"` // newsgroup ptr for batching } +var ArticlePool = make(chan *Article, 128*1024) + +func NewArticle() *Article { + select { + case art := <-ArticlePool: + return art + default: + return &Article{} + } +} + +func RecycleArticles(arts []*Article) { + for _, art := range arts { + RecycleArticle(art) + } +} + +func RecycleArticle(art *Article) { + // Clear fields to avoid memory leaks + art.GetDataFunc = nil + art.DBArtNum = 0 + art.MessageID = "" + art.Subject = "" + art.FromHeader = "" + art.DateSent = time.Time{} + art.DateString = "" + art.References = "" + art.Bytes = 0 + art.Lines = 0 + art.ReplyCount = 0 + art.HeadersJSON = "" + art.BodyText = "" + art.Path = "" + art.ImportedAt = time.Time{} + art.Spam = 0 + art.Hide = 0 + art.Sanitized = false + art.MsgIdItem = nil + art.Headers = nil + art.ArticleNums = nil + art.NNTPhead = nil + art.NNTPbody = nil + art.IsThrRoot = false + art.IsReply = false + art.RefSlice = nil + art.NewsgroupsPtr = nil + art.ProcessQueue = nil + + select { + case ArticlePool <- art: + // Successfully recycled + default: + // Pool is full, let it be garbage collected + //log.Printf("Warning: ArticlePool is full, discarding article to be garbage collected") + } +} + func (a *Article) GetData(what string, group string) string { if a == nil { return "" From f523be274f6a93f93b3fa75678cf905e7a401c94 Mon Sep 17 00:00:00 2001 From: go-while Date: Mon, 20 Oct 2025 12:28:44 +0000 Subject: [PATCH 067/100] testing --- cmd/nntp-fetcher/main.go | 77 +++++++++++++----- cmd/nntp-transfer/main.go | 107 +++++++++++-------------- internal/common/common.go | 8 ++ internal/database/progress.go | 17 ++-- internal/nntp/nntp-client-commands.go | 3 +- internal/nntp/nntp-client.go | 11 +-- internal/nntp/nntp-transfer-demuxer.go | 23 ++---- 7 files changed, 138 insertions(+), 108 deletions(-) diff --git a/cmd/nntp-fetcher/main.go b/cmd/nntp-fetcher/main.go index 35968d0..0e7212f 100644 --- a/cmd/nntp-fetcher/main.go +++ b/cmd/nntp-fetcher/main.go @@ -64,9 +64,10 @@ func main() { fetchNewsgroup = flag.String("group", "", "Newsgroup to fetch (default: empty = all groups once up to max-batch) or rocksolid.* with final wildcard to match prefix.*") nntphostname = flag.String("nntphostname", "", "Your hostname must be set!") useShortHashLenPtr = flag.Int("useshorthashlen", 7, 
"short hash length for history storage (2-7, default: 7) - NOTE: cannot be changed once set!") - fetchActiveOnly = flag.Bool("fetch-active-only", true, "Fetch only active newsgroups (default: true)") + fetchActiveOnly = flag.Bool("fetch-active-only", true, "Downloads only active newsgroups (default: true) To download only disabled newsgroups set to false!") + excludePrefix = flag.String("exclude-prefix", "", "use with UpdateNewsgroupList to exclude newsgroups with this prefix (default: empty = no exclusion) allows comma separation and wildcards alt.*,comp.*") downloadMaxPar = flag.Int("download-max-par", 1, "run this many groups in parallel, can eat your memory! (default: 1)") - updateList = flag.String("fetch-newsgroups-from-remote", "", "Fetch remote newsgroup list from first enabled provider (default: empty, nothing. use \"group.*\" or \"\\$all\")") + updateList = flag.String("fetch-newsgroups-from-remote", "", "UpdateNewsgroupList: get remote newsgroup list from first enabled provider (default: empty, nothing. use \"group.*\" or \"\\$all\")") updateListForce = flag.Bool("fetch-newsgroups-force", false, "use with -fetch-newsgroups-from-remote .. to really add them to database") dataDir = flag.String("data", "./data", "Directory to store database files") // Download options with date filtering @@ -80,7 +81,7 @@ func main() { os.Exit(0) } if *updateList != "" { - if err := UpdateNewsgroupList(updateList, *updateListForce); err != nil { + if err := UpdateNewsgroupList(updateList, excludePrefix, *updateListForce); err != nil { log.Fatalf("Newsgroup list update failed: %v", err) } os.Exit(0) @@ -410,6 +411,9 @@ func main() { default: // pass } + if groupInfo.First > lastArticle { + lastArticle = groupInfo.First - 1 + } //log.Printf("DEBUG-RANGE: ng='%s' lastArticle=%d (after switch)", *ng, lastArticle) start := lastArticle + 1 // Start from the first article in the remote group end := start + processor.MaxBatchSize - 1 // End at the last article in the remote group @@ -716,8 +720,8 @@ func getRealMemoryUsage() (uint64, error) { // UpdateNewsgroupList fetches the remote newsgroup list from the first enabled provider // and adds all groups to the database that we don't already have -func UpdateNewsgroupList(updateList *string, updateListForce bool) error { - log.Printf("Starting newsgroup list update from remote server...") +func UpdateNewsgroupList(updateList *string, excludePrefix *string, updateListForce bool) error { + log.Printf("UpdateNewsgroupList: Starting newsgroup list update from remote server...") // Initialize database db, err := database.OpenDatabase(nil) @@ -744,7 +748,7 @@ func UpdateNewsgroupList(updateList *string, updateListForce bool) error { return fmt.Errorf("no enabled providers found in database") } - log.Printf("Using provider: %s (Host: %s, Port: %d, SSL: %v)", + log.Printf("UpdateNewsgroupList: Using provider: %s (Host: %s, Port: %d, SSL: %v)", firstProvider.Name, firstProvider.Host, firstProvider.Port, firstProvider.SSL) // Create NNTP backend config using the first enabled provider @@ -768,7 +772,7 @@ func UpdateNewsgroupList(updateList *string, updateListForce bool) error { } defer pool.Put(conn) - log.Printf("Connected to %s:%d, fetching newsgroup list...", firstProvider.Host, firstProvider.Port) + log.Printf("UpdateNewsgroupList: Connected to %s:%d, fetching newsgroup list...", firstProvider.Host, firstProvider.Port) // Fetch the complete newsgroup list remoteGroups, err := conn.ListGroups() @@ -776,7 +780,7 @@ func UpdateNewsgroupList(updateList *string, 
updateListForce bool) error { return fmt.Errorf("failed to fetch newsgroup list: %w", err) } - log.Printf("Fetched %d newsgroups from remote server", len(remoteGroups)) + log.Printf("UpdateNewsgroupList: Fetched %d newsgroups from remote server", len(remoteGroups)) // Parse the update pattern to determine filtering updatePattern := *updateList @@ -785,17 +789,24 @@ func UpdateNewsgroupList(updateList *string, updateListForce bool) error { if updatePattern == "$all" { addAllGroups = true - log.Printf("Listing all newsgroups from remote server") + log.Printf("UpdateNewsgroupList: Listing all newsgroups from remote server") } else if strings.HasSuffix(updatePattern, "*") { groupPrefix = strings.TrimSuffix(updatePattern, "*") - log.Printf("Listing newsgroups with prefix: '%s'", groupPrefix) + log.Printf("UpdateNewsgroupList: Listing newsgroups with prefix: '%s'", groupPrefix) } else if updatePattern != "" { groupPrefix = updatePattern - log.Printf("Listing newsgroups matching: '%s'", groupPrefix) + log.Printf("UpdateNewsgroupList: Listing newsgroups matching: '%s'", groupPrefix) } else { return fmt.Errorf("invalid update pattern: '%s' (use 'group.*' or '$all')", updatePattern) } - + var excludePrefixes []string + if excludePrefix != nil && *excludePrefix != "" { + excludePrefixes = strings.Split(*excludePrefix, ",") + for i, p := range excludePrefixes { + excludePrefixes[i] = strings.TrimSpace(p) + log.Printf("UpdateNewsgroupList: Excluding newsgroups with prefix: '%s'", excludePrefixes[i]) + } + } // Get existing newsgroups from local database localGroups, err := db.MainDBGetAllNewsgroups() if err != nil { @@ -808,28 +819,54 @@ func UpdateNewsgroupList(updateList *string, updateListForce bool) error { existingGroups[group.Name] = true } - log.Printf("Found %d newsgroups in local database", len(localGroups)) + log.Printf("UpdateNewsgroupList: Found %d newsgroups in local database", len(localGroups)) + today := time.Now().UTC() + //today := time.Now().UTC().Truncate(24 * time.Hour) // Add new newsgroups that don't exist locally and match the pattern newGroupCount := 0 skippedCount := 0 var messages int64 +loopGroups: for _, remoteGroup := range remoteGroups { // Apply prefix filtering if !addAllGroups { if groupPrefix != "" && !strings.HasPrefix(remoteGroup.Name, groupPrefix) { skippedCount++ - continue + continue loopGroups } } - + if len(excludePrefixes) > 0 { + for _, excludePrefix := range excludePrefixes { + if excludePrefix == "" { + continue + } + if strings.HasSuffix(excludePrefix, "*") { + pattern := strings.TrimSuffix(excludePrefix, "*") + if strings.HasPrefix(remoteGroup.Name, pattern) { + log.Printf("Excluding newsgroup: '%s' by prefix: '%s' pattern: '%s'", remoteGroup.Name, excludePrefix, pattern) + skippedCount++ + continue loopGroups + } + } else if remoteGroup.Name == excludePrefix { + log.Printf("Excluding newsgroup: '%s'", remoteGroup.Name) + skippedCount++ + continue loopGroups + } + } + } + if !common.IsValidGroupName(remoteGroup.Name) { + log.Printf("Skipping invalid newsgroup name: '%s'", remoteGroup.Name) + skippedCount++ + continue loopGroups + } if !existingGroups[remoteGroup.Name] { // Create a new newsgroup model newGroup := &models.Newsgroup{ Name: remoteGroup.Name, - Active: true, // Default to active - Status: "y", // Default posting status - CreatedAt: time.Now().UTC(), // Default created at + Active: false, // Default to inactive + Status: remoteGroup.Status, // newsgroups y,m,c,a status + CreatedAt: today, // Default created at } if updateListForce { @@ -840,9 
+877,9 @@ func UpdateNewsgroupList(updateList *string, updateListForce bool) error { continue } - log.Printf("Added new newsgroup: %s", remoteGroup.Name) + log.Printf("(Added new) newsgroup: '%s'", remoteGroup.Name) } else { - log.Printf("New newsgroup: %s (not added) lo=%d hi=%d messages=%d", remoteGroup.Name, remoteGroup.First, remoteGroup.Last, remoteGroup.Count) + log.Printf("(not added) newsgroup: '%s' messages=%d status=%s", remoteGroup.Name, remoteGroup.Count, remoteGroup.Status) } newGroupCount++ messages += remoteGroup.Count diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 852b666..e1e304a 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1442,7 +1442,7 @@ var upperLevel float64 = 95.0 func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCli *redis.Client, batchStart int64, batchEnd int64, dbOffset int64, offsetQ *nntp.OffsetQueue, ngtprogress *nntp.NewsgroupTransferProgress) (chan *nntp.TTResponse, error) { if len(articles) == 0 { - log.Printf("processBatch: no articles in this batch for newsgroup '%s'", *ttMode.Newsgroup) + log.Printf("Newsgroup: '%s' | processBatch: no articles in this batch", *ttMode.Newsgroup) return nil, nil } @@ -1549,7 +1549,6 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl return job.QuitResponseChan(), nil } if VERBOSE { - log.Printf("Newsgroup: '%s' | processBatch: Received %d articles, queuing %d for CHECK (Redis filtered: %d)", *ttMode.Newsgroup, len(articles), len(job.MessageIDs), redis_cached) } // Assign job to worker (consistent assignment + load balancing) @@ -1562,12 +1561,12 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl QueuesMutex.RUnlock() workerID := assignWorkerToNewsgroup(*ttMode.Newsgroup) + QueuesMutex.RLock() WorkersCheckChannel := CheckQueues[workerID] QueuesMutex.RUnlock() - //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queue job #%d with %d message IDs. CheckQ=%d", *ttMode.Newsgroup, workerID, job.JobID, len(job.MessageIDs), len(CheckQueues[workerID])) - + log.Printf("Newsgroup: '%s' | CheckWorker (%d) queueing job #%d with %d msgIDs to worker %d. CheckQ=%d", *ttMode.Newsgroup, workerID, job.JobID, len(job.MessageIDs), workerID, len(CheckQueues[workerID])) WorkersCheckChannel <- job // checkQueue <- job //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queued Job #%d", *ttMode.Newsgroup, workerID, job.JobID) return job.GetResponseChan(), nil @@ -1677,7 +1676,6 @@ func sendArticlesBatchViaTakeThis(conn *nntp.BackendConn, articles []*models.Art continue } conn.Unlock() - conn.ForceCloseConn() job.NGTProgress.Increment(nntp.IncrFLAG_CONN_ERRORS, 1) log.Printf("ERROR Newsgroup: '%s' | Failed to send TAKETHIS for %s: %v", newsgroup, article.MessageID, err) return redis_cached, fmt.Errorf("failed to send TAKETHIS for %s: %v", article.MessageID, err) @@ -1790,7 +1788,7 @@ func BootConnWorkers(pool *nntp.Pool, redisCli *redis.Client) { Demuxers = make([]*nntp.ResponseDemuxer, nntp.NNTPTransferThreads) DemuxersMutex.Unlock() for i := range CheckQueues { - CheckQueues[i] = make(chan *nntp.CHTTJob, MaxQueuedJobs) // no cap! only accepts if there is a reader! 
+ CheckQueues[i] = make(chan *nntp.CHTTJob) // no cap for CH jobs TakeThisQueues[i] = make(chan *nntp.CHTTJob, MaxQueuedJobs) // allows max N queued TT jobs WorkerQueueLength[i] = 0 } @@ -1854,10 +1852,10 @@ forever: errChan: errChan, redisCli: redisCli, ExitChan: make(chan *ReturnSignal, 1), - jobsQueued: make(map[*nntp.CHTTJob]uint64, BatchCheck), - jobsReadOK: make(map[*nntp.CHTTJob]uint64, BatchCheck), - jobMap: make(map[*string]*nntp.CHTTJob, BatchCheck), - jobs: make([]*nntp.CHTTJob, 0, BatchCheck), + jobsQueued: make(map[*nntp.CHTTJob]uint64), + jobsReadOK: make(map[*nntp.CHTTJob]uint64), + jobMap: make(map[*string]*nntp.CHTTJob), + jobs: make([]*nntp.CHTTJob, 0, MaxQueuedJobs), } returnSignals[workerID] = returnSignal @@ -2121,8 +2119,8 @@ func replyChan(request chan struct{}, reply chan struct{}) { func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue chan *nntp.CHTTJob) { var mux sync.Mutex var workerWG sync.WaitGroup - readCHECKResponsesChan := make(chan *nntp.ReadRequest, 128000) - readTAKETHISResponsesChan := make(chan *nntp.ReadRequest, 128000) + readCHECKResponsesChan := make(chan *nntp.ReadRequest, 1024*1024) + readTAKETHISResponsesChan := make(chan *nntp.ReadRequest, 1024*1024) errChan := make(chan struct{}, 9) tickChan := common.GetStructChanCap1() @@ -2145,10 +2143,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue defer func(conn *nntp.BackendConn, rs *ReturnSignal) { conn.ForceCloseConn() rs.ExitChan <- rs - select { - case errChan <- struct{}{}: - default: - } + common.SignalErrChan(errChan) }(conn, rs) //lastRun := time.Now() @@ -2159,18 +2154,20 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // launch go routine which sends CHECK commands workerWG.Add(1) go func(workerWG *sync.WaitGroup) { - defer workerWG.Done() - // tick every n seconds to check if any CHECKs to do ticker := time.NewTicker(DefaultCheckTicker) - defer ticker.Stop() - defer func() { - errChan <- struct{}{} - }() + defer func(workerWG *sync.WaitGroup) { + conn.ForceCloseConn() + ticker.Stop() + common.SignalErrChan(errChan) + workerWG.Done() + log.Printf("CheckWorker (%d): CHECK sender goroutine exiting", workerID) + }(workerWG) + // tick every n seconds to check if any CHECKs to do loop: for { select { case <-errChan: - errChan <- struct{}{} + common.SignalErrChan(errChan) log.Printf("CheckWorker (%d): Send CHECK got errChan signal... exiting", workerID) return @@ -2242,8 +2239,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rs.Mux.Lock() rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) 
// requeue at front rs.Mux.Unlock() - rs.BlockCHECK() - //common.ChanRelease(flipflopChan) + //rs.BlockCHECK() return } //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d Sent CHECK for batch (offset %d: %d-%d), responses will be read asynchronously...", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) @@ -2302,21 +2298,19 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // launch a go routine to read CHECK responses from the supplied connection with textproto readline workerWG.Add(1) go func(workerWG *sync.WaitGroup) { - defer workerWG.Done() var responseCount int var tookTime time.Duration - defer func() { - select { - case errChan <- struct{}{}: - default: - } - }() + defer func(workerWG *sync.WaitGroup) { + conn.ForceCloseConn() + common.SignalErrChan(errChan) + workerWG.Done() + }(workerWG) loop: for { select { case <-errChan: log.Printf("CheckWorker (%d): Read CHECK responses got errChan signal...", workerID) - errChan <- struct{}{} + common.SignalErrChan(errChan) log.Printf("CheckWorker (%d): Read CHECK responses exiting", workerID) return @@ -2492,20 +2486,18 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // This follows the EXACT pattern as CHECK response reader (lines 2366-2552) workerWG.Add(1) go func(workerWG *sync.WaitGroup) { - defer workerWG.Done() - defer func() { - select { - case errChan <- struct{}{}: - default: - } - }() + defer func(workerWG *sync.WaitGroup) { + conn.ForceCloseConn() + common.SignalErrChan(errChan) + workerWG.Done() + }(workerWG) ttloop: for { select { case <-errChan: log.Printf("TTResponseWorker (%d): got errChan signal, exiting", workerID) - errChan <- struct{}{} + common.SignalErrChan(errChan) return case rr := <-readTAKETHISResponsesChan: @@ -2557,7 +2549,6 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rr.Job.NGTProgress.Increment(nntp.IncrFLAG_CONN_ERRORS, 1) rr.Job.PendingResponses.Done() // Mark response as handled (error case) rr.ClearReadRequest(respData) - conn.ForceCloseConn() return } @@ -2597,7 +2588,6 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue rr.Job.NGTProgress.Increment(nntp.IncrFLAG_TX_ERRORS, 1) rr.Job.PendingResponses.Done() // Mark response handled (error case) rr.ClearReadRequest(respData) - conn.ForceCloseConn() return default: @@ -2616,13 +2606,11 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // launch a goroutine to process TAKETHIS jobs from local channel sharing the same connection workerWG.Add(1) go func(workerWG *sync.WaitGroup) { - defer workerWG.Done() - defer func() { - select { - case errChan <- struct{}{}: - default: - } - }() + defer func(workerWG *sync.WaitGroup) { + conn.ForceCloseConn() + common.SignalErrChan(errChan) + workerWG.Done() + }(workerWG) var job *nntp.CHTTJob for { if common.WantShutdown() { @@ -2711,10 +2699,7 @@ forever: for { select { case <-errChan: - select { - case errChan <- struct{}{}: - default: - } + common.SignalErrChan(errChan) break forever case job := <-checkQueue: @@ -2795,13 +2780,21 @@ forever: workerWG.Wait() startWait := time.Now() lastPrint := startWait +wait: for { mux.Lock() if runningTTJobs == 0 { mux.Unlock() - break + break wait } mux.Unlock() + select { + case <-errChan: + common.SignalErrChan(errChan) + break wait + default: + // continue + } time.Sleep(time.Millisecond * 50) if time.Since(lastPrint) > time.Second*5 
{ log.Printf("CHTTworker (%d): waiting since %v for %d running TAKETHIS jobs to complete before exiting...", workerID, time.Since(startWait), runningTTJobs) @@ -2812,14 +2805,12 @@ forever: // monitorMemoryStats logs memory statistics periodically func monitorMemoryStats() { - ticker := time.NewTicker(30 * time.Second) - defer ticker.Stop() var m runtime.MemStats startTime := time.Now() for { - <-ticker.C + time.Sleep(30 * time.Second) runtime.ReadMemStats(&m) // Convert bytes to MB for readability diff --git a/internal/common/common.go b/internal/common/common.go index 19e8b2c..0733e5d 100644 --- a/internal/common/common.go +++ b/internal/common/common.go @@ -52,6 +52,14 @@ func ChanRelease(lockChan chan struct{}) { <-lockChan } +func SignalErrChan(errChan chan struct{}) { + select { + case errChan <- struct{}{}: + default: + // already signaled + } +} + var StructChansCap1 = make(chan chan struct{}, 16384) // GetStructChanCap1 returns a recycled chan struct{} or makes a new one with capacity of 1 if none are available diff --git a/internal/database/progress.go b/internal/database/progress.go index 4eabc68..8cc81b8 100644 --- a/internal/database/progress.go +++ b/internal/database/progress.go @@ -16,7 +16,7 @@ import ( // ProgressDB tracks fetching progress for newsgroups per backend type ProgressDB struct { db *sql.DB - mux sync.Mutex + mux sync.RWMutex } // ProgressEntry represents the fetching progress for a newsgroup on a backend @@ -87,20 +87,20 @@ func (p *ProgressDB) initSchema() (err error) { return err } -const query_GetLastArticle = ` -SELECT last_article FROM progress -WHERE backend_name = ? AND newsgroup_name = ? -` +const query_GetLastArticle = `SELECT last_article FROM progress WHERE backend_name = ? AND newsgroup_name = ?` // GetLastArticle returns the last fetched article number for a newsgroup on a backend func (p *ProgressDB) GetLastArticle(backendName, newsgroupName string) (int64, error) { + p.mux.RLock() var lastArticle int64 err := retryableQueryRowScan(p.db, query_GetLastArticle, []interface{}{backendName, newsgroupName}, &lastArticle) - if err == sql.ErrNoRows { + p.mux.RUnlock() //log.Printf("progressDB.GetLastArticle: provider '%s', newsgroup '%s' has no progress", backendName, newsgroupName) - return 0, nil // No previous progress, start from 0 + p.UpdateProgress(backendName, newsgroupName, 0) // Initialize progress + return 0, nil // No previous progress, start from 0 } + p.mux.RUnlock() if err != nil { return -999, fmt.Errorf("failed to get last article: %w", err) } @@ -111,8 +111,7 @@ func (p *ProgressDB) GetLastArticle(backendName, newsgroupName string) (int64, e return lastArticle, nil } -const query_UpdateProgress = ` -INSERT INTO progress (backend_name, newsgroup_name, last_article, last_fetched, updated_at) +const query_UpdateProgress = `INSERT INTO progress (backend_name, newsgroup_name, last_article, last_fetched, updated_at) VALUES (?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) ON CONFLICT(backend_name, newsgroup_name) DO UPDATE SET last_article = excluded.last_article, diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index 9941538..d828261 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -980,6 +980,7 @@ func (c *BackendConn) parseGroupLine(line string) (GroupInfo, error) { First: first, Last: last, PostingOK: postingOK, + Status: parts[3], }, nil } @@ -1114,7 +1115,7 @@ func (c *BackendConn) SendTakeThisArticleStreaming(article *models.Article, nntp return 0, 
0, err, true } //writer := bufio.NewWriterSize(c.conn, c.GetBufSize(article.Bytes)) // Slightly larger buffer than article size for headers - writer := bufio.NewWriter(c.conn) // Slightly larger buffer than article size for headers + writer := bufio.NewWriter(c.conn) //c.mux.Lock() //defer c.mux.Unlock() diff --git a/internal/nntp/nntp-client.go b/internal/nntp/nntp-client.go index f702426..29d83b8 100644 --- a/internal/nntp/nntp-client.go +++ b/internal/nntp/nntp-client.go @@ -43,12 +43,12 @@ const ( DefaultConnExpire = 25 * time.Second // MaxReadLines is the maximum lines to read per response (allow for large group lists). - MaxReadLines = 500000 + MaxReadLines = 1024 * 1024 ) // DefaultBufferTX is the default buffer size for bufio.Writer // when sending articles via POST/TAKETHIS/IHAVE commands. -var DefaultBufferTX int = 64 * 1024 +var DefaultBufferTX int = 16 * 1024 // BackendConn represents an NNTP connection to a server. // It manages the connection state, authentication, and provides methods @@ -115,6 +115,7 @@ type GroupInfo struct { FetchStart int64 FetchEnd int64 PostingOK bool + Status string } // OverviewLine represents a line from XOVER command @@ -370,10 +371,10 @@ func (c *BackendConn) UpdateLastUsed() { } func (c *BackendConn) GetBufSize(size int) int { - if size+4096 <= 1024*1024 { - return size + 4096 + if size+1024 <= DefaultBufferTX { + return size + 1024 } - return 1024 * 1024 // hardcoded default max buffer size + return DefaultBufferTX // hardcoded default max buffer size } func (c *BackendConn) Lock() { diff --git a/internal/nntp/nntp-transfer-demuxer.go b/internal/nntp/nntp-transfer-demuxer.go index 5727aec..8ceea64 100644 --- a/internal/nntp/nntp-transfer-demuxer.go +++ b/internal/nntp/nntp-transfer-demuxer.go @@ -4,6 +4,8 @@ import ( "log" "sync" "time" + + "github.com/go-while/go-pugleaf/internal/common" ) // ResponseDemuxer reads all responses from a connection in ONE goroutine @@ -105,20 +107,14 @@ func (d *ResponseDemuxer) readAndDispatch() { if r := recover(); r != nil { log.Printf("ResponseDemuxer: panic in readAndDispatch: %v", r) } - select { - case d.errChan <- struct{}{}: - default: - } + common.SignalErrChan(d.errChan) }() outoforderBacklog := make(map[uint]*CmdIDinfo, 1024) for { select { case <-d.errChan: log.Printf("ResponseDemuxer: got errChan signal, exiting") - select { - case d.errChan <- struct{}{}: - default: - } + common.SignalErrChan(d.errChan) // exit return default: @@ -173,7 +169,7 @@ func (d *ResponseDemuxer) readAndDispatch() { log.Printf("LongWait ResponseDemuxer: received response cmdID=%d: code=%d line='%s' err='%v' respType=%d (waited %v)", cmdInfo.CmdID, code, line, err, cmdInfo.RespType, time.Since(start)) } if err != nil && code == 0 { - d.errChan <- struct{}{} + common.SignalErrChan(d.errChan) log.Printf("ResponseDemuxer: error reading response for cmdID=%d: %v", cmdInfo.CmdID, err) return } @@ -186,7 +182,7 @@ func (d *ResponseDemuxer) readAndDispatch() { //log.Printf("ResponseDemuxer: dispatched CHECK response cmdID=%d d.checkResponseChan=%d", cmdInfo.CmdID, len(d.checkResponseChan)) case <-d.errChan: log.Printf("ResponseDemuxer: got errChan while dispatching CHECK response, exiting") - d.errChan <- struct{}{} + common.SignalErrChan(d.errChan) return } @@ -196,17 +192,14 @@ func (d *ResponseDemuxer) readAndDispatch() { // Dispatched successfully //log.Printf("ResponseDemuxer: dispatched TAKETHIS response cmdID=%d d.ttResponseChan=%d", cmdInfo.CmdID, len(d.ttResponseChan)) case <-d.errChan: - d.errChan <- struct{}{} + 
common.SignalErrChan(d.errChan) log.Printf("ResponseDemuxer: got errChan while dispatching TAKETHIS response, exiting") return } default: log.Printf("ResponseDemuxer: WARNING - unknown command type for cmdID=%d, signaling ERROR", cmdInfo.CmdID) - select { - case d.errChan <- struct{}{}: - default: - } + common.SignalErrChan(d.errChan) } } } From 6ae06ddc4dcb8850927038468375b4848a962fb5 Mon Sep 17 00:00:00 2001 From: go-while Date: Mon, 20 Oct 2025 18:50:11 +0000 Subject: [PATCH 068/100] testing --- cmd/nntp-fetcher/main.go | 2 +- cmd/nntp-transfer/main.go | 278 +++++++++++++++++---------------- internal/common/common.go | 7 + internal/database/queries.go | 158 ++++++++++++++++++- internal/nntp/nntp-transfer.go | 2 - scripts/queries.sh | 27 ++++ 6 files changed, 338 insertions(+), 136 deletions(-) diff --git a/cmd/nntp-fetcher/main.go b/cmd/nntp-fetcher/main.go index 0e7212f..1c55fed 100644 --- a/cmd/nntp-fetcher/main.go +++ b/cmd/nntp-fetcher/main.go @@ -886,7 +886,7 @@ loopGroups: } } - log.Printf("Newsgroup list update completed: %d new groups added, %d skipped (prefix filter), out of %d remote groups with total: %d messages", + log.Printf("Result: %d new groups, %d skipped, out of %d remote groups with total: %d messages", newGroupCount, skippedCount, len(remoteGroups), messages) return nil diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index e1e304a..6a252e9 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -189,6 +189,7 @@ func main() { fileInclude = flag.String("file-include", "", "File containing newsgroup patterns to include (one per line)") fileExclude = flag.String("file-exclude", "", "File containing newsgroup patterns to exclude (one per line)") forceIncludeOnly = flag.Bool("force-include-only", false, "When set, only transfer newsgroups that match patterns in include file (ignores -group pattern)") + excludePrefix = flag.String("exclude-prefix", "", "Exclude newsgroups with this prefix (comma-separated list, supports wildcards like 'alt.binaries.*')") // Web server and profiling options webPort = flag.Int("web-port", 0, "Enable web server on this port to view results (e.g. 
8080, default: disabled)") @@ -413,7 +414,7 @@ func main() { log.Printf("Created connection pool for target server '%s:%d' with max %d connections", *host, *port, *maxThreads) // Get newsgroups to transfer - newsgroups, err := getNewsgroupsToTransfer(db, *transferGroup, *fileInclude, *fileExclude, *forceIncludeOnly) + newsgroups, err := getNewsgroupsToTransfer(db, *transferGroup, *fileInclude, *fileExclude, *excludePrefix, *forceIncludeOnly) if err != nil { log.Fatalf("Failed to get newsgroups: %v", err) } @@ -440,7 +441,7 @@ func main() { // Start NNTP worker pool if !*dryRun { log.Printf("Starting NNTP connection worker pool...") - go BootConnWorkers(pool, redisCli) + go BootConnWorkers(db, pool, redisCli) } nntp.ResultsMutex.Lock() TotalNewsgroups = int64(len(newsgroups)) @@ -711,9 +712,19 @@ func testConnection(host *string, port *int, username *string, password *string, } // getNewsgroupsToTransfer returns newsgroups matching the specified pattern and file filters -func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, fileExclude string, forceIncludeOnly bool) ([]*models.Newsgroup, error) { +func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, fileExclude, excludePrefix string, forceIncludeOnly bool) ([]*models.Newsgroup, error) { var newsgroups []*models.Newsgroup + // Parse exclude prefix patterns (comma-separated) + var excludePrefixes []string + if excludePrefix != "" { + excludePrefixes = strings.Split(excludePrefix, ",") + for i, p := range excludePrefixes { + excludePrefixes[i] = strings.TrimSpace(p) + log.Printf("Excluding newsgroups with prefix: '%s'", excludePrefixes[i]) + } + } + // Load include/exclude patterns from files if specified var includePatterns, excludePatterns []string var includeLookup, excludeLookup map[string]bool @@ -804,6 +815,10 @@ func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, f // Now apply include patterns to group-filtered newsgroups start = time.Now() for _, ng := range groupFiltered { + // Check exclude prefix first + if matchesExcludePrefix(ng.Name, excludePrefixes) { + continue + } // Fast exact match check first if includeLookup[ng.Name] { newsgroups = append(newsgroups, ng) @@ -824,6 +839,10 @@ func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, f log.Printf("Using $all pattern: transferring all newsgroups with file filters applied") start := time.Now() for _, ng := range allNewsgroups { + // Check exclude prefix first + if matchesExcludePrefix(ng.Name, excludePrefixes) { + continue + } if shouldIncludeNewsgroup(ng.Name, includePatterns, excludePatterns, includeLookup, excludeLookup, hasIncludeWildcards, hasExcludeWildcards) { newsgroups = append(newsgroups, ng) } @@ -846,6 +865,10 @@ func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, f if suffixWildcard { for _, ng := range allNewsgroups { if strings.HasPrefix(ng.Name, wildcardPrefix) { + // Check exclude prefix first + if matchesExcludePrefix(ng.Name, excludePrefixes) { + continue + } if shouldIncludeNewsgroup(ng.Name, includePatterns, excludePatterns, includeLookup, excludeLookup, hasIncludeWildcards, hasExcludeWildcards) { newsgroups = append(newsgroups, ng) } @@ -855,6 +878,10 @@ func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, f // Exact match for _, ng := range allNewsgroups { if ng.Name == groupPattern { + // Check exclude prefix first + if matchesExcludePrefix(ng.Name, excludePrefixes) { + break + } if 
shouldIncludeNewsgroup(ng.Name, includePatterns, excludePatterns, includeLookup, excludeLookup, hasIncludeWildcards, hasExcludeWildcards) { newsgroups = append(newsgroups, ng) } @@ -867,6 +894,21 @@ func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, f return newsgroups, nil } +// matchesExcludePrefix checks if a newsgroup name matches any of the exclude prefixes +func matchesExcludePrefix(ngName string, excludePrefixes []string) bool { + for _, excludePrefix := range excludePrefixes { + if strings.HasSuffix(excludePrefix, "*") { + pattern := strings.TrimSuffix(excludePrefix, "*") + if strings.HasPrefix(ngName, pattern) { + return true + } + } else if ngName == excludePrefix { + return true + } + } + return false +} + // loadPatternsFromFile loads newsgroup patterns from a file (one per line) func loadPatternsFromFile(filePath string) ([]string, error) { file, err := os.Open(filePath) @@ -1065,9 +1107,9 @@ func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponse return i, nil } - log.Printf("Newsgroup: '%s' | Processing requeued job %d/%d with %d articles", newsgroup, i+1, len(queuedJobs), len(job.Articles)) + log.Printf("Newsgroup: '%s' | Processing requeued job %d/%d with %d articles", newsgroup, i+1, len(queuedJobs), len(job.MessageIDs)) // pass articles to CHECK or TAKETHIS queue (async!) - responseChan, err := processBatch(ttMode, job.Articles, redisCli, job.BatchStart, job.BatchEnd, -1, job.OffsetQ, job.NGTProgress) + responseChan, err := processBatch(ttMode, job.MessageIDs, redisCli, job.BatchStart, job.BatchEnd, -1, job.OffsetQ, job.NGTProgress) if err != nil { log.Printf("Newsgroup: '%s' | Error processing requeued batch: %v", newsgroup, err) jobRequeueMutex.Lock() @@ -1220,20 +1262,22 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i return } // free memory - CRITICAL: Lock and unlock in same scope, not with defer! 
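// Illustrative sketch, not part of the patch: the reason the cleanup hunk below locks
// and unlocks in the same scope instead of using defer. Type and field names here are
// made up; only the locking pattern mirrors the code.
package main

import (
	"fmt"
	"sync"
)

type ttJob struct {
	mu   sync.Mutex
	data []int
}

func cleanup(jobs []*ttJob) {
	for _, j := range jobs {
		// Correct: Lock and Unlock in the same iteration, so each job is only
		// held for the moment its memory is released.
		j.mu.Lock()
		j.data = nil
		j.mu.Unlock()
		// With `defer j.mu.Unlock()` instead, every unlock would be postponed
		// until cleanup returns, keeping all jobs locked (and referenced) for
		// the lifetime of the call and blocking anything else that locks them.
	}
}

func main() {
	jobs := []*ttJob{{data: []int{1, 2, 3}}, {data: []int{4}}}
	cleanup(jobs)
	fmt.Println("cleaned", len(jobs), "jobs")
}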
- resp.Job.Mux.Lock() - if VERBOSE { - log.Printf("Newsgroup: '%s' | Cleaning up TT job #%d with %d articles (ForceCleanUp)", ng.Name, resp.Job.JobID, len(resp.Job.Articles)) - } - models.RecycleArticles(resp.Job.Articles) - resp.Job.Articles = nil + //resp.Job.Mux.Lock() + //if VERBOSE { + // log.Printf("Newsgroup: '%s' | Cleaning up TT job #%d with %d articles (ForceCleanUp)", ng.Name, resp.Job.JobID, len(resp.Job.Articles)) + //} + //models.RecycleArticles(resp.Job.Articles) + //resp.Job.Articles = nil // Clean up ArticleMap - nil the keys (pointers) before deleting + /* disabled for msgid := range resp.Job.ArticleMap { resp.Job.ArticleMap[msgid] = nil delete(resp.Job.ArticleMap, msgid) } resp.Job.ArticleMap = nil - resp.Job.Mux.Unlock() + */ + //resp.Job.Mux.Unlock() nntp.RecycleTTResponseChan(responseChan) nntp.RecycleTTResponse(resp) }(setup.ResponseChan, num, responseWG) @@ -1299,13 +1343,13 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } // Use simple OFFSET pagination - var articlesProcessed int64 - var offset int64 - + var processed int64 + msgIDsChan := make(chan []*string, 1) + go db.GetMessageIDsWithDateFilter(ng, startTime, endTime, dbBatchSize, msgIDsChan) // Get articles in database batches (much larger than network batches) - for articlesProcessed < totalNGArticles { + for messageIDs := range msgIDsChan { if common.WantShutdown() { - log.Printf("WantShutdown in newsgroup: '%s' (processed %d articles)", ng.Name, articlesProcessed) + log.Printf("WantShutdown in newsgroup: '%s' (processed %d messageIDs)", ng.Name, processed) return nil } // Process any requeued jobs first (from previous failed batches) @@ -1313,69 +1357,38 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i return err } start := time.Now() - // Load batch from database using OFFSET pagination - articles, err := db.GetArticlesBatchWithDateFilter(ng, offset, startTime, endTime, dbBatchSize) - if err != nil { - log.Printf("Error loading article batch (processed %d) for newsgroup %s: %v", articlesProcessed, ng.Name, err) - return fmt.Errorf("failed to load article batch (processed %d) for newsgroup '%s': %v", articlesProcessed, ng.Name, err) - } - - if len(articles) == 0 { - //log.Printf("No more articles in newsgroup %s (processed %d)", ng.Name, articlesProcessed) + if len(messageIDs) == 0 { + log.Printf("No more articles in newsgroup %s (loaded %d)", ng.Name, processed) break } - offset += int64(len(articles)) - articlesProcessed += int64(len(articles)) + processed += int64(len(messageIDs)) if dryRun && debugCapture { - debugMutex.Lock() - debugArticles[ng.Name] = append(debugArticles[ng.Name], articles...) - debugMutex.Unlock() + log.Printf("Newsgroup: '%s' | DRY RUN with debug capture: Capturing %d articles (processed %d) BROKEN TODO NEED FIX!", ng.Name, len(messageIDs), processed) + //debugMutex.Lock() + // TODO: broken debug capture code, fetch articles here + //debugArticles[ng.Name] = append(debugArticles[ng.Name], articles...) 
+ //debugMutex.Unlock() return nil } - //if VERBOSE { - var size int - var skipped int - for _, a := range articles { - if a == nil { - skipped++ - continue - } - size += a.Bytes - } - //} - if skipped == len(articles) { - return fmt.Errorf("all articles in batch are nil for newsgroup '%s'", ng.Name) - } - log.Printf("Newsgroup: '%s' | Loaded %d articles (Bytes=%d) took %v (skipped: %d)", ng.Name, len(articles), size, time.Since(start), skipped) + + log.Printf("Newsgroup: '%s' | Loaded %d mids | took %v)", ng.Name, len(messageIDs), time.Since(start)) // Process articles in network batches start2 := time.Now() - for i := 0; i < len(articles); i += batchCheck { + for i := 0; i < len(messageIDs); i += batchCheck { OffsetQueue.Add(1) if common.WantShutdown() { - log.Printf("WantShutdown in newsgroup: '%s' (processed %d)", ng.Name, articlesProcessed) + log.Printf("WantShutdown in newsgroup: '%s' (processed %d)", ng.Name, processed) return nil } // Determine end index for the batch end := i + batchCheck - if end > len(articles) { - end = len(articles) - } - skipped := 0 - for _, a := range articles[i:end] { - if a == nil { - //log.Printf("Warning: nil article in batch for newsgroup '%s' (articles %d-%d)", ng.Name, i+1, end) - skipped++ - } - } - if skipped == (end - i) { - log.Printf("Error: all articles in batch are nil for newsgroup '%s' (articles %d-%d)", ng.Name, i+1, end) - OffsetQueue.OffsetBatchDone() - continue + if end > len(messageIDs) { + end = len(messageIDs) } // pass articles to CHECK or TAKETHIS queue (async!) - responseChan, err := processBatch(ttMode, articles[i:end], redisCli, int64(i), int64(end), articlesProcessed-int64(len(articles))+int64(i), OffsetQueue, ngtprogress) + responseChan, err := processBatch(ttMode, messageIDs[i:end], redisCli, int64(i), int64(end), processed-int64(len(messageIDs[i:end]))+int64(i), OffsetQueue, ngtprogress) if err != nil { log.Printf("Newsgroup: '%s' | Error processing batch %d-%d: %v", ng.Name, i+1, end, err) return fmt.Errorf("error processing batch %d-%d for newsgroup '%s': %v", i+1, end, ng.Name, err) @@ -1387,9 +1400,9 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i OffsetQueue.Wait(MaxQueuedJobs) // wait for offset batches to finish, less than N in flight } // articlesProcessed already incremented above after loading from DB - remainingArticles -= int64(len(articles)) + remainingArticles -= int64(len(messageIDs)) - log.Printf("Newsgroup: '%s' | Pushed to queue (processed %d/%d) remaining: %d (Check=%t) (Bytes=%d) took: %v", ng.Name, articlesProcessed, totalNGArticles, remainingArticles, ttMode.UseCHECK(), size, time.Since(start2)) + log.Printf("Newsgroup: '%s' | Pushed to queue (processed %d/%d) remaining: %d (Check=%t) took: %v", ng.Name, processed, totalNGArticles, remainingArticles, ttMode.UseCHECK(), time.Since(start2)) //log.Printf("Newsgroup: '%s' | Pushed (processed %d/%d) total: %d/%d (unw: %d / rej: %d) (Check=%t)", ng.Name, articlesProcessed, totalArticles, transferred, remainingArticles, ttMode.Unwanted, ttMode.Rejected, ttMode.GetMode()) } // end for keyset pagination loop @@ -1427,8 +1440,8 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i // Wait for collector goroutine to finish processing all responses collectorWG.Wait() - //log.Printf("Newsgroup: '%s' | All jobs completed and responses collected", ng.Name) - + log.Printf("Newsgroup: '%s' | All jobs completed and responses collected", ng.Name) + deassignWorker(ng.Name) return nil } // end func 
transferNewsgroup @@ -1439,9 +1452,9 @@ var upperLevel float64 = 95.0 // processBatch processes a batch of articles using NNTP streaming protocol (RFC 4644) // Uses TAKETHIS primarily, falls back to CHECK when success rate < 95% -func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCli *redis.Client, batchStart int64, batchEnd int64, dbOffset int64, offsetQ *nntp.OffsetQueue, ngtprogress *nntp.NewsgroupTransferProgress) (chan *nntp.TTResponse, error) { +func processBatch(ttMode *nntp.TakeThisMode, messageIDs []*string, redisCli *redis.Client, batchStart int64, batchEnd int64, dbOffset int64, offsetQ *nntp.OffsetQueue, ngtprogress *nntp.NewsgroupTransferProgress) (chan *nntp.TTResponse, error) { - if len(articles) == 0 { + if len(messageIDs) == 0 { log.Printf("Newsgroup: '%s' | processBatch: no articles in this batch", *ttMode.Newsgroup) return nil, nil } @@ -1461,11 +1474,11 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl ttMode.FlipMode(lowerLevel, upperLevel) job := &nntp.CHTTJob{ - JobID: atomic.AddUint64(&nntp.JobIDCounter, 1), - Newsgroup: ttMode.Newsgroup, - MessageIDs: make([]*string, 0, len(articles)), - Articles: make([]*models.Article, 0, len(articles)), - ArticleMap: make(map[*string]*models.Article, len(articles)), + JobID: atomic.AddUint64(&nntp.JobIDCounter, 1), + Newsgroup: ttMode.Newsgroup, + MessageIDs: make([]*string, 0, len(messageIDs)), + //Articles: make([]*models.Article, 0, len(messageIDs)), + //ArticleMap: make(map[*string]*models.Article, len(messageIDs)), ResponseChan: nntp.GetTTResponseChan(), TTMode: ttMode, OffsetStart: dbOffset, @@ -1479,14 +1492,14 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl // Batch check Redis cache using pipeline before sending CHECK if redisCli != nil { pipe := redisCli.Pipeline() - cmds := make([]*redis.IntCmd, len(articles)) + cmds := make([]*redis.IntCmd, len(messageIDs)) redis2Check := 0 // Queue all EXISTS commands - for i, article := range articles { - if article == nil { + for i, msgid := range messageIDs { + if msgid == nil { continue } - cmds[i] = pipe.Exists(redisCtx, article.MessageID) + cmds[i] = pipe.Exists(redisCtx, *msgid) redis2Check++ } @@ -1497,59 +1510,57 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl // Process results and filter cached articles for i, cmd := range cmds { - if cmd == nil || articles[i] == nil { - log.Printf("Newsgroup: '%s' | Warning: nil Redis command or nil article in batch for job #%d (skip CHECK)", *ttMode.Newsgroup, job.JobID) - continue // Skip if command wasn't queued or article is nil + if cmd == nil || messageIDs[i] == nil { + log.Printf("Newsgroup: '%s' | Warning: nil Redis command or nil message ID in batch for job #%d (skip CHECK)", *ttMode.Newsgroup, job.JobID) + continue // Skip if command wasn't queued or message ID is nil } - article := articles[i] + msgid := messageIDs[i] exists, cmdErr := cmd.Result() if cmdErr == nil && exists > 0 { // Cached in Redis - skip this article if VERBOSE { - log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis in job #%d (skip CHECK)", *ttMode.Newsgroup, article.MessageID, job.JobID) + log.Printf("Newsgroup: '%s' | Message ID '%s' is cached in Redis in job #%d (skip CHECK)", *ttMode.Newsgroup, *msgid, job.JobID) } job.NGTProgress.Increment(nntp.IncrFLAG_REDIS_CACHED_BEFORE_CHECK, 1) redis_cached++ - articles[i] = nil + messageIDs[i] = nil continue } if cmdErr != nil { - log.Printf("Newsgroup: '%s' | Redis 
cache error for message ID '%s' in job #%d: %v (include in CHECK)", *ttMode.Newsgroup, article.MessageID, job.JobID, cmdErr) + log.Printf("Newsgroup: '%s' | Redis cache error for message ID '%s' in job #%d: %v (include in CHECK)", *ttMode.Newsgroup, *msgid, job.JobID, cmdErr) } // Not cached - add to valid list - job.Articles = append(job.Articles, article) - job.ArticleMap[&article.MessageID] = article - job.MessageIDs = append(job.MessageIDs, &article.MessageID) + //job.Articles = append(job.Articles, article) + //job.ArticleMap[&article.MessageID] = article + job.MessageIDs = append(job.MessageIDs, msgid) } } - if redis_cached == uint64(len(articles)) { + if redis_cached == uint64(len(messageIDs)) { if VERBOSE { - log.Printf("Newsgroup: '%s' | All %d articles in batch are cached in Redis in job #%d (skip CHECK)", *ttMode.Newsgroup, len(articles), job.JobID) + log.Printf("Newsgroup: '%s' | All %d articles in batch are cached in Redis in job #%d (skip CHECK)", *ttMode.Newsgroup, len(messageIDs), job.JobID) } return job.QuitResponseChan(), nil } else if redis_cached > 0 { if VERBOSE { - log.Printf("Newsgroup: '%s' | Redis got %d/%d cached articles in job #%d (before CHECK)", *ttMode.Newsgroup, redis_cached, len(articles), job.JobID) + log.Printf("Newsgroup: '%s' | Redis got %d/%d cached articles in job #%d (before CHECK)", *ttMode.Newsgroup, redis_cached, len(messageIDs), job.JobID) } } } else { // No Redis - add all non-nil message IDs - for _, article := range articles { - if article == nil { + for _, msgid := range messageIDs { + if msgid == nil { continue } - job.Articles = append(job.Articles, article) - job.ArticleMap[&article.MessageID] = article - job.MessageIDs = append(job.MessageIDs, &article.MessageID) + //job.Articles = append(job.Articles, article) + //job.ArticleMap[&article.MessageID] = article + job.MessageIDs = append(job.MessageIDs, msgid) } } if len(job.MessageIDs) == 0 { log.Printf("Newsgroup: '%s' | No message IDs to check in batch. (redis_cache_hits: %d)", *ttMode.Newsgroup, redis_cached) return job.QuitResponseChan(), nil } - if VERBOSE { - } // Assign job to worker (consistent assignment + load balancing) QueuesMutex.RLock() @@ -1567,7 +1578,7 @@ func processBatch(ttMode *nntp.TakeThisMode, articles []*models.Article, redisCl QueuesMutex.RUnlock() log.Printf("Newsgroup: '%s' | CheckWorker (%d) queueing job #%d with %d msgIDs to worker %d. 
CheckQ=%d", *ttMode.Newsgroup, workerID, job.JobID, len(job.MessageIDs), workerID, len(CheckQueues[workerID])) - WorkersCheckChannel <- job // checkQueue <- job + WorkersCheckChannel <- job // checkQueue <- job // goto: job := <-checkQueue //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queued Job #%d", *ttMode.Newsgroup, workerID, job.JobID) return job.GetResponseChan(), nil } // end func processBatch @@ -1750,6 +1761,24 @@ func assignWorkerToNewsgroup(newsgroup string) int { return workerID } +func deassignWorker(newsgroup string) { + // Remove newsgroup assignment + NewsgroupWorkerMapMux.Lock() + workerID, exists := NewsgroupWorkerMap[newsgroup] + if exists { + delete(NewsgroupWorkerMap, newsgroup) + // Decrement worker queue length + WorkerQueueLengthMux.Lock() + if workerID >= 0 && workerID < len(WorkerQueueLength) { + if WorkerQueueLength[workerID] > 0 { + WorkerQueueLength[workerID]-- + } + } + WorkerQueueLengthMux.Unlock() + } + NewsgroupWorkerMapMux.Unlock() +} + // Find first empty slot func findEmptySlot(openConns *int, workerSlots []bool, mux *sync.Mutex) int { mux.Lock() @@ -1773,7 +1802,7 @@ func UnsetWorker(openConns *int, slotID int, workerSlots []bool, mux *sync.Mutex } } -func BootConnWorkers(pool *nntp.Pool, redisCli *redis.Client) { +func BootConnWorkers(db *database.Database, pool *nntp.Pool, redisCli *redis.Client) { openConns := 0 workerSlots := make([]bool, nntp.NNTPTransferThreads) defaultSleep := time.Second @@ -1861,7 +1890,7 @@ forever: returnSignals[workerID] = returnSignal // assign checkQueue by openConns counter // so restarted workers get same channels to read from - go CHTTWorker(slotID, conn, returnSignal, CheckQueues[slotID]) + go CHTTWorker(db, slotID, conn, returnSignal, CheckQueues[slotID]) newConns++ } if newConns == 0 { @@ -1901,7 +1930,7 @@ forever: if job != nil { // copy articles pointer job.Mux.Lock() - if len(job.Articles) == 0 { + if len(job.MessageIDs) == 0 { log.Printf("ERROR in CHTTWorker (%d) job #%d has no articles, skipping requeue", workerID, job.JobID) job.Mux.Unlock() continue @@ -1909,7 +1938,7 @@ forever: rqj := &nntp.CHTTJob{ JobID: job.JobID, Newsgroup: job.Newsgroup, - Articles: job.Articles, + MessageIDs: job.MessageIDs, OffsetQ: job.OffsetQ, NGTProgress: job.NGTProgress, } @@ -1918,7 +1947,7 @@ forever: jobRequeueMutex.Lock() jobRequeue[rqj.Newsgroup] = append(jobRequeue[rqj.Newsgroup], rqj) jobRequeueMutex.Unlock() - log.Printf("CHTTWorker (%d) did requeue job #%d with %d articles for newsgroup '%s'", workerID, rqj.JobID, len(rqj.Articles), *rqj.Newsgroup) + log.Printf("CHTTWorker (%d) did requeue job #%d with %d articles for newsgroup '%s'", workerID, rqj.JobID, len(rqj.MessageIDs), *rqj.Newsgroup) // unlink pointers job.Mux.Lock() select { @@ -1930,8 +1959,6 @@ forever: } job.Newsgroup = nil job.TTMode = nil - job.Articles = nil - job.ArticleMap = nil job.MessageIDs = nil job.WantedIDs = nil job.OffsetQ = nil @@ -2116,7 +2143,7 @@ func replyChan(request chan struct{}, reply chan struct{}) { } } -func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQueue chan *nntp.CHTTJob) { +func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs *ReturnSignal, WorkersCheckChannel chan *nntp.CHTTJob) { var mux sync.Mutex var workerWG sync.WaitGroup readCHECKResponsesChan := make(chan *nntp.ReadRequest, 1024*1024) @@ -2267,11 +2294,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue // If there are more jobs waiting, immediately trigger next job processing if 
hasMoreJobs { - select { - case tickChan <- struct{}{}: - default: - // Channel full, will be processed on next tick - } + common.SignalTickChan(tickChan) } //log.Printf("CheckWorker (%d): job #%d CHECKs sent, loop to next job", workerID, currentJob.JobID) @@ -2284,12 +2307,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue hasWork := len(rs.jobs) > 0 rs.Mux.Unlock() if hasWork { - select { - case tickChan <- struct{}{}: - log.Printf("CheckWorker (%d): Ticker ticked, sent tickChan signal", workerID) - default: - // tickChan full, tickChan will tick - } + common.SignalTickChan(tickChan) } } // end select } // end forever @@ -2612,6 +2630,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue workerWG.Done() }(workerWG) var job *nntp.CHTTJob + WorkersTTChannel := TakeThisQueues[workerID] for { if common.WantShutdown() { log.Printf("TTworker (%d): WantShutdown, exiting", workerID) @@ -2619,7 +2638,7 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } select { - case ajob := <-TakeThisQueues[workerID]: + case ajob := <-WorkersTTChannel: job = ajob case <-errChan: @@ -2638,12 +2657,12 @@ func CHTTWorker(workerID int, conn *nntp.BackendConn, rs *ReturnSignal, checkQue } // Build list of wanted articles wantedArticles := make([]*models.Article, 0, len(job.WantedIDs)) - for _, wantedID := range job.WantedIDs { - if wantedID != nil { - if article, exists := job.ArticleMap[wantedID]; exists { - wantedArticles = append(wantedArticles, article) - } - } + // TODO: fetch articles from database for all wantedIDs + wantedArticles, err := db.GetArticlesByIDs(job.Newsgroup, job.WantedIDs) + if err != nil { + log.Printf("Newsgroup: '%s' | TTworker (%d): Error fetching wanted articles from DB for job #%d: %v", *job.Newsgroup, workerID, job.JobID, err) + job.Response(false, err) + continue } if len(wantedArticles) == 0 { @@ -2702,7 +2721,7 @@ forever: common.SignalErrChan(errChan) break forever - case job := <-checkQueue: + case job := <-WorkersCheckChannel: // CheckQueues[workerID] // source: WorkersCheckChannel <- job // checkQueue <- job if common.WantShutdown() { log.Printf("CHTTworker: WantShutdown, exiting") break forever @@ -2768,12 +2787,7 @@ forever: // Add job to processing queue rs.jobs = append(rs.jobs, job) // Signal ticker to process this job - select { - case tickChan <- struct{}{}: - //log.Printf("Newsgroup: '%s' | CHTTworker (%d): signal ticker start job #%d with %d message IDs. 
queued=%d", *job.Newsgroup, workerID, job.JobID, len(job.MessageIDs), len(rs.jobs)) - default: - // tickChan full, will be processed on next tick - } + common.SignalTickChan(tickChan) rs.Mux.Unlock() } // end select } // end for diff --git a/internal/common/common.go b/internal/common/common.go index 0733e5d..56d6a37 100644 --- a/internal/common/common.go +++ b/internal/common/common.go @@ -59,6 +59,13 @@ func SignalErrChan(errChan chan struct{}) { // already signaled } } +func SignalTickChan(achan chan struct{}) { + select { + case achan <- struct{}{}: + default: + // already signaled + } +} var StructChansCap1 = make(chan chan struct{}, 16384) diff --git a/internal/database/queries.go b/internal/database/queries.go index ef49ae9..2f459c0 100644 --- a/internal/database/queries.go +++ b/internal/database/queries.go @@ -3281,9 +3281,165 @@ func (db *Database) SearchUserByComputedHash(targetHash string, nonce string) (* return nil, nil } -const query_getArticlesBatchWithDateFilter_selectPart = `SELECT article_num, message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, reply_count, path, headers_json, body_text, imported_at FROM articles` +const query_getMessageIDsBatchWithDateFilter_selectPart = `SELECT message_id FROM articles` +const query_getMessageIDsBatchWithDateFilter_orderby = " ORDER BY date_sent ASC" +const query_getArticlesBatchWithDateFilter_selectPart = `SELECT article_num, message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, reply_count, path, headers_json, body_text FROM articles` const query_getArticlesBatchWithDateFilter_orderby = " ORDER BY date_sent ASC LIMIT ? OFFSET ?" +// GetArticlesByIDs retrieves articles by their message IDs, ordered by date_sent +func (db *Database) GetArticlesByIDs(newsgroup *string, wantedIDs []*string) ([]*models.Article, error) { + if len(wantedIDs) == 0 { + return nil, nil + } + + groupDBs, err := db.GetGroupDBs(*newsgroup) + if err != nil { + return nil, err + } + defer groupDBs.Return(db) + + // Build the IN clause with placeholders + placeholders := make([]string, len(wantedIDs)) + args := make([]interface{}, len(wantedIDs)) + for i, id := range wantedIDs { + placeholders[i] = "?" + args[i] = id + } + + // Build query with IN clause for message_ids + query := query_getArticlesBatchWithDateFilter_selectPart + + " WHERE message_id IN (" + strings.Join(placeholders, ",") + ") " + + "ORDER BY date_sent ASC" + + rows, err := groupDBs.DB.Query(query, args...) 
+ if err != nil { + return nil, fmt.Errorf("failed to query articles by IDs: %w", err) + } + defer rows.Close() + + var articles []*models.Article + for rows.Next() { + article := &models.Article{} + if err := rows.Scan( + &article.MessageID, + &article.Subject, + &article.FromHeader, + &article.DateSent, + &article.DateString, + &article.References, + &article.Bytes, + &article.Lines, + &article.Path, + &article.HeadersJSON, + &article.BodyText, + ); err != nil { + return nil, fmt.Errorf("failed to scan article: %w", err) + } + articles = append(articles, article) + } + + if err = rows.Err(); err != nil { + return nil, fmt.Errorf("error iterating article rows: %w", err) + } + + return articles, nil +} + +// GetMessageIDsWithDateFilter streams message IDs from a group database to resultChan in batches, with optional date filtering +func (db *Database) GetMessageIDsWithDateFilter(ng *models.Newsgroup, startTime, endTime *time.Time, batchCheck int64, resultChan chan []*string) error { + // Get group database + groupDBs, err := db.GetGroupDBs(ng.Name) + if err != nil { + return fmt.Errorf("failed to get group DBs for newsgroup '%s': %v", ng.Name, err) + } + defer close(resultChan) + defer db.ForceCloseGroupDBs(groupDBs) + + var query string + var args []interface{} + + if startTime != nil || endTime != nil { + // Build query with date filtering + var whereConditions []string + + // Always exclude NULL date_sent values when using ORDER BY date_sent + whereConditions = append(whereConditions, "date_sent IS NOT NULL") + + if startTime != nil { + whereConditions = append(whereConditions, "date_sent >= ?") + args = append(args, startTime.UTC().Format("2006-01-02 15:04:05")) + } + + if endTime != nil { + whereConditions = append(whereConditions, "date_sent <= ?") + args = append(args, endTime.UTC().Format("2006-01-02 15:04:05")) + } + + whereClause := "" + if len(whereConditions) > 0 { + whereClause = " WHERE " + strings.Join(whereConditions, " AND ") + } + + query = query_getMessageIDsBatchWithDateFilter_selectPart + whereClause + query_getMessageIDsBatchWithDateFilter_orderby + } else { + // No date filtering + query = query_getMessageIDsBatchWithDateFilter_selectPart + query_getMessageIDsBatchWithDateFilter_orderby + } + + rows, err := groupDBs.DB.Query(query, args...) 
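// Illustrative sketch, not part of the patch: the chunk-and-flush idea behind the
// tmpChan/releaseTmpChan logic that follows. Scanned IDs are buffered until a batch is
// full, the full slice goes to the consumer over resultChan, and a final flush sends
// whatever is left. A plain slice stands in for the temporary channel used in the patch.
package main

import "fmt"

func streamInBatches(ids []string, batchSize int, resultChan chan []*string) {
	defer close(resultChan)
	batch := make([]*string, 0, batchSize)
	for i := range ids {
		batch = append(batch, &ids[i])
		if len(batch) == batchSize {
			resultChan <- batch
			batch = make([]*string, 0, batchSize) // fresh slice; the old one is now owned by the consumer
		}
	}
	if len(batch) > 0 {
		resultChan <- batch // flush the remainder
	}
}

func main() {
	ids := []string{"<a@x>", "<b@x>", "<c@x>", "<d@x>", "<e@x>"}
	out := make(chan []*string, 2)
	go streamInBatches(ids, 2, out)
	for batch := range out {
		fmt.Println("received batch of", len(batch))
	}
}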
+ if err != nil { + return err + } + defer rows.Close() + //out := make([]*models.Article, dbBatchSize) + tmpChan := make(chan *string, batchCheck) + for rows.Next() { + var msgid string + if err := rows.Scan( + &msgid, + ); err != nil { + return err + } + + if msgid != "" { + load: + for { + select { + case tmpChan <- &msgid: + break load + default: + // chan full + db.releaseTmpChan(tmpChan, resultChan, cap(tmpChan)) + tmpChan <- &msgid + } + } + } + } + db.releaseTmpChan(tmpChan, resultChan, 0) + return nil +} + +func (db *Database) releaseTmpChan(tmpChan chan *string, resultChan chan []*string, limit int) { + if limit > 0 && len(tmpChan) < limit && len(tmpChan) < cap(tmpChan) { + return + } + if len(tmpChan) > 0 { + out := make([]*string, 0, len(tmpChan)) + empty: + for { + select { + case mid := <-tmpChan: + out = append(out, mid) + default: + break empty + } + } + if len(out) > 0 { + resultChan <- out + } + } +} + // GetArticlesBatchWithDateFilter retrieves articles from a group database with optional date filtering func (db *Database) GetArticlesBatchWithDateFilter(ng *models.Newsgroup, offset int64, startTime, endTime *time.Time, dbBatchSize int64) ([]*models.Article, error) { // Get group database diff --git a/internal/nntp/nntp-transfer.go b/internal/nntp/nntp-transfer.go index 3b5b1c1..61352a7 100644 --- a/internal/nntp/nntp-transfer.go +++ b/internal/nntp/nntp-transfer.go @@ -337,8 +337,6 @@ type CHTTJob struct { TTMode *TakeThisMode ResponseChan chan *TTResponse responseSent bool // Track if response already sent (prevents double send) - Articles []*models.Article - ArticleMap map[*string]*models.Article MessageIDs []*string WantedIDs []*string PendingResponses sync.WaitGroup // Track pending TAKETHIS responses diff --git a/scripts/queries.sh b/scripts/queries.sh index b90d8fe..2cd5ecf 100644 --- a/scripts/queries.sh +++ b/scripts/queries.sh @@ -25,6 +25,33 @@ # sqlite3 data/cfg/pugleaf.sq3 "UPDATE newsgroups SET active = 0 WHERE name LIKE '%dateien%';" # sqlite3 data/cfg/pugleaf.sq3 "UPDATE newsgroups SET active = 0 WHERE name LIKE '%warez%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%binar%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%bianr%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%bainar%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%banar%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%binaer%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%bini%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%biyar%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%boneless%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%nairies%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%naries%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%alt.bin%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE 'a.b.%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%alt-bin%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%dvdnordic%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%cd.image%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%files.image%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%music.bin%';" +# 
sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%nzb%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '4u%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%sex%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%erotic%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%porno%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%pedo%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%paedo%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%dateien%';" +# sqlite3 data/cfg/pugleaf.sq3 "DELETE from newsgroups WHERE name LIKE '%warez%';" + # sqlite3 data/cfg/pugleaf.sq3 "UPDATE newsgroups SET max_articles = 1000, expiry_days = 7 WHERE name LIKE '%.test';" # sqlite3 data/cfg/pugleaf.sq3 "UPDATE newsgroups SET max_articles = 1000, expiry_days = 7 WHERE name LIKE 'alt.test.%';" # sqlite3 data/cfg/pugleaf.sq3 "UPDATE newsgroups SET max_articles = 1000, expiry_days = 7 WHERE name LIKE 'jobs.%';" From 87a0af1d141fce00b23836d2a4eee6261651d7d3 Mon Sep 17 00:00:00 2001 From: go-while Date: Mon, 20 Oct 2025 19:31:54 +0000 Subject: [PATCH 069/100] testing --- cmd/nntp-transfer/main.go | 86 ++++++++++++++++++++---------------- internal/database/queries.go | 20 ++++++--- 2 files changed, 60 insertions(+), 46 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 6a252e9..e83b348 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -1021,6 +1021,7 @@ func matchesAnyWildcardPattern(newsgroup string, patterns []string) bool { // runTransfer performs the actual article transfer process func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchCheck int, maxThreads int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, wgP *sync.WaitGroup, redisCli *redis.Client) error { defer wgP.Done() + defer log.Printf("runTransfer() quitted") maxThreadsChan := make(chan struct{}, maxThreads) var wg sync.WaitGroup log.Printf("Todo: %d newsgroups", len(newsgroups)) @@ -1051,16 +1052,21 @@ func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchChe nntp.ResultsMutex.Unlock() err = nil // not a real error } + + nntp.ResultsMutex.Lock() + NewsgroupsToProcess-- if err != nil { log.Printf("Error transferring newsgroup %s: %v", ng.Name, err) + } else { + log.Printf("Newsgroup: '%s' | completed transferNewsgroup() remaining newsgroups: %d", ng.Name, NewsgroupsToProcess) } - nntp.ResultsMutex.Lock() - NewsgroupsToProcess-- nntp.ResultsMutex.Unlock() }(ng, &wg, redisCli) } - nntp.ResultsMutex.Lock() // Wait for all transfers to complete + wg.Wait() + + nntp.ResultsMutex.Lock() if nothingInDateRange > 0 { log.Printf("Note: %d newsgroups had no articles in the specified date range", nothingInDateRange) } @@ -1109,7 +1115,7 @@ func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponse log.Printf("Newsgroup: '%s' | Processing requeued job %d/%d with %d articles", newsgroup, i+1, len(queuedJobs), len(job.MessageIDs)) // pass articles to CHECK or TAKETHIS queue (async!) 
- responseChan, err := processBatch(ttMode, job.MessageIDs, redisCli, job.BatchStart, job.BatchEnd, -1, job.OffsetQ, job.NGTProgress) + responseChan, err := processBatch(ttMode, job.MessageIDs, redisCli, job.OffsetStart, job.OffsetQ, job.NGTProgress) if err != nil { log.Printf("Newsgroup: '%s' | Error processing requeued batch: %v", newsgroup, err) jobRequeueMutex.Lock() @@ -1344,9 +1350,10 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i // Use simple OFFSET pagination var processed int64 - msgIDsChan := make(chan []*string, 1) + msgIDsChan := make(chan []*string, MaxQueuedJobs) go db.GetMessageIDsWithDateFilter(ng, startTime, endTime, dbBatchSize, msgIDsChan) // Get articles in database batches (much larger than network batches) + var dbOffset int64 for messageIDs := range msgIDsChan { if common.WantShutdown() { log.Printf("WantShutdown in newsgroup: '%s' (processed %d messageIDs)", ng.Name, processed) @@ -1362,8 +1369,6 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i break } - processed += int64(len(messageIDs)) - if dryRun && debugCapture { log.Printf("Newsgroup: '%s' | DRY RUN with debug capture: Capturing %d articles (processed %d) BROKEN TODO NEED FIX!", ng.Name, len(messageIDs), processed) //debugMutex.Lock() @@ -1373,41 +1378,44 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i return nil } - log.Printf("Newsgroup: '%s' | Loaded %d mids | took %v)", ng.Name, len(messageIDs), time.Since(start)) + log.Printf("Newsgroup: '%s' | Loaded %d mids | took: %v", ng.Name, len(messageIDs), time.Since(start)) // Process articles in network batches start2 := time.Now() - for i := 0; i < len(messageIDs); i += batchCheck { - OffsetQueue.Add(1) - if common.WantShutdown() { - log.Printf("WantShutdown in newsgroup: '%s' (processed %d)", ng.Name, processed) - return nil - } - // Determine end index for the batch - end := i + batchCheck - if end > len(messageIDs) { - end = len(messageIDs) - } - // pass articles to CHECK or TAKETHIS queue (async!) - responseChan, err := processBatch(ttMode, messageIDs[i:end], redisCli, int64(i), int64(end), processed-int64(len(messageIDs[i:end]))+int64(i), OffsetQueue, ngtprogress) - if err != nil { - log.Printf("Newsgroup: '%s' | Error processing batch %d-%d: %v", ng.Name, i+1, end, err) - return fmt.Errorf("error processing batch %d-%d for newsgroup '%s': %v", i+1, end, ng.Name, err) - } - if responseChan != nil { - // pass the response channel to the collector channel: ttResponses - ttResponsesSetupChan <- nntp.GetTTSetup(responseChan) - } - OffsetQueue.Wait(MaxQueuedJobs) // wait for offset batches to finish, less than N in flight + OffsetQueue.Add(1) + if common.WantShutdown() { + log.Printf("WantShutdown in newsgroup: '%s' (processed %d)", ng.Name, processed) + return nil + } + // Determine end index for the batch + /* disabled + //end := i + batchCheck + //if end > len(messageIDs) { + // end = len(messageIDs) + //} + */ + // pass articles to CHECK or TAKETHIS queue (async!) 
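// Illustrative sketch, not part of the patch: the shape of the reworked loop in
// transferNewsgroup. One goroutine streams message-ID batches from the database into a
// channel, the main loop hands each batch to the CHECK/TAKETHIS queue, and a semaphore
// keeps at most maxInFlight batches outstanding. Names like produce/handle are made up.
package main

import (
	"fmt"
	"sync"
)

func produce(out chan []*string) {
	defer close(out)
	ids := []string{"<1@x>", "<2@x>", "<3@x>"}
	for i := range ids {
		out <- []*string{&ids[i]} // in the real code each slice is a whole batch
	}
}

func main() {
	const maxInFlight = 2
	sem := make(chan struct{}, maxInFlight) // mirrors OffsetQueue.Wait(MaxQueuedJobs)
	var wg sync.WaitGroup

	batches := make(chan []*string, maxInFlight)
	go produce(batches)

	for batch := range batches {
		sem <- struct{}{} // blocks while maxInFlight batches are still pending
		wg.Add(1)
		go func(b []*string) { // stands in for the async CHECK/TAKETHIS hand-off
			defer wg.Done()
			defer func() { <-sem }()
			fmt.Println("handling batch of", len(b))
		}(batch)
	}
	wg.Wait()
}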
+ responseChan, err := processBatch(ttMode, messageIDs, redisCli, dbOffset, OffsetQueue, ngtprogress) + if err != nil { + log.Printf("Newsgroup: '%s' | Error processing batch offset %d: %v", ng.Name, dbOffset, err) + return fmt.Errorf("error processing batch offset %d for newsgroup '%s': %v", dbOffset, ng.Name, err) } + if responseChan != nil { + // pass the response channel to the collector channel: ttResponses + ttResponsesSetupChan <- nntp.GetTTSetup(responseChan) + } + dbOffset += int64(len(messageIDs)) + processed += int64(len(messageIDs)) + + OffsetQueue.Wait(MaxQueuedJobs) // wait for offset batches to finish, less than N in flight // articlesProcessed already incremented above after loading from DB remainingArticles -= int64(len(messageIDs)) log.Printf("Newsgroup: '%s' | Pushed to queue (processed %d/%d) remaining: %d (Check=%t) took: %v", ng.Name, processed, totalNGArticles, remainingArticles, ttMode.UseCHECK(), time.Since(start2)) //log.Printf("Newsgroup: '%s' | Pushed (processed %d/%d) total: %d/%d (unw: %d / rej: %d) (Check=%t)", ng.Name, articlesProcessed, totalArticles, transferred, remainingArticles, ttMode.Unwanted, ttMode.Rejected, ttMode.GetMode()) - } // end for keyset pagination loop + } // end for msgIDsChan - //log.Printf("Newsgroup: '%s' | Main article loop completed, checking for requeued jobs...", ng.Name) + log.Printf("Newsgroup: '%s' | msgIDsChan closed, checking for requeued jobs...", ng.Name) // Process any remaining requeued jobs after main loop completes // This handles failures that occurred in the last batch @@ -1452,7 +1460,7 @@ var upperLevel float64 = 95.0 // processBatch processes a batch of articles using NNTP streaming protocol (RFC 4644) // Uses TAKETHIS primarily, falls back to CHECK when success rate < 95% -func processBatch(ttMode *nntp.TakeThisMode, messageIDs []*string, redisCli *redis.Client, batchStart int64, batchEnd int64, dbOffset int64, offsetQ *nntp.OffsetQueue, ngtprogress *nntp.NewsgroupTransferProgress) (chan *nntp.TTResponse, error) { +func processBatch(ttMode *nntp.TakeThisMode, messageIDs []*string, redisCli *redis.Client, dbOffset int64, offsetQ *nntp.OffsetQueue, ngtprogress *nntp.NewsgroupTransferProgress) (chan *nntp.TTResponse, error) { if len(messageIDs) == 0 { log.Printf("Newsgroup: '%s' | processBatch: no articles in this batch", *ttMode.Newsgroup) @@ -1464,8 +1472,8 @@ func processBatch(ttMode *nntp.TakeThisMode, messageIDs []*string, redisCli *red if progress, exists := nntp.NewsgroupTransferProgressMap[*ttMode.Newsgroup]; exists { progress.Mux.Lock() progress.OffsetStart = dbOffset - progress.BatchStart = batchStart - progress.BatchEnd = batchEnd + //progress.BatchStart = batchStart + //progress.BatchEnd = batchEnd progress.LastUpdated = time.Now() progress.Mux.Unlock() } @@ -1482,10 +1490,10 @@ func processBatch(ttMode *nntp.TakeThisMode, messageIDs []*string, redisCli *red ResponseChan: nntp.GetTTResponseChan(), TTMode: ttMode, OffsetStart: dbOffset, - BatchStart: batchStart, - BatchEnd: batchEnd, - OffsetQ: offsetQ, - NGTProgress: ngtprogress, + //BatchStart: batchStart, + //BatchEnd: batchEnd, + OffsetQ: offsetQ, + NGTProgress: ngtprogress, } var redis_cached uint64 diff --git a/internal/database/queries.go b/internal/database/queries.go index 2f459c0..3946c77 100644 --- a/internal/database/queries.go +++ b/internal/database/queries.go @@ -3354,10 +3354,10 @@ func (db *Database) GetMessageIDsWithDateFilter(ng *models.Newsgroup, startTime, } defer close(resultChan) defer db.ForceCloseGroupDBs(groupDBs) - + var loaded 
uint64 var query string var args []interface{} - + start := time.Now() if startTime != nil || endTime != nil { // Build query with date filtering var whereConditions []string @@ -3385,9 +3385,10 @@ func (db *Database) GetMessageIDsWithDateFilter(ng *models.Newsgroup, startTime, // No date filtering query = query_getMessageIDsBatchWithDateFilter_selectPart + query_getMessageIDsBatchWithDateFilter_orderby } - + log.Printf("Newsgroup: '%s' | GetMessageIDsWithDateFilter: Executing query: '%s' with args: '%v'", ng.Name, query, args) rows, err := groupDBs.DB.Query(query, args...) if err != nil { + log.Printf("ERROR Newsgroup: '%s' | GetMessageIDsWithDateFilter: Failed to execute query: %v", ng.Name, err) return err } defer rows.Close() @@ -3398,6 +3399,7 @@ func (db *Database) GetMessageIDsWithDateFilter(ng *models.Newsgroup, startTime, if err := rows.Scan( &msgid, ); err != nil { + log.Printf("ERROR GetMessageIDsWithDateFilter: Failed to scan message ID in newsgroup '%s': %v", ng.Name, err) return err } @@ -3409,19 +3411,21 @@ func (db *Database) GetMessageIDsWithDateFilter(ng *models.Newsgroup, startTime, break load default: // chan full - db.releaseTmpChan(tmpChan, resultChan, cap(tmpChan)) + loaded += db.releaseTmpChan(tmpChan, resultChan, cap(tmpChan)) tmpChan <- &msgid + break load } } } } - db.releaseTmpChan(tmpChan, resultChan, 0) + loaded += db.releaseTmpChan(tmpChan, resultChan, 0) + log.Printf("Newsgroup: '%s' | GetMessageIDsWithDateFilter: Loaded %d message IDs | took: %v", ng.Name, loaded, time.Since(start)) return nil } -func (db *Database) releaseTmpChan(tmpChan chan *string, resultChan chan []*string, limit int) { +func (db *Database) releaseTmpChan(tmpChan chan *string, resultChan chan []*string, limit int) (loaded uint64) { if limit > 0 && len(tmpChan) < limit && len(tmpChan) < cap(tmpChan) { - return + return 0 } if len(tmpChan) > 0 { out := make([]*string, 0, len(tmpChan)) @@ -3436,8 +3440,10 @@ func (db *Database) releaseTmpChan(tmpChan chan *string, resultChan chan []*stri } if len(out) > 0 { resultChan <- out + loaded += uint64(len(out)) } } + return loaded } // GetArticlesBatchWithDateFilter retrieves articles from a group database with optional date filtering From 291ad013aea336d4fa550c3e23159721cd20ea2b Mon Sep 17 00:00:00 2001 From: go-while Date: Mon, 20 Oct 2025 23:15:44 +0000 Subject: [PATCH 070/100] testing --- cmd/nntp-transfer/main.go | 215 ++++++++++++++----------- internal/database/queries.go | 2 +- internal/database/sqlite_retry.go | 3 +- internal/nntp/nntp-client-commands.go | 18 +-- internal/nntp/nntp-transfer-demuxer.go | 68 +++++--- internal/nntp/nntp-transfer.go | 6 +- 6 files changed, 180 insertions(+), 132 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index e83b348..888eb73 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -101,7 +101,7 @@ var appVersion = "-unset-" var redisCtx = context.Background() var REDIS_TTL time.Duration = 3600 * time.Second // default 1h var MaxQueuedJobs int = 8 -var BatchCheck int +var BatchCheck int64 var CHECK_FIRST bool // statistics @@ -156,7 +156,7 @@ func main() { proxyPassword = flag.String("proxy-password", "", "Proxy authentication password") // Transfer configuration - batchCheck = flag.Int("batch-check", 100, "Number of message IDs/articles to send in streamed CHECK/TAKETHIS") + batchCheck = flag.Int64("batch-check", 100, "Number of message IDs/articles to send in streamed CHECK/TAKETHIS") batchDB = flag.Int64("batch-db", 1000, "Fetch N articles from DB in a 
batch") maxThreads = flag.Int("max-threads", 1, "Transfer N newsgroups in concurrent threads. Each thread uses 1 connection.") redisCache = flag.Bool("redis-cache", true, "Use Redis caching for message IDs") @@ -1019,7 +1019,7 @@ func matchesAnyWildcardPattern(newsgroup string, patterns []string) bool { } // runTransfer performs the actual article transfer process -func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchCheck int, maxThreads int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, wgP *sync.WaitGroup, redisCli *redis.Client) error { +func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchCheck int64, maxThreads int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, wgP *sync.WaitGroup, redisCli *redis.Client) error { defer wgP.Done() defer log.Printf("runTransfer() quitted") maxThreadsChan := make(chan struct{}, maxThreads) @@ -1135,7 +1135,7 @@ func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponse } // transferNewsgroup transfers articles from a single newsgroup -func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, redisCli *redis.Client) error { +func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck int64, dryRun bool, startTime, endTime *time.Time, debugCapture bool, redisCli *redis.Client) error { //log.Printf("Newsgroup: '%s' | transferNewsgroup: Starting (getting group DBs)...", ng.Name) @@ -1298,9 +1298,10 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i log.Printf("Newsgroup: '%s' | Collector: all response processors closed", ng.Name) } - nntp.ResultsMutex.Lock() - ngtprogress.Mux.Lock() + var tmpglobalTotalArticles, tmptotalTransferred, tmptotalTTSentCount, tmptotalCheckSentCount, tmptotalRedisCacheHits, tmptotalRedisCacheBeforeCheck, tmptotalRedisCacheBeforeTakethis uint64 + var tmptotalWanted, tmptotalUnwanted, tmptotalChecked, tmptotalRejected, tmptotalRetry, tmptotalSkipped, tmptotalTXErrors, tmptotalConnErrors uint64 + ngtprogress.Mux.Lock() result := fmt.Sprintf("END Newsgroup: '%s' total: %d | CHECK_sent: %d | checked: %d | transferred: %d | cache_hits: %d | wanted: %d | unwanted: %d | rejected: %d | retry: %d | skipped: %d | TX_Errors: %d | connErrors: %d | took %v", ng.Name, totalNGArticles, ngtprogress.CheckSentCount, ngtprogress.Checked, @@ -1309,37 +1310,52 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i ngtprogress.Retry, ngtprogress.Skipped, ngtprogress.TxErrors, ngtprogress.ConnErrors, time.Since(start)) - - globalTotalArticles += uint64(totalNGArticles) - totalTransferred += ngtprogress.Transferred - totalTTSentCount += ngtprogress.TTSentCount - totalCheckSentCount += ngtprogress.CheckSentCount - totalRedisCacheHits += ngtprogress.RedisCached - totalRedisCacheBeforeCheck += ngtprogress.RedisCachedBeforeCheck - totalRedisCacheBeforeTakethis += ngtprogress.RedisCachedBeforeTakethis - totalWanted += ngtprogress.Wanted - totalUnwanted += ngtprogress.Unwanted - totalChecked += ngtprogress.Checked - totalRejected += ngtprogress.Rejected - totalRetry += ngtprogress.Retry - totalSkipped += ngtprogress.Skipped - totalTXErrors += ngtprogress.TxErrors - totalConnErrors += ngtprogress.ConnErrors - + // capture values + tmpglobalTotalArticles += uint64(totalNGArticles) + tmptotalTransferred += ngtprogress.Transferred + tmptotalTTSentCount += ngtprogress.TTSentCount + 
tmptotalCheckSentCount += ngtprogress.CheckSentCount + tmptotalRedisCacheHits += ngtprogress.RedisCached + tmptotalRedisCacheBeforeCheck += ngtprogress.RedisCachedBeforeCheck + tmptotalRedisCacheBeforeTakethis += ngtprogress.RedisCachedBeforeTakethis + tmptotalWanted += ngtprogress.Wanted + tmptotalUnwanted += ngtprogress.Unwanted + tmptotalChecked += ngtprogress.Checked + tmptotalRejected += ngtprogress.Rejected + tmptotalRetry += ngtprogress.Retry + tmptotalSkipped += ngtprogress.Skipped + tmptotalTXErrors += ngtprogress.TxErrors + tmptotalConnErrors += ngtprogress.ConnErrors // Mark newsgroup as finished ngtprogress.Finished = true ngtprogress.LastUpdated = time.Now() ngtprogress.LastCronTX = ngtprogress.LastUpdated ngtprogress.Mux.Unlock() + nntp.ResultsMutex.Lock() + // capture values + globalTotalArticles += tmpglobalTotalArticles + totalTransferred += tmptotalTransferred + totalTTSentCount += tmptotalTTSentCount + totalCheckSentCount += tmptotalCheckSentCount + totalRedisCacheHits += tmptotalRedisCacheHits + totalRedisCacheBeforeCheck += tmptotalRedisCacheBeforeCheck + totalRedisCacheBeforeTakethis += tmptotalRedisCacheBeforeTakethis + totalWanted += tmptotalWanted + totalUnwanted += tmptotalUnwanted + totalChecked += tmptotalChecked + totalRejected += tmptotalRejected + totalRetry += tmptotalRetry + totalSkipped += tmptotalSkipped + totalTXErrors += tmptotalTXErrors + totalConnErrors += tmptotalConnErrors + // store result results = append(results, result) - for _, msgId := range rejectedArticles[ng.Name] { // prints all at the end again log.Printf("END Newsgroup: '%s' | REJECTED '%s'", ng.Name, msgId) } delete(rejectedArticles, ng.Name) // free memory - nntp.ResultsMutex.Unlock() }(&responseWG) @@ -1351,9 +1367,10 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i // Use simple OFFSET pagination var processed int64 msgIDsChan := make(chan []*string, MaxQueuedJobs) - go db.GetMessageIDsWithDateFilter(ng, startTime, endTime, dbBatchSize, msgIDsChan) + go db.GetMessageIDsWithDateFilter(ng, startTime, endTime, batchCheck, msgIDsChan) // Get articles in database batches (much larger than network batches) var dbOffset int64 + //start := time.Now() for messageIDs := range msgIDsChan { if common.WantShutdown() { log.Printf("WantShutdown in newsgroup: '%s' (processed %d messageIDs)", ng.Name, processed) @@ -1363,7 +1380,6 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i if _, err := processRequeuedJobs(ng.Name, ttMode, ttResponsesSetupChan, redisCli); err != nil { return err } - start := time.Now() if len(messageIDs) == 0 { log.Printf("No more articles in newsgroup %s (loaded %d)", ng.Name, processed) break @@ -1378,9 +1394,9 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i return nil } - log.Printf("Newsgroup: '%s' | Loaded %d mids | took: %v", ng.Name, len(messageIDs), time.Since(start)) + //log.Printf("Newsgroup: '%s' | Loaded %d mids | took: %v", ng.Name, len(messageIDs), time.Since(start)) // Process articles in network batches - start2 := time.Now() + //start2 := time.Now() OffsetQueue.Add(1) if common.WantShutdown() { log.Printf("WantShutdown in newsgroup: '%s' (processed %d)", ng.Name, processed) @@ -1410,9 +1426,9 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i // articlesProcessed already incremented above after loading from DB remainingArticles -= int64(len(messageIDs)) - log.Printf("Newsgroup: '%s' | Pushed to queue (processed %d/%d) 
remaining: %d (Check=%t) took: %v", ng.Name, processed, totalNGArticles, remainingArticles, ttMode.UseCHECK(), time.Since(start2)) + //log.Printf("Newsgroup: '%s' | Pushed to queue (processed %d/%d) remaining: %d (Check=%t) took: %v", ng.Name, processed, totalNGArticles, remainingArticles, ttMode.UseCHECK(), time.Since(start2)) //log.Printf("Newsgroup: '%s' | Pushed (processed %d/%d) total: %d/%d (unw: %d / rej: %d) (Check=%t)", ng.Name, articlesProcessed, totalArticles, transferred, remainingArticles, ttMode.Unwanted, ttMode.Rejected, ttMode.GetMode()) - + //start = time.Now() } // end for msgIDsChan log.Printf("Newsgroup: '%s' | msgIDsChan closed, checking for requeued jobs...", ng.Name) @@ -1585,7 +1601,7 @@ func processBatch(ttMode *nntp.TakeThisMode, messageIDs []*string, redisCli *red WorkersCheckChannel := CheckQueues[workerID] QueuesMutex.RUnlock() - log.Printf("Newsgroup: '%s' | CheckWorker (%d) queueing job #%d with %d msgIDs to worker %d. CheckQ=%d", *ttMode.Newsgroup, workerID, job.JobID, len(job.MessageIDs), workerID, len(CheckQueues[workerID])) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queueing job #%d with %d msgIDs to worker %d. CheckQ=%d", *ttMode.Newsgroup, workerID, job.JobID, len(job.MessageIDs), workerID, len(CheckQueues[workerID])) WorkersCheckChannel <- job // checkQueue <- job // goto: job := <-checkQueue //log.Printf("Newsgroup: '%s' | CheckWorker (%d) queued Job #%d", *ttMode.Newsgroup, workerID, job.JobID) return job.GetResponseChan(), nil @@ -1898,7 +1914,7 @@ forever: returnSignals[workerID] = returnSignal // assign checkQueue by openConns counter // so restarted workers get same channels to read from - go CHTTWorker(db, slotID, conn, returnSignal, CheckQueues[slotID]) + go CHTTWorker(db, workerID, conn, returnSignal) newConns++ } if newConns == 0 { @@ -2151,13 +2167,18 @@ func replyChan(request chan struct{}, reply chan struct{}) { } } -func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs *ReturnSignal, WorkersCheckChannel chan *nntp.CHTTJob) { +func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs *ReturnSignal) { var mux sync.Mutex var workerWG sync.WaitGroup readCHECKResponsesChan := make(chan *nntp.ReadRequest, 1024*1024) readTAKETHISResponsesChan := make(chan *nntp.ReadRequest, 1024*1024) errChan := make(chan struct{}, 9) + QueuesMutex.RLock() + WorkersTTChannel := TakeThisQueues[workerID] + WorkersCheckChannel := CheckQueues[workerID] + QueuesMutex.RUnlock() + tickChan := common.GetStructChanCap1() requestReplyJobDone := common.GetStructChanCap1() replyJobDone := common.GetStructChanCap1() @@ -2166,7 +2187,7 @@ func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs defer common.RecycleStructChanCap1(replyJobDone) // Create ResponseDemuxer to eliminate race conditions in ReadCodeLine - demuxer := nntp.NewResponseDemuxer(conn, errChan, BatchCheck) + demuxer := nntp.NewResponseDemuxer(conn, errChan) // Store demuxer for statistics tracking DemuxersMutex.Lock() @@ -2196,6 +2217,7 @@ func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs common.SignalErrChan(errChan) workerWG.Done() log.Printf("CheckWorker (%d): CHECK sender goroutine exiting", workerID) + }(workerWG) // tick every n seconds to check if any CHECKs to do loop: @@ -2219,24 +2241,22 @@ func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs log.Printf("CheckWorker (%d): Ticked but no jobs in queue, continue...", workerID) continue loop } - if len(rs.jobs) >= 
MaxQueuedJobs { + if VERBOSE && len(rs.jobs) >= MaxQueuedJobs { log.Printf("CheckWorker (%d): Ticked and found %d jobs in queue (max: %d)", workerID, len(rs.jobs), MaxQueuedJobs) } currentJob := rs.jobs[0] rs.jobs = rs.jobs[1:] // Remove first job from queue rs.Mux.Unlock() if currentJob == nil { + log.Printf("ERROR CheckWorker (%d): got nil job from queue, skipping...", workerID) continue loop } workerID := assignWorkerToNewsgroup(*currentJob.Newsgroup) - QueuesMutex.RLock() - WorkersTTChannel := TakeThisQueues[workerID] - QueuesMutex.RUnlock() waiting: for { if len(WorkersTTChannel) >= MaxQueuedJobs { rs.BlockCHECK() - log.Printf("CheckWorker (%d): waiting... shared takeThisChan full (%d)", workerID, len(TakeThisQueues[workerID])) + log.Printf("CheckWorker (%d): waiting... shared takeThisChan full (%d)", workerID, len(WorkersTTChannel)) time.Sleep(time.Second / 4) continue waiting } @@ -2246,49 +2266,53 @@ func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs if currentJob.TTMode.UseCHECK() { //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d waits to check %d message IDs in batches of %d", *currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs), BatchCheck) - // Process checkIds in batches of BatchCheck - for batchStart := 0; batchStart < len(currentJob.MessageIDs); batchStart += BatchCheck { - batchEnd := batchStart + BatchCheck - if batchEnd > len(currentJob.MessageIDs) { - batchEnd = len(currentJob.MessageIDs) - } - // Lock for this batch only - - //common.ChanLock(flipflopChan) - if !conn.IsConnected() { - rs.Mux.Lock() - rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) // requeue at front - rs.Mux.Unlock() - //common.ChanRelease(flipflopChan) - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d connection lost before SendCheckMultiple for batch (offset %d: %d-%d)", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) - time.Sleep(time.Second) - return - } - //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquire LOCK CHECK for batch (offset %d: %d-%d) (%d messages)", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, len(currentJob.MessageIDs[batchStart:batchEnd])) - rs.LockCHECK() - //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquired CHECK lock for batch (offset %d: %d-%d) -> SendCheckMultiple", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) - err := conn.SendCheckMultiple(currentJob.MessageIDs[batchStart:batchEnd], readCHECKResponsesChan, currentJob, demuxer) - if err != nil { - log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d SendCheckMultiple error for batch (offset %d: %d-%d): %v", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, err) - time.Sleep(time.Second) - rs.Mux.Lock() - rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) // requeue at front - rs.Mux.Unlock() - //rs.BlockCHECK() - return - } - //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d Sent CHECK for batch (offset %d: %d-%d), responses will be read asynchronously...", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) + //common.ChanLock(flipflopChan) + if !conn.IsConnected() { + rs.Mux.Lock() + rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) 
// requeue at front + rs.Mux.Unlock() + //common.ChanRelease(flipflopChan) + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d connection lost before SendCheckMultiple for batch (offset %d: %d-%d)", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) + time.Sleep(time.Second) + return + } + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquire LOCK CHECK for batch (offset %d: %d-%d) (%d messages)", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, len(currentJob.MessageIDs[batchStart:batchEnd])) + rs.LockCHECK() + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquired CHECK lock for batch (offset %d: %d-%d) -> SendCheckMultiple", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) + checksSent, err := conn.SendCheckMultiple(currentJob.MessageIDs, readCHECKResponsesChan, currentJob, demuxer) + if err != nil { + log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d SendCheckMultiple error for batch (offset %d: %d-%d): %v", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, err) + time.Sleep(time.Second) + rs.Mux.Lock() + rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) // requeue at front + rs.Mux.Unlock() + //rs.BlockCHECK() + return } + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d Sent CHECK for batch (offset %d: %d-%d), responses will be read asynchronously...", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) + // Add CHECK sent count to progress after all batches are sent currentJob.NGTProgress.Mux.Lock() - currentJob.NGTProgress.CheckSentCount += currentJob.CheckSentCount + currentJob.NGTProgress.CheckSentCount += checksSent currentJob.NGTProgress.Mux.Unlock() } else { //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d skipping CHECK for %d message IDs (TAKETHIS mode)", *currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs)) currentJob.WantedIDs = currentJob.MessageIDs //rs.UnlockCHECKforTTwithWait() rs.BlockCHECK() - WorkersTTChannel <- currentJob // local takethis chan sharing the same connection + enqueue: + for { + select { + case WorkersTTChannel <- currentJob: // local takethis chan sharing the same connection + // passed + break enqueue + default: + // chan full + rs.BlockCHECK() + time.Sleep(time.Millisecond * 16) + } + } + rs.BlockCHECK() //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d sent to local TakeThisChan", *currentJob.Newsgroup, workerID, currentJob.JobID) } //lastRun = time.Now() @@ -2316,6 +2340,8 @@ func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs rs.Mux.Unlock() if hasWork { common.SignalTickChan(tickChan) + } else { + rs.BlockCHECK() } } // end select } // end forever @@ -2485,16 +2511,16 @@ func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs rs.Mux.Unlock() if len(job.WantedIDs) > 0 { // Pass job to TAKETHIS worker via channel - //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5 job #%d got all %d CHECK responses, passing to TAKETHIS worker (wanted: %d articles) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, queuedCount, len(job.WantedIDs), len(TakeThisQueues[workerID])) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5 job #%d got all %d CHECK responses, 
passing to TAKETHIS worker (wanted: %d articles) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, queuedCount, len(job.WantedIDs), len(WorkersTTChannel)) rs.UnlockCHECKforTT() - TakeThisQueues[workerID] <- job // local takethis chan sharing the same connection - //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5c Sent job #%d to TAKETHIS worker (wanted: %d/%d) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, len(job.WantedIDs), queuedCount, len(TakeThisQueues[workerID])) + WorkersTTChannel <- job // local takethis chan sharing the same connection + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5c Sent job #%d to TAKETHIS worker (wanted: %d/%d) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, len(job.WantedIDs), queuedCount, len(WorkersTTChannel)) } else { //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG6 job #%d got %d CHECK responses but server wants none", *job.Newsgroup, workerID, job.JobID, queuedCount) // Send response and close channel for jobs with no wanted articles job.Response(true, nil) - if len(TakeThisQueues[workerID]) > 0 { + if len(WorkersTTChannel) > 0 { rs.UnlockCHECKforTT() } else { rs.BlockTT() @@ -2638,7 +2664,6 @@ func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs workerWG.Done() }(workerWG) var job *nntp.CHTTJob - WorkersTTChannel := TakeThisQueues[workerID] for { if common.WantShutdown() { log.Printf("TTworker (%d): WantShutdown, exiting", workerID) @@ -2745,7 +2770,7 @@ forever: // Build jobMap for tracking which message IDs belong to this job // and count queued messages rs.Mux.Lock() - queueFull := len(rs.jobs) >= MaxQueuedJobs || len(TakeThisQueues[workerID]) >= MaxQueuedJobs + queueFull := len(rs.jobs) >= MaxQueuedJobs || len(WorkersTTChannel) >= MaxQueuedJobs if queueFull { log.Printf("Newsgroup: '%s' | CHTTworker (%d): got job #%d with %d message IDs. queued=%d ... waiting...", *job.Newsgroup, workerID, job.JobID, len(job.MessageIDs), len(rs.jobs)) select { @@ -2766,7 +2791,7 @@ forever: // pass case <-time.After(time.Millisecond * 16): rs.Mux.Lock() - queueFull = len(rs.jobs) >= MaxQueuedJobs || len(TakeThisQueues[workerID]) >= MaxQueuedJobs + queueFull = len(rs.jobs) >= MaxQueuedJobs || len(WorkersTTChannel) >= MaxQueuedJobs rs.Mux.Unlock() if !queueFull { break waitForReply @@ -2776,7 +2801,7 @@ forever: if common.WantShutdown() { break forever } - log.Printf("Newsgroup: '%s' | CHTTworker (%d): pre append job #%d waiting since %v rs.jobs=%d takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, time.Since(start), len(rs.jobs), len(TakeThisQueues[workerID])) + log.Printf("Newsgroup: '%s' | CHTTworker (%d): pre append job #%d waiting since %v rs.jobs=%d takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, time.Since(start), len(rs.jobs), len(WorkersTTChannel)) lastPrint = time.Now() } } @@ -3099,7 +3124,7 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { NG Workers: ( {{subtract .Started .Finished}} ) Progress Speed{{if gtUint64 .GlobalSpeed 0}}
{{.GlobalSpeed}} KByte/s{{end}} - CH/s
TT/s + {{.LiveCH}} CH/s
{{.LiveTT}} TT/s @@ -3213,7 +3238,7 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { started := int64(len(nntp.NewsgroupTransferProgressMap)) var finished int64 var progressList []ProgressInfo - + var liveCH, liveTT uint64 for name, progress := range nntp.NewsgroupTransferProgressMap { progress.CalcSpeed() progress.Mux.RLock() @@ -3226,10 +3251,10 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { duration := time.Since(progress.Started).Round(time.Second).String() progressList = append(progressList, ProgressInfo{ - Name: name, - OffsetStart: progress.OffsetStart, - BatchStart: progress.BatchStart, - BatchEnd: progress.BatchEnd, + Name: name, + OffsetStart: progress.OffsetStart, + //BatchStart: progress.BatchStart, + //BatchEnd: progress.BatchEnd, TotalArticles: progress.TotalArticles, Started: progress.Started.Format("15:04:05"), LastUpdated: progress.LastUpdated.Format("15:04:05"), @@ -3240,6 +3265,8 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { Finished: false, Duration: duration, }) + liveCH += progress.LastArtPerfC + liveTT += progress.LastArtPerfT progress.Mux.RUnlock() } @@ -3261,16 +3288,16 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { DemuxersMutex.RLock() for i, demux := range Demuxers { if demux != nil { - pending, checkQueued, ttQueued, lastReq := demux.GetStatistics() + pending, checkQueued, ttQueued, lastReq := demux.GetDemuxerStats() var idleSeconds int64 if !lastReq.IsZero() { idleSeconds = int64(time.Since(lastReq).Seconds()) } demuxerStats = append(demuxerStats, DemuxerStats{ WorkerID: i, - PendingCommands: int64(pending), - CheckResponsesQueued: int64(checkQueued), - TTResponsesQueued: int64(ttQueued), + PendingCommands: pending, + CheckResponsesQueued: checkQueued, + TTResponsesQueued: ttQueued, LastRequest: lastReq, IdleSeconds: idleSeconds, }) @@ -3306,6 +3333,8 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { TotalSkipped uint64 TotalTXErrors uint64 TotalConnErrors uint64 + LiveCH uint64 + LiveTT uint64 }{ TotalNewsgroups: TotalNewsgroups, NewsgroupsToProcess: NewsgroupsToProcess, @@ -3334,6 +3363,8 @@ func handleIndex(w http.ResponseWriter, r *http.Request) { TotalSkipped: totalSkipped, TotalTXErrors: totalTXErrors, TotalConnErrors: totalConnErrors, + LiveCH: liveCH, + LiveTT: liveTT, } w.Header().Set("Content-Type", "text/html; charset=utf-8") diff --git a/internal/database/queries.go b/internal/database/queries.go index 3946c77..e74cfb2 100644 --- a/internal/database/queries.go +++ b/internal/database/queries.go @@ -3283,7 +3283,7 @@ func (db *Database) SearchUserByComputedHash(targetHash string, nonce string) (* const query_getMessageIDsBatchWithDateFilter_selectPart = `SELECT message_id FROM articles` const query_getMessageIDsBatchWithDateFilter_orderby = " ORDER BY date_sent ASC" -const query_getArticlesBatchWithDateFilter_selectPart = `SELECT article_num, message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, reply_count, path, headers_json, body_text FROM articles` +const query_getArticlesBatchWithDateFilter_selectPart = `SELECT message_id, subject, from_header, date_sent, date_string, "references", bytes, lines, path, headers_json, body_text FROM articles` const query_getArticlesBatchWithDateFilter_orderby = " ORDER BY date_sent ASC LIMIT ? OFFSET ?" 
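// Illustrative aside (not part of the patch): these query fragments are assembled at
// call time into the OFFSET-paginated batch queries used by the transfer loop; the
// date-range WHERE clause below is an assumption for illustration, not taken from this diff.
//
//	query := query_getArticlesBatchWithDateFilter_selectPart +
//		" WHERE date_sent >= ? AND date_sent <= ?" + // hypothetical date filter
//		query_getArticlesBatchWithDateFilter_orderby // ORDER BY date_sent ASC LIMIT ? OFFSET ?
//	rows, err := retryableQuery(db, query, start, end, batchSize, offset)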
// GetArticlesByIDs retrieves articles by their message IDs, ordered by date_sent diff --git a/internal/database/sqlite_retry.go b/internal/database/sqlite_retry.go index 839acf1..b86abf5 100644 --- a/internal/database/sqlite_retry.go +++ b/internal/database/sqlite_retry.go @@ -265,8 +265,7 @@ func retryableStmtExec(stmt *sql.Stmt, args ...interface{}) (sql.Result, error) jitter := time.Duration(rand.Int63n(int64(delay) / 2)) time.Sleep(delay + jitter) - log.Printf("SQLite retry attempt %d/%d for prepared statement exec. Error: %v", - attempt+1, maxRetries, err) + log.Printf("SQLite retry attempt %d/%d for prepared statement exec. Error: %v stmt=%v", attempt+1, maxRetries, err, stmt) } } diff --git a/internal/nntp/nntp-client-commands.go b/internal/nntp/nntp-client-commands.go index d828261..003104f 100644 --- a/internal/nntp/nntp-client-commands.go +++ b/internal/nntp/nntp-client-commands.go @@ -8,7 +8,6 @@ import ( "log" "strconv" "strings" - "sync/atomic" "time" "github.com/go-while/go-pugleaf/internal/common" @@ -1033,30 +1032,29 @@ func (c *BackendConn) parseHeaderLine(line string) (HeaderLine, error) { // SendCheckMultiple sends CHECK commands for multiple message IDs without returning responses! // Registers each command ID with the demuxer for proper response routing -func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readCHECKResponsesChan chan *ReadRequest, job *CHTTJob, demuxer *ResponseDemuxer) error { +func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readCHECKResponsesChan chan *ReadRequest, job *CHTTJob, demuxer *ResponseDemuxer) (checksSent uint64, err error) { c.mux.Lock() if !c.IsConnected() { c.mux.Unlock() - return fmt.Errorf("not connected") + return 0, fmt.Errorf("not connected") } if c.ModeReader { c.mux.Unlock() - return fmt.Errorf("cannot check article in reader mode") + return 0, fmt.Errorf("cannot check article in reader mode") } c.lastUsed = time.Now() c.mux.Unlock() if len(messageIDs) == 0 { - return fmt.Errorf("no message IDs provided") + return 0, fmt.Errorf("no message IDs provided") } //writer := bufio.NewWriter(c.conn) //defer writer.Flush() //log.Printf("Newsgroup: '%s' | SendCheckMultiple commands for %d message IDs", *job.Newsgroup, len(messageIDs)) - checksSent := uint64(0) for n, msgID := range messageIDs { if msgID == nil || *msgID == "" { log.Printf("Newsgroup: '%s' | Skipping empty message ID in CHECK command", *job.Newsgroup) @@ -1067,7 +1065,7 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readCHECKResponses cmdID, err := c.TextConn.Cmd("CHECK %s", *msgID) c.mux.Unlock() if err != nil { - return fmt.Errorf("failed to send CHECK '%s': %w", *msgID, err) + return checksSent, fmt.Errorf("failed to send CHECK '%s': %w", *msgID, err) } checksSent++ @@ -1082,9 +1080,11 @@ func (c *BackendConn) SendCheckMultiple(messageIDs []*string, readCHECKResponses } // Update job counter with how many CHECK commands were actually sent - atomic.AddUint64(&job.CheckSentCount, checksSent) + job.Mux.Lock() + job.CheckSentCount += checksSent + job.Mux.Unlock() - return nil + return checksSent, nil } // SendTakeThisArticleStreaming IS UNSAFE! MUST BE LOCKED AND UNLOCKED OUTSIDE FOR THE WHOLE BATCH!!! 
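The SendCheckMultiple change above pipelines one CHECK command per message ID over a single textproto connection, returns how many commands were actually written, and registers every command ID with the response demuxer (reworked in the next diff) so a separate reader can match replies to commands. A minimal, self-contained sketch of that pipelining pattern, assuming a plain net/textproto connection; sendChecks, readChecks and cmdInfo are illustrative names, not code from this patch set:

package sketch

import (
	"fmt"
	"log"
	"net/textproto"
)

// cmdInfo pairs a textproto command ID with the message ID it was sent for.
type cmdInfo struct {
	id    uint
	msgID string
}

// sendChecks writes CHECK commands back-to-back without waiting for replies and
// registers each command ID on a (buffered) channel for the response reader.
func sendChecks(conn *textproto.Conn, msgIDs []string, pending chan<- cmdInfo) (uint64, error) {
	var sent uint64
	for _, m := range msgIDs {
		id, err := conn.Cmd("CHECK %s", m)
		if err != nil {
			return sent, fmt.Errorf("send CHECK %s: %w", m, err)
		}
		pending <- cmdInfo{id: id, msgID: m}
		sent++
	}
	return sent, nil
}

// readChecks consumes replies in the order the commands were queued, keeping a
// single ReadCodeLine caller per connection (the race the demuxer is there to avoid).
func readChecks(conn *textproto.Conn, pending <-chan cmdInfo, n int) {
	for i := 0; i < n; i++ {
		ci := <-pending
		conn.StartResponse(ci.id)
		code, line, err := conn.ReadCodeLine(0) // expectCode <= 0: accept any status code
		conn.EndResponse(ci.id)
		if err != nil {
			log.Printf("CHECK %s: %v", ci.msgID, err)
			continue
		}
		log.Printf("CHECK %s -> %d %s", ci.msgID, code, line) // 238 = wanted, 438 = not wanted
	}
}

In the patch itself the buffered cmdIDChan plays the role of pending here, replacing the mutex-guarded cmdIDQ slice.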
diff --git a/internal/nntp/nntp-transfer-demuxer.go b/internal/nntp/nntp-transfer-demuxer.go index 8ceea64..ad9150e 100644 --- a/internal/nntp/nntp-transfer-demuxer.go +++ b/internal/nntp/nntp-transfer-demuxer.go @@ -12,10 +12,10 @@ import ( // and dispatches them to the appropriate handler channel (CHECK or TAKETHIS) // This eliminates race conditions in concurrent ReadCodeLine calls type ResponseDemuxer struct { - conn *BackendConn - cmdIDQ []*CmdIDinfo - signalChan chan struct{} - cmdIDQMux sync.RWMutex + conn *BackendConn + cmdIDChan chan *CmdIDinfo + signalChan chan struct{} + //cmdIDQMux sync.RWMutex LastID uint checkResponseChan chan *ResponseData ttResponseChan chan *ResponseData @@ -27,12 +27,13 @@ type ResponseDemuxer struct { } // NewResponseDemuxer creates a new response demultiplexer -func NewResponseDemuxer(conn *BackendConn, errChan chan struct{}, BatchCheck int) *ResponseDemuxer { +func NewResponseDemuxer(conn *BackendConn, errChan chan struct{}) *ResponseDemuxer { return &ResponseDemuxer{ conn: conn, - signalChan: make(chan struct{}, 1), + cmdIDChan: make(chan *CmdIDinfo, 64*1024), // Buffer for command IDs checkResponseChan: make(chan *ResponseData, 1024*1024), // Buffer for CHECK responses ttResponseChan: make(chan *ResponseData, 1024*1024), // Buffer for TAKETHIS responses + signalChan: make(chan struct{}, 1), errChan: errChan, started: false, } @@ -43,28 +44,45 @@ func (d *ResponseDemuxer) RegisterCommand(cmdID uint, cmdType ResponseType) { d.lastRequestMux.Lock() d.lastRequest = time.Now() d.lastRequestMux.Unlock() - - d.cmdIDQMux.Lock() - d.cmdIDQ = append(d.cmdIDQ, &CmdIDinfo{CmdID: cmdID, RespType: cmdType}) - d.cmdIDQMux.Unlock() + select { + case d.cmdIDChan <- &CmdIDinfo{CmdID: cmdID, RespType: cmdType}: + // Registered successfully + case <-d.errChan: + log.Printf("ResponseDemuxer: got errChan while registering command, exiting") + common.SignalErrChan(d.errChan) + return + } select { case d.signalChan <- struct{}{}: + // sent signal + case <-d.errChan: + log.Printf("ResponseDemuxer: got errChan while signaling command, exiting") + common.SignalErrChan(d.errChan) + return default: + // no-op } } // PopCommand removes a command ID from the queue func (d *ResponseDemuxer) PopCommand() *CmdIDinfo { - d.cmdIDQMux.Lock() - defer d.cmdIDQMux.Unlock() - if len(d.cmdIDQ) == 0 { + if len(d.cmdIDChan) == 0 { return nil } - cmdIDInfo := d.cmdIDQ[0] - d.cmdIDQ = d.cmdIDQ[1:] - return cmdIDInfo + select { + case cmdIDInfo := <-d.cmdIDChan: + return cmdIDInfo + case <-d.errChan: + log.Printf("ResponseDemuxer: got errChan while popping command, exiting") + common.SignalErrChan(d.errChan) + return nil + default: + // no-op + } + + return nil } // GetCheckResponseChan returns the channel for CHECK responses @@ -110,6 +128,7 @@ func (d *ResponseDemuxer) readAndDispatch() { common.SignalErrChan(d.errChan) }() outoforderBacklog := make(map[uint]*CmdIDinfo, 1024) +loop: for { select { case <-d.errChan: @@ -122,6 +141,7 @@ func (d *ResponseDemuxer) readAndDispatch() { } if !d.conn.IsConnected() { + common.SignalErrChan(d.errChan) log.Printf("ResponseDemuxer: connection lost, exiting") return } @@ -145,17 +165,17 @@ func (d *ResponseDemuxer) readAndDispatch() { log.Printf("ResponseDemuxer: got no cmdInfo but have outoforderBacklog: %d [%v]", len(outoforderBacklog), outoforderBacklog) if _, exists := outoforderBacklog[d.LastID+1]; exists { log.Printf("ResponseDemuxer: pre-processing out-of-order backlog cmdID=%d d.LastID=%d", d.LastID+1, d.LastID) - continue + continue loop } } 
//log.Printf("ResponseDemuxer: nothing to process, waiting on signalChan") <-d.signalChan - continue + continue loop } if d.LastID+1 != cmdInfo.CmdID { log.Printf("ResponseDemuxer: WARNING - out-of-order cmdID received, expected %d got %d", d.LastID+1, cmdInfo.CmdID) outoforderBacklog[cmdInfo.CmdID] = cmdInfo - continue + continue loop } else { d.LastID = cmdInfo.CmdID } @@ -205,17 +225,15 @@ func (d *ResponseDemuxer) readAndDispatch() { } // GetStatistics returns current demuxer statistics -func (d *ResponseDemuxer) GetStatistics() (pendingCommands int, checkResponsesQueued int, ttResponsesQueued int, lastRequest time.Time) { - d.cmdIDQMux.RLock() - pendingCommands = len(d.cmdIDQ) - d.cmdIDQMux.RUnlock() +func (d *ResponseDemuxer) GetDemuxerStats() (pendingCommands int64, checkResponsesQueued int64, ttResponsesQueued int64, lastRequest time.Time) { + pendingCommands = int64(len(d.cmdIDChan)) d.lastRequestMux.RLock() lastRequest = d.lastRequest d.lastRequestMux.RUnlock() - checkResponsesQueued = len(d.checkResponseChan) - ttResponsesQueued = len(d.ttResponseChan) + checkResponsesQueued = int64(len(d.checkResponseChan)) + ttResponsesQueued = int64(len(d.ttResponseChan)) return pendingCommands, checkResponsesQueued, ttResponsesQueued, lastRequest } diff --git a/internal/nntp/nntp-transfer.go b/internal/nntp/nntp-transfer.go index 61352a7..0273244 100644 --- a/internal/nntp/nntp-transfer.go +++ b/internal/nntp/nntp-transfer.go @@ -414,9 +414,9 @@ type NewsgroupTransferProgress struct { Started time.Time LastUpdated time.Time - OffsetStart int64 - BatchStart int64 - BatchEnd int64 + OffsetStart int64 + //BatchStart int64 + //BatchEnd int64 TotalArticles int64 Checked uint64 Wanted uint64 From e0a15fe24cdd117d3c5bd98afc2ebecf7657b4a0 Mon Sep 17 00:00:00 2001 From: go-while Date: Tue, 21 Oct 2025 00:28:06 +0000 Subject: [PATCH 071/100] testing --- cmd/nntp-transfer/main.go | 134 ++++---------------------------------- 1 file changed, 13 insertions(+), 121 deletions(-) diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index 888eb73..db89de8 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -2048,8 +2048,7 @@ var JobsToRetryMux sync.Mutex type ReturnSignal struct { Mux sync.Mutex - CHECK bool - RunTT bool + ConnMux sync.Mutex // Simple mutex to serialize connection access slotID int ExitChan chan *ReturnSignal errChan chan struct{} @@ -2060,99 +2059,6 @@ type ReturnSignal struct { jobs []*nntp.CHTTJob } -func (rs *ReturnSignal) BlockTT() { - rs.Mux.Lock() - rs.RunTT = false - rs.Mux.Unlock() - //log.Printf("BlockTT: released RunTT lock") -} - -func (rs *ReturnSignal) GetLockTT() { - start := time.Now() - printLast := start - for { - rs.Mux.Lock() - if rs.RunTT { - //log.Printf("GetLockTT: RunTT already true") - rs.Mux.Unlock() - return - } - if !rs.RunTT && !rs.CHECK { - rs.RunTT = true - rs.Mux.Unlock() - //log.Printf("GetLockTT: acquired RunTT lock") - return - } - rs.Mux.Unlock() - if time.Since(printLast) > time.Second*30 { - log.Printf("GetLockTT: waiting since %v for RunTT to become true...", time.Since(start)) - printLast = time.Now() - } - time.Sleep(nntp.ReturnDelay) - } -} - -func (rs *ReturnSignal) UnlockCHECKforTTwithWait() { - start := time.Now() - printLast := start - for { - rs.Mux.Lock() - if !rs.RunTT { - rs.CHECK = false - rs.RunTT = true - rs.Mux.Unlock() - log.Printf("UnlockCHECKforTTwithWait: switched CHECK to RunTT") - return - } - rs.Mux.Unlock() - if time.Since(printLast) > time.Second*30 { - log.Printf("UnlockCHECKforTTwithWait: 
waiting since %v for RunTT to become false...", time.Since(start)) - time.Sleep(nntp.ReturnDelay) - printLast = time.Now() - } - } -} - -func (rs *ReturnSignal) UnlockCHECKforTT() { - rs.Mux.Lock() - defer rs.Mux.Unlock() - if !rs.CHECK || rs.RunTT { - //log.Printf("UnlockCHECKforTT: already set... CHECK=%t RunTT=%t", rs.CHECK, rs.RunTT) - return - } - //log.Printf("UnlockCHECKforTT: switched CHECK to RunTT") - rs.CHECK = false - rs.RunTT = true -} - -func (rs *ReturnSignal) BlockCHECK() { - rs.Mux.Lock() - rs.CHECK = false - rs.RunTT = true - //log.Printf("BlockCHECK: set CHECK to false (RunTT=%t)", rs.RunTT) - rs.Mux.Unlock() -} - -func (rs *ReturnSignal) LockCHECK() { - start := time.Now() - printLast := start - for { - rs.Mux.Lock() - if !rs.RunTT { - rs.CHECK = true - //log.Printf("LockCHECK: acquired CHECK lock (RunTT=%t) waited %v", rs.RunTT, time.Since(start)) - rs.Mux.Unlock() - return - } - if time.Since(printLast) > time.Second*30 { - log.Printf("LockCHECK: waiting since %v for RunTT to become false... CHECK=%t RunTT=%t", time.Since(start), rs.CHECK, rs.RunTT) - printLast = time.Now() - } - rs.Mux.Unlock() - time.Sleep(nntp.ReturnDelay) - } -} - func replyChan(request chan struct{}, reply chan struct{}) { select { case <-request: @@ -2169,6 +2075,7 @@ func replyChan(request chan struct{}, reply chan struct{}) { func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs *ReturnSignal) { var mux sync.Mutex + var runningTTJobs int // protected by local mux var workerWG sync.WaitGroup readCHECKResponsesChan := make(chan *nntp.ReadRequest, 1024*1024) readTAKETHISResponsesChan := make(chan *nntp.ReadRequest, 1024*1024) @@ -2255,7 +2162,6 @@ func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs waiting: for { if len(WorkersTTChannel) >= MaxQueuedJobs { - rs.BlockCHECK() log.Printf("CheckWorker (%d): waiting... shared takeThisChan full (%d)", workerID, len(WorkersTTChannel)) time.Sleep(time.Second / 4) continue waiting @@ -2266,27 +2172,25 @@ func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs if currentJob.TTMode.UseCHECK() { //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d waits to check %d message IDs in batches of %d", *currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs), BatchCheck) - //common.ChanLock(flipflopChan) if !conn.IsConnected() { rs.Mux.Lock() rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) 
// requeue at front rs.Mux.Unlock() - //common.ChanRelease(flipflopChan) log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d connection lost before SendCheckMultiple for batch (offset %d: %d-%d)", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) time.Sleep(time.Second) return } - //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquire LOCK CHECK for batch (offset %d: %d-%d) (%d messages)", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, len(currentJob.MessageIDs[batchStart:batchEnd])) - rs.LockCHECK() - //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquired CHECK lock for batch (offset %d: %d-%d) -> SendCheckMultiple", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquire connection lock for batch (offset %d: %d-%d) (%d messages)", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, len(currentJob.MessageIDs)) + rs.ConnMux.Lock() + //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d acquired connection lock for batch (offset %d: %d-%d) -> SendCheckMultiple", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) checksSent, err := conn.SendCheckMultiple(currentJob.MessageIDs, readCHECKResponsesChan, currentJob, demuxer) + rs.ConnMux.Unlock() if err != nil { log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d SendCheckMultiple error for batch (offset %d: %d-%d): %v", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd, err) time.Sleep(time.Second) rs.Mux.Lock() rs.jobs = append([]*nntp.CHTTJob{currentJob}, rs.jobs...) 
// requeue at front rs.Mux.Unlock() - //rs.BlockCHECK() return } //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d Sent CHECK for batch (offset %d: %d-%d), responses will be read asynchronously...", *currentJob.Newsgroup, workerID, currentJob.JobID, currentJob.OffsetStart, currentJob.BatchStart, currentJob.BatchEnd) @@ -2298,8 +2202,6 @@ func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs } else { //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d skipping CHECK for %d message IDs (TAKETHIS mode)", *currentJob.Newsgroup, workerID, currentJob.JobID, len(currentJob.MessageIDs)) currentJob.WantedIDs = currentJob.MessageIDs - //rs.UnlockCHECKforTTwithWait() - rs.BlockCHECK() enqueue: for { select { @@ -2308,11 +2210,9 @@ func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs break enqueue default: // chan full - rs.BlockCHECK() - time.Sleep(time.Millisecond * 16) + time.Sleep(time.Millisecond * 100) } } - rs.BlockCHECK() //log.Printf("Newsgroup: '%s' | CheckWorker (%d): job #%d sent to local TakeThisChan", *currentJob.Newsgroup, workerID, currentJob.JobID) } //lastRun = time.Now() @@ -2339,9 +2239,10 @@ func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs hasWork := len(rs.jobs) > 0 rs.Mux.Unlock() if hasWork { + //log.Printf("CheckWorker (%d): Ticker found work to do, signaling tickChan...", workerID) common.SignalTickChan(tickChan) } else { - rs.BlockCHECK() + //log.Printf("CheckWorker (%d): Ticker found no work to do.", workerID) } } // end select } // end forever @@ -2512,7 +2413,6 @@ func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs if len(job.WantedIDs) > 0 { // Pass job to TAKETHIS worker via channel //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5 job #%d got all %d CHECK responses, passing to TAKETHIS worker (wanted: %d articles) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, queuedCount, len(job.WantedIDs), len(WorkersTTChannel)) - rs.UnlockCHECKforTT() WorkersTTChannel <- job // local takethis chan sharing the same connection //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG5c Sent job #%d to TAKETHIS worker (wanted: %d/%d) takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, len(job.WantedIDs), queuedCount, len(WorkersTTChannel)) @@ -2520,11 +2420,6 @@ func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG6 job #%d got %d CHECK responses but server wants none", *job.Newsgroup, workerID, job.JobID, queuedCount) // Send response and close channel for jobs with no wanted articles job.Response(true, nil) - if len(WorkersTTChannel) > 0 { - rs.UnlockCHECKforTT() - } else { - rs.BlockTT() - } } } else { //log.Printf("Newsgroup: '%s' | CheckWorker (%d): DEBUG6 job #%d CHECK responses so far: %d/%d readResponsesChan=%d", *job.Newsgroup, workerID, job.JobID, readCount, queuedCount, len(readResponsesChan)) @@ -2654,7 +2549,6 @@ func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs } // end for }(&workerWG) - var runningTTJobs int // protected by local mux // launch a goroutine to process TAKETHIS jobs from local channel sharing the same connection workerWG.Add(1) go func(workerWG *sync.WaitGroup) { @@ -2706,14 +2600,12 @@ func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs mux.Lock() runningTTJobs++ mux.Unlock() - //log.Printf("Newsgroup: '%s' | TTworker (%d): Prepare locking to send TAKETHIS for job #%d with %d 
wanted articles", *job.Newsgroup, workerID, job.JobID, len(wantedArticles)) - rs.GetLockTT() - //common.ChanLock(flipflopChan) + //log.Printf("Newsgroup: '%s' | TTworker (%d): Acquire connection lock to send TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, workerID, job.JobID, len(wantedArticles)) //log.Printf("Newsgroup: '%s' | TTworker (%d): Sending TAKETHIS for job #%d with %d wanted articles", *job.Newsgroup, workerID, job.JobID, len(wantedArticles)) // Send TAKETHIS commands using existing function + rs.ConnMux.Lock() redis_cached, err := sendArticlesBatchViaTakeThis(conn, wantedArticles, job, *job.Newsgroup, rs.redisCli, demuxer, readTAKETHISResponsesChan) - //common.ChanRelease(flipflopChan) - rs.BlockTT() + rs.ConnMux.Unlock() if err != nil { log.Printf("Newsgroup: '%s' | TTworker (%d): Error in TAKETHIS job #%d: %v", *job.Newsgroup, workerID, job.JobID, err) job.Response(false, err) @@ -2722,7 +2614,7 @@ func CHTTWorker(db *database.Database, workerID int, conn *nntp.BackendConn, rs rs.jobs = append([]*nntp.CHTTJob{job}, rs.jobs...) rs.Mux.Unlock() mux.Lock() - runningTTJobs++ + runningTTJobs-- mux.Unlock() return } From b5cb379c36d5560e330234d1c901d44817c89f88 Mon Sep 17 00:00:00 2001 From: go-while Date: Wed, 22 Oct 2025 02:13:25 +0000 Subject: [PATCH 072/100] testing --- cmd/nntp-transfer/main.go | 84 +++++- internal/database/db_batch.go | 15 +- internal/database/db_groupdbs.go | 10 +- internal/database/sqlite_retry.go | 35 +-- internal/nntp/transfer-progress-utils.go | 154 ++++++++++ internal/nntp/transfer-progress.go | 347 +++++++++++++++++++++++ 6 files changed, 606 insertions(+), 39 deletions(-) create mode 100644 internal/nntp/transfer-progress-utils.go create mode 100644 internal/nntp/transfer-progress.go diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index db89de8..c684487 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -190,6 +190,7 @@ func main() { fileExclude = flag.String("file-exclude", "", "File containing newsgroup patterns to exclude (one per line)") forceIncludeOnly = flag.Bool("force-include-only", false, "When set, only transfer newsgroups that match patterns in include file (ignores -group pattern)") excludePrefix = flag.String("exclude-prefix", "", "Exclude newsgroups with this prefix (comma-separated list, supports wildcards like 'alt.binaries.*')") + useProgressDB = flag.Bool("use-progress-db", true, "Use transfer progress database to track transferred newsgroups and stats") // Web server and profiling options webPort = flag.Int("web-port", 0, "Enable web server on this port to view results (e.g. 
8080, default: disabled)") @@ -369,7 +370,16 @@ func main() { } log.Printf("Using stored UseShortHashLen: %d", finalUseShortHashLen) } - + var progressDB *nntp.TransferProgressDB = nil + if *useProgressDB { + // Open transfer progress database + aprogressDB, err := nntp.OpenTransferProgressDB(*dataDir, *host+":"+strconv.Itoa(*port)) + if err != nil { + log.Fatalf("Failed to open transfer progress database: %v", err) + } + progressDB = aprogressDB + defer progressDB.Close() + } // Create target server connection pool targetProvider := &config.Provider{ Name: "transfer:" + *host, @@ -404,17 +414,16 @@ func main() { backendConfig.ProxyUsername = proxyConfig.Username backendConfig.ProxyPassword = proxyConfig.Password } - nntphostname, err := db.GetConfigValue("local_nntp_hostname") + nntphostname, err := db.GetConfigValue(config.CFG_KEY_HOSTNAME) if err != nil || nntphostname == "" { - log.Printf("Failed to get local_nntp_hostname from database: %v", err) - os.Exit(1) + log.Fatalf("Failed to get local_nntp_hostname from database: %v", err) } pool := nntp.NewPool(backendConfig) log.Printf("Created connection pool for target server '%s:%d' with max %d connections", *host, *port, *maxThreads) // Get newsgroups to transfer - newsgroups, err := getNewsgroupsToTransfer(db, *transferGroup, *fileInclude, *fileExclude, *excludePrefix, *forceIncludeOnly) + newsgroups, err := getNewsgroupsToTransfer(db, progressDB, startTime, endTime, *transferGroup, *fileInclude, *fileExclude, *excludePrefix, *forceIncludeOnly) if err != nil { log.Fatalf("Failed to get newsgroups: %v", err) } @@ -454,7 +463,7 @@ func main() { go func(wgP *sync.WaitGroup, redisCli *redis.Client) { defer wgP.Done() resultChan := make(chan error, 1) - resultChan <- runTransfer(db, newsgroups, *batchCheck, *maxThreads, *dryRun, startTime, endTime, *debugCapture, wgP, redisCli) + resultChan <- runTransfer(db, newsgroups, *batchCheck, *maxThreads, *dryRun, startTime, endTime, *debugCapture, wgP, redisCli, progressDB) result := <-resultChan if !*debugCapture { transferDoneChan <- result @@ -712,7 +721,7 @@ func testConnection(host *string, port *int, username *string, password *string, } // getNewsgroupsToTransfer returns newsgroups matching the specified pattern and file filters -func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, fileExclude, excludePrefix string, forceIncludeOnly bool) ([]*models.Newsgroup, error) { +func getNewsgroupsToTransfer(db *database.Database, progressDB *nntp.TransferProgressDB, startTime, endTime *time.Time, groupPattern, fileInclude, fileExclude, excludePrefix string, forceIncludeOnly bool) ([]*models.Newsgroup, error) { var newsgroups []*models.Newsgroup // Parse exclude prefix patterns (comma-separated) @@ -819,6 +828,10 @@ func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, f if matchesExcludePrefix(ng.Name, excludePrefixes) { continue } + // Check if newsgroup already has results for this remote + if IgnoreNewsgroupProgress(ng, progressDB, startTime, endTime) { + continue + } // Fast exact match check first if includeLookup[ng.Name] { newsgroups = append(newsgroups, ng) @@ -843,6 +856,10 @@ func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, f if matchesExcludePrefix(ng.Name, excludePrefixes) { continue } + // Check if newsgroup already has results for this remote + if IgnoreNewsgroupProgress(ng, progressDB, startTime, endTime) { + continue + } if shouldIncludeNewsgroup(ng.Name, includePatterns, excludePatterns, includeLookup, 
excludeLookup, hasIncludeWildcards, hasExcludeWildcards) { newsgroups = append(newsgroups, ng) } @@ -869,6 +886,10 @@ func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, f if matchesExcludePrefix(ng.Name, excludePrefixes) { continue } + // Check if newsgroup already has results for this remote + if IgnoreNewsgroupProgress(ng, progressDB, startTime, endTime) { + continue + } if shouldIncludeNewsgroup(ng.Name, includePatterns, excludePatterns, includeLookup, excludeLookup, hasIncludeWildcards, hasExcludeWildcards) { newsgroups = append(newsgroups, ng) } @@ -882,6 +903,10 @@ func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, f if matchesExcludePrefix(ng.Name, excludePrefixes) { break } + // Check if newsgroup already has results for this remote + if IgnoreNewsgroupProgress(ng, progressDB, startTime, endTime) { + break + } if shouldIncludeNewsgroup(ng.Name, includePatterns, excludePatterns, includeLookup, excludeLookup, hasIncludeWildcards, hasExcludeWildcards) { newsgroups = append(newsgroups, ng) } @@ -894,6 +919,20 @@ func getNewsgroupsToTransfer(db *database.Database, groupPattern, fileInclude, f return newsgroups, nil } +func IgnoreNewsgroupProgress(ng *models.Newsgroup, progressDB *nntp.TransferProgressDB, startTime, endTime *time.Time) bool { + if progressDB != nil { + exists, err := progressDB.NewsgroupExists(ng.Name, startTime, endTime) + if err != nil { + log.Printf("Warning: Failed to check if newsgroup %s exists in progress DB: %v", ng.Name, err) + return true + } else if exists { + log.Printf("Skipping newsgroup %s - already has transfer results for this remote", ng.Name) + return true + } + } + return false +} + // matchesExcludePrefix checks if a newsgroup name matches any of the exclude prefixes func matchesExcludePrefix(ngName string, excludePrefixes []string) bool { for _, excludePrefix := range excludePrefixes { @@ -1019,7 +1058,7 @@ func matchesAnyWildcardPattern(newsgroup string, patterns []string) bool { } // runTransfer performs the actual article transfer process -func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchCheck int64, maxThreads int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, wgP *sync.WaitGroup, redisCli *redis.Client) error { +func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchCheck int64, maxThreads int, dryRun bool, startTime, endTime *time.Time, debugCapture bool, wgP *sync.WaitGroup, redisCli *redis.Client, progressDB *nntp.TransferProgressDB) error { defer wgP.Done() defer log.Printf("runTransfer() quitted") maxThreadsChan := make(chan struct{}, maxThreads) @@ -1045,7 +1084,7 @@ func runTransfer(db *database.Database, newsgroups []*models.Newsgroup, batchChe if VERBOSE { log.Printf("Newsgroup: '%s' | Start", ng.Name) } - err := transferNewsgroup(db, ng, batchCheck, dryRun, startTime, endTime, debugCapture, redisCli) + err := transferNewsgroup(db, ng, batchCheck, dryRun, startTime, endTime, debugCapture, redisCli, progressDB) if err == ErrNotInDateRange { nntp.ResultsMutex.Lock() nothingInDateRange++ @@ -1135,7 +1174,7 @@ func processRequeuedJobs(newsgroup string, ttMode *nntp.TakeThisMode, ttResponse } // transferNewsgroup transfers articles from a single newsgroup -func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck int64, dryRun bool, startTime, endTime *time.Time, debugCapture bool, redisCli *redis.Client) error { +func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck int64, 
dryRun bool, startTime, endTime *time.Time, debugCapture bool, redisCli *redis.Client, transferProgressDB *nntp.TransferProgressDB) error { //log.Printf("Newsgroup: '%s' | transferNewsgroup: Starting (getting group DBs)...", ng.Name) @@ -1183,6 +1222,12 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i results = append(results, fmt.Sprintf("END Newsgroup: '%s' | No articles to process", ng.Name)) } nntp.ResultsMutex.Unlock() + // Insert result into progress database + if transferProgressDB != nil { + if err := transferProgressDB.InsertResult(ng.Name, startTime, endTime, 0, 0, 0, 0, 0, 0, 0, 0); err != nil { + log.Printf("Warning: Failed to insert result to progress DB for '%s': %v", ng.Name, err) + } + } // No articles to process if startTime != nil || endTime != nil { if VERBOSE { @@ -1357,6 +1402,25 @@ func transferNewsgroup(db *database.Database, ng *models.Newsgroup, batchCheck i } delete(rejectedArticles, ng.Name) // free memory nntp.ResultsMutex.Unlock() + + // Insert result into progress database + if transferProgressDB != nil { + if err := transferProgressDB.InsertResult( + ng.Name, + startTime, + endTime, + int64(tmptotalTransferred), + int64(tmptotalUnwanted), + int64(tmptotalChecked), + int64(tmptotalRejected), + int64(tmptotalRetry), + int64(tmptotalSkipped), + int64(tmptotalTXErrors), + int64(tmptotalConnErrors), + ); err != nil { + log.Printf("Warning: Failed to insert result to progress DB for '%s': %v", ng.Name, err) + } + } }(&responseWG) OffsetQueue := &nntp.OffsetQueue{ diff --git a/internal/database/db_batch.go b/internal/database/db_batch.go index 170296f..67e4e41 100644 --- a/internal/database/db_batch.go +++ b/internal/database/db_batch.go @@ -1142,19 +1142,20 @@ func (sq *SQ3batch) batchUpdateThreadCache(groupDBs *GroupDBs, threadUpdates map // Process each thread root and its accumulated updates for threadRoot, updates := range threadUpdates { - // Get current cache state with retryable logic + // Get current cache state - NO retry needed, outer transaction handles it var currentChildren string var currentCount int - err := retryableStmtQueryRowScan(selectStmt, []interface{}{threadRoot}, ¤tChildren, ¤tCount) + row := selectStmt.QueryRow(threadRoot) + err := row.Scan(¤tChildren, ¤tCount) if err != nil { // Thread cache entry doesn't exist, initialize it with the first update //firstUpdate := updates[0] // Format dates as UTC strings to avoid timezone encoding issues firstUpdateDateUTC := updates[0].childDate.UTC().Format("2006-01-02 15:04:05") - _, err = retryableStmtExec(initStmt, threadRoot, firstUpdateDateUTC, updates[0].childArticleNum, firstUpdateDateUTC) + _, err = initStmt.Exec(threadRoot, firstUpdateDateUTC, updates[0].childArticleNum, firstUpdateDateUTC) if err != nil { - log.Printf("[BATCH-CACHE] Failed to initialize thread cache for root %d after retries: %v", threadRoot, err) + log.Printf("[BATCH-CACHE] Failed to initialize thread cache for root %d: %v", threadRoot, err) return fmt.Errorf("failed to initialize thread cache for root %d: %w", threadRoot, err) } currentChildren = "" @@ -1182,13 +1183,13 @@ func (sq *SQ3batch) batchUpdateThreadCache(groupDBs *GroupDBs, threadUpdates map newCount := currentCount + len(updates) - // Execute the batch update for this thread with retryable logic + // Execute the batch update for this thread - NO retry needed, outer transaction handles it // Format lastActivity as UTC string to avoid timezone encoding issues lastActivityUTC := lastActivity.UTC().Format("2006-01-02 15:04:05") - _, 
err = retryableStmtExec(updateStmt, newChildren, newCount, lastChildNum, lastActivityUTC, threadRoot) + _, err = updateStmt.Exec(newChildren, newCount, lastChildNum, lastActivityUTC, threadRoot) if err != nil { - log.Printf("[BATCH-CACHE] Failed to update thread cache for root %d after retries: %v", threadRoot, err) + log.Printf("[BATCH-CACHE] Failed to update thread cache for root %d: %v", threadRoot, err) return fmt.Errorf("failed to update thread cache for root %d: %w", threadRoot, err) } updatedCount++ diff --git a/internal/database/db_groupdbs.go b/internal/database/db_groupdbs.go index 2af740f..17ab678 100644 --- a/internal/database/db_groupdbs.go +++ b/internal/database/db_groupdbs.go @@ -32,11 +32,10 @@ func (db *Database) GetGroupDBs(groupName string) (*GroupDBs, error) { return nil, fmt.Errorf("database configuration is not set") } - db.MainMutex.Lock() + db.MainMutex.Lock() //mux #d2ef40e0 groupDBs := db.groupDBs[groupName] if groupDBs != nil { - db.MainMutex.Unlock() - + db.MainMutex.Unlock() //mux #d2ef40e0 for { groupDBs.mux.RLock() if groupDBs.state == stateCREATED { @@ -53,9 +52,10 @@ func (db *Database) GetGroupDBs(groupName string) (*GroupDBs, error) { NewsgroupPtr: db.Batch.GetNewsgroupPointer(groupName), DB: nil, Idle: time.Now(), + Workers: 1, } db.groupDBs[groupName] = groupDBs - db.MainMutex.Unlock() + db.MainMutex.Unlock() //mux #d2ef40e0 groupsHash := GroupHashMap.GroupToHash(groupName) @@ -108,8 +108,6 @@ func (db *Database) GetGroupDBs(groupName string) (*GroupDBs, error) { return nil, fmt.Errorf("failed to migrate group database %s: %w", groupName, err) } - groupDBs.IncrementWorkers() - db.MainMutex.Lock() db.openDBsNum++ db.MainMutex.Unlock() diff --git a/internal/database/sqlite_retry.go b/internal/database/sqlite_retry.go index b86abf5..afa6cd9 100644 --- a/internal/database/sqlite_retry.go +++ b/internal/database/sqlite_retry.go @@ -5,6 +5,7 @@ import ( "log" "math/rand" "strings" + "sync/atomic" "time" ) @@ -31,7 +32,7 @@ func isRetryableError(err error) bool { func retryableExec(db *sql.DB, query string, args ...interface{}) (sql.Result, error) { var result sql.Result var err error - + atomic.AddUint64(&queryID, 1) for attempt := 0; attempt < maxRetries; attempt++ { result, err = db.Exec(query, args...) @@ -50,8 +51,8 @@ func retryableExec(db *sql.DB, query string, args ...interface{}) (sql.Result, e jitter := time.Duration(rand.Int63n(int64(delay) / 2)) time.Sleep(delay + jitter) - log.Printf("[WARN] SQLite retry attempt %d/%d for query (first 50 chars): %s... Error: %v", - attempt+1, maxRetries, truncateString(query, 50), err) + log.Printf("(#%d) SQLite retry attempt %d/%d for query (first 50 chars): %s... Error: %v", + atomic.LoadUint64(&queryID), attempt+1, maxRetries, truncateString(query, 50), err) } } @@ -99,6 +100,7 @@ func retryableQueryRow(db *sql.DB, query string, args ...interface{}) *sql.Row { // retryableQueryRowScan executes a QueryRow and Scan with retry logic func retryableQueryRowScan(db *sql.DB, query string, args []interface{}, dest ...interface{}) error { var err error + atomic.AddUint64(&queryID, 1) for attempt := 0; attempt < maxRetries; attempt++ { row := db.QueryRow(query, args...) @@ -118,8 +120,8 @@ func retryableQueryRowScan(db *sql.DB, query string, args []interface{}, dest .. jitter := time.Duration(rand.Int63n(int64(delay) / 2)) time.Sleep(delay + jitter) - log.Printf("SQLite retry attempt %d/%d for QueryRow scan (first 50 chars): %s... 
Error: %v", - attempt+1, maxRetries, truncateString(query, 50), err) + log.Printf("(#%d) SQLite retry attempt %d/%d for QueryRow scan (first 50 chars): %s... Error: %v", + atomic.LoadUint64(&queryID), attempt+1, maxRetries, truncateString(query, 50), err) } } @@ -130,7 +132,7 @@ func retryableQueryRowScan(db *sql.DB, query string, args []interface{}, dest .. func retryableQuery(db *sql.DB, query string, args ...interface{}) (*sql.Rows, error) { var rows *sql.Rows var err error - + atomic.AddUint64(&queryID, 1) for attempt := 0; attempt < maxRetries; attempt++ { rows, err = db.Query(query, args...) @@ -154,8 +156,8 @@ func retryableQuery(db *sql.DB, query string, args ...interface{}) (*sql.Rows, e jitter := time.Duration(rand.Int63n(int64(delay) / 2)) time.Sleep(delay + jitter) - log.Printf("SQLite retry attempt %d/%d for query (first 50 chars): %s... Error: %v", - attempt+1, maxRetries, truncateString(query, 50), err) + log.Printf("(#%d) SQLite retry attempt %d/%d for query (first 50 chars): %s... Error: %v", + atomic.LoadUint64(&queryID), attempt+1, maxRetries, truncateString(query, 50), err) } } @@ -170,7 +172,7 @@ func retryableQuery(db *sql.DB, query string, args ...interface{}) (*sql.Rows, e // retryableTransactionExec executes a transaction with retry logic func retryableTransactionExec(db *sql.DB, txFunc func(*sql.Tx) error) error { var err error - + atomic.AddUint64(&queryID, 1) for attempt := 0; attempt < maxRetries; attempt++ { tx, err := db.Begin() if err != nil { @@ -186,7 +188,7 @@ func retryableTransactionExec(db *sql.DB, txFunc func(*sql.Tx) error) error { // Add random jitter (up to 50% of delay) jitter := time.Duration(rand.Int63n(int64(delay) / 2)) time.Sleep(delay + jitter) - log.Printf("SQLite retry attempt %d/%d for transaction begin: %v", attempt+1, maxRetries, err) + log.Printf("(#%d) SQLite retry attempt %d/%d for transaction begin: %v", atomic.LoadUint64(&queryID), attempt+1, maxRetries, err) continue } return err @@ -207,7 +209,7 @@ func retryableTransactionExec(db *sql.DB, txFunc func(*sql.Tx) error) error { // Add random jitter (up to 50% of delay) jitter := time.Duration(rand.Int63n(int64(delay) / 2)) time.Sleep(delay + jitter) - log.Printf("SQLite retry attempt %d/%d for transaction: %v", attempt+1, maxRetries, err) + log.Printf("(#%d) SQLite retry attempt %d/%d for transaction: %v", atomic.LoadUint64(&queryID), attempt+1, maxRetries, err) continue } return err @@ -242,11 +244,13 @@ func truncateString(s string, length int) string { return s[:length] } +var queryID uint64 + // retryableStmtExec executes a prepared statement with retry logic for lock conflicts func retryableStmtExec(stmt *sql.Stmt, args ...interface{}) (sql.Result, error) { var result sql.Result var err error - + atomic.AddUint64(&queryID, 1) for attempt := 0; attempt < maxRetries; attempt++ { result, err = stmt.Exec(args...) @@ -265,7 +269,7 @@ func retryableStmtExec(stmt *sql.Stmt, args ...interface{}) (sql.Result, error) jitter := time.Duration(rand.Int63n(int64(delay) / 2)) time.Sleep(delay + jitter) - log.Printf("SQLite retry attempt %d/%d for prepared statement exec. Error: %v stmt=%v", attempt+1, maxRetries, err, stmt) + log.Printf("(#%d) SQLite retry attempt %d/%d for prepared statement exec. 
Error: %v stmt=%v", atomic.LoadUint64(&queryID), attempt+1, maxRetries, err, stmt) } } @@ -275,7 +279,7 @@ func retryableStmtExec(stmt *sql.Stmt, args ...interface{}) (sql.Result, error) // retryableStmtQueryRowScan executes a prepared statement QueryRow and Scan with retry logic func retryableStmtQueryRowScan(stmt *sql.Stmt, args []interface{}, dest ...interface{}) error { var err error - + atomic.AddUint64(&queryID, 1) for attempt := 0; attempt < maxRetries; attempt++ { row := stmt.QueryRow(args...) err = row.Scan(dest...) @@ -295,8 +299,7 @@ func retryableStmtQueryRowScan(stmt *sql.Stmt, args []interface{}, dest ...inter jitter := time.Duration(rand.Int63n(int64(delay) / 2)) time.Sleep(delay + jitter) - log.Printf("SQLite retry attempt %d/%d for prepared statement QueryRow scan. Error: %v", - attempt+1, maxRetries, err) + log.Printf("(#%d) SQLite retry attempt %d/%d for prepared statement QueryRow scan. Error: %v", atomic.LoadUint64(&queryID), attempt+1, maxRetries, err) } } diff --git a/internal/nntp/transfer-progress-utils.go b/internal/nntp/transfer-progress-utils.go new file mode 100644 index 0000000..29a19d4 --- /dev/null +++ b/internal/nntp/transfer-progress-utils.go @@ -0,0 +1,154 @@ +package nntp + +import ( + "fmt" + "log" + "time" +) + +// PrintRecentTransfers prints recent transfer results to the console +func (tpdb *TransferProgressDB) PrintRecentTransfers(limit int) error { + results, err := tpdb.GetRecentTransfers(limit) + if err != nil { + return fmt.Errorf("failed to get recent transfers: %v", err) + } + + if len(results) == 0 { + log.Printf("No transfer records found for remote '%s' (id=%d)", tpdb.remoteName, tpdb.remoteID) + return nil + } + + log.Printf("=== Recent transfers for remote '%s' (showing last %d) ===", tpdb.remoteName, len(results)) + log.Printf("%-30s %-20s %8s %8s %8s %8s %8s %8s %8s %8s", + "Newsgroup", "Timestamp", "Sent", "Unwanted", "Checked", "Rejected", "Retry", "Skipped", "TXErr", "ConnErr") + log.Printf("%s", "-------------------------------------------------------------------------------------------------------------------") + + for _, r := range results { + log.Printf("%-30s %-20s %8d %8d %8d %8d %8d %8d %8d %8d", + truncateString(r.Newsgroup, 30), + r.Timestamp.Format("2006-01-02 15:04:05"), + r.Sent, + r.Unwanted, + r.Checked, + r.Rejected, + r.Retry, + r.Skipped, + r.TXErrors, + r.ConnErrors, + ) + } + + return nil +} + +// GetTransferStatsByNewsgroup returns aggregated statistics for a specific newsgroup +func (tpdb *TransferProgressDB) GetTransferStatsByNewsgroup(newsgroup string) (*TransferResult, error) { + tpdb.mu.RLock() + defer tpdb.mu.RUnlock() + + query := ` + SELECT + remote_id, + newsgroup, + MAX(timestamp) as last_transfer, + SUM(sent) as total_sent, + SUM(unwanted) as total_unwanted, + SUM(checked) as total_checked, + SUM(rejected) as total_rejected, + SUM(retry) as total_retry, + SUM(skipped) as total_skipped, + SUM(tx_errors) as total_tx_errors, + SUM(conn_errors) as total_conn_errors + FROM transfers + WHERE remote_id = ? AND newsgroup = ? 
+		GROUP BY remote_id, newsgroup
+	`
+
+	var r TransferResult
+	var timestampStr string
+	err := tpdb.db.QueryRow(query, tpdb.remoteID, newsgroup).Scan(
+		&r.RemoteID,
+		&r.Newsgroup,
+		&timestampStr,
+		&r.Sent,
+		&r.Unwanted,
+		&r.Checked,
+		&r.Rejected,
+		&r.Retry,
+		&r.Skipped,
+		&r.TXErrors,
+		&r.ConnErrors,
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	// Parse timestamp
+	r.Timestamp, err = time.Parse("2006-01-02 15:04:05", timestampStr)
+	if err != nil {
+		return nil, err
+	}
+
+	return &r, nil
+}
+
+// GetAllTransferStats returns aggregated statistics across all newsgroups
+func (tpdb *TransferProgressDB) GetAllTransferStats() (*TransferResult, error) {
+	tpdb.mu.RLock()
+	defer tpdb.mu.RUnlock()
+
+	query := `
+		SELECT
+			remote_id,
+			MAX(timestamp) as last_transfer,
+			SUM(sent) as total_sent,
+			SUM(unwanted) as total_unwanted,
+			SUM(checked) as total_checked,
+			SUM(rejected) as total_rejected,
+			SUM(retry) as total_retry,
+			SUM(skipped) as total_skipped,
+			SUM(tx_errors) as total_tx_errors,
+			SUM(conn_errors) as total_conn_errors
+		FROM transfers
+		WHERE remote_id = ?
+		GROUP BY remote_id
+	`
+
+	var r TransferResult
+	var timestampStr string
+	err := tpdb.db.QueryRow(query, tpdb.remoteID).Scan(
+		&r.RemoteID,
+		&timestampStr,
+		&r.Sent,
+		&r.Unwanted,
+		&r.Checked,
+		&r.Rejected,
+		&r.Retry,
+		&r.Skipped,
+		&r.TXErrors,
+		&r.ConnErrors,
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	// Parse timestamp
+	r.Timestamp, err = time.Parse("2006-01-02 15:04:05", timestampStr)
+	if err != nil {
+		return nil, err
+	}
+
+	r.Newsgroup = "ALL"
+	return &r, nil
+}
+
+// truncateString truncates a string to the specified length
+func truncateString(s string, maxLen int) string {
+	if len(s) <= maxLen {
+		return s
+	}
+	if maxLen <= 3 {
+		return s[:maxLen]
+	}
+	return s[:maxLen-3] + "..."
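[Editorial aside, not part of the patch: a minimal usage sketch of the transfer-progress API added by this commit. OpenTransferProgressDB, InsertResult, PrintRecentTransfers, GetAllTransferStats and Close are the functions defined in this file and in transfer-progress.go below; the data directory, hostname, newsgroup name and counter values are made up for illustration.]

package main

import (
	"log"

	"github.com/go-while/go-pugleaf/internal/nntp"
)

func main() {
	// Opens (or creates) <dataDir>/transfer-progress/transfer-progress.db for this remote.
	tpdb, err := nntp.OpenTransferProgressDB("data", "news.example.com")
	if err != nil {
		log.Fatalf("open transfer progress db: %v", err)
	}
	defer tpdb.Close()

	// Record one finished newsgroup run; nil start/end dates mean "no date filter".
	if err := tpdb.InsertResult("misc.test", nil, nil, 1200, 30, 1500, 5, 2, 0, 0, 0); err != nil {
		log.Printf("insert result: %v", err)
	}

	// Report the last 20 runs and the aggregated totals across all newsgroups.
	_ = tpdb.PrintRecentTransfers(20)
	if stats, err := tpdb.GetAllTransferStats(); err == nil {
		log.Printf("totals: sent=%d checked=%d rejected=%d", stats.Sent, stats.Checked, stats.Rejected)
	}
}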
+} diff --git a/internal/nntp/transfer-progress.go b/internal/nntp/transfer-progress.go new file mode 100644 index 0000000..2caac9b --- /dev/null +++ b/internal/nntp/transfer-progress.go @@ -0,0 +1,347 @@ +package nntp + +import ( + "database/sql" + "fmt" + "log" + "os" + "path/filepath" + "sync" + "time" + + "github.com/go-while/go-pugleaf/internal/database" + _ "github.com/mattn/go-sqlite3" +) + +// TransferProgressDB manages the SQLite database for tracking transfer progress +type TransferProgressDB struct { + db *sql.DB + remoteID int64 + remoteName string + mu sync.RWMutex +} + +// TransferResult represents a single transfer result record +type TransferResult struct { + RemoteID int64 + Newsgroup string + Timestamp time.Time + StartDate *time.Time // Can be nil for no start date filter + EndDate *time.Time // Can be nil for no end date filter + Sent int64 + Unwanted int64 + Checked int64 + Rejected int64 + Retry int64 + Skipped int64 + TXErrors int64 + ConnErrors int64 +} + +// OpenTransferProgressDB opens or creates the transfer progress database +func OpenTransferProgressDB(dataDir, remoteName string) (*TransferProgressDB, error) { + // Create data directory if it doesn't exist + progressDir := filepath.Join(dataDir, "transfer-progress") + if err := os.MkdirAll(progressDir, 0755); err != nil { + return nil, fmt.Errorf("failed to create progress directory: %v", err) + } + + dbPath := filepath.Join(progressDir, "transfer-progress.db") + db, err := sql.Open("sqlite3", dbPath) + if err != nil { + return nil, fmt.Errorf("failed to open database: %v", err) + } + + // Enable WAL mode for better concurrent performance + if _, err := db.Exec("PRAGMA journal_mode=WAL"); err != nil { + db.Close() + return nil, fmt.Errorf("failed to set WAL mode: %v", err) + } + + // Create tables if they don't exist + if err := createTables(db); err != nil { + db.Close() + return nil, fmt.Errorf("failed to create tables: %v", err) + } + + tpdb := &TransferProgressDB{ + db: db, + remoteName: remoteName, + } + + // Get or create remote ID + remoteID, err := tpdb.getOrCreateRemoteTransferProgress(remoteName) + if err != nil { + db.Close() + return nil, fmt.Errorf("failed to get/create remote: %v", err) + } + tpdb.remoteID = remoteID + + log.Printf("Transfer progress database opened: %s (remote_id=%d, hostname=%s)", dbPath, remoteID, remoteName) + return tpdb, nil +} + +// createTables creates the necessary database tables +func createTables(db *sql.DB) error { + schema := ` + CREATE TABLE IF NOT EXISTS remotes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + hostname TEXT NOT NULL UNIQUE, + created_at TEXT NOT NULL DEFAULT (datetime('now', 'utc')) + ); + + CREATE TABLE IF NOT EXISTS transfers ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + remote_id INTEGER NOT NULL, + newsgroup TEXT NOT NULL, + timestamp TEXT NOT NULL, + start_date TEXT NOT NULL DEFAULT "", + end_date TEXT NOT NULL DEFAULT "", + sent INTEGER NOT NULL DEFAULT 0, + unwanted INTEGER NOT NULL DEFAULT 0, + checked INTEGER NOT NULL DEFAULT 0, + rejected INTEGER NOT NULL DEFAULT 0, + retry INTEGER NOT NULL DEFAULT 0, + skipped INTEGER NOT NULL DEFAULT 0, + tx_errors INTEGER NOT NULL DEFAULT 0, + conn_errors INTEGER NOT NULL DEFAULT 0, + FOREIGN KEY (remote_id) REFERENCES remotes(id) + ); + + CREATE INDEX IF NOT EXISTS idx_transfers_remote_newsgroup + ON transfers(remote_id, newsgroup); + + CREATE INDEX IF NOT EXISTS idx_transfers_remote_ng_dates + ON transfers(remote_id, newsgroup, start_date, end_date); + + CREATE INDEX IF NOT EXISTS idx_transfers_timestamp + 
ON transfers(timestamp DESC); + ` + + _, err := database.RetryableExec(db, schema) + return err +} + +const query_getOrCreateRemote = "INSERT INTO remotes (hostname, created_at) VALUES (?, datetime('now', 'utc'))" + +// getOrCreateRemote gets or creates a remote server record +func (tpdb *TransferProgressDB) getOrCreateRemoteTransferProgress(hostname string) (int64, error) { + tpdb.mu.Lock() + defer tpdb.mu.Unlock() + + // Try to get existing remote + var id int64 + err := database.RetryableQueryRowScan( + tpdb.db, + "SELECT id FROM remotes WHERE hostname = ?", + []interface{}{hostname}, + &id, + ) + if err == nil { + return id, nil + } + if err != sql.ErrNoRows { + return 0, err + } + + // Create new remote + result, err := database.RetryableExec(tpdb.db, query_getOrCreateRemote, hostname) + if err != nil { + return 0, err + } + + return result.LastInsertId() +} + +const query_InsertResult = ` + INSERT INTO transfers ( + remote_id, newsgroup, timestamp, start_date, end_date, sent, unwanted, checked, + rejected, retry, skipped, tx_errors, conn_errors + ) VALUES (?, ?, datetime('now', 'utc'), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ` + +// InsertResult inserts a transfer result into the database +func (tpdb *TransferProgressDB) InsertResult(newsgroup string, startDate, endDate *time.Time, sent, unwanted, checked, rejected, retry, skipped, txErrors, connErrors int64) error { + tpdb.mu.Lock() + defer tpdb.mu.Unlock() + + // Convert time pointers to nullable strings + var startDateStr, endDateStr interface{} + if startDate != nil { + startDateStr = startDate.UTC().Format("2006-01-02 15:04:05") + } else { + startDateStr = nil + } + if endDate != nil { + endDateStr = endDate.UTC().Format("2006-01-02 15:04:05") + } else { + endDateStr = nil + } + + _, err := database.RetryableExec( + tpdb.db, + query_InsertResult, + tpdb.remoteID, + newsgroup, + startDateStr, + endDateStr, + sent, + unwanted, + checked, + rejected, + retry, + skipped, + txErrors, + connErrors, + ) + + if err != nil { + return fmt.Errorf("failed to insert transfer result: %v", err) + } + + return nil +} + +// GetRemoteID returns the current remote ID +func (tpdb *TransferProgressDB) GetRemoteID() int64 { + tpdb.mu.RLock() + defer tpdb.mu.RUnlock() + return tpdb.remoteID +} + +// GetRemoteName returns the current remote hostname +func (tpdb *TransferProgressDB) GetRemoteName() string { + tpdb.mu.RLock() + defer tpdb.mu.RUnlock() + return tpdb.remoteName +} + +// GetRecentTransfers returns recent transfer records for the current remote +func (tpdb *TransferProgressDB) GetRecentTransfers(limit int) ([]TransferResult, error) { + tpdb.mu.RLock() + defer tpdb.mu.RUnlock() + + query := ` + SELECT remote_id, newsgroup, timestamp, start_date, end_date, sent, unwanted, checked, + rejected, retry, skipped, tx_errors, conn_errors + FROM transfers + WHERE remote_id = ? + ORDER BY timestamp DESC + LIMIT ? 
+	`
+
+	rows, err := database.RetryableQuery(tpdb.db, query, tpdb.remoteID, limit)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	var results []TransferResult
+	for rows.Next() {
+		var r TransferResult
+		var timestampStr string
+		var startDateStr, endDateStr sql.NullString
+		err := rows.Scan(
+			&r.RemoteID,
+			&r.Newsgroup,
+			&timestampStr,
+			&startDateStr,
+			&endDateStr,
+			&r.Sent,
+			&r.Unwanted,
+			&r.Checked,
+			&r.Rejected,
+			&r.Retry,
+			&r.Skipped,
+			&r.TXErrors,
+			&r.ConnErrors,
+		)
+		if err != nil {
+			return nil, err
+		}
+
+		// Parse timestamp
+		r.Timestamp, err = time.Parse("2006-01-02 15:04:05", timestampStr)
+		if err != nil {
+			return nil, err
+		}
+
+		// Parse start_date if present
+		if startDateStr.Valid {
+			parsedStart, err := time.Parse("2006-01-02 15:04:05", startDateStr.String)
+			if err != nil {
+				return nil, err
+			}
+			r.StartDate = &parsedStart
+		}
+
+		// Parse end_date if present
+		if endDateStr.Valid {
+			parsedEnd, err := time.Parse("2006-01-02 15:04:05", endDateStr.String)
+			if err != nil {
+				return nil, err
+			}
+			r.EndDate = &parsedEnd
+		}
+
+		results = append(results, r)
+	}
+
+	return results, rows.Err()
+}
+
+// NewsgroupExists checks if a newsgroup already has transfer results for the current remote
+// with exactly the same start_date and end_date (including NULL values)
+func (tpdb *TransferProgressDB) NewsgroupExists(newsgroup string, startDate, endDate *time.Time) (bool, error) {
+	tpdb.mu.RLock()
+	defer tpdb.mu.RUnlock()
+
+	// Convert time pointers to nullable strings for comparison
+	var startDateStr, endDateStr interface{}
+	if startDate != nil {
+		startDateStr = startDate.UTC().Format("2006-01-02 15:04:05")
+	} else {
+		startDateStr = nil
+	}
+	if endDate != nil {
+		endDateStr = endDate.UTC().Format("2006-01-02 15:04:05")
+	} else {
+		endDateStr = nil
+	}
+
+	// Query that checks for exact match including NULL values
+	// Using IS to handle NULL comparisons properly
+	query := `
+		SELECT COUNT(*) FROM transfers
+		WHERE remote_id = ?
+		AND newsgroup = ?
+		AND (start_date IS ? OR (start_date IS NOT NULL AND ? IS NOT NULL AND start_date = ?))
+		AND (end_date IS ? OR (end_date IS NOT NULL AND ?
IS NOT NULL AND end_date = ?)) + ` + + var count int64 + err := database.RetryableQueryRowScan( + tpdb.db, + query, + []interface{}{ + tpdb.remoteID, + newsgroup, + startDateStr, startDateStr, startDateStr, + endDateStr, endDateStr, endDateStr, + }, + &count, + ) + if err != nil { + return false, fmt.Errorf("failed to check newsgroup existence: %v", err) + } + + return count > 0, nil +} + +// Close closes the database connection +func (tpdb *TransferProgressDB) Close() error { + if tpdb.db != nil { + return tpdb.db.Close() + } + return nil +} From 33f5fece219c77ed2e07e1042c86f429d3214324 Mon Sep 17 00:00:00 2001 From: go-while Date: Wed, 22 Oct 2025 03:22:32 +0000 Subject: [PATCH 073/100] testing --- internal/database/config_cache.go | 2 +- internal/database/database.go | 32 +++++------ internal/database/db_aimodels.go | 18 +++---- internal/database/db_apitokens.go | 16 +++--- internal/database/db_batch.go | 14 ++--- internal/database/db_config.go | 4 +- internal/database/db_cron_jobs.go | 14 ++--- internal/database/db_groupdbs.go | 2 +- internal/database/db_migrate.go | 2 +- internal/database/db_nntp_users.go | 36 ++++++------- internal/database/db_rescan.go | 26 ++++----- internal/database/db_sections.go | 28 +++++----- internal/database/db_sessions.go | 20 +++---- internal/database/progress.go | 8 +-- internal/database/queries.go | 86 +++++++++++++++--------------- internal/database/sqlite_retry.go | 65 ++++------------------ internal/database/thread_cache.go | 18 +++---- internal/database/tree_cache.go | 12 ++--- 18 files changed, 180 insertions(+), 223 deletions(-) diff --git a/internal/database/config_cache.go b/internal/database/config_cache.go index 612cb80..7edf9a4 100644 --- a/internal/database/config_cache.go +++ b/internal/database/config_cache.go @@ -105,7 +105,7 @@ func (cc *ConfigCache) RefreshCache() error { // getConfigValueDirect performs a direct database query (fallback method) func (cc *ConfigCache) getConfigValueDirect(key string) (string, error) { var value string - err := retryableQueryRowScan(cc.db.mainDB, "SELECT value FROM config WHERE key = ?", []interface{}{key}, &value) + err := RetryableQueryRowScan(cc.db.mainDB, "SELECT value FROM config WHERE key = ?", []interface{}{key}, &value) if err != nil { if err.Error() == "sql: no rows in result set" { return "", nil // Return empty string for missing keys diff --git a/internal/database/database.go b/internal/database/database.go index 3e049bc..329560a 100644 --- a/internal/database/database.go +++ b/internal/database/database.go @@ -252,7 +252,7 @@ func (db *Database) GetHistoryUseShortHashLen(defaultValue int) (int, bool, erro var locked string // Get the UseShortHashLen value - err := retryableQueryRowScan(db.mainDB, "SELECT value FROM config WHERE key = ?", []interface{}{"history_use_short_hash_len"}, &value) + err := RetryableQueryRowScan(db.mainDB, "SELECT value FROM config WHERE key = ?", []interface{}{"history_use_short_hash_len"}, &value) if err != nil { if err == sql.ErrNoRows { // Not found, use default @@ -262,7 +262,7 @@ func (db *Database) GetHistoryUseShortHashLen(defaultValue int) (int, bool, erro } // Check if config is locked - err = retryableQueryRowScan(db.mainDB, "SELECT value FROM config WHERE key = ?", []interface{}{"history_config_locked"}, &locked) + err = RetryableQueryRowScan(db.mainDB, "SELECT value FROM config WHERE key = ?", []interface{}{"history_config_locked"}, &locked) if err != nil && err != sql.ErrNoRows { return 0, false, fmt.Errorf("failed to query history_config_locked: %w", 
err) } @@ -296,14 +296,14 @@ func (db *Database) SetHistoryUseShortHashLen(value int) error { } // Store the value - _, err = retryableExec(db.mainDB, "INSERT OR REPLACE INTO config (key, value) VALUES (?, ?)", + _, err = RetryableExec(db.mainDB, "INSERT OR REPLACE INTO config (key, value) VALUES (?, ?)", "history_use_short_hash_len", fmt.Sprintf("%d", value)) if err != nil { return fmt.Errorf("failed to store history_use_short_hash_len: %w", err) } // Lock the configuration to prevent future changes - _, err = retryableExec(db.mainDB, "INSERT OR REPLACE INTO config (key, value) VALUES (?, ?)", + _, err = RetryableExec(db.mainDB, "INSERT OR REPLACE INTO config (key, value) VALUES (?, ?)", "history_config_locked", "true") if err != nil { return fmt.Errorf("failed to lock history configuration: %w", err) @@ -330,7 +330,7 @@ func (db *Database) InitializeSystemStatus(appVersion string) error { hostname, _ := os.Hostname() pid := os.Getpid() - _, err := retryableExec(db.mainDB, query, appVersion, pid, hostname) + _, err := RetryableExec(db.mainDB, query, appVersion, pid, hostname) if err != nil { return fmt.Errorf("failed to initialize system status: %w", err) } @@ -342,7 +342,7 @@ func (db *Database) InitializeSystemStatus(appVersion string) error { // GetNewsgroupID returns the ID of a newsgroup by name func (db *Database) GetNewsgroupID(groupName string) (int, error) { var id int - err := retryableQueryRowScan(db.mainDB, "SELECT id FROM newsgroups WHERE name = ?", []interface{}{groupName}, &id) + err := RetryableQueryRowScan(db.mainDB, "SELECT id FROM newsgroups WHERE name = ?", []interface{}{groupName}, &id) if err != nil { return 0, fmt.Errorf("failed to get newsgroup ID for '%s': %w", groupName, err) } @@ -369,7 +369,7 @@ func (db *Database) IncrementArticleSpam(groupName string, articleNum int64) err defer groupDBs.Return(db) // Update spam counter in group database - result, err := retryableExec(groupDBs.DB, "UPDATE articles SET spam = spam + 1 WHERE article_num = ?", articleNum) + result, err := RetryableExec(groupDBs.DB, "UPDATE articles SET spam = spam + 1 WHERE article_num = ?", articleNum) if err != nil { log.Printf("DEBUG: Failed to update spam count in group DB: %v", err) return fmt.Errorf("failed to increment spam count: %w", err) @@ -379,7 +379,7 @@ func (db *Database) IncrementArticleSpam(groupName string, articleNum int64) err log.Printf("DEBUG: Updated %d rows in articles table for article %d", rowsAffected, articleNum) // Add to main database spam table - result2, err := retryableExec(db.mainDB, "INSERT OR IGNORE INTO spam (newsgroup_id, article_num) VALUES (?, ?)", newsgroupID, articleNum) + result2, err := RetryableExec(db.mainDB, "INSERT OR IGNORE INTO spam (newsgroup_id, article_num) VALUES (?, ?)", newsgroupID, articleNum) if err != nil { log.Printf("DEBUG: Failed to insert into spam table: %v", err) return fmt.Errorf("failed to add to spam table: %w", err) @@ -399,7 +399,7 @@ func (db *Database) IncrementArticleHide(groupName string, articleNum int64) err } defer groupDBs.Return(db) - _, err = retryableExec(groupDBs.DB, "UPDATE articles SET hide = 1 WHERE article_num = ? AND spam > 0", articleNum) + _, err = RetryableExec(groupDBs.DB, "UPDATE articles SET hide = 1 WHERE article_num = ? 
AND spam > 0", articleNum)
 	if err != nil {
 		return fmt.Errorf("failed to increment hide count: %w", err)
 	}
@@ -415,7 +415,7 @@ func (db *Database) UnHideArticle(groupName string, articleNum int64) error {
 	}
 	defer groupDBs.Return(db)
 
-	_, err = retryableExec(groupDBs.DB, "UPDATE articles SET hide = 0 WHERE article_num = ?", articleNum)
+	_, err = RetryableExec(groupDBs.DB, "UPDATE articles SET hide = 0 WHERE article_num = ?", articleNum)
 	if err != nil {
 		return fmt.Errorf("failed to unhide: %w", err)
 	}
@@ -444,7 +444,7 @@ func (db *Database) DecrementArticleSpam(groupName string, articleNum int64) err
 
 	// Check current spam count first
 	var currentSpam int
-	err = retryableQueryRowScan(groupDBs.DB, "SELECT spam FROM articles WHERE article_num = ?", []interface{}{articleNum}, &currentSpam)
+	err = RetryableQueryRowScan(groupDBs.DB, "SELECT spam FROM articles WHERE article_num = ?", []interface{}{articleNum}, &currentSpam)
 	if err != nil {
 		log.Printf("DEBUG: Failed to get current spam count: %v", err)
 		return fmt.Errorf("failed to get current spam count: %w", err)
@@ -456,7 +456,7 @@ func (db *Database) DecrementArticleSpam(groupName string, articleNum int64) err
 	}
 
 	// Decrement spam counter in group database
-	result, err := retryableExec(groupDBs.DB, "UPDATE articles SET spam = spam - 1 WHERE article_num = ? AND spam > 0", articleNum)
+	result, err := RetryableExec(groupDBs.DB, "UPDATE articles SET spam = spam - 1 WHERE article_num = ? AND spam > 0", articleNum)
 	if err != nil {
 		log.Printf("DEBUG: Failed to decrement spam count in group DB: %v", err)
 		return fmt.Errorf("failed to decrement spam count: %w", err)
@@ -467,7 +467,7 @@ func (db *Database) DecrementArticleSpam(groupName string, articleNum int64) err
 
 	// If spam count reaches 0, remove from main database spam table and clear all user flags
 	if currentSpam == 1 {
-		result2, err := retryableExec(db.mainDB, "DELETE FROM spam WHERE newsgroup_id = ? AND article_num = ?", newsgroupID, articleNum)
+		result2, err := RetryableExec(db.mainDB, "DELETE FROM spam WHERE newsgroup_id = ? AND article_num = ?", newsgroupID, articleNum)
 		if err != nil {
 			log.Printf("DEBUG: Failed to remove from spam table: %v", err)
 			return fmt.Errorf("failed to remove from spam table: %w", err)
@@ -477,7 +477,7 @@ func (db *Database) DecrementArticleSpam(groupName string, articleNum int64) err
 		log.Printf("DEBUG: Removed %d rows from spam table", rowsAffected2)
 
 		// Also remove all user spam flags for this article
-		result3, err := retryableExec(db.mainDB, "DELETE FROM user_spam_flags WHERE newsgroup_id = ? AND article_num = ?", newsgroupID, articleNum)
+		result3, err := RetryableExec(db.mainDB, "DELETE FROM user_spam_flags WHERE newsgroup_id = ? AND article_num = ?", newsgroupID, articleNum)
 		if err != nil {
 			log.Printf("DEBUG: Failed to clear user spam flags: %v", err)
 			return fmt.Errorf("failed to clear user spam flags: %w", err)
@@ -499,7 +499,7 @@ func (db *Database) HasUserFlaggedSpam(userID int64, groupName string, articleNu
 	}
 
 	var count int
-	err = retryableQueryRowScan(db.mainDB, `
+	err = RetryableQueryRowScan(db.mainDB, `
 		SELECT COUNT(*) FROM user_spam_flags
 		WHERE user_id = ? AND newsgroup_id = ?
AND article_num = ?`, []interface{}{userID, newsgroupID, articleNum}, &count) @@ -519,7 +519,7 @@ func (db *Database) RecordUserSpamFlag(userID int64, groupName string, articleNu return fmt.Errorf("failed to get newsgroup ID: %w", err) } - _, err = retryableExec(db.mainDB, ` + _, err = RetryableExec(db.mainDB, ` INSERT OR IGNORE INTO user_spam_flags (user_id, newsgroup_id, article_num) VALUES (?, ?, ?)`, userID, newsgroupID, articleNum) diff --git a/internal/database/db_aimodels.go b/internal/database/db_aimodels.go index 5233387..cd14519 100644 --- a/internal/database/db_aimodels.go +++ b/internal/database/db_aimodels.go @@ -18,7 +18,7 @@ func (db *Database) GetActiveAIModels() ([]*models.AIModel, error) { WHERE is_active = 1 ORDER BY sort_order ASC, display_name ASC` - rows, err := retryableQuery(db.mainDB, query) + rows, err := RetryableQuery(db.mainDB, query) if err != nil { return nil, err } @@ -52,7 +52,7 @@ func (db *Database) GetDefaultAIModel() (*models.AIModel, error) { LIMIT 1` model := &models.AIModel{} - err := retryableQueryRowScan(db.mainDB, query, nil, + err := RetryableQueryRowScan(db.mainDB, query, nil, &model.ID, &model.PostKey, &model.OllamaModelName, &model.DisplayName, &model.Description, &model.IsActive, &model.IsDefault, &model.SortOrder, &model.CreatedAt, &model.UpdatedAt, @@ -80,7 +80,7 @@ func (db *Database) GetFirstActiveAIModel() (*models.AIModel, error) { LIMIT 1` model := &models.AIModel{} - err := retryableQueryRowScan(db.mainDB, query, nil, + err := RetryableQueryRowScan(db.mainDB, query, nil, &model.ID, &model.PostKey, &model.OllamaModelName, &model.DisplayName, &model.Description, &model.IsActive, &model.IsDefault, &model.SortOrder, &model.CreatedAt, &model.UpdatedAt, @@ -102,7 +102,7 @@ func (db *Database) GetAIModelByPostKey(postKey string) (*models.AIModel, error) WHERE post_key = ?` model := &models.AIModel{} - err := retryableQueryRowScan(db.mainDB, query, []interface{}{postKey}, + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{postKey}, &model.ID, &model.PostKey, &model.OllamaModelName, &model.DisplayName, &model.Description, &model.IsActive, &model.IsDefault, &model.SortOrder, &model.CreatedAt, &model.UpdatedAt, @@ -122,7 +122,7 @@ func (db *Database) CreateAIModel(postKey, ollamaModelName, displayName, descrip query := `INSERT INTO ai_models (post_key, ollama_model_name, display_name, description, is_active, is_default, sort_order) VALUES (?, ?, ?, ?, ?, ?, ?)` - result, err := retryableExec(db.mainDB, query, postKey, ollamaModelName, displayName, description, isActive, isDefault, sortOrder) + result, err := RetryableExec(db.mainDB, query, postKey, ollamaModelName, displayName, description, isActive, isDefault, sortOrder) if err != nil { return nil, err } @@ -159,7 +159,7 @@ func (db *Database) UpdateAIModel(id int, ollamaModelName, displayName, descript SET ollama_model_name = ?, display_name = ?, description = ?, is_active = ?, is_default = ?, sort_order = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?` - _, err := retryableExec(db.mainDB, query, ollamaModelName, displayName, description, isActive, isDefault, sortOrder, id) + _, err := RetryableExec(db.mainDB, query, ollamaModelName, displayName, description, isActive, isDefault, sortOrder, id) return err } @@ -168,7 +168,7 @@ func (db *Database) SetDefaultAIModel(id int) error { db.MainMutex.Lock() defer db.MainMutex.Unlock() - return retryableTransactionExec(db.mainDB, func(tx *sql.Tx) error { + return RetryableTransactionExec(db.mainDB, func(tx *sql.Tx) error { // First, unset 
all defaults _, err := tx.Exec("UPDATE ai_models SET is_default = 0") if err != nil { @@ -186,7 +186,7 @@ func (db *Database) DeleteAIModel(id int) error { db.MainMutex.Lock() defer db.MainMutex.Unlock() query := `DELETE FROM ai_models WHERE id = ?` - _, err := retryableExec(db.mainDB, query, id) + _, err := RetryableExec(db.mainDB, query, id) return err } @@ -199,7 +199,7 @@ func (db *Database) GetAllAIModels() ([]*models.AIModel, error) { FROM ai_models ORDER BY sort_order ASC, display_name ASC` - rows, err := retryableQuery(db.mainDB, query) + rows, err := RetryableQuery(db.mainDB, query) if err != nil { return nil, err } diff --git a/internal/database/db_apitokens.go b/internal/database/db_apitokens.go index c7ee2af..63f7048 100644 --- a/internal/database/db_apitokens.go +++ b/internal/database/db_apitokens.go @@ -52,7 +52,7 @@ func (db *Database) CreateAPIToken(ownerName string, ownerID int64, expiresAt *t db.MainMutex.Lock() defer db.MainMutex.Unlock() - result, err := retryableExec(db.mainDB, query_CreateAPIToken, hashedToken, ownerName, ownerID, expiresAt) + result, err := RetryableExec(db.mainDB, query_CreateAPIToken, hashedToken, ownerName, ownerID, expiresAt) if err != nil { return nil, "", err } @@ -88,7 +88,7 @@ func (db *Database) ValidateAPIToken(plainToken string) (*APIToken, error) { WHERE apitoken = ? AND is_enabled = 1` var token APIToken - err := retryableQueryRowScan(db.mainDB, query, []interface{}{hashedToken}, + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{hashedToken}, &token.ID, &token.APIToken, &token.OwnerName, &token.OwnerID, &token.CreatedAt, &token.LastUsedAt, &token.ExpiresAt, &token.IsEnabled, &token.UsageCount, @@ -114,7 +114,7 @@ func (db *Database) UpdateTokenUsage(tokenID int64) error { SET last_used_at = CURRENT_TIMESTAMP, usage_count = usage_count + 1 WHERE id = ?` - _, err := retryableExec(db.mainDB, query, tokenID) + _, err := RetryableExec(db.mainDB, query, tokenID) return err } @@ -127,7 +127,7 @@ func (db *Database) ListAPITokens() ([]*APIToken, error) { FROM api_tokens ORDER BY created_at DESC` - rows, err := retryableQuery(db.mainDB, query) + rows, err := RetryableQuery(db.mainDB, query) if err != nil { return nil, err } @@ -156,7 +156,7 @@ func (db *Database) DisableAPIToken(tokenID int) error { defer db.MainMutex.Unlock() query := `UPDATE api_tokens SET is_enabled = 0 WHERE id = ?` - _, err := retryableExec(db.mainDB, query, tokenID) + _, err := RetryableExec(db.mainDB, query, tokenID) return err } @@ -166,7 +166,7 @@ func (db *Database) EnableAPIToken(tokenID int) error { defer db.MainMutex.Unlock() query := `UPDATE api_tokens SET is_enabled = 1 WHERE id = ?` - _, err := retryableExec(db.mainDB, query, tokenID) + _, err := RetryableExec(db.mainDB, query, tokenID) return err } @@ -176,7 +176,7 @@ func (db *Database) DeleteAPIToken(tokenID int) error { defer db.MainMutex.Unlock() query := `DELETE FROM api_tokens WHERE id = ?` - _, err := retryableExec(db.mainDB, query, tokenID) + _, err := RetryableExec(db.mainDB, query, tokenID) return err } @@ -186,7 +186,7 @@ func (db *Database) CleanupExpiredTokens() (int, error) { defer db.MainMutex.Unlock() query := `DELETE FROM api_tokens WHERE expires_at IS NOT NULL AND expires_at < CURRENT_TIMESTAMP` - result, err := retryableExec(db.mainDB, query) + result, err := RetryableExec(db.mainDB, query) if err != nil { return 0, err } diff --git a/internal/database/db_batch.go b/internal/database/db_batch.go index 67e4e41..193547d 100644 --- a/internal/database/db_batch.go +++ 
b/internal/database/db_batch.go @@ -669,7 +669,7 @@ retry2: //LockQueryChan() //defer ReturnQueryChan() // Use retryable transaction to prevent race conditions between concurrent batches - err = retryableTransactionExec(sq.db.mainDB, func(tx *sql.Tx) error { + err = RetryableTransactionExec(sq.db.mainDB, func(tx *sql.Tx) error { // Use UPSERT to handle both new and existing newsgroups _, txErr := tx.Exec(query_processNewsgroupBatch, *task.Newsgroup, len(batches), maxArticleNum, time.Now().UTC().Format("2006-01-02 15:04:05")) @@ -786,7 +786,7 @@ func (c *SQ3batch) processOverviewBatch(groupDBs *GroupDBs, batches []*models.Ar // ORDER BY not needed; we map by message_id query := query_processOverviewBatch2 + getPlaceholders(len(args)) + `)` log.Printf("[OVB-BATCH] group '%s': Selecting article numbers for %d articles queryLen=%d", groupDBs.Newsgroup, len(batches), len(query)) - rows, err := retryableQuery(groupDBs.DB, query, args...) + rows, err := RetryableQuery(groupDBs.DB, query, args...) if err != nil { log.Printf("[OVB-BATCH] group '%s': Failed to execute batch select: %v", groupDBs.Newsgroup, err) return fmt.Errorf("failed to execute batch select for group '%s': %w", groupDBs.Newsgroup, err) @@ -1073,7 +1073,7 @@ func (c *SQ3batch) batchUpdateReplyCounts(groupDBs *GroupDBs, parentCounts map[* // Use strings.Repeat for efficient SQL building - zero string copies // Execute the batch UPDATE //log.Printf("[P-BATCH] group '%s': update batch reply count for %d articles (queryLen=%d)", groupDBs.Newsgroup, len(messageIDs), len(sql)) - _, err := retryableExec(groupDBs.DB, fmt.Sprintf(query_batchUpdateReplyCounts2, strings.Repeat(query_batchUpdateReplyCounts1, len(messageIDs)), getPlaceholders(len(messageIDs))), args...) + _, err := RetryableExec(groupDBs.DB, fmt.Sprintf(query_batchUpdateReplyCounts2, strings.Repeat(query_batchUpdateReplyCounts1, len(messageIDs)), getPlaceholders(len(messageIDs))), args...) if err != nil { log.Printf("[P-BATCH] group '%s': Failed to execute batch reply count update: %v", groupDBs.Newsgroup, err) } @@ -1098,7 +1098,7 @@ func (c *SQ3batch) findThreadRoot(groupDBs *GroupDBs, refs []string) (int64, err // Check if this article is a thread root with retryable logic var rootArticle int64 threadQuery := `SELECT root_article FROM threads WHERE root_article = (SELECT article_num FROM articles WHERE message_id = ? 
LIMIT 1) LIMIT 1` - err := retryableQueryRowScan(groupDBs.DB, threadQuery, []interface{}{refMessageID}, &rootArticle) + err := RetryableQueryRowScan(groupDBs.DB, threadQuery, []interface{}{refMessageID}, &rootArticle) if err == nil { return rootArticle, nil } @@ -1115,8 +1115,8 @@ func (sq *SQ3batch) batchUpdateThreadCache(groupDBs *GroupDBs, threadUpdates map var updatedCount int var initializedCount int - // Use retryableTransactionExec for SQLite lock safety - err := retryableTransactionExec(groupDBs.DB, func(tx *sql.Tx) error { + // Use RetryableTransactionExec for SQLite lock safety + err := RetryableTransactionExec(groupDBs.DB, func(tx *sql.Tx) error { // Reset ShutDownCounters for each retry attempt updatedCount = 0 initializedCount = 0 @@ -1200,7 +1200,7 @@ func (sq *SQ3batch) batchUpdateThreadCache(groupDBs *GroupDBs, threadUpdates map } } - return nil // Transaction will be committed by retryableTransactionExec + return nil // Transaction will be committed by RetryableTransactionExec }) if err != nil { diff --git a/internal/database/db_config.go b/internal/database/db_config.go index ad3f925..899e6ef 100644 --- a/internal/database/db_config.go +++ b/internal/database/db_config.go @@ -16,7 +16,7 @@ func (db *Database) GetConfigValue(key string) (string, error) { // getConfigValueDirect retrieves a configuration value directly from the database func (db *Database) getConfigValueDirect(key string) (string, error) { var value string - err := retryableQueryRowScan(db.mainDB, "SELECT value FROM config WHERE key = ?", []interface{}{key}, &value) + err := RetryableQueryRowScan(db.mainDB, "SELECT value FROM config WHERE key = ?", []interface{}{key}, &value) if err != nil { if err == sql.ErrNoRows { return "", nil // Return empty string for missing keys @@ -37,7 +37,7 @@ func (db *Database) SetConfigValue(key, value string) error { // setConfigValueDirect sets or updates a configuration value directly in the database func (db *Database) setConfigValueDirect(key, value string) error { - _, err := retryableExec(db.mainDB, ` + _, err := RetryableExec(db.mainDB, ` INSERT OR REPLACE INTO config (key, value) VALUES (?, ?) 
`, key, value) diff --git a/internal/database/db_cron_jobs.go b/internal/database/db_cron_jobs.go index f5e3c02..7c47cfc 100644 --- a/internal/database/db_cron_jobs.go +++ b/internal/database/db_cron_jobs.go @@ -13,7 +13,7 @@ const query_GetAllCronJobs = `SELECT id, name, command, interval_minutes, start_ // GetAllCronJobs retrieves all cron jobs func (db *Database) GetAllCronJobs() ([]*models.CronJob, error) { - rows, err := retryableQuery(db.mainDB, query_GetAllCronJobs) + rows, err := RetryableQuery(db.mainDB, query_GetAllCronJobs) if err != nil { return nil, err } @@ -45,7 +45,7 @@ func (db *Database) GetCronJobByID(id int64) (*models.CronJob, error) { var cronJob models.CronJob var lastRun sql.NullTime - err := retryableQueryRowScan(db.mainDB, query_GetCronJobByID, []interface{}{id}, + err := RetryableQueryRowScan(db.mainDB, query_GetCronJobByID, []interface{}{id}, &cronJob.ID, &cronJob.Name, &cronJob.Command, &cronJob.IntervalMinutes, &cronJob.StartHourMinute, &cronJob.Enabled, &lastRun, &cronJob.RunCount, &cronJob.CreatedAt, &cronJob.UpdatedAt) if err != nil { @@ -63,7 +63,7 @@ const query_InsertCronJob = `INSERT INTO cron_jobs (name, command, interval_minu // InsertCronJob creates a new cron job func (db *Database) InsertCronJob(cronJob *models.CronJob) error { - _, err := retryableExec(db.mainDB, query_InsertCronJob, cronJob.Name, cronJob.Command, cronJob.IntervalMinutes, cronJob.StartHourMinute, cronJob.Enabled) + _, err := RetryableExec(db.mainDB, query_InsertCronJob, cronJob.Name, cronJob.Command, cronJob.IntervalMinutes, cronJob.StartHourMinute, cronJob.Enabled) return err } @@ -71,7 +71,7 @@ const query_UpdateCronJob = `UPDATE cron_jobs SET name = ?, command = ?, interva // UpdateCronJob updates an existing cron job func (db *Database) UpdateCronJob(cronJob *models.CronJob) error { - _, err := retryableExec(db.mainDB, query_UpdateCronJob, cronJob.Name, cronJob.Command, cronJob.IntervalMinutes, cronJob.StartHourMinute, cronJob.Enabled, cronJob.ID) + _, err := RetryableExec(db.mainDB, query_UpdateCronJob, cronJob.Name, cronJob.Command, cronJob.IntervalMinutes, cronJob.StartHourMinute, cronJob.Enabled, cronJob.ID) return err } @@ -79,7 +79,7 @@ const query_DeleteCronJob = `DELETE FROM cron_jobs WHERE id = ?` // DeleteCronJob deletes a cron job func (db *Database) DeleteCronJob(id int64) error { - _, err := retryableExec(db.mainDB, query_DeleteCronJob, id) + _, err := RetryableExec(db.mainDB, query_DeleteCronJob, id) return err } @@ -91,7 +91,7 @@ func (db *Database) ToggleCronJob(id int64) error { if err != nil { return err } - _, err = retryableExec(db.mainDB, query_ToggleCronJob, !cronJob.Enabled, id) + _, err = RetryableExec(db.mainDB, query_ToggleCronJob, !cronJob.Enabled, id) return err } @@ -99,6 +99,6 @@ const query_UpdateCronJobRunStats = `UPDATE cron_jobs SET last_run = ?, run_coun // UpdateCronJobRunStats updates the run statistics after a cron job execution func (db *Database) UpdateCronJobRunStats(id int64) error { - _, err := retryableExec(db.mainDB, query_UpdateCronJobRunStats, time.Now(), id) + _, err := RetryableExec(db.mainDB, query_UpdateCronJobRunStats, time.Now(), id) return err } diff --git a/internal/database/db_groupdbs.go b/internal/database/db_groupdbs.go index 17ab678..b5762bb 100644 --- a/internal/database/db_groupdbs.go +++ b/internal/database/db_groupdbs.go @@ -170,7 +170,7 @@ func (dbs *GroupDBs) Return(db *Database) { func (db *GroupDBs) ExistsMsgIdInArticlesDB(messageID string) bool { query := "SELECT 1 FROM articles WHERE message_id = ? 
LIMIT 1" var exists bool - if err := retryableQueryRowScan(db.DB, query, []interface{}{messageID}, &exists); err != nil { + if err := RetryableQueryRowScan(db.DB, query, []interface{}{messageID}, &exists); err != nil { return false } return exists diff --git a/internal/database/db_migrate.go b/internal/database/db_migrate.go index cf4f51c..366c47c 100644 --- a/internal/database/db_migrate.go +++ b/internal/database/db_migrate.go @@ -179,7 +179,7 @@ func ensureMigrationsTable(db *sql.DB, dbType string) error { func getAppliedMigrations(db *sql.DB, dbType string) (map[string]bool, error) { applied := make(map[string]bool) - rows, err := retryableQuery(db, `SELECT filename FROM schema_migrations WHERE db_type = ? OR db_type = ''`, dbType) + rows, err := RetryableQuery(db, `SELECT filename FROM schema_migrations WHERE db_type = ? OR db_type = ''`, dbType) if err != nil { log.Printf("Failed to query applied migrations for %s: %v", dbType, err) return nil, fmt.Errorf("failed to query applied migrations for %s: %w", dbType, err) diff --git a/internal/database/db_nntp_users.go b/internal/database/db_nntp_users.go index 6fe85b2..b8de9dc 100644 --- a/internal/database/db_nntp_users.go +++ b/internal/database/db_nntp_users.go @@ -21,7 +21,7 @@ func (db *Database) InsertNNTPUser(u *models.NNTPUser) error { query := `INSERT INTO nntp_users (username, password, maxconns, posting, web_user_id, is_active) VALUES (?, ?, ?, ?, ?, ?)` - _, err = retryableExec(db.mainDB, query, u.Username, string(hashedPassword), u.MaxConns, u.Posting, u.WebUserID, u.IsActive) + _, err = RetryableExec(db.mainDB, query, u.Username, string(hashedPassword), u.MaxConns, u.Posting, u.WebUserID, u.IsActive) return err } @@ -31,7 +31,7 @@ func (db *Database) GetNNTPUserByUsername(username string) (*models.NNTPUser, er FROM nntp_users WHERE username = ? AND is_active = 1` var u models.NNTPUser - err := retryableQueryRowScan(db.mainDB, query, []interface{}{username}, &u.ID, &u.Username, &u.Password, &u.MaxConns, &u.Posting, &u.WebUserID, + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{username}, &u.ID, &u.Username, &u.Password, &u.MaxConns, &u.Posting, &u.WebUserID, &u.CreatedAt, &u.UpdatedAt, &u.LastLogin, &u.IsActive) if err != nil { return nil, err @@ -45,7 +45,7 @@ func (db *Database) GetNNTPUserByID(id int) (*models.NNTPUser, error) { FROM nntp_users WHERE id = ?` var u models.NNTPUser - err := retryableQueryRowScan(db.mainDB, query, []interface{}{id}, &u.ID, &u.Username, &u.Password, &u.MaxConns, &u.Posting, &u.WebUserID, + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{id}, &u.ID, &u.Username, &u.Password, &u.MaxConns, &u.Posting, &u.WebUserID, &u.CreatedAt, &u.UpdatedAt, &u.LastLogin, &u.IsActive) if err != nil { return nil, err @@ -59,7 +59,7 @@ func (db *Database) GetNNTPUserByWebUserID(webUserID int64) (*models.NNTPUser, e FROM nntp_users WHERE web_user_id = ? 
AND is_active = 1` var u models.NNTPUser - err := retryableQueryRowScan(db.mainDB, query, []interface{}{webUserID}, &u.ID, &u.Username, &u.Password, &u.MaxConns, &u.Posting, &u.WebUserID, + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{webUserID}, &u.ID, &u.Username, &u.Password, &u.MaxConns, &u.Posting, &u.WebUserID, &u.CreatedAt, &u.UpdatedAt, &u.LastLogin, &u.IsActive) if err != nil { return nil, err @@ -76,7 +76,7 @@ func (db *Database) SearchNNTPUsers(searchTerm string, limit int) ([]*models.NNT LIMIT ?` searchPattern := "%" + searchTerm + "%" - rows, err := retryableQuery(db.mainDB, query, searchPattern, limit) + rows, err := RetryableQuery(db.mainDB, query, searchPattern, limit) if err != nil { return nil, err } @@ -97,7 +97,7 @@ func (db *Database) SearchNNTPUsers(searchTerm string, limit int) ([]*models.NNT func (db *Database) GetAllNNTPUsers() ([]*models.NNTPUser, error) { query := `SELECT id, username, password, maxconns, posting, web_user_id, created_at, updated_at, last_login, is_active FROM nntp_users ORDER BY username` - rows, err := retryableQuery(db.mainDB, query) + rows, err := RetryableQuery(db.mainDB, query) if err != nil { return nil, err } @@ -141,48 +141,48 @@ func (db *Database) UpdateNNTPUserPassword(userID int, password string) error { } query := `UPDATE nntp_users SET password = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?` - _, err = retryableExec(db.mainDB, query, string(hashedPassword), userID) + _, err = RetryableExec(db.mainDB, query, string(hashedPassword), userID) return err } // UpdateNNTPUserLastLogin updates the last login timestamp func (db *Database) UpdateNNTPUserLastLogin(userID int) error { query := `UPDATE nntp_users SET last_login = CURRENT_TIMESTAMP WHERE id = ?` - _, err := retryableExec(db.mainDB, query, userID) + _, err := RetryableExec(db.mainDB, query, userID) return err } // UpdateNNTPUserPermissions updates maxconns and posting permissions func (db *Database) UpdateNNTPUserPermissions(userID int, maxConns int, posting bool) error { query := `UPDATE nntp_users SET maxconns = ?, posting = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?` - _, err := retryableExec(db.mainDB, query, maxConns, posting, userID) + _, err := RetryableExec(db.mainDB, query, maxConns, posting, userID) return err } // DeactivateNNTPUser deactivates an NNTP user (soft delete) func (db *Database) DeactivateNNTPUser(userID int) error { query := `UPDATE nntp_users SET is_active = 0, updated_at = CURRENT_TIMESTAMP WHERE id = ?` - _, err := retryableExec(db.mainDB, query, userID) + _, err := RetryableExec(db.mainDB, query, userID) return err } // ActivateNNTPUser activates an NNTP user (reverses soft delete) func (db *Database) ActivateNNTPUser(userID int) error { query := `UPDATE nntp_users SET is_active = 1, updated_at = CURRENT_TIMESTAMP WHERE id = ?` - _, err := retryableExec(db.mainDB, query, userID) + _, err := RetryableExec(db.mainDB, query, userID) return err } // DeleteNNTPUser permanently deletes an NNTP user func (db *Database) DeleteNNTPUser(userID int) error { // First delete any sessions - _, err := retryableExec(db.mainDB, `DELETE FROM nntp_sessions WHERE user_id = ?`, userID) + _, err := RetryableExec(db.mainDB, `DELETE FROM nntp_sessions WHERE user_id = ?`, userID) if err != nil { return fmt.Errorf("failed to delete NNTP sessions: %w", err) } // Then delete the user - _, err = retryableExec(db.mainDB, `DELETE FROM nntp_users WHERE id = ?`, userID) + _, err = RetryableExec(db.mainDB, `DELETE FROM nntp_users WHERE id = ?`, userID) return err 
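[Editorial aside, not part of the patch: the hunks in this commit are a mechanical rename of the unexported retryable* SQLite helpers to exported Retryable* names, so that packages outside internal/database, such as the transfer-progress code in internal/nntp, can reuse the same busy/locked backoff-and-retry handling. A sketch of such a caller, modelled on getOrCreateRemoteTransferProgress from the previous patch; the package and function names here are hypothetical.]

package example

import (
	"database/sql"

	"github.com/go-while/go-pugleaf/internal/database"
)

// lookupOrCreateRemote reuses the exported retry wrappers from another package:
// RetryableQueryRowScan and RetryableExec retry on SQLITE_BUSY/locked errors
// with the same jittered backoff as the internal/database call sites above.
func lookupOrCreateRemote(db *sql.DB, hostname string) (int64, error) {
	var id int64
	err := database.RetryableQueryRowScan(db,
		"SELECT id FROM remotes WHERE hostname = ?", []interface{}{hostname}, &id)
	if err == nil {
		return id, nil
	}
	if err != sql.ErrNoRows {
		return 0, err
	}
	res, err := database.RetryableExec(db,
		"INSERT INTO remotes (hostname) VALUES (?)", hostname)
	if err != nil {
		return 0, err
	}
	return res.LastInsertId()
}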
} @@ -191,21 +191,21 @@ func (db *Database) DeleteNNTPUser(userID int) error { // CreateNNTPSession creates a new NNTP session func (db *Database) CreateNNTPSession(userID int, connectionID, remoteAddr string) error { query := `INSERT INTO nntp_sessions (user_id, connection_id, remote_addr) VALUES (?, ?, ?)` - _, err := retryableExec(db.mainDB, query, userID, connectionID, remoteAddr) + _, err := RetryableExec(db.mainDB, query, userID, connectionID, remoteAddr) return err } // UpdateNNTPSessionActivity updates the last activity timestamp func (db *Database) UpdateNNTPSessionActivity(connectionID string) error { query := `UPDATE nntp_sessions SET last_activity = CURRENT_TIMESTAMP WHERE connection_id = ? AND is_active = 1` - _, err := retryableExec(db.mainDB, query, connectionID) + _, err := RetryableExec(db.mainDB, query, connectionID) return err } // CloseNNTPSession marks a session as inactive func (db *Database) CloseNNTPSession(connectionID string) error { query := `UPDATE nntp_sessions SET is_active = 0 WHERE connection_id = ?` - _, err := retryableExec(db.mainDB, query, connectionID) + _, err := RetryableExec(db.mainDB, query, connectionID) return err } @@ -213,7 +213,7 @@ func (db *Database) CloseNNTPSession(connectionID string) error { func (db *Database) GetActiveNNTPSessionsForUser(userID int) (int, error) { query := `SELECT COUNT(*) FROM nntp_sessions WHERE user_id = ? AND is_active = 1` var count int - err := retryableQueryRowScan(db.mainDB, query, []interface{}{userID}, &count) + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{userID}, &count) return count, err } @@ -221,7 +221,7 @@ func (db *Database) GetActiveNNTPSessionsForUser(userID int) (int, error) { func (db *Database) CleanupOldNNTPSessions(olderThan time.Duration) error { cutoff := time.Now().Add(-olderThan) query := `DELETE FROM nntp_sessions WHERE is_active = 0 AND last_activity < ?` - _, err := retryableExec(db.mainDB, query, cutoff) + _, err := RetryableExec(db.mainDB, query, cutoff) return err } diff --git a/internal/database/db_rescan.go b/internal/database/db_rescan.go index 73a0e89..2b3794a 100644 --- a/internal/database/db_rescan.go +++ b/internal/database/db_rescan.go @@ -19,7 +19,7 @@ func (db *Database) GetLatestArticleNumberFromOverview(newsgroup string) (int64, defer groupDB.Return(db) var latestArticle int64 - err = retryableQueryRowScan(groupDB.DB, ` + err = RetryableQueryRowScan(groupDB.DB, ` SELECT MAX(article_num) FROM articles `, []interface{}{}, &latestArticle) @@ -32,7 +32,7 @@ func (db *Database) GetLatestArticleNumberFromOverview(newsgroup string) (int64, func (db *Database) GetLatestArticleNumbers(newsgroup string) (map[string]int64, error) { // Query the latest article numbers for the specified newsgroup - rows, err := retryableQuery(db.GetMainDB(), ` + rows, err := RetryableQuery(db.GetMainDB(), ` SELECT name, last_article FROM newsgroups WHERE name = ? @@ -105,7 +105,7 @@ func (db *Database) CheckDatabaseConsistency(newsgroup string) (*ConsistencyRepo defer groupDB.Return(db) // 3. 
Get max article numbers from each table (handle NULL for empty tables) - err = retryableQueryRowScan(groupDB.DB, "SELECT COALESCE(MAX(article_num), 0) FROM articles", []interface{}{}, &report.ArticlesMaxNum) + err = RetryableQueryRowScan(groupDB.DB, "SELECT COALESCE(MAX(article_num), 0) FROM articles", []interface{}{}, &report.ArticlesMaxNum) if err != nil { report.Errors = append(report.Errors, fmt.Sprintf("Failed to get max article_num from articles: %v", err)) } @@ -113,13 +113,13 @@ func (db *Database) CheckDatabaseConsistency(newsgroup string) (*ConsistencyRepo // Since overview is now unified with articles, OverviewMaxNum equals ArticlesMaxNum report.OverviewMaxNum = report.ArticlesMaxNum - err = retryableQueryRowScan(groupDB.DB, "SELECT COALESCE(MAX(root_article), 0) FROM threads", []interface{}{}, &report.ThreadsMaxNum) + err = RetryableQueryRowScan(groupDB.DB, "SELECT COALESCE(MAX(root_article), 0) FROM threads", []interface{}{}, &report.ThreadsMaxNum) if err != nil { report.Errors = append(report.Errors, fmt.Sprintf("Failed to get max root_article from threads: %v", err)) } // 4. Get counts from each table - err = retryableQueryRowScan(groupDB.DB, "SELECT COUNT(*) FROM articles", []interface{}{}, &report.ArticleCount) + err = RetryableQueryRowScan(groupDB.DB, "SELECT COUNT(*) FROM articles", []interface{}{}, &report.ArticleCount) if err != nil { report.Errors = append(report.Errors, fmt.Sprintf("Failed to get article count: %v", err)) } @@ -127,7 +127,7 @@ func (db *Database) CheckDatabaseConsistency(newsgroup string) (*ConsistencyRepo // Since overview is now unified with articles, OverviewCount equals ArticleCount report.OverviewCount = report.ArticleCount - err = retryableQueryRowScan(groupDB.DB, "SELECT COUNT(*) FROM threads", []interface{}{}, &report.ThreadCount) + err = RetryableQueryRowScan(groupDB.DB, "SELECT COUNT(*) FROM threads", []interface{}{}, &report.ThreadCount) if err != nil { report.Errors = append(report.Errors, fmt.Sprintf("Failed to get thread count: %v", err)) } @@ -170,7 +170,7 @@ func (db *Database) findMissingArticles(groupDB *GroupDBs, maxArticleNum int64) for offset < maxArticleNum { // Get batch of article numbers - rows, err := retryableQuery(groupDB.DB, + rows, err := RetryableQuery(groupDB.DB, "SELECT article_num FROM articles WHERE article_num > ? ORDER BY article_num LIMIT ?", offset, RescanBatchSize) if err != nil { @@ -229,7 +229,7 @@ func (db *Database) findOrphanedThreads(groupDB *GroupDBs) []int64 { for { // Get batch of article numbers - rows, err := retryableQuery(groupDB.DB, + rows, err := RetryableQuery(groupDB.DB, "SELECT article_num FROM articles WHERE article_num > ? ORDER BY article_num LIMIT ?", offset, RescanBatchSize) if err != nil { @@ -275,7 +275,7 @@ func (db *Database) findOrphanedThreads(groupDB *GroupDBs) []int64 { for { // Get batch of distinct root_article numbers from threads table - rows, err := retryableQuery(groupDB.DB, + rows, err := RetryableQuery(groupDB.DB, "SELECT DISTINCT root_article FROM threads WHERE root_article > ? 
ORDER BY root_article LIMIT ?",
 			offset, RescanBatchSize)
 		if err != nil {
@@ -409,7 +409,7 @@ func (db *Database) RebuildThreadsFromScratch(newsgroup string, verbose bool, gr
 
 	// Get total article count
 	var err error
-	err = retryableQueryRowScan(groupDB.DB, query_RebuildThreadsFromScratch1, []interface{}{}, &report.TotalArticles)
+	err = RetryableQueryRowScan(groupDB.DB, query_RebuildThreadsFromScratch1, []interface{}{}, &report.TotalArticles)
 	if err != nil {
 		report.Errors = append(report.Errors, fmt.Sprintf("Failed to get article count: %v", err))
 		return report, err
@@ -489,7 +489,7 @@ func (db *Database) RebuildThreadsFromScratch(newsgroup string, verbose bool, gr
 		}
 
 		// Load batch of article mappings
-		rows, err := retryableQuery(groupDB.DB, query_RebuildThreadsFromScratch5, currentBatchSize, offset)
+		rows, err := RetryableQuery(groupDB.DB, query_RebuildThreadsFromScratch5, currentBatchSize, offset)
 		if err != nil {
 			report.Errors = append(report.Errors,
 				fmt.Sprintf("Failed to query articles batch: %v", err))
@@ -573,7 +573,7 @@ const query_processThreadBatch3 = "INSERT INTO threads (root_article, parent_art
 // Based on the actual threading system: only ROOT articles go in threads table, replies only update thread_cache
 func (db *Database) processThreadBatch(groupDB *GroupDBs, msgIDToArticleNum map[string]int64, offset, batchSize int64, verbose bool) (int, error) {
 	// Get batch of articles with their references and dates
-	rows, err := retryableQuery(groupDB.DB, query_processThreadBatch1, batchSize, offset)
+	rows, err := RetryableQuery(groupDB.DB, query_processThreadBatch1, batchSize, offset)
 	if err != nil {
 		return 0, fmt.Errorf("failed to query articles: %w", err)
 	}
@@ -774,7 +774,7 @@ func (db *Database) initializeThreadCacheSimple(groupDB *GroupDBs, threadRoot in
 		rootDate = now
 	}
 
-	_, err := retryableExec(groupDB.DB, query_initializeThreadCacheSimple1,
+	_, err := RetryableExec(groupDB.DB, query_initializeThreadCacheSimple1,
 		threadRoot,
 		rootDate.UTC().Format("2006-01-02 15:04:05"),
 		threadRoot, // last_child_number starts as the root itself
diff --git a/internal/database/db_sections.go b/internal/database/db_sections.go
index 8e124ab..4f5a17b 100644
--- a/internal/database/db_sections.go
+++ b/internal/database/db_sections.go
@@ -15,7 +15,7 @@ func (db *Database) GetAllSections() ([]*models.Section, error) {
 		ORDER BY sort_order ASC, display_name ASC
 	`
 
-	rows, err := retryableQuery(db.mainDB, query)
+	rows, err := RetryableQuery(db.mainDB, query)
 	if err != nil {
 		return nil, fmt.Errorf("failed to query sections: %w", err)
 	}
@@ -57,7 +57,7 @@ func (db *Database) GetAllSectionsWithCounts() ([]*models.Section, error) {
 		ORDER BY s.sort_order ASC, s.display_name ASC
 	`
 
-	rows, err := retryableQuery(db.mainDB, query)
+	rows, err := RetryableQuery(db.mainDB, query)
 	if err != nil {
 		return nil, fmt.Errorf("failed to query sections with counts: %w", err)
 	}
@@ -94,7 +94,7 @@ func (db *Database) GetAllSectionGroups() ([]*models.SectionGroup, error) {
 		ORDER BY section_id ASC, sort_order ASC, newsgroup_name ASC
 	`
 
-	rows, err := retryableQuery(db.mainDB, query)
+	rows, err := RetryableQuery(db.mainDB, query)
 	if err != nil {
 		return nil, fmt.Errorf("failed to query section groups: %w", err)
 	}
@@ -130,7 +130,7 @@ func (db *Database) GetSectionByID(id int) (*models.Section, error) {
 	`
 
 	section := &models.Section{}
-	err := retryableQueryRowScan(db.mainDB, query, []interface{}{id},
+	err := RetryableQueryRowScan(db.mainDB, query, []interface{}{id},
 		&section.ID,
 		&section.Name,
 		&section.DisplayName,
@@ -156,7 +156,7 @@ func
(db *Database) SectionNameExists(name string) (bool, error) { query := `SELECT COUNT(*) FROM sections WHERE name = ?` var count int - err := retryableQueryRowScan(db.mainDB, query, []interface{}{name}, &count) + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{name}, &count) if err != nil { return false, fmt.Errorf("failed to check section name existence: %w", err) } @@ -169,7 +169,7 @@ func (db *Database) SectionNameExistsExcluding(name string, excludeID int) (bool query := `SELECT COUNT(*) FROM sections WHERE name = ? AND id != ?` var count int - err := retryableQueryRowScan(db.mainDB, query, []interface{}{name, excludeID}, &count) + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{name, excludeID}, &count) if err != nil { return false, fmt.Errorf("failed to check section name existence: %w", err) } @@ -184,7 +184,7 @@ func (db *Database) CreateSection(section *models.Section) error { VALUES (?, ?, ?, ?, ?, ?, ?) ` - result, err := retryableExec(db.mainDB, query, + result, err := RetryableExec(db.mainDB, query, section.Name, section.DisplayName, section.Description, @@ -215,7 +215,7 @@ func (db *Database) UpdateSection(section *models.Section) error { WHERE id = ? ` - result, err := retryableExec(db.mainDB, query, + result, err := RetryableExec(db.mainDB, query, section.Name, section.DisplayName, section.Description, @@ -243,7 +243,7 @@ func (db *Database) UpdateSection(section *models.Section) error { // DeleteSection deletes a section and all its group assignments func (db *Database) DeleteSection(id int) error { - return retryableTransactionExec(db.mainDB, func(tx *sql.Tx) error { + return RetryableTransactionExec(db.mainDB, func(tx *sql.Tx) error { // Delete section groups first (foreign key constraint) _, err := tx.Exec("DELETE FROM section_groups WHERE section_id = ?", id) if err != nil { @@ -278,7 +278,7 @@ func (db *Database) GetSectionGroupByID(id int) (*models.SectionGroup, error) { ` sg := &models.SectionGroup{} - err := retryableQueryRowScan(db.mainDB, query, []interface{}{id}, + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{id}, &sg.ID, &sg.SectionID, &sg.NewsgroupName, @@ -303,7 +303,7 @@ func (db *Database) SectionGroupExists(sectionID int, newsgroupName string) (boo query := `SELECT COUNT(*) FROM section_groups WHERE section_id = ? AND newsgroup_name = ?` var count int - err := retryableQueryRowScan(db.mainDB, query, []interface{}{sectionID, newsgroupName}, &count) + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{sectionID, newsgroupName}, &count) if err != nil { return false, fmt.Errorf("failed to check section group existence: %w", err) } @@ -318,7 +318,7 @@ func (db *Database) CreateSectionGroup(sg *models.SectionGroup) error { VALUES (?, ?, ?, ?, ?, ?) 
` - result, err := retryableExec(db.mainDB, query, + result, err := RetryableExec(db.mainDB, query, sg.SectionID, sg.NewsgroupName, sg.GroupDescription, @@ -344,7 +344,7 @@ func (db *Database) CreateSectionGroup(sg *models.SectionGroup) error { func (db *Database) DeleteSectionGroup(id int) error { query := `DELETE FROM section_groups WHERE id = ?` - result, err := retryableExec(db.mainDB, query, id) + result, err := RetryableExec(db.mainDB, query, id) if err != nil { return fmt.Errorf("failed to delete section group: %w", err) } @@ -371,7 +371,7 @@ func (db *Database) GetNewsgroupByName(name string) (*models.Newsgroup, error) { ` ng := &models.Newsgroup{} - err := retryableQueryRowScan(db.mainDB, query, []interface{}{name}, + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{name}, &ng.ID, &ng.Name, &ng.Active, diff --git a/internal/database/db_sessions.go b/internal/database/db_sessions.go index 5b58dab..ae0a9b2 100644 --- a/internal/database/db_sessions.go +++ b/internal/database/db_sessions.go @@ -49,7 +49,7 @@ func (db *Database) CreateUserSession(userID int64, remoteIP string) (string, er updated_at = CURRENT_TIMESTAMP WHERE id = ?` - _, err = retryableExec(db.mainDB, query, sessionID, remoteIP, expiresAt, userID) + _, err = RetryableExec(db.mainDB, query, sessionID, remoteIP, expiresAt, userID) if err != nil { return "", fmt.Errorf("failed to create user session: %w", err) } @@ -69,7 +69,7 @@ func (db *Database) ValidateUserSession(sessionID string) (*models.User, error) FROM users WHERE session_id = ? AND session_expires_at > CURRENT_TIMESTAMP` var user models.User - err := retryableQueryRowScan(db.mainDB, query, []interface{}{sessionID}, + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{sessionID}, &user.ID, &user.Username, &user.Email, &user.PasswordHash, &user.DisplayName, &user.SessionID, &user.LastLoginIP, &user.SessionExpiresAt, &user.LoginAttempts, &user.CreatedAt, &user.UpdatedAt) @@ -81,7 +81,7 @@ func (db *Database) ValidateUserSession(sessionID string) (*models.User, error) // Extend session expiration (sliding timeout) in UTC - write operation newExpiresAt := time.Now().UTC().Add(SessionTimeout) updateQuery := `UPDATE users SET session_expires_at = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?` - _, err = retryableExec(db.mainDB, updateQuery, newExpiresAt, user.ID) + _, err = RetryableExec(db.mainDB, updateQuery, newExpiresAt, user.ID) if err != nil { // Log error but don't fail validation fmt.Printf("Warning: Failed to extend session expiration: %v\n", err) @@ -99,7 +99,7 @@ func (db *Database) InvalidateUserSession(userID int64) error { session_expires_at = NULL, updated_at = CURRENT_TIMESTAMP WHERE id = ?` - _, err := retryableExec(db.mainDB, query, userID) + _, err := RetryableExec(db.mainDB, query, userID) return err } @@ -110,7 +110,7 @@ func (db *Database) InvalidateUserSessionBySessionID(sessionID string) error { session_expires_at = NULL, updated_at = CURRENT_TIMESTAMP WHERE session_id = ?` - _, err := retryableExec(db.mainDB, query, sessionID) + _, err := RetryableExec(db.mainDB, query, sessionID) return err } @@ -121,7 +121,7 @@ func (db *Database) IncrementLoginAttempts(username string) error { updated_at = CURRENT_TIMESTAMP WHERE username = ?` - _, err := retryableExec(db.mainDB, query, username) + _, err := RetryableExec(db.mainDB, query, username) return err } @@ -132,7 +132,7 @@ func (db *Database) ResetLoginAttempts(userID int64) error { updated_at = CURRENT_TIMESTAMP WHERE id = ?` - _, err := retryableExec(db.mainDB, query, 
userID) + _, err := RetryableExec(db.mainDB, query, userID) return err } @@ -142,7 +142,7 @@ func (db *Database) IsUserLockedOut(username string) (bool, error) { var attempts int var updatedAt time.Time - err := retryableQueryRowScan(db.mainDB, query, []interface{}{username}, &attempts, &updatedAt) + err := RetryableQueryRowScan(db.mainDB, query, []interface{}{username}, &attempts, &updatedAt) if err != nil { return false, err } @@ -156,7 +156,7 @@ func (db *Database) IsUserLockedOut(username string) (bool, error) { } else { // Lockout period expired, reset attempts resetQuery := `UPDATE users SET login_attempts = 0, updated_at = CURRENT_TIMESTAMP WHERE username = ?` - retryableExec(db.mainDB, resetQuery, username) + RetryableExec(db.mainDB, resetQuery, username) } } @@ -171,7 +171,7 @@ func (db *Database) CleanupExpiredSessions() error { updated_at = CURRENT_TIMESTAMP WHERE session_expires_at < CURRENT_TIMESTAMP` - result, err := retryableExec(db.mainDB, query) + result, err := RetryableExec(db.mainDB, query) if err != nil { return err } diff --git a/internal/database/progress.go b/internal/database/progress.go index 8cc81b8..d20be92 100644 --- a/internal/database/progress.go +++ b/internal/database/progress.go @@ -93,7 +93,7 @@ const query_GetLastArticle = `SELECT last_article FROM progress WHERE backend_na func (p *ProgressDB) GetLastArticle(backendName, newsgroupName string) (int64, error) { p.mux.RLock() var lastArticle int64 - err := retryableQueryRowScan(p.db, query_GetLastArticle, []interface{}{backendName, newsgroupName}, &lastArticle) + err := RetryableQueryRowScan(p.db, query_GetLastArticle, []interface{}{backendName, newsgroupName}, &lastArticle) if err == sql.ErrNoRows { p.mux.RUnlock() //log.Printf("progressDB.GetLastArticle: provider '%s', newsgroup '%s' has no progress", backendName, newsgroupName) @@ -123,7 +123,7 @@ ON CONFLICT(backend_name, newsgroup_name) DO UPDATE SET func (p *ProgressDB) UpdateProgress(backendName, newsgroupName string, lastArticle int64) error { p.mux.Lock() defer p.mux.Unlock() - _, err := retryableExec(p.db, query_UpdateProgress, backendName, newsgroupName, lastArticle) + _, err := RetryableExec(p.db, query_UpdateProgress, backendName, newsgroupName, lastArticle) if err != nil { return fmt.Errorf("failed to update progress: %w", err) } @@ -141,7 +141,7 @@ ORDER BY backend_name, newsgroup_name // GetAllProgress returns all progress entries func (p *ProgressDB) GetAllProgress() ([]*ProgressEntry, error) { - rows, err := retryableQuery(p.db, query_GetAllProgress) + rows, err := RetryableQuery(p.db, query_GetAllProgress) if err != nil { return nil, fmt.Errorf("failed to query progress: %w", err) } @@ -181,7 +181,7 @@ ORDER BY newsgroup_name // GetProgressForBackend returns progress entries for a specific backend func (p *ProgressDB) GetProgressForBackend(backendName string) ([]*ProgressEntry, error) { - rows, err := retryableQuery(p.db, query_GetProgressForBackend, backendName) + rows, err := RetryableQuery(p.db, query_GetProgressForBackend, backendName) if err != nil { return nil, fmt.Errorf("failed to query progress for backend: %w", err) } diff --git a/internal/database/queries.go b/internal/database/queries.go index e74cfb2..49c59c4 100644 --- a/internal/database/queries.go +++ b/internal/database/queries.go @@ -54,7 +54,7 @@ const query_AddProvider = `INSERT INTO providers (name, grp, host, port, ssl, us VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)` func (db *Database) AddProvider(provider *models.Provider) error { - _, err := 
retryableExec(db.mainDB, query_AddProvider, + _, err := RetryableExec(db.mainDB, query_AddProvider, provider.Name, provider.Grp, provider.Host, provider.Port, provider.SSL, provider.Username, provider.Password, provider.MaxConns, provider.Enabled, provider.Priority, @@ -69,7 +69,7 @@ func (db *Database) AddProvider(provider *models.Provider) error { // DeleteProvider deletes a provider from the main database func (db *Database) DeleteProvider(id int) error { - _, err := retryableExec(db.mainDB, `DELETE FROM providers WHERE id = ?`, id) + _, err := RetryableExec(db.mainDB, `DELETE FROM providers WHERE id = ?`, id) if err != nil { return fmt.Errorf("failed to delete provider %d: %w", id, err) } @@ -83,7 +83,7 @@ const query_SetProvider = `UPDATE providers SET WHERE id = ?` func (db *Database) SetProvider(provider *models.Provider) error { - _, err := retryableExec(db.mainDB, query_SetProvider, + _, err := RetryableExec(db.mainDB, query_SetProvider, provider.Grp, provider.Host, provider.Port, provider.SSL, provider.Username, provider.Password, provider.MaxConns, provider.Enabled, provider.Priority, @@ -100,7 +100,7 @@ func (db *Database) SetProvider(provider *models.Provider) error { const query_GetProviders = `SELECT id, enabled, priority, name, host, port, ssl, username, password, max_conns, max_art_size, posting, created_at, proxy_enabled, proxy_type, proxy_host, proxy_port, proxy_username, proxy_password FROM providers order by priority ASC` func (db *Database) GetProviders() ([]*models.Provider, error) { - rows, err := retryableQuery(db.mainDB, query_GetProviders) + rows, err := RetryableQuery(db.mainDB, query_GetProviders) if err != nil { return nil, err } @@ -124,7 +124,7 @@ func (db *Database) InsertNewsgroup(g *models.Newsgroup) error { if g.Hierarchy == "" { g.Hierarchy = ExtractHierarchyFromGroupName(g.Name) } - _, err := retryableExec(db.mainDB, query_InsertNewsgroup, g.Name, g.Description, g.LastArticle, g.MessageCount, g.Active, g.ExpiryDays, g.MaxArticles, g.MaxArtSize, g.HighWater, g.LowWater, g.Status, g.Hierarchy) + _, err := RetryableExec(db.mainDB, query_InsertNewsgroup, g.Name, g.Description, g.LastArticle, g.MessageCount, g.Active, g.ExpiryDays, g.MaxArticles, g.MaxArtSize, g.HighWater, g.LowWater, g.Status, g.Hierarchy) // Invalidate hierarchy cache for the affected hierarchy if err == nil && db.HierarchyCache != nil { @@ -138,7 +138,7 @@ const query_MainDBGetAllNewsgroupsCount = `SELECT COUNT(*) FROM newsgroups` func (db *Database) MainDBGetAllNewsgroupsCount() int64 { var count int64 - err := retryableQueryRowScan(db.mainDB, query_MainDBGetAllNewsgroupsCount, nil, &count) + err := RetryableQueryRowScan(db.mainDB, query_MainDBGetAllNewsgroupsCount, nil, &count) if err != nil { log.Printf("MainDBGetNewsgroupsCount: Failed to get newsgroups count: %v", err) return 0 @@ -151,7 +151,7 @@ const query_GetUsersCount = `SELECT COUNT(*) FROM users` func (db *Database) GetUsersCount() int64 { var count int64 - err := retryableQueryRowScan(db.mainDB, query_GetUsersCount, nil, &count) + err := RetryableQueryRowScan(db.mainDB, query_GetUsersCount, nil, &count) if err != nil { log.Printf("GetUsersCount: Failed to get users count: %v", err) return 0 @@ -166,7 +166,7 @@ WHERE u.disabled = 0 AND (u.id = 1 OR up.permission = 'admin')` func (db *Database) GetAdminUsersCount() int64 { var count int64 - err := retryableQueryRowScan(db.mainDB, query_GetAdminUsersCount, nil, &count) + err := RetryableQueryRowScan(db.mainDB, query_GetAdminUsersCount, nil, &count) if err != nil { 
log.Printf("GetAdminUsersCount: Failed to get admin users count: %v", err) return 0 @@ -178,7 +178,7 @@ const query_MainDBGetNewsgroupsActiveCount = `SELECT COUNT(*) FROM newsgroups WH func (db *Database) MainDBGetNewsgroupsActiveCount() int64 { var count int64 - err := retryableQueryRowScan(db.mainDB, query_MainDBGetNewsgroupsActiveCount, nil, &count) + err := RetryableQueryRowScan(db.mainDB, query_MainDBGetNewsgroupsActiveCount, nil, &count) if err != nil { log.Printf("MainDBGetNewsgroupsActiveCount: Failed to get newsgroups count: %v", err) return 0 @@ -190,7 +190,7 @@ func (db *Database) MainDBGetNewsgroupsActiveCount() int64 { const query_MainDBGetAllNewsgroups = `SELECT id, name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, high_water, low_water, status, hierarchy, created_at FROM newsgroups order by name` func (db *Database) MainDBGetAllNewsgroups() ([]*models.Newsgroup, error) { - rows, err := retryableQuery(db.mainDB, query_MainDBGetAllNewsgroups) + rows, err := RetryableQuery(db.mainDB, query_MainDBGetAllNewsgroups) if err != nil { log.Printf("MainDBGetAllNewsgroups: Failed to query newsgroups: %v", err) return nil, err @@ -211,7 +211,7 @@ func (db *Database) MainDBGetAllNewsgroups() ([]*models.Newsgroup, error) { const query_MainDBGetNewsgroup = `SELECT id, name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, high_water, low_water, status, hierarchy, created_at FROM newsgroups WHERE name = ?` func (db *Database) MainDBGetNewsgroup(newsgroup string) (*models.Newsgroup, error) { - rows, err := retryableQuery(db.mainDB, query_MainDBGetNewsgroup, newsgroup) + rows, err := RetryableQuery(db.mainDB, query_MainDBGetNewsgroup, newsgroup) if err != nil { log.Printf("MainDBGetNewsgroup: Failed to query newsgroup '%s': %v", newsgroup, err) return nil, err @@ -235,7 +235,7 @@ func (db *Database) MainDBGetNewsgroup(newsgroup string) (*models.Newsgroup, err const query_MainDBGetNewsgroupByID = `SELECT id, name, description, last_article, message_count, active, expiry_days, max_articles, max_art_size, high_water, low_water, status, hierarchy, created_at FROM newsgroups WHERE id = ?` func (db *Database) MainDBGetNewsgroupByID(id int64) (*models.Newsgroup, error) { - rows, err := retryableQuery(db.mainDB, query_MainDBGetNewsgroupByID, id) + rows, err := RetryableQuery(db.mainDB, query_MainDBGetNewsgroupByID, id) if err != nil { log.Printf("MainDBGetNewsgroupByID: Failed to query newsgroup with ID %d: %v", id, err) return nil, err @@ -264,7 +264,7 @@ func (db *Database) UpdateNewsgroup(g *models.Newsgroup) error { g.Hierarchy = ExtractHierarchyFromGroupName(g.Name) } - _, err := retryableExec(db.mainDB, query_UpdateNewsgroup, + _, err := RetryableExec(db.mainDB, query_UpdateNewsgroup, g.Description, g.LastArticle, g.MessageCount, g.Active, g.ExpiryDays, g.MaxArticles, g.HighWater, g.LowWater, g.Status, g.Hierarchy, g.Name, ) @@ -280,7 +280,7 @@ func (db *Database) UpdateNewsgroup(g *models.Newsgroup) error { const query_UpdateNewsgroupExpiry = `UPDATE newsgroups SET expiry_days = ? 
WHERE name = ?` func (db *Database) UpdateNewsgroupExpiry(name string, expiryDays int) error { - _, err := retryableExec(db.mainDB, query_UpdateNewsgroupExpiry, expiryDays, name) + _, err := RetryableExec(db.mainDB, query_UpdateNewsgroupExpiry, expiryDays, name) return err } @@ -288,7 +288,7 @@ func (db *Database) UpdateNewsgroupExpiry(name string, expiryDays int) error { const query_UpdateNewsgroupExpiryPrefix = `UPDATE newsgroups SET expiry_days = ? WHERE name LIKE ? ` func (db *Database) UpdateNewsgroupExpiryPrefix(name string, expiryDays int) error { - _, err := retryableExec(db.mainDB, query_UpdateNewsgroupExpiryPrefix, expiryDays, name+"%") + _, err := RetryableExec(db.mainDB, query_UpdateNewsgroupExpiryPrefix, expiryDays, name+"%") return err } @@ -296,7 +296,7 @@ func (db *Database) UpdateNewsgroupExpiryPrefix(name string, expiryDays int) err const query_UpdateNewsgroupMaxArticles = `UPDATE newsgroups SET max_articles = ? WHERE name = ?` func (db *Database) UpdateNewsgroupMaxArticles(name string, maxArticles int) error { - _, err := retryableExec(db.mainDB, query_UpdateNewsgroupMaxArticles, maxArticles, name) + _, err := RetryableExec(db.mainDB, query_UpdateNewsgroupMaxArticles, maxArticles, name) return err } @@ -304,7 +304,7 @@ func (db *Database) UpdateNewsgroupMaxArticles(name string, maxArticles int) err const query_UpdateNewsgroupMaxArticlesPrefix = `UPDATE newsgroups SET max_articles = ? WHERE name LIKE ?` func (db *Database) UpdateNewsgroupMaxArticlesPrefix(name string, maxArticles int) error { - _, err := retryableExec(db.mainDB, query_UpdateNewsgroupMaxArticlesPrefix, maxArticles, name+"%") + _, err := RetryableExec(db.mainDB, query_UpdateNewsgroupMaxArticlesPrefix, maxArticles, name+"%") return err } @@ -312,7 +312,7 @@ func (db *Database) UpdateNewsgroupMaxArticlesPrefix(name string, maxArticles in const query_UpdateNewsgroupMaxArtSize = `UPDATE newsgroups SET max_art_size = ? WHERE name = ?` func (db *Database) UpdateNewsgroupMaxArtSize(name string, maxArtSize int) error { - _, err := retryableExec(db.mainDB, query_UpdateNewsgroupMaxArtSize, maxArtSize, name) + _, err := RetryableExec(db.mainDB, query_UpdateNewsgroupMaxArtSize, maxArtSize, name) return err } @@ -320,7 +320,7 @@ const query_UpdateNewsgroupActive = `UPDATE newsgroups SET active = ? WHERE name // UpdateNewsgroupActive updates the active status for a newsgroup func (db *Database) UpdateNewsgroupActive(name string, active bool) error { - _, err := retryableExec(db.mainDB, query_UpdateNewsgroupActive, active, name) + _, err := RetryableExec(db.mainDB, query_UpdateNewsgroupActive, active, name) // Update hierarchy cache with new active status instead of invalidating if err == nil && db.HierarchyCache != nil { @@ -438,7 +438,7 @@ func (db *Database) BulkDeleteNewsgroups(names []string) (int, error) { const query_UpdateNewsgroupDescription = `UPDATE newsgroups SET description = ? 
WHERE name = ?` func (db *Database) UpdateNewsgroupDescription(name string, description string) error { - _, err := retryableExec(db.mainDB, query_UpdateNewsgroupDescription, description, name) + _, err := RetryableExec(db.mainDB, query_UpdateNewsgroupDescription, description, name) return err } @@ -456,7 +456,7 @@ func (db *Database) DeleteNewsgroup(name string) error { hierarchy = ExtractHierarchyFromGroupName(name) } - _, err = retryableExec(db.mainDB, query_DeleteNewsgroup, name) + _, err = RetryableExec(db.mainDB, query_DeleteNewsgroup, name) // Invalidate hierarchy cache for the affected hierarchy if err == nil && db.HierarchyCache != nil { @@ -471,7 +471,7 @@ const query_GetThreadsCount = `SELECT COUNT(*) FROM threads` func (db *Database) GetThreadsCount(groupDBs *GroupDBs) (int64, error) { var count int64 - err := retryableQueryRowScan(groupDBs.DB, query_GetThreadsCount, nil, &count) + err := RetryableQueryRowScan(groupDBs.DB, query_GetThreadsCount, nil, &count) if err != nil { return 0, err } @@ -483,7 +483,7 @@ const query_GetArticlesCount = `SELECT COUNT(*) FROM articles` func (db *Database) GetArticlesCount(groupDBs *GroupDBs) (int64, error) { var count int64 - err := retryableQueryRowScan(groupDBs.DB, query_GetArticlesCount, nil, &count) + err := RetryableQueryRowScan(groupDBs.DB, query_GetArticlesCount, nil, &count) if err != nil { return 0, err } @@ -508,7 +508,7 @@ const query_GetLastArticleDate = `SELECT MAX(date_sent) FROM articles WHERE hide func (db *Database) GetLastArticleDate(groupDBs *GroupDBs) (*time.Time, error) { var lastDateStr sql.NullString - err := retryableQueryRowScan(groupDBs.DB, query_GetLastArticleDate, nil, &lastDateStr) + err := RetryableQueryRowScan(groupDBs.DB, query_GetLastArticleDate, nil, &lastDateStr) if err != nil { return nil, fmt.Errorf("failed to get last article date for group %s: %w", groupDBs.Newsgroup, err) } @@ -534,7 +534,7 @@ func (db *Database) GetArticlesBatch(groupDBs *GroupDBs, limit, offset int) ([]* limit = 100 // Default batch size } - rows, err := retryableQuery(groupDBs.DB, query_GetArticlesBatch, limit, offset) + rows, err := RetryableQuery(groupDBs.DB, query_GetArticlesBatch, limit, offset) if err != nil { return nil, err } @@ -559,7 +559,7 @@ func (db *Database) GetArticlesBatch(groupDBs *GroupDBs, limit, offset int) ([]* const query_InsertThread = `INSERT INTO threads (root_article, parent_article, child_article, depth, thread_order) VALUES (?, ?, ?, ?, ?)` func (db *Database) InsertThread(groupDBs *GroupDBs, t *models.Thread, a *models.Article) error { - _, err := retryableExec(groupDBs.DB, query_InsertThread, + _, err := RetryableExec(groupDBs.DB, query_InsertThread, t.RootArticle, t.ParentArticle, t.ChildArticle, t.Depth, t.ThreadOrder, ) @@ -569,7 +569,7 @@ func (db *Database) InsertThread(groupDBs *GroupDBs, t *models.Thread, a *models const query_GetThreads = `SELECT id, root_article, parent_article, child_article, depth, thread_order FROM threads` func (db *Database) GetThreads(groupDBs *GroupDBs) ([]*models.Thread, error) { - rows, err := retryableQuery(groupDBs.DB, query_GetThreads) + rows, err := RetryableQuery(groupDBs.DB, query_GetThreads) if err != nil { return nil, err } @@ -602,12 +602,12 @@ func (db *Database) InsertOverview(groupDBs *GroupDBs, o *models.Overview) (int6 if o.ArticleNum == 0 { // Auto-increment article_num - don't include it in INSERT - res, err = retryableExec(groupDBs.DB, query_InsertOverview, + res, err = RetryableExec(groupDBs.DB, query_InsertOverview, o.Subject, o.FromHeader, 
dateSentStr, o.DateString, o.MessageID, o.References, o.Bytes, o.Lines, o.ReplyCount, o.Downloaded, ) } else { // Explicit article_num provided (e.g. from ImportOverview) - res, err = retryableExec(groupDBs.DB, query_ImportOverview, + res, err = RetryableExec(groupDBs.DB, query_ImportOverview, o.ArticleNum, o.Subject, o.FromHeader, dateSentStr, o.DateString, o.MessageID, o.References, o.Bytes, o.Lines, o.ReplyCount, o.Downloaded, ) } @@ -623,7 +623,7 @@ const query_GetOverviews = `SELECT article_num, subject, from_header, date_sent, func (db *Database) GetOverviews(groupDBs *GroupDBs) ([]*models.Overview, error) { log.Printf("GetOverviews: group '%s' fetching overviews from articles table", groupDBs.Newsgroup) - rows, err := retryableQuery(groupDBs.DB, query_GetOverviews) + rows, err := RetryableQuery(groupDBs.DB, query_GetOverviews) if err != nil { return nil, err } @@ -869,7 +869,7 @@ func (db *Database) GetArticleByMessageID(groupDBs *GroupDBs, messageID string) const query_UpdateReplyCount = `UPDATE articles SET reply_count = ? WHERE message_id = ?` func (db *Database) UpdateReplyCount(groupDBs *GroupDBs, messageID string, replyCount int) error { - _, err := retryableExec(groupDBs.DB, query_UpdateReplyCount, replyCount, messageID) + _, err := RetryableExec(groupDBs.DB, query_UpdateReplyCount, replyCount, messageID) return err } @@ -877,7 +877,7 @@ func (db *Database) UpdateReplyCount(groupDBs *GroupDBs, messageID string, reply const query_IncrementReplyCount = `UPDATE articles SET reply_count = reply_count + 1 WHERE message_id = ?` func (db *Database) IncrementReplyCount(groupDBs *GroupDBs, messageID string) error { - _, err := retryableExec(groupDBs.DB, + _, err := RetryableExec(groupDBs.DB, query_IncrementReplyCount, messageID, ) @@ -903,7 +903,7 @@ func (db *Database) GetReplyCount(groupDBs *GroupDBs, messageID string) (int, er const query_UpdateArticleDateSent = `UPDATE articles SET date_sent = ?, date_string = ? WHERE message_id = ?` func (db *Database) UpdateArticleDateSent(groupDBs *GroupDBs, messageID string, dateSent time.Time, dateString string) error { - _, err := retryableExec(groupDBs.DB, query_UpdateArticleDateSent, dateSent.UTC().Format("2006-01-02 15:04:05"), dateString, messageID) + _, err := RetryableExec(groupDBs.DB, query_UpdateArticleDateSent, dateSent.UTC().Format("2006-01-02 15:04:05"), dateString, messageID) return err } @@ -911,7 +911,7 @@ func (db *Database) UpdateArticleDateSent(groupDBs *GroupDBs, messageID string, const query_UpdateOverviewReplyCount = `UPDATE articles SET reply_count = ? 
WHERE message_id = ?` func (db *Database) UpdateOverviewReplyCount(groupDBs *GroupDBs, messageID string, replyCount int) error { - _, err := retryableExec(groupDBs.DB, + _, err := RetryableExec(groupDBs.DB, query_UpdateOverviewReplyCount, replyCount, messageID, ) @@ -923,7 +923,7 @@ const query_IncrementOverviewReplyCount = `UPDATE articles SET reply_count = rep func (db *Database) IncrementOverviewReplyCount(groupDBs *GroupDBs, messageID string) error { - _, err := retryableExec(groupDBs.DB, + _, err := RetryableExec(groupDBs.DB, query_IncrementOverviewReplyCount, messageID, ) @@ -2980,7 +2980,7 @@ func (db *Database) ResetNewsgroupCounters(newsgroupName string) error { log.Printf("ResetNewsgroupCounters: Resetting counters for newsgroup '%s'", newsgroupName) // Reset all counters to 0 and water marks to default values - _, err := retryableExec(db.mainDB, query_ResetNewsgroupCounters, newsgroupName) + _, err := RetryableExec(db.mainDB, query_ResetNewsgroupCounters, newsgroupName) if err != nil { return fmt.Errorf("failed to reset counters for newsgroup '%s': %w", newsgroupName, err) @@ -2997,7 +2997,7 @@ const query_GetAllSiteNews = `SELECT id, subject, content, date_published, is_vi FROM site_news ORDER BY date_published DESC` func (db *Database) GetAllSiteNews() ([]*models.SiteNews, error) { - rows, err := retryableQuery(db.mainDB, query_GetAllSiteNews) + rows, err := RetryableQuery(db.mainDB, query_GetAllSiteNews) if err != nil { return nil, fmt.Errorf("failed to query all site news: %w", err) } @@ -3026,7 +3026,7 @@ const query_GetVisibleSiteNews = `SELECT id, subject, content, date_published, i FROM site_news WHERE is_visible = 1 ORDER BY date_published DESC` func (db *Database) GetVisibleSiteNews() ([]*models.SiteNews, error) { - rows, err := retryableQuery(db.mainDB, query_GetVisibleSiteNews) + rows, err := RetryableQuery(db.mainDB, query_GetVisibleSiteNews) if err != nil { return nil, fmt.Errorf("failed to query visible site news: %w", err) } @@ -3057,7 +3057,7 @@ const query_GetSiteNewsByID = `SELECT id, subject, content, date_published, is_v func (db *Database) GetSiteNewsByID(id int) (*models.SiteNews, error) { var item models.SiteNews var isVisibleInt int - err := retryableQueryRowScan(db.mainDB, query_GetSiteNewsByID, []interface{}{id}, + err := RetryableQueryRowScan(db.mainDB, query_GetSiteNewsByID, []interface{}{id}, &item.ID, &item.Subject, &item.Content, &item.DatePublished, &isVisibleInt, &item.CreatedAt, &item.UpdatedAt) @@ -3082,7 +3082,7 @@ func (db *Database) CreateSiteNews(news *models.SiteNews) error { isVisibleInt = 1 } - result, err := retryableExec(db.mainDB, query_CreateSiteNews, news.Subject, news.Content, + result, err := RetryableExec(db.mainDB, query_CreateSiteNews, news.Subject, news.Content, news.DatePublished, isVisibleInt) if err != nil { return fmt.Errorf("failed to create site news: %w", err) @@ -3107,7 +3107,7 @@ func (db *Database) UpdateSiteNews(news *models.SiteNews) error { isVisibleInt = 1 } - _, err := retryableExec(db.mainDB, query_UpdateSiteNews, news.Subject, news.Content, + _, err := RetryableExec(db.mainDB, query_UpdateSiteNews, news.Subject, news.Content, news.DatePublished, isVisibleInt, news.ID) if err != nil { return fmt.Errorf("failed to update site news ID %d: %w", news.ID, err) @@ -3120,7 +3120,7 @@ func (db *Database) UpdateSiteNews(news *models.SiteNews) error { const query_DeleteSiteNews = `DELETE FROM site_news WHERE id = ?` func (db *Database) DeleteSiteNews(id int) error { - _, err := retryableExec(db.mainDB, 
query_DeleteSiteNews, id) + _, err := RetryableExec(db.mainDB, query_DeleteSiteNews, id) if err != nil { return fmt.Errorf("failed to delete site news ID %d: %w", id, err) } @@ -3131,7 +3131,7 @@ func (db *Database) DeleteSiteNews(id int) error { const query_ToggleSiteNewsVisibility = `UPDATE site_news SET is_visible = (1 - is_visible) WHERE id = ?` func (db *Database) ToggleSiteNewsVisibility(id int) error { - _, err := retryableExec(db.mainDB, query_ToggleSiteNewsVisibility, id) + _, err := RetryableExec(db.mainDB, query_ToggleSiteNewsVisibility, id) if err != nil { return fmt.Errorf("failed to toggle visibility for site news ID %d: %w", id, err) } diff --git a/internal/database/sqlite_retry.go b/internal/database/sqlite_retry.go index afa6cd9..89d8b8b 100644 --- a/internal/database/sqlite_retry.go +++ b/internal/database/sqlite_retry.go @@ -29,9 +29,10 @@ func isRetryableError(err error) bool { } // retryableExec executes a SQL statement with retry logic for lock conflicts -func retryableExec(db *sql.DB, query string, args ...interface{}) (sql.Result, error) { +func RetryableExec(db *sql.DB, query string, args ...interface{}) (sql.Result, error) { var result sql.Result var err error + start := time.Now() atomic.AddUint64(&queryID, 1) for attempt := 0; attempt < maxRetries; attempt++ { result, err = db.Exec(query, args...) @@ -48,11 +49,11 @@ func retryableExec(db *sql.DB, query string, args ...interface{}) (sql.Result, e } // Add random jitter (up to 50% of delay) - jitter := time.Duration(rand.Int63n(int64(delay) / 2)) + jitter := time.Duration(rand.Int63n(int64(delay) / 100 * 50)) time.Sleep(delay + jitter) - log.Printf("(#%d) SQLite retry attempt %d/%d for query (first 50 chars): %s... Error: %v", - atomic.LoadUint64(&queryID), attempt+1, maxRetries, truncateString(query, 50), err) + log.Printf("(#%d) SQLite retry attempt %d/%d for query (first 50 chars): %s... Error: %v took %v (retry in: %v)", + atomic.LoadUint64(&queryID), attempt+1, maxRetries, truncateString(query, 50), err, time.Since(start), delay+jitter) } } @@ -60,7 +61,7 @@ func retryableExec(db *sql.DB, query string, args ...interface{}) (sql.Result, e } // retryableExecPtr executes a SQL statement with retry logic for lock conflicts -func retryableExecPtr(db *sql.DB, query *strings.Builder, args ...interface{}) (sql.Result, error) { +func RetryableExecPtr(db *sql.DB, query *strings.Builder, args ...interface{}) (sql.Result, error) { var result sql.Result var err error @@ -90,15 +91,8 @@ func retryableExecPtr(db *sql.DB, query *strings.Builder, args ...interface{}) ( return result, err } -// retryableQueryRow executes a query that returns a single row with retry logic -func retryableQueryRow(db *sql.DB, query string, args ...interface{}) *sql.Row { - // For QueryRow, we can't detect errors until Scan() is called - // Return the row directly - callers should handle retryable errors in their Scan() calls - return db.QueryRow(query, args...) -} - // retryableQueryRowScan executes a QueryRow and Scan with retry logic -func retryableQueryRowScan(db *sql.DB, query string, args []interface{}, dest ...interface{}) error { +func RetryableQueryRowScan(db *sql.DB, query string, args []interface{}, dest ...interface{}) error { var err error atomic.AddUint64(&queryID, 1) @@ -129,7 +123,7 @@ func retryableQueryRowScan(db *sql.DB, query string, args []interface{}, dest .. 
} // retryableQuery executes a query that returns multiple rows with retry logic -func retryableQuery(db *sql.DB, query string, args ...interface{}) (*sql.Rows, error) { +func RetryableQuery(db *sql.DB, query string, args ...interface{}) (*sql.Rows, error) { var rows *sql.Rows var err error atomic.AddUint64(&queryID, 1) @@ -170,7 +164,7 @@ func retryableQuery(db *sql.DB, query string, args ...interface{}) (*sql.Rows, e } // retryableTransactionExec executes a transaction with retry logic -func retryableTransactionExec(db *sql.DB, txFunc func(*sql.Tx) error) error { +func RetryableTransactionExec(db *sql.DB, txFunc func(*sql.Tx) error) error { var err error atomic.AddUint64(&queryID, 1) for attempt := 0; attempt < maxRetries; attempt++ { @@ -247,7 +241,7 @@ func truncateString(s string, length int) string { var queryID uint64 // retryableStmtExec executes a prepared statement with retry logic for lock conflicts -func retryableStmtExec(stmt *sql.Stmt, args ...interface{}) (sql.Result, error) { +func RetryableStmtExec(stmt *sql.Stmt, args ...interface{}) (sql.Result, error) { var result sql.Result var err error atomic.AddUint64(&queryID, 1) @@ -277,7 +271,7 @@ func retryableStmtExec(stmt *sql.Stmt, args ...interface{}) (sql.Result, error) } // retryableStmtQueryRowScan executes a prepared statement QueryRow and Scan with retry logic -func retryableStmtQueryRowScan(stmt *sql.Stmt, args []interface{}, dest ...interface{}) error { +func RetryableStmtQueryRowScan(stmt *sql.Stmt, args []interface{}, dest ...interface{}) error { var err error atomic.AddUint64(&queryID, 1) for attempt := 0; attempt < maxRetries; attempt++ { @@ -305,40 +299,3 @@ func retryableStmtQueryRowScan(stmt *sql.Stmt, args []interface{}, dest ...inter return err } - -// Exported wrapper functions for use by other packages - -// RetryableExec executes a SQL statement with retry logic for lock conflicts -func RetryableExec(db *sql.DB, query string, args ...interface{}) (sql.Result, error) { - return retryableExec(db, query, args...) -} - -// RetryableQuery executes a SQL query with retry logic for lock conflicts -func RetryableQuery(db *sql.DB, query string, args ...interface{}) (*sql.Rows, error) { - return retryableQuery(db, query, args...) -} - -// RetryableQueryRow executes a SQL query and returns a single row with retry logic -func RetryableQueryRow(db *sql.DB, query string, args ...interface{}) *sql.Row { - return retryableQueryRow(db, query, args...) -} - -// RetryableQueryRowScan executes a SQL query and scans the result with retry logic -func RetryableQueryRowScan(db *sql.DB, query string, args []interface{}, dest ...interface{}) error { - return retryableQueryRowScan(db, query, args, dest...) -} - -// RetryableTransactionExec executes a transaction with retry logic for lock conflicts -func RetryableTransactionExec(db *sql.DB, txFunc func(*sql.Tx) error) error { - return retryableTransactionExec(db, txFunc) -} - -// RetryableStmtExec executes a prepared statement with retry logic for lock conflicts -func RetryableStmtExec(stmt *sql.Stmt, args ...interface{}) (sql.Result, error) { - return retryableStmtExec(stmt, args...) -} - -// RetryableStmtQueryRowScan executes a prepared statement QueryRow and scans with retry logic -func RetryableStmtQueryRowScan(stmt *sql.Stmt, args []interface{}, dest ...interface{}) error { - return retryableStmtQueryRowScan(stmt, args, dest...) 
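With the lowercase wrappers deleted, other packages now call the exported helpers directly (transfer-progress.go does this further below with database.RetryableExec). A minimal sketch of such a caller, assuming only the signatures shown in this hunk; the import path, table name and columns are placeholders and not part of the patch:

package nntp

import (
	"database/sql"
	"fmt"

	"example.com/app/internal/database" // placeholder module path, not shown in the patch
)

// countTransfers illustrates using the exported helpers from outside the
// database package: RetryableQueryRowScan wraps QueryRow+Scan in the same
// backoff-and-jitter retry loop shown above. Table and columns are examples.
func countTransfers(db *sql.DB, remoteID int64, newsgroup string) (int64, error) {
	var count int64
	err := database.RetryableQueryRowScan(db,
		`SELECT COUNT(*) FROM transfers WHERE remote_id = ? AND newsgroup = ?`,
		[]interface{}{remoteID, newsgroup}, &count)
	if err != nil {
		return 0, fmt.Errorf("failed to count transfers: %w", err)
	}
	return count, nil
}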
-} diff --git a/internal/database/thread_cache.go b/internal/database/thread_cache.go index 6cbc0ca..59457ec 100644 --- a/internal/database/thread_cache.go +++ b/internal/database/thread_cache.go @@ -40,7 +40,7 @@ func (db *Database) InitializeThreadCache(groupDBs *GroupDBs, threadRoot int64, // Format dates as UTC strings to avoid timezone encoding issues rootDateUTC := rootArticle.DateSent.UTC().Format("2006-01-02 15:04:05") - _, err := retryableExec(groupDBs.DB, query, + _, err := RetryableExec(groupDBs.DB, query, threadRoot, rootDateUTC, threadRoot, // last_child_number starts as the root itself @@ -62,7 +62,7 @@ func (db *Database) UpdateThreadCache(groupDBs *GroupDBs, threadRoot int64, chil var currentCount int query := `SELECT child_articles, message_count FROM thread_cache WHERE thread_root = ?` - err := retryableQueryRowScan(groupDBs.DB, query, []interface{}{threadRoot}, ¤tChildren, ¤tCount) + err := RetryableQueryRowScan(groupDBs.DB, query, []interface{}{threadRoot}, ¤tChildren, ¤tCount) if err != nil { // If the thread cache entry doesn't exist, queue it for batch initialization // This can happen if the root article was processed without initializing the cache @@ -112,7 +112,7 @@ func (db *Database) UpdateThreadCache(groupDBs *GroupDBs, threadRoot int64, chil // Format childDate as UTC string to avoid timezone encoding issues childDateUTC := childDate.UTC().Format("2006-01-02 15:04:05") - _, err = retryableExec(groupDBs.DB, updateQuery, + _, err = RetryableExec(groupDBs.DB, updateQuery, newChildren, currentCount+1, childArticleNum, @@ -249,7 +249,7 @@ func (db *Database) GetCachedThreadReplies(groupDBs *GroupDBs, threadRoot int64, var totalReplies int query := `SELECT child_articles, message_count FROM thread_cache WHERE thread_root = ?` - err := retryableQueryRowScan(groupDBs.DB, query, []interface{}{threadRoot}, &childArticles, &totalReplies) + err := RetryableQueryRowScan(groupDBs.DB, query, []interface{}{threadRoot}, &childArticles, &totalReplies) if err != nil { return nil, 0, fmt.Errorf("failed to get thread cache for root %d: %w", threadRoot, err) } @@ -297,7 +297,7 @@ func (db *Database) GetCachedThreadReplies(groupDBs *GroupDBs, threadRoot int64, args[i] = num } - rows, err := retryableQuery(groupDBs.DB, childQuery, args...) + rows, err := RetryableQuery(groupDBs.DB, childQuery, args...) if err != nil { return nil, 0, fmt.Errorf("failed to query thread replies: %w", err) } @@ -330,7 +330,7 @@ func (db *Database) GetOverviewByArticleNum(groupDBs *GroupDBs, articleNum int64 ` overview := &models.Overview{} - err := retryableQueryRowScan(groupDBs.DB, query, []interface{}{articleNum}, + err := RetryableQueryRowScan(groupDBs.DB, query, []interface{}{articleNum}, &overview.ArticleNum, &overview.Subject, &overview.FromHeader, &overview.DateSent, &overview.DateString, &overview.MessageID, &overview.References, &overview.Bytes, &overview.Lines, @@ -461,7 +461,7 @@ func (mem *MemCachedThreads) RefreshThreadCache(db *Database, groupDBs *GroupDBs ` args := []interface{}{cacheSize, cacheWindowStart} - rows, err := retryableQuery(groupDBs.DB, query, args...) + rows, err := RetryableQuery(groupDBs.DB, query, args...) if err != nil { return fmt.Errorf("failed to query thread cache: %w", err) } @@ -490,7 +490,7 @@ func (mem *MemCachedThreads) RefreshThreadCache(db *Database, groupDBs *GroupDBs // Quick check if thread root article is hidden (fast single lookup) var hidden int checkQuery := `SELECT hide FROM articles WHERE article_num = ? 
LIMIT 1` - err = retryableQueryRowScan(groupDBs.DB, checkQuery, []interface{}{entry.ThreadRoot}, &hidden) + err = RetryableQueryRowScan(groupDBs.DB, checkQuery, []interface{}{entry.ThreadRoot}, &hidden) if err != nil || hidden != 0 { continue // Skip hidden threads } @@ -515,7 +515,7 @@ func (mem *MemCachedThreads) RefreshThreadCache(db *Database, groupDBs *GroupDBs // Get the REAL total count from database (not just cached count) var realTotalCount int64 countQuery := `SELECT COUNT(*) FROM thread_cache` - err = retryableQueryRowScan(groupDBs.DB, countQuery, []interface{}{}, &realTotalCount) + err = RetryableQueryRowScan(groupDBs.DB, countQuery, []interface{}{}, &realTotalCount) if err != nil { log.Printf("[PERF:REFRESH] Failed to get real total count: %v", err) realTotalCount = int64(len(threadRoots)) // Fallback to cached count diff --git a/internal/database/tree_cache.go b/internal/database/tree_cache.go index c81a34f..153953b 100644 --- a/internal/database/tree_cache.go +++ b/internal/database/tree_cache.go @@ -75,7 +75,7 @@ func (db *Database) BuildThreadTree(groupDBs *GroupDBs, threadRoot int64) (*Thre // Get all articles in this thread from thread_cache var childArticles string query := `SELECT child_articles FROM thread_cache WHERE thread_root = ?` - err := retryableQueryRowScan(groupDBs.DB, query, []interface{}{threadRoot}, &childArticles) + err := RetryableQueryRowScan(groupDBs.DB, query, []interface{}{threadRoot}, &childArticles) if err != nil { if err == sql.ErrNoRows { // Thread cache not yet built - fall back to single article @@ -200,7 +200,7 @@ func (db *Database) GetCachedTree(groupDBs *GroupDBs, threadRoot int64) (*Thread // Check if tree cache exists and is recent var lastUpdated time.Time query := `SELECT last_updated FROM tree_stats WHERE thread_root = ?` - err := retryableQueryRowScan(groupDBs.DB, query, []interface{}{threadRoot}, &lastUpdated) + err := RetryableQueryRowScan(groupDBs.DB, query, []interface{}{threadRoot}, &lastUpdated) if err != nil { return nil, fmt.Errorf("no cached tree found: %w", err) } @@ -211,7 +211,7 @@ func (db *Database) GetCachedTree(groupDBs *GroupDBs, threadRoot int64) (*Thread } // Load tree structure from cache - rows, err := retryableQuery(groupDBs.DB, ` + rows, err := RetryableQuery(groupDBs.DB, ` SELECT article_num, parent_article, depth, child_count, descendant_count, tree_path, sort_order FROM cached_trees @@ -270,7 +270,7 @@ func (db *Database) GetCachedTree(groupDBs *GroupDBs, threadRoot int64) (*Thread // Load tree stats statsQuery := `SELECT max_depth, total_nodes, leaf_count FROM tree_stats WHERE thread_root = ?` - err = retryableQueryRowScan(groupDBs.DB, statsQuery, []interface{}{threadRoot}, + err = RetryableQueryRowScan(groupDBs.DB, statsQuery, []interface{}{threadRoot}, &tree.MaxDepth, &tree.TotalNodes, &tree.LeafCount) if err != nil { log.Printf("Failed to load tree stats: %v", err) @@ -386,12 +386,12 @@ func (db *Database) CacheTreeStructure(groupDBs *GroupDBs, tree *ThreadTree) err // InvalidateTreeCache removes cached tree data when thread structure changes func (db *Database) InvalidateTreeCache(groupDBs *GroupDBs, threadRoot int64) error { - _, err := retryableExec(groupDBs.DB, `DELETE FROM cached_trees WHERE thread_root = ?`, threadRoot) + _, err := RetryableExec(groupDBs.DB, `DELETE FROM cached_trees WHERE thread_root = ?`, threadRoot) if err != nil { return fmt.Errorf("failed to invalidate tree cache: %w", err) } - _, err = retryableExec(groupDBs.DB, `DELETE FROM tree_stats WHERE thread_root = ?`, threadRoot) + _, 
err = RetryableExec(groupDBs.DB, `DELETE FROM tree_stats WHERE thread_root = ?`, threadRoot) if err != nil { return fmt.Errorf("failed to invalidate tree stats: %w", err) } From e832d5b88d2323c8596ce6bc7f35027fcd4d354f Mon Sep 17 00:00:00 2001 From: go-while Date: Fri, 24 Oct 2025 22:49:29 +0000 Subject: [PATCH 074/100] testing --- cmd/nntp-fetcher/main.go | 14 ++++++++++++-- cmd/nntp-transfer/main.go | 2 ++ internal/database/progress.go | 14 +++++++++----- internal/nntp/transfer-progress.go | 29 ++++++++++++++--------------- 4 files changed, 37 insertions(+), 22 deletions(-) diff --git a/cmd/nntp-fetcher/main.go b/cmd/nntp-fetcher/main.go index 1c55fed..c65c419 100644 --- a/cmd/nntp-fetcher/main.go +++ b/cmd/nntp-fetcher/main.go @@ -311,8 +311,18 @@ func main() { continue } nga, err := db.MainDBGetNewsgroup(ng.Name) - if err != nil || nga == nil || *fetchActiveOnly && !nga.Active { - //log.Printf("[FETCHER] ignore newsgroup '%s' err='%v' ng='%#v'", ng.Name, err, ng) + if err != nil || nga == nil { + log.Printf("[FETCHER] Failed to get newsgroup '%s' from database: err='%v' nga='%#v'", ng.Name, err, nga) + return + } + + if *fetchActiveOnly && !nga.Active { + //log.Printf("[FETCHER] ignore inactive newsgroup '%s'", ng.Name) + continue + } + + if !*fetchActiveOnly && nga.Active { + //log.Printf("[FETCHER] ignore active newsgroup '%s'", ng.Name) continue } diff --git a/cmd/nntp-transfer/main.go b/cmd/nntp-transfer/main.go index c684487..4e96486 100644 --- a/cmd/nntp-transfer/main.go +++ b/cmd/nntp-transfer/main.go @@ -2757,7 +2757,9 @@ forever: if common.WantShutdown() { break forever } + rs.Mux.Lock() log.Printf("Newsgroup: '%s' | CHTTworker (%d): pre append job #%d waiting since %v rs.jobs=%d takeThisChan=%d", *job.Newsgroup, workerID, job.JobID, time.Since(start), len(rs.jobs), len(WorkersTTChannel)) + rs.Mux.Unlock() lastPrint = time.Now() } } diff --git a/internal/database/progress.go b/internal/database/progress.go index d20be92..935cf37 100644 --- a/internal/database/progress.go +++ b/internal/database/progress.go @@ -91,16 +91,16 @@ const query_GetLastArticle = `SELECT last_article FROM progress WHERE backend_na // GetLastArticle returns the last fetched article number for a newsgroup on a backend func (p *ProgressDB) GetLastArticle(backendName, newsgroupName string) (int64, error) { - p.mux.RLock() + //p.mux.RLock() var lastArticle int64 err := RetryableQueryRowScan(p.db, query_GetLastArticle, []interface{}{backendName, newsgroupName}, &lastArticle) if err == sql.ErrNoRows { - p.mux.RUnlock() + //p.mux.RUnlock() //log.Printf("progressDB.GetLastArticle: provider '%s', newsgroup '%s' has no progress", backendName, newsgroupName) p.UpdateProgress(backendName, newsgroupName, 0) // Initialize progress return 0, nil // No previous progress, start from 0 } - p.mux.RUnlock() + //p.mux.RUnlock() if err != nil { return -999, fmt.Errorf("failed to get last article: %w", err) } @@ -121,8 +121,8 @@ ON CONFLICT(backend_name, newsgroup_name) DO UPDATE SET // UpdateProgress updates the fetching progress for a newsgroup on a backend func (p *ProgressDB) UpdateProgress(backendName, newsgroupName string, lastArticle int64) error { - p.mux.Lock() - defer p.mux.Unlock() + //p.mux.Lock() + //defer p.mux.Unlock() _, err := RetryableExec(p.db, query_UpdateProgress, backendName, newsgroupName, lastArticle) if err != nil { return fmt.Errorf("failed to update progress: %w", err) @@ -141,6 +141,8 @@ ORDER BY backend_name, newsgroup_name // GetAllProgress returns all progress entries func (p *ProgressDB) 
GetAllProgress() ([]*ProgressEntry, error) { + //p.mux.RLock() + //defer p.mux.RUnlock() rows, err := RetryableQuery(p.db, query_GetAllProgress) if err != nil { return nil, fmt.Errorf("failed to query progress: %w", err) @@ -181,6 +183,8 @@ ORDER BY newsgroup_name // GetProgressForBackend returns progress entries for a specific backend func (p *ProgressDB) GetProgressForBackend(backendName string) ([]*ProgressEntry, error) { + //p.mux.RLock() + //defer p.mux.RUnlock() rows, err := RetryableQuery(p.db, query_GetProgressForBackend, backendName) if err != nil { return nil, fmt.Errorf("failed to query progress for backend: %w", err) diff --git a/internal/nntp/transfer-progress.go b/internal/nntp/transfer-progress.go index 2caac9b..43dab07 100644 --- a/internal/nntp/transfer-progress.go +++ b/internal/nntp/transfer-progress.go @@ -165,17 +165,17 @@ func (tpdb *TransferProgressDB) InsertResult(newsgroup string, startDate, endDat tpdb.mu.Lock() defer tpdb.mu.Unlock() - // Convert time pointers to nullable strings - var startDateStr, endDateStr interface{} + // Convert time pointers to strings (empty string if nil to match NOT NULL DEFAULT "") + var startDateStr, endDateStr string if startDate != nil { startDateStr = startDate.UTC().Format("2006-01-02 15:04:05") } else { - startDateStr = nil + startDateStr = "" } if endDate != nil { endDateStr = endDate.UTC().Format("2006-01-02 15:04:05") } else { - endDateStr = nil + endDateStr = "" } _, err := database.RetryableExec( @@ -291,32 +291,31 @@ func (tpdb *TransferProgressDB) GetRecentTransfers(limit int) ([]TransferResult, } // NewsgroupExists checks if a newsgroup already has transfer results for the current remote -// with exactly the same start_date and end_date (including NULL values) +// with exactly the same start_date and end_date (empty string represents no filter) func (tpdb *TransferProgressDB) NewsgroupExists(newsgroup string, startDate, endDate *time.Time) (bool, error) { tpdb.mu.RLock() defer tpdb.mu.RUnlock() - // Convert time pointers to nullable strings for comparison - var startDateStr, endDateStr interface{} + // Convert time pointers to strings (empty string if nil to match schema) + var startDateStr, endDateStr string if startDate != nil { startDateStr = startDate.UTC().Format("2006-01-02 15:04:05") } else { - startDateStr = nil + startDateStr = "" } if endDate != nil { endDateStr = endDate.UTC().Format("2006-01-02 15:04:05") } else { - endDateStr = nil + endDateStr = "" } - // Query that checks for exact match including NULL values - // Using IS to handle NULL comparisons properly + // Query that checks for exact match including empty strings query := ` SELECT COUNT(*) FROM transfers WHERE remote_id = ? AND newsgroup = ? - AND (start_date IS ? OR (start_date IS NOT NULL AND ? IS NOT NULL AND start_date = ?)) - AND (end_date IS ? OR (end_date IS NOT NULL AND ? IS NOT NULL AND end_date = ?)) + AND start_date = ? + AND end_date = ? 
` var count int64 @@ -326,8 +325,8 @@ func (tpdb *TransferProgressDB) NewsgroupExists(newsgroup string, startDate, end []interface{}{ tpdb.remoteID, newsgroup, - startDateStr, startDateStr, startDateStr, - endDateStr, endDateStr, endDateStr, + startDateStr, + endDateStr, }, &count, ) From 661070ed38c3ac4db837f4700ae788c44bbd2f80 Mon Sep 17 00:00:00 2001 From: go-while Date: Sat, 25 Oct 2025 07:26:16 +0000 Subject: [PATCH 075/100] testing --- cmd/nntp-fetcher/main.go | 2 +- internal/processor/proc_DLArt.go | 4 +-- web/templates/admin_newsgroups.html | 43 ++++++++++++++++++++++++++++- 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/cmd/nntp-fetcher/main.go b/cmd/nntp-fetcher/main.go index c65c419..c76667e 100644 --- a/cmd/nntp-fetcher/main.go +++ b/cmd/nntp-fetcher/main.go @@ -292,9 +292,9 @@ func main() { DLParChan := make(chan struct{}, DownloadMaxPar) var mux sync.Mutex downloaded := 0 - // scan group worker queued := 0 todo := 0 + // scan group worker go func() { defer close(processor.Batch.Check) for _, ng := range newsgroups { diff --git a/internal/processor/proc_DLArt.go b/internal/processor/proc_DLArt.go index 8d6fb3d..6408cc3 100644 --- a/internal/processor/proc_DLArt.go +++ b/internal/processor/proc_DLArt.go @@ -162,7 +162,7 @@ func (proc *Processor) DownloadArticles(newsgroup string, DLParChan chan struct{ } }() var dups, lastDups, gots, lastGots, notf, lastNotf, errs, lastErrs int64 - aliveCheck := 5 * time.Second + aliveCheck := 9 * time.Second ticker := time.NewTicker(100 * time.Millisecond) defer ticker.Stop() startTime := time.Now() @@ -202,7 +202,7 @@ forProcessing: nextCheck = time.Now().Add(aliveCheck) // Reset last check time deathCounter++ } - if deathCounter > 3 { // If we are stuck for too long + if deathCounter > 9 { // If we are stuck for too long log.Printf("DownloadArticles: '%s' Timeout... stopping import deathCounter=%d", newsgroup, deathCounter) return fmt.Errorf("DownloadArticles: '%s' Timeout... %d articles processed (%d dups, %d got, %d errs)", newsgroup, dups+gots+notf+errs, dups, gots, errs) } diff --git a/web/templates/admin_newsgroups.html b/web/templates/admin_newsgroups.html index 30431e4..afaa30d 100644 --- a/web/templates/admin_newsgroups.html +++ b/web/templates/admin_newsgroups.html @@ -103,6 +103,7 @@
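The date handling changed above replaces NULL with the empty string, so NewsgroupExists can match start_date and end_date with plain equality against the NOT NULL DEFAULT "" columns instead of NULL-aware SQL. A small self-contained sketch of that conversion; the helper name is illustrative, the patch inlines the same logic in InsertResult and NewsgroupExists:

package main

import "time"

// fmtDate mirrors the nil handling used in InsertResult/NewsgroupExists:
// a nil *time.Time becomes "", matching the column default, so the WHERE
// clause can use start_date = ? rather than IS NULL comparisons.
// The helper name is hypothetical.
func fmtDate(t *time.Time) string {
	if t == nil {
		return ""
	}
	return t.UTC().Format("2006-01-02 15:04:05")
}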
📰 Newsgroup Management
Edit Newsgroup Count
Last + Status Section Expiry Actions @@ -147,7 +148,8 @@
📰 Newsgroup Management
data-expiry-days="{{if .ExpiryDays}}{{.ExpiryDays}}{{else}}0{{end}}" data-max-articles="{{if .MaxArticles}}{{.MaxArticles}}{{else}}0{{end}}" data-max-art-size="{{if .MaxArtSize}}{{.MaxArtSize}}{{else}}0{{end}}" - data-active="{{.Active}}"> + data-active="{{.Active}}" + data-status="{{.Status}}"> Edit
@@ -163,6 +165,11 @@
📰 Newsgroup Management

L#{{if .LastArticle}}{{.LastArticle}}{{else}}-{{end}} + + + {{if eq .Status "y"}}y - Posting{{else if eq .Status "m"}}m - Moderated{{else if eq .Status "n"}}n - No Post{{else if eq .Status "j"}}j - Junk{{else if eq .Status "x"}}x - Disabled{{else}}{{.Status}}{{end}} + + {{/* Find current section assignment */}} {{$currentSection := ""}} @@ -483,6 +490,22 @@ + +
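The new Status column above renders the single-letter newsgroup flags as labels. The same mapping as a hedged Go sketch, with the flag meanings taken from the template and the helper name purely illustrative:

package main

// statusLabel reproduces the template's status display: known single-letter
// flags get a label, anything else (for example a redirect like "=foo.bar")
// is shown as-is. The function name is hypothetical.
func statusLabel(status string) string {
	switch status {
	case "y":
		return "y - Posting"
	case "m":
		return "m - Moderated"
	case "n":
		return "n - No Post"
	case "j":
		return "j - Junk"
	case "x":
		return "x - Disabled"
	default:
		return status
	}
}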
+ + +
+ Controls posting permissions and article handling. + For redirect (=), manually enter "=foo.bar" to file articles in foo.bar instead. +
+