Pause queues (#15928)

* Start adding mechanism to return unhandled data
* Create pushback interface
* Add Pausable interface to WorkerPool and Manager
* Implement Pausable and PushBack for the bytefifos
* Implement Pausable and Pushback for ChannelQueues and ChannelUniqueQueues
* Wire in UI for pausing
* add testcases and fix a few issues
* fix build
* prevent "race" in the test
* fix jsoniter mismerge
* fix conflicts
* fix format
* Add warnings for no worker configurations and prevent data-loss with redis/levelqueue
* Use StopTimer

Signed-off-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: Lauris BH <lauris@nix.lv>
Co-authored-by: 6543 <6543@obermui.de>
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>

parent 27ee01e1e8
commit a82fd98d53
34 changed files with 1389 additions and 122 deletions
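
The core of this change is a pair of signalling channels on the WorkerPool: resumed starts out closed (the pool is running), and pausing swaps the pair, closing paused and replacing resumed with a fresh open channel. The sketch below shows that idiom on its own so the hunks that follow are easier to read; the pausable type and its helpers are illustrative names, not the committed code.

package main

import (
	"fmt"
	"sync"
	"time"
)

type pausable struct {
	lock    sync.Mutex
	paused  chan struct{} // closed while paused
	resumed chan struct{} // closed while running
}

func newPausable() *pausable {
	resumed := make(chan struct{})
	close(resumed) // start in the "running" state
	return &pausable{paused: make(chan struct{}), resumed: resumed}
}

// Pause swaps the channels so that <-paused succeeds and <-resumed blocks.
func (p *pausable) Pause() {
	p.lock.Lock()
	defer p.lock.Unlock()
	select {
	case <-p.paused: // already paused
	default:
		p.resumed = make(chan struct{})
		close(p.paused)
	}
}

// Resume is the mirror image of Pause.
func (p *pausable) Resume() {
	p.lock.Lock()
	defer p.lock.Unlock()
	select {
	case <-p.resumed: // already running
	default:
		p.paused = make(chan struct{})
		close(p.resumed)
	}
}

// IsPausedIsResumed returns the current pair of signalling channels.
func (p *pausable) IsPausedIsResumed() (<-chan struct{}, <-chan struct{}) {
	p.lock.Lock()
	defer p.lock.Unlock()
	return p.paused, p.resumed
}

func main() {
	p := newPausable()
	go func() {
		for {
			paused, _ := p.IsPausedIsResumed()
			select {
			case <-paused:
				fmt.Println("worker: paused, waiting for resume")
				_, resumed := p.IsPausedIsResumed()
				<-resumed
				fmt.Println("worker: resumed")
			default:
				time.Sleep(10 * time.Millisecond) // pretend to work
			}
		}
	}()
	p.Pause()
	time.Sleep(50 * time.Millisecond)
	p.Resume()
	time.Sleep(50 * time.Millisecond)
}
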
@@ -22,6 +22,8 @@ type WorkerPool struct {
 	lock               sync.Mutex
 	baseCtx            context.Context
 	baseCtxCancel      context.CancelFunc
+	paused             chan struct{}
+	resumed            chan struct{}
 	cond               *sync.Cond
 	qid                int64
 	maxNumberOfWorkers int
@@ -35,6 +37,11 @@ type WorkerPool struct {
 	numInQueue int64
 }
 
+var (
+	_ Flushable   = &WorkerPool{}
+	_ ManagedPool = &WorkerPool{}
+)
+
 // WorkerPoolConfiguration is the basic configuration for a WorkerPool
 type WorkerPoolConfiguration struct {
 	QueueLength int
@@ -50,11 +57,15 @@ func NewWorkerPool(handle HandlerFunc, config WorkerPoolConfiguration) *WorkerPo
 	ctx, cancel := context.WithCancel(context.Background())
 
 	dataChan := make(chan Data, config.QueueLength)
+	resumed := make(chan struct{})
+	close(resumed)
 	pool := &WorkerPool{
 		baseCtx:            ctx,
 		baseCtxCancel:      cancel,
 		batchLength:        config.BatchLength,
 		dataChan:           dataChan,
+		resumed:            resumed,
+		paused:             make(chan struct{}),
 		handle:             handle,
 		blockTimeout:       config.BlockTimeout,
 		boostTimeout:       config.BoostTimeout,
@@ -69,6 +80,14 @@ func NewWorkerPool(handle HandlerFunc, config WorkerPoolConfiguration) *WorkerPo
 func (p *WorkerPool) Push(data Data) {
 	atomic.AddInt64(&p.numInQueue, 1)
+	p.lock.Lock()
+	select {
+	case <-p.paused:
+		p.lock.Unlock()
+		p.dataChan <- data
+		return
+	default:
+	}
 
 	if p.blockTimeout > 0 && p.boostTimeout > 0 && (p.numberOfWorkers <= p.maxNumberOfWorkers || p.maxNumberOfWorkers < 0) {
 		if p.numberOfWorkers == 0 {
 			p.zeroBoost()
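
The new pause check in Push relies on a standard Go idiom: a receive from a closed channel succeeds immediately, so a select with a default branch acts as a non-blocking probe of the paused channel. A small stand-alone illustration; isClosed is a hypothetical helper, not something in the diff.

package main

import "fmt"

// isClosed reports whether a signal channel has been closed, without blocking.
func isClosed(ch chan struct{}) bool {
	select {
	case <-ch:
		return true // a closed signal channel always yields immediately
	default:
		return false
	}
}

func main() {
	paused := make(chan struct{})
	fmt.Println(isClosed(paused)) // false: the queue is running
	close(paused)
	fmt.Println(isClosed(paused)) // true: the queue is paused
}
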
@@ -82,6 +101,17 @@ func (p *WorkerPool) Push(data Data) {
 	}
 }
 
+// HasNoWorkerScaling will return true if the queue has no workers, and has no worker boosting
+func (p *WorkerPool) HasNoWorkerScaling() bool {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+	return p.hasNoWorkerScaling()
+}
+
+func (p *WorkerPool) hasNoWorkerScaling() bool {
+	return p.numberOfWorkers == 0 && (p.boostTimeout == 0 || p.boostWorkers == 0 || p.maxNumberOfWorkers == 0)
+}
+
 func (p *WorkerPool) zeroBoost() {
 	ctx, cancel := context.WithTimeout(p.baseCtx, p.boostTimeout)
 	mq := GetManager().GetManagedQueue(p.qid)
@@ -272,6 +302,12 @@ func (p *WorkerPool) addWorkers(ctx context.Context, cancel context.CancelFunc,
 			p.cond.Broadcast()
 			cancel()
 		}
+		if p.hasNoWorkerScaling() {
+			log.Warn(
+				"Queue: %d is configured to be non-scaling and has no workers - this configuration is likely incorrect.\n"+
+					"The queue will be paused to prevent data-loss with the assumption that you will add workers and unpause as required.", p.qid)
+			p.pause()
+		}
 		p.lock.Unlock()
 	}()
 }
@@ -290,13 +326,65 @@ func (p *WorkerPool) Wait() {
 	p.cond.Wait()
 }
 
+// IsPaused returns if the pool is paused
+func (p *WorkerPool) IsPaused() bool {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+	select {
+	case <-p.paused:
+		return true
+	default:
+		return false
+	}
+}
+
+// IsPausedIsResumed returns if the pool is paused and a channel that is closed when it is resumed
+func (p *WorkerPool) IsPausedIsResumed() (<-chan struct{}, <-chan struct{}) {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+	return p.paused, p.resumed
+}
+
+// Pause pauses the WorkerPool
+func (p *WorkerPool) Pause() {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+	p.pause()
+}
+
+func (p *WorkerPool) pause() {
+	select {
+	case <-p.paused:
+	default:
+		p.resumed = make(chan struct{})
+		close(p.paused)
+	}
+}
+
+// Resume resumes the WorkerPool
+func (p *WorkerPool) Resume() {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+	select {
+	case <-p.resumed:
+	default:
+		p.paused = make(chan struct{})
+		close(p.resumed)
+	}
+}
+
 // CleanUp will drain the remaining contents of the channel
 // This should be called after AddWorkers context is closed
 func (p *WorkerPool) CleanUp(ctx context.Context) {
 	log.Trace("WorkerPool: %d CleanUp", p.qid)
 	close(p.dataChan)
 	for data := range p.dataChan {
-		p.handle(data)
+		if unhandled := p.handle(data); unhandled != nil {
+			if unhandled != nil {
+				log.Error("Unhandled Data in clean-up of queue %d", p.qid)
+			}
+		}
+
 		atomic.AddInt64(&p.numInQueue, -1)
 		select {
 		case <-ctx.Done():
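
The unhandled value checked above comes from the reworked handler contract mentioned in the commit message ("return unhandled data"): a handler may hand back the items it could not process so the queue can log them or push them back. A hedged sketch of a handler written against that convention; Data is re-declared locally and processOne is invented for the example.

package main

import "fmt"

// Data mirrors the queue package's payload type for this sketch.
type Data interface{}

// handler processes a batch and returns whatever it could not handle,
// matching the "unhandled" return value the diff checks for.
func handler(items ...Data) (unhandled []Data) {
	for _, item := range items {
		if err := processOne(item); err != nil {
			unhandled = append(unhandled, item)
		}
	}
	return unhandled
}

// processOne is a stand-in for real work; it only accepts strings.
func processOne(item Data) error {
	s, ok := item.(string)
	if !ok {
		return fmt.Errorf("unexpected payload %v", item)
	}
	fmt.Println("processed", s)
	return nil
}

func main() {
	left := handler("a", "b", 3) // the 3 cannot be processed in this toy example
	fmt.Println("unhandled:", left)
}
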
@@ -327,7 +415,9 @@ func (p *WorkerPool) FlushWithContext(ctx context.Context) error {
 	for {
 		select {
 		case data := <-p.dataChan:
-			p.handle(data)
+			if unhandled := p.handle(data); unhandled != nil {
+				log.Error("Unhandled Data whilst flushing queue %d", p.qid)
+			}
 			atomic.AddInt64(&p.numInQueue, -1)
 		case <-p.baseCtx.Done():
 			return p.baseCtx.Err()
@@ -341,13 +431,45 @@ func (p *WorkerPool) FlushWithContext(ctx context.Context) error {
 
 func (p *WorkerPool) doWork(ctx context.Context) {
 	delay := time.Millisecond * 300
+
+	// Create a common timer - we will use this elsewhere
+	timer := time.NewTimer(0)
+	util.StopTimer(timer)
+
+	paused, _ := p.IsPausedIsResumed()
 	data := make([]Data, 0, p.batchLength)
 	for {
 		select {
+		case <-paused:
+			log.Trace("Worker for Queue %d Pausing", p.qid)
+			if len(data) > 0 {
+				log.Trace("Handling: %d data, %v", len(data), data)
+				if unhandled := p.handle(data...); unhandled != nil {
+					log.Error("Unhandled Data in queue %d", p.qid)
+				}
+				atomic.AddInt64(&p.numInQueue, -1*int64(len(data)))
+			}
+			_, resumed := p.IsPausedIsResumed()
+			select {
+			case <-resumed:
+				paused, _ = p.IsPausedIsResumed()
+				log.Trace("Worker for Queue %d Resuming", p.qid)
+				util.StopTimer(timer)
+			case <-ctx.Done():
+				log.Trace("Worker shutting down")
+				return
+			}
+		default:
+		}
+		select {
+		case <-paused:
+			// go back around
 		case <-ctx.Done():
 			if len(data) > 0 {
 				log.Trace("Handling: %d data, %v", len(data), data)
-				p.handle(data...)
+				if unhandled := p.handle(data...); unhandled != nil {
+					log.Error("Unhandled Data in queue %d", p.qid)
+				}
 				atomic.AddInt64(&p.numInQueue, -1*int64(len(data)))
 			}
 			log.Trace("Worker shutting down")
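
One detail worth noting in the paused branch above: a paused worker waits on the resumed channel and on its context at the same time, so pausing a queue cannot wedge shutdown. A reduced sketch of that wait; waitWhilePaused is an illustrative helper, not a function in the code.

package main

import (
	"context"
	"fmt"
	"time"
)

// waitWhilePaused blocks until the pool is resumed or ctx is cancelled.
// It returns false when the worker should shut down instead of resuming.
func waitWhilePaused(ctx context.Context, resumed <-chan struct{}) bool {
	select {
	case <-resumed:
		return true
	case <-ctx.Done():
		return false
	}
}

func main() {
	resumed := make(chan struct{}) // never closed: the pool stays paused here
	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
	defer cancel()

	if waitWhilePaused(ctx, resumed) {
		fmt.Println("worker resuming")
	} else {
		fmt.Println("worker shutting down") // reached via the context timeout
	}
}
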
@@ -357,59 +479,36 @@ func (p *WorkerPool) doWork(ctx context.Context) {
 				// the dataChan has been closed - we should finish up:
 				if len(data) > 0 {
 					log.Trace("Handling: %d data, %v", len(data), data)
-					p.handle(data...)
+					if unhandled := p.handle(data...); unhandled != nil {
+						log.Error("Unhandled Data in queue %d", p.qid)
+					}
 					atomic.AddInt64(&p.numInQueue, -1*int64(len(data)))
 				}
 				log.Trace("Worker shutting down")
 				return
 			}
 			data = append(data, datum)
+			util.StopTimer(timer)
+
 			if len(data) >= p.batchLength {
 				log.Trace("Handling: %d data, %v", len(data), data)
-				p.handle(data...)
+				if unhandled := p.handle(data...); unhandled != nil {
+					log.Error("Unhandled Data in queue %d", p.qid)
+				}
 				atomic.AddInt64(&p.numInQueue, -1*int64(len(data)))
 				data = make([]Data, 0, p.batchLength)
+			} else {
+				timer.Reset(delay)
 			}
-		default:
-			timer := time.NewTimer(delay)
-			select {
-			case <-ctx.Done():
-				util.StopTimer(timer)
-				if len(data) > 0 {
-					log.Trace("Handling: %d data, %v", len(data), data)
-					p.handle(data...)
-					atomic.AddInt64(&p.numInQueue, -1*int64(len(data)))
-				}
-				log.Trace("Worker shutting down")
-				return
-			case datum, ok := <-p.dataChan:
-				util.StopTimer(timer)
-				if !ok {
-					// the dataChan has been closed - we should finish up:
-					if len(data) > 0 {
-						log.Trace("Handling: %d data, %v", len(data), data)
-						p.handle(data...)
-						atomic.AddInt64(&p.numInQueue, -1*int64(len(data)))
-					}
-					log.Trace("Worker shutting down")
-					return
-				}
-				data = append(data, datum)
-				if len(data) >= p.batchLength {
-					log.Trace("Handling: %d data, %v", len(data), data)
-					p.handle(data...)
-					atomic.AddInt64(&p.numInQueue, -1*int64(len(data)))
-					data = make([]Data, 0, p.batchLength)
-				}
-			case <-timer.C:
-				delay = time.Millisecond * 100
-				if len(data) > 0 {
-					log.Trace("Handling: %d data, %v", len(data), data)
-					p.handle(data...)
-					atomic.AddInt64(&p.numInQueue, -1*int64(len(data)))
-					data = make([]Data, 0, p.batchLength)
-				}
-			}
+		case <-timer.C:
+			delay = time.Millisecond * 100
+			if len(data) > 0 {
+				log.Trace("Handling: %d data, %v", len(data), data)
+				if unhandled := p.handle(data...); unhandled != nil {
+					log.Error("Unhandled Data in queue %d", p.qid)
+				}
+				atomic.AddInt64(&p.numInQueue, -1*int64(len(data)))
+				data = make([]Data, 0, p.batchLength)
+			}
 		}
 	}
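
The tail of doWork keeps the existing batching policy but moves it onto the single shared timer: incoming items are appended to the batch, a full batch is handled immediately, and a partial batch re-arms the timer so it is flushed after a short delay. A reduced, self-contained sketch of that policy with made-up names and sizes; the inline stop-and-drain steps stand in for util.StopTimer.

package main

import (
	"fmt"
	"time"
)

func main() {
	const batchLength = 3
	delay := 300 * time.Millisecond

	in := make(chan int)
	go func() {
		for i := 1; i <= 4; i++ {
			in <- i
		}
		time.Sleep(time.Second) // let the timer flush the partial batch
		close(in)
	}()

	// One shared timer, created already stopped.
	timer := time.NewTimer(0)
	if !timer.Stop() {
		<-timer.C
	}

	batch := make([]int, 0, batchLength)
	for {
		select {
		case v, ok := <-in:
			if !ok {
				if len(batch) > 0 {
					fmt.Println("final flush:", batch)
				}
				return
			}
			// Stop and drain the timer before deciding whether to re-arm it.
			if !timer.Stop() {
				select {
				case <-timer.C:
				default:
				}
			}
			batch = append(batch, v)
			if len(batch) >= batchLength {
				fmt.Println("full batch:", batch)
				batch = batch[:0]
			} else {
				timer.Reset(delay)
			}
		case <-timer.C:
			if len(batch) > 0 {
				fmt.Println("timer flush:", batch)
				batch = batch[:0]
			}
		}
	}
}
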