forked from forgejo/forgejo
Integrate public as bindata optionally (#293)
* Dropped unused codekit config * Integrated dynamic and static bindata for public * Ignore public bindata * Add a general generate make task * Integrated flexible public assets into web command * Updated vendoring, added all missing govendor deps * Made the linter happy with the bindata and dynamic code * Moved public bindata definition to modules directory * Ignoring the new bindata path now * Updated to the new public modules import path * Updated public bindata command and drop the new prefix
This commit is contained in:
parent
4680c349dd
commit
b6a95a8cb3
691 changed files with 305318 additions and 1272 deletions
899
vendor/github.com/ngaut/zkhelper/zk.go
generated
vendored
Normal file
899
vendor/github.com/ngaut/zkhelper/zk.go
generated
vendored
Normal file
|
@ -0,0 +1,899 @@
|
|||
// zk helper functions
|
||||
// modified from Vitess project
|
||||
|
||||
package zkhelper
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"os"
|
||||
"path"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ngaut/go-zookeeper/zk"
|
||||
"github.com/ngaut/log"
|
||||
)
|
||||
|
||||
// Sentinel errors returned by the blocking lock helpers in this file.
var (
	// ErrTimeout is returned by functions that wait for a result when the
	// timeout value is reached before the result is available.
	ErrTimeout = errors.New("zkutil: obtaining lock timed out")

	// ErrInterrupted is returned by functions that wait for a result when
	// they are interrupted before the result is available.
	ErrInterrupted = errors.New("zkutil: obtaining lock was interrupted")
)
|
||||
|
||||
const (
|
||||
// PERM_DIRECTORY are default permissions for a node.
|
||||
PERM_DIRECTORY = zk.PermAdmin | zk.PermCreate | zk.PermDelete | zk.PermRead | zk.PermWrite
|
||||
// PERM_FILE allows a zk node to emulate file behavior by disallowing child nodes.
|
||||
PERM_FILE = zk.PermAdmin | zk.PermRead | zk.PermWrite
|
||||
MagicPrefix = "zk"
|
||||
)
|
||||
|
||||
// init seeds the global math/rand source from the current wall-clock time.
// CreatePidNode draws its retry jitter from that source.
func init() {
	seed := time.Now().UnixNano()
	rand.Seed(seed)
}
|
||||
|
||||
type MyZkConn struct {
|
||||
*zk.Conn
|
||||
}
|
||||
|
||||
func (conn *MyZkConn) Seq2Str(seq int64) string {
|
||||
return fmt.Sprintf("%0.10d", seq)
|
||||
}
|
||||
|
||||
func ConnectToZk(zkAddr string) (Conn, error) {
|
||||
zkConn, _, err := zk.Connect(strings.Split(zkAddr, ","), 3*time.Second)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &MyZkConn{Conn: zkConn}, nil
|
||||
}
|
||||
|
||||
func ConnectToZkWithTimeout(zkAddr string, recvTime time.Duration) (Conn, error) {
|
||||
zkConn, _, err := zk.Connect(strings.Split(zkAddr, ","), recvTime)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &MyZkConn{Conn: zkConn}, nil
|
||||
}
|
||||
|
||||
func DefaultACLs() []zk.ACL {
|
||||
return zk.WorldACL(zk.PermAll)
|
||||
}
|
||||
|
||||
func DefaultDirACLs() []zk.ACL {
|
||||
return zk.WorldACL(PERM_DIRECTORY)
|
||||
}
|
||||
|
||||
func DefaultFileACLs() []zk.ACL {
|
||||
return zk.WorldACL(PERM_FILE)
|
||||
}
|
||||
|
||||
// IsDirectory returns if this node should be treated as a directory.
|
||||
func IsDirectory(aclv []zk.ACL) bool {
|
||||
for _, acl := range aclv {
|
||||
if acl.Perms != PERM_DIRECTORY {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// ZkErrorEqual reports whether two errors are the same. Two non-nil errors
// are compared by their message text; if either side is nil they are
// compared directly, so only nil equals nil.
func ZkErrorEqual(a, b error) bool {
	if a == nil || b == nil {
		return a == b
	}
	return a.Error() == b.Error()
}
|
||||
|
||||
// Create a path and any pieces required, think mkdir -p.
|
||||
// Intermediate znodes are always created empty.
|
||||
func CreateRecursive(zconn Conn, zkPath, value string, flags int, aclv []zk.ACL) (pathCreated string, err error) {
|
||||
parts := strings.Split(zkPath, "/")
|
||||
if parts[1] != MagicPrefix {
|
||||
return "", fmt.Errorf("zkutil: non /%v path: %v", MagicPrefix, zkPath)
|
||||
}
|
||||
|
||||
pathCreated, err = zconn.Create(zkPath, []byte(value), int32(flags), aclv)
|
||||
if ZkErrorEqual(err, zk.ErrNoNode) {
|
||||
// Make sure that nodes are either "file" or "directory" to mirror file system
|
||||
// semantics.
|
||||
dirAclv := make([]zk.ACL, len(aclv))
|
||||
for i, acl := range aclv {
|
||||
dirAclv[i] = acl
|
||||
dirAclv[i].Perms = PERM_DIRECTORY
|
||||
}
|
||||
_, err = CreateRecursive(zconn, path.Dir(zkPath), "", flags, dirAclv)
|
||||
if err != nil && !ZkErrorEqual(err, zk.ErrNodeExists) {
|
||||
return "", err
|
||||
}
|
||||
pathCreated, err = zconn.Create(zkPath, []byte(value), int32(flags), aclv)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func CreateOrUpdate(zconn Conn, zkPath, value string, flags int, aclv []zk.ACL, recursive bool) (pathCreated string, err error) {
|
||||
if recursive {
|
||||
pathCreated, err = CreateRecursive(zconn, zkPath, value, 0, aclv)
|
||||
} else {
|
||||
pathCreated, err = zconn.Create(zkPath, []byte(value), 0, aclv)
|
||||
}
|
||||
if err != nil && ZkErrorEqual(err, zk.ErrNodeExists) {
|
||||
pathCreated = ""
|
||||
_, err = zconn.Set(zkPath, []byte(value), -1)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// pathItem pairs a path with an error. It is not referenced by the other
// code visible in this file.
type pathItem struct {
	path string
	err  error
}
|
||||
|
||||
func ChildrenRecursive(zconn Conn, zkPath string) ([]string, error) {
|
||||
var err error
|
||||
mutex := sync.Mutex{}
|
||||
wg := sync.WaitGroup{}
|
||||
pathList := make([]string, 0, 32)
|
||||
children, _, err := zconn.Children(zkPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, child := range children {
|
||||
wg.Add(1)
|
||||
go func(child string) {
|
||||
childPath := path.Join(zkPath, child)
|
||||
rChildren, zkErr := ChildrenRecursive(zconn, childPath)
|
||||
if zkErr != nil {
|
||||
// If other processes are deleting nodes, we need to ignore
|
||||
// the missing nodes.
|
||||
if !ZkErrorEqual(zkErr, zk.ErrNoNode) {
|
||||
mutex.Lock()
|
||||
err = zkErr
|
||||
mutex.Unlock()
|
||||
}
|
||||
} else {
|
||||
mutex.Lock()
|
||||
pathList = append(pathList, child)
|
||||
for _, rChild := range rChildren {
|
||||
pathList = append(pathList, path.Join(child, rChild))
|
||||
}
|
||||
mutex.Unlock()
|
||||
}
|
||||
wg.Done()
|
||||
}(child)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
|
||||
mutex.Lock()
|
||||
defer mutex.Unlock()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return pathList, nil
|
||||
}
|
||||
|
||||
// HasWildcard reports whether path contains an unescaped pattern
// metacharacter ('*', '?' or '['). A backslash escapes the byte that follows
// it; a trailing backslash with nothing to escape also yields true.
func HasWildcard(path string) bool {
	n := len(path)
	for i := 0; i < n; i++ {
		c := path[i]
		if c == '*' || c == '?' || c == '[' {
			return true
		}
		if c == '\\' {
			if i == n-1 {
				// Dangling escape at the end of the string.
				return true
			}
			// Skip the escaped byte.
			i++
		}
	}
	return false
}
|
||||
|
||||
func resolveRecursive(zconn Conn, parts []string, toplevel bool) ([]string, error) {
|
||||
for i, part := range parts {
|
||||
if HasWildcard(part) {
|
||||
var children []string
|
||||
zkParentPath := strings.Join(parts[:i], "/")
|
||||
var err error
|
||||
children, _, err = zconn.Children(zkParentPath)
|
||||
if err != nil {
|
||||
// we asked for something like
|
||||
// /zk/cell/aaa/* and
|
||||
// /zk/cell/aaa doesn't exist
|
||||
// -> return empty list, no error
|
||||
// (note we check both a regular zk
|
||||
// error and the error the test
|
||||
// produces)
|
||||
if ZkErrorEqual(err, zk.ErrNoNode) {
|
||||
return nil, nil
|
||||
}
|
||||
// otherwise we return the error
|
||||
return nil, err
|
||||
}
|
||||
sort.Strings(children)
|
||||
|
||||
results := make([][]string, len(children))
|
||||
wg := &sync.WaitGroup{}
|
||||
mu := &sync.Mutex{}
|
||||
var firstError error
|
||||
|
||||
for j, child := range children {
|
||||
matched, err := path.Match(part, child)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if matched {
|
||||
// we have a match!
|
||||
wg.Add(1)
|
||||
newParts := make([]string, len(parts))
|
||||
copy(newParts, parts)
|
||||
newParts[i] = child
|
||||
go func(j int) {
|
||||
defer wg.Done()
|
||||
subResult, err := resolveRecursive(zconn, newParts, false)
|
||||
if err != nil {
|
||||
mu.Lock()
|
||||
if firstError != nil {
|
||||
log.Infof("Multiple error: %v", err)
|
||||
} else {
|
||||
firstError = err
|
||||
}
|
||||
mu.Unlock()
|
||||
} else {
|
||||
results[j] = subResult
|
||||
}
|
||||
}(j)
|
||||
}
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
if firstError != nil {
|
||||
return nil, firstError
|
||||
}
|
||||
|
||||
result := make([]string, 0, 32)
|
||||
for j := 0; j < len(children); j++ {
|
||||
subResult := results[j]
|
||||
if subResult != nil {
|
||||
result = append(result, subResult...)
|
||||
}
|
||||
}
|
||||
|
||||
// we found a part that is a wildcard, we
|
||||
// added the children already, we're done
|
||||
return result, nil
|
||||
}
|
||||
}
|
||||
|
||||
// no part contains a wildcard, add the path if it exists, and done
|
||||
path := strings.Join(parts, "/")
|
||||
if toplevel {
|
||||
// for whatever the user typed at the toplevel, we don't
|
||||
// check it exists or not, we just return it
|
||||
return []string{path}, nil
|
||||
}
|
||||
|
||||
// this is an expanded path, we need to check if it exists
|
||||
_, stat, err := zconn.Exists(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if stat != nil {
|
||||
return []string{path}, nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// resolve paths like:
|
||||
// /zk/nyc/vt/tablets/*/action
|
||||
// /zk/global/vt/keyspaces/*/shards/*/action
|
||||
// /zk/*/vt/tablets/*/action
|
||||
// into real existing paths
|
||||
//
|
||||
// If you send paths that don't contain any wildcard and
|
||||
// don't exist, this function will return an empty array.
|
||||
func ResolveWildcards(zconn Conn, zkPaths []string) ([]string, error) {
|
||||
// check all the paths start with /zk/ before doing anything
|
||||
// time consuming
|
||||
// relax this in case we are not talking to a metaconn and
|
||||
// just want to talk to a specified instance.
|
||||
// for _, zkPath := range zkPaths {
|
||||
// if _, err := ZkCellFromZkPath(zkPath); err != nil {
|
||||
// return nil, err
|
||||
// }
|
||||
// }
|
||||
|
||||
results := make([][]string, len(zkPaths))
|
||||
wg := &sync.WaitGroup{}
|
||||
mu := &sync.Mutex{}
|
||||
var firstError error
|
||||
|
||||
for i, zkPath := range zkPaths {
|
||||
wg.Add(1)
|
||||
parts := strings.Split(zkPath, "/")
|
||||
go func(i int) {
|
||||
defer wg.Done()
|
||||
subResult, err := resolveRecursive(zconn, parts, true)
|
||||
if err != nil {
|
||||
mu.Lock()
|
||||
if firstError != nil {
|
||||
log.Infof("Multiple error: %v", err)
|
||||
} else {
|
||||
firstError = err
|
||||
}
|
||||
mu.Unlock()
|
||||
} else {
|
||||
results[i] = subResult
|
||||
}
|
||||
}(i)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
if firstError != nil {
|
||||
return nil, firstError
|
||||
}
|
||||
|
||||
result := make([]string, 0, 32)
|
||||
for i := 0; i < len(zkPaths); i++ {
|
||||
subResult := results[i]
|
||||
if subResult != nil {
|
||||
result = append(result, subResult...)
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func DeleteRecursive(zconn Conn, zkPath string, version int) error {
|
||||
// version: -1 delete any version of the node at path - only applies to the top node
|
||||
err := zconn.Delete(zkPath, int32(version))
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
if !ZkErrorEqual(err, zk.ErrNotEmpty) {
|
||||
return err
|
||||
}
|
||||
// Remove the ability for other nodes to get created while we are trying to delete.
|
||||
// Otherwise, you can enter a race condition, or get starved out from deleting.
|
||||
_, err = zconn.SetACL(zkPath, zk.WorldACL(zk.PermAdmin|zk.PermDelete|zk.PermRead), int32(version))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
children, _, err := zconn.Children(zkPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, child := range children {
|
||||
err := DeleteRecursive(zconn, path.Join(zkPath, child), -1)
|
||||
if err != nil && !ZkErrorEqual(err, zk.ErrNoNode) {
|
||||
return fmt.Errorf("zkutil: recursive delete failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
err = zconn.Delete(zkPath, int32(version))
|
||||
if err != nil && !ZkErrorEqual(err, zk.ErrNotEmpty) {
|
||||
err = fmt.Errorf("zkutil: nodes getting recreated underneath delete (app race condition): %v", zkPath)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// The lexically lowest node is the lock holder - verify that this
|
||||
// path holds the lock. Call this queue-lock because the semantics are
|
||||
// a hybrid. Normal zk locks make assumptions about sequential
|
||||
// numbering that don't hold when the data in a lock is modified.
|
||||
// if the provided 'interrupted' chan is closed, we'll just stop waiting
|
||||
// and return an interruption error
|
||||
func ObtainQueueLock(zconn Conn, zkPath string, wait time.Duration, interrupted chan struct{}) error {
|
||||
queueNode := path.Dir(zkPath)
|
||||
lockNode := path.Base(zkPath)
|
||||
|
||||
timer := time.NewTimer(wait)
|
||||
trylock:
|
||||
children, _, err := zconn.Children(queueNode)
|
||||
if err != nil {
|
||||
return fmt.Errorf("zkutil: trylock failed %v", err)
|
||||
}
|
||||
sort.Strings(children)
|
||||
if len(children) > 0 {
|
||||
if children[0] == lockNode {
|
||||
return nil
|
||||
}
|
||||
if wait > 0 {
|
||||
prevLock := ""
|
||||
for i := 1; i < len(children); i++ {
|
||||
if children[i] == lockNode {
|
||||
prevLock = children[i-1]
|
||||
break
|
||||
}
|
||||
}
|
||||
if prevLock == "" {
|
||||
return fmt.Errorf("zkutil: no previous queue node found: %v", zkPath)
|
||||
}
|
||||
|
||||
zkPrevLock := path.Join(queueNode, prevLock)
|
||||
_, stat, watch, err := zconn.ExistsW(zkPrevLock)
|
||||
if err != nil {
|
||||
return fmt.Errorf("zkutil: unable to watch queued node %v %v", zkPrevLock, err)
|
||||
}
|
||||
if stat == nil {
|
||||
goto trylock
|
||||
}
|
||||
select {
|
||||
case <-timer.C:
|
||||
break
|
||||
case <-interrupted:
|
||||
return ErrInterrupted
|
||||
case <-watch:
|
||||
// The precise event doesn't matter - try to read again regardless.
|
||||
goto trylock
|
||||
}
|
||||
}
|
||||
return ErrTimeout
|
||||
}
|
||||
return fmt.Errorf("zkutil: empty queue node: %v", queueNode)
|
||||
}
|
||||
|
||||
func ZkEventOk(e zk.Event) bool {
|
||||
return e.State == zk.StateConnected
|
||||
}
|
||||
|
||||
func NodeExists(zconn Conn, zkPath string) (bool, error) {
|
||||
b, _, err := zconn.Exists(zkPath)
|
||||
return b, err
|
||||
}
|
||||
|
||||
// Close the release channel when you want to clean up nicely.
|
||||
func CreatePidNode(zconn Conn, zkPath string, contents string, done chan struct{}) error {
|
||||
// On the first try, assume the cluster is up and running, that will
|
||||
// help hunt down any config issues present at startup
|
||||
if _, err := zconn.Create(zkPath, []byte(contents), zk.FlagEphemeral, zk.WorldACL(PERM_FILE)); err != nil {
|
||||
if ZkErrorEqual(err, zk.ErrNodeExists) {
|
||||
err = zconn.Delete(zkPath, -1)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("zkutil: failed deleting pid node: %v: %v", zkPath, err)
|
||||
}
|
||||
_, err = zconn.Create(zkPath, []byte(contents), zk.FlagEphemeral, zk.WorldACL(PERM_FILE))
|
||||
if err != nil {
|
||||
return fmt.Errorf("zkutil: failed creating pid node: %v: %v", zkPath, err)
|
||||
}
|
||||
}
|
||||
|
||||
go func() {
|
||||
for {
|
||||
_, _, watch, err := zconn.GetW(zkPath)
|
||||
if err != nil {
|
||||
if ZkErrorEqual(err, zk.ErrNoNode) {
|
||||
_, err = zconn.Create(zkPath, []byte(contents), zk.FlagEphemeral, zk.WorldACL(zk.PermAll))
|
||||
if err != nil {
|
||||
log.Warningf("failed recreating pid node: %v: %v", zkPath, err)
|
||||
} else {
|
||||
log.Infof("recreated pid node: %v", zkPath)
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
log.Warningf("failed reading pid node: %v", err)
|
||||
}
|
||||
} else {
|
||||
select {
|
||||
case event := <-watch:
|
||||
if ZkEventOk(event) && event.Type == zk.EventNodeDeleted {
|
||||
// Most likely another process has started up. However,
|
||||
// there is a chance that an ephemeral node is deleted by
|
||||
// the session expiring, yet that same session gets a watch
|
||||
// notification. This seems like buggy behavior, but rather
|
||||
// than race too hard on the node, just wait a bit and see
|
||||
// if the situation resolves itself.
|
||||
log.Warningf("pid deleted: %v", zkPath)
|
||||
} else {
|
||||
log.Infof("pid node event: %v", event)
|
||||
}
|
||||
// break here and wait for a bit before attempting
|
||||
case <-done:
|
||||
log.Infof("pid watcher stopped on done: %v", zkPath)
|
||||
return
|
||||
}
|
||||
}
|
||||
select {
|
||||
// No one likes a thundering herd, least of all zk.
|
||||
case <-time.After(5*time.Second + time.Duration(rand.Int63n(55e9))):
|
||||
case <-done:
|
||||
log.Infof("pid watcher stopped on done: %v", zkPath)
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ZLocker is an interface for a lock that can fail.
type ZLocker interface {
	// LockWithTimeout tries to acquire the lock, giving up after wait.
	// desc is a description recorded alongside the lock.
	LockWithTimeout(wait time.Duration, desc string) error

	// Lock tries to acquire the lock; desc is a description recorded
	// alongside the lock.
	Lock(desc string) error

	// Unlock releases the lock.
	Unlock() error

	// Interrupt signals a pending lock attempt to stop waiting.
	Interrupt()
}
|
||||
|
||||
// Experiment with a little bit of abstraction.
|
||||
// FIMXE(msolo) This object may need a mutex to ensure it can be shared
|
||||
// across goroutines.
|
||||
type zMutex struct {
|
||||
mu sync.Mutex
|
||||
zconn Conn
|
||||
path string // Path under which we try to create lock nodes.
|
||||
contents string
|
||||
interrupted chan struct{}
|
||||
name string // The name of the specific lock node we created.
|
||||
ephemeral bool
|
||||
}
|
||||
|
||||
// CreateMutex initializes an unaquired mutex. A mutex is released only
|
||||
// by Unlock. You can clean up a mutex with delete, but you should be
|
||||
// careful doing so.
|
||||
func CreateMutex(zconn Conn, zkPath string) ZLocker {
|
||||
zm, err := CreateMutexWithContents(zconn, zkPath, map[string]interface{}{})
|
||||
if err != nil {
|
||||
panic(err) // should never happen
|
||||
}
|
||||
return zm
|
||||
}
|
||||
|
||||
// CreateMutex initializes an unaquired mutex with special content for this mutex.
|
||||
// A mutex is released only by Unlock. You can clean up a mutex with delete, but you should be
|
||||
// careful doing so.
|
||||
func CreateMutexWithContents(zconn Conn, zkPath string, contents map[string]interface{}) (ZLocker, error) {
|
||||
hostname, err := os.Hostname()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pid := os.Getpid()
|
||||
contents["hostname"] = hostname
|
||||
contents["pid"] = pid
|
||||
|
||||
data, err := json.Marshal(contents)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &zMutex{zconn: zconn, path: zkPath, contents: string(data), interrupted: make(chan struct{})}, nil
|
||||
}
|
||||
|
||||
// Interrupt releases a lock that's held.
|
||||
func (zm *zMutex) Interrupt() {
|
||||
select {
|
||||
case zm.interrupted <- struct{}{}:
|
||||
default:
|
||||
log.Warningf("zmutex interrupt blocked")
|
||||
}
|
||||
}
|
||||
|
||||
// Lock returns nil when the lock is acquired.
|
||||
func (zm *zMutex) Lock(desc string) error {
|
||||
return zm.LockWithTimeout(365*24*time.Hour, desc)
|
||||
}
|
||||
|
||||
// LockWithTimeout returns nil when the lock is acquired. A lock is
|
||||
// held if the file exists and you are the creator. Setting the wait
|
||||
// to zero makes this a nonblocking lock check.
|
||||
//
|
||||
// FIXME(msolo) Disallow non-super users from removing the lock?
|
||||
func (zm *zMutex) LockWithTimeout(wait time.Duration, desc string) (err error) {
|
||||
timer := time.NewTimer(wait)
|
||||
defer func() {
|
||||
if panicErr := recover(); panicErr != nil || err != nil {
|
||||
zm.deleteLock()
|
||||
}
|
||||
}()
|
||||
// Ensure the rendezvous node is here.
|
||||
// FIXME(msolo) Assuming locks are contended, it will be cheaper to assume this just
|
||||
// exists.
|
||||
_, err = CreateRecursive(zm.zconn, zm.path, "", 0, zk.WorldACL(PERM_DIRECTORY))
|
||||
if err != nil && !ZkErrorEqual(err, zk.ErrNodeExists) {
|
||||
return err
|
||||
}
|
||||
|
||||
lockPrefix := path.Join(zm.path, "lock-")
|
||||
zflags := zk.FlagSequence
|
||||
if zm.ephemeral {
|
||||
zflags = zflags | zk.FlagEphemeral
|
||||
}
|
||||
|
||||
// update node content
|
||||
var lockContent map[string]interface{}
|
||||
err = json.Unmarshal([]byte(zm.contents), &lockContent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
lockContent["desc"] = desc
|
||||
newContent, err := json.Marshal(lockContent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
createlock:
|
||||
lockCreated, err := zm.zconn.Create(lockPrefix, newContent, int32(zflags), zk.WorldACL(PERM_FILE))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
name := path.Base(lockCreated)
|
||||
zm.mu.Lock()
|
||||
zm.name = name
|
||||
zm.mu.Unlock()
|
||||
|
||||
trylock:
|
||||
children, _, err := zm.zconn.Children(zm.path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("zkutil: trylock failed %v", err)
|
||||
}
|
||||
sort.Strings(children)
|
||||
if len(children) == 0 {
|
||||
return fmt.Errorf("zkutil: empty lock: %v", zm.path)
|
||||
}
|
||||
|
||||
if children[0] == name {
|
||||
// We are the lock owner.
|
||||
return nil
|
||||
}
|
||||
|
||||
// This is the degenerate case of a nonblocking lock check. It's not optimal, but
|
||||
// also probably not worth optimizing.
|
||||
if wait == 0 {
|
||||
return ErrTimeout
|
||||
}
|
||||
prevLock := ""
|
||||
for i := 1; i < len(children); i++ {
|
||||
if children[i] == name {
|
||||
prevLock = children[i-1]
|
||||
break
|
||||
}
|
||||
}
|
||||
if prevLock == "" {
|
||||
// This is an interesting case. The node disappeared
|
||||
// underneath us, probably due to a session loss. We can
|
||||
// recreate the lock node (with a new sequence number) and
|
||||
// keep trying.
|
||||
log.Warningf("zkutil: no lock node found: %v/%v", zm.path, zm.name)
|
||||
goto createlock
|
||||
}
|
||||
|
||||
zkPrevLock := path.Join(zm.path, prevLock)
|
||||
exist, stat, watch, err := zm.zconn.ExistsW(zkPrevLock)
|
||||
if err != nil {
|
||||
// FIXME(msolo) Should this be a retry?
|
||||
return fmt.Errorf("zkutil: unable to watch previous lock node %v %v", zkPrevLock, err)
|
||||
}
|
||||
if stat == nil || !exist {
|
||||
goto trylock
|
||||
}
|
||||
select {
|
||||
case <-timer.C:
|
||||
return ErrTimeout
|
||||
case <-zm.interrupted:
|
||||
return ErrInterrupted
|
||||
case event := <-watch:
|
||||
log.Infof("zkutil: lock event: %v", event)
|
||||
// The precise event doesn't matter - try to read again regardless.
|
||||
goto trylock
|
||||
}
|
||||
panic("unexpected")
|
||||
}
|
||||
|
||||
// Unlock returns nil if the lock was successfully
|
||||
// released. Otherwise, it is most likely a zk related error.
|
||||
func (zm *zMutex) Unlock() error {
|
||||
return zm.deleteLock()
|
||||
}
|
||||
|
||||
func (zm *zMutex) deleteLock() error {
|
||||
zm.mu.Lock()
|
||||
zpath := path.Join(zm.path, zm.name)
|
||||
zm.mu.Unlock()
|
||||
|
||||
err := zm.zconn.Delete(zpath, -1)
|
||||
if err != nil && !ZkErrorEqual(err, zk.ErrNoNode) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ZElector stores basic state for running an election.
|
||||
type ZElector struct {
|
||||
*zMutex
|
||||
path string
|
||||
leader string
|
||||
}
|
||||
|
||||
func (ze *ZElector) isLeader() bool {
|
||||
return ze.leader == ze.name
|
||||
}
|
||||
|
||||
// electionEvent pairs an event code with an error. It is not referenced by
// the other code visible in this file.
type electionEvent struct {
	Event int
	Err   error
}
|
||||
|
||||
// backoffDelay produces a doubling sequence of delays that starts at min
// and is capped at max.
type backoffDelay struct {
	min   time.Duration // first delay, and the value Reset returns to
	max   time.Duration // upper bound for the stored next delay
	delay time.Duration // delay that the next NextDelay call will return
}

// newBackoffDelay returns a backoffDelay whose first delay is min.
func newBackoffDelay(min, max time.Duration) *backoffDelay {
	return &backoffDelay{
		min:   min,
		max:   max,
		delay: min,
	}
}

// NextDelay returns the current delay and doubles the stored one for the
// following call, clamping it to max.
func (bd *backoffDelay) NextDelay() time.Duration {
	current := bd.delay
	next := current * 2
	if next > bd.max {
		next = bd.max
	}
	bd.delay = next
	return current
}

// Reset restarts the sequence at min.
func (bd *backoffDelay) Reset() {
	bd.delay = bd.min
}
|
||||
|
||||
// ElectorTask is the interface for a task that runs essentially forever or
// until something bad happens. If a task must be stopped, it should be
// handled promptly - no second notification will be sent.
type ElectorTask interface {
	// Run executes the task and returns when it ends.
	Run() error

	// Stop asks the running task to end.
	Stop()

	// Interrupted returns true if the task was interrupted, false if it
	// died of natural causes. An interrupted task indicates that the
	// election should stop.
	Interrupted() bool
}
|
||||
|
||||
// CreateElection returns an initialized elector. An election is
|
||||
// really a cycle of events. You are flip-flopping between leader and
|
||||
// candidate. It's better to think of this as a stream of events that
|
||||
// one needs to react to.
|
||||
func CreateElection(zconn Conn, zkPath string) ZElector {
|
||||
zm, err := CreateElectionWithContents(zconn, zkPath, map[string]interface{}{})
|
||||
if err != nil {
|
||||
// should never happend
|
||||
panic(err)
|
||||
}
|
||||
return zm
|
||||
}
|
||||
|
||||
// CreateElection returns an initialized elector with special contents. An election is
|
||||
// really a cycle of events. You are flip-flopping between leader and
|
||||
// candidate. It's better to think of this as a stream of events that
|
||||
// one needs to react to.
|
||||
func CreateElectionWithContents(zconn Conn, zkPath string, contents map[string]interface{}) (ZElector, error) {
|
||||
l, err := CreateMutexWithContents(zconn, path.Join(zkPath, "candidates"), contents)
|
||||
if err != nil {
|
||||
return ZElector{}, err
|
||||
}
|
||||
zm := l.(*zMutex)
|
||||
zm.ephemeral = true
|
||||
return ZElector{zMutex: zm, path: zkPath}, nil
|
||||
}
|
||||
|
||||
// RunTask returns nil when the underlyingtask ends or the error it
|
||||
// generated.
|
||||
func (ze *ZElector) RunTask(task ElectorTask) error {
|
||||
delay := newBackoffDelay(100*time.Millisecond, 1*time.Minute)
|
||||
leaderPath := path.Join(ze.path, "leader")
|
||||
for {
|
||||
_, err := CreateRecursive(ze.zconn, leaderPath, "", 0, zk.WorldACL(PERM_FILE))
|
||||
if err == nil || ZkErrorEqual(err, zk.ErrNodeExists) {
|
||||
break
|
||||
}
|
||||
log.Warningf("election leader create failed: %v", err)
|
||||
time.Sleep(delay.NextDelay())
|
||||
}
|
||||
|
||||
for {
|
||||
err := ze.Lock("RunTask")
|
||||
if err != nil {
|
||||
log.Warningf("election lock failed: %v", err)
|
||||
if err == ErrInterrupted {
|
||||
return ErrInterrupted
|
||||
}
|
||||
continue
|
||||
}
|
||||
// Confirm your win and deliver acceptance speech. This notifies
|
||||
// listeners who will have been watching the leader node for
|
||||
// changes.
|
||||
_, err = ze.zconn.Set(leaderPath, []byte(ze.contents), -1)
|
||||
if err != nil {
|
||||
log.Warningf("election promotion failed: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
log.Infof("election promote leader %v", leaderPath)
|
||||
taskErrChan := make(chan error)
|
||||
go func() {
|
||||
taskErrChan <- task.Run()
|
||||
}()
|
||||
|
||||
watchLeader:
|
||||
// Watch the leader so we can get notified if something goes wrong.
|
||||
data, _, watch, err := ze.zconn.GetW(leaderPath)
|
||||
if err != nil {
|
||||
log.Warningf("election unable to watch leader node %v %v", leaderPath, err)
|
||||
// FIXME(msolo) Add delay
|
||||
goto watchLeader
|
||||
}
|
||||
|
||||
if string(data) != ze.contents {
|
||||
log.Warningf("election unable to promote leader")
|
||||
task.Stop()
|
||||
// We won the election, but we didn't become the leader. How is that possible?
|
||||
// (see Bush v. Gore for some inspiration)
|
||||
// It means:
|
||||
// 1. Someone isn't playing by the election rules (a bad actor).
|
||||
// Hard to detect - let's assume we don't have this problem. :)
|
||||
// 2. We lost our connection somehow and the ephemeral lock was cleared,
|
||||
// allowing someone else to win the election.
|
||||
continue
|
||||
}
|
||||
|
||||
// This is where we start our target process and watch for its failure.
|
||||
waitForEvent:
|
||||
select {
|
||||
case <-ze.interrupted:
|
||||
log.Warning("election interrupted - stop child process")
|
||||
task.Stop()
|
||||
// Once the process dies from the signal, this will all tear down.
|
||||
goto waitForEvent
|
||||
case taskErr := <-taskErrChan:
|
||||
// If our code fails, unlock to trigger an election.
|
||||
log.Infof("election child process ended: %v", taskErr)
|
||||
ze.Unlock()
|
||||
if task.Interrupted() {
|
||||
log.Warningf("election child process interrupted - stepping down")
|
||||
return ErrInterrupted
|
||||
}
|
||||
continue
|
||||
case zevent := <-watch:
|
||||
// We had a zk connection hiccup. We have a few choices,
|
||||
// but it depends on the constraints and the events.
|
||||
//
|
||||
// If we get SESSION_EXPIRED our connection loss triggered an
|
||||
// election that we won't have won and the thus the lock was
|
||||
// automatically freed. We have no choice but to start over.
|
||||
if zevent.State == zk.StateExpired {
|
||||
log.Warningf("election leader watch expired")
|
||||
task.Stop()
|
||||
continue
|
||||
}
|
||||
|
||||
// Otherwise, we had an intermittent issue or something touched
|
||||
// the node. Either we lost our position or someone broke
|
||||
// protocol and touched the leader node. We just reconnect and
|
||||
// revalidate. In the meantime, assume we are still the leader
|
||||
// until we determine otherwise.
|
||||
//
|
||||
// On a reconnect we will be able to see the leader
|
||||
// information. If we still hold the position, great. If not, we
|
||||
// kill the associated process.
|
||||
//
|
||||
// On a leader node change, we need to perform the same
|
||||
// validation. It's possible an election completes without the
|
||||
// old leader realizing he is out of touch.
|
||||
log.Warningf("election leader watch event %v", zevent)
|
||||
goto watchLeader
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue