forked from forgejo/forgejo
Upgrade blevesearch dependency to v2.0.1 (#14346)
* Upgrade blevesearch dependency to v2.0.1 * Update rupture to v1.0.0 * Fix test
This commit is contained in:
parent
3aa53dc6bc
commit
f5abe2f563
459 changed files with 7518 additions and 4211 deletions
19
vendor/github.com/blevesearch/bleve/v2/.gitignore
generated
vendored
Normal file
19
vendor/github.com/blevesearch/bleve/v2/.gitignore
generated
vendored
Normal file
|
@ -0,0 +1,19 @@
|
|||
#*
|
||||
*.sublime-*
|
||||
*~
|
||||
.#*
|
||||
.project
|
||||
.settings
|
||||
**/.idea/
|
||||
**/*.iml
|
||||
.DS_Store
|
||||
query_string.y.go.tmp
|
||||
/analysis/token_filters/cld2/cld2-read-only
|
||||
/analysis/token_filters/cld2/libcld2_full.a
|
||||
/cmd/bleve/bleve
|
||||
vendor/**
|
||||
!vendor/manifest
|
||||
/y.output
|
||||
/search/query/y.output
|
||||
*.test
|
||||
tags
|
25
vendor/github.com/blevesearch/bleve/v2/.travis.yml
generated
vendored
Normal file
25
vendor/github.com/blevesearch/bleve/v2/.travis.yml
generated
vendored
Normal file
|
@ -0,0 +1,25 @@
|
|||
sudo: false
|
||||
|
||||
language: go
|
||||
|
||||
go:
|
||||
- "1.12.x"
|
||||
- "1.13.x"
|
||||
- "1.14.x"
|
||||
|
||||
script:
|
||||
- go get golang.org/x/tools/cmd/cover
|
||||
- go get github.com/mattn/goveralls
|
||||
- go get github.com/kisielk/errcheck
|
||||
- go get -u github.com/FiloSottile/gvt
|
||||
- gvt restore
|
||||
- go test -race -v $(go list ./... | grep -v vendor/)
|
||||
- go vet $(go list ./... | grep -v vendor/)
|
||||
- go test ./test -v -indexType scorch
|
||||
- errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/);
|
||||
- docs/project-code-coverage.sh
|
||||
- docs/build_children.sh
|
||||
|
||||
notifications:
|
||||
email:
|
||||
- marty.schoch@gmail.com
|
16
vendor/github.com/blevesearch/bleve/v2/CONTRIBUTING.md
generated
vendored
Normal file
16
vendor/github.com/blevesearch/bleve/v2/CONTRIBUTING.md
generated
vendored
Normal file
|
@ -0,0 +1,16 @@
|
|||
# Contributing to Bleve
|
||||
|
||||
We look forward to your contributions, but ask that you first review these guidelines.
|
||||
|
||||
### Sign the CLA
|
||||
|
||||
As Bleve is a Couchbase project we require contributors accept the [Couchbase Contributor License Agreement](http://review.couchbase.org/static/individual_agreement.html). To sign this agreement log into the Couchbase [code review tool](http://review.couchbase.org/). The Bleve project does not use this code review tool but it is still used to track acceptance of the contributor license agreements.
|
||||
|
||||
### Submitting a Pull Request
|
||||
|
||||
All types of contributions are welcome, but please keep the following in mind:
|
||||
|
||||
- If you're planning a large change, you should really discuss it in a github issue or on the google group first. This helps avoid duplicate effort and spending time on something that may not be merged.
|
||||
- Existing tests should continue to pass, new tests for the contribution are nice to have.
|
||||
- All code should have gone through `go fmt`
|
||||
- All code should pass `go vet`
|
202
vendor/github.com/blevesearch/bleve/v2/LICENSE
generated
vendored
Normal file
202
vendor/github.com/blevesearch/bleve/v2/LICENSE
generated
vendored
Normal file
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
68
vendor/github.com/blevesearch/bleve/v2/README.md
generated
vendored
Normal file
68
vendor/github.com/blevesearch/bleve/v2/README.md
generated
vendored
Normal file
|
@ -0,0 +1,68 @@
|
|||
#  bleve
|
||||
|
||||
[](https://github.com/blevesearch/bleve/actions?query=workflow%3ATests+event%3Apush+branch%3Amaster)
|
||||
[](https://coveralls.io/github/blevesearch/bleve?branch=master)
|
||||
[](https://godoc.org/github.com/blevesearch/bleve)
|
||||
[](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
[](https://codebeat.co/projects/github-com-blevesearch-bleve)
|
||||
[](https://goreportcard.com/report/blevesearch/bleve)
|
||||
[](https://sourcegraph.com/github.com/blevesearch/bleve?badge)
|
||||
[](https://opensource.org/licenses/Apache-2.0)
|
||||
|
||||
modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/)
|
||||
|
||||
## Features
|
||||
|
||||
* Index any go data structure (including JSON)
|
||||
* Intelligent defaults backed up by powerful configuration
|
||||
* Supported field types:
|
||||
* Text, Numeric, Date
|
||||
* Supported query types:
|
||||
* Term, Phrase, Match, Match Phrase, Prefix
|
||||
* Conjunction, Disjunction, Boolean
|
||||
* Numeric Range, Date Range
|
||||
* Simple query [syntax](http://www.blevesearch.com/docs/Query-String-Query/) for human entry
|
||||
* tf-idf Scoring
|
||||
* Search result match highlighting
|
||||
* Supports Aggregating Facets:
|
||||
* Terms Facet
|
||||
* Numeric Range Facet
|
||||
* Date Range Facet
|
||||
|
||||
## Discussion
|
||||
|
||||
Discuss usage and development of bleve in the [google group](https://groups.google.com/forum/#!forum/bleve).
|
||||
|
||||
## Indexing
|
||||
|
||||
```go
|
||||
message := struct{
|
||||
Id string
|
||||
From string
|
||||
Body string
|
||||
}{
|
||||
Id: "example",
|
||||
From: "marty.schoch@gmail.com",
|
||||
Body: "bleve indexing is easy",
|
||||
}
|
||||
|
||||
mapping := bleve.NewIndexMapping()
|
||||
index, err := bleve.New("example.bleve", mapping)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
index.Index(message.Id, message)
|
||||
```
|
||||
|
||||
## Querying
|
||||
|
||||
```go
|
||||
index, _ := bleve.Open("example.bleve")
|
||||
query := bleve.NewQueryStringQuery("bleve")
|
||||
searchRequest := bleve.NewSearchRequest(query)
|
||||
searchResult, _ := index.Search(searchRequest)
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
Apache License Version 2.0
|
145
vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/custom/custom.go
generated
vendored
Normal file
145
vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/custom/custom.go
generated
vendored
Normal file
|
@ -0,0 +1,145 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package custom
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "custom"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
|
||||
var err error
|
||||
var charFilters []analysis.CharFilter
|
||||
charFiltersValue, ok := config["char_filters"]
|
||||
if ok {
|
||||
switch charFiltersValue := charFiltersValue.(type) {
|
||||
case []string:
|
||||
charFilters, err = getCharFilters(charFiltersValue, cache)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
case []interface{}:
|
||||
charFiltersNames, err := convertInterfaceSliceToStringSlice(charFiltersValue, "char filter")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
charFilters, err = getCharFilters(charFiltersNames, cache)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported type for char_filters, must be slice")
|
||||
}
|
||||
}
|
||||
|
||||
var tokenizerName string
|
||||
tokenizerValue, ok := config["tokenizer"]
|
||||
if ok {
|
||||
tokenizerName, ok = tokenizerValue.(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify tokenizer as string")
|
||||
}
|
||||
} else {
|
||||
return nil, fmt.Errorf("must specify tokenizer")
|
||||
}
|
||||
|
||||
tokenizer, err := cache.TokenizerNamed(tokenizerName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var tokenFilters []analysis.TokenFilter
|
||||
tokenFiltersValue, ok := config["token_filters"]
|
||||
if ok {
|
||||
switch tokenFiltersValue := tokenFiltersValue.(type) {
|
||||
case []string:
|
||||
tokenFilters, err = getTokenFilters(tokenFiltersValue, cache)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
case []interface{}:
|
||||
tokenFiltersNames, err := convertInterfaceSliceToStringSlice(tokenFiltersValue, "token filter")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tokenFilters, err = getTokenFilters(tokenFiltersNames, cache)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported type for token_filters, must be slice")
|
||||
}
|
||||
}
|
||||
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: tokenizer,
|
||||
}
|
||||
if charFilters != nil {
|
||||
rv.CharFilters = charFilters
|
||||
}
|
||||
if tokenFilters != nil {
|
||||
rv.TokenFilters = tokenFilters
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
}
|
||||
|
||||
func getCharFilters(charFilterNames []string, cache *registry.Cache) ([]analysis.CharFilter, error) {
|
||||
charFilters := make([]analysis.CharFilter, len(charFilterNames))
|
||||
for i, charFilterName := range charFilterNames {
|
||||
charFilter, err := cache.CharFilterNamed(charFilterName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
charFilters[i] = charFilter
|
||||
}
|
||||
|
||||
return charFilters, nil
|
||||
}
|
||||
|
||||
func getTokenFilters(tokenFilterNames []string, cache *registry.Cache) ([]analysis.TokenFilter, error) {
|
||||
tokenFilters := make([]analysis.TokenFilter, len(tokenFilterNames))
|
||||
for i, tokenFilterName := range tokenFilterNames {
|
||||
tokenFilter, err := cache.TokenFilterNamed(tokenFilterName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tokenFilters[i] = tokenFilter
|
||||
}
|
||||
|
||||
return tokenFilters, nil
|
||||
}
|
||||
|
||||
func convertInterfaceSliceToStringSlice(interfaceSlice []interface{}, objType string) ([]string, error) {
|
||||
stringSlice := make([]string, len(interfaceSlice))
|
||||
for i, interfaceObj := range interfaceSlice {
|
||||
stringObj, ok := interfaceObj.(string)
|
||||
if ok {
|
||||
stringSlice[i] = stringObj
|
||||
} else {
|
||||
return nil, fmt.Errorf(objType + " name must be a string")
|
||||
}
|
||||
}
|
||||
|
||||
return stringSlice, nil
|
||||
}
|
38
vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/keyword/keyword.go
generated
vendored
Normal file
38
vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/keyword/keyword.go
generated
vendored
Normal file
|
@ -0,0 +1,38 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package keyword
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "keyword"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
keywordTokenizer, err := cache.TokenizerNamed(single.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: keywordTokenizer,
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
}
|
52
vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/standard/standard.go
generated
vendored
Normal file
52
vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/standard/standard.go
generated
vendored
Normal file
|
@ -0,0 +1,52 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package standard
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/analysis/lang/en"
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
|
||||
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "standard"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
tokenizer, err := cache.TokenizerNamed(unicode.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stopEnFilter, err := cache.TokenFilterNamed(en.StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: tokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
toLowerFilter,
|
||||
stopEnFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
}
|
64
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/flexible/flexible.go
generated
vendored
Normal file
64
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/flexible/flexible.go
generated
vendored
Normal file
|
@ -0,0 +1,64 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package flexible
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "flexiblego"
|
||||
|
||||
type DateTimeParser struct {
|
||||
layouts []string
|
||||
}
|
||||
|
||||
func New(layouts []string) *DateTimeParser {
|
||||
return &DateTimeParser{
|
||||
layouts: layouts,
|
||||
}
|
||||
}
|
||||
|
||||
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, error) {
|
||||
for _, layout := range p.layouts {
|
||||
rv, err := time.Parse(layout, input)
|
||||
if err == nil {
|
||||
return rv, nil
|
||||
}
|
||||
}
|
||||
return time.Time{}, analysis.ErrInvalidDateTime
|
||||
}
|
||||
|
||||
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
|
||||
layouts, ok := config["layouts"].([]interface{})
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify layouts")
|
||||
}
|
||||
var layoutStrs []string
|
||||
for _, layout := range layouts {
|
||||
layoutStr, ok := layout.(string)
|
||||
if ok {
|
||||
layoutStrs = append(layoutStrs, layoutStr)
|
||||
}
|
||||
}
|
||||
return New(layoutStrs), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
}
|
45
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/optional/optional.go
generated
vendored
Normal file
45
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/optional/optional.go
generated
vendored
Normal file
|
@ -0,0 +1,45 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package optional
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/analysis/datetime/flexible"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "dateTimeOptional"
|
||||
|
||||
const rfc3339NoTimezone = "2006-01-02T15:04:05"
|
||||
const rfc3339NoTimezoneNoT = "2006-01-02 15:04:05"
|
||||
const rfc3339NoTime = "2006-01-02"
|
||||
|
||||
var layouts = []string{
|
||||
time.RFC3339Nano,
|
||||
time.RFC3339,
|
||||
rfc3339NoTimezone,
|
||||
rfc3339NoTimezoneNoT,
|
||||
rfc3339NoTime,
|
||||
}
|
||||
|
||||
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
|
||||
return flexible.New(layouts), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
}
|
70
vendor/github.com/blevesearch/bleve/v2/analysis/freq.go
generated
vendored
Normal file
70
vendor/github.com/blevesearch/bleve/v2/analysis/freq.go
generated
vendored
Normal file
|
@ -0,0 +1,70 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package analysis
|
||||
|
||||
import (
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
func TokenFrequency(tokens TokenStream, arrayPositions []uint64, options index.FieldIndexingOptions) index.TokenFrequencies {
|
||||
rv := make(map[string]*index.TokenFreq, len(tokens))
|
||||
|
||||
if options.IncludeTermVectors() {
|
||||
tls := make([]index.TokenLocation, len(tokens))
|
||||
tlNext := 0
|
||||
|
||||
for _, token := range tokens {
|
||||
tls[tlNext] = index.TokenLocation{
|
||||
ArrayPositions: arrayPositions,
|
||||
Start: token.Start,
|
||||
End: token.End,
|
||||
Position: token.Position,
|
||||
}
|
||||
|
||||
curr, ok := rv[string(token.Term)]
|
||||
if ok {
|
||||
curr.Locations = append(curr.Locations, &tls[tlNext])
|
||||
} else {
|
||||
curr = &index.TokenFreq{
|
||||
Term: token.Term,
|
||||
Locations: []*index.TokenLocation{&tls[tlNext]},
|
||||
}
|
||||
rv[string(token.Term)] = curr
|
||||
}
|
||||
|
||||
if !options.SkipFreqNorm() {
|
||||
curr.SetFrequency(curr.Frequency() + 1)
|
||||
}
|
||||
|
||||
tlNext++
|
||||
}
|
||||
} else {
|
||||
for _, token := range tokens {
|
||||
curr, exists := rv[string(token.Term)]
|
||||
if !exists {
|
||||
curr = &index.TokenFreq{
|
||||
Term: token.Term,
|
||||
}
|
||||
rv[string(token.Term)] = curr
|
||||
}
|
||||
|
||||
if !options.SkipFreqNorm() {
|
||||
curr.SetFrequency(curr.Frequency() + 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
70
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/analyzer_en.go
generated
vendored
Normal file
70
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/analyzer_en.go
generated
vendored
Normal file
|
@ -0,0 +1,70 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package en implements an analyzer with reasonable defaults for processing
|
||||
// English text.
|
||||
//
|
||||
// It strips possessive suffixes ('s), transforms tokens to lower case,
|
||||
// removes stopwords from a built-in list, and applies porter stemming.
|
||||
//
|
||||
// The built-in stopwords list is defined in EnglishStopWords.
|
||||
package en
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/porter"
|
||||
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
|
||||
)
|
||||
|
||||
const AnalyzerName = "en"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
tokenizer, err := cache.TokenizerNamed(unicode.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
possEnFilter, err := cache.TokenFilterNamed(PossessiveName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stopEnFilter, err := cache.TokenFilterNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stemmerEnFilter, err := cache.TokenFilterNamed(porter.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: tokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
possEnFilter,
|
||||
toLowerFilter,
|
||||
stopEnFilter,
|
||||
stemmerEnFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
|
||||
}
|
67
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/possessive_filter_en.go
generated
vendored
Normal file
67
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/possessive_filter_en.go
generated
vendored
Normal file
|
@ -0,0 +1,67 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package en
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
// PossessiveName is the name PossessiveFilter is registered as
|
||||
// in the bleve registry.
|
||||
const PossessiveName = "possessive_en"
|
||||
|
||||
const rightSingleQuotationMark = '’'
|
||||
const apostrophe = '\''
|
||||
const fullWidthApostrophe = '''
|
||||
|
||||
const apostropheChars = rightSingleQuotationMark + apostrophe + fullWidthApostrophe
|
||||
|
||||
// PossessiveFilter implements a TokenFilter which
|
||||
// strips the English possessive suffix ('s) from tokens.
|
||||
// It handle a variety of apostrophe types, is case-insensitive
|
||||
// and doesn't distinguish between possessive and contraction.
|
||||
// (ie "She's So Rad" becomes "She So Rad")
|
||||
type PossessiveFilter struct {
|
||||
}
|
||||
|
||||
func NewPossessiveFilter() *PossessiveFilter {
|
||||
return &PossessiveFilter{}
|
||||
}
|
||||
|
||||
func (s *PossessiveFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
for _, token := range input {
|
||||
lastRune, lastRuneSize := utf8.DecodeLastRune(token.Term)
|
||||
if lastRune == 's' || lastRune == 'S' {
|
||||
nextLastRune, nextLastRuneSize := utf8.DecodeLastRune(token.Term[:len(token.Term)-lastRuneSize])
|
||||
if nextLastRune == rightSingleQuotationMark ||
|
||||
nextLastRune == apostrophe ||
|
||||
nextLastRune == fullWidthApostrophe {
|
||||
token.Term = token.Term[:len(token.Term)-lastRuneSize-nextLastRuneSize]
|
||||
}
|
||||
}
|
||||
}
|
||||
return input
|
||||
}
|
||||
|
||||
func PossessiveFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewPossessiveFilter(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(PossessiveName, PossessiveFilterConstructor)
|
||||
}
|
49
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stemmer_en_snowball.go
generated
vendored
Normal file
49
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stemmer_en_snowball.go
generated
vendored
Normal file
|
@ -0,0 +1,49 @@
|
|||
// Copyright (c) 2020 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package en
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
|
||||
"github.com/blevesearch/snowballstem"
|
||||
"github.com/blevesearch/snowballstem/english"
|
||||
)
|
||||
|
||||
const SnowballStemmerName = "stemmer_en_snowball"
|
||||
|
||||
type EnglishStemmerFilter struct {
|
||||
}
|
||||
|
||||
func NewEnglishStemmerFilter() *EnglishStemmerFilter {
|
||||
return &EnglishStemmerFilter{}
|
||||
}
|
||||
|
||||
func (s *EnglishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
for _, token := range input {
|
||||
env := snowballstem.NewEnv(string(token.Term))
|
||||
english.Stem(env)
|
||||
token.Term = []byte(env.Current())
|
||||
}
|
||||
return input
|
||||
}
|
||||
|
||||
func EnglishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewEnglishStemmerFilter(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(SnowballStemmerName, EnglishStemmerFilterConstructor)
|
||||
}
|
33
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stop_filter_en.go
generated
vendored
Normal file
33
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stop_filter_en.go
generated
vendored
Normal file
|
@ -0,0 +1,33 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package en
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/stop"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
}
|
344
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stop_words_en.go
generated
vendored
Normal file
344
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stop_words_en.go
generated
vendored
Normal file
|
@ -0,0 +1,344 @@
|
|||
package en
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_en"
|
||||
|
||||
// EnglishStopWords is the built-in list of stopwords used by the "stop_en" TokenFilter.
|
||||
//
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
|
||||
// ` was changed to ' to allow for literal string
|
||||
var EnglishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/english/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||
|
||||
| An English stop word list. Comments begin with vertical bar. Each stop
|
||||
| word is at the start of a line.
|
||||
|
||||
| Many of the forms below are quite rare (e.g. "yourselves") but included for
|
||||
| completeness.
|
||||
|
||||
| PRONOUNS FORMS
|
||||
| 1st person sing
|
||||
|
||||
i | subject, always in upper case of course
|
||||
|
||||
me | object
|
||||
my | possessive adjective
|
||||
| the possessive pronoun 'mine' is best suppressed, because of the
|
||||
| sense of coal-mine etc.
|
||||
myself | reflexive
|
||||
| 1st person plural
|
||||
we | subject
|
||||
|
||||
| us | object
|
||||
| care is required here because US = United States. It is usually
|
||||
| safe to remove it if it is in lower case.
|
||||
our | possessive adjective
|
||||
ours | possessive pronoun
|
||||
ourselves | reflexive
|
||||
| second person (archaic 'thou' forms not included)
|
||||
you | subject and object
|
||||
your | possessive adjective
|
||||
yours | possessive pronoun
|
||||
yourself | reflexive (singular)
|
||||
yourselves | reflexive (plural)
|
||||
| third person singular
|
||||
he | subject
|
||||
him | object
|
||||
his | possessive adjective and pronoun
|
||||
himself | reflexive
|
||||
|
||||
she | subject
|
||||
her | object and possessive adjective
|
||||
hers | possessive pronoun
|
||||
herself | reflexive
|
||||
|
||||
it | subject and object
|
||||
its | possessive adjective
|
||||
itself | reflexive
|
||||
| third person plural
|
||||
they | subject
|
||||
them | object
|
||||
their | possessive adjective
|
||||
theirs | possessive pronoun
|
||||
themselves | reflexive
|
||||
| other forms (demonstratives, interrogatives)
|
||||
what
|
||||
which
|
||||
who
|
||||
whom
|
||||
this
|
||||
that
|
||||
these
|
||||
those
|
||||
|
||||
| VERB FORMS (using F.R. Palmer's nomenclature)
|
||||
| BE
|
||||
am | 1st person, present
|
||||
is | -s form (3rd person, present)
|
||||
are | present
|
||||
was | 1st person, past
|
||||
were | past
|
||||
be | infinitive
|
||||
been | past participle
|
||||
being | -ing form
|
||||
| HAVE
|
||||
have | simple
|
||||
has | -s form
|
||||
had | past
|
||||
having | -ing form
|
||||
| DO
|
||||
do | simple
|
||||
does | -s form
|
||||
did | past
|
||||
doing | -ing form
|
||||
|
||||
| The forms below are, I believe, best omitted, because of the significant
|
||||
| homonym forms:
|
||||
|
||||
| He made a WILL
|
||||
| old tin CAN
|
||||
| merry month of MAY
|
||||
| a smell of MUST
|
||||
| fight the good fight with all thy MIGHT
|
||||
|
||||
| would, could, should, ought might however be included
|
||||
|
||||
| | AUXILIARIES
|
||||
| | WILL
|
||||
|will
|
||||
|
||||
would
|
||||
|
||||
| | SHALL
|
||||
|shall
|
||||
|
||||
should
|
||||
|
||||
| | CAN
|
||||
|can
|
||||
|
||||
could
|
||||
|
||||
| | MAY
|
||||
|may
|
||||
|might
|
||||
| | MUST
|
||||
|must
|
||||
| | OUGHT
|
||||
|
||||
ought
|
||||
|
||||
| COMPOUND FORMS, increasingly encountered nowadays in 'formal' writing
|
||||
| pronoun + verb
|
||||
|
||||
i'm
|
||||
you're
|
||||
he's
|
||||
she's
|
||||
it's
|
||||
we're
|
||||
they're
|
||||
i've
|
||||
you've
|
||||
we've
|
||||
they've
|
||||
i'd
|
||||
you'd
|
||||
he'd
|
||||
she'd
|
||||
we'd
|
||||
they'd
|
||||
i'll
|
||||
you'll
|
||||
he'll
|
||||
she'll
|
||||
we'll
|
||||
they'll
|
||||
|
||||
| verb + negation
|
||||
|
||||
isn't
|
||||
aren't
|
||||
wasn't
|
||||
weren't
|
||||
hasn't
|
||||
haven't
|
||||
hadn't
|
||||
doesn't
|
||||
don't
|
||||
didn't
|
||||
|
||||
| auxiliary + negation
|
||||
|
||||
won't
|
||||
wouldn't
|
||||
shan't
|
||||
shouldn't
|
||||
can't
|
||||
cannot
|
||||
couldn't
|
||||
mustn't
|
||||
|
||||
| miscellaneous forms
|
||||
|
||||
let's
|
||||
that's
|
||||
who's
|
||||
what's
|
||||
here's
|
||||
there's
|
||||
when's
|
||||
where's
|
||||
why's
|
||||
how's
|
||||
|
||||
| rarer forms
|
||||
|
||||
| daren't needn't
|
||||
|
||||
| doubtful forms
|
||||
|
||||
| oughtn't mightn't
|
||||
|
||||
| ARTICLES
|
||||
a
|
||||
an
|
||||
the
|
||||
|
||||
| THE REST (Overlap among prepositions, conjunctions, adverbs etc is so
|
||||
| high, that classification is pointless.)
|
||||
and
|
||||
but
|
||||
if
|
||||
or
|
||||
because
|
||||
as
|
||||
until
|
||||
while
|
||||
|
||||
of
|
||||
at
|
||||
by
|
||||
for
|
||||
with
|
||||
about
|
||||
against
|
||||
between
|
||||
into
|
||||
through
|
||||
during
|
||||
before
|
||||
after
|
||||
above
|
||||
below
|
||||
to
|
||||
from
|
||||
up
|
||||
down
|
||||
in
|
||||
out
|
||||
on
|
||||
off
|
||||
over
|
||||
under
|
||||
|
||||
again
|
||||
further
|
||||
then
|
||||
once
|
||||
|
||||
here
|
||||
there
|
||||
when
|
||||
where
|
||||
why
|
||||
how
|
||||
|
||||
all
|
||||
any
|
||||
both
|
||||
each
|
||||
few
|
||||
more
|
||||
most
|
||||
other
|
||||
some
|
||||
such
|
||||
|
||||
no
|
||||
nor
|
||||
not
|
||||
only
|
||||
own
|
||||
same
|
||||
so
|
||||
than
|
||||
too
|
||||
very
|
||||
|
||||
| Just for the record, the following words are among the commonest in English
|
||||
|
||||
| one
|
||||
| every
|
||||
| least
|
||||
| less
|
||||
| many
|
||||
| now
|
||||
| ever
|
||||
| never
|
||||
| say
|
||||
| says
|
||||
| said
|
||||
| also
|
||||
| get
|
||||
| go
|
||||
| goes
|
||||
| just
|
||||
| made
|
||||
| make
|
||||
| put
|
||||
| see
|
||||
| seen
|
||||
| whether
|
||||
| like
|
||||
| well
|
||||
| back
|
||||
| even
|
||||
| still
|
||||
| way
|
||||
| take
|
||||
| since
|
||||
| another
|
||||
| however
|
||||
| two
|
||||
| three
|
||||
| four
|
||||
| five
|
||||
| first
|
||||
| second
|
||||
| new
|
||||
| old
|
||||
| high
|
||||
| long
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(EnglishStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
}
|
7
vendor/github.com/blevesearch/bleve/v2/analysis/test_words.txt
generated
vendored
Normal file
7
vendor/github.com/blevesearch/bleve/v2/analysis/test_words.txt
generated
vendored
Normal file
|
@ -0,0 +1,7 @@
|
|||
# full line comment
|
||||
marty
|
||||
steve # trailing comment
|
||||
| different format of comment
|
||||
dustin
|
||||
siri | different style trailing comment
|
||||
multiple words with different whitespace
|
105
vendor/github.com/blevesearch/bleve/v2/analysis/token/lowercase/lowercase.go
generated
vendored
Normal file
105
vendor/github.com/blevesearch/bleve/v2/analysis/token/lowercase/lowercase.go
generated
vendored
Normal file
|
@ -0,0 +1,105 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package lowercase implements a TokenFilter which converts
|
||||
// tokens to lower case according to unicode rules.
|
||||
package lowercase
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
// Name is the name used to register LowerCaseFilter in the bleve registry
|
||||
const Name = "to_lower"
|
||||
|
||||
type LowerCaseFilter struct {
|
||||
}
|
||||
|
||||
func NewLowerCaseFilter() *LowerCaseFilter {
|
||||
return &LowerCaseFilter{}
|
||||
}
|
||||
|
||||
func (f *LowerCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
for _, token := range input {
|
||||
token.Term = toLowerDeferredCopy(token.Term)
|
||||
}
|
||||
return input
|
||||
}
|
||||
|
||||
func LowerCaseFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewLowerCaseFilter(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(Name, LowerCaseFilterConstructor)
|
||||
}
|
||||
|
||||
// toLowerDeferredCopy will function exactly like
|
||||
// bytes.ToLower() only it will reuse (overwrite)
|
||||
// the original byte array when possible
|
||||
// NOTE: because its possible that the lower-case
|
||||
// form of a rune has a different utf-8 encoded
|
||||
// length, in these cases a new byte array is allocated
|
||||
func toLowerDeferredCopy(s []byte) []byte {
|
||||
j := 0
|
||||
for i := 0; i < len(s); {
|
||||
wid := 1
|
||||
r := rune(s[i])
|
||||
if r >= utf8.RuneSelf {
|
||||
r, wid = utf8.DecodeRune(s[i:])
|
||||
}
|
||||
|
||||
l := unicode.ToLower(r)
|
||||
|
||||
// If the rune is already lowercased, just move to the
|
||||
// next rune.
|
||||
if l == r {
|
||||
i += wid
|
||||
j += wid
|
||||
continue
|
||||
}
|
||||
|
||||
// Handles the Unicode edge-case where the last
|
||||
// rune in a word on the greek Σ needs to be converted
|
||||
// differently.
|
||||
if l == 'σ' && i+2 == len(s) {
|
||||
l = 'ς'
|
||||
}
|
||||
|
||||
lwid := utf8.RuneLen(l)
|
||||
if lwid > wid {
|
||||
// utf-8 encoded replacement is wider
|
||||
// for now, punt and defer
|
||||
// to bytes.ToLower() for the remainder
|
||||
// only known to happen with chars
|
||||
// Rune Ⱥ(570) width 2 - Lower ⱥ(11365) width 3
|
||||
// Rune Ⱦ(574) width 2 - Lower ⱦ(11366) width 3
|
||||
rest := bytes.ToLower(s[i:])
|
||||
rv := make([]byte, j+len(rest))
|
||||
copy(rv[:j], s[:j])
|
||||
copy(rv[j:], rest)
|
||||
return rv
|
||||
} else {
|
||||
utf8.EncodeRune(s[j:], l)
|
||||
}
|
||||
i += wid
|
||||
j += lwid
|
||||
}
|
||||
return s[:j]
|
||||
}
|
53
vendor/github.com/blevesearch/bleve/v2/analysis/token/porter/porter.go
generated
vendored
Normal file
53
vendor/github.com/blevesearch/bleve/v2/analysis/token/porter/porter.go
generated
vendored
Normal file
|
@ -0,0 +1,53 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package porter
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
|
||||
"github.com/blevesearch/go-porterstemmer"
|
||||
)
|
||||
|
||||
const Name = "stemmer_porter"
|
||||
|
||||
type PorterStemmer struct {
|
||||
}
|
||||
|
||||
func NewPorterStemmer() *PorterStemmer {
|
||||
return &PorterStemmer{}
|
||||
}
|
||||
|
||||
func (s *PorterStemmer) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
for _, token := range input {
|
||||
// if it is not a protected keyword, stem it
|
||||
if !token.KeyWord {
|
||||
termRunes := bytes.Runes(token.Term)
|
||||
stemmedRunes := porterstemmer.StemWithoutLowerCasing(termRunes)
|
||||
token.Term = analysis.BuildTermFromRunes(stemmedRunes)
|
||||
}
|
||||
}
|
||||
return input
|
||||
}
|
||||
|
||||
func PorterStemmerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewPorterStemmer(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(Name, PorterStemmerConstructor)
|
||||
}
|
70
vendor/github.com/blevesearch/bleve/v2/analysis/token/stop/stop.go
generated
vendored
Normal file
70
vendor/github.com/blevesearch/bleve/v2/analysis/token/stop/stop.go
generated
vendored
Normal file
|
@ -0,0 +1,70 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package stop implements a TokenFilter removing tokens found in
|
||||
// a TokenMap.
|
||||
//
|
||||
// It constructor takes the following arguments:
|
||||
//
|
||||
// "stop_token_map" (string): the name of the token map identifying tokens to
|
||||
// remove.
|
||||
package stop
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "stop_tokens"
|
||||
|
||||
type StopTokensFilter struct {
|
||||
stopTokens analysis.TokenMap
|
||||
}
|
||||
|
||||
func NewStopTokensFilter(stopTokens analysis.TokenMap) *StopTokensFilter {
|
||||
return &StopTokensFilter{
|
||||
stopTokens: stopTokens,
|
||||
}
|
||||
}
|
||||
|
||||
func (f *StopTokensFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
j := 0
|
||||
for _, token := range input {
|
||||
_, isStopToken := f.stopTokens[string(token.Term)]
|
||||
if !isStopToken {
|
||||
input[j] = token
|
||||
j++
|
||||
}
|
||||
}
|
||||
|
||||
return input[:j]
|
||||
}
|
||||
|
||||
func StopTokensFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
stopTokenMapName, ok := config["stop_token_map"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify stop_token_map")
|
||||
}
|
||||
stopTokenMap, err := cache.TokenMapNamed(stopTokenMapName)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error building stop words filter: %v", err)
|
||||
}
|
||||
return NewStopTokensFilter(stopTokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(Name, StopTokensFilterConstructor)
|
||||
}
|
79
vendor/github.com/blevesearch/bleve/v2/analysis/token/unicodenorm/unicodenorm.go
generated
vendored
Normal file
79
vendor/github.com/blevesearch/bleve/v2/analysis/token/unicodenorm/unicodenorm.go
generated
vendored
Normal file
|
@ -0,0 +1,79 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package unicodenorm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
const Name = "normalize_unicode"
|
||||
|
||||
const NFC = "nfc"
|
||||
const NFD = "nfd"
|
||||
const NFKC = "nfkc"
|
||||
const NFKD = "nfkd"
|
||||
|
||||
var forms = map[string]norm.Form{
|
||||
NFC: norm.NFC,
|
||||
NFD: norm.NFD,
|
||||
NFKC: norm.NFKC,
|
||||
NFKD: norm.NFKD,
|
||||
}
|
||||
|
||||
type UnicodeNormalizeFilter struct {
|
||||
form norm.Form
|
||||
}
|
||||
|
||||
func NewUnicodeNormalizeFilter(formName string) (*UnicodeNormalizeFilter, error) {
|
||||
form, ok := forms[formName]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("no form named %s", formName)
|
||||
}
|
||||
return &UnicodeNormalizeFilter{
|
||||
form: form,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func MustNewUnicodeNormalizeFilter(formName string) *UnicodeNormalizeFilter {
|
||||
filter, err := NewUnicodeNormalizeFilter(formName)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return filter
|
||||
}
|
||||
|
||||
func (s *UnicodeNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
for _, token := range input {
|
||||
token.Term = s.form.Bytes(token.Term)
|
||||
}
|
||||
return input
|
||||
}
|
||||
|
||||
func UnicodeNormalizeFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
formVal, ok := config["form"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify form")
|
||||
}
|
||||
form := formVal
|
||||
return NewUnicodeNormalizeFilter(form)
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(Name, UnicodeNormalizeFilterConstructor)
|
||||
}
|
49
vendor/github.com/blevesearch/bleve/v2/analysis/tokenizer/single/single.go
generated
vendored
Normal file
49
vendor/github.com/blevesearch/bleve/v2/analysis/tokenizer/single/single.go
generated
vendored
Normal file
|
@ -0,0 +1,49 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package single
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "single"
|
||||
|
||||
type SingleTokenTokenizer struct {
|
||||
}
|
||||
|
||||
func NewSingleTokenTokenizer() *SingleTokenTokenizer {
|
||||
return &SingleTokenTokenizer{}
|
||||
}
|
||||
|
||||
func (t *SingleTokenTokenizer) Tokenize(input []byte) analysis.TokenStream {
|
||||
return analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: input,
|
||||
Position: 1,
|
||||
Start: 0,
|
||||
End: len(input),
|
||||
Type: analysis.AlphaNumeric,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func SingleTokenTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
|
||||
return NewSingleTokenTokenizer(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenizer(Name, SingleTokenTokenizerConstructor)
|
||||
}
|
131
vendor/github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode/unicode.go
generated
vendored
Normal file
131
vendor/github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode/unicode.go
generated
vendored
Normal file
|
@ -0,0 +1,131 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package unicode
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/segment"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "unicode"
|
||||
|
||||
type UnicodeTokenizer struct {
|
||||
}
|
||||
|
||||
func NewUnicodeTokenizer() *UnicodeTokenizer {
|
||||
return &UnicodeTokenizer{}
|
||||
}
|
||||
|
||||
func (rt *UnicodeTokenizer) Tokenize(input []byte) analysis.TokenStream {
|
||||
rvx := make([]analysis.TokenStream, 0, 10) // When rv gets full, append to rvx.
|
||||
rv := make(analysis.TokenStream, 0, 1)
|
||||
|
||||
ta := []analysis.Token(nil)
|
||||
taNext := 0
|
||||
|
||||
segmenter := segment.NewWordSegmenterDirect(input)
|
||||
start := 0
|
||||
pos := 1
|
||||
|
||||
guessRemaining := func(end int) int {
|
||||
avgSegmentLen := end / (len(rv) + 1)
|
||||
if avgSegmentLen < 1 {
|
||||
avgSegmentLen = 1
|
||||
}
|
||||
|
||||
remainingLen := len(input) - end
|
||||
|
||||
return remainingLen / avgSegmentLen
|
||||
}
|
||||
|
||||
for segmenter.Segment() {
|
||||
segmentBytes := segmenter.Bytes()
|
||||
end := start + len(segmentBytes)
|
||||
if segmenter.Type() != segment.None {
|
||||
if taNext >= len(ta) {
|
||||
remainingSegments := guessRemaining(end)
|
||||
if remainingSegments > 1000 {
|
||||
remainingSegments = 1000
|
||||
}
|
||||
if remainingSegments < 1 {
|
||||
remainingSegments = 1
|
||||
}
|
||||
|
||||
ta = make([]analysis.Token, remainingSegments)
|
||||
taNext = 0
|
||||
}
|
||||
|
||||
token := &ta[taNext]
|
||||
taNext++
|
||||
|
||||
token.Term = segmentBytes
|
||||
token.Start = start
|
||||
token.End = end
|
||||
token.Position = pos
|
||||
token.Type = convertType(segmenter.Type())
|
||||
|
||||
if len(rv) >= cap(rv) { // When rv is full, save it into rvx.
|
||||
rvx = append(rvx, rv)
|
||||
|
||||
rvCap := cap(rv) * 2
|
||||
if rvCap > 256 {
|
||||
rvCap = 256
|
||||
}
|
||||
|
||||
rv = make(analysis.TokenStream, 0, rvCap) // Next rv cap is bigger.
|
||||
}
|
||||
|
||||
rv = append(rv, token)
|
||||
pos++
|
||||
}
|
||||
start = end
|
||||
}
|
||||
|
||||
if len(rvx) > 0 {
|
||||
n := len(rv)
|
||||
for _, r := range rvx {
|
||||
n += len(r)
|
||||
}
|
||||
rall := make(analysis.TokenStream, 0, n)
|
||||
for _, r := range rvx {
|
||||
rall = append(rall, r...)
|
||||
}
|
||||
return append(rall, rv...)
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func UnicodeTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
|
||||
return NewUnicodeTokenizer(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenizer(Name, UnicodeTokenizerConstructor)
|
||||
}
|
||||
|
||||
func convertType(segmentWordType int) analysis.TokenType {
|
||||
switch segmentWordType {
|
||||
case segment.Ideo:
|
||||
return analysis.Ideographic
|
||||
case segment.Kana:
|
||||
return analysis.Ideographic
|
||||
case segment.Number:
|
||||
return analysis.Numeric
|
||||
}
|
||||
return analysis.AlphaNumeric
|
||||
}
|
76
vendor/github.com/blevesearch/bleve/v2/analysis/tokenmap.go
generated
vendored
Normal file
76
vendor/github.com/blevesearch/bleve/v2/analysis/tokenmap.go
generated
vendored
Normal file
|
@ -0,0 +1,76 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package analysis
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type TokenMap map[string]bool
|
||||
|
||||
func NewTokenMap() TokenMap {
|
||||
return make(TokenMap, 0)
|
||||
}
|
||||
|
||||
// LoadFile reads in a list of tokens from a text file,
|
||||
// one per line.
|
||||
// Comments are supported using `#` or `|`
|
||||
func (t TokenMap) LoadFile(filename string) error {
|
||||
data, err := ioutil.ReadFile(filename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return t.LoadBytes(data)
|
||||
}
|
||||
|
||||
// LoadBytes reads in a list of tokens from memory,
|
||||
// one per line.
|
||||
// Comments are supported using `#` or `|`
|
||||
func (t TokenMap) LoadBytes(data []byte) error {
|
||||
bytesReader := bytes.NewReader(data)
|
||||
bufioReader := bufio.NewReader(bytesReader)
|
||||
line, err := bufioReader.ReadString('\n')
|
||||
for err == nil {
|
||||
t.LoadLine(line)
|
||||
line, err = bufioReader.ReadString('\n')
|
||||
}
|
||||
// if the err was EOF we still need to process the last value
|
||||
if err == io.EOF {
|
||||
t.LoadLine(line)
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (t TokenMap) LoadLine(line string) {
|
||||
// find the start of a comment, if any
|
||||
startComment := strings.IndexAny(line, "#|")
|
||||
if startComment >= 0 {
|
||||
line = line[:startComment]
|
||||
}
|
||||
|
||||
tokens := strings.Fields(line)
|
||||
for _, token := range tokens {
|
||||
t.AddToken(token)
|
||||
}
|
||||
}
|
||||
|
||||
func (t TokenMap) AddToken(token string) {
|
||||
t[token] = true
|
||||
}
|
103
vendor/github.com/blevesearch/bleve/v2/analysis/type.go
generated
vendored
Normal file
103
vendor/github.com/blevesearch/bleve/v2/analysis/type.go
generated
vendored
Normal file
|
@ -0,0 +1,103 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package analysis
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
type CharFilter interface {
|
||||
Filter([]byte) []byte
|
||||
}
|
||||
|
||||
type TokenType int
|
||||
|
||||
const (
|
||||
AlphaNumeric TokenType = iota
|
||||
Ideographic
|
||||
Numeric
|
||||
DateTime
|
||||
Shingle
|
||||
Single
|
||||
Double
|
||||
Boolean
|
||||
)
|
||||
|
||||
// Token represents one occurrence of a term at a particular location in a
|
||||
// field.
|
||||
type Token struct {
|
||||
// Start specifies the byte offset of the beginning of the term in the
|
||||
// field.
|
||||
Start int `json:"start"`
|
||||
|
||||
// End specifies the byte offset of the end of the term in the field.
|
||||
End int `json:"end"`
|
||||
Term []byte `json:"term"`
|
||||
|
||||
// Position specifies the 1-based index of the token in the sequence of
|
||||
// occurrences of its term in the field.
|
||||
Position int `json:"position"`
|
||||
Type TokenType `json:"type"`
|
||||
KeyWord bool `json:"keyword"`
|
||||
}
|
||||
|
||||
func (t *Token) String() string {
|
||||
return fmt.Sprintf("Start: %d End: %d Position: %d Token: %s Type: %d", t.Start, t.End, t.Position, string(t.Term), t.Type)
|
||||
}
|
||||
|
||||
type TokenStream []*Token
|
||||
|
||||
// A Tokenizer splits an input string into tokens, the usual behaviour being to
|
||||
// map words to tokens.
|
||||
type Tokenizer interface {
|
||||
Tokenize([]byte) TokenStream
|
||||
}
|
||||
|
||||
// A TokenFilter adds, transforms or removes tokens from a token stream.
|
||||
type TokenFilter interface {
|
||||
Filter(TokenStream) TokenStream
|
||||
}
|
||||
|
||||
type Analyzer struct {
|
||||
CharFilters []CharFilter
|
||||
Tokenizer Tokenizer
|
||||
TokenFilters []TokenFilter
|
||||
}
|
||||
|
||||
func (a *Analyzer) Analyze(input []byte) TokenStream {
|
||||
if a.CharFilters != nil {
|
||||
for _, cf := range a.CharFilters {
|
||||
input = cf.Filter(input)
|
||||
}
|
||||
}
|
||||
tokens := a.Tokenizer.Tokenize(input)
|
||||
if a.TokenFilters != nil {
|
||||
for _, tf := range a.TokenFilters {
|
||||
tokens = tf.Filter(tokens)
|
||||
}
|
||||
}
|
||||
return tokens
|
||||
}
|
||||
|
||||
var ErrInvalidDateTime = fmt.Errorf("unable to parse datetime with any of the layouts")
|
||||
|
||||
type DateTimeParser interface {
|
||||
ParseDateTime(string) (time.Time, error)
|
||||
}
|
||||
|
||||
type ByteArrayConverter interface {
|
||||
Convert([]byte) (interface{}, error)
|
||||
}
|
92
vendor/github.com/blevesearch/bleve/v2/analysis/util.go
generated
vendored
Normal file
92
vendor/github.com/blevesearch/bleve/v2/analysis/util.go
generated
vendored
Normal file
|
@ -0,0 +1,92 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package analysis
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func DeleteRune(in []rune, pos int) []rune {
|
||||
if pos >= len(in) {
|
||||
return in
|
||||
}
|
||||
copy(in[pos:], in[pos+1:])
|
||||
return in[:len(in)-1]
|
||||
}
|
||||
|
||||
func InsertRune(in []rune, pos int, r rune) []rune {
|
||||
// create a new slice 1 rune larger
|
||||
rv := make([]rune, len(in)+1)
|
||||
// copy the characters before the insert pos
|
||||
copy(rv[0:pos], in[0:pos])
|
||||
// set the inserted rune
|
||||
rv[pos] = r
|
||||
// copy the characters after the insert pos
|
||||
copy(rv[pos+1:], in[pos:])
|
||||
return rv
|
||||
}
|
||||
|
||||
// BuildTermFromRunesOptimistic will build a term from the provided runes
|
||||
// AND optimistically attempt to encode into the provided buffer
|
||||
// if at any point it appears the buffer is too small, a new buffer is
|
||||
// allocated and that is used instead
|
||||
// this should be used in cases where frequently the new term is the same
|
||||
// length or shorter than the original term (in number of bytes)
|
||||
func BuildTermFromRunesOptimistic(buf []byte, runes []rune) []byte {
|
||||
rv := buf
|
||||
used := 0
|
||||
for _, r := range runes {
|
||||
nextLen := utf8.RuneLen(r)
|
||||
if used+nextLen > len(rv) {
|
||||
// alloc new buf
|
||||
buf = make([]byte, len(runes)*utf8.UTFMax)
|
||||
// copy work we've already done
|
||||
copy(buf, rv[:used])
|
||||
rv = buf
|
||||
}
|
||||
written := utf8.EncodeRune(rv[used:], r)
|
||||
used += written
|
||||
}
|
||||
return rv[:used]
|
||||
}
|
||||
|
||||
func BuildTermFromRunes(runes []rune) []byte {
|
||||
return BuildTermFromRunesOptimistic(make([]byte, len(runes)*utf8.UTFMax), runes)
|
||||
}
|
||||
|
||||
func TruncateRunes(input []byte, num int) []byte {
|
||||
runes := bytes.Runes(input)
|
||||
runes = runes[:len(runes)-num]
|
||||
out := BuildTermFromRunes(runes)
|
||||
return out
|
||||
}
|
||||
|
||||
func RunesEndsWith(input []rune, suffix string) bool {
|
||||
inputLen := len(input)
|
||||
suffixRunes := []rune(suffix)
|
||||
suffixLen := len(suffixRunes)
|
||||
if suffixLen > inputLen {
|
||||
return false
|
||||
}
|
||||
|
||||
for i := suffixLen - 1; i >= 0; i-- {
|
||||
if input[inputLen-(suffixLen-i)] != suffixRunes[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
94
vendor/github.com/blevesearch/bleve/v2/builder.go
generated
vendored
Normal file
94
vendor/github.com/blevesearch/bleve/v2/builder.go
generated
vendored
Normal file
|
@ -0,0 +1,94 @@
|
|||
// Copyright (c) 2019 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/document"
|
||||
"github.com/blevesearch/bleve/v2/index/scorch"
|
||||
"github.com/blevesearch/bleve/v2/mapping"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
type builderImpl struct {
|
||||
b index.IndexBuilder
|
||||
m mapping.IndexMapping
|
||||
}
|
||||
|
||||
func (b *builderImpl) Index(id string, data interface{}) error {
|
||||
if id == "" {
|
||||
return ErrorEmptyID
|
||||
}
|
||||
|
||||
doc := document.NewDocument(id)
|
||||
err := b.m.MapDocument(doc, data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = b.b.Index(doc)
|
||||
return err
|
||||
}
|
||||
|
||||
func (b *builderImpl) Close() error {
|
||||
return b.b.Close()
|
||||
}
|
||||
|
||||
func newBuilder(path string, mapping mapping.IndexMapping, config map[string]interface{}) (Builder, error) {
|
||||
if path == "" {
|
||||
return nil, fmt.Errorf("builder requires path")
|
||||
}
|
||||
|
||||
err := mapping.Validate()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if config == nil {
|
||||
config = map[string]interface{}{}
|
||||
}
|
||||
|
||||
// the builder does not have an API to interact with internal storage
|
||||
// however we can pass k/v pairs through the config
|
||||
mappingBytes, err := json.Marshal(mapping)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
config["internal"] = map[string][]byte{
|
||||
string(mappingInternalKey): mappingBytes,
|
||||
}
|
||||
|
||||
// do not use real config, as these are options for the builder,
|
||||
// not the resulting index
|
||||
meta := newIndexMeta(scorch.Name, scorch.Name, map[string]interface{}{})
|
||||
err = meta.Save(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
config["path"] = indexStorePath(path)
|
||||
|
||||
b, err := scorch.NewBuilder(config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := &builderImpl{
|
||||
b: b,
|
||||
m: mapping,
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
95
vendor/github.com/blevesearch/bleve/v2/config.go
generated
vendored
Normal file
95
vendor/github.com/blevesearch/bleve/v2/config.go
generated
vendored
Normal file
|
@ -0,0 +1,95 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"expvar"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/index/scorch"
|
||||
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/bleve/v2/search/highlight/highlighter/html"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var bleveExpVar = expvar.NewMap("bleve")
|
||||
|
||||
type configuration struct {
|
||||
Cache *registry.Cache
|
||||
DefaultHighlighter string
|
||||
DefaultKVStore string
|
||||
DefaultMemKVStore string
|
||||
DefaultIndexType string
|
||||
SlowSearchLogThreshold time.Duration
|
||||
analysisQueue *index.AnalysisQueue
|
||||
}
|
||||
|
||||
func (c *configuration) SetAnalysisQueueSize(n int) {
|
||||
if c.analysisQueue != nil {
|
||||
c.analysisQueue.Close()
|
||||
}
|
||||
c.analysisQueue = index.NewAnalysisQueue(n)
|
||||
}
|
||||
|
||||
func (c *configuration) Shutdown() {
|
||||
c.SetAnalysisQueueSize(0)
|
||||
}
|
||||
|
||||
func newConfiguration() *configuration {
|
||||
return &configuration{
|
||||
Cache: registry.NewCache(),
|
||||
analysisQueue: index.NewAnalysisQueue(4),
|
||||
}
|
||||
}
|
||||
|
||||
// Config contains library level configuration
|
||||
var Config *configuration
|
||||
|
||||
func init() {
|
||||
bootStart := time.Now()
|
||||
|
||||
// build the default configuration
|
||||
Config = newConfiguration()
|
||||
|
||||
// set the default highlighter
|
||||
Config.DefaultHighlighter = html.Name
|
||||
|
||||
// default kv store
|
||||
Config.DefaultKVStore = ""
|
||||
|
||||
// default mem only kv store
|
||||
Config.DefaultMemKVStore = gtreap.Name
|
||||
|
||||
// default index
|
||||
Config.DefaultIndexType = scorch.Name
|
||||
|
||||
bootDuration := time.Since(bootStart)
|
||||
bleveExpVar.Add("bootDuration", int64(bootDuration))
|
||||
indexStats = NewIndexStats()
|
||||
bleveExpVar.Set("indexes", indexStats)
|
||||
|
||||
initDisk()
|
||||
}
|
||||
|
||||
var logger = log.New(ioutil.Discard, "bleve", log.LstdFlags)
|
||||
|
||||
// SetLog sets the logger used for logging
|
||||
// by default log messages are sent to ioutil.Discard
|
||||
func SetLog(l *log.Logger) {
|
||||
logger = l
|
||||
}
|
23
vendor/github.com/blevesearch/bleve/v2/config_app.go
generated
vendored
Normal file
23
vendor/github.com/blevesearch/bleve/v2/config_app.go
generated
vendored
Normal file
|
@ -0,0 +1,23 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// +build appengine appenginevm
|
||||
|
||||
package bleve
|
||||
|
||||
// in the appengine environment we cannot support disk based indexes
|
||||
// so we do no extra configuration in this method
|
||||
func initDisk() {
|
||||
|
||||
}
|
25
vendor/github.com/blevesearch/bleve/v2/config_disk.go
generated
vendored
Normal file
25
vendor/github.com/blevesearch/bleve/v2/config_disk.go
generated
vendored
Normal file
|
@ -0,0 +1,25 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// +build !appengine,!appenginevm
|
||||
|
||||
package bleve
|
||||
|
||||
import "github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb"
|
||||
|
||||
// in normal environments we configure boltdb as the default storage
|
||||
func initDisk() {
|
||||
// default kv store
|
||||
Config.DefaultKVStore = boltdb.Name
|
||||
}
|
38
vendor/github.com/blevesearch/bleve/v2/doc.go
generated
vendored
Normal file
38
vendor/github.com/blevesearch/bleve/v2/doc.go
generated
vendored
Normal file
|
@ -0,0 +1,38 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
/*
|
||||
Package bleve is a library for indexing and searching text.
|
||||
|
||||
Example Opening New Index, Indexing Data
|
||||
|
||||
message := struct{
|
||||
Id: "example"
|
||||
From: "marty.schoch@gmail.com",
|
||||
Body: "bleve indexing is easy",
|
||||
}
|
||||
|
||||
mapping := bleve.NewIndexMapping()
|
||||
index, _ := bleve.New("example.bleve", mapping)
|
||||
index.Index(message.Id, message)
|
||||
|
||||
Example Opening Existing Index, Searching Data
|
||||
|
||||
index, _ := bleve.Open("example.bleve")
|
||||
query := bleve.NewQueryStringQuery("bleve")
|
||||
searchRequest := bleve.NewSearchRequest(query)
|
||||
searchResult, _ := index.Search(searchRequest)
|
||||
|
||||
*/
|
||||
package bleve
|
130
vendor/github.com/blevesearch/bleve/v2/document/document.go
generated
vendored
Normal file
130
vendor/github.com/blevesearch/bleve/v2/document/document.go
generated
vendored
Normal file
|
@ -0,0 +1,130 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDocument int
|
||||
|
||||
func init() {
|
||||
var d Document
|
||||
reflectStaticSizeDocument = int(reflect.TypeOf(d).Size())
|
||||
}
|
||||
|
||||
type Document struct {
|
||||
id string `json:"id"`
|
||||
Fields []Field `json:"fields"`
|
||||
CompositeFields []*CompositeField
|
||||
}
|
||||
|
||||
func NewDocument(id string) *Document {
|
||||
return &Document{
|
||||
id: id,
|
||||
Fields: make([]Field, 0),
|
||||
CompositeFields: make([]*CompositeField, 0),
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Document) Size() int {
|
||||
sizeInBytes := reflectStaticSizeDocument + size.SizeOfPtr +
|
||||
len(d.id)
|
||||
|
||||
for _, entry := range d.Fields {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
for _, entry := range d.CompositeFields {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (d *Document) AddField(f Field) *Document {
|
||||
switch f := f.(type) {
|
||||
case *CompositeField:
|
||||
d.CompositeFields = append(d.CompositeFields, f)
|
||||
default:
|
||||
d.Fields = append(d.Fields, f)
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func (d *Document) GoString() string {
|
||||
fields := ""
|
||||
for i, field := range d.Fields {
|
||||
if i != 0 {
|
||||
fields += ", "
|
||||
}
|
||||
fields += fmt.Sprintf("%#v", field)
|
||||
}
|
||||
compositeFields := ""
|
||||
for i, field := range d.CompositeFields {
|
||||
if i != 0 {
|
||||
compositeFields += ", "
|
||||
}
|
||||
compositeFields += fmt.Sprintf("%#v", field)
|
||||
}
|
||||
return fmt.Sprintf("&document.Document{ID:%s, Fields: %s, CompositeFields: %s}", d.ID(), fields, compositeFields)
|
||||
}
|
||||
|
||||
func (d *Document) NumPlainTextBytes() uint64 {
|
||||
rv := uint64(0)
|
||||
for _, field := range d.Fields {
|
||||
rv += field.NumPlainTextBytes()
|
||||
}
|
||||
for _, compositeField := range d.CompositeFields {
|
||||
for _, field := range d.Fields {
|
||||
if compositeField.includesField(field.Name()) {
|
||||
rv += field.NumPlainTextBytes()
|
||||
}
|
||||
}
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (d *Document) ID() string {
|
||||
return d.id
|
||||
}
|
||||
|
||||
func (d *Document) SetID(id string) {
|
||||
d.id = id
|
||||
}
|
||||
|
||||
func (d *Document) AddIDField() {
|
||||
d.AddField(NewTextFieldCustom("_id", nil, []byte(d.ID()), index.IndexField|index.StoreField, nil))
|
||||
}
|
||||
|
||||
func (d *Document) VisitFields(visitor index.FieldVisitor) {
|
||||
for _, f := range d.Fields {
|
||||
visitor(f)
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Document) VisitComposite(visitor index.CompositeFieldVisitor) {
|
||||
for _, f := range d.CompositeFields {
|
||||
visitor(f)
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Document) HasComposite() bool {
|
||||
return len(d.CompositeFields) > 0
|
||||
}
|
45
vendor/github.com/blevesearch/bleve/v2/document/field.go
generated
vendored
Normal file
45
vendor/github.com/blevesearch/bleve/v2/document/field.go
generated
vendored
Normal file
|
@ -0,0 +1,45 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document
|
||||
|
||||
import (
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
type Field interface {
|
||||
// Name returns the path of the field from the root DocumentMapping.
|
||||
// A root field path is "field", a subdocument field is "parent.field".
|
||||
Name() string
|
||||
// ArrayPositions returns the intermediate document and field indices
|
||||
// required to resolve the field value in the document. For example, if the
|
||||
// field path is "doc1.doc2.field" where doc1 and doc2 are slices or
|
||||
// arrays, ArrayPositions returns 2 indices used to resolve "doc2" value in
|
||||
// "doc1", then "field" in "doc2".
|
||||
ArrayPositions() []uint64
|
||||
Options() index.FieldIndexingOptions
|
||||
Analyze()
|
||||
Value() []byte
|
||||
|
||||
// NumPlainTextBytes should return the number of plain text bytes
|
||||
// that this field represents - this is a common metric for tracking
|
||||
// the rate of indexing
|
||||
NumPlainTextBytes() uint64
|
||||
|
||||
Size() int
|
||||
|
||||
EncodedFieldType() byte
|
||||
AnalyzedLength() int
|
||||
AnalyzedTokenFrequencies() index.TokenFrequencies
|
||||
}
|
137
vendor/github.com/blevesearch/bleve/v2/document/field_boolean.go
generated
vendored
Normal file
137
vendor/github.com/blevesearch/bleve/v2/document/field_boolean.go
generated
vendored
Normal file
|
@ -0,0 +1,137 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeBooleanField int
|
||||
|
||||
func init() {
|
||||
var f BooleanField
|
||||
reflectStaticSizeBooleanField = int(reflect.TypeOf(f).Size())
|
||||
}
|
||||
|
||||
const DefaultBooleanIndexingOptions = index.StoreField | index.IndexField | index.DocValues
|
||||
|
||||
type BooleanField struct {
|
||||
name string
|
||||
arrayPositions []uint64
|
||||
options index.FieldIndexingOptions
|
||||
value []byte
|
||||
numPlainTextBytes uint64
|
||||
length int
|
||||
frequencies index.TokenFrequencies
|
||||
}
|
||||
|
||||
func (b *BooleanField) Size() int {
|
||||
return reflectStaticSizeBooleanField + size.SizeOfPtr +
|
||||
len(b.name) +
|
||||
len(b.arrayPositions)*size.SizeOfUint64 +
|
||||
len(b.value)
|
||||
}
|
||||
|
||||
func (b *BooleanField) Name() string {
|
||||
return b.name
|
||||
}
|
||||
|
||||
func (b *BooleanField) ArrayPositions() []uint64 {
|
||||
return b.arrayPositions
|
||||
}
|
||||
|
||||
func (b *BooleanField) Options() index.FieldIndexingOptions {
|
||||
return b.options
|
||||
}
|
||||
|
||||
func (b *BooleanField) Analyze() {
|
||||
tokens := make(analysis.TokenStream, 0)
|
||||
tokens = append(tokens, &analysis.Token{
|
||||
Start: 0,
|
||||
End: len(b.value),
|
||||
Term: b.value,
|
||||
Position: 1,
|
||||
Type: analysis.Boolean,
|
||||
})
|
||||
|
||||
b.length = len(tokens)
|
||||
b.frequencies = analysis.TokenFrequency(tokens, b.arrayPositions, b.options)
|
||||
}
|
||||
|
||||
func (b *BooleanField) Value() []byte {
|
||||
return b.value
|
||||
}
|
||||
|
||||
func (b *BooleanField) Boolean() (bool, error) {
|
||||
if len(b.value) == 1 {
|
||||
return b.value[0] == 'T', nil
|
||||
}
|
||||
return false, fmt.Errorf("boolean field has %d bytes", len(b.value))
|
||||
}
|
||||
|
||||
func (b *BooleanField) GoString() string {
|
||||
return fmt.Sprintf("&document.BooleanField{Name:%s, Options: %s, Value: %s}", b.name, b.options, b.value)
|
||||
}
|
||||
|
||||
func (b *BooleanField) NumPlainTextBytes() uint64 {
|
||||
return b.numPlainTextBytes
|
||||
}
|
||||
|
||||
func (b *BooleanField) EncodedFieldType() byte {
|
||||
return 'b'
|
||||
}
|
||||
|
||||
func (b *BooleanField) AnalyzedLength() int {
|
||||
return b.length
|
||||
}
|
||||
|
||||
func (b *BooleanField) AnalyzedTokenFrequencies() index.TokenFrequencies {
|
||||
return b.frequencies
|
||||
}
|
||||
|
||||
func NewBooleanFieldFromBytes(name string, arrayPositions []uint64, value []byte) *BooleanField {
|
||||
return &BooleanField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: value,
|
||||
options: DefaultNumericIndexingOptions,
|
||||
numPlainTextBytes: uint64(len(value)),
|
||||
}
|
||||
}
|
||||
|
||||
func NewBooleanField(name string, arrayPositions []uint64, b bool) *BooleanField {
|
||||
return NewBooleanFieldWithIndexingOptions(name, arrayPositions, b, DefaultNumericIndexingOptions)
|
||||
}
|
||||
|
||||
func NewBooleanFieldWithIndexingOptions(name string, arrayPositions []uint64, b bool, options index.FieldIndexingOptions) *BooleanField {
|
||||
numPlainTextBytes := 5
|
||||
v := []byte("F")
|
||||
if b {
|
||||
numPlainTextBytes = 4
|
||||
v = []byte("T")
|
||||
}
|
||||
return &BooleanField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: v,
|
||||
options: options,
|
||||
numPlainTextBytes: uint64(numPlainTextBytes),
|
||||
}
|
||||
}
|
135
vendor/github.com/blevesearch/bleve/v2/document/field_composite.go
generated
vendored
Normal file
135
vendor/github.com/blevesearch/bleve/v2/document/field_composite.go
generated
vendored
Normal file
|
@ -0,0 +1,135 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeCompositeField int
|
||||
|
||||
func init() {
|
||||
var cf CompositeField
|
||||
reflectStaticSizeCompositeField = int(reflect.TypeOf(cf).Size())
|
||||
}
|
||||
|
||||
const DefaultCompositeIndexingOptions = index.IndexField
|
||||
|
||||
type CompositeField struct {
|
||||
name string
|
||||
includedFields map[string]bool
|
||||
excludedFields map[string]bool
|
||||
defaultInclude bool
|
||||
options index.FieldIndexingOptions
|
||||
totalLength int
|
||||
compositeFrequencies index.TokenFrequencies
|
||||
}
|
||||
|
||||
func NewCompositeField(name string, defaultInclude bool, include []string, exclude []string) *CompositeField {
|
||||
return NewCompositeFieldWithIndexingOptions(name, defaultInclude, include, exclude, DefaultCompositeIndexingOptions)
|
||||
}
|
||||
|
||||
func NewCompositeFieldWithIndexingOptions(name string, defaultInclude bool, include []string, exclude []string, options index.FieldIndexingOptions) *CompositeField {
|
||||
rv := &CompositeField{
|
||||
name: name,
|
||||
options: options,
|
||||
defaultInclude: defaultInclude,
|
||||
includedFields: make(map[string]bool, len(include)),
|
||||
excludedFields: make(map[string]bool, len(exclude)),
|
||||
compositeFrequencies: make(index.TokenFrequencies),
|
||||
}
|
||||
|
||||
for _, i := range include {
|
||||
rv.includedFields[i] = true
|
||||
}
|
||||
for _, e := range exclude {
|
||||
rv.excludedFields[e] = true
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func (c *CompositeField) Size() int {
|
||||
sizeInBytes := reflectStaticSizeCompositeField + size.SizeOfPtr +
|
||||
len(c.name)
|
||||
|
||||
for k, _ := range c.includedFields {
|
||||
sizeInBytes += size.SizeOfString + len(k) + size.SizeOfBool
|
||||
}
|
||||
|
||||
for k, _ := range c.excludedFields {
|
||||
sizeInBytes += size.SizeOfString + len(k) + size.SizeOfBool
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (c *CompositeField) Name() string {
|
||||
return c.name
|
||||
}
|
||||
|
||||
func (c *CompositeField) ArrayPositions() []uint64 {
|
||||
return []uint64{}
|
||||
}
|
||||
|
||||
func (c *CompositeField) Options() index.FieldIndexingOptions {
|
||||
return c.options
|
||||
}
|
||||
|
||||
func (c *CompositeField) Analyze() {
|
||||
}
|
||||
|
||||
func (c *CompositeField) Value() []byte {
|
||||
return []byte{}
|
||||
}
|
||||
|
||||
func (c *CompositeField) NumPlainTextBytes() uint64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (c *CompositeField) includesField(field string) bool {
|
||||
shouldInclude := c.defaultInclude
|
||||
_, fieldShouldBeIncluded := c.includedFields[field]
|
||||
if fieldShouldBeIncluded {
|
||||
shouldInclude = true
|
||||
}
|
||||
_, fieldShouldBeExcluded := c.excludedFields[field]
|
||||
if fieldShouldBeExcluded {
|
||||
shouldInclude = false
|
||||
}
|
||||
return shouldInclude
|
||||
}
|
||||
|
||||
func (c *CompositeField) Compose(field string, length int, freq index.TokenFrequencies) {
|
||||
if c.includesField(field) {
|
||||
c.totalLength += length
|
||||
c.compositeFrequencies.MergeAll(field, freq)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *CompositeField) EncodedFieldType() byte {
|
||||
return 'c'
|
||||
}
|
||||
|
||||
func (c *CompositeField) AnalyzedLength() int {
|
||||
return c.totalLength
|
||||
}
|
||||
|
||||
func (c *CompositeField) AnalyzedTokenFrequencies() index.TokenFrequencies {
|
||||
return c.compositeFrequencies
|
||||
}
|
173
vendor/github.com/blevesearch/bleve/v2/document/field_datetime.go
generated
vendored
Normal file
173
vendor/github.com/blevesearch/bleve/v2/document/field_datetime.go
generated
vendored
Normal file
|
@ -0,0 +1,173 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/numeric"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDateTimeField int
|
||||
|
||||
func init() {
|
||||
var f DateTimeField
|
||||
reflectStaticSizeDateTimeField = int(reflect.TypeOf(f).Size())
|
||||
}
|
||||
|
||||
const DefaultDateTimeIndexingOptions = index.StoreField | index.IndexField | index.DocValues
|
||||
const DefaultDateTimePrecisionStep uint = 4
|
||||
|
||||
var MinTimeRepresentable = time.Unix(0, math.MinInt64)
|
||||
var MaxTimeRepresentable = time.Unix(0, math.MaxInt64)
|
||||
|
||||
type DateTimeField struct {
|
||||
name string
|
||||
arrayPositions []uint64
|
||||
options index.FieldIndexingOptions
|
||||
value numeric.PrefixCoded
|
||||
numPlainTextBytes uint64
|
||||
length int
|
||||
frequencies index.TokenFrequencies
|
||||
}
|
||||
|
||||
func (n *DateTimeField) Size() int {
|
||||
return reflectStaticSizeDateTimeField + size.SizeOfPtr +
|
||||
len(n.name) +
|
||||
len(n.arrayPositions)*size.SizeOfUint64
|
||||
}
|
||||
|
||||
func (n *DateTimeField) Name() string {
|
||||
return n.name
|
||||
}
|
||||
|
||||
func (n *DateTimeField) ArrayPositions() []uint64 {
|
||||
return n.arrayPositions
|
||||
}
|
||||
|
||||
func (n *DateTimeField) Options() index.FieldIndexingOptions {
|
||||
return n.options
|
||||
}
|
||||
|
||||
func (n *DateTimeField) EncodedFieldType() byte {
|
||||
return 'd'
|
||||
}
|
||||
|
||||
func (n *DateTimeField) AnalyzedLength() int {
|
||||
return n.length
|
||||
}
|
||||
|
||||
func (n *DateTimeField) AnalyzedTokenFrequencies() index.TokenFrequencies {
|
||||
return n.frequencies
|
||||
}
|
||||
|
||||
func (n *DateTimeField) Analyze() {
|
||||
tokens := make(analysis.TokenStream, 0)
|
||||
tokens = append(tokens, &analysis.Token{
|
||||
Start: 0,
|
||||
End: len(n.value),
|
||||
Term: n.value,
|
||||
Position: 1,
|
||||
Type: analysis.DateTime,
|
||||
})
|
||||
|
||||
original, err := n.value.Int64()
|
||||
if err == nil {
|
||||
|
||||
shift := DefaultDateTimePrecisionStep
|
||||
for shift < 64 {
|
||||
shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
token := analysis.Token{
|
||||
Start: 0,
|
||||
End: len(shiftEncoded),
|
||||
Term: shiftEncoded,
|
||||
Position: 1,
|
||||
Type: analysis.DateTime,
|
||||
}
|
||||
tokens = append(tokens, &token)
|
||||
shift += DefaultDateTimePrecisionStep
|
||||
}
|
||||
}
|
||||
|
||||
n.length = len(tokens)
|
||||
n.frequencies = analysis.TokenFrequency(tokens, n.arrayPositions, n.options)
|
||||
}
|
||||
|
||||
func (n *DateTimeField) Value() []byte {
|
||||
return n.value
|
||||
}
|
||||
|
||||
func (n *DateTimeField) DateTime() (time.Time, error) {
|
||||
i64, err := n.value.Int64()
|
||||
if err != nil {
|
||||
return time.Time{}, err
|
||||
}
|
||||
return time.Unix(0, i64).UTC(), nil
|
||||
}
|
||||
|
||||
func (n *DateTimeField) GoString() string {
|
||||
return fmt.Sprintf("&document.DateField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
|
||||
}
|
||||
|
||||
func (n *DateTimeField) NumPlainTextBytes() uint64 {
|
||||
return n.numPlainTextBytes
|
||||
}
|
||||
|
||||
func NewDateTimeFieldFromBytes(name string, arrayPositions []uint64, value []byte) *DateTimeField {
|
||||
return &DateTimeField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: value,
|
||||
options: DefaultDateTimeIndexingOptions,
|
||||
numPlainTextBytes: uint64(len(value)),
|
||||
}
|
||||
}
|
||||
|
||||
func NewDateTimeField(name string, arrayPositions []uint64, dt time.Time) (*DateTimeField, error) {
|
||||
return NewDateTimeFieldWithIndexingOptions(name, arrayPositions, dt, DefaultDateTimeIndexingOptions)
|
||||
}
|
||||
|
||||
func NewDateTimeFieldWithIndexingOptions(name string, arrayPositions []uint64, dt time.Time, options index.FieldIndexingOptions) (*DateTimeField, error) {
|
||||
if canRepresent(dt) {
|
||||
dtInt64 := dt.UnixNano()
|
||||
prefixCoded := numeric.MustNewPrefixCodedInt64(dtInt64, 0)
|
||||
return &DateTimeField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: prefixCoded,
|
||||
options: options,
|
||||
// not correct, just a place holder until we revisit how fields are
|
||||
// represented and can fix this better
|
||||
numPlainTextBytes: uint64(8),
|
||||
}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("cannot represent %s in this type", dt)
|
||||
}
|
||||
|
||||
func canRepresent(dt time.Time) bool {
|
||||
if dt.Before(MinTimeRepresentable) || dt.After(MaxTimeRepresentable) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
166
vendor/github.com/blevesearch/bleve/v2/document/field_geopoint.go
generated
vendored
Normal file
166
vendor/github.com/blevesearch/bleve/v2/document/field_geopoint.go
generated
vendored
Normal file
|
@ -0,0 +1,166 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/geo"
|
||||
"github.com/blevesearch/bleve/v2/numeric"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeGeoPointField int
|
||||
|
||||
func init() {
|
||||
var f GeoPointField
|
||||
reflectStaticSizeGeoPointField = int(reflect.TypeOf(f).Size())
|
||||
}
|
||||
|
||||
var GeoPrecisionStep uint = 9
|
||||
|
||||
type GeoPointField struct {
|
||||
name string
|
||||
arrayPositions []uint64
|
||||
options index.FieldIndexingOptions
|
||||
value numeric.PrefixCoded
|
||||
numPlainTextBytes uint64
|
||||
length int
|
||||
frequencies index.TokenFrequencies
|
||||
}
|
||||
|
||||
func (n *GeoPointField) Size() int {
|
||||
return reflectStaticSizeGeoPointField + size.SizeOfPtr +
|
||||
len(n.name) +
|
||||
len(n.arrayPositions)*size.SizeOfUint64
|
||||
}
|
||||
|
||||
func (n *GeoPointField) Name() string {
|
||||
return n.name
|
||||
}
|
||||
|
||||
func (n *GeoPointField) ArrayPositions() []uint64 {
|
||||
return n.arrayPositions
|
||||
}
|
||||
|
||||
func (n *GeoPointField) Options() index.FieldIndexingOptions {
|
||||
return n.options
|
||||
}
|
||||
|
||||
func (n *GeoPointField) EncodedFieldType() byte {
|
||||
return 'g'
|
||||
}
|
||||
|
||||
func (n *GeoPointField) AnalyzedLength() int {
|
||||
return n.length
|
||||
}
|
||||
|
||||
func (n *GeoPointField) AnalyzedTokenFrequencies() index.TokenFrequencies {
|
||||
return n.frequencies
|
||||
}
|
||||
|
||||
func (n *GeoPointField) Analyze() {
|
||||
tokens := make(analysis.TokenStream, 0)
|
||||
tokens = append(tokens, &analysis.Token{
|
||||
Start: 0,
|
||||
End: len(n.value),
|
||||
Term: n.value,
|
||||
Position: 1,
|
||||
Type: analysis.Numeric,
|
||||
})
|
||||
|
||||
original, err := n.value.Int64()
|
||||
if err == nil {
|
||||
|
||||
shift := GeoPrecisionStep
|
||||
for shift < 64 {
|
||||
shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
token := analysis.Token{
|
||||
Start: 0,
|
||||
End: len(shiftEncoded),
|
||||
Term: shiftEncoded,
|
||||
Position: 1,
|
||||
Type: analysis.Numeric,
|
||||
}
|
||||
tokens = append(tokens, &token)
|
||||
shift += GeoPrecisionStep
|
||||
}
|
||||
}
|
||||
|
||||
n.length = len(tokens)
|
||||
n.frequencies = analysis.TokenFrequency(tokens, n.arrayPositions, n.options)
|
||||
}
|
||||
|
||||
func (n *GeoPointField) Value() []byte {
|
||||
return n.value
|
||||
}
|
||||
|
||||
func (n *GeoPointField) Lon() (float64, error) {
|
||||
i64, err := n.value.Int64()
|
||||
if err != nil {
|
||||
return 0.0, err
|
||||
}
|
||||
return geo.MortonUnhashLon(uint64(i64)), nil
|
||||
}
|
||||
|
||||
func (n *GeoPointField) Lat() (float64, error) {
|
||||
i64, err := n.value.Int64()
|
||||
if err != nil {
|
||||
return 0.0, err
|
||||
}
|
||||
return geo.MortonUnhashLat(uint64(i64)), nil
|
||||
}
|
||||
|
||||
func (n *GeoPointField) GoString() string {
|
||||
return fmt.Sprintf("&document.GeoPointField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
|
||||
}
|
||||
|
||||
func (n *GeoPointField) NumPlainTextBytes() uint64 {
|
||||
return n.numPlainTextBytes
|
||||
}
|
||||
|
||||
func NewGeoPointFieldFromBytes(name string, arrayPositions []uint64, value []byte) *GeoPointField {
|
||||
return &GeoPointField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: value,
|
||||
options: DefaultNumericIndexingOptions,
|
||||
numPlainTextBytes: uint64(len(value)),
|
||||
}
|
||||
}
|
||||
|
||||
func NewGeoPointField(name string, arrayPositions []uint64, lon, lat float64) *GeoPointField {
|
||||
return NewGeoPointFieldWithIndexingOptions(name, arrayPositions, lon, lat, DefaultNumericIndexingOptions)
|
||||
}
|
||||
|
||||
func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, lon, lat float64, options index.FieldIndexingOptions) *GeoPointField {
|
||||
mhash := geo.MortonHash(lon, lat)
|
||||
prefixCoded := numeric.MustNewPrefixCodedInt64(int64(mhash), 0)
|
||||
return &GeoPointField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: prefixCoded,
|
||||
options: options,
|
||||
// not correct, just a place holder until we revisit how fields are
|
||||
// represented and can fix this better
|
||||
numPlainTextBytes: uint64(8),
|
||||
}
|
||||
}
|
159
vendor/github.com/blevesearch/bleve/v2/document/field_numeric.go
generated
vendored
Normal file
159
vendor/github.com/blevesearch/bleve/v2/document/field_numeric.go
generated
vendored
Normal file
|
@ -0,0 +1,159 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/numeric"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeNumericField int
|
||||
|
||||
func init() {
|
||||
var f NumericField
|
||||
reflectStaticSizeNumericField = int(reflect.TypeOf(f).Size())
|
||||
}
|
||||
|
||||
const DefaultNumericIndexingOptions = index.StoreField | index.IndexField | index.DocValues
|
||||
|
||||
const DefaultPrecisionStep uint = 4
|
||||
|
||||
type NumericField struct {
|
||||
name string
|
||||
arrayPositions []uint64
|
||||
options index.FieldIndexingOptions
|
||||
value numeric.PrefixCoded
|
||||
numPlainTextBytes uint64
|
||||
length int
|
||||
frequencies index.TokenFrequencies
|
||||
}
|
||||
|
||||
func (n *NumericField) Size() int {
|
||||
return reflectStaticSizeNumericField + size.SizeOfPtr +
|
||||
len(n.name) +
|
||||
len(n.arrayPositions)*size.SizeOfPtr
|
||||
}
|
||||
|
||||
func (n *NumericField) Name() string {
|
||||
return n.name
|
||||
}
|
||||
|
||||
func (n *NumericField) ArrayPositions() []uint64 {
|
||||
return n.arrayPositions
|
||||
}
|
||||
|
||||
func (n *NumericField) Options() index.FieldIndexingOptions {
|
||||
return n.options
|
||||
}
|
||||
|
||||
func (n *NumericField) EncodedFieldType() byte {
|
||||
return 'n'
|
||||
}
|
||||
|
||||
func (n *NumericField) AnalyzedLength() int {
|
||||
return n.length
|
||||
}
|
||||
|
||||
func (n *NumericField) AnalyzedTokenFrequencies() index.TokenFrequencies {
|
||||
return n.frequencies
|
||||
}
|
||||
|
||||
func (n *NumericField) Analyze() {
|
||||
tokens := make(analysis.TokenStream, 0)
|
||||
tokens = append(tokens, &analysis.Token{
|
||||
Start: 0,
|
||||
End: len(n.value),
|
||||
Term: n.value,
|
||||
Position: 1,
|
||||
Type: analysis.Numeric,
|
||||
})
|
||||
|
||||
original, err := n.value.Int64()
|
||||
if err == nil {
|
||||
|
||||
shift := DefaultPrecisionStep
|
||||
for shift < 64 {
|
||||
shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
token := analysis.Token{
|
||||
Start: 0,
|
||||
End: len(shiftEncoded),
|
||||
Term: shiftEncoded,
|
||||
Position: 1,
|
||||
Type: analysis.Numeric,
|
||||
}
|
||||
tokens = append(tokens, &token)
|
||||
shift += DefaultPrecisionStep
|
||||
}
|
||||
}
|
||||
|
||||
n.length = len(tokens)
|
||||
n.frequencies = analysis.TokenFrequency(tokens, n.arrayPositions, n.options)
|
||||
}
|
||||
|
||||
func (n *NumericField) Value() []byte {
|
||||
return n.value
|
||||
}
|
||||
|
||||
func (n *NumericField) Number() (float64, error) {
|
||||
i64, err := n.value.Int64()
|
||||
if err != nil {
|
||||
return 0.0, err
|
||||
}
|
||||
return numeric.Int64ToFloat64(i64), nil
|
||||
}
|
||||
|
||||
func (n *NumericField) GoString() string {
|
||||
return fmt.Sprintf("&document.NumericField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
|
||||
}
|
||||
|
||||
func (n *NumericField) NumPlainTextBytes() uint64 {
|
||||
return n.numPlainTextBytes
|
||||
}
|
||||
|
||||
func NewNumericFieldFromBytes(name string, arrayPositions []uint64, value []byte) *NumericField {
|
||||
return &NumericField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: value,
|
||||
options: DefaultNumericIndexingOptions,
|
||||
numPlainTextBytes: uint64(len(value)),
|
||||
}
|
||||
}
|
||||
|
||||
func NewNumericField(name string, arrayPositions []uint64, number float64) *NumericField {
|
||||
return NewNumericFieldWithIndexingOptions(name, arrayPositions, number, DefaultNumericIndexingOptions)
|
||||
}
|
||||
|
||||
func NewNumericFieldWithIndexingOptions(name string, arrayPositions []uint64, number float64, options index.FieldIndexingOptions) *NumericField {
|
||||
numberInt64 := numeric.Float64ToInt64(number)
|
||||
prefixCoded := numeric.MustNewPrefixCodedInt64(numberInt64, 0)
|
||||
return &NumericField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: prefixCoded,
|
||||
options: options,
|
||||
// not correct, just a place holder until we revisit how fields are
|
||||
// represented and can fix this better
|
||||
numPlainTextBytes: uint64(8),
|
||||
}
|
||||
}
|
157
vendor/github.com/blevesearch/bleve/v2/document/field_text.go
generated
vendored
Normal file
157
vendor/github.com/blevesearch/bleve/v2/document/field_text.go
generated
vendored
Normal file
|
@ -0,0 +1,157 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTextField int
|
||||
|
||||
func init() {
|
||||
var f TextField
|
||||
reflectStaticSizeTextField = int(reflect.TypeOf(f).Size())
|
||||
}
|
||||
|
||||
const DefaultTextIndexingOptions = index.IndexField | index.DocValues
|
||||
|
||||
type TextField struct {
|
||||
name string
|
||||
arrayPositions []uint64
|
||||
options index.FieldIndexingOptions
|
||||
analyzer *analysis.Analyzer
|
||||
value []byte
|
||||
numPlainTextBytes uint64
|
||||
length int
|
||||
frequencies index.TokenFrequencies
|
||||
}
|
||||
|
||||
func (t *TextField) Size() int {
|
||||
return reflectStaticSizeTextField + size.SizeOfPtr +
|
||||
len(t.name) +
|
||||
len(t.arrayPositions)*size.SizeOfUint64 +
|
||||
len(t.value)
|
||||
}
|
||||
|
||||
func (t *TextField) Name() string {
|
||||
return t.name
|
||||
}
|
||||
|
||||
func (t *TextField) ArrayPositions() []uint64 {
|
||||
return t.arrayPositions
|
||||
}
|
||||
|
||||
func (t *TextField) Options() index.FieldIndexingOptions {
|
||||
return t.options
|
||||
}
|
||||
|
||||
func (t *TextField) EncodedFieldType() byte {
|
||||
return 't'
|
||||
}
|
||||
|
||||
func (t *TextField) AnalyzedLength() int {
|
||||
return t.length
|
||||
}
|
||||
|
||||
func (t *TextField) AnalyzedTokenFrequencies() index.TokenFrequencies {
|
||||
return t.frequencies
|
||||
}
|
||||
|
||||
func (t *TextField) Analyze() {
|
||||
var tokens analysis.TokenStream
|
||||
if t.analyzer != nil {
|
||||
bytesToAnalyze := t.Value()
|
||||
if t.options.IsStored() {
|
||||
// need to copy
|
||||
bytesCopied := make([]byte, len(bytesToAnalyze))
|
||||
copy(bytesCopied, bytesToAnalyze)
|
||||
bytesToAnalyze = bytesCopied
|
||||
}
|
||||
tokens = t.analyzer.Analyze(bytesToAnalyze)
|
||||
} else {
|
||||
tokens = analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Start: 0,
|
||||
End: len(t.value),
|
||||
Term: t.value,
|
||||
Position: 1,
|
||||
Type: analysis.AlphaNumeric,
|
||||
},
|
||||
}
|
||||
}
|
||||
t.length = len(tokens) // number of tokens in this doc field
|
||||
t.frequencies = analysis.TokenFrequency(tokens, t.arrayPositions, t.options)
|
||||
}
|
||||
|
||||
func (t *TextField) Analyzer() *analysis.Analyzer {
|
||||
return t.analyzer
|
||||
}
|
||||
|
||||
func (t *TextField) Value() []byte {
|
||||
return t.value
|
||||
}
|
||||
|
||||
func (t *TextField) Text() string {
|
||||
return string(t.value)
|
||||
}
|
||||
|
||||
func (t *TextField) GoString() string {
|
||||
return fmt.Sprintf("&document.TextField{Name:%s, Options: %s, Analyzer: %v, Value: %s, ArrayPositions: %v}", t.name, t.options, t.analyzer, t.value, t.arrayPositions)
|
||||
}
|
||||
|
||||
func (t *TextField) NumPlainTextBytes() uint64 {
|
||||
return t.numPlainTextBytes
|
||||
}
|
||||
|
||||
func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField {
|
||||
return NewTextFieldWithIndexingOptions(name, arrayPositions, value, DefaultTextIndexingOptions)
|
||||
}
|
||||
|
||||
func NewTextFieldWithIndexingOptions(name string, arrayPositions []uint64, value []byte, options index.FieldIndexingOptions) *TextField {
|
||||
return &TextField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
options: options,
|
||||
value: value,
|
||||
numPlainTextBytes: uint64(len(value)),
|
||||
}
|
||||
}
|
||||
|
||||
func NewTextFieldWithAnalyzer(name string, arrayPositions []uint64, value []byte, analyzer *analysis.Analyzer) *TextField {
|
||||
return &TextField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
options: DefaultTextIndexingOptions,
|
||||
analyzer: analyzer,
|
||||
value: value,
|
||||
numPlainTextBytes: uint64(len(value)),
|
||||
}
|
||||
}
|
||||
|
||||
func NewTextFieldCustom(name string, arrayPositions []uint64, value []byte, options index.FieldIndexingOptions, analyzer *analysis.Analyzer) *TextField {
|
||||
return &TextField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
options: options,
|
||||
analyzer: analyzer,
|
||||
value: value,
|
||||
numPlainTextBytes: uint64(len(value)),
|
||||
}
|
||||
}
|
50
vendor/github.com/blevesearch/bleve/v2/error.go
generated
vendored
Normal file
50
vendor/github.com/blevesearch/bleve/v2/error.go
generated
vendored
Normal file
|
@ -0,0 +1,50 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve
|
||||
|
||||
// Constant Error values which can be compared to determine the type of error
|
||||
const (
|
||||
ErrorIndexPathExists Error = iota
|
||||
ErrorIndexPathDoesNotExist
|
||||
ErrorIndexMetaMissing
|
||||
ErrorIndexMetaCorrupt
|
||||
ErrorIndexClosed
|
||||
ErrorAliasMulti
|
||||
ErrorAliasEmpty
|
||||
ErrorUnknownIndexType
|
||||
ErrorEmptyID
|
||||
ErrorIndexReadInconsistency
|
||||
)
|
||||
|
||||
// Error represents a more strongly typed bleve error for detecting
|
||||
// and handling specific types of errors.
|
||||
type Error int
|
||||
|
||||
func (e Error) Error() string {
|
||||
return errorMessages[e]
|
||||
}
|
||||
|
||||
var errorMessages = map[Error]string{
|
||||
ErrorIndexPathExists: "cannot create new index, path already exists",
|
||||
ErrorIndexPathDoesNotExist: "cannot open index, path does not exist",
|
||||
ErrorIndexMetaMissing: "cannot open index, metadata missing",
|
||||
ErrorIndexMetaCorrupt: "cannot open index, metadata corrupt",
|
||||
ErrorIndexClosed: "index is closed",
|
||||
ErrorAliasMulti: "cannot perform single index operation on multiple index alias",
|
||||
ErrorAliasEmpty: "cannot perform operation on empty alias",
|
||||
ErrorUnknownIndexType: "unknown index type",
|
||||
ErrorEmptyID: "document ID cannot be empty",
|
||||
ErrorIndexReadInconsistency: "index read inconsistency detected",
|
||||
}
|
9
vendor/github.com/blevesearch/bleve/v2/geo/README.md
generated
vendored
Normal file
9
vendor/github.com/blevesearch/bleve/v2/geo/README.md
generated
vendored
Normal file
|
@ -0,0 +1,9 @@
|
|||
# geo support in bleve
|
||||
|
||||
First, all of this geo code is a Go adaptation of the [Lucene 5.3.2 sandbox geo support](https://lucene.apache.org/core/5_3_2/sandbox/org/apache/lucene/util/package-summary.html).
|
||||
|
||||
## Notes
|
||||
|
||||
- All of the APIs will use float64 for lon/lat values.
|
||||
- When describing a point in function arguments or return values, we always use the order lon, lat.
|
||||
- High level APIs will use TopLeft and BottomRight to describe bounding boxes. This may not map cleanly to min/max lon/lat when crossing the dateline. The lower level APIs will use min/max lon/lat and require the higher-level code to split boxes accordingly.
|
210
vendor/github.com/blevesearch/bleve/v2/geo/geo.go
generated
vendored
Normal file
210
vendor/github.com/blevesearch/bleve/v2/geo/geo.go
generated
vendored
Normal file
|
@ -0,0 +1,210 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package geo
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/numeric"
|
||||
)
|
||||
|
||||
// GeoBits is the number of bits used for a single geo point
|
||||
// Currently this is 32bits for lon and 32bits for lat
|
||||
var GeoBits uint = 32
|
||||
|
||||
var minLon = -180.0
|
||||
var minLat = -90.0
|
||||
var maxLon = 180.0
|
||||
var maxLat = 90.0
|
||||
var minLonRad = minLon * degreesToRadian
|
||||
var minLatRad = minLat * degreesToRadian
|
||||
var maxLonRad = maxLon * degreesToRadian
|
||||
var maxLatRad = maxLat * degreesToRadian
|
||||
var geoTolerance = 1e-6
|
||||
var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0
|
||||
var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0
|
||||
|
||||
var geoHashMaxLength = 12
|
||||
|
||||
// Point represents a geo point.
|
||||
type Point struct {
|
||||
Lon float64 `json:"lon"`
|
||||
Lat float64 `json:"lat"`
|
||||
}
|
||||
|
||||
// MortonHash computes the morton hash value for the provided geo point
|
||||
// This point is ordered as lon, lat.
|
||||
func MortonHash(lon, lat float64) uint64 {
|
||||
return numeric.Interleave(scaleLon(lon), scaleLat(lat))
|
||||
}
|
||||
|
||||
func scaleLon(lon float64) uint64 {
|
||||
rv := uint64((lon - minLon) * lonScale)
|
||||
return rv
|
||||
}
|
||||
|
||||
func scaleLat(lat float64) uint64 {
|
||||
rv := uint64((lat - minLat) * latScale)
|
||||
return rv
|
||||
}
|
||||
|
||||
// MortonUnhashLon extracts the longitude value from the provided morton hash.
|
||||
func MortonUnhashLon(hash uint64) float64 {
|
||||
return unscaleLon(numeric.Deinterleave(hash))
|
||||
}
|
||||
|
||||
// MortonUnhashLat extracts the latitude value from the provided morton hash.
|
||||
func MortonUnhashLat(hash uint64) float64 {
|
||||
return unscaleLat(numeric.Deinterleave(hash >> 1))
|
||||
}
|
||||
|
||||
func unscaleLon(lon uint64) float64 {
|
||||
return (float64(lon) / lonScale) + minLon
|
||||
}
|
||||
|
||||
func unscaleLat(lat uint64) float64 {
|
||||
return (float64(lat) / latScale) + minLat
|
||||
}
|
||||
|
||||
// compareGeo will compare two float values and see if they are the same
|
||||
// taking into consideration a known geo tolerance.
|
||||
func compareGeo(a, b float64) float64 {
|
||||
compare := a - b
|
||||
if math.Abs(compare) <= geoTolerance {
|
||||
return 0
|
||||
}
|
||||
return compare
|
||||
}
|
||||
|
||||
// RectIntersects checks whether rectangles a and b intersect
|
||||
func RectIntersects(aMinX, aMinY, aMaxX, aMaxY, bMinX, bMinY, bMaxX, bMaxY float64) bool {
|
||||
return !(aMaxX < bMinX || aMinX > bMaxX || aMaxY < bMinY || aMinY > bMaxY)
|
||||
}
|
||||
|
||||
// RectWithin checks whether box a is within box b
|
||||
func RectWithin(aMinX, aMinY, aMaxX, aMaxY, bMinX, bMinY, bMaxX, bMaxY float64) bool {
|
||||
rv := !(aMinX < bMinX || aMinY < bMinY || aMaxX > bMaxX || aMaxY > bMaxY)
|
||||
return rv
|
||||
}
|
||||
|
||||
// BoundingBoxContains checks whether the lon/lat point is within the box
|
||||
func BoundingBoxContains(lon, lat, minLon, minLat, maxLon, maxLat float64) bool {
|
||||
return compareGeo(lon, minLon) >= 0 && compareGeo(lon, maxLon) <= 0 &&
|
||||
compareGeo(lat, minLat) >= 0 && compareGeo(lat, maxLat) <= 0
|
||||
}
|
||||
|
||||
const degreesToRadian = math.Pi / 180
|
||||
const radiansToDegrees = 180 / math.Pi
|
||||
|
||||
// DegreesToRadians converts an angle in degrees to radians
|
||||
func DegreesToRadians(d float64) float64 {
|
||||
return d * degreesToRadian
|
||||
}
|
||||
|
||||
// RadiansToDegrees converts an angle in radians to degress
|
||||
func RadiansToDegrees(r float64) float64 {
|
||||
return r * radiansToDegrees
|
||||
}
|
||||
|
||||
var earthMeanRadiusMeters = 6371008.7714
|
||||
|
||||
func RectFromPointDistance(lon, lat, dist float64) (float64, float64, float64, float64, error) {
|
||||
err := checkLongitude(lon)
|
||||
if err != nil {
|
||||
return 0, 0, 0, 0, err
|
||||
}
|
||||
err = checkLatitude(lat)
|
||||
if err != nil {
|
||||
return 0, 0, 0, 0, err
|
||||
}
|
||||
radLon := DegreesToRadians(lon)
|
||||
radLat := DegreesToRadians(lat)
|
||||
radDistance := (dist + 7e-2) / earthMeanRadiusMeters
|
||||
|
||||
minLatL := radLat - radDistance
|
||||
maxLatL := radLat + radDistance
|
||||
|
||||
var minLonL, maxLonL float64
|
||||
if minLatL > minLatRad && maxLatL < maxLatRad {
|
||||
deltaLon := asin(sin(radDistance) / cos(radLat))
|
||||
minLonL = radLon - deltaLon
|
||||
if minLonL < minLonRad {
|
||||
minLonL += 2 * math.Pi
|
||||
}
|
||||
maxLonL = radLon + deltaLon
|
||||
if maxLonL > maxLonRad {
|
||||
maxLonL -= 2 * math.Pi
|
||||
}
|
||||
} else {
|
||||
// pole is inside distance
|
||||
minLatL = math.Max(minLatL, minLatRad)
|
||||
maxLatL = math.Min(maxLatL, maxLatRad)
|
||||
minLonL = minLonRad
|
||||
maxLonL = maxLonRad
|
||||
}
|
||||
|
||||
return RadiansToDegrees(minLonL),
|
||||
RadiansToDegrees(maxLatL),
|
||||
RadiansToDegrees(maxLonL),
|
||||
RadiansToDegrees(minLatL),
|
||||
nil
|
||||
}
|
||||
|
||||
func checkLatitude(latitude float64) error {
|
||||
if math.IsNaN(latitude) || latitude < minLat || latitude > maxLat {
|
||||
return fmt.Errorf("invalid latitude %f; must be between %f and %f", latitude, minLat, maxLat)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkLongitude(longitude float64) error {
|
||||
if math.IsNaN(longitude) || longitude < minLon || longitude > maxLon {
|
||||
return fmt.Errorf("invalid longitude %f; must be between %f and %f", longitude, minLon, maxLon)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func BoundingRectangleForPolygon(polygon []Point) (
|
||||
float64, float64, float64, float64, error) {
|
||||
err := checkLongitude(polygon[0].Lon)
|
||||
if err != nil {
|
||||
return 0, 0, 0, 0, err
|
||||
}
|
||||
err = checkLatitude(polygon[0].Lat)
|
||||
if err != nil {
|
||||
return 0, 0, 0, 0, err
|
||||
}
|
||||
maxY, minY := polygon[0].Lat, polygon[0].Lat
|
||||
maxX, minX := polygon[0].Lon, polygon[0].Lon
|
||||
for i := 1; i < len(polygon); i++ {
|
||||
err := checkLongitude(polygon[i].Lon)
|
||||
if err != nil {
|
||||
return 0, 0, 0, 0, err
|
||||
}
|
||||
err = checkLatitude(polygon[i].Lat)
|
||||
if err != nil {
|
||||
return 0, 0, 0, 0, err
|
||||
}
|
||||
|
||||
maxY = math.Max(maxY, polygon[i].Lat)
|
||||
minY = math.Min(minY, polygon[i].Lat)
|
||||
|
||||
maxX = math.Max(maxX, polygon[i].Lon)
|
||||
minX = math.Min(minX, polygon[i].Lon)
|
||||
}
|
||||
|
||||
return minX, maxY, maxX, minY, nil
|
||||
}
|
98
vendor/github.com/blevesearch/bleve/v2/geo/geo_dist.go
generated
vendored
Normal file
98
vendor/github.com/blevesearch/bleve/v2/geo/geo_dist.go
generated
vendored
Normal file
|
@ -0,0 +1,98 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package geo
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type distanceUnit struct {
|
||||
conv float64
|
||||
suffixes []string
|
||||
}
|
||||
|
||||
var inch = distanceUnit{0.0254, []string{"in", "inch"}}
|
||||
var yard = distanceUnit{0.9144, []string{"yd", "yards"}}
|
||||
var feet = distanceUnit{0.3048, []string{"ft", "feet"}}
|
||||
var kilom = distanceUnit{1000, []string{"km", "kilometers"}}
|
||||
var nauticalm = distanceUnit{1852.0, []string{"nm", "nauticalmiles"}}
|
||||
var millim = distanceUnit{0.001, []string{"mm", "millimeters"}}
|
||||
var centim = distanceUnit{0.01, []string{"cm", "centimeters"}}
|
||||
var miles = distanceUnit{1609.344, []string{"mi", "miles"}}
|
||||
var meters = distanceUnit{1, []string{"m", "meters"}}
|
||||
|
||||
var distanceUnits = []*distanceUnit{
|
||||
&inch, &yard, &feet, &kilom, &nauticalm, &millim, ¢im, &miles, &meters,
|
||||
}
|
||||
|
||||
// ParseDistance attempts to parse a distance string and return distance in
|
||||
// meters. Example formats supported:
|
||||
// "5in" "5inch" "7yd" "7yards" "9ft" "9feet" "11km" "11kilometers"
|
||||
// "3nm" "3nauticalmiles" "13mm" "13millimeters" "15cm" "15centimeters"
|
||||
// "17mi" "17miles" "19m" "19meters"
|
||||
// If the unit cannot be determined, the entire string is parsed and the
|
||||
// unit of meters is assumed.
|
||||
// If the number portion cannot be parsed, 0 and the parse error are returned.
|
||||
func ParseDistance(d string) (float64, error) {
|
||||
for _, unit := range distanceUnits {
|
||||
for _, unitSuffix := range unit.suffixes {
|
||||
if strings.HasSuffix(d, unitSuffix) {
|
||||
parsedNum, err := strconv.ParseFloat(d[0:len(d)-len(unitSuffix)], 64)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return parsedNum * unit.conv, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
// no unit matched, try assuming meters?
|
||||
parsedNum, err := strconv.ParseFloat(d, 64)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return parsedNum, nil
|
||||
}
|
||||
|
||||
// ParseDistanceUnit attempts to parse a distance unit and return the
|
||||
// multiplier for converting this to meters. If the unit cannot be parsed
|
||||
// then 0 and the error message is returned.
|
||||
func ParseDistanceUnit(u string) (float64, error) {
|
||||
for _, unit := range distanceUnits {
|
||||
for _, unitSuffix := range unit.suffixes {
|
||||
if u == unitSuffix {
|
||||
return unit.conv, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0, fmt.Errorf("unknown distance unit: %s", u)
|
||||
}
|
||||
|
||||
// Haversin computes the distance between two points.
|
||||
// This implemenation uses the sloppy math implemenations which trade off
|
||||
// accuracy for performance. The distance returned is in kilometers.
|
||||
func Haversin(lon1, lat1, lon2, lat2 float64) float64 {
|
||||
x1 := lat1 * degreesToRadian
|
||||
x2 := lat2 * degreesToRadian
|
||||
h1 := 1 - cos(x1-x2)
|
||||
h2 := 1 - cos((lon1-lon2)*degreesToRadian)
|
||||
h := (h1 + cos(x1)*cos(x2)*h2) / 2
|
||||
avgLat := (x1 + x2) / 2
|
||||
diameter := earthDiameter(avgLat)
|
||||
|
||||
return diameter * asin(math.Min(1, math.Sqrt(h)))
|
||||
}
|
111
vendor/github.com/blevesearch/bleve/v2/geo/geohash.go
generated
vendored
Normal file
111
vendor/github.com/blevesearch/bleve/v2/geo/geohash.go
generated
vendored
Normal file
|
@ -0,0 +1,111 @@
|
|||
// Copyright (c) 2019 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
// This implementation is inspired from the geohash-js
|
||||
// ref: https://github.com/davetroy/geohash-js
|
||||
|
||||
package geo
|
||||
|
||||
// encoding encapsulates an encoding defined by a given base32 alphabet.
|
||||
type encoding struct {
|
||||
enc string
|
||||
dec [256]byte
|
||||
}
|
||||
|
||||
// newEncoding constructs a new encoding defined by the given alphabet,
|
||||
// which must be a 32-byte string.
|
||||
func newEncoding(encoder string) *encoding {
|
||||
e := new(encoding)
|
||||
e.enc = encoder
|
||||
for i := 0; i < len(e.dec); i++ {
|
||||
e.dec[i] = 0xff
|
||||
}
|
||||
for i := 0; i < len(encoder); i++ {
|
||||
e.dec[encoder[i]] = byte(i)
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// base32encoding with the Geohash alphabet.
|
||||
var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz")
|
||||
|
||||
var masks = []uint64{16, 8, 4, 2, 1}
|
||||
|
||||
// DecodeGeoHash decodes the string geohash faster with
|
||||
// higher precision. This api is in experimental phase.
|
||||
func DecodeGeoHash(geoHash string) (float64, float64) {
|
||||
even := true
|
||||
lat := []float64{-90.0, 90.0}
|
||||
lon := []float64{-180.0, 180.0}
|
||||
|
||||
for i := 0; i < len(geoHash); i++ {
|
||||
cd := uint64(base32encoding.dec[geoHash[i]])
|
||||
for j := 0; j < 5; j++ {
|
||||
if even {
|
||||
if cd&masks[j] > 0 {
|
||||
lon[0] = (lon[0] + lon[1]) / 2
|
||||
} else {
|
||||
lon[1] = (lon[0] + lon[1]) / 2
|
||||
}
|
||||
} else {
|
||||
if cd&masks[j] > 0 {
|
||||
lat[0] = (lat[0] + lat[1]) / 2
|
||||
} else {
|
||||
lat[1] = (lat[0] + lat[1]) / 2
|
||||
}
|
||||
}
|
||||
even = !even
|
||||
}
|
||||
}
|
||||
|
||||
return (lat[0] + lat[1]) / 2, (lon[0] + lon[1]) / 2
|
||||
}
|
||||
|
||||
func EncodeGeoHash(lat, lon float64) string {
|
||||
even := true
|
||||
lats := []float64{-90.0, 90.0}
|
||||
lons := []float64{-180.0, 180.0}
|
||||
precision := 12
|
||||
var ch, bit uint64
|
||||
var geoHash string
|
||||
|
||||
for len(geoHash) < precision {
|
||||
if even {
|
||||
mid := (lons[0] + lons[1]) / 2
|
||||
if lon > mid {
|
||||
ch |= masks[bit]
|
||||
lons[0] = mid
|
||||
} else {
|
||||
lons[1] = mid
|
||||
}
|
||||
} else {
|
||||
mid := (lats[0] + lats[1]) / 2
|
||||
if lat > mid {
|
||||
ch |= masks[bit]
|
||||
lats[0] = mid
|
||||
} else {
|
||||
lats[1] = mid
|
||||
}
|
||||
}
|
||||
even = !even
|
||||
if bit < 4 {
|
||||
bit++
|
||||
} else {
|
||||
geoHash += string(base32encoding.enc[ch])
|
||||
ch = 0
|
||||
bit = 0
|
||||
}
|
||||
}
|
||||
|
||||
return geoHash
|
||||
}
|
181
vendor/github.com/blevesearch/bleve/v2/geo/parse.go
generated
vendored
Normal file
181
vendor/github.com/blevesearch/bleve/v2/geo/parse.go
generated
vendored
Normal file
|
@ -0,0 +1,181 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package geo
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ExtractGeoPoint takes an arbitrary interface{} and tries it's best to
|
||||
// interpret it is as geo point. Supported formats:
|
||||
// Container:
|
||||
// slice length 2 (GeoJSON)
|
||||
// first element lon, second element lat
|
||||
// string (coordinates separated by comma, or a geohash)
|
||||
// first element lat, second element lon
|
||||
// map[string]interface{}
|
||||
// exact keys lat and lon or lng
|
||||
// struct
|
||||
// w/exported fields case-insensitive match on lat and lon or lng
|
||||
// struct
|
||||
// satisfying Later and Loner or Lnger interfaces
|
||||
//
|
||||
// in all cases values must be some sort of numeric-like thing: int/uint/float
|
||||
func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
|
||||
var foundLon, foundLat bool
|
||||
|
||||
thingVal := reflect.ValueOf(thing)
|
||||
if !thingVal.IsValid() {
|
||||
return lon, lat, false
|
||||
}
|
||||
|
||||
thingTyp := thingVal.Type()
|
||||
|
||||
// is it a slice
|
||||
if thingVal.Kind() == reflect.Slice {
|
||||
// must be length 2
|
||||
if thingVal.Len() == 2 {
|
||||
first := thingVal.Index(0)
|
||||
if first.CanInterface() {
|
||||
firstVal := first.Interface()
|
||||
lon, foundLon = extractNumericVal(firstVal)
|
||||
}
|
||||
second := thingVal.Index(1)
|
||||
if second.CanInterface() {
|
||||
secondVal := second.Interface()
|
||||
lat, foundLat = extractNumericVal(secondVal)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// is it a string
|
||||
if thingVal.Kind() == reflect.String {
|
||||
geoStr := thingVal.Interface().(string)
|
||||
if strings.Contains(geoStr, ",") {
|
||||
// geo point with coordinates split by comma
|
||||
points := strings.Split(geoStr, ",")
|
||||
for i, point := range points {
|
||||
// trim any leading or trailing white spaces
|
||||
points[i] = strings.TrimSpace(point)
|
||||
}
|
||||
if len(points) == 2 {
|
||||
var err error
|
||||
lat, err = strconv.ParseFloat(points[0], 64)
|
||||
if err == nil {
|
||||
foundLat = true
|
||||
}
|
||||
lon, err = strconv.ParseFloat(points[1], 64)
|
||||
if err == nil {
|
||||
foundLon = true
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// geohash
|
||||
if len(geoStr) <= geoHashMaxLength {
|
||||
lat, lon = DecodeGeoHash(geoStr)
|
||||
foundLat = true
|
||||
foundLon = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// is it a map
|
||||
if l, ok := thing.(map[string]interface{}); ok {
|
||||
if lval, ok := l["lon"]; ok {
|
||||
lon, foundLon = extractNumericVal(lval)
|
||||
} else if lval, ok := l["lng"]; ok {
|
||||
lon, foundLon = extractNumericVal(lval)
|
||||
}
|
||||
if lval, ok := l["lat"]; ok {
|
||||
lat, foundLat = extractNumericVal(lval)
|
||||
}
|
||||
}
|
||||
|
||||
// now try reflection on struct fields
|
||||
if thingVal.Kind() == reflect.Struct {
|
||||
for i := 0; i < thingVal.NumField(); i++ {
|
||||
fieldName := thingTyp.Field(i).Name
|
||||
if strings.HasPrefix(strings.ToLower(fieldName), "lon") {
|
||||
if thingVal.Field(i).CanInterface() {
|
||||
fieldVal := thingVal.Field(i).Interface()
|
||||
lon, foundLon = extractNumericVal(fieldVal)
|
||||
}
|
||||
}
|
||||
if strings.HasPrefix(strings.ToLower(fieldName), "lng") {
|
||||
if thingVal.Field(i).CanInterface() {
|
||||
fieldVal := thingVal.Field(i).Interface()
|
||||
lon, foundLon = extractNumericVal(fieldVal)
|
||||
}
|
||||
}
|
||||
if strings.HasPrefix(strings.ToLower(fieldName), "lat") {
|
||||
if thingVal.Field(i).CanInterface() {
|
||||
fieldVal := thingVal.Field(i).Interface()
|
||||
lat, foundLat = extractNumericVal(fieldVal)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// last hope, some interfaces
|
||||
// lon
|
||||
if l, ok := thing.(loner); ok {
|
||||
lon = l.Lon()
|
||||
foundLon = true
|
||||
} else if l, ok := thing.(lnger); ok {
|
||||
lon = l.Lng()
|
||||
foundLon = true
|
||||
}
|
||||
// lat
|
||||
if l, ok := thing.(later); ok {
|
||||
lat = l.Lat()
|
||||
foundLat = true
|
||||
}
|
||||
|
||||
return lon, lat, foundLon && foundLat
|
||||
}
|
||||
|
||||
// extract numeric value (if possible) and returns a float64
|
||||
func extractNumericVal(v interface{}) (float64, bool) {
|
||||
val := reflect.ValueOf(v)
|
||||
if !val.IsValid() {
|
||||
return 0, false
|
||||
}
|
||||
typ := val.Type()
|
||||
switch typ.Kind() {
|
||||
case reflect.Float32, reflect.Float64:
|
||||
return val.Float(), true
|
||||
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
||||
return float64(val.Int()), true
|
||||
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
|
||||
return float64(val.Uint()), true
|
||||
}
|
||||
|
||||
return 0, false
|
||||
}
|
||||
|
||||
// various support interfaces which can be used to find lat/lon
|
||||
type loner interface {
|
||||
Lon() float64
|
||||
}
|
||||
|
||||
type later interface {
|
||||
Lat() float64
|
||||
}
|
||||
|
||||
type lnger interface {
|
||||
Lng() float64
|
||||
}
|
212
vendor/github.com/blevesearch/bleve/v2/geo/sloppy.go
generated
vendored
Normal file
212
vendor/github.com/blevesearch/bleve/v2/geo/sloppy.go
generated
vendored
Normal file
|
@ -0,0 +1,212 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package geo
|
||||
|
||||
import (
|
||||
"math"
|
||||
)
|
||||
|
||||
var earthDiameterPerLatitude []float64
|
||||
var sinTab []float64
|
||||
var cosTab []float64
|
||||
var asinTab []float64
|
||||
var asinDer1DivF1Tab []float64
|
||||
var asinDer2DivF2Tab []float64
|
||||
var asinDer3DivF3Tab []float64
|
||||
var asinDer4DivF4Tab []float64
|
||||
|
||||
const radiusTabsSize = (1 << 10) + 1
|
||||
const radiusDelta = (math.Pi / 2) / (radiusTabsSize - 1)
|
||||
const radiusIndexer = 1 / radiusDelta
|
||||
const sinCosTabsSize = (1 << 11) + 1
|
||||
const asinTabsSize = (1 << 13) + 1
|
||||
const oneDivF2 = 1 / 2.0
|
||||
const oneDivF3 = 1 / 6.0
|
||||
const oneDivF4 = 1 / 24.0
|
||||
|
||||
// 1.57079632673412561417e+00 first 33 bits of pi/2
|
||||
var pio2Hi = math.Float64frombits(0x3FF921FB54400000)
|
||||
|
||||
// 6.07710050650619224932e-11 pi/2 - PIO2_HI
|
||||
var pio2Lo = math.Float64frombits(0x3DD0B4611A626331)
|
||||
|
||||
var asinPio2Hi = math.Float64frombits(0x3FF921FB54442D18) // 1.57079632679489655800e+00
|
||||
var asinPio2Lo = math.Float64frombits(0x3C91A62633145C07) // 6.12323399573676603587e-17
|
||||
var asinPs0 = math.Float64frombits(0x3fc5555555555555) // 1.66666666666666657415e-01
|
||||
var asinPs1 = math.Float64frombits(0xbfd4d61203eb6f7d) // -3.25565818622400915405e-01
|
||||
var asinPs2 = math.Float64frombits(0x3fc9c1550e884455) // 2.01212532134862925881e-01
|
||||
var asinPs3 = math.Float64frombits(0xbfa48228b5688f3b) // -4.00555345006794114027e-02
|
||||
var asinPs4 = math.Float64frombits(0x3f49efe07501b288) // 7.91534994289814532176e-04
|
||||
var asinPs5 = math.Float64frombits(0x3f023de10dfdf709) // 3.47933107596021167570e-05
|
||||
var asinQs1 = math.Float64frombits(0xc0033a271c8a2d4b) // -2.40339491173441421878e+00
|
||||
var asinQs2 = math.Float64frombits(0x40002ae59c598ac8) // 2.02094576023350569471e+00
|
||||
var asinQs3 = math.Float64frombits(0xbfe6066c1b8d0159) // -6.88283971605453293030e-01
|
||||
var asinQs4 = math.Float64frombits(0x3fb3b8c5b12e9282) // 7.70381505559019352791e-02
|
||||
|
||||
var twoPiHi = 4 * pio2Hi
|
||||
var twoPiLo = 4 * pio2Lo
|
||||
var sinCosDeltaHi = twoPiHi/sinCosTabsSize - 1
|
||||
var sinCosDeltaLo = twoPiLo/sinCosTabsSize - 1
|
||||
var sinCosIndexer = 1 / (sinCosDeltaHi + sinCosDeltaLo)
|
||||
var sinCosMaxValueForIntModulo = ((math.MaxInt64 >> 9) / sinCosIndexer) * 0.99
|
||||
var asinMaxValueForTabs = math.Sin(73.0 * degreesToRadian)
|
||||
|
||||
var asinDelta = asinMaxValueForTabs / (asinTabsSize - 1)
|
||||
var asinIndexer = 1 / asinDelta
|
||||
|
||||
func init() {
|
||||
// initializes the tables used for the sloppy math functions
|
||||
|
||||
// sin and cos
|
||||
sinTab = make([]float64, sinCosTabsSize)
|
||||
cosTab = make([]float64, sinCosTabsSize)
|
||||
sinCosPiIndex := (sinCosTabsSize - 1) / 2
|
||||
sinCosPiMul2Index := 2 * sinCosPiIndex
|
||||
sinCosPiMul05Index := sinCosPiIndex / 2
|
||||
sinCosPiMul15Index := 3 * sinCosPiIndex / 2
|
||||
for i := 0; i < sinCosTabsSize; i++ {
|
||||
// angle: in [0,2*PI].
|
||||
angle := float64(i)*sinCosDeltaHi + float64(i)*sinCosDeltaLo
|
||||
sinAngle := math.Sin(angle)
|
||||
cosAngle := math.Cos(angle)
|
||||
// For indexes corresponding to null cosine or sine, we make sure the value is zero
|
||||
// and not an epsilon. This allows for a much better accuracy for results close to zero.
|
||||
if i == sinCosPiIndex {
|
||||
sinAngle = 0.0
|
||||
} else if i == sinCosPiMul2Index {
|
||||
sinAngle = 0.0
|
||||
} else if i == sinCosPiMul05Index {
|
||||
sinAngle = 0.0
|
||||
} else if i == sinCosPiMul15Index {
|
||||
sinAngle = 0.0
|
||||
}
|
||||
sinTab[i] = sinAngle
|
||||
cosTab[i] = cosAngle
|
||||
}
|
||||
|
||||
// asin
|
||||
asinTab = make([]float64, asinTabsSize)
|
||||
asinDer1DivF1Tab = make([]float64, asinTabsSize)
|
||||
asinDer2DivF2Tab = make([]float64, asinTabsSize)
|
||||
asinDer3DivF3Tab = make([]float64, asinTabsSize)
|
||||
asinDer4DivF4Tab = make([]float64, asinTabsSize)
|
||||
for i := 0; i < asinTabsSize; i++ {
|
||||
// x: in [0,ASIN_MAX_VALUE_FOR_TABS].
|
||||
x := float64(i) * asinDelta
|
||||
asinTab[i] = math.Asin(x)
|
||||
oneMinusXSqInv := 1.0 / (1 - x*x)
|
||||
oneMinusXSqInv05 := math.Sqrt(oneMinusXSqInv)
|
||||
oneMinusXSqInv15 := oneMinusXSqInv05 * oneMinusXSqInv
|
||||
oneMinusXSqInv25 := oneMinusXSqInv15 * oneMinusXSqInv
|
||||
oneMinusXSqInv35 := oneMinusXSqInv25 * oneMinusXSqInv
|
||||
asinDer1DivF1Tab[i] = oneMinusXSqInv05
|
||||
asinDer2DivF2Tab[i] = (x * oneMinusXSqInv15) * oneDivF2
|
||||
asinDer3DivF3Tab[i] = ((1 + 2*x*x) * oneMinusXSqInv25) * oneDivF3
|
||||
asinDer4DivF4Tab[i] = ((5 + 2*x*(2+x*(5-2*x))) * oneMinusXSqInv35) * oneDivF4
|
||||
}
|
||||
|
||||
// earth radius
|
||||
a := 6378137.0
|
||||
b := 6356752.31420
|
||||
a2 := a * a
|
||||
b2 := b * b
|
||||
earthDiameterPerLatitude = make([]float64, radiusTabsSize)
|
||||
earthDiameterPerLatitude[0] = 2.0 * a / 1000
|
||||
earthDiameterPerLatitude[radiusTabsSize-1] = 2.0 * b / 1000
|
||||
for i := 1; i < radiusTabsSize-1; i++ {
|
||||
lat := math.Pi * float64(i) / (2*radiusTabsSize - 1)
|
||||
one := math.Pow(a2*math.Cos(lat), 2)
|
||||
two := math.Pow(b2*math.Sin(lat), 2)
|
||||
three := math.Pow(float64(a)*math.Cos(lat), 2)
|
||||
four := math.Pow(b*math.Sin(lat), 2)
|
||||
radius := math.Sqrt((one + two) / (three + four))
|
||||
earthDiameterPerLatitude[i] = 2 * radius / 1000
|
||||
}
|
||||
}
|
||||
|
||||
// earthDiameter returns an estimation of the earth's diameter at the specified
|
||||
// latitude in kilometers
|
||||
func earthDiameter(lat float64) float64 {
|
||||
index := math.Mod(math.Abs(lat)*radiusIndexer+0.5, float64(len(earthDiameterPerLatitude)))
|
||||
if math.IsNaN(index) {
|
||||
return 0
|
||||
}
|
||||
return earthDiameterPerLatitude[int(index)]
|
||||
}
|
||||
|
||||
var pio2 = math.Pi / 2
|
||||
|
||||
func sin(a float64) float64 {
|
||||
return cos(a - pio2)
|
||||
}
|
||||
|
||||
// cos is a sloppy math (faster) implementation of math.Cos
|
||||
func cos(a float64) float64 {
|
||||
if a < 0.0 {
|
||||
a = -a
|
||||
}
|
||||
if a > sinCosMaxValueForIntModulo {
|
||||
return math.Cos(a)
|
||||
}
|
||||
// index: possibly outside tables range.
|
||||
index := int(a*sinCosIndexer + 0.5)
|
||||
delta := (a - float64(index)*sinCosDeltaHi) - float64(index)*sinCosDeltaLo
|
||||
// Making sure index is within tables range.
|
||||
// Last value of each table is the same than first, so we ignore it (tabs size minus one) for modulo.
|
||||
index &= (sinCosTabsSize - 2) // index % (SIN_COS_TABS_SIZE-1)
|
||||
indexCos := cosTab[index]
|
||||
indexSin := sinTab[index]
|
||||
return indexCos + delta*(-indexSin+delta*(-indexCos*oneDivF2+delta*(indexSin*oneDivF3+delta*indexCos*oneDivF4)))
|
||||
}
|
||||
|
||||
// asin is a sloppy math (faster) implementation of math.Asin
|
||||
func asin(a float64) float64 {
|
||||
var negateResult bool
|
||||
if a < 0 {
|
||||
a = -a
|
||||
negateResult = true
|
||||
}
|
||||
if a <= asinMaxValueForTabs {
|
||||
index := int(a*asinIndexer + 0.5)
|
||||
delta := a - float64(index)*asinDelta
|
||||
result := asinTab[index] + delta*(asinDer1DivF1Tab[index]+delta*(asinDer2DivF2Tab[index]+delta*(asinDer3DivF3Tab[index]+delta*asinDer4DivF4Tab[index])))
|
||||
if negateResult {
|
||||
return -result
|
||||
}
|
||||
return result
|
||||
}
|
||||
// value > ASIN_MAX_VALUE_FOR_TABS, or value is NaN
|
||||
// This part is derived from fdlibm.
|
||||
if a < 1 {
|
||||
t := (1.0 - a) * 0.5
|
||||
p := t * (asinPs0 + t*(asinPs1+t*(asinPs2+t*(asinPs3+t*(asinPs4+t+asinPs5)))))
|
||||
q := 1.0 + t*(asinQs1+t*(asinQs2+t*(asinQs3+t*asinQs4)))
|
||||
s := math.Sqrt(t)
|
||||
z := s + s*(p/q)
|
||||
result := asinPio2Hi - ((z + z) - asinPio2Lo)
|
||||
if negateResult {
|
||||
return -result
|
||||
}
|
||||
return result
|
||||
}
|
||||
// value >= 1.0, or value is NaN
|
||||
if a == 1.0 {
|
||||
if negateResult {
|
||||
return -math.Pi / 2
|
||||
}
|
||||
return math.Pi / 2
|
||||
}
|
||||
return math.NaN()
|
||||
}
|
29
vendor/github.com/blevesearch/bleve/v2/go.mod
generated
vendored
Normal file
29
vendor/github.com/blevesearch/bleve/v2/go.mod
generated
vendored
Normal file
|
@ -0,0 +1,29 @@
|
|||
module github.com/blevesearch/bleve/v2
|
||||
|
||||
go 1.13
|
||||
|
||||
require (
|
||||
github.com/RoaringBitmap/roaring v0.4.23
|
||||
github.com/blevesearch/bleve_index_api v1.0.0
|
||||
github.com/blevesearch/go-porterstemmer v1.0.3
|
||||
github.com/blevesearch/scorch_segment_api v1.0.0
|
||||
github.com/blevesearch/segment v0.9.0
|
||||
github.com/blevesearch/snowballstem v0.9.0
|
||||
github.com/blevesearch/upsidedown_store_api v1.0.1
|
||||
github.com/blevesearch/zapx/v11 v11.1.10
|
||||
github.com/blevesearch/zapx/v12 v12.1.10
|
||||
github.com/blevesearch/zapx/v13 v13.1.10
|
||||
github.com/blevesearch/zapx/v14 v14.1.10
|
||||
github.com/blevesearch/zapx/v15 v15.1.10
|
||||
github.com/couchbase/moss v0.1.0
|
||||
github.com/couchbase/vellum v1.0.2
|
||||
github.com/golang/protobuf v1.3.2
|
||||
github.com/kljensen/snowball v0.6.0
|
||||
github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563
|
||||
github.com/spf13/cobra v0.0.5
|
||||
github.com/steveyen/gtreap v0.1.0
|
||||
github.com/syndtr/goleveldb v1.0.0
|
||||
github.com/willf/bitset v1.1.10
|
||||
go.etcd.io/bbolt v1.3.5
|
||||
golang.org/x/text v0.3.0
|
||||
)
|
128
vendor/github.com/blevesearch/bleve/v2/go.sum
generated
vendored
Normal file
128
vendor/github.com/blevesearch/bleve/v2/go.sum
generated
vendored
Normal file
|
@ -0,0 +1,128 @@
|
|||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/RoaringBitmap/roaring v0.4.23 h1:gpyfd12QohbqhFO4NVDUdoPOCXsyahYRQhINmlHxKeo=
|
||||
github.com/RoaringBitmap/roaring v0.4.23/go.mod h1:D0gp8kJQgE1A4LQ5wFLggQEyvDi06Mq5mKs52e1TwOo=
|
||||
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
|
||||
github.com/blevesearch/bleve_index_api v1.0.0 h1:Ds3XeuTxjXCkG6pgIwWDRyooJKNIuOKemnN0N0IkhTU=
|
||||
github.com/blevesearch/bleve_index_api v1.0.0/go.mod h1:fiwKS0xLEm+gBRgv5mumf0dhgFr2mDgZah1pqv1c1M4=
|
||||
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
|
||||
github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M=
|
||||
github.com/blevesearch/mmap-go v1.0.2 h1:JtMHb+FgQCTTYIhtMvimw15dJwu1Y5lrZDMOFXVWPk0=
|
||||
github.com/blevesearch/mmap-go v1.0.2/go.mod h1:ol2qBqYaOUsGdm7aRMRrYGgPvnwLe6Y+7LMvAB5IbSA=
|
||||
github.com/blevesearch/scorch_segment_api v1.0.0 h1:BUkCPWDg2gimTEyVDXf85I2buqqt4lh28uaVMiJsIYk=
|
||||
github.com/blevesearch/scorch_segment_api v1.0.0/go.mod h1:KgRYmlfYC27NeM6cXOHx8LBgq7jn0atpV8mVWoBKBng=
|
||||
github.com/blevesearch/segment v0.9.0 h1:5lG7yBCx98or7gK2cHMKPukPZ/31Kag7nONpoBt22Ac=
|
||||
github.com/blevesearch/segment v0.9.0/go.mod h1:9PfHYUdQCgHktBgvtUOF4x+pc4/l8rdH0u5spnW85UQ=
|
||||
github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s=
|
||||
github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs=
|
||||
github.com/blevesearch/upsidedown_store_api v1.0.1 h1:1SYRwyoFLwG3sj0ed89RLtM15amfX2pXlYbFOnF8zNU=
|
||||
github.com/blevesearch/upsidedown_store_api v1.0.1/go.mod h1:MQDVGpHZrpe3Uy26zJBf/a8h0FZY6xJbthIMm8myH2Q=
|
||||
github.com/blevesearch/zapx/v11 v11.1.10 h1:8Eo3rXiHsVSP9Sk+4StrrwLrj9vyulhMVPmxTf8ZuDg=
|
||||
github.com/blevesearch/zapx/v11 v11.1.10/go.mod h1:DTjbcBqrr/Uo82UBilDC8lEew42gN/OcIyiTNFtSijc=
|
||||
github.com/blevesearch/zapx/v12 v12.1.10 h1:sqR+/0Z4dSTovApRqLA1HnilMtQer7a4UvPrNmPzlTM=
|
||||
github.com/blevesearch/zapx/v12 v12.1.10/go.mod h1:14NmKnPrnKAIyiEJM566k/Jk+FQpuiflT5d3uaaK3MI=
|
||||
github.com/blevesearch/zapx/v13 v13.1.10 h1:zCneEVRJDXwtDfSwh+33Dxguliv192vCK283zdGH4Sw=
|
||||
github.com/blevesearch/zapx/v13 v13.1.10/go.mod h1:YsVY6YGpTEAlJOMjdL7EsdBLvjWd8kPa2gwJDNpqLJo=
|
||||
github.com/blevesearch/zapx/v14 v14.1.10 h1:nD0vw2jxKogJFfA5WyoS4wNwZlVby3Aq8aW7CZi6YIw=
|
||||
github.com/blevesearch/zapx/v14 v14.1.10/go.mod h1:hsULl5eJSxs5NEfBsmeT9qrqdCP+/ecpVZKt60M4V64=
|
||||
github.com/blevesearch/zapx/v15 v15.1.10 h1:kZR3b9jO9l6s2B5UHI+1N1llLzJ4nYikkXQTMrDl1vQ=
|
||||
github.com/blevesearch/zapx/v15 v15.1.10/go.mod h1:4ypq25bwtSQKzwEF1UERyIhmGTbMT3brY/n4NC5gRnM=
|
||||
github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
|
||||
github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk=
|
||||
github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
|
||||
github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps=
|
||||
github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k=
|
||||
github.com/couchbase/moss v0.1.0 h1:HCL+xxHUwmOaL44kMM/gU08OW6QGCui1WVFO58bjhNI=
|
||||
github.com/couchbase/moss v0.1.0/go.mod h1:9MaHIaRuy9pvLPUJxB8sh8OrLfyDczECVL37grCIubs=
|
||||
github.com/couchbase/vellum v1.0.2 h1:BrbP0NKiyDdndMPec8Jjhy0U47CZ0Lgx3xUC2r9rZqw=
|
||||
github.com/couchbase/vellum v1.0.2/go.mod h1:FcwrEivFpNi24R3jLOs3n+fs5RnuQnQqCLBJ1uAg1W4=
|
||||
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
|
||||
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
|
||||
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 h1:Ujru1hufTHVb++eG6OuNDKMxZnGIvF6o/u8q/8h2+I4=
|
||||
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
|
||||
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8=
|
||||
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
|
||||
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs=
|
||||
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
|
||||
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKpxb/jFExr4HGq6on2dEOmnL6FV+fgPw=
|
||||
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
|
||||
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
|
||||
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
|
||||
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
|
||||
github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
|
||||
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
|
||||
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
|
||||
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
|
||||
github.com/kljensen/snowball v0.6.0 h1:6DZLCcZeL0cLfodx+Md4/OLC6b/bfurWUOUGs1ydfOU=
|
||||
github.com/kljensen/snowball v0.6.0/go.mod h1:27N7E8fVU5H68RlUmnWwZCfxgt4POBJfENGMvNRhldw=
|
||||
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
|
||||
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
|
||||
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
|
||||
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae/go.mod h1:qAyveg+e4CE+eKJXWVjKXM4ck2QobLqTDytGJbLLhJg=
|
||||
github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM=
|
||||
github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw=
|
||||
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
||||
github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs=
|
||||
github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
||||
github.com/onsi/gomega v1.4.3 h1:RE1xgDvH7imwFD45h+u2SgIfERHlS2yNG4DObb5BSKU=
|
||||
github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
|
||||
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
|
||||
github.com/philhofer/fwd v1.0.0 h1:UbZqGr5Y38ApvM/V/jEljVxwocdweyH+vmYvRPBnbqQ=
|
||||
github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563 h1:dY6ETXrvDG7Sa4vE8ZQG4yqWg6UnOcbqTAahkV813vQ=
|
||||
github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
|
||||
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
|
||||
github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
|
||||
github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
|
||||
github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s=
|
||||
github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU=
|
||||
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
|
||||
github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
|
||||
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
|
||||
github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
|
||||
github.com/steveyen/gtreap v0.1.0 h1:CjhzTa274PyJLJuMZwIzCO1PfC00oRa8d1Kc78bFXJM=
|
||||
github.com/steveyen/gtreap v0.1.0/go.mod h1:kl/5J7XbrOmlIbYIXdRHDDE5QxHqpk0cmkT7Z4dM9/Y=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
|
||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE=
|
||||
github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ=
|
||||
github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU=
|
||||
github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
|
||||
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
|
||||
github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc=
|
||||
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
|
||||
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
|
||||
go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0=
|
||||
go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
|
||||
golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
||||
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd h1:nTDtHvHSdCn1m6ITfMRqtOd/9+7a3s8RBNOZ3eYZzJA=
|
||||
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181221143128-b4a75ba826a6/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5 h1:LfCXLvNmTYH9kEmVgqbnsWfruoXZIrh4YBgqVHtDvw0=
|
||||
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
|
||||
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
|
||||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
|
||||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
|
||||
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
308
vendor/github.com/blevesearch/bleve/v2/index.go
generated
vendored
Normal file
308
vendor/github.com/blevesearch/bleve/v2/index.go
generated
vendored
Normal file
|
@ -0,0 +1,308 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/blevesearch/bleve/v2/index/upsidedown"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/document"
|
||||
"github.com/blevesearch/bleve/v2/mapping"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
// A Batch groups together multiple Index and Delete
|
||||
// operations you would like performed at the same
|
||||
// time. The Batch structure is NOT thread-safe.
|
||||
// You should only perform operations on a batch
|
||||
// from a single thread at a time. Once batch
|
||||
// execution has started, you may not modify it.
|
||||
type Batch struct {
|
||||
index Index
|
||||
internal *index.Batch
|
||||
|
||||
lastDocSize uint64
|
||||
totalSize uint64
|
||||
}
|
||||
|
||||
// Index adds the specified index operation to the
|
||||
// batch. NOTE: the bleve Index is not updated
|
||||
// until the batch is executed.
|
||||
func (b *Batch) Index(id string, data interface{}) error {
|
||||
if id == "" {
|
||||
return ErrorEmptyID
|
||||
}
|
||||
doc := document.NewDocument(id)
|
||||
err := b.index.Mapping().MapDocument(doc, data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
b.internal.Update(doc)
|
||||
|
||||
b.lastDocSize = uint64(doc.Size() +
|
||||
len(id) + size.SizeOfString) // overhead from internal
|
||||
b.totalSize += b.lastDocSize
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *Batch) LastDocSize() uint64 {
|
||||
return b.lastDocSize
|
||||
}
|
||||
|
||||
func (b *Batch) TotalDocsSize() uint64 {
|
||||
return b.totalSize
|
||||
}
|
||||
|
||||
// IndexAdvanced adds the specified index operation to the
|
||||
// batch which skips the mapping. NOTE: the bleve Index is not updated
|
||||
// until the batch is executed.
|
||||
func (b *Batch) IndexAdvanced(doc *document.Document) (err error) {
|
||||
if doc.ID() == "" {
|
||||
return ErrorEmptyID
|
||||
}
|
||||
b.internal.Update(doc)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete adds the specified delete operation to the
|
||||
// batch. NOTE: the bleve Index is not updated until
|
||||
// the batch is executed.
|
||||
func (b *Batch) Delete(id string) {
|
||||
if id != "" {
|
||||
b.internal.Delete(id)
|
||||
}
|
||||
}
|
||||
|
||||
// SetInternal adds the specified set internal
|
||||
// operation to the batch. NOTE: the bleve Index is
|
||||
// not updated until the batch is executed.
|
||||
func (b *Batch) SetInternal(key, val []byte) {
|
||||
b.internal.SetInternal(key, val)
|
||||
}
|
||||
|
||||
// DeleteInternal adds the specified delete internal
|
||||
// operation to the batch. NOTE: the bleve Index is
|
||||
// not updated until the batch is executed.
|
||||
func (b *Batch) DeleteInternal(key []byte) {
|
||||
b.internal.DeleteInternal(key)
|
||||
}
|
||||
|
||||
// Size returns the total number of operations inside the batch
|
||||
// including normal index operations and internal operations.
|
||||
func (b *Batch) Size() int {
|
||||
return len(b.internal.IndexOps) + len(b.internal.InternalOps)
|
||||
}
|
||||
|
||||
// String prints a user friendly string representation of what
|
||||
// is inside this batch.
|
||||
func (b *Batch) String() string {
|
||||
return b.internal.String()
|
||||
}
|
||||
|
||||
// Reset returns a Batch to the empty state so that it can
|
||||
// be re-used in the future.
|
||||
func (b *Batch) Reset() {
|
||||
b.internal.Reset()
|
||||
b.lastDocSize = 0
|
||||
b.totalSize = 0
|
||||
}
|
||||
|
||||
func (b *Batch) Merge(o *Batch) {
|
||||
if o != nil && o.internal != nil {
|
||||
b.internal.Merge(o.internal)
|
||||
if o.LastDocSize() > 0 {
|
||||
b.lastDocSize = o.LastDocSize()
|
||||
}
|
||||
b.totalSize = uint64(b.internal.TotalDocSize())
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Batch) SetPersistedCallback(f index.BatchCallback) {
|
||||
b.internal.SetPersistedCallback(f)
|
||||
}
|
||||
|
||||
func (b *Batch) PersistedCallback() index.BatchCallback {
|
||||
return b.internal.PersistedCallback()
|
||||
}
|
||||
|
||||
// An Index implements all the indexing and searching
|
||||
// capabilities of bleve. An Index can be created
|
||||
// using the New() and Open() methods.
|
||||
//
|
||||
// Index() takes an input value, deduces a DocumentMapping for its type,
|
||||
// assigns string paths to its fields or values then applies field mappings on
|
||||
// them.
|
||||
//
|
||||
// The DocumentMapping used to index a value is deduced by the following rules:
|
||||
// 1) If value implements mapping.bleveClassifier interface, resolve the mapping
|
||||
// from BleveType().
|
||||
// 2) If value implements mapping.Classifier interface, resolve the mapping
|
||||
// from Type().
|
||||
// 3) If value has a string field or value at IndexMapping.TypeField.
|
||||
// (defaulting to "_type"), use it to resolve the mapping. Fields addressing
|
||||
// is described below.
|
||||
// 4) If IndexMapping.DefaultType is registered, return it.
|
||||
// 5) Return IndexMapping.DefaultMapping.
|
||||
//
|
||||
// Each field or nested field of the value is identified by a string path, then
|
||||
// mapped to one or several FieldMappings which extract the result for analysis.
|
||||
//
|
||||
// Struct values fields are identified by their "json:" tag, or by their name.
|
||||
// Nested fields are identified by prefixing with their parent identifier,
|
||||
// separated by a dot.
|
||||
//
|
||||
// Map values entries are identified by their string key. Entries not indexed
|
||||
// by strings are ignored. Entry values are identified recursively like struct
|
||||
// fields.
|
||||
//
|
||||
// Slice and array values are identified by their field name. Their elements
|
||||
// are processed sequentially with the same FieldMapping.
|
||||
//
|
||||
// String, float64 and time.Time values are identified by their field name.
|
||||
// Other types are ignored.
|
||||
//
|
||||
// Each value identifier is decomposed in its parts and recursively address
|
||||
// SubDocumentMappings in the tree starting at the root DocumentMapping. If a
|
||||
// mapping is found, all its FieldMappings are applied to the value. If no
|
||||
// mapping is found and the root DocumentMapping is dynamic, default mappings
|
||||
// are used based on value type and IndexMapping default configurations.
|
||||
//
|
||||
// Finally, mapped values are analyzed, indexed or stored. See
|
||||
// FieldMapping.Analyzer to know how an analyzer is resolved for a given field.
|
||||
//
|
||||
// Examples:
|
||||
//
|
||||
// type Date struct {
|
||||
// Day string `json:"day"`
|
||||
// Month string
|
||||
// Year string
|
||||
// }
|
||||
//
|
||||
// type Person struct {
|
||||
// FirstName string `json:"first_name"`
|
||||
// LastName string
|
||||
// BirthDate Date `json:"birth_date"`
|
||||
// }
|
||||
//
|
||||
// A Person value FirstName is mapped by the SubDocumentMapping at
|
||||
// "first_name". Its LastName is mapped by the one at "LastName". The day of
|
||||
// BirthDate is mapped to the SubDocumentMapping "day" of the root
|
||||
// SubDocumentMapping "birth_date". It will appear as the "birth_date.day"
|
||||
// field in the index. The month is mapped to "birth_date.Month".
|
||||
type Index interface {
|
||||
// Index analyzes, indexes or stores mapped data fields. Supplied
|
||||
// identifier is bound to analyzed data and will be retrieved by search
|
||||
// requests. See Index interface documentation for details about mapping
|
||||
// rules.
|
||||
Index(id string, data interface{}) error
|
||||
Delete(id string) error
|
||||
|
||||
NewBatch() *Batch
|
||||
Batch(b *Batch) error
|
||||
|
||||
// Document returns specified document or nil if the document is not
|
||||
// indexed or stored.
|
||||
Document(id string) (index.Document, error)
|
||||
// DocCount returns the number of documents in the index.
|
||||
DocCount() (uint64, error)
|
||||
|
||||
Search(req *SearchRequest) (*SearchResult, error)
|
||||
SearchInContext(ctx context.Context, req *SearchRequest) (*SearchResult, error)
|
||||
|
||||
Fields() ([]string, error)
|
||||
|
||||
FieldDict(field string) (index.FieldDict, error)
|
||||
FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error)
|
||||
FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error)
|
||||
|
||||
Close() error
|
||||
|
||||
Mapping() mapping.IndexMapping
|
||||
|
||||
Stats() *IndexStat
|
||||
StatsMap() map[string]interface{}
|
||||
|
||||
GetInternal(key []byte) ([]byte, error)
|
||||
SetInternal(key, val []byte) error
|
||||
DeleteInternal(key []byte) error
|
||||
|
||||
// Name returns the name of the index (by default this is the path)
|
||||
Name() string
|
||||
// SetName lets you assign your own logical name to this index
|
||||
SetName(string)
|
||||
|
||||
// Advanced returns the internal index implementation
|
||||
Advanced() (index.Index, error)
|
||||
}
|
||||
|
||||
// New index at the specified path, must not exist.
|
||||
// The provided mapping will be used for all
|
||||
// Index/Search operations.
|
||||
func New(path string, mapping mapping.IndexMapping) (Index, error) {
|
||||
return newIndexUsing(path, mapping, Config.DefaultIndexType, Config.DefaultKVStore, nil)
|
||||
}
|
||||
|
||||
// NewMemOnly creates a memory-only index.
|
||||
// The contents of the index is NOT persisted,
|
||||
// and will be lost once closed.
|
||||
// The provided mapping will be used for all
|
||||
// Index/Search operations.
|
||||
func NewMemOnly(mapping mapping.IndexMapping) (Index, error) {
|
||||
return newIndexUsing("", mapping, upsidedown.Name, Config.DefaultMemKVStore, nil)
|
||||
}
|
||||
|
||||
// NewUsing creates index at the specified path,
|
||||
// which must not already exist.
|
||||
// The provided mapping will be used for all
|
||||
// Index/Search operations.
|
||||
// The specified index type will be used.
|
||||
// The specified kvstore implementation will be used
|
||||
// and the provided kvconfig will be passed to its
|
||||
// constructor. Note that currently the values of kvconfig must
|
||||
// be able to be marshaled and unmarshaled using the encoding/json library (used
|
||||
// when reading/writing the index metadata file).
|
||||
func NewUsing(path string, mapping mapping.IndexMapping, indexType string, kvstore string, kvconfig map[string]interface{}) (Index, error) {
|
||||
return newIndexUsing(path, mapping, indexType, kvstore, kvconfig)
|
||||
}
|
||||
|
||||
// Open index at the specified path, must exist.
|
||||
// The mapping used when it was created will be used for all Index/Search operations.
|
||||
func Open(path string) (Index, error) {
|
||||
return openIndexUsing(path, nil)
|
||||
}
|
||||
|
||||
// OpenUsing opens index at the specified path, must exist.
|
||||
// The mapping used when it was created will be used for all Index/Search operations.
|
||||
// The provided runtimeConfig can override settings
|
||||
// persisted when the kvstore was created.
|
||||
func OpenUsing(path string, runtimeConfig map[string]interface{}) (Index, error) {
|
||||
return openIndexUsing(path, runtimeConfig)
|
||||
}
|
||||
|
||||
// Builder is a limited interface, used to build indexes in an offline mode.
|
||||
// Items cannot be updated or deleted, and the caller MUST ensure a document is
|
||||
// indexed only once.
|
||||
type Builder interface {
|
||||
Index(id string, data interface{}) error
|
||||
Close() error
|
||||
}
|
||||
|
||||
// NewBuilder creates a builder, which will build an index at the specified path,
|
||||
// using the specified mapping and options.
|
||||
func NewBuilder(path string, mapping mapping.IndexMapping, config map[string]interface{}) (Builder, error) {
|
||||
return newBuilder(path, mapping, config)
|
||||
}
|
367
vendor/github.com/blevesearch/bleve/v2/index/scorch/README.md
generated
vendored
Normal file
367
vendor/github.com/blevesearch/bleve/v2/index/scorch/README.md
generated
vendored
Normal file
|
@ -0,0 +1,367 @@
|
|||
# scorch
|
||||
|
||||
## Definitions
|
||||
|
||||
Batch
|
||||
- A collection of Documents to mutate in the index.
|
||||
|
||||
Document
|
||||
- Has a unique identifier (arbitrary bytes).
|
||||
- Is comprised of a list of fields.
|
||||
|
||||
Field
|
||||
- Has a name (string).
|
||||
- Has a type (text, number, date, geopoint).
|
||||
- Has a value (depending on type).
|
||||
- Can be indexed, stored, or both.
|
||||
- If indexed, can be analyzed.
|
||||
-m If indexed, can optionally store term vectors.
|
||||
|
||||
## Scope
|
||||
|
||||
Scorch *MUST* implement the bleve.index API without requiring any changes to this API.
|
||||
|
||||
Scorch *MAY* introduce new interfaces, which can be discovered to allow use of new capabilities not in the current API.
|
||||
|
||||
## Implementation
|
||||
|
||||
The scorch implementation starts with the concept of a segmented index.
|
||||
|
||||
A segment is simply a slice, subset, or portion of the entire index. A segmented index is one which is composed of one or more segments. Although segments are created in a particular order, knowing this ordering is not required to achieve correct semantics when querying. Because there is no ordering, this means that when searching an index, you can (and should) search all the segments concurrently.
|
||||
|
||||
### Internal Wrapper
|
||||
|
||||
In order to accommodate the existing APIs while also improving the implementation, the scorch implementation includes some wrapper functionality that must be described.
|
||||
|
||||
#### \_id field
|
||||
|
||||
In scorch, field 0 is prearranged to be named \_id. All documents have a value for this field, which is the documents external identifier. In this version the field *MUST* be both indexed AND stored. The scorch wrapper adds this field, as it will not be present in the Document from the calling bleve code.
|
||||
|
||||
NOTE: If a document already contains a field \_id, it will be replaced. If this is problematic, the caller must ensure such a scenario does not happen.
|
||||
|
||||
### Proposed Structures
|
||||
|
||||
```
|
||||
type Segment interface {
|
||||
|
||||
Dictionary(field string) TermDictionary
|
||||
|
||||
}
|
||||
|
||||
type TermDictionary interface {
|
||||
|
||||
PostingsList(term string, excluding PostingsList) PostingsList
|
||||
|
||||
}
|
||||
|
||||
type PostingsList interface {
|
||||
|
||||
Next() Posting
|
||||
|
||||
And(other PostingsList) PostingsList
|
||||
Or(other PostingsList) PostingsList
|
||||
|
||||
}
|
||||
|
||||
type Posting interface {
|
||||
Number() uint64
|
||||
|
||||
Frequency() uint64
|
||||
Norm() float64
|
||||
|
||||
Locations() Locations
|
||||
}
|
||||
|
||||
type Locations interface {
|
||||
Start() uint64
|
||||
End() uint64
|
||||
Pos() uint64
|
||||
ArrayPositions() ...
|
||||
}
|
||||
|
||||
type DeletedDocs {
|
||||
|
||||
}
|
||||
|
||||
type SegmentSnapshot struct {
|
||||
segment Segment
|
||||
deleted PostingsList
|
||||
}
|
||||
|
||||
type IndexSnapshot struct {
|
||||
segment []SegmentSnapshot
|
||||
}
|
||||
```
|
||||
**What about errors?**
|
||||
**What about memory mgmnt or context?**
|
||||
**Postings List separate iterator to separate stateful from stateless**
|
||||
### Mutating the Index
|
||||
|
||||
The bleve.index API has methods for directly making individual mutations (Update/Delete/SetInternal/DeleteInternal), however for this first implementation, we assume that all of these calls can simply be turned into a Batch of size 1. This may be highly inefficient, but it will be correct. This decision is made based on the fact that Couchbase FTS always uses Batches.
|
||||
|
||||
NOTE: As a side-effect of this decision, it should be clear that performance tuning may depend on the batch size, which may in-turn require changes in FTS.
|
||||
|
||||
From this point forward, only Batch mutations will be discussed.
|
||||
|
||||
Sequence of Operations:
|
||||
|
||||
1. For each document in the batch, search through all existing segments. The goal is to build up a per-segment bitset which tells us which documents in that segment are obsoleted by the addition of the new segment we're currently building. NOTE: we're not ready for this change to take effect yet, so rather than this operation mutating anything, they simply return bitsets, which we can apply later. Logically, this is something like:
|
||||
|
||||
```
|
||||
foreach segment {
|
||||
dict := segment.Dictionary("\_id")
|
||||
postings := empty postings list
|
||||
foreach docID {
|
||||
postings = postings.Or(dict.PostingsList(docID, nil))
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
NOTE: it is illustrated above as nested for loops, but some or all of these could be concurrently. The end result is that for each segment, we have (possibly empty) bitset.
|
||||
|
||||
2. Also concurrent with 1, the documents in the batch are analyzed. This analysis proceeds using the existing analyzer pool.
|
||||
|
||||
3. (after 2 completes) Analyzed documents are fed into a function which builds a new Segment representing this information.
|
||||
|
||||
4. We now have everything we need to update the state of the system to include this new snapshot.
|
||||
|
||||
- Acquire a lock
|
||||
- Create a new IndexSnapshot
|
||||
- For each SegmentSnapshot in the IndexSnapshot, take the deleted PostingsList and OR it with the new postings list for this Segment. Construct a new SegmentSnapshot for the segment using this new deleted PostingsList. Append this SegmentSnapshot to the IndexSnapshot.
|
||||
- Create a new SegmentSnapshot wrapping our new segment with nil deleted docs.
|
||||
- Append the new SegmentSnapshot to the IndexSnapshot
|
||||
- Release the lock
|
||||
|
||||
An ASCII art example:
|
||||
```
|
||||
0 - Empty Index
|
||||
|
||||
No segments
|
||||
|
||||
IndexSnapshot
|
||||
segments []
|
||||
deleted []
|
||||
|
||||
|
||||
1 - Index Batch [ A B C ]
|
||||
|
||||
segment 0
|
||||
numbers [ 1 2 3 ]
|
||||
\_id [ A B C ]
|
||||
|
||||
IndexSnapshot
|
||||
segments [ 0 ]
|
||||
deleted [ nil ]
|
||||
|
||||
|
||||
2 - Index Batch [ B' ]
|
||||
|
||||
segment 0 1
|
||||
numbers [ 1 2 3 ] [ 1 ]
|
||||
\_id [ A B C ] [ B ]
|
||||
|
||||
Compute bitset segment-0-deleted-by-1:
|
||||
[ 0 1 0 ]
|
||||
|
||||
OR it with previous (nil) (call it 0-1)
|
||||
[ 0 1 0 ]
|
||||
|
||||
IndexSnapshot
|
||||
segments [ 0 1 ]
|
||||
deleted [ 0-1 nil ]
|
||||
|
||||
3 - Index Batch [ C' ]
|
||||
|
||||
segment 0 1 2
|
||||
numbers [ 1 2 3 ] [ 1 ] [ 1 ]
|
||||
\_id [ A B C ] [ B ] [ C ]
|
||||
|
||||
Compute bitset segment-0-deleted-by-2:
|
||||
[ 0 0 1 ]
|
||||
|
||||
OR it with previous ([ 0 1 0 ]) (call it 0-12)
|
||||
[ 0 1 1 ]
|
||||
|
||||
Compute bitset segment-1-deleted-by-2:
|
||||
[ 0 ]
|
||||
|
||||
OR it with previous (nil)
|
||||
still just nil
|
||||
|
||||
|
||||
IndexSnapshot
|
||||
segments [ 0 1 2 ]
|
||||
deleted [ 0-12 nil nil ]
|
||||
```
|
||||
|
||||
**is there opportunity to stop early when doc is found in one segment**
|
||||
**also, more efficient way to find bits for long lists of ids?**
|
||||
|
||||
### Searching
|
||||
|
||||
In the bleve.index API all searching starts by getting an IndexReader, which represents a snapshot of the index at a point in time.
|
||||
|
||||
As described in the section above, our index implementation maintains a pointer to the current IndexSnapshot. When a caller gets an IndexReader, they get a copy of this pointer, and can use it as long as they like. The IndexSnapshot contains SegmentSnapshots, which only contain pointers to immutable segments. The deleted posting lists associated with a segment change over time, but the particular deleted posting list in YOUR snapshot is immutable. This gives a stable view of the data.
|
||||
|
||||
#### Term Search
|
||||
|
||||
Term search is the only searching primitive exposed in today's bleve.index API. This ultimately could limit our ability to take advantage of the indexing improvements, but it also means it will be easier to get a first version of this working.
|
||||
|
||||
A term search for term T in field F will look something like this:
|
||||
|
||||
```
|
||||
searchResultPostings = empty
|
||||
foreach segment {
|
||||
dict := segment.Dictionary(F)
|
||||
segmentResultPostings = dict.PostingsList(T, segmentSnapshotDeleted)
|
||||
// make segmentLocal numbers into global numbers, and flip bits in searchResultPostings
|
||||
}
|
||||
```
|
||||
|
||||
The searchResultPostings will be a new implementation of the TermFieldReader inteface.
|
||||
|
||||
As a reminder this interface is:
|
||||
|
||||
```
|
||||
// TermFieldReader is the interface exposing the enumeration of documents
|
||||
// containing a given term in a given field. Documents are returned in byte
|
||||
// lexicographic order over their identifiers.
|
||||
type TermFieldReader interface {
|
||||
// Next returns the next document containing the term in this field, or nil
|
||||
// when it reaches the end of the enumeration. The preAlloced TermFieldDoc
|
||||
// is optional, and when non-nil, will be used instead of allocating memory.
|
||||
Next(preAlloced *TermFieldDoc) (*TermFieldDoc, error)
|
||||
|
||||
// Advance resets the enumeration at specified document or its immediate
|
||||
// follower.
|
||||
Advance(ID IndexInternalID, preAlloced *TermFieldDoc) (*TermFieldDoc, error)
|
||||
|
||||
// Count returns the number of documents contains the term in this field.
|
||||
Count() uint64
|
||||
Close() error
|
||||
}
|
||||
```
|
||||
|
||||
At first glance this appears problematic, we have no way to return documents in order of their identifiers. But it turns out the wording of this perhaps too strong, or a bit ambiguous. Originally, this referred to the external identifiers, but with the introduction of a distinction between internal/external identifiers, returning them in order of their internal identifiers is also acceptable. **ASIDE**: the reason for this is that most callers just use Next() and literally don't care what the order is, they could be in any order and it would be fine. There is only one search that cares and that is the ConjunctionSearcher, which relies on Next/Advance having very specific semantics. Later in this document we will have a proposal to split into multiple interfaces:
|
||||
|
||||
- The weakest interface, only supports Next() no ordering at all.
|
||||
- Ordered, supporting Advance()
|
||||
- And/Or'able capable of internally efficiently doing these ops with like interfaces (if not capable then can always fall back to external walking)
|
||||
|
||||
But, the good news is that we don't even have to do that for our first implementation. As long as the global numbers we use for internal identifiers are consistent within this IndexSnapshot, then Next() will be ordered by ascending document number, and Advance() will still work correctly.
|
||||
|
||||
NOTE: there is another place where we rely on the ordering of these hits, and that is in the "\_id" sort order. Previously this was the natural order, and a NOOP for the collector, now it must be implemented by actually sorting on the "\_id" field. We probably should introduce at least a marker interface to detect this.
|
||||
|
||||
An ASCII art example:
|
||||
|
||||
```
|
||||
Let's start with the IndexSnapshot we ended with earlier:
|
||||
|
||||
3 - Index Batch [ C' ]
|
||||
|
||||
segment 0 1 2
|
||||
numbers [ 1 2 3 ] [ 1 ] [ 1 ]
|
||||
\_id [ A B C ] [ B ] [ C ]
|
||||
|
||||
Compute bitset segment-0-deleted-by-2:
|
||||
[ 0 0 1 ]
|
||||
|
||||
OR it with previous ([ 0 1 0 ]) (call it 0-12)
|
||||
[ 0 1 1 ]
|
||||
|
||||
Compute bitset segment-1-deleted-by-2:
|
||||
[ 0 0 0 ]
|
||||
|
||||
OR it with previous (nil)
|
||||
still just nil
|
||||
|
||||
|
||||
IndexSnapshot
|
||||
segments [ 0 1 2 ]
|
||||
deleted [ 0-12 nil nil ]
|
||||
|
||||
Now let's search for the term 'cat' in the field 'desc' and let's assume that Document C (both versions) would match it.
|
||||
|
||||
Concurrently:
|
||||
|
||||
- Segment 0
|
||||
- Get Term Dictionary For Field 'desc'
|
||||
- From it get Postings List for term 'cat' EXCLUDING 0-12
|
||||
- raw segment matches [ 0 0 1 ] but excluding [ 0 1 1 ] gives [ 0 0 0 ]
|
||||
- Segment 1
|
||||
- Get Term Dictionary For Field 'desc'
|
||||
- From it get Postings List for term 'cat' excluding nil
|
||||
- [ 0 ]
|
||||
- Segment 2
|
||||
- Get Term Dictionary For Field 'desc'
|
||||
- From it get Postings List for term 'cat' excluding nil
|
||||
- [ 1 ]
|
||||
|
||||
Map local bitsets into global number space (global meaning cross-segment but still unique to this snapshot)
|
||||
|
||||
IndexSnapshot already should have mapping something like:
|
||||
0 - Offset 0
|
||||
1 - Offset 3 (because segment 0 had 3 docs)
|
||||
2 - Offset 4 (because segment 1 had 1 doc)
|
||||
|
||||
This maps to search result bitset:
|
||||
|
||||
[ 0 0 0 0 1]
|
||||
|
||||
Caller would call Next() and get doc number 5 (assuming 1 based indexing for now)
|
||||
|
||||
Caller could then ask to get term locations, stored fields, external doc ID for document number 5. Internally in the IndexSnapshot, we can now convert that back, and realize doc number 5 comes from segment 2, 5-4=1 so we're looking for doc number 1 in segment 2. That happens to be C...
|
||||
|
||||
```
|
||||
|
||||
#### Future improvements
|
||||
|
||||
In the future, interfaces to detect these non-serially operating TermFieldReaders could expose their own And() and Or() up to the higher level Conjunction/Disjunction searchers. Doing this alone offers some win, but also means there would be greater burden on the Searcher code rewriting logical expressions for maximum performance.
|
||||
|
||||
Another related topic is that of peak memory usage. With serially operating TermFieldReaders it was necessary to start them all at the same time and operate in unison. However, with these non-serially operating TermFieldReaders we have the option of doing a few at a time, consolidating them, dispoting the intermediaries, and then doing a few more. For very complex queries with many clauses this could reduce peak memory usage.
|
||||
|
||||
|
||||
### Memory Tracking
|
||||
|
||||
All segments must be able to produce two statistics, an estimate of their explicit memory usage, and their actual size on disk (if any). For in-memory segments, disk usage could be zero, and the memory usage represents the entire information content. For mmap-based disk segments, the memory could be as low as the size of tracking structure itself (say just a few pointers).
|
||||
|
||||
This would allow the implementation to throttle or block incoming mutations when a threshold memory usage has (or would be) exceeded.
|
||||
|
||||
### Persistence
|
||||
|
||||
Obviously, we want to support (but maybe not require) asynchronous persistence of segments. My expectation is that segments are initially built in memory. At some point they are persisted to disk. This poses some interesting challenges.
|
||||
|
||||
At runtime, the state of an index (it's IndexSnapshot) is not only the contents of the segments, but also the bitmasks of deleted documents. These bitmasks indirectly encode an ordering in which the segments were added. The reason is that the bitmasks encode which items have been obsoleted by other (subsequent or more future) segments. In the runtime implementation we compute bitmask deltas and then merge them at the same time we bring the new segment in. One idea is that we could take a similar approach on disk. When we persist a segment, we persist the bitmask deltas of segments known to exist at that time, and eventually these can get merged up into a base segment deleted bitmask.
|
||||
|
||||
This also relates to the topic rollback, addressed next...
|
||||
|
||||
|
||||
### Rollback
|
||||
|
||||
One desirable property in the Couchbase ecosystem is the ability to rollback to some previous (though typically not long ago) state. One idea for keeping this property in this design is to protect some of the most recent segments from merging. Then, if necessary, they could be "undone" to reveal previous states of the system. In these scenarios "undone" has to properly undo the deleted bitmasks on the other segments. Again, the current thinking is that rather than "undo" anything, it could be work that was deferred in the first place, thus making it easier to logically undo.
|
||||
|
||||
Another possibly related approach would be to tie this into our existing snapshot mechanism. Perhaps simulating a slow reader (holding onto index snapshots) for some period of time, can be the mechanism to achieve the desired end goal.
|
||||
|
||||
|
||||
### Internal Storage
|
||||
|
||||
The bleve.index API has support for "internal storage". The ability to store information under a separate name space.
|
||||
|
||||
This is not used for high volume storage, so it is tempting to think we could just put a small k/v store alongside the rest of the index. But, the reality is that this storage is used to maintain key information related to the rollback scenario. Because of this, its crucial that ordering and overwriting of key/value pairs correspond with actual segment persistence in the index. Based on this, I believe its important to put the internal key/value pairs inside the segments themselves. But, this also means that they must follow a similar "deleted" bitmask approach to obsolete values in older segments. But, this also seems to substantially increase the complexity of the solution because of the separate name space, it would appear to require its own bitmask. Further keys aren't numeric, which then implies yet another mapping from internal key to number, etc.
|
||||
|
||||
More thought is required here.
|
||||
|
||||
### Merging
|
||||
|
||||
The segmented index approach requires merging to prevent the number of segments from growing too large.
|
||||
|
||||
Recent experience with LSMs has taught us that having the correct merge strategy can make a huge difference in the overall performance of the system. In particular, a simple merge strategy which merges segments too aggressively can lead to high write amplification and unnecessarily rendering cached data useless.
|
||||
|
||||
A few simple principles have been identified.
|
||||
|
||||
- Roughly we merge multiple smaller segments into a single larger one.
|
||||
- The larger a segment gets the less likely we should be to ever merge it.
|
||||
- Segments with large numbers of deleted/obsoleted items are good candidates as the merge will result in a space savings.
|
||||
- Segments with all items deleted/obsoleted can be dropped.
|
||||
|
||||
Merging of a segment should be able to proceed even if that segment is held by an ongoing snapshot, it should only delay the removal of it.
|
333
vendor/github.com/blevesearch/bleve/v2/index/scorch/builder.go
generated
vendored
Normal file
333
vendor/github.com/blevesearch/bleve/v2/index/scorch/builder.go
generated
vendored
Normal file
|
@ -0,0 +1,333 @@
|
|||
// Copyright (c) 2019 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment "github.com/blevesearch/scorch_segment_api"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
const DefaultBuilderBatchSize = 1000
|
||||
const DefaultBuilderMergeMax = 10
|
||||
|
||||
type Builder struct {
|
||||
m sync.Mutex
|
||||
segCount uint64
|
||||
path string
|
||||
buildPath string
|
||||
segPaths []string
|
||||
batchSize int
|
||||
mergeMax int
|
||||
batch *index.Batch
|
||||
internal map[string][]byte
|
||||
segPlugin SegmentPlugin
|
||||
}
|
||||
|
||||
func NewBuilder(config map[string]interface{}) (*Builder, error) {
|
||||
path, ok := config["path"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify path")
|
||||
}
|
||||
|
||||
buildPathPrefix, _ := config["buildPathPrefix"].(string)
|
||||
buildPath, err := ioutil.TempDir(buildPathPrefix, "scorch-offline-build")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rv := &Builder{
|
||||
path: path,
|
||||
buildPath: buildPath,
|
||||
mergeMax: DefaultBuilderMergeMax,
|
||||
batchSize: DefaultBuilderBatchSize,
|
||||
batch: index.NewBatch(),
|
||||
segPlugin: defaultSegmentPlugin,
|
||||
}
|
||||
|
||||
err = rv.parseConfig(config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing builder config: %v", err)
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (o *Builder) parseConfig(config map[string]interface{}) (err error) {
|
||||
if v, ok := config["mergeMax"]; ok {
|
||||
var t int
|
||||
if t, err = parseToInteger(v); err != nil {
|
||||
return fmt.Errorf("mergeMax parse err: %v", err)
|
||||
}
|
||||
if t > 0 {
|
||||
o.mergeMax = t
|
||||
}
|
||||
}
|
||||
|
||||
if v, ok := config["batchSize"]; ok {
|
||||
var t int
|
||||
if t, err = parseToInteger(v); err != nil {
|
||||
return fmt.Errorf("batchSize parse err: %v", err)
|
||||
}
|
||||
if t > 0 {
|
||||
o.batchSize = t
|
||||
}
|
||||
}
|
||||
|
||||
if v, ok := config["internal"]; ok {
|
||||
if vinternal, ok := v.(map[string][]byte); ok {
|
||||
o.internal = vinternal
|
||||
}
|
||||
}
|
||||
|
||||
forcedSegmentType, forcedSegmentVersion, err := configForceSegmentTypeVersion(config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if forcedSegmentType != "" && forcedSegmentVersion != 0 {
|
||||
segPlugin, err := chooseSegmentPlugin(forcedSegmentType,
|
||||
uint32(forcedSegmentVersion))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
o.segPlugin = segPlugin
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Index will place the document into the index.
|
||||
// It is invalid to index the same document multiple times.
|
||||
func (o *Builder) Index(doc index.Document) error {
|
||||
o.m.Lock()
|
||||
defer o.m.Unlock()
|
||||
|
||||
o.batch.Update(doc)
|
||||
|
||||
return o.maybeFlushBatchLOCKED(o.batchSize)
|
||||
}
|
||||
|
||||
func (o *Builder) maybeFlushBatchLOCKED(moreThan int) error {
|
||||
if len(o.batch.IndexOps) >= moreThan {
|
||||
defer o.batch.Reset()
|
||||
return o.executeBatchLOCKED(o.batch)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *Builder) executeBatchLOCKED(batch *index.Batch) (err error) {
|
||||
analysisResults := make([]index.Document, 0, len(batch.IndexOps))
|
||||
for _, doc := range batch.IndexOps {
|
||||
if doc != nil {
|
||||
// insert _id field
|
||||
doc.AddIDField()
|
||||
// perform analysis directly
|
||||
analyze(doc)
|
||||
analysisResults = append(analysisResults, doc)
|
||||
}
|
||||
}
|
||||
|
||||
seg, _, err := o.segPlugin.New(analysisResults)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error building segment base: %v", err)
|
||||
}
|
||||
|
||||
filename := zapFileName(o.segCount)
|
||||
o.segCount++
|
||||
path := o.buildPath + string(os.PathSeparator) + filename
|
||||
|
||||
if segUnpersisted, ok := seg.(segment.UnpersistedSegment); ok {
|
||||
err = segUnpersisted.Persist(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error persisting segment base to %s: %v", path, err)
|
||||
}
|
||||
|
||||
o.segPaths = append(o.segPaths, path)
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("new segment does not implement unpersisted: %T", seg)
|
||||
}
|
||||
|
||||
func (o *Builder) doMerge() error {
|
||||
// as long as we have more than 1 segment, keep merging
|
||||
for len(o.segPaths) > 1 {
|
||||
|
||||
// merge the next <mergeMax> number of segments into one new one
|
||||
// or, if there are fewer than <mergeMax> remaining, merge them all
|
||||
mergeCount := o.mergeMax
|
||||
if mergeCount > len(o.segPaths) {
|
||||
mergeCount = len(o.segPaths)
|
||||
}
|
||||
|
||||
mergePaths := o.segPaths[0:mergeCount]
|
||||
o.segPaths = o.segPaths[mergeCount:]
|
||||
|
||||
// open each of the segments to be merged
|
||||
mergeSegs := make([]segment.Segment, 0, mergeCount)
|
||||
|
||||
// closeOpenedSegs attempts to close all opened
|
||||
// segments even if an error occurs, in which case
|
||||
// the first error is returned
|
||||
closeOpenedSegs := func() error {
|
||||
var err error
|
||||
for _, seg := range mergeSegs {
|
||||
clErr := seg.Close()
|
||||
if clErr != nil && err == nil {
|
||||
err = clErr
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
for _, mergePath := range mergePaths {
|
||||
seg, err := o.segPlugin.Open(mergePath)
|
||||
if err != nil {
|
||||
_ = closeOpenedSegs()
|
||||
return fmt.Errorf("error opening segment (%s) for merge: %v", mergePath, err)
|
||||
}
|
||||
mergeSegs = append(mergeSegs, seg)
|
||||
}
|
||||
|
||||
// do the merge
|
||||
mergedSegPath := o.buildPath + string(os.PathSeparator) + zapFileName(o.segCount)
|
||||
drops := make([]*roaring.Bitmap, mergeCount)
|
||||
_, _, err := o.segPlugin.Merge(mergeSegs, drops, mergedSegPath, nil, nil)
|
||||
if err != nil {
|
||||
_ = closeOpenedSegs()
|
||||
return fmt.Errorf("error merging segments (%v): %v", mergePaths, err)
|
||||
}
|
||||
o.segCount++
|
||||
o.segPaths = append(o.segPaths, mergedSegPath)
|
||||
|
||||
// close segments opened for merge
|
||||
err = closeOpenedSegs()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error closing opened segments: %v", err)
|
||||
}
|
||||
|
||||
// remove merged segments
|
||||
for _, mergePath := range mergePaths {
|
||||
err = os.RemoveAll(mergePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error removing segment %s after merge: %v", mergePath, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *Builder) Close() error {
|
||||
o.m.Lock()
|
||||
defer o.m.Unlock()
|
||||
|
||||
// see if there is a partial batch
|
||||
err := o.maybeFlushBatchLOCKED(1)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error flushing batch before close: %v", err)
|
||||
}
|
||||
|
||||
// perform all the merging
|
||||
err = o.doMerge()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error while merging: %v", err)
|
||||
}
|
||||
|
||||
// ensure the store path exists
|
||||
err = os.MkdirAll(o.path, 0700)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// move final segment into place
|
||||
// segment id 2 is chosen to match the behavior of a scorch
|
||||
// index which indexes a single batch of data
|
||||
finalSegPath := o.path + string(os.PathSeparator) + zapFileName(2)
|
||||
err = os.Rename(o.segPaths[0], finalSegPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error moving final segment into place: %v", err)
|
||||
}
|
||||
|
||||
// remove the buildPath, as it is no longer needed
|
||||
err = os.RemoveAll(o.buildPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error removing build path: %v", err)
|
||||
}
|
||||
|
||||
// prepare wrapping
|
||||
seg, err := o.segPlugin.Open(finalSegPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening final segment")
|
||||
}
|
||||
|
||||
// create a segment snapshot for this segment
|
||||
ss := &SegmentSnapshot{
|
||||
segment: seg,
|
||||
}
|
||||
is := &IndexSnapshot{
|
||||
epoch: 3, // chosen to match scorch behavior when indexing a single batch
|
||||
segment: []*SegmentSnapshot{ss},
|
||||
creator: "scorch-builder",
|
||||
internal: o.internal,
|
||||
}
|
||||
|
||||
// create the root bolt
|
||||
rootBoltPath := o.path + string(os.PathSeparator) + "root.bolt"
|
||||
rootBolt, err := bolt.Open(rootBoltPath, 0600, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// start a write transaction
|
||||
tx, err := rootBolt.Begin(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// fill the root bolt with this fake index snapshot
|
||||
_, _, err = prepareBoltSnapshot(is, tx, o.path, o.segPlugin)
|
||||
if err != nil {
|
||||
_ = tx.Rollback()
|
||||
_ = rootBolt.Close()
|
||||
return fmt.Errorf("error preparing bolt snapshot in root.bolt: %v", err)
|
||||
}
|
||||
|
||||
// commit bolt data
|
||||
err = tx.Commit()
|
||||
if err != nil {
|
||||
_ = rootBolt.Close()
|
||||
return fmt.Errorf("error committing bolt tx in root.bolt: %v", err)
|
||||
}
|
||||
|
||||
// close bolt
|
||||
err = rootBolt.Close()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error closing root.bolt: %v", err)
|
||||
}
|
||||
|
||||
// close final segment
|
||||
err = seg.Close()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error closing final segment: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
33
vendor/github.com/blevesearch/bleve/v2/index/scorch/empty.go
generated
vendored
Normal file
33
vendor/github.com/blevesearch/bleve/v2/index/scorch/empty.go
generated
vendored
Normal file
|
@ -0,0 +1,33 @@
|
|||
// Copyright (c) 2020 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import segment "github.com/blevesearch/scorch_segment_api"
|
||||
|
||||
type emptyPostingsIterator struct{}
|
||||
|
||||
func (e *emptyPostingsIterator) Next() (segment.Posting, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (e *emptyPostingsIterator) Advance(uint64) (segment.Posting, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (e *emptyPostingsIterator) Size() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
var anEmptyPostingsIterator = &emptyPostingsIterator{}
|
64
vendor/github.com/blevesearch/bleve/v2/index/scorch/event.go
generated
vendored
Normal file
64
vendor/github.com/blevesearch/bleve/v2/index/scorch/event.go
generated
vendored
Normal file
|
@ -0,0 +1,64 @@
|
|||
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import "time"
|
||||
|
||||
// RegistryAsyncErrorCallbacks should be treated as read-only after
|
||||
// process init()'ialization.
|
||||
var RegistryAsyncErrorCallbacks = map[string]func(error){}
|
||||
|
||||
// RegistryEventCallbacks should be treated as read-only after
|
||||
// process init()'ialization.
|
||||
var RegistryEventCallbacks = map[string]func(Event){}
|
||||
|
||||
// Event represents the information provided in an OnEvent() callback.
|
||||
type Event struct {
|
||||
Kind EventKind
|
||||
Scorch *Scorch
|
||||
Duration time.Duration
|
||||
}
|
||||
|
||||
// EventKind represents an event code for OnEvent() callbacks.
|
||||
type EventKind int
|
||||
|
||||
// EventKindCloseStart is fired when a Scorch.Close() has begun.
|
||||
var EventKindCloseStart = EventKind(1)
|
||||
|
||||
// EventKindClose is fired when a scorch index has been fully closed.
|
||||
var EventKindClose = EventKind(2)
|
||||
|
||||
// EventKindMergerProgress is fired when the merger has completed a
|
||||
// round of merge processing.
|
||||
var EventKindMergerProgress = EventKind(3)
|
||||
|
||||
// EventKindPersisterProgress is fired when the persister has completed
|
||||
// a round of persistence processing.
|
||||
var EventKindPersisterProgress = EventKind(4)
|
||||
|
||||
// EventKindBatchIntroductionStart is fired when Batch() is invoked which
|
||||
// introduces a new segment.
|
||||
var EventKindBatchIntroductionStart = EventKind(5)
|
||||
|
||||
// EventKindBatchIntroduction is fired when Batch() completes.
|
||||
var EventKindBatchIntroduction = EventKind(6)
|
||||
|
||||
// EventKindMergeTaskIntroductionStart is fired when the merger is about to
|
||||
// start the introduction of merged segment from a single merge task.
|
||||
var EventKindMergeTaskIntroductionStart = EventKind(7)
|
||||
|
||||
// EventKindMergeTaskIntroduction is fired when the merger has completed
|
||||
// the introduction of merged segment from a single merge task.
|
||||
var EventKindMergeTaskIntroduction = EventKind(8)
|
92
vendor/github.com/blevesearch/bleve/v2/index/scorch/int.go
generated
vendored
Normal file
92
vendor/github.com/blevesearch/bleve/v2/index/scorch/int.go
generated
vendored
Normal file
|
@ -0,0 +1,92 @@
|
|||
// Copyright 2014 The Cockroach Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
// implied. See the License for the specific language governing
|
||||
// permissions and limitations under the License.
|
||||
|
||||
// This code originated from:
|
||||
// https://github.com/cockroachdb/cockroach/blob/2dd65dde5d90c157f4b93f92502ca1063b904e1d/pkg/util/encoding/encoding.go
|
||||
|
||||
// Modified to not use pkg/errors
|
||||
|
||||
package scorch
|
||||
|
||||
import "fmt"
|
||||
|
||||
const (
|
||||
// intMin is chosen such that the range of int tags does not overlap the
|
||||
// ascii character set that is frequently used in testing.
|
||||
intMin = 0x80 // 128
|
||||
intMaxWidth = 8
|
||||
intZero = intMin + intMaxWidth // 136
|
||||
intSmall = intMax - intZero - intMaxWidth // 109
|
||||
// intMax is the maximum int tag value.
|
||||
intMax = 0xfd // 253
|
||||
)
|
||||
|
||||
// encodeUvarintAscending encodes the uint64 value using a variable length
|
||||
// (length-prefixed) representation. The length is encoded as a single
|
||||
// byte indicating the number of encoded bytes (-8) to follow. See
|
||||
// EncodeVarintAscending for rationale. The encoded bytes are appended to the
|
||||
// supplied buffer and the final buffer is returned.
|
||||
func encodeUvarintAscending(b []byte, v uint64) []byte {
|
||||
switch {
|
||||
case v <= intSmall:
|
||||
return append(b, intZero+byte(v))
|
||||
case v <= 0xff:
|
||||
return append(b, intMax-7, byte(v))
|
||||
case v <= 0xffff:
|
||||
return append(b, intMax-6, byte(v>>8), byte(v))
|
||||
case v <= 0xffffff:
|
||||
return append(b, intMax-5, byte(v>>16), byte(v>>8), byte(v))
|
||||
case v <= 0xffffffff:
|
||||
return append(b, intMax-4, byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
|
||||
case v <= 0xffffffffff:
|
||||
return append(b, intMax-3, byte(v>>32), byte(v>>24), byte(v>>16), byte(v>>8),
|
||||
byte(v))
|
||||
case v <= 0xffffffffffff:
|
||||
return append(b, intMax-2, byte(v>>40), byte(v>>32), byte(v>>24), byte(v>>16),
|
||||
byte(v>>8), byte(v))
|
||||
case v <= 0xffffffffffffff:
|
||||
return append(b, intMax-1, byte(v>>48), byte(v>>40), byte(v>>32), byte(v>>24),
|
||||
byte(v>>16), byte(v>>8), byte(v))
|
||||
default:
|
||||
return append(b, intMax, byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32),
|
||||
byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
|
||||
}
|
||||
}
|
||||
|
||||
// decodeUvarintAscending decodes a varint encoded uint64 from the input
|
||||
// buffer. The remainder of the input buffer and the decoded uint64
|
||||
// are returned.
|
||||
func decodeUvarintAscending(b []byte) ([]byte, uint64, error) {
|
||||
if len(b) == 0 {
|
||||
return nil, 0, fmt.Errorf("insufficient bytes to decode uvarint value")
|
||||
}
|
||||
length := int(b[0]) - intZero
|
||||
b = b[1:] // skip length byte
|
||||
if length <= intSmall {
|
||||
return b, uint64(length), nil
|
||||
}
|
||||
length -= intSmall
|
||||
if length < 0 || length > 8 {
|
||||
return nil, 0, fmt.Errorf("invalid uvarint length of %d", length)
|
||||
} else if len(b) < length {
|
||||
return nil, 0, fmt.Errorf("insufficient bytes to decode uvarint value: %q", b)
|
||||
}
|
||||
var v uint64
|
||||
// It is faster to range over the elements in a slice than to index
|
||||
// into the slice on each loop iteration.
|
||||
for _, t := range b[:length] {
|
||||
v = (v << 8) | uint64(t)
|
||||
}
|
||||
return b[length:], v, nil
|
||||
}
|
449
vendor/github.com/blevesearch/bleve/v2/index/scorch/introducer.go
generated
vendored
Normal file
449
vendor/github.com/blevesearch/bleve/v2/index/scorch/introducer.go
generated
vendored
Normal file
|
@ -0,0 +1,449 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment "github.com/blevesearch/scorch_segment_api"
|
||||
)
|
||||
|
||||
type segmentIntroduction struct {
|
||||
id uint64
|
||||
data segment.Segment
|
||||
obsoletes map[uint64]*roaring.Bitmap
|
||||
ids []string
|
||||
internal map[string][]byte
|
||||
|
||||
applied chan error
|
||||
persisted chan error
|
||||
persistedCallback index.BatchCallback
|
||||
}
|
||||
|
||||
type persistIntroduction struct {
|
||||
persisted map[uint64]segment.Segment
|
||||
applied notificationChan
|
||||
}
|
||||
|
||||
type epochWatcher struct {
|
||||
epoch uint64
|
||||
notifyCh notificationChan
|
||||
}
|
||||
|
||||
func (s *Scorch) introducerLoop() {
|
||||
var epochWatchers []*epochWatcher
|
||||
OUTER:
|
||||
for {
|
||||
atomic.AddUint64(&s.stats.TotIntroduceLoop, 1)
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
|
||||
case epochWatcher := <-s.introducerNotifier:
|
||||
epochWatchers = append(epochWatchers, epochWatcher)
|
||||
|
||||
case nextMerge := <-s.merges:
|
||||
s.introduceMerge(nextMerge)
|
||||
|
||||
case next := <-s.introductions:
|
||||
err := s.introduceSegment(next)
|
||||
if err != nil {
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
case persist := <-s.persists:
|
||||
s.introducePersist(persist)
|
||||
|
||||
}
|
||||
|
||||
var epochCurr uint64
|
||||
s.rootLock.RLock()
|
||||
if s.root != nil {
|
||||
epochCurr = s.root.epoch
|
||||
}
|
||||
s.rootLock.RUnlock()
|
||||
var epochWatchersNext []*epochWatcher
|
||||
for _, w := range epochWatchers {
|
||||
if w.epoch < epochCurr {
|
||||
close(w.notifyCh)
|
||||
} else {
|
||||
epochWatchersNext = append(epochWatchersNext, w)
|
||||
}
|
||||
}
|
||||
epochWatchers = epochWatchersNext
|
||||
}
|
||||
|
||||
s.asyncTasks.Done()
|
||||
}
|
||||
|
||||
func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
|
||||
atomic.AddUint64(&s.stats.TotIntroduceSegmentBeg, 1)
|
||||
defer atomic.AddUint64(&s.stats.TotIntroduceSegmentEnd, 1)
|
||||
|
||||
s.rootLock.RLock()
|
||||
root := s.root
|
||||
root.AddRef()
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
defer func() { _ = root.DecRef() }()
|
||||
|
||||
nsegs := len(root.segment)
|
||||
|
||||
// prepare new index snapshot
|
||||
newSnapshot := &IndexSnapshot{
|
||||
parent: s,
|
||||
segment: make([]*SegmentSnapshot, 0, nsegs+1),
|
||||
offsets: make([]uint64, 0, nsegs+1),
|
||||
internal: make(map[string][]byte, len(root.internal)),
|
||||
refs: 1,
|
||||
creator: "introduceSegment",
|
||||
}
|
||||
|
||||
// iterate through current segments
|
||||
var running uint64
|
||||
var docsToPersistCount, memSegments, fileSegments uint64
|
||||
for i := range root.segment {
|
||||
// see if optimistic work included this segment
|
||||
delta, ok := next.obsoletes[root.segment[i].id]
|
||||
if !ok {
|
||||
var err error
|
||||
delta, err = root.segment[i].segment.DocNumbers(next.ids)
|
||||
if err != nil {
|
||||
next.applied <- fmt.Errorf("error computing doc numbers: %v", err)
|
||||
close(next.applied)
|
||||
_ = newSnapshot.DecRef()
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
newss := &SegmentSnapshot{
|
||||
id: root.segment[i].id,
|
||||
segment: root.segment[i].segment,
|
||||
cachedDocs: root.segment[i].cachedDocs,
|
||||
creator: root.segment[i].creator,
|
||||
}
|
||||
|
||||
// apply new obsoletions
|
||||
if root.segment[i].deleted == nil {
|
||||
newss.deleted = delta
|
||||
} else {
|
||||
newss.deleted = roaring.Or(root.segment[i].deleted, delta)
|
||||
}
|
||||
if newss.deleted.IsEmpty() {
|
||||
newss.deleted = nil
|
||||
}
|
||||
|
||||
// check for live size before copying
|
||||
if newss.LiveSize() > 0 {
|
||||
newSnapshot.segment = append(newSnapshot.segment, newss)
|
||||
root.segment[i].segment.AddRef()
|
||||
newSnapshot.offsets = append(newSnapshot.offsets, running)
|
||||
running += newss.segment.Count()
|
||||
}
|
||||
|
||||
if isMemorySegment(root.segment[i]) {
|
||||
docsToPersistCount += root.segment[i].Count()
|
||||
memSegments++
|
||||
} else {
|
||||
fileSegments++
|
||||
}
|
||||
}
|
||||
|
||||
atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
|
||||
atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
|
||||
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
|
||||
|
||||
// append new segment, if any, to end of the new index snapshot
|
||||
if next.data != nil {
|
||||
newSegmentSnapshot := &SegmentSnapshot{
|
||||
id: next.id,
|
||||
segment: next.data, // take ownership of next.data's ref-count
|
||||
cachedDocs: &cachedDocs{cache: nil},
|
||||
creator: "introduceSegment",
|
||||
}
|
||||
newSnapshot.segment = append(newSnapshot.segment, newSegmentSnapshot)
|
||||
newSnapshot.offsets = append(newSnapshot.offsets, running)
|
||||
|
||||
// increment numItemsIntroduced which tracks the number of items
|
||||
// queued for persistence.
|
||||
atomic.AddUint64(&s.stats.TotIntroducedItems, newSegmentSnapshot.Count())
|
||||
atomic.AddUint64(&s.stats.TotIntroducedSegmentsBatch, 1)
|
||||
}
|
||||
// copy old values
|
||||
for key, oldVal := range root.internal {
|
||||
newSnapshot.internal[key] = oldVal
|
||||
}
|
||||
// set new values and apply deletes
|
||||
for key, newVal := range next.internal {
|
||||
if newVal != nil {
|
||||
newSnapshot.internal[key] = newVal
|
||||
} else {
|
||||
delete(newSnapshot.internal, key)
|
||||
}
|
||||
}
|
||||
|
||||
newSnapshot.updateSize()
|
||||
s.rootLock.Lock()
|
||||
if next.persisted != nil {
|
||||
s.rootPersisted = append(s.rootPersisted, next.persisted)
|
||||
}
|
||||
if next.persistedCallback != nil {
|
||||
s.persistedCallbacks = append(s.persistedCallbacks, next.persistedCallback)
|
||||
}
|
||||
// swap in new index snapshot
|
||||
newSnapshot.epoch = s.nextSnapshotEpoch
|
||||
s.nextSnapshotEpoch++
|
||||
rootPrev := s.root
|
||||
s.root = newSnapshot
|
||||
atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
|
||||
// release lock
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if rootPrev != nil {
|
||||
_ = rootPrev.DecRef()
|
||||
}
|
||||
|
||||
close(next.applied)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Scorch) introducePersist(persist *persistIntroduction) {
|
||||
atomic.AddUint64(&s.stats.TotIntroducePersistBeg, 1)
|
||||
defer atomic.AddUint64(&s.stats.TotIntroducePersistEnd, 1)
|
||||
|
||||
s.rootLock.Lock()
|
||||
root := s.root
|
||||
root.AddRef()
|
||||
nextSnapshotEpoch := s.nextSnapshotEpoch
|
||||
s.nextSnapshotEpoch++
|
||||
s.rootLock.Unlock()
|
||||
|
||||
defer func() { _ = root.DecRef() }()
|
||||
|
||||
newIndexSnapshot := &IndexSnapshot{
|
||||
parent: s,
|
||||
epoch: nextSnapshotEpoch,
|
||||
segment: make([]*SegmentSnapshot, len(root.segment)),
|
||||
offsets: make([]uint64, len(root.offsets)),
|
||||
internal: make(map[string][]byte, len(root.internal)),
|
||||
refs: 1,
|
||||
creator: "introducePersist",
|
||||
}
|
||||
|
||||
var docsToPersistCount, memSegments, fileSegments uint64
|
||||
for i, segmentSnapshot := range root.segment {
|
||||
// see if this segment has been replaced
|
||||
if replacement, ok := persist.persisted[segmentSnapshot.id]; ok {
|
||||
newSegmentSnapshot := &SegmentSnapshot{
|
||||
id: segmentSnapshot.id,
|
||||
segment: replacement,
|
||||
deleted: segmentSnapshot.deleted,
|
||||
cachedDocs: segmentSnapshot.cachedDocs,
|
||||
creator: "introducePersist",
|
||||
}
|
||||
newIndexSnapshot.segment[i] = newSegmentSnapshot
|
||||
delete(persist.persisted, segmentSnapshot.id)
|
||||
|
||||
// update items persisted incase of a new segment snapshot
|
||||
atomic.AddUint64(&s.stats.TotPersistedItems, newSegmentSnapshot.Count())
|
||||
atomic.AddUint64(&s.stats.TotPersistedSegments, 1)
|
||||
fileSegments++
|
||||
} else {
|
||||
newIndexSnapshot.segment[i] = root.segment[i]
|
||||
newIndexSnapshot.segment[i].segment.AddRef()
|
||||
|
||||
if isMemorySegment(root.segment[i]) {
|
||||
docsToPersistCount += root.segment[i].Count()
|
||||
memSegments++
|
||||
} else {
|
||||
fileSegments++
|
||||
}
|
||||
}
|
||||
newIndexSnapshot.offsets[i] = root.offsets[i]
|
||||
}
|
||||
|
||||
for k, v := range root.internal {
|
||||
newIndexSnapshot.internal[k] = v
|
||||
}
|
||||
|
||||
atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
|
||||
atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
|
||||
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
|
||||
newIndexSnapshot.updateSize()
|
||||
s.rootLock.Lock()
|
||||
rootPrev := s.root
|
||||
s.root = newIndexSnapshot
|
||||
atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if rootPrev != nil {
|
||||
_ = rootPrev.DecRef()
|
||||
}
|
||||
|
||||
close(persist.applied)
|
||||
}
|
||||
|
||||
// The introducer should definitely handle the segmentMerge.notify
|
||||
// channel before exiting the introduceMerge.
|
||||
func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
||||
atomic.AddUint64(&s.stats.TotIntroduceMergeBeg, 1)
|
||||
defer atomic.AddUint64(&s.stats.TotIntroduceMergeEnd, 1)
|
||||
|
||||
s.rootLock.RLock()
|
||||
root := s.root
|
||||
root.AddRef()
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
defer func() { _ = root.DecRef() }()
|
||||
|
||||
newSnapshot := &IndexSnapshot{
|
||||
parent: s,
|
||||
internal: root.internal,
|
||||
refs: 1,
|
||||
creator: "introduceMerge",
|
||||
}
|
||||
|
||||
// iterate through current segments
|
||||
newSegmentDeleted := roaring.NewBitmap()
|
||||
var running, docsToPersistCount, memSegments, fileSegments uint64
|
||||
for i := range root.segment {
|
||||
segmentID := root.segment[i].id
|
||||
if segSnapAtMerge, ok := nextMerge.old[segmentID]; ok {
|
||||
// this segment is going away, see if anything else was deleted since we started the merge
|
||||
if segSnapAtMerge != nil && root.segment[i].deleted != nil {
|
||||
// assume all these deletes are new
|
||||
deletedSince := root.segment[i].deleted
|
||||
// if we already knew about some of them, remove
|
||||
if segSnapAtMerge.deleted != nil {
|
||||
deletedSince = roaring.AndNot(root.segment[i].deleted, segSnapAtMerge.deleted)
|
||||
}
|
||||
deletedSinceItr := deletedSince.Iterator()
|
||||
for deletedSinceItr.HasNext() {
|
||||
oldDocNum := deletedSinceItr.Next()
|
||||
newDocNum := nextMerge.oldNewDocNums[segmentID][oldDocNum]
|
||||
newSegmentDeleted.Add(uint32(newDocNum))
|
||||
}
|
||||
}
|
||||
// clean up the old segment map to figure out the
|
||||
// obsolete segments wrt root in meantime, whatever
|
||||
// segments left behind in old map after processing
|
||||
// the root segments would be the obsolete segment set
|
||||
delete(nextMerge.old, segmentID)
|
||||
} else if root.segment[i].LiveSize() > 0 {
|
||||
// this segment is staying
|
||||
newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{
|
||||
id: root.segment[i].id,
|
||||
segment: root.segment[i].segment,
|
||||
deleted: root.segment[i].deleted,
|
||||
cachedDocs: root.segment[i].cachedDocs,
|
||||
creator: root.segment[i].creator,
|
||||
})
|
||||
root.segment[i].segment.AddRef()
|
||||
newSnapshot.offsets = append(newSnapshot.offsets, running)
|
||||
running += root.segment[i].segment.Count()
|
||||
|
||||
if isMemorySegment(root.segment[i]) {
|
||||
docsToPersistCount += root.segment[i].Count()
|
||||
memSegments++
|
||||
} else {
|
||||
fileSegments++
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// before the newMerge introduction, need to clean the newly
|
||||
// merged segment wrt the current root segments, hence
|
||||
// applying the obsolete segment contents to newly merged segment
|
||||
for segID, ss := range nextMerge.old {
|
||||
obsoleted := ss.DocNumbersLive()
|
||||
if obsoleted != nil {
|
||||
obsoletedIter := obsoleted.Iterator()
|
||||
for obsoletedIter.HasNext() {
|
||||
oldDocNum := obsoletedIter.Next()
|
||||
newDocNum := nextMerge.oldNewDocNums[segID][oldDocNum]
|
||||
newSegmentDeleted.Add(uint32(newDocNum))
|
||||
}
|
||||
}
|
||||
}
|
||||
var skipped bool
|
||||
// In case where all the docs in the newly merged segment getting
|
||||
// deleted by the time we reach here, can skip the introduction.
|
||||
if nextMerge.new != nil &&
|
||||
nextMerge.new.Count() > newSegmentDeleted.GetCardinality() {
|
||||
// put new segment at end
|
||||
newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{
|
||||
id: nextMerge.id,
|
||||
segment: nextMerge.new, // take ownership for nextMerge.new's ref-count
|
||||
deleted: newSegmentDeleted,
|
||||
cachedDocs: &cachedDocs{cache: nil},
|
||||
creator: "introduceMerge",
|
||||
})
|
||||
newSnapshot.offsets = append(newSnapshot.offsets, running)
|
||||
atomic.AddUint64(&s.stats.TotIntroducedSegmentsMerge, 1)
|
||||
|
||||
switch nextMerge.new.(type) {
|
||||
case segment.PersistedSegment:
|
||||
fileSegments++
|
||||
default:
|
||||
docsToPersistCount += nextMerge.new.Count() - newSegmentDeleted.GetCardinality()
|
||||
memSegments++
|
||||
}
|
||||
} else {
|
||||
skipped = true
|
||||
atomic.AddUint64(&s.stats.TotFileMergeIntroductionsObsoleted, 1)
|
||||
}
|
||||
|
||||
atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
|
||||
atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
|
||||
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
|
||||
|
||||
newSnapshot.AddRef() // 1 ref for the nextMerge.notify response
|
||||
|
||||
newSnapshot.updateSize()
|
||||
s.rootLock.Lock()
|
||||
// swap in new index snapshot
|
||||
newSnapshot.epoch = s.nextSnapshotEpoch
|
||||
s.nextSnapshotEpoch++
|
||||
rootPrev := s.root
|
||||
s.root = newSnapshot
|
||||
atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
|
||||
// release lock
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if rootPrev != nil {
|
||||
_ = rootPrev.DecRef()
|
||||
}
|
||||
|
||||
// notify requester that we incorporated this
|
||||
nextMerge.notifyCh <- &mergeTaskIntroStatus{
|
||||
indexSnapshot: newSnapshot,
|
||||
skipped: skipped}
|
||||
close(nextMerge.notifyCh)
|
||||
}
|
||||
|
||||
func isMemorySegment(s *SegmentSnapshot) bool {
|
||||
switch s.segment.(type) {
|
||||
case segment.PersistedSegment:
|
||||
return false
|
||||
default:
|
||||
return true
|
||||
}
|
||||
}
|
504
vendor/github.com/blevesearch/bleve/v2/index/scorch/merge.go
generated
vendored
Normal file
504
vendor/github.com/blevesearch/bleve/v2/index/scorch/merge.go
generated
vendored
Normal file
|
@ -0,0 +1,504 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/v2/index/scorch/mergeplan"
|
||||
segment "github.com/blevesearch/scorch_segment_api"
|
||||
)
|
||||
|
||||
func (s *Scorch) mergerLoop() {
|
||||
var lastEpochMergePlanned uint64
|
||||
var ctrlMsg *mergerCtrl
|
||||
mergePlannerOptions, err := s.parseMergePlannerOptions()
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("mergePlannerOption json parsing err: %v", err))
|
||||
s.asyncTasks.Done()
|
||||
return
|
||||
}
|
||||
ctrlMsgDflt := &mergerCtrl{ctx: context.Background(),
|
||||
options: mergePlannerOptions,
|
||||
doneCh: nil}
|
||||
|
||||
OUTER:
|
||||
for {
|
||||
atomic.AddUint64(&s.stats.TotFileMergeLoopBeg, 1)
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
|
||||
default:
|
||||
// check to see if there is a new snapshot to persist
|
||||
s.rootLock.Lock()
|
||||
ourSnapshot := s.root
|
||||
ourSnapshot.AddRef()
|
||||
atomic.StoreUint64(&s.iStats.mergeSnapshotSize, uint64(ourSnapshot.Size()))
|
||||
atomic.StoreUint64(&s.iStats.mergeEpoch, ourSnapshot.epoch)
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if ctrlMsg == nil && ourSnapshot.epoch != lastEpochMergePlanned {
|
||||
ctrlMsg = ctrlMsgDflt
|
||||
}
|
||||
if ctrlMsg != nil {
|
||||
startTime := time.Now()
|
||||
|
||||
// lets get started
|
||||
err := s.planMergeAtSnapshot(ctrlMsg.ctx, ctrlMsg.options,
|
||||
ourSnapshot)
|
||||
if err != nil {
|
||||
atomic.StoreUint64(&s.iStats.mergeEpoch, 0)
|
||||
if err == segment.ErrClosed {
|
||||
// index has been closed
|
||||
_ = ourSnapshot.DecRef()
|
||||
|
||||
// continue the workloop on a user triggered cancel
|
||||
if ctrlMsg.doneCh != nil {
|
||||
close(ctrlMsg.doneCh)
|
||||
ctrlMsg = nil
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
// exit the workloop on index closure
|
||||
ctrlMsg = nil
|
||||
break OUTER
|
||||
}
|
||||
s.fireAsyncError(fmt.Errorf("merging err: %v", err))
|
||||
_ = ourSnapshot.DecRef()
|
||||
atomic.AddUint64(&s.stats.TotFileMergeLoopErr, 1)
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
if ctrlMsg.doneCh != nil {
|
||||
close(ctrlMsg.doneCh)
|
||||
}
|
||||
ctrlMsg = nil
|
||||
|
||||
lastEpochMergePlanned = ourSnapshot.epoch
|
||||
|
||||
atomic.StoreUint64(&s.stats.LastMergedEpoch, ourSnapshot.epoch)
|
||||
|
||||
s.fireEvent(EventKindMergerProgress, time.Since(startTime))
|
||||
}
|
||||
_ = ourSnapshot.DecRef()
|
||||
|
||||
// tell the persister we're waiting for changes
|
||||
// first make a epochWatcher chan
|
||||
ew := &epochWatcher{
|
||||
epoch: lastEpochMergePlanned,
|
||||
notifyCh: make(notificationChan, 1),
|
||||
}
|
||||
|
||||
// give it to the persister
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
case s.persisterNotifier <- ew:
|
||||
case ctrlMsg = <-s.forceMergeRequestCh:
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
// now wait for persister (but also detect close)
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
case <-ew.notifyCh:
|
||||
case ctrlMsg = <-s.forceMergeRequestCh:
|
||||
}
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergeLoopEnd, 1)
|
||||
}
|
||||
|
||||
s.asyncTasks.Done()
|
||||
}
|
||||
|
||||
type mergerCtrl struct {
|
||||
ctx context.Context
|
||||
options *mergeplan.MergePlanOptions
|
||||
doneCh chan struct{}
|
||||
}
|
||||
|
||||
// ForceMerge helps users trigger a merge operation on
|
||||
// an online scorch index.
|
||||
func (s *Scorch) ForceMerge(ctx context.Context,
|
||||
mo *mergeplan.MergePlanOptions) error {
|
||||
// check whether force merge is already under processing
|
||||
s.rootLock.Lock()
|
||||
if s.stats.TotFileMergeForceOpsStarted >
|
||||
s.stats.TotFileMergeForceOpsCompleted {
|
||||
s.rootLock.Unlock()
|
||||
return fmt.Errorf("force merge already in progress")
|
||||
}
|
||||
|
||||
s.stats.TotFileMergeForceOpsStarted++
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if mo != nil {
|
||||
err := mergeplan.ValidateMergePlannerOptions(mo)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// assume the default single segment merge policy
|
||||
mo = &mergeplan.SingleSegmentMergePlanOptions
|
||||
}
|
||||
msg := &mergerCtrl{options: mo,
|
||||
doneCh: make(chan struct{}),
|
||||
ctx: ctx,
|
||||
}
|
||||
|
||||
// request the merger perform a force merge
|
||||
select {
|
||||
case s.forceMergeRequestCh <- msg:
|
||||
case <-s.closeCh:
|
||||
return nil
|
||||
}
|
||||
|
||||
// wait for the force merge operation completion
|
||||
select {
|
||||
case <-msg.doneCh:
|
||||
atomic.AddUint64(&s.stats.TotFileMergeForceOpsCompleted, 1)
|
||||
case <-s.closeCh:
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions,
|
||||
error) {
|
||||
mergePlannerOptions := mergeplan.DefaultMergePlanOptions
|
||||
if v, ok := s.config["scorchMergePlanOptions"]; ok {
|
||||
b, err := json.Marshal(v)
|
||||
if err != nil {
|
||||
return &mergePlannerOptions, err
|
||||
}
|
||||
|
||||
err = json.Unmarshal(b, &mergePlannerOptions)
|
||||
if err != nil {
|
||||
return &mergePlannerOptions, err
|
||||
}
|
||||
|
||||
err = mergeplan.ValidateMergePlannerOptions(&mergePlannerOptions)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return &mergePlannerOptions, nil
|
||||
}
|
||||
|
||||
type closeChWrapper struct {
|
||||
ch1 chan struct{}
|
||||
ctx context.Context
|
||||
closeCh chan struct{}
|
||||
}
|
||||
|
||||
func newCloseChWrapper(ch1 chan struct{},
|
||||
ctx context.Context) *closeChWrapper {
|
||||
return &closeChWrapper{ch1: ch1,
|
||||
ctx: ctx,
|
||||
closeCh: make(chan struct{})}
|
||||
}
|
||||
|
||||
func (w *closeChWrapper) close() {
|
||||
select {
|
||||
case <-w.closeCh:
|
||||
default:
|
||||
close(w.closeCh)
|
||||
}
|
||||
}
|
||||
|
||||
func (w *closeChWrapper) listen() {
|
||||
select {
|
||||
case <-w.ch1:
|
||||
w.close()
|
||||
case <-w.ctx.Done():
|
||||
w.close()
|
||||
case <-w.closeCh:
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scorch) planMergeAtSnapshot(ctx context.Context,
|
||||
options *mergeplan.MergePlanOptions, ourSnapshot *IndexSnapshot) error {
|
||||
// build list of persisted segments in this snapshot
|
||||
var onlyPersistedSnapshots []mergeplan.Segment
|
||||
for _, segmentSnapshot := range ourSnapshot.segment {
|
||||
if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok {
|
||||
onlyPersistedSnapshots = append(onlyPersistedSnapshots, segmentSnapshot)
|
||||
}
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlan, 1)
|
||||
|
||||
// give this list to the planner
|
||||
resultMergePlan, err := mergeplan.Plan(onlyPersistedSnapshots, options)
|
||||
if err != nil {
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanErr, 1)
|
||||
return fmt.Errorf("merge planning err: %v", err)
|
||||
}
|
||||
if resultMergePlan == nil {
|
||||
// nothing to do
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1)
|
||||
return nil
|
||||
}
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1)
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks)))
|
||||
|
||||
// process tasks in serial for now
|
||||
var filenames []string
|
||||
|
||||
cw := newCloseChWrapper(s.closeCh, ctx)
|
||||
defer cw.close()
|
||||
|
||||
go cw.listen()
|
||||
|
||||
for _, task := range resultMergePlan.Tasks {
|
||||
if len(task.Segments) == 0 {
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
|
||||
continue
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegments, uint64(len(task.Segments)))
|
||||
|
||||
oldMap := make(map[uint64]*SegmentSnapshot)
|
||||
newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
|
||||
segmentsToMerge := make([]segment.Segment, 0, len(task.Segments))
|
||||
docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments))
|
||||
|
||||
for _, planSegment := range task.Segments {
|
||||
if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok {
|
||||
oldMap[segSnapshot.id] = segSnapshot
|
||||
if persistedSeg, ok := segSnapshot.segment.(segment.PersistedSegment); ok {
|
||||
if segSnapshot.LiveSize() == 0 {
|
||||
atomic.AddUint64(&s.stats.TotFileMergeSegmentsEmpty, 1)
|
||||
oldMap[segSnapshot.id] = nil
|
||||
} else {
|
||||
segmentsToMerge = append(segmentsToMerge, segSnapshot.segment)
|
||||
docsToDrop = append(docsToDrop, segSnapshot.deleted)
|
||||
}
|
||||
// track the files getting merged for unsetting the
|
||||
// removal ineligibility. This helps to unflip files
|
||||
// even with fast merger, slow persister work flows.
|
||||
path := persistedSeg.Path()
|
||||
filenames = append(filenames,
|
||||
strings.TrimPrefix(path, s.path+string(os.PathSeparator)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var oldNewDocNums map[uint64][]uint64
|
||||
var seg segment.Segment
|
||||
var filename string
|
||||
if len(segmentsToMerge) > 0 {
|
||||
filename = zapFileName(newSegmentID)
|
||||
s.markIneligibleForRemoval(filename)
|
||||
path := s.path + string(os.PathSeparator) + filename
|
||||
|
||||
fileMergeZapStartTime := time.Now()
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1)
|
||||
newDocNums, _, err := s.segPlugin.Merge(segmentsToMerge, docsToDrop, path,
|
||||
cw.closeCh, s)
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1)
|
||||
|
||||
fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime))
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime)
|
||||
if atomic.LoadUint64(&s.stats.MaxFileMergeZapTime) < fileMergeZapTime {
|
||||
atomic.StoreUint64(&s.stats.MaxFileMergeZapTime, fileMergeZapTime)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
s.unmarkIneligibleForRemoval(filename)
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
|
||||
if err == segment.ErrClosed {
|
||||
return err
|
||||
}
|
||||
return fmt.Errorf("merging failed: %v", err)
|
||||
}
|
||||
|
||||
seg, err = s.segPlugin.Open(path)
|
||||
if err != nil {
|
||||
s.unmarkIneligibleForRemoval(filename)
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
|
||||
return err
|
||||
}
|
||||
oldNewDocNums = make(map[uint64][]uint64)
|
||||
for i, segNewDocNums := range newDocNums {
|
||||
oldNewDocNums[task.Segments[i].Id()] = segNewDocNums
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergeSegments, uint64(len(segmentsToMerge)))
|
||||
}
|
||||
|
||||
sm := &segmentMerge{
|
||||
id: newSegmentID,
|
||||
old: oldMap,
|
||||
oldNewDocNums: oldNewDocNums,
|
||||
new: seg,
|
||||
notifyCh: make(chan *mergeTaskIntroStatus),
|
||||
}
|
||||
|
||||
s.fireEvent(EventKindMergeTaskIntroductionStart, 0)
|
||||
|
||||
// give it to the introducer
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
_ = seg.Close()
|
||||
return segment.ErrClosed
|
||||
case s.merges <- sm:
|
||||
atomic.AddUint64(&s.stats.TotFileMergeIntroductions, 1)
|
||||
}
|
||||
|
||||
introStartTime := time.Now()
|
||||
// it is safe to blockingly wait for the merge introduction
|
||||
// here as the introducer is bound to handle the notify channel.
|
||||
introStatus := <-sm.notifyCh
|
||||
introTime := uint64(time.Since(introStartTime))
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapIntroductionTime, introTime)
|
||||
if atomic.LoadUint64(&s.stats.MaxFileMergeZapIntroductionTime) < introTime {
|
||||
atomic.StoreUint64(&s.stats.MaxFileMergeZapIntroductionTime, introTime)
|
||||
}
|
||||
atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1)
|
||||
if introStatus != nil && introStatus.indexSnapshot != nil {
|
||||
_ = introStatus.indexSnapshot.DecRef()
|
||||
if introStatus.skipped {
|
||||
// close the segment on skipping introduction.
|
||||
s.unmarkIneligibleForRemoval(filename)
|
||||
_ = seg.Close()
|
||||
}
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1)
|
||||
|
||||
s.fireEvent(EventKindMergeTaskIntroduction, 0)
|
||||
}
|
||||
|
||||
// once all the newly merged segment introductions are done,
|
||||
// its safe to unflip the removal ineligibility for the replaced
|
||||
// older segments
|
||||
for _, f := range filenames {
|
||||
s.unmarkIneligibleForRemoval(f)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type mergeTaskIntroStatus struct {
|
||||
indexSnapshot *IndexSnapshot
|
||||
skipped bool
|
||||
}
|
||||
|
||||
type segmentMerge struct {
|
||||
id uint64
|
||||
old map[uint64]*SegmentSnapshot
|
||||
oldNewDocNums map[uint64][]uint64
|
||||
new segment.Segment
|
||||
notifyCh chan *mergeTaskIntroStatus
|
||||
}
|
||||
|
||||
// perform a merging of the given SegmentBase instances into a new,
|
||||
// persisted segment, and synchronously introduce that new segment
|
||||
// into the root
|
||||
func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
|
||||
sbs []segment.Segment, sbsDrops []*roaring.Bitmap,
|
||||
sbsIndexes []int) (*IndexSnapshot, uint64, error) {
|
||||
atomic.AddUint64(&s.stats.TotMemMergeBeg, 1)
|
||||
|
||||
memMergeZapStartTime := time.Now()
|
||||
|
||||
atomic.AddUint64(&s.stats.TotMemMergeZapBeg, 1)
|
||||
|
||||
newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
|
||||
filename := zapFileName(newSegmentID)
|
||||
path := s.path + string(os.PathSeparator) + filename
|
||||
|
||||
newDocNums, _, err :=
|
||||
s.segPlugin.Merge(sbs, sbsDrops, path, s.closeCh, s)
|
||||
|
||||
atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1)
|
||||
|
||||
memMergeZapTime := uint64(time.Since(memMergeZapStartTime))
|
||||
atomic.AddUint64(&s.stats.TotMemMergeZapTime, memMergeZapTime)
|
||||
if atomic.LoadUint64(&s.stats.MaxMemMergeZapTime) < memMergeZapTime {
|
||||
atomic.StoreUint64(&s.stats.MaxMemMergeZapTime, memMergeZapTime)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
seg, err := s.segPlugin.Open(path)
|
||||
if err != nil {
|
||||
atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
// update persisted stats
|
||||
atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count())
|
||||
atomic.AddUint64(&s.stats.TotPersistedSegments, 1)
|
||||
|
||||
sm := &segmentMerge{
|
||||
id: newSegmentID,
|
||||
old: make(map[uint64]*SegmentSnapshot),
|
||||
oldNewDocNums: make(map[uint64][]uint64),
|
||||
new: seg,
|
||||
notifyCh: make(chan *mergeTaskIntroStatus),
|
||||
}
|
||||
|
||||
for i, idx := range sbsIndexes {
|
||||
ss := snapshot.segment[idx]
|
||||
sm.old[ss.id] = ss
|
||||
sm.oldNewDocNums[ss.id] = newDocNums[i]
|
||||
}
|
||||
|
||||
select { // send to introducer
|
||||
case <-s.closeCh:
|
||||
_ = seg.DecRef()
|
||||
return nil, 0, segment.ErrClosed
|
||||
case s.merges <- sm:
|
||||
}
|
||||
|
||||
// blockingly wait for the introduction to complete
|
||||
var newSnapshot *IndexSnapshot
|
||||
introStatus := <-sm.notifyCh
|
||||
if introStatus != nil && introStatus.indexSnapshot != nil {
|
||||
newSnapshot = introStatus.indexSnapshot
|
||||
atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(len(sbs)))
|
||||
atomic.AddUint64(&s.stats.TotMemMergeDone, 1)
|
||||
if introStatus.skipped {
|
||||
// close the segment on skipping introduction.
|
||||
_ = newSnapshot.DecRef()
|
||||
_ = seg.Close()
|
||||
newSnapshot = nil
|
||||
}
|
||||
}
|
||||
|
||||
return newSnapshot, newSegmentID, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) ReportBytesWritten(bytesWritten uint64) {
|
||||
atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, bytesWritten)
|
||||
}
|
397
vendor/github.com/blevesearch/bleve/v2/index/scorch/mergeplan/merge_plan.go
generated
vendored
Normal file
397
vendor/github.com/blevesearch/bleve/v2/index/scorch/mergeplan/merge_plan.go
generated
vendored
Normal file
|
@ -0,0 +1,397 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package mergeplan provides a segment merge planning approach that's
|
||||
// inspired by Lucene's TieredMergePolicy.java and descriptions like
|
||||
// http://blog.mikemccandless.com/2011/02/visualizing-lucenes-segment-merges.html
|
||||
package mergeplan
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// A Segment represents the information that the planner needs to
|
||||
// calculate segment merging.
|
||||
type Segment interface {
|
||||
// Unique id of the segment -- used for sorting.
|
||||
Id() uint64
|
||||
|
||||
// Full segment size (the size before any logical deletions).
|
||||
FullSize() int64
|
||||
|
||||
// Size of the live data of the segment; i.e., FullSize() minus
|
||||
// any logical deletions.
|
||||
LiveSize() int64
|
||||
}
|
||||
|
||||
// Plan() will functionally compute a merge plan. A segment will be
|
||||
// assigned to at most a single MergeTask in the output MergePlan. A
|
||||
// segment not assigned to any MergeTask means the segment should
|
||||
// remain unmerged.
|
||||
func Plan(segments []Segment, o *MergePlanOptions) (*MergePlan, error) {
|
||||
return plan(segments, o)
|
||||
}
|
||||
|
||||
// A MergePlan is the result of the Plan() API.
|
||||
//
|
||||
// The planner doesn’t know how or whether these tasks are executed --
|
||||
// that’s up to a separate merge execution system, which might execute
|
||||
// these tasks concurrently or not, and which might execute all the
|
||||
// tasks or not.
|
||||
type MergePlan struct {
|
||||
Tasks []*MergeTask
|
||||
}
|
||||
|
||||
// A MergeTask represents several segments that should be merged
|
||||
// together into a single segment.
|
||||
type MergeTask struct {
|
||||
Segments []Segment
|
||||
}
|
||||
|
||||
// The MergePlanOptions is designed to be reusable between planning calls.
|
||||
type MergePlanOptions struct {
|
||||
// Max # segments per logarithmic tier, or max width of any
|
||||
// logarithmic “step”. Smaller values mean more merging but fewer
|
||||
// segments. Should be >= SegmentsPerMergeTask, else you'll have
|
||||
// too much merging.
|
||||
MaxSegmentsPerTier int
|
||||
|
||||
// Max size of any segment produced after merging. Actual
|
||||
// merging, however, may produce segment sizes different than the
|
||||
// planner’s predicted sizes.
|
||||
MaxSegmentSize int64
|
||||
|
||||
// The growth factor for each tier in a staircase of idealized
|
||||
// segments computed by CalcBudget().
|
||||
TierGrowth float64
|
||||
|
||||
// The number of segments in any resulting MergeTask. e.g.,
|
||||
// len(result.Tasks[ * ].Segments) == SegmentsPerMergeTask.
|
||||
SegmentsPerMergeTask int
|
||||
|
||||
// Small segments are rounded up to this size, i.e., treated as
|
||||
// equal (floor) size for consideration. This is to prevent lots
|
||||
// of tiny segments from resulting in a long tail in the index.
|
||||
FloorSegmentSize int64
|
||||
|
||||
// Controls how aggressively merges that reclaim more deletions
|
||||
// are favored. Higher values will more aggressively target
|
||||
// merges that reclaim deletions, but be careful not to go so high
|
||||
// that way too much merging takes place; a value of 3.0 is
|
||||
// probably nearly too high. A value of 0.0 means deletions don't
|
||||
// impact merge selection.
|
||||
ReclaimDeletesWeight float64
|
||||
|
||||
// Optional, defaults to mergeplan.CalcBudget().
|
||||
CalcBudget func(totalSize int64, firstTierSize int64,
|
||||
o *MergePlanOptions) (budgetNumSegments int)
|
||||
|
||||
// Optional, defaults to mergeplan.ScoreSegments().
|
||||
ScoreSegments func(segments []Segment, o *MergePlanOptions) float64
|
||||
|
||||
// Optional.
|
||||
Logger func(string)
|
||||
}
|
||||
|
||||
// Returns the higher of the input or FloorSegmentSize.
|
||||
func (o *MergePlanOptions) RaiseToFloorSegmentSize(s int64) int64 {
|
||||
if s > o.FloorSegmentSize {
|
||||
return s
|
||||
}
|
||||
return o.FloorSegmentSize
|
||||
}
|
||||
|
||||
// MaxSegmentSizeLimit represents the maximum size of a segment,
|
||||
// this limit comes with hit-1 optimisation/max encoding limit uint31.
|
||||
const MaxSegmentSizeLimit = 1<<31 - 1
|
||||
|
||||
// ErrMaxSegmentSizeTooLarge is returned when the size of the segment
|
||||
// exceeds the MaxSegmentSizeLimit
|
||||
var ErrMaxSegmentSizeTooLarge = errors.New("MaxSegmentSize exceeds the size limit")
|
||||
|
||||
// DefaultMergePlanOptions suggests the default options.
|
||||
var DefaultMergePlanOptions = MergePlanOptions{
|
||||
MaxSegmentsPerTier: 10,
|
||||
MaxSegmentSize: 5000000,
|
||||
TierGrowth: 10.0,
|
||||
SegmentsPerMergeTask: 10,
|
||||
FloorSegmentSize: 2000,
|
||||
ReclaimDeletesWeight: 2.0,
|
||||
}
|
||||
|
||||
// SingleSegmentMergePlanOptions helps in creating a
|
||||
// single segment index.
|
||||
var SingleSegmentMergePlanOptions = MergePlanOptions{
|
||||
MaxSegmentsPerTier: 1,
|
||||
MaxSegmentSize: 1 << 30,
|
||||
TierGrowth: 1.0,
|
||||
SegmentsPerMergeTask: 10,
|
||||
FloorSegmentSize: 1 << 30,
|
||||
ReclaimDeletesWeight: 2.0,
|
||||
}
|
||||
|
||||
// -------------------------------------------
|
||||
|
||||
func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
|
||||
if len(segmentsIn) <= 1 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if o == nil {
|
||||
o = &DefaultMergePlanOptions
|
||||
}
|
||||
|
||||
segments := append([]Segment(nil), segmentsIn...) // Copy.
|
||||
|
||||
sort.Sort(byLiveSizeDescending(segments))
|
||||
|
||||
var minLiveSize int64 = math.MaxInt64
|
||||
|
||||
var eligibles []Segment
|
||||
var eligiblesLiveSize int64
|
||||
|
||||
for _, segment := range segments {
|
||||
if minLiveSize > segment.LiveSize() {
|
||||
minLiveSize = segment.LiveSize()
|
||||
}
|
||||
|
||||
// Only small-enough segments are eligible.
|
||||
if segment.LiveSize() < o.MaxSegmentSize/2 {
|
||||
eligibles = append(eligibles, segment)
|
||||
eligiblesLiveSize += segment.LiveSize()
|
||||
}
|
||||
}
|
||||
|
||||
minLiveSize = o.RaiseToFloorSegmentSize(minLiveSize)
|
||||
|
||||
calcBudget := o.CalcBudget
|
||||
if calcBudget == nil {
|
||||
calcBudget = CalcBudget
|
||||
}
|
||||
|
||||
budgetNumSegments := calcBudget(eligiblesLiveSize, minLiveSize, o)
|
||||
|
||||
scoreSegments := o.ScoreSegments
|
||||
if scoreSegments == nil {
|
||||
scoreSegments = ScoreSegments
|
||||
}
|
||||
|
||||
rv := &MergePlan{}
|
||||
|
||||
var empties []Segment
|
||||
for _, eligible := range eligibles {
|
||||
if eligible.LiveSize() <= 0 {
|
||||
empties = append(empties, eligible)
|
||||
}
|
||||
}
|
||||
if len(empties) > 0 {
|
||||
rv.Tasks = append(rv.Tasks, &MergeTask{Segments: empties})
|
||||
eligibles = removeSegments(eligibles, empties)
|
||||
}
|
||||
|
||||
// While we’re over budget, keep looping, which might produce
|
||||
// another MergeTask.
|
||||
for len(eligibles) > 0 && (len(eligibles)+len(rv.Tasks)) > budgetNumSegments {
|
||||
// Track a current best roster as we examine and score
|
||||
// potential rosters of merges.
|
||||
var bestRoster []Segment
|
||||
var bestRosterScore float64 // Lower score is better.
|
||||
|
||||
for startIdx := 0; startIdx < len(eligibles); startIdx++ {
|
||||
var roster []Segment
|
||||
var rosterLiveSize int64
|
||||
|
||||
for idx := startIdx; idx < len(eligibles) && len(roster) < o.SegmentsPerMergeTask; idx++ {
|
||||
eligible := eligibles[idx]
|
||||
|
||||
if rosterLiveSize+eligible.LiveSize() < o.MaxSegmentSize {
|
||||
roster = append(roster, eligible)
|
||||
rosterLiveSize += eligible.LiveSize()
|
||||
}
|
||||
}
|
||||
|
||||
if len(roster) > 0 {
|
||||
rosterScore := scoreSegments(roster, o)
|
||||
|
||||
if len(bestRoster) == 0 || rosterScore < bestRosterScore {
|
||||
bestRoster = roster
|
||||
bestRosterScore = rosterScore
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(bestRoster) == 0 {
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
rv.Tasks = append(rv.Tasks, &MergeTask{Segments: bestRoster})
|
||||
|
||||
eligibles = removeSegments(eligibles, bestRoster)
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// Compute the number of segments that would be needed to cover the
|
||||
// totalSize, by climbing up a logarithmically growing staircase of
|
||||
// segment tiers.
|
||||
func CalcBudget(totalSize int64, firstTierSize int64, o *MergePlanOptions) (
|
||||
budgetNumSegments int) {
|
||||
tierSize := firstTierSize
|
||||
if tierSize < 1 {
|
||||
tierSize = 1
|
||||
}
|
||||
|
||||
maxSegmentsPerTier := o.MaxSegmentsPerTier
|
||||
if maxSegmentsPerTier < 1 {
|
||||
maxSegmentsPerTier = 1
|
||||
}
|
||||
|
||||
tierGrowth := o.TierGrowth
|
||||
if tierGrowth < 1.0 {
|
||||
tierGrowth = 1.0
|
||||
}
|
||||
|
||||
for totalSize > 0 {
|
||||
segmentsInTier := float64(totalSize) / float64(tierSize)
|
||||
if segmentsInTier < float64(maxSegmentsPerTier) {
|
||||
budgetNumSegments += int(math.Ceil(segmentsInTier))
|
||||
break
|
||||
}
|
||||
|
||||
budgetNumSegments += maxSegmentsPerTier
|
||||
totalSize -= int64(maxSegmentsPerTier) * tierSize
|
||||
tierSize = int64(float64(tierSize) * tierGrowth)
|
||||
}
|
||||
|
||||
return budgetNumSegments
|
||||
}
|
||||
|
||||
// Of note, removeSegments() keeps the ordering of the results stable.
|
||||
func removeSegments(segments []Segment, toRemove []Segment) []Segment {
|
||||
rv := make([]Segment, 0, len(segments)-len(toRemove))
|
||||
OUTER:
|
||||
for _, segment := range segments {
|
||||
for _, r := range toRemove {
|
||||
if segment == r {
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
rv = append(rv, segment)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
// Smaller result score is better.
|
||||
func ScoreSegments(segments []Segment, o *MergePlanOptions) float64 {
|
||||
var totBeforeSize int64
|
||||
var totAfterSize int64
|
||||
var totAfterSizeFloored int64
|
||||
|
||||
for _, segment := range segments {
|
||||
totBeforeSize += segment.FullSize()
|
||||
totAfterSize += segment.LiveSize()
|
||||
totAfterSizeFloored += o.RaiseToFloorSegmentSize(segment.LiveSize())
|
||||
}
|
||||
|
||||
if totBeforeSize <= 0 || totAfterSize <= 0 || totAfterSizeFloored <= 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Roughly guess the "balance" of the segments -- whether the
|
||||
// segments are about the same size.
|
||||
balance :=
|
||||
float64(o.RaiseToFloorSegmentSize(segments[0].LiveSize())) /
|
||||
float64(totAfterSizeFloored)
|
||||
|
||||
// Gently favor smaller merges over bigger ones. We don't want to
|
||||
// make the exponent too large else we end up with poor merges of
|
||||
// small segments in order to avoid the large merges.
|
||||
score := balance * math.Pow(float64(totAfterSize), 0.05)
|
||||
|
||||
// Strongly favor merges that reclaim deletes.
|
||||
nonDelRatio := float64(totAfterSize) / float64(totBeforeSize)
|
||||
|
||||
score *= math.Pow(nonDelRatio, o.ReclaimDeletesWeight)
|
||||
|
||||
return score
|
||||
}
|
||||
|
||||
// ------------------------------------------
|
||||
|
||||
// ToBarChart returns an ASCII rendering of the segments and the plan.
|
||||
// The barMax is the max width of the bars in the bar chart.
|
||||
func ToBarChart(prefix string, barMax int, segments []Segment, plan *MergePlan) string {
|
||||
rv := make([]string, 0, len(segments))
|
||||
|
||||
var maxFullSize int64
|
||||
for _, segment := range segments {
|
||||
if maxFullSize < segment.FullSize() {
|
||||
maxFullSize = segment.FullSize()
|
||||
}
|
||||
}
|
||||
if maxFullSize < 0 {
|
||||
maxFullSize = 1
|
||||
}
|
||||
|
||||
for _, segment := range segments {
|
||||
barFull := int(segment.FullSize())
|
||||
barLive := int(segment.LiveSize())
|
||||
|
||||
if maxFullSize > int64(barMax) {
|
||||
barFull = int(float64(barMax) * float64(barFull) / float64(maxFullSize))
|
||||
barLive = int(float64(barMax) * float64(barLive) / float64(maxFullSize))
|
||||
}
|
||||
|
||||
barKind := " "
|
||||
barChar := "."
|
||||
|
||||
if plan != nil {
|
||||
TASK_LOOP:
|
||||
for taski, task := range plan.Tasks {
|
||||
for _, taskSegment := range task.Segments {
|
||||
if taskSegment == segment {
|
||||
barKind = "*"
|
||||
barChar = fmt.Sprintf("%d", taski)
|
||||
break TASK_LOOP
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bar :=
|
||||
strings.Repeat(barChar, barLive)[0:barLive] +
|
||||
strings.Repeat("x", barFull-barLive)[0:barFull-barLive]
|
||||
|
||||
rv = append(rv, fmt.Sprintf("%s %5d: %5d /%5d - %s %s", prefix,
|
||||
segment.Id(),
|
||||
segment.LiveSize(),
|
||||
segment.FullSize(),
|
||||
barKind, bar))
|
||||
}
|
||||
|
||||
return strings.Join(rv, "\n")
|
||||
}
|
||||
|
||||
// ValidateMergePlannerOptions validates the merge planner options
|
||||
func ValidateMergePlannerOptions(options *MergePlanOptions) error {
|
||||
if options.MaxSegmentSize > MaxSegmentSizeLimit {
|
||||
return ErrMaxSegmentSizeTooLarge
|
||||
}
|
||||
return nil
|
||||
}
|
28
vendor/github.com/blevesearch/bleve/v2/index/scorch/mergeplan/sort.go
generated
vendored
Normal file
28
vendor/github.com/blevesearch/bleve/v2/index/scorch/mergeplan/sort.go
generated
vendored
Normal file
|
@ -0,0 +1,28 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package mergeplan
|
||||
|
||||
type byLiveSizeDescending []Segment
|
||||
|
||||
func (a byLiveSizeDescending) Len() int { return len(a) }
|
||||
|
||||
func (a byLiveSizeDescending) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
||||
|
||||
func (a byLiveSizeDescending) Less(i, j int) bool {
|
||||
if a[i].LiveSize() != a[j].LiveSize() {
|
||||
return a[i].LiveSize() > a[j].LiveSize()
|
||||
}
|
||||
return a[i].Id() < a[j].Id()
|
||||
}
|
396
vendor/github.com/blevesearch/bleve/v2/index/scorch/optimize.go
generated
vendored
Normal file
396
vendor/github.com/blevesearch/bleve/v2/index/scorch/optimize.go
generated
vendored
Normal file
|
@ -0,0 +1,396 @@
|
|||
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment "github.com/blevesearch/scorch_segment_api"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
var OptimizeConjunction = true
|
||||
var OptimizeConjunctionUnadorned = true
|
||||
var OptimizeDisjunctionUnadorned = true
|
||||
|
||||
func (s *IndexSnapshotTermFieldReader) Optimize(kind string,
|
||||
octx index.OptimizableContext) (index.OptimizableContext, error) {
|
||||
if OptimizeConjunction && kind == "conjunction" {
|
||||
return s.optimizeConjunction(octx)
|
||||
}
|
||||
|
||||
if OptimizeConjunctionUnadorned && kind == "conjunction:unadorned" {
|
||||
return s.optimizeConjunctionUnadorned(octx)
|
||||
}
|
||||
|
||||
if OptimizeDisjunctionUnadorned && kind == "disjunction:unadorned" {
|
||||
return s.optimizeDisjunctionUnadorned(octx)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var OptimizeDisjunctionUnadornedMinChildCardinality = uint64(256)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
func (s *IndexSnapshotTermFieldReader) optimizeConjunction(
|
||||
octx index.OptimizableContext) (index.OptimizableContext, error) {
|
||||
if octx == nil {
|
||||
octx = &OptimizeTFRConjunction{snapshot: s.snapshot}
|
||||
}
|
||||
|
||||
o, ok := octx.(*OptimizeTFRConjunction)
|
||||
if !ok {
|
||||
return octx, nil
|
||||
}
|
||||
|
||||
if o.snapshot != s.snapshot {
|
||||
return nil, fmt.Errorf("tried to optimize conjunction across different snapshots")
|
||||
}
|
||||
|
||||
o.tfrs = append(o.tfrs, s)
|
||||
|
||||
return o, nil
|
||||
}
|
||||
|
||||
type OptimizeTFRConjunction struct {
|
||||
snapshot *IndexSnapshot
|
||||
|
||||
tfrs []*IndexSnapshotTermFieldReader
|
||||
}
|
||||
|
||||
func (o *OptimizeTFRConjunction) Finish() (index.Optimized, error) {
|
||||
if len(o.tfrs) <= 1 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
for i := range o.snapshot.segment {
|
||||
itr0, ok := o.tfrs[0].iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if !ok || itr0.ActualBitmap() == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
itr1, ok := o.tfrs[1].iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if !ok || itr1.ActualBitmap() == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
bm := roaring.And(itr0.ActualBitmap(), itr1.ActualBitmap())
|
||||
|
||||
for _, tfr := range o.tfrs[2:] {
|
||||
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if !ok || itr.ActualBitmap() == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
bm.And(itr.ActualBitmap())
|
||||
}
|
||||
|
||||
// in this conjunction optimization, the postings iterators
|
||||
// will all share the same AND'ed together actual bitmap. The
|
||||
// regular conjunction searcher machinery will still be used,
|
||||
// but the underlying bitmap will be smaller.
|
||||
for _, tfr := range o.tfrs {
|
||||
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if ok && itr.ActualBitmap() != nil {
|
||||
itr.ReplaceActual(bm)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
// An "unadorned" conjunction optimization is appropriate when
|
||||
// additional or subsidiary information like freq-norm's and
|
||||
// term-vectors are not required, and instead only the internal-id's
|
||||
// are needed.
|
||||
func (s *IndexSnapshotTermFieldReader) optimizeConjunctionUnadorned(
|
||||
octx index.OptimizableContext) (index.OptimizableContext, error) {
|
||||
if octx == nil {
|
||||
octx = &OptimizeTFRConjunctionUnadorned{snapshot: s.snapshot}
|
||||
}
|
||||
|
||||
o, ok := octx.(*OptimizeTFRConjunctionUnadorned)
|
||||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if o.snapshot != s.snapshot {
|
||||
return nil, fmt.Errorf("tried to optimize unadorned conjunction across different snapshots")
|
||||
}
|
||||
|
||||
o.tfrs = append(o.tfrs, s)
|
||||
|
||||
return o, nil
|
||||
}
|
||||
|
||||
type OptimizeTFRConjunctionUnadorned struct {
|
||||
snapshot *IndexSnapshot
|
||||
|
||||
tfrs []*IndexSnapshotTermFieldReader
|
||||
}
|
||||
|
||||
var OptimizeTFRConjunctionUnadornedTerm = []byte("<conjunction:unadorned>")
|
||||
var OptimizeTFRConjunctionUnadornedField = "*"
|
||||
|
||||
// Finish of an unadorned conjunction optimization will compute a
|
||||
// termFieldReader with an "actual" bitmap that represents the
|
||||
// constituent bitmaps AND'ed together. This termFieldReader cannot
|
||||
// provide any freq-norm or termVector associated information.
|
||||
func (o *OptimizeTFRConjunctionUnadorned) Finish() (rv index.Optimized, err error) {
|
||||
if len(o.tfrs) <= 1 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// We use an artificial term and field because the optimized
|
||||
// termFieldReader can represent multiple terms and fields.
|
||||
oTFR := o.snapshot.unadornedTermFieldReader(
|
||||
OptimizeTFRConjunctionUnadornedTerm, OptimizeTFRConjunctionUnadornedField)
|
||||
|
||||
var actualBMs []*roaring.Bitmap // Collected from regular posting lists.
|
||||
|
||||
OUTER:
|
||||
for i := range o.snapshot.segment {
|
||||
actualBMs = actualBMs[:0]
|
||||
|
||||
var docNum1HitLast uint64
|
||||
var docNum1HitLastOk bool
|
||||
|
||||
for _, tfr := range o.tfrs {
|
||||
if _, ok := tfr.iterators[i].(*emptyPostingsIterator); ok {
|
||||
// An empty postings iterator means the entire AND is empty.
|
||||
oTFR.iterators[i] = anEmptyPostingsIterator
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if !ok {
|
||||
// We only optimize postings iterators that support this operation.
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// If the postings iterator is "1-hit" optimized, then we
|
||||
// can perform several optimizations up-front here.
|
||||
docNum1Hit, ok := itr.DocNum1Hit()
|
||||
if ok {
|
||||
if docNum1HitLastOk && docNum1HitLast != docNum1Hit {
|
||||
// The docNum1Hit doesn't match the previous
|
||||
// docNum1HitLast, so the entire AND is empty.
|
||||
oTFR.iterators[i] = anEmptyPostingsIterator
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
docNum1HitLast = docNum1Hit
|
||||
docNum1HitLastOk = true
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
if itr.ActualBitmap() == nil {
|
||||
// An empty actual bitmap means the entire AND is empty.
|
||||
oTFR.iterators[i] = anEmptyPostingsIterator
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
// Collect the actual bitmap for more processing later.
|
||||
actualBMs = append(actualBMs, itr.ActualBitmap())
|
||||
}
|
||||
|
||||
if docNum1HitLastOk {
|
||||
// We reach here if all the 1-hit optimized posting
|
||||
// iterators had the same 1-hit docNum, so we can check if
|
||||
// our collected actual bitmaps also have that docNum.
|
||||
for _, bm := range actualBMs {
|
||||
if !bm.Contains(uint32(docNum1HitLast)) {
|
||||
// The docNum1Hit isn't in one of our actual
|
||||
// bitmaps, so the entire AND is empty.
|
||||
oTFR.iterators[i] = anEmptyPostingsIterator
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
|
||||
// The actual bitmaps and docNum1Hits all contain or have
|
||||
// the same 1-hit docNum, so that's our AND'ed result.
|
||||
oTFR.iterators[i] = newUnadornedPostingsIteratorFrom1Hit(docNum1HitLast)
|
||||
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
if len(actualBMs) == 0 {
|
||||
// If we've collected no actual bitmaps at this point,
|
||||
// then the entire AND is empty.
|
||||
oTFR.iterators[i] = anEmptyPostingsIterator
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
if len(actualBMs) == 1 {
|
||||
// If we've only 1 actual bitmap, then that's our result.
|
||||
oTFR.iterators[i] = newUnadornedPostingsIteratorFromBitmap(actualBMs[0])
|
||||
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
// Else, AND together our collected bitmaps as our result.
|
||||
bm := roaring.And(actualBMs[0], actualBMs[1])
|
||||
|
||||
for _, actualBM := range actualBMs[2:] {
|
||||
bm.And(actualBM)
|
||||
}
|
||||
|
||||
oTFR.iterators[i] = newUnadornedPostingsIteratorFromBitmap(bm)
|
||||
}
|
||||
|
||||
atomic.AddUint64(&o.snapshot.parent.stats.TotTermSearchersStarted, uint64(1))
|
||||
return oTFR, nil
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
// An "unadorned" disjunction optimization is appropriate when
|
||||
// additional or subsidiary information like freq-norm's and
|
||||
// term-vectors are not required, and instead only the internal-id's
|
||||
// are needed.
|
||||
func (s *IndexSnapshotTermFieldReader) optimizeDisjunctionUnadorned(
|
||||
octx index.OptimizableContext) (index.OptimizableContext, error) {
|
||||
if octx == nil {
|
||||
octx = &OptimizeTFRDisjunctionUnadorned{
|
||||
snapshot: s.snapshot,
|
||||
}
|
||||
}
|
||||
|
||||
o, ok := octx.(*OptimizeTFRDisjunctionUnadorned)
|
||||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if o.snapshot != s.snapshot {
|
||||
return nil, fmt.Errorf("tried to optimize unadorned disjunction across different snapshots")
|
||||
}
|
||||
|
||||
o.tfrs = append(o.tfrs, s)
|
||||
|
||||
return o, nil
|
||||
}
|
||||
|
||||
type OptimizeTFRDisjunctionUnadorned struct {
|
||||
snapshot *IndexSnapshot
|
||||
|
||||
tfrs []*IndexSnapshotTermFieldReader
|
||||
}
|
||||
|
||||
var OptimizeTFRDisjunctionUnadornedTerm = []byte("<disjunction:unadorned>")
|
||||
var OptimizeTFRDisjunctionUnadornedField = "*"
|
||||
|
||||
// Finish of an unadorned disjunction optimization will compute a
|
||||
// termFieldReader with an "actual" bitmap that represents the
|
||||
// constituent bitmaps OR'ed together. This termFieldReader cannot
|
||||
// provide any freq-norm or termVector associated information.
|
||||
func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err error) {
|
||||
if len(o.tfrs) <= 1 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
for i := range o.snapshot.segment {
|
||||
var cMax uint64
|
||||
|
||||
for _, tfr := range o.tfrs {
|
||||
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if itr.ActualBitmap() != nil {
|
||||
c := itr.ActualBitmap().GetCardinality()
|
||||
if cMax < c {
|
||||
cMax = c
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We use an artificial term and field because the optimized
|
||||
// termFieldReader can represent multiple terms and fields.
|
||||
oTFR := o.snapshot.unadornedTermFieldReader(
|
||||
OptimizeTFRDisjunctionUnadornedTerm, OptimizeTFRDisjunctionUnadornedField)
|
||||
|
||||
var docNums []uint32 // Collected docNum's from 1-hit posting lists.
|
||||
var actualBMs []*roaring.Bitmap // Collected from regular posting lists.
|
||||
|
||||
for i := range o.snapshot.segment {
|
||||
docNums = docNums[:0]
|
||||
actualBMs = actualBMs[:0]
|
||||
|
||||
for _, tfr := range o.tfrs {
|
||||
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
docNum, ok := itr.DocNum1Hit()
|
||||
if ok {
|
||||
docNums = append(docNums, uint32(docNum))
|
||||
continue
|
||||
}
|
||||
|
||||
if itr.ActualBitmap() != nil {
|
||||
actualBMs = append(actualBMs, itr.ActualBitmap())
|
||||
}
|
||||
}
|
||||
|
||||
var bm *roaring.Bitmap
|
||||
if len(actualBMs) > 2 {
|
||||
bm = roaring.HeapOr(actualBMs...)
|
||||
} else if len(actualBMs) == 2 {
|
||||
bm = roaring.Or(actualBMs[0], actualBMs[1])
|
||||
} else if len(actualBMs) == 1 {
|
||||
bm = actualBMs[0].Clone()
|
||||
}
|
||||
|
||||
if bm == nil {
|
||||
bm = roaring.New()
|
||||
}
|
||||
|
||||
bm.AddMany(docNums)
|
||||
|
||||
oTFR.iterators[i] = newUnadornedPostingsIteratorFromBitmap(bm)
|
||||
}
|
||||
|
||||
atomic.AddUint64(&o.snapshot.parent.stats.TotTermSearchersStarted, uint64(1))
|
||||
return oTFR, nil
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
func (i *IndexSnapshot) unadornedTermFieldReader(
|
||||
term []byte, field string) *IndexSnapshotTermFieldReader {
|
||||
// This IndexSnapshotTermFieldReader will not be recycled, more
|
||||
// conversation here: https://github.com/blevesearch/bleve/pull/1438
|
||||
return &IndexSnapshotTermFieldReader{
|
||||
term: term,
|
||||
field: field,
|
||||
snapshot: i,
|
||||
iterators: make([]segment.PostingsIterator, len(i.segment)),
|
||||
segmentOffset: 0,
|
||||
includeFreq: false,
|
||||
includeNorm: false,
|
||||
includeTermVectors: false,
|
||||
recycle: false,
|
||||
}
|
||||
}
|
990
vendor/github.com/blevesearch/bleve/v2/index/scorch/persister.go
generated
vendored
Normal file
990
vendor/github.com/blevesearch/bleve/v2/index/scorch/persister.go
generated
vendored
Normal file
|
@ -0,0 +1,990 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment "github.com/blevesearch/scorch_segment_api"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
// DefaultPersisterNapTimeMSec is kept to zero as this helps in direct
|
||||
// persistence of segments with the default safe batch option.
|
||||
// If the default safe batch option results in high number of
|
||||
// files on disk, then users may initialise this configuration parameter
|
||||
// with higher values so that the persister will nap a bit within it's
|
||||
// work loop to favour better in-memory merging of segments to result
|
||||
// in fewer segment files on disk. But that may come with an indexing
|
||||
// performance overhead.
|
||||
// Unsafe batch users are advised to override this to higher value
|
||||
// for better performance especially with high data density.
|
||||
var DefaultPersisterNapTimeMSec int = 0 // ms
|
||||
|
||||
// DefaultPersisterNapUnderNumFiles helps in controlling the pace of
|
||||
// persister. At times of a slow merger progress with heavy file merging
|
||||
// operations, its better to pace down the persister for letting the merger
|
||||
// to catch up within a range defined by this parameter.
|
||||
// Fewer files on disk (as per the merge plan) would result in keeping the
|
||||
// file handle usage under limit, faster disk merger and a healthier index.
|
||||
// Its been observed that such a loosely sync'ed introducer-persister-merger
|
||||
// trio results in better overall performance.
|
||||
var DefaultPersisterNapUnderNumFiles int = 1000
|
||||
|
||||
var DefaultMemoryPressurePauseThreshold uint64 = math.MaxUint64
|
||||
|
||||
type persisterOptions struct {
|
||||
// PersisterNapTimeMSec controls the wait/delay injected into
|
||||
// persistence workloop to improve the chances for
|
||||
// a healthier and heavier in-memory merging
|
||||
PersisterNapTimeMSec int
|
||||
|
||||
// PersisterNapTimeMSec > 0, and the number of files is less than
|
||||
// PersisterNapUnderNumFiles, then the persister will sleep
|
||||
// PersisterNapTimeMSec amount of time to improve the chances for
|
||||
// a healthier and heavier in-memory merging
|
||||
PersisterNapUnderNumFiles int
|
||||
|
||||
// MemoryPressurePauseThreshold let persister to have a better leeway
|
||||
// for prudently performing the memory merge of segments on a memory
|
||||
// pressure situation. Here the config value is an upper threshold
|
||||
// for the number of paused application threads. The default value would
|
||||
// be a very high number to always favour the merging of memory segments.
|
||||
MemoryPressurePauseThreshold uint64
|
||||
}
|
||||
|
||||
type notificationChan chan struct{}
|
||||
|
||||
func (s *Scorch) persisterLoop() {
|
||||
defer s.asyncTasks.Done()
|
||||
|
||||
var persistWatchers []*epochWatcher
|
||||
var lastPersistedEpoch, lastMergedEpoch uint64
|
||||
var ew *epochWatcher
|
||||
|
||||
var unpersistedCallbacks []index.BatchCallback
|
||||
|
||||
po, err := s.parsePersisterOptions()
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err))
|
||||
s.asyncTasks.Done()
|
||||
return
|
||||
}
|
||||
|
||||
OUTER:
|
||||
for {
|
||||
atomic.AddUint64(&s.stats.TotPersistLoopBeg, 1)
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
case ew = <-s.persisterNotifier:
|
||||
persistWatchers = append(persistWatchers, ew)
|
||||
default:
|
||||
}
|
||||
if ew != nil && ew.epoch > lastMergedEpoch {
|
||||
lastMergedEpoch = ew.epoch
|
||||
}
|
||||
lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch,
|
||||
lastMergedEpoch, persistWatchers, po)
|
||||
|
||||
var ourSnapshot *IndexSnapshot
|
||||
var ourPersisted []chan error
|
||||
var ourPersistedCallbacks []index.BatchCallback
|
||||
|
||||
// check to see if there is a new snapshot to persist
|
||||
s.rootLock.Lock()
|
||||
if s.root != nil && s.root.epoch > lastPersistedEpoch {
|
||||
ourSnapshot = s.root
|
||||
ourSnapshot.AddRef()
|
||||
ourPersisted = s.rootPersisted
|
||||
s.rootPersisted = nil
|
||||
ourPersistedCallbacks = s.persistedCallbacks
|
||||
s.persistedCallbacks = nil
|
||||
atomic.StoreUint64(&s.iStats.persistSnapshotSize, uint64(ourSnapshot.Size()))
|
||||
atomic.StoreUint64(&s.iStats.persistEpoch, ourSnapshot.epoch)
|
||||
}
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if ourSnapshot != nil {
|
||||
startTime := time.Now()
|
||||
|
||||
err := s.persistSnapshot(ourSnapshot, po)
|
||||
for _, ch := range ourPersisted {
|
||||
if err != nil {
|
||||
ch <- err
|
||||
}
|
||||
close(ch)
|
||||
}
|
||||
if err != nil {
|
||||
atomic.StoreUint64(&s.iStats.persistEpoch, 0)
|
||||
if err == segment.ErrClosed {
|
||||
// index has been closed
|
||||
_ = ourSnapshot.DecRef()
|
||||
break OUTER
|
||||
}
|
||||
|
||||
// save this current snapshot's persistedCallbacks, to invoke during
|
||||
// the retry attempt
|
||||
unpersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
|
||||
|
||||
s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err))
|
||||
_ = ourSnapshot.DecRef()
|
||||
atomic.AddUint64(&s.stats.TotPersistLoopErr, 1)
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
if unpersistedCallbacks != nil {
|
||||
// in the event of this being a retry attempt for persisting a snapshot
|
||||
// that had earlier failed, prepend the persistedCallbacks associated
|
||||
// with earlier segment(s) to the latest persistedCallbacks
|
||||
ourPersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
|
||||
unpersistedCallbacks = nil
|
||||
}
|
||||
|
||||
for i := range ourPersistedCallbacks {
|
||||
ourPersistedCallbacks[i](err)
|
||||
}
|
||||
|
||||
atomic.StoreUint64(&s.stats.LastPersistedEpoch, ourSnapshot.epoch)
|
||||
|
||||
lastPersistedEpoch = ourSnapshot.epoch
|
||||
for _, ew := range persistWatchers {
|
||||
close(ew.notifyCh)
|
||||
}
|
||||
|
||||
persistWatchers = nil
|
||||
_ = ourSnapshot.DecRef()
|
||||
|
||||
changed := false
|
||||
s.rootLock.RLock()
|
||||
if s.root != nil && s.root.epoch != lastPersistedEpoch {
|
||||
changed = true
|
||||
}
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
s.fireEvent(EventKindPersisterProgress, time.Since(startTime))
|
||||
|
||||
if changed {
|
||||
atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1)
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
|
||||
// tell the introducer we're waiting for changes
|
||||
w := &epochWatcher{
|
||||
epoch: lastPersistedEpoch,
|
||||
notifyCh: make(notificationChan, 1),
|
||||
}
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
case s.introducerNotifier <- w:
|
||||
}
|
||||
|
||||
s.removeOldData() // might as well cleanup while waiting
|
||||
|
||||
atomic.AddUint64(&s.stats.TotPersistLoopWait, 1)
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
case <-w.notifyCh:
|
||||
// woken up, next loop should pick up work
|
||||
atomic.AddUint64(&s.stats.TotPersistLoopWaitNotified, 1)
|
||||
case ew = <-s.persisterNotifier:
|
||||
// if the watchers are already caught up then let them wait,
|
||||
// else let them continue to do the catch up
|
||||
persistWatchers = append(persistWatchers, ew)
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotPersistLoopEnd, 1)
|
||||
}
|
||||
}
|
||||
|
||||
func notifyMergeWatchers(lastPersistedEpoch uint64,
|
||||
persistWatchers []*epochWatcher) []*epochWatcher {
|
||||
var watchersNext []*epochWatcher
|
||||
for _, w := range persistWatchers {
|
||||
if w.epoch < lastPersistedEpoch {
|
||||
close(w.notifyCh)
|
||||
} else {
|
||||
watchersNext = append(watchersNext, w)
|
||||
}
|
||||
}
|
||||
return watchersNext
|
||||
}
|
||||
|
||||
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64,
|
||||
lastMergedEpoch uint64, persistWatchers []*epochWatcher,
|
||||
po *persisterOptions) (uint64, []*epochWatcher) {
|
||||
|
||||
// First, let the watchers proceed if they lag behind
|
||||
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
|
||||
|
||||
// Check the merger lag by counting the segment files on disk,
|
||||
numFilesOnDisk, _, _ := s.diskFileStats(nil)
|
||||
|
||||
// On finding fewer files on disk, persister takes a short pause
|
||||
// for sufficient in-memory segments to pile up for the next
|
||||
// memory merge cum persist loop.
|
||||
if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) &&
|
||||
po.PersisterNapTimeMSec > 0 && s.NumEventsBlocking() == 0 {
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
case <-time.After(time.Millisecond * time.Duration(po.PersisterNapTimeMSec)):
|
||||
atomic.AddUint64(&s.stats.TotPersisterNapPauseCompleted, 1)
|
||||
|
||||
case ew := <-s.persisterNotifier:
|
||||
// unblock the merger in meantime
|
||||
persistWatchers = append(persistWatchers, ew)
|
||||
lastMergedEpoch = ew.epoch
|
||||
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
|
||||
atomic.AddUint64(&s.stats.TotPersisterMergerNapBreak, 1)
|
||||
}
|
||||
return lastMergedEpoch, persistWatchers
|
||||
}
|
||||
|
||||
// Finding too many files on disk could be due to two reasons.
|
||||
// 1. Too many older snapshots awaiting the clean up.
|
||||
// 2. The merger could be lagging behind on merging the disk files.
|
||||
if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) {
|
||||
s.removeOldData()
|
||||
numFilesOnDisk, _, _ = s.diskFileStats(nil)
|
||||
}
|
||||
|
||||
// Persister pause until the merger catches up to reduce the segment
|
||||
// file count under the threshold.
|
||||
// But if there is memory pressure, then skip this sleep maneuvers.
|
||||
OUTER:
|
||||
for po.PersisterNapUnderNumFiles > 0 &&
|
||||
numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) &&
|
||||
lastMergedEpoch < lastPersistedEpoch {
|
||||
atomic.AddUint64(&s.stats.TotPersisterSlowMergerPause, 1)
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
case ew := <-s.persisterNotifier:
|
||||
persistWatchers = append(persistWatchers, ew)
|
||||
lastMergedEpoch = ew.epoch
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotPersisterSlowMergerResume, 1)
|
||||
|
||||
// let the watchers proceed if they lag behind
|
||||
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
|
||||
|
||||
numFilesOnDisk, _, _ = s.diskFileStats(nil)
|
||||
}
|
||||
|
||||
return lastMergedEpoch, persistWatchers
|
||||
}
|
||||
|
||||
func (s *Scorch) parsePersisterOptions() (*persisterOptions, error) {
|
||||
po := persisterOptions{
|
||||
PersisterNapTimeMSec: DefaultPersisterNapTimeMSec,
|
||||
PersisterNapUnderNumFiles: DefaultPersisterNapUnderNumFiles,
|
||||
MemoryPressurePauseThreshold: DefaultMemoryPressurePauseThreshold,
|
||||
}
|
||||
if v, ok := s.config["scorchPersisterOptions"]; ok {
|
||||
b, err := json.Marshal(v)
|
||||
if err != nil {
|
||||
return &po, err
|
||||
}
|
||||
|
||||
err = json.Unmarshal(b, &po)
|
||||
if err != nil {
|
||||
return &po, err
|
||||
}
|
||||
}
|
||||
return &po, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot,
|
||||
po *persisterOptions) error {
|
||||
// Perform in-memory segment merging only when the memory pressure is
|
||||
// below the configured threshold, else the persister performs the
|
||||
// direct persistence of segments.
|
||||
if s.NumEventsBlocking() < po.MemoryPressurePauseThreshold {
|
||||
persisted, err := s.persistSnapshotMaybeMerge(snapshot)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if persisted {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
return s.persistSnapshotDirect(snapshot)
|
||||
}
|
||||
|
||||
// DefaultMinSegmentsForInMemoryMerge represents the default number of
|
||||
// in-memory zap segments that persistSnapshotMaybeMerge() needs to
|
||||
// see in an IndexSnapshot before it decides to merge and persist
|
||||
// those segments
|
||||
var DefaultMinSegmentsForInMemoryMerge = 2
|
||||
|
||||
// persistSnapshotMaybeMerge examines the snapshot and might merge and
|
||||
// persist the in-memory zap segments if there are enough of them
|
||||
func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) (
|
||||
bool, error) {
|
||||
// collect the in-memory zap segments (SegmentBase instances)
|
||||
var sbs []segment.Segment
|
||||
var sbsDrops []*roaring.Bitmap
|
||||
var sbsIndexes []int
|
||||
|
||||
for i, segmentSnapshot := range snapshot.segment {
|
||||
if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); !ok {
|
||||
sbs = append(sbs, segmentSnapshot.segment)
|
||||
sbsDrops = append(sbsDrops, segmentSnapshot.deleted)
|
||||
sbsIndexes = append(sbsIndexes, i)
|
||||
}
|
||||
}
|
||||
|
||||
if len(sbs) < DefaultMinSegmentsForInMemoryMerge {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
newSnapshot, newSegmentID, err := s.mergeSegmentBases(
|
||||
snapshot, sbs, sbsDrops, sbsIndexes)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if newSnapshot == nil {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
defer func() {
|
||||
_ = newSnapshot.DecRef()
|
||||
}()
|
||||
|
||||
mergedSegmentIDs := map[uint64]struct{}{}
|
||||
for _, idx := range sbsIndexes {
|
||||
mergedSegmentIDs[snapshot.segment[idx].id] = struct{}{}
|
||||
}
|
||||
|
||||
// construct a snapshot that's logically equivalent to the input
|
||||
// snapshot, but with merged segments replaced by the new segment
|
||||
equiv := &IndexSnapshot{
|
||||
parent: snapshot.parent,
|
||||
segment: make([]*SegmentSnapshot, 0, len(snapshot.segment)),
|
||||
internal: snapshot.internal,
|
||||
epoch: snapshot.epoch,
|
||||
creator: "persistSnapshotMaybeMerge",
|
||||
}
|
||||
|
||||
// copy to the equiv the segments that weren't replaced
|
||||
for _, segment := range snapshot.segment {
|
||||
if _, wasMerged := mergedSegmentIDs[segment.id]; !wasMerged {
|
||||
equiv.segment = append(equiv.segment, segment)
|
||||
}
|
||||
}
|
||||
|
||||
// append to the equiv the new segment
|
||||
for _, segment := range newSnapshot.segment {
|
||||
if segment.id == newSegmentID {
|
||||
equiv.segment = append(equiv.segment, &SegmentSnapshot{
|
||||
id: newSegmentID,
|
||||
segment: segment.segment,
|
||||
deleted: nil, // nil since merging handled deletions
|
||||
})
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
err = s.persistSnapshotDirect(equiv)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string,
|
||||
segPlugin SegmentPlugin) ([]string, map[uint64]string, error) {
|
||||
snapshotsBucket, err := tx.CreateBucketIfNotExists(boltSnapshotsBucket)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
newSnapshotKey := encodeUvarintAscending(nil, snapshot.epoch)
|
||||
snapshotBucket, err := snapshotsBucket.CreateBucketIfNotExists(newSnapshotKey)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// persist meta values
|
||||
metaBucket, err := snapshotBucket.CreateBucketIfNotExists(boltMetaDataKey)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
err = metaBucket.Put(boltMetaDataSegmentTypeKey, []byte(segPlugin.Type()))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
buf := make([]byte, binary.MaxVarintLen32)
|
||||
binary.BigEndian.PutUint32(buf, segPlugin.Version())
|
||||
err = metaBucket.Put(boltMetaDataSegmentVersionKey, buf)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// persist internal values
|
||||
internalBucket, err := snapshotBucket.CreateBucketIfNotExists(boltInternalKey)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
// TODO optimize writing these in order?
|
||||
for k, v := range snapshot.internal {
|
||||
err = internalBucket.Put([]byte(k), v)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
var filenames []string
|
||||
newSegmentPaths := make(map[uint64]string)
|
||||
|
||||
// first ensure that each segment in this snapshot has been persisted
|
||||
for _, segmentSnapshot := range snapshot.segment {
|
||||
snapshotSegmentKey := encodeUvarintAscending(nil, segmentSnapshot.id)
|
||||
snapshotSegmentBucket, err := snapshotBucket.CreateBucketIfNotExists(snapshotSegmentKey)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
switch seg := segmentSnapshot.segment.(type) {
|
||||
case segment.PersistedSegment:
|
||||
segPath := seg.Path()
|
||||
filename := strings.TrimPrefix(segPath, path+string(os.PathSeparator))
|
||||
err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
filenames = append(filenames, filename)
|
||||
case segment.UnpersistedSegment:
|
||||
// need to persist this to disk
|
||||
filename := zapFileName(segmentSnapshot.id)
|
||||
path := path + string(os.PathSeparator) + filename
|
||||
err = seg.Persist(path)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("error persisting segment: %v", err)
|
||||
}
|
||||
newSegmentPaths[segmentSnapshot.id] = path
|
||||
err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
filenames = append(filenames, filename)
|
||||
default:
|
||||
return nil, nil, fmt.Errorf("unknown segment type: %T", seg)
|
||||
}
|
||||
// store current deleted bits
|
||||
var roaringBuf bytes.Buffer
|
||||
if segmentSnapshot.deleted != nil {
|
||||
_, err = segmentSnapshot.deleted.WriteTo(&roaringBuf)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("error persisting roaring bytes: %v", err)
|
||||
}
|
||||
err = snapshotSegmentBucket.Put(boltDeletedKey, roaringBuf.Bytes())
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return filenames, newSegmentPaths, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
|
||||
// start a write transaction
|
||||
tx, err := s.rootBolt.Begin(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// defer rollback on error
|
||||
defer func() {
|
||||
if err != nil {
|
||||
_ = tx.Rollback()
|
||||
}
|
||||
}()
|
||||
|
||||
filenames, newSegmentPaths, err := prepareBoltSnapshot(snapshot, tx, s.path, s.segPlugin)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// we need to swap in a new root only when we've persisted 1 or
|
||||
// more segments -- whereby the new root would have 1-for-1
|
||||
// replacements of in-memory segments with file-based segments
|
||||
//
|
||||
// other cases like updates to internal values only, and/or when
|
||||
// there are only deletions, are already covered and persisted by
|
||||
// the newly populated boltdb snapshotBucket above
|
||||
if len(newSegmentPaths) > 0 {
|
||||
// now try to open all the new snapshots
|
||||
newSegments := make(map[uint64]segment.Segment)
|
||||
defer func() {
|
||||
for _, s := range newSegments {
|
||||
if s != nil {
|
||||
// cleanup segments that were opened but not
|
||||
// swapped into the new root
|
||||
_ = s.Close()
|
||||
}
|
||||
}
|
||||
}()
|
||||
for segmentID, path := range newSegmentPaths {
|
||||
newSegments[segmentID], err = s.segPlugin.Open(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening new segment at %s, %v", path, err)
|
||||
}
|
||||
}
|
||||
|
||||
persist := &persistIntroduction{
|
||||
persisted: newSegments,
|
||||
applied: make(notificationChan),
|
||||
}
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
return segment.ErrClosed
|
||||
case s.persists <- persist:
|
||||
}
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
return segment.ErrClosed
|
||||
case <-persist.applied:
|
||||
}
|
||||
}
|
||||
|
||||
err = tx.Commit()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = s.rootBolt.Sync()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// allow files to become eligible for removal after commit, such
|
||||
// as file segments from snapshots that came from the merger
|
||||
s.rootLock.Lock()
|
||||
for _, filename := range filenames {
|
||||
delete(s.ineligibleForRemoval, filename)
|
||||
}
|
||||
s.rootLock.Unlock()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func zapFileName(epoch uint64) string {
|
||||
return fmt.Sprintf("%012x.zap", epoch)
|
||||
}
|
||||
|
||||
// bolt snapshot code
|
||||
|
||||
var boltSnapshotsBucket = []byte{'s'}
|
||||
var boltPathKey = []byte{'p'}
|
||||
var boltDeletedKey = []byte{'d'}
|
||||
var boltInternalKey = []byte{'i'}
|
||||
var boltMetaDataKey = []byte{'m'}
|
||||
var boltMetaDataSegmentTypeKey = []byte("type")
|
||||
var boltMetaDataSegmentVersionKey = []byte("version")
|
||||
|
||||
func (s *Scorch) loadFromBolt() error {
|
||||
return s.rootBolt.View(func(tx *bolt.Tx) error {
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
}
|
||||
foundRoot := false
|
||||
c := snapshots.Cursor()
|
||||
for k, _ := c.Last(); k != nil; k, _ = c.Prev() {
|
||||
_, snapshotEpoch, err := decodeUvarintAscending(k)
|
||||
if err != nil {
|
||||
log.Printf("unable to parse segment epoch %x, continuing", k)
|
||||
continue
|
||||
}
|
||||
if foundRoot {
|
||||
s.AddEligibleForRemoval(snapshotEpoch)
|
||||
continue
|
||||
}
|
||||
snapshot := snapshots.Bucket(k)
|
||||
if snapshot == nil {
|
||||
log.Printf("snapshot key, but bucket missing %x, continuing", k)
|
||||
s.AddEligibleForRemoval(snapshotEpoch)
|
||||
continue
|
||||
}
|
||||
indexSnapshot, err := s.loadSnapshot(snapshot)
|
||||
if err != nil {
|
||||
log.Printf("unable to load snapshot, %v, continuing", err)
|
||||
s.AddEligibleForRemoval(snapshotEpoch)
|
||||
continue
|
||||
}
|
||||
indexSnapshot.epoch = snapshotEpoch
|
||||
// set the nextSegmentID
|
||||
s.nextSegmentID, err = s.maxSegmentIDOnDisk()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s.nextSegmentID++
|
||||
s.rootLock.Lock()
|
||||
s.nextSnapshotEpoch = snapshotEpoch + 1
|
||||
rootPrev := s.root
|
||||
s.root = indexSnapshot
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if rootPrev != nil {
|
||||
_ = rootPrev.DecRef()
|
||||
}
|
||||
|
||||
foundRoot = true
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// LoadSnapshot loads the segment with the specified epoch
|
||||
// NOTE: this is currently ONLY intended to be used by the command-line tool
|
||||
func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) {
|
||||
err = s.rootBolt.View(func(tx *bolt.Tx) error {
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
}
|
||||
snapshotKey := encodeUvarintAscending(nil, epoch)
|
||||
snapshot := snapshots.Bucket(snapshotKey)
|
||||
if snapshot == nil {
|
||||
return fmt.Errorf("snapshot with epoch: %v - doesn't exist", epoch)
|
||||
}
|
||||
rv, err = s.loadSnapshot(snapshot)
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
|
||||
|
||||
rv := &IndexSnapshot{
|
||||
parent: s,
|
||||
internal: make(map[string][]byte),
|
||||
refs: 1,
|
||||
creator: "loadSnapshot",
|
||||
}
|
||||
// first we look for the meta-data bucket, this will tell us
|
||||
// which segment type/version was used for this snapshot
|
||||
// all operations for this scorch will use this type/version
|
||||
metaBucket := snapshot.Bucket(boltMetaDataKey)
|
||||
if metaBucket == nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf("meta-data bucket missing")
|
||||
}
|
||||
segmentType := string(metaBucket.Get(boltMetaDataSegmentTypeKey))
|
||||
segmentVersion := binary.BigEndian.Uint32(
|
||||
metaBucket.Get(boltMetaDataSegmentVersionKey))
|
||||
err := s.loadSegmentPlugin(segmentType, segmentVersion)
|
||||
if err != nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf(
|
||||
"unable to load correct segment wrapper: %v", err)
|
||||
}
|
||||
var running uint64
|
||||
c := snapshot.Cursor()
|
||||
for k, _ := c.First(); k != nil; k, _ = c.Next() {
|
||||
if k[0] == boltInternalKey[0] {
|
||||
internalBucket := snapshot.Bucket(k)
|
||||
err := internalBucket.ForEach(func(key []byte, val []byte) error {
|
||||
copiedVal := append([]byte(nil), val...)
|
||||
rv.internal[string(key)] = copiedVal
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, err
|
||||
}
|
||||
} else if k[0] != boltMetaDataKey[0] {
|
||||
segmentBucket := snapshot.Bucket(k)
|
||||
if segmentBucket == nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf("segment key, but bucket missing % x", k)
|
||||
}
|
||||
segmentSnapshot, err := s.loadSegment(segmentBucket)
|
||||
if err != nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf("failed to load segment: %v", err)
|
||||
}
|
||||
_, segmentSnapshot.id, err = decodeUvarintAscending(k)
|
||||
if err != nil {
|
||||
_ = rv.DecRef()
|
||||
return nil, fmt.Errorf("failed to decode segment id: %v", err)
|
||||
}
|
||||
rv.segment = append(rv.segment, segmentSnapshot)
|
||||
rv.offsets = append(rv.offsets, running)
|
||||
running += segmentSnapshot.segment.Count()
|
||||
}
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, error) {
|
||||
pathBytes := segmentBucket.Get(boltPathKey)
|
||||
if pathBytes == nil {
|
||||
return nil, fmt.Errorf("segment path missing")
|
||||
}
|
||||
segmentPath := s.path + string(os.PathSeparator) + string(pathBytes)
|
||||
segment, err := s.segPlugin.Open(segmentPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error opening bolt segment: %v", err)
|
||||
}
|
||||
|
||||
rv := &SegmentSnapshot{
|
||||
segment: segment,
|
||||
cachedDocs: &cachedDocs{cache: nil},
|
||||
}
|
||||
deletedBytes := segmentBucket.Get(boltDeletedKey)
|
||||
if deletedBytes != nil {
|
||||
deletedBitmap := roaring.NewBitmap()
|
||||
r := bytes.NewReader(deletedBytes)
|
||||
_, err := deletedBitmap.ReadFrom(r)
|
||||
if err != nil {
|
||||
_ = segment.Close()
|
||||
return nil, fmt.Errorf("error reading deleted bytes: %v", err)
|
||||
}
|
||||
if !deletedBitmap.IsEmpty() {
|
||||
rv.deleted = deletedBitmap
|
||||
}
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) removeOldData() {
|
||||
removed, err := s.removeOldBoltSnapshots()
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err))
|
||||
}
|
||||
atomic.AddUint64(&s.stats.TotSnapshotsRemovedFromMetaStore, uint64(removed))
|
||||
|
||||
err = s.removeOldZapFiles()
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
|
||||
}
|
||||
}
|
||||
|
||||
// NumSnapshotsToKeep represents how many recent, old snapshots to
|
||||
// keep around per Scorch instance. Useful for apps that require
|
||||
// rollback'ability.
|
||||
var NumSnapshotsToKeep = 1
|
||||
|
||||
// Removes enough snapshots from the rootBolt so that the
|
||||
// s.eligibleForRemoval stays under the NumSnapshotsToKeep policy.
|
||||
func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) {
|
||||
persistedEpochs, err := s.RootBoltSnapshotEpochs()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if len(persistedEpochs) <= s.numSnapshotsToKeep {
|
||||
// we need to keep everything
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// make a map of epochs to protect from deletion
|
||||
protectedEpochs := make(map[uint64]struct{}, s.numSnapshotsToKeep)
|
||||
for _, epoch := range persistedEpochs[0:s.numSnapshotsToKeep] {
|
||||
protectedEpochs[epoch] = struct{}{}
|
||||
}
|
||||
|
||||
var epochsToRemove []uint64
|
||||
var newEligible []uint64
|
||||
s.rootLock.Lock()
|
||||
for _, epoch := range s.eligibleForRemoval {
|
||||
if _, ok := protectedEpochs[epoch]; ok {
|
||||
// protected
|
||||
newEligible = append(newEligible, epoch)
|
||||
} else {
|
||||
epochsToRemove = append(epochsToRemove, epoch)
|
||||
}
|
||||
}
|
||||
s.eligibleForRemoval = newEligible
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if len(epochsToRemove) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
tx, err := s.rootBolt.Begin(true)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer func() {
|
||||
if err == nil {
|
||||
err = tx.Commit()
|
||||
} else {
|
||||
_ = tx.Rollback()
|
||||
}
|
||||
if err == nil {
|
||||
err = s.rootBolt.Sync()
|
||||
}
|
||||
}()
|
||||
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
for _, epochToRemove := range epochsToRemove {
|
||||
k := encodeUvarintAscending(nil, epochToRemove)
|
||||
err = snapshots.DeleteBucket(k)
|
||||
if err == bolt.ErrBucketNotFound {
|
||||
err = nil
|
||||
}
|
||||
if err == nil {
|
||||
numRemoved++
|
||||
}
|
||||
}
|
||||
|
||||
return numRemoved, err
|
||||
}
|
||||
|
||||
func (s *Scorch) maxSegmentIDOnDisk() (uint64, error) {
|
||||
currFileInfos, err := ioutil.ReadDir(s.path)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
var rv uint64
|
||||
for _, finfo := range currFileInfos {
|
||||
fname := finfo.Name()
|
||||
if filepath.Ext(fname) == ".zap" {
|
||||
prefix := strings.TrimSuffix(fname, ".zap")
|
||||
id, err2 := strconv.ParseUint(prefix, 16, 64)
|
||||
if err2 != nil {
|
||||
return 0, err2
|
||||
}
|
||||
if id > rv {
|
||||
rv = id
|
||||
}
|
||||
}
|
||||
}
|
||||
return rv, err
|
||||
}
|
||||
|
||||
// Removes any *.zap files which aren't listed in the rootBolt.
|
||||
func (s *Scorch) removeOldZapFiles() error {
|
||||
liveFileNames, err := s.loadZapFileNames()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
currFileInfos, err := ioutil.ReadDir(s.path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
s.rootLock.RLock()
|
||||
|
||||
for _, finfo := range currFileInfos {
|
||||
fname := finfo.Name()
|
||||
if filepath.Ext(fname) == ".zap" {
|
||||
if _, exists := liveFileNames[fname]; !exists && !s.ineligibleForRemoval[fname] {
|
||||
err := os.Remove(s.path + string(os.PathSeparator) + fname)
|
||||
if err != nil {
|
||||
log.Printf("got err removing file: %s, err: %v", fname, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Scorch) RootBoltSnapshotEpochs() ([]uint64, error) {
|
||||
var rv []uint64
|
||||
err := s.rootBolt.View(func(tx *bolt.Tx) error {
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
}
|
||||
sc := snapshots.Cursor()
|
||||
for sk, _ := sc.Last(); sk != nil; sk, _ = sc.Prev() {
|
||||
_, snapshotEpoch, err := decodeUvarintAscending(sk)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
rv = append(rv, snapshotEpoch)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
return rv, err
|
||||
}
|
||||
|
||||
// Returns the *.zap file names that are listed in the rootBolt.
|
||||
func (s *Scorch) loadZapFileNames() (map[string]struct{}, error) {
|
||||
rv := map[string]struct{}{}
|
||||
err := s.rootBolt.View(func(tx *bolt.Tx) error {
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
}
|
||||
sc := snapshots.Cursor()
|
||||
for sk, _ := sc.First(); sk != nil; sk, _ = sc.Next() {
|
||||
snapshot := snapshots.Bucket(sk)
|
||||
if snapshot == nil {
|
||||
continue
|
||||
}
|
||||
segc := snapshot.Cursor()
|
||||
for segk, _ := segc.First(); segk != nil; segk, _ = segc.Next() {
|
||||
if segk[0] == boltInternalKey[0] {
|
||||
continue
|
||||
}
|
||||
segmentBucket := snapshot.Bucket(segk)
|
||||
if segmentBucket == nil {
|
||||
continue
|
||||
}
|
||||
pathBytes := segmentBucket.Get(boltPathKey)
|
||||
if pathBytes == nil {
|
||||
continue
|
||||
}
|
||||
pathString := string(pathBytes)
|
||||
rv[string(pathString)] = struct{}{}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
return rv, err
|
||||
}
|
63
vendor/github.com/blevesearch/bleve/v2/index/scorch/regexp.go
generated
vendored
Normal file
63
vendor/github.com/blevesearch/bleve/v2/index/scorch/regexp.go
generated
vendored
Normal file
|
@ -0,0 +1,63 @@
|
|||
// Copyright (c) 2020 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"regexp/syntax"
|
||||
|
||||
"github.com/couchbase/vellum/regexp"
|
||||
)
|
||||
|
||||
func parseRegexp(pattern string) (a *regexp.Regexp, prefixBeg, prefixEnd []byte, err error) {
|
||||
// TODO: potential optimization where syntax.Regexp supports a Simplify() API?
|
||||
|
||||
parsed, err := syntax.Parse(pattern, syntax.Perl)
|
||||
if err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
|
||||
re, err := regexp.NewParsedWithLimit(pattern, parsed, regexp.DefaultLimit)
|
||||
if err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
|
||||
prefix := literalPrefix(parsed)
|
||||
if prefix != "" {
|
||||
prefixBeg := []byte(prefix)
|
||||
prefixEnd := calculateExclusiveEndFromPrefix(prefixBeg)
|
||||
return re, prefixBeg, prefixEnd, nil
|
||||
}
|
||||
|
||||
return re, nil, nil, nil
|
||||
}
|
||||
|
||||
// Returns the literal prefix given the parse tree for a regexp
|
||||
func literalPrefix(s *syntax.Regexp) string {
|
||||
// traverse the left-most branch in the parse tree as long as the
|
||||
// node represents a concatenation
|
||||
for s != nil && s.Op == syntax.OpConcat {
|
||||
if len(s.Sub) < 1 {
|
||||
return ""
|
||||
}
|
||||
|
||||
s = s.Sub[0]
|
||||
}
|
||||
|
||||
if s.Op == syntax.OpLiteral && (s.Flags&syntax.FoldCase == 0) {
|
||||
return string(s.Rune)
|
||||
}
|
||||
|
||||
return "" // no literal prefix
|
||||
}
|
212
vendor/github.com/blevesearch/bleve/v2/index/scorch/rollback.go
generated
vendored
Normal file
212
vendor/github.com/blevesearch/bleve/v2/index/scorch/rollback.go
generated
vendored
Normal file
|
@ -0,0 +1,212 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
type RollbackPoint struct {
|
||||
epoch uint64
|
||||
meta map[string][]byte
|
||||
}
|
||||
|
||||
func (r *RollbackPoint) GetInternal(key []byte) []byte {
|
||||
return r.meta[string(key)]
|
||||
}
|
||||
|
||||
// RollbackPoints returns an array of rollback points available for
|
||||
// the application to rollback to, with more recent rollback points
|
||||
// (higher epochs) coming first.
|
||||
func RollbackPoints(path string) ([]*RollbackPoint, error) {
|
||||
if len(path) == 0 {
|
||||
return nil, fmt.Errorf("RollbackPoints: invalid path")
|
||||
}
|
||||
|
||||
rootBoltPath := path + string(os.PathSeparator) + "root.bolt"
|
||||
rootBoltOpt := &bolt.Options{
|
||||
ReadOnly: true,
|
||||
}
|
||||
rootBolt, err := bolt.Open(rootBoltPath, 0600, rootBoltOpt)
|
||||
if err != nil || rootBolt == nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// start a read-only bolt transaction
|
||||
tx, err := rootBolt.Begin(false)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("RollbackPoints: failed to start" +
|
||||
" read-only transaction")
|
||||
}
|
||||
|
||||
// read-only bolt transactions to be rolled back
|
||||
defer func() {
|
||||
_ = tx.Rollback()
|
||||
_ = rootBolt.Close()
|
||||
}()
|
||||
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
rollbackPoints := []*RollbackPoint{}
|
||||
|
||||
c1 := snapshots.Cursor()
|
||||
for k, _ := c1.Last(); k != nil; k, _ = c1.Prev() {
|
||||
_, snapshotEpoch, err := decodeUvarintAscending(k)
|
||||
if err != nil {
|
||||
log.Printf("RollbackPoints:"+
|
||||
" unable to parse segment epoch %x, continuing", k)
|
||||
continue
|
||||
}
|
||||
|
||||
snapshot := snapshots.Bucket(k)
|
||||
if snapshot == nil {
|
||||
log.Printf("RollbackPoints:"+
|
||||
" snapshot key, but bucket missing %x, continuing", k)
|
||||
continue
|
||||
}
|
||||
|
||||
meta := map[string][]byte{}
|
||||
c2 := snapshot.Cursor()
|
||||
for j, _ := c2.First(); j != nil; j, _ = c2.Next() {
|
||||
if j[0] == boltInternalKey[0] {
|
||||
internalBucket := snapshot.Bucket(j)
|
||||
err = internalBucket.ForEach(func(key []byte, val []byte) error {
|
||||
copiedVal := append([]byte(nil), val...)
|
||||
meta[string(key)] = copiedVal
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Printf("RollbackPoints:"+
|
||||
" failed in fetching internal data: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
rollbackPoints = append(rollbackPoints, &RollbackPoint{
|
||||
epoch: snapshotEpoch,
|
||||
meta: meta,
|
||||
})
|
||||
}
|
||||
|
||||
return rollbackPoints, nil
|
||||
}
|
||||
|
||||
// Rollback atomically and durably brings the store back to the point
|
||||
// in time as represented by the RollbackPoint.
|
||||
// Rollback() should only be passed a RollbackPoint that came from the
|
||||
// same store using the RollbackPoints() API along with the index path.
|
||||
func Rollback(path string, to *RollbackPoint) error {
|
||||
if to == nil {
|
||||
return fmt.Errorf("Rollback: RollbackPoint is nil")
|
||||
}
|
||||
if len(path) == 0 {
|
||||
return fmt.Errorf("Rollback: index path is empty")
|
||||
}
|
||||
|
||||
rootBoltPath := path + string(os.PathSeparator) + "root.bolt"
|
||||
rootBoltOpt := &bolt.Options{
|
||||
ReadOnly: false,
|
||||
}
|
||||
rootBolt, err := bolt.Open(rootBoltPath, 0600, rootBoltOpt)
|
||||
if err != nil || rootBolt == nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
err1 := rootBolt.Close()
|
||||
if err1 != nil && err == nil {
|
||||
err = err1
|
||||
}
|
||||
}()
|
||||
|
||||
// pick all the younger persisted epochs in bolt store
|
||||
// including the target one.
|
||||
var found bool
|
||||
var eligibleEpochs []uint64
|
||||
err = rootBolt.View(func(tx *bolt.Tx) error {
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
}
|
||||
sc := snapshots.Cursor()
|
||||
for sk, _ := sc.Last(); sk != nil && !found; sk, _ = sc.Prev() {
|
||||
_, snapshotEpoch, err := decodeUvarintAscending(sk)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if snapshotEpoch == to.epoch {
|
||||
found = true
|
||||
}
|
||||
eligibleEpochs = append(eligibleEpochs, snapshotEpoch)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
if len(eligibleEpochs) == 0 {
|
||||
return fmt.Errorf("Rollback: no persisted epochs found in bolt")
|
||||
}
|
||||
if !found {
|
||||
return fmt.Errorf("Rollback: target epoch %d not found in bolt", to.epoch)
|
||||
}
|
||||
|
||||
// start a write transaction
|
||||
tx, err := rootBolt.Begin(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if err == nil {
|
||||
err = tx.Commit()
|
||||
} else {
|
||||
_ = tx.Rollback()
|
||||
}
|
||||
if err == nil {
|
||||
err = rootBolt.Sync()
|
||||
}
|
||||
}()
|
||||
|
||||
snapshots := tx.Bucket(boltSnapshotsBucket)
|
||||
if snapshots == nil {
|
||||
return nil
|
||||
}
|
||||
for _, epoch := range eligibleEpochs {
|
||||
k := encodeUvarintAscending(nil, epoch)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if epoch == to.epoch {
|
||||
// return here as it already processed until the given epoch
|
||||
return nil
|
||||
}
|
||||
err = snapshots.DeleteBucket(k)
|
||||
if err == bolt.ErrBucketNotFound {
|
||||
err = nil
|
||||
}
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
662
vendor/github.com/blevesearch/bleve/v2/index/scorch/scorch.go
generated
vendored
Normal file
662
vendor/github.com/blevesearch/bleve/v2/index/scorch/scorch.go
generated
vendored
Normal file
|
@ -0,0 +1,662 @@
|
|||
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment "github.com/blevesearch/scorch_segment_api"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
const Name = "scorch"
|
||||
|
||||
const Version uint8 = 2
|
||||
|
||||
var ErrClosed = fmt.Errorf("scorch closed")
|
||||
|
||||
type Scorch struct {
|
||||
nextSegmentID uint64
|
||||
stats Stats
|
||||
iStats internalStats
|
||||
|
||||
readOnly bool
|
||||
version uint8
|
||||
config map[string]interface{}
|
||||
analysisQueue *index.AnalysisQueue
|
||||
path string
|
||||
|
||||
unsafeBatch bool
|
||||
|
||||
rootLock sync.RWMutex
|
||||
root *IndexSnapshot // holds 1 ref-count on the root
|
||||
rootPersisted []chan error // closed when root is persisted
|
||||
persistedCallbacks []index.BatchCallback
|
||||
nextSnapshotEpoch uint64
|
||||
eligibleForRemoval []uint64 // Index snapshot epochs that are safe to GC.
|
||||
ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet.
|
||||
|
||||
numSnapshotsToKeep int
|
||||
closeCh chan struct{}
|
||||
introductions chan *segmentIntroduction
|
||||
persists chan *persistIntroduction
|
||||
merges chan *segmentMerge
|
||||
introducerNotifier chan *epochWatcher
|
||||
persisterNotifier chan *epochWatcher
|
||||
rootBolt *bolt.DB
|
||||
asyncTasks sync.WaitGroup
|
||||
|
||||
onEvent func(event Event)
|
||||
onAsyncError func(err error)
|
||||
|
||||
forceMergeRequestCh chan *mergerCtrl
|
||||
|
||||
segPlugin SegmentPlugin
|
||||
}
|
||||
|
||||
type internalStats struct {
|
||||
persistEpoch uint64
|
||||
persistSnapshotSize uint64
|
||||
mergeEpoch uint64
|
||||
mergeSnapshotSize uint64
|
||||
newSegBufBytesAdded uint64
|
||||
newSegBufBytesRemoved uint64
|
||||
analysisBytesAdded uint64
|
||||
analysisBytesRemoved uint64
|
||||
}
|
||||
|
||||
func NewScorch(storeName string,
|
||||
config map[string]interface{},
|
||||
analysisQueue *index.AnalysisQueue) (index.Index, error) {
|
||||
rv := &Scorch{
|
||||
version: Version,
|
||||
config: config,
|
||||
analysisQueue: analysisQueue,
|
||||
nextSnapshotEpoch: 1,
|
||||
closeCh: make(chan struct{}),
|
||||
ineligibleForRemoval: map[string]bool{},
|
||||
forceMergeRequestCh: make(chan *mergerCtrl, 1),
|
||||
segPlugin: defaultSegmentPlugin,
|
||||
}
|
||||
|
||||
forcedSegmentType, forcedSegmentVersion, err := configForceSegmentTypeVersion(config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if forcedSegmentType != "" && forcedSegmentVersion != 0 {
|
||||
err := rv.loadSegmentPlugin(forcedSegmentType,
|
||||
uint32(forcedSegmentVersion))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
rv.root = &IndexSnapshot{parent: rv, refs: 1, creator: "NewScorch"}
|
||||
ro, ok := config["read_only"].(bool)
|
||||
if ok {
|
||||
rv.readOnly = ro
|
||||
}
|
||||
ub, ok := config["unsafe_batch"].(bool)
|
||||
if ok {
|
||||
rv.unsafeBatch = ub
|
||||
}
|
||||
ecbName, ok := config["eventCallbackName"].(string)
|
||||
if ok {
|
||||
rv.onEvent = RegistryEventCallbacks[ecbName]
|
||||
}
|
||||
aecbName, ok := config["asyncErrorCallbackName"].(string)
|
||||
if ok {
|
||||
rv.onAsyncError = RegistryAsyncErrorCallbacks[aecbName]
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// configForceSegmentTypeVersion checks if the caller has requested a
|
||||
// specific segment type/version
|
||||
func configForceSegmentTypeVersion(config map[string]interface{}) (string, uint32, error) {
|
||||
forcedSegmentVersion, err := parseToInteger(config["forceSegmentVersion"])
|
||||
if err != nil {
|
||||
return "", 0, nil
|
||||
}
|
||||
|
||||
forcedSegmentType, ok := config["forceSegmentType"].(string)
|
||||
if !ok {
|
||||
return "", 0, fmt.Errorf(
|
||||
"forceSegmentVersion set to %d, must also specify forceSegmentType", forcedSegmentVersion)
|
||||
}
|
||||
|
||||
return forcedSegmentType, uint32(forcedSegmentVersion), nil
|
||||
}
|
||||
|
||||
func (s *Scorch) NumEventsBlocking() uint64 {
|
||||
eventsCompleted := atomic.LoadUint64(&s.stats.TotEventTriggerCompleted)
|
||||
eventsStarted := atomic.LoadUint64(&s.stats.TotEventTriggerStarted)
|
||||
return eventsStarted - eventsCompleted
|
||||
}
|
||||
|
||||
func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) {
|
||||
if s.onEvent != nil {
|
||||
atomic.AddUint64(&s.stats.TotEventTriggerStarted, 1)
|
||||
s.onEvent(Event{Kind: kind, Scorch: s, Duration: dur})
|
||||
atomic.AddUint64(&s.stats.TotEventTriggerCompleted, 1)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scorch) fireAsyncError(err error) {
|
||||
if s.onAsyncError != nil {
|
||||
s.onAsyncError(err)
|
||||
}
|
||||
atomic.AddUint64(&s.stats.TotOnErrors, 1)
|
||||
}
|
||||
|
||||
func (s *Scorch) Open() error {
|
||||
err := s.openBolt()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
s.asyncTasks.Add(1)
|
||||
go s.introducerLoop()
|
||||
|
||||
if !s.readOnly && s.path != "" {
|
||||
s.asyncTasks.Add(1)
|
||||
go s.persisterLoop()
|
||||
s.asyncTasks.Add(1)
|
||||
go s.mergerLoop()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Scorch) openBolt() error {
|
||||
var ok bool
|
||||
s.path, ok = s.config["path"].(string)
|
||||
if !ok {
|
||||
return fmt.Errorf("must specify path")
|
||||
}
|
||||
if s.path == "" {
|
||||
s.unsafeBatch = true
|
||||
}
|
||||
|
||||
var rootBoltOpt *bolt.Options
|
||||
if s.readOnly {
|
||||
rootBoltOpt = &bolt.Options{
|
||||
ReadOnly: true,
|
||||
}
|
||||
} else {
|
||||
if s.path != "" {
|
||||
err := os.MkdirAll(s.path, 0700)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rootBoltPath := s.path + string(os.PathSeparator) + "root.bolt"
|
||||
var err error
|
||||
if s.path != "" {
|
||||
s.rootBolt, err = bolt.Open(rootBoltPath, 0600, rootBoltOpt)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// now see if there is any existing state to load
|
||||
err = s.loadFromBolt()
|
||||
if err != nil {
|
||||
_ = s.Close()
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, uint64(len(s.root.segment)))
|
||||
|
||||
s.introductions = make(chan *segmentIntroduction)
|
||||
s.persists = make(chan *persistIntroduction)
|
||||
s.merges = make(chan *segmentMerge)
|
||||
s.introducerNotifier = make(chan *epochWatcher, 1)
|
||||
s.persisterNotifier = make(chan *epochWatcher, 1)
|
||||
s.closeCh = make(chan struct{})
|
||||
s.forceMergeRequestCh = make(chan *mergerCtrl, 1)
|
||||
|
||||
if !s.readOnly && s.path != "" {
|
||||
err := s.removeOldZapFiles() // Before persister or merger create any new files.
|
||||
if err != nil {
|
||||
_ = s.Close()
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
s.numSnapshotsToKeep = NumSnapshotsToKeep
|
||||
if v, ok := s.config["numSnapshotsToKeep"]; ok {
|
||||
var t int
|
||||
if t, err = parseToInteger(v); err != nil {
|
||||
return fmt.Errorf("numSnapshotsToKeep parse err: %v", err)
|
||||
}
|
||||
if t > 0 {
|
||||
s.numSnapshotsToKeep = t
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Scorch) Close() (err error) {
|
||||
startTime := time.Now()
|
||||
defer func() {
|
||||
s.fireEvent(EventKindClose, time.Since(startTime))
|
||||
}()
|
||||
|
||||
s.fireEvent(EventKindCloseStart, 0)
|
||||
|
||||
// signal to async tasks we want to close
|
||||
close(s.closeCh)
|
||||
// wait for them to close
|
||||
s.asyncTasks.Wait()
|
||||
// now close the root bolt
|
||||
if s.rootBolt != nil {
|
||||
err = s.rootBolt.Close()
|
||||
s.rootLock.Lock()
|
||||
if s.root != nil {
|
||||
err2 := s.root.DecRef()
|
||||
if err == nil {
|
||||
err = err2
|
||||
}
|
||||
}
|
||||
s.root = nil
|
||||
s.rootLock.Unlock()
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (s *Scorch) Update(doc index.Document) error {
|
||||
b := index.NewBatch()
|
||||
b.Update(doc)
|
||||
return s.Batch(b)
|
||||
}
|
||||
|
||||
func (s *Scorch) Delete(id string) error {
|
||||
b := index.NewBatch()
|
||||
b.Delete(id)
|
||||
return s.Batch(b)
|
||||
}
|
||||
|
||||
// Batch applices a batch of changes to the index atomically
|
||||
func (s *Scorch) Batch(batch *index.Batch) (err error) {
|
||||
start := time.Now()
|
||||
|
||||
defer func() {
|
||||
s.fireEvent(EventKindBatchIntroduction, time.Since(start))
|
||||
}()
|
||||
|
||||
resultChan := make(chan index.Document, len(batch.IndexOps))
|
||||
|
||||
var numUpdates uint64
|
||||
var numDeletes uint64
|
||||
var numPlainTextBytes uint64
|
||||
var ids []string
|
||||
for docID, doc := range batch.IndexOps {
|
||||
if doc != nil {
|
||||
// insert _id field
|
||||
doc.AddIDField()
|
||||
numUpdates++
|
||||
numPlainTextBytes += doc.NumPlainTextBytes()
|
||||
} else {
|
||||
numDeletes++
|
||||
}
|
||||
ids = append(ids, docID)
|
||||
}
|
||||
|
||||
// FIXME could sort ids list concurrent with analysis?
|
||||
|
||||
if numUpdates > 0 {
|
||||
go func() {
|
||||
for k := range batch.IndexOps {
|
||||
doc := batch.IndexOps[k]
|
||||
if doc != nil {
|
||||
// put the work on the queue
|
||||
s.analysisQueue.Queue(func() {
|
||||
analyze(doc)
|
||||
resultChan <- doc
|
||||
})
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// wait for analysis result
|
||||
analysisResults := make([]index.Document, int(numUpdates))
|
||||
var itemsDeQueued uint64
|
||||
var totalAnalysisSize int
|
||||
for itemsDeQueued < numUpdates {
|
||||
result := <-resultChan
|
||||
resultSize := result.Size()
|
||||
atomic.AddUint64(&s.iStats.analysisBytesAdded, uint64(resultSize))
|
||||
totalAnalysisSize += resultSize
|
||||
analysisResults[itemsDeQueued] = result
|
||||
itemsDeQueued++
|
||||
}
|
||||
close(resultChan)
|
||||
defer atomic.AddUint64(&s.iStats.analysisBytesRemoved, uint64(totalAnalysisSize))
|
||||
|
||||
atomic.AddUint64(&s.stats.TotAnalysisTime, uint64(time.Since(start)))
|
||||
|
||||
indexStart := time.Now()
|
||||
|
||||
// notify handlers that we're about to introduce a segment
|
||||
s.fireEvent(EventKindBatchIntroductionStart, 0)
|
||||
|
||||
var newSegment segment.Segment
|
||||
var bufBytes uint64
|
||||
if len(analysisResults) > 0 {
|
||||
newSegment, bufBytes, err = s.segPlugin.New(analysisResults)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
atomic.AddUint64(&s.iStats.newSegBufBytesAdded, bufBytes)
|
||||
} else {
|
||||
atomic.AddUint64(&s.stats.TotBatchesEmpty, 1)
|
||||
}
|
||||
|
||||
err = s.prepareSegment(newSegment, ids, batch.InternalOps, batch.PersistedCallback())
|
||||
if err != nil {
|
||||
if newSegment != nil {
|
||||
_ = newSegment.Close()
|
||||
}
|
||||
atomic.AddUint64(&s.stats.TotOnErrors, 1)
|
||||
} else {
|
||||
atomic.AddUint64(&s.stats.TotUpdates, numUpdates)
|
||||
atomic.AddUint64(&s.stats.TotDeletes, numDeletes)
|
||||
atomic.AddUint64(&s.stats.TotBatches, 1)
|
||||
atomic.AddUint64(&s.stats.TotIndexedPlainTextBytes, numPlainTextBytes)
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.iStats.newSegBufBytesRemoved, bufBytes)
|
||||
atomic.AddUint64(&s.stats.TotIndexTime, uint64(time.Since(indexStart)))
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
|
||||
internalOps map[string][]byte, persistedCallback index.BatchCallback) error {
|
||||
|
||||
// new introduction
|
||||
introduction := &segmentIntroduction{
|
||||
id: atomic.AddUint64(&s.nextSegmentID, 1),
|
||||
data: newSegment,
|
||||
ids: ids,
|
||||
obsoletes: make(map[uint64]*roaring.Bitmap),
|
||||
internal: internalOps,
|
||||
applied: make(chan error),
|
||||
persistedCallback: persistedCallback,
|
||||
}
|
||||
|
||||
if !s.unsafeBatch {
|
||||
introduction.persisted = make(chan error, 1)
|
||||
}
|
||||
|
||||
// optimistically prepare obsoletes outside of rootLock
|
||||
s.rootLock.RLock()
|
||||
root := s.root
|
||||
root.AddRef()
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
defer func() { _ = root.DecRef() }()
|
||||
|
||||
for _, seg := range root.segment {
|
||||
delta, err := seg.segment.DocNumbers(ids)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
introduction.obsoletes[seg.id] = delta
|
||||
}
|
||||
|
||||
introStartTime := time.Now()
|
||||
|
||||
s.introductions <- introduction
|
||||
|
||||
// block until this segment is applied
|
||||
err := <-introduction.applied
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if introduction.persisted != nil {
|
||||
err = <-introduction.persisted
|
||||
}
|
||||
|
||||
introTime := uint64(time.Since(introStartTime))
|
||||
atomic.AddUint64(&s.stats.TotBatchIntroTime, introTime)
|
||||
if atomic.LoadUint64(&s.stats.MaxBatchIntroTime) < introTime {
|
||||
atomic.StoreUint64(&s.stats.MaxBatchIntroTime, introTime)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *Scorch) SetInternal(key, val []byte) error {
|
||||
b := index.NewBatch()
|
||||
b.SetInternal(key, val)
|
||||
return s.Batch(b)
|
||||
}
|
||||
|
||||
func (s *Scorch) DeleteInternal(key []byte) error {
|
||||
b := index.NewBatch()
|
||||
b.DeleteInternal(key)
|
||||
return s.Batch(b)
|
||||
}
|
||||
|
||||
// Reader returns a low-level accessor on the index data. Close it to
|
||||
// release associated resources.
|
||||
func (s *Scorch) Reader() (index.IndexReader, error) {
|
||||
return s.currentSnapshot(), nil
|
||||
}
|
||||
|
||||
func (s *Scorch) currentSnapshot() *IndexSnapshot {
|
||||
s.rootLock.RLock()
|
||||
rv := s.root
|
||||
if rv != nil {
|
||||
rv.AddRef()
|
||||
}
|
||||
s.rootLock.RUnlock()
|
||||
return rv
|
||||
}
|
||||
|
||||
func (s *Scorch) Stats() json.Marshaler {
|
||||
return &s.stats
|
||||
}
|
||||
|
||||
func (s *Scorch) diskFileStats(rootSegmentPaths map[string]struct{}) (uint64,
|
||||
uint64, uint64) {
|
||||
var numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot uint64
|
||||
if s.path != "" {
|
||||
finfos, err := ioutil.ReadDir(s.path)
|
||||
if err == nil {
|
||||
for _, finfo := range finfos {
|
||||
if !finfo.IsDir() {
|
||||
numBytesUsedDisk += uint64(finfo.Size())
|
||||
numFilesOnDisk++
|
||||
if rootSegmentPaths != nil {
|
||||
fname := s.path + string(os.PathSeparator) + finfo.Name()
|
||||
if _, fileAtRoot := rootSegmentPaths[fname]; fileAtRoot {
|
||||
numBytesOnDiskByRoot += uint64(finfo.Size())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// if no root files path given, then consider all disk files.
|
||||
if rootSegmentPaths == nil {
|
||||
return numFilesOnDisk, numBytesUsedDisk, numBytesUsedDisk
|
||||
}
|
||||
|
||||
return numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot
|
||||
}
|
||||
|
||||
func (s *Scorch) StatsMap() map[string]interface{} {
|
||||
m := s.stats.ToMap()
|
||||
|
||||
indexSnapshot := s.currentSnapshot()
|
||||
defer func() {
|
||||
_ = indexSnapshot.Close()
|
||||
}()
|
||||
|
||||
rootSegPaths := indexSnapshot.diskSegmentsPaths()
|
||||
|
||||
s.rootLock.RLock()
|
||||
m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval))
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot := s.diskFileStats(rootSegPaths)
|
||||
|
||||
m["CurOnDiskBytes"] = numBytesUsedDisk
|
||||
m["CurOnDiskFiles"] = numFilesOnDisk
|
||||
|
||||
// TODO: consider one day removing these backwards compatible
|
||||
// names for apps using the old names
|
||||
m["updates"] = m["TotUpdates"]
|
||||
m["deletes"] = m["TotDeletes"]
|
||||
m["batches"] = m["TotBatches"]
|
||||
m["errors"] = m["TotOnErrors"]
|
||||
m["analysis_time"] = m["TotAnalysisTime"]
|
||||
m["index_time"] = m["TotIndexTime"]
|
||||
m["term_searchers_started"] = m["TotTermSearchersStarted"]
|
||||
m["term_searchers_finished"] = m["TotTermSearchersFinished"]
|
||||
m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"]
|
||||
m["num_items_introduced"] = m["TotIntroducedItems"]
|
||||
m["num_items_persisted"] = m["TotPersistedItems"]
|
||||
m["num_recs_to_persist"] = m["TotItemsToPersist"]
|
||||
// total disk bytes found in index directory inclusive of older snapshots
|
||||
m["num_bytes_used_disk"] = numBytesUsedDisk
|
||||
// total disk bytes by the latest root index, exclusive of older snapshots
|
||||
m["num_bytes_used_disk_by_root"] = numBytesOnDiskByRoot
|
||||
// num_bytes_used_disk_by_root_reclaimable is an approximation about the
|
||||
// reclaimable disk space in an index. (eg: from a full compaction)
|
||||
m["num_bytes_used_disk_by_root_reclaimable"] = uint64(float64(numBytesOnDiskByRoot) *
|
||||
indexSnapshot.reClaimableDocsRatio())
|
||||
m["num_files_on_disk"] = numFilesOnDisk
|
||||
m["num_root_memorysegments"] = m["TotMemorySegmentsAtRoot"]
|
||||
m["num_root_filesegments"] = m["TotFileSegmentsAtRoot"]
|
||||
m["num_persister_nap_pause_completed"] = m["TotPersisterNapPauseCompleted"]
|
||||
m["num_persister_nap_merger_break"] = m["TotPersisterMergerNapBreak"]
|
||||
m["total_compaction_written_bytes"] = m["TotFileMergeWrittenBytes"]
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
func (s *Scorch) Analyze(d index.Document) {
|
||||
analyze(d)
|
||||
}
|
||||
|
||||
func analyze(d index.Document) {
|
||||
d.VisitFields(func(field index.Field) {
|
||||
if field.Options().IsIndexed() {
|
||||
field.Analyze()
|
||||
|
||||
if d.HasComposite() && field.Name() != "_id" {
|
||||
// see if any of the composite fields need this
|
||||
d.VisitComposite(func(cf index.CompositeField) {
|
||||
cf.Compose(field.Name(), field.AnalyzedLength(), field.AnalyzedTokenFrequencies())
|
||||
})
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func (s *Scorch) AddEligibleForRemoval(epoch uint64) {
|
||||
s.rootLock.Lock()
|
||||
if s.root == nil || s.root.epoch != epoch {
|
||||
s.eligibleForRemoval = append(s.eligibleForRemoval, epoch)
|
||||
}
|
||||
s.rootLock.Unlock()
|
||||
}
|
||||
|
||||
func (s *Scorch) MemoryUsed() (memUsed uint64) {
|
||||
indexSnapshot := s.currentSnapshot()
|
||||
if indexSnapshot == nil {
|
||||
return
|
||||
}
|
||||
|
||||
defer func() {
|
||||
_ = indexSnapshot.Close()
|
||||
}()
|
||||
|
||||
// Account for current root snapshot overhead
|
||||
memUsed += uint64(indexSnapshot.Size())
|
||||
|
||||
// Account for snapshot that the persister may be working on
|
||||
persistEpoch := atomic.LoadUint64(&s.iStats.persistEpoch)
|
||||
persistSnapshotSize := atomic.LoadUint64(&s.iStats.persistSnapshotSize)
|
||||
if persistEpoch != 0 && indexSnapshot.epoch > persistEpoch {
|
||||
// the snapshot that the persister is working on isn't the same as
|
||||
// the current snapshot
|
||||
memUsed += persistSnapshotSize
|
||||
}
|
||||
|
||||
// Account for snapshot that the merger may be working on
|
||||
mergeEpoch := atomic.LoadUint64(&s.iStats.mergeEpoch)
|
||||
mergeSnapshotSize := atomic.LoadUint64(&s.iStats.mergeSnapshotSize)
|
||||
if mergeEpoch != 0 && indexSnapshot.epoch > mergeEpoch {
|
||||
// the snapshot that the merger is working on isn't the same as
|
||||
// the current snapshot
|
||||
memUsed += mergeSnapshotSize
|
||||
}
|
||||
|
||||
memUsed += (atomic.LoadUint64(&s.iStats.newSegBufBytesAdded) -
|
||||
atomic.LoadUint64(&s.iStats.newSegBufBytesRemoved))
|
||||
|
||||
memUsed += (atomic.LoadUint64(&s.iStats.analysisBytesAdded) -
|
||||
atomic.LoadUint64(&s.iStats.analysisBytesRemoved))
|
||||
|
||||
return memUsed
|
||||
}
|
||||
|
||||
func (s *Scorch) markIneligibleForRemoval(filename string) {
|
||||
s.rootLock.Lock()
|
||||
s.ineligibleForRemoval[filename] = true
|
||||
s.rootLock.Unlock()
|
||||
}
|
||||
|
||||
func (s *Scorch) unmarkIneligibleForRemoval(filename string) {
|
||||
s.rootLock.Lock()
|
||||
delete(s.ineligibleForRemoval, filename)
|
||||
s.rootLock.Unlock()
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterIndexType(Name, NewScorch)
|
||||
}
|
||||
|
||||
func parseToInteger(i interface{}) (int, error) {
|
||||
switch v := i.(type) {
|
||||
case float64:
|
||||
return int(v), nil
|
||||
case int:
|
||||
return v, nil
|
||||
|
||||
default:
|
||||
return 0, fmt.Errorf("expects int or float64 value")
|
||||
}
|
||||
}
|
133
vendor/github.com/blevesearch/bleve/v2/index/scorch/segment_plugin.go
generated
vendored
Normal file
133
vendor/github.com/blevesearch/bleve/v2/index/scorch/segment_plugin.go
generated
vendored
Normal file
|
@ -0,0 +1,133 @@
|
|||
// Copyright (c) 2019 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
|
||||
segment "github.com/blevesearch/scorch_segment_api"
|
||||
|
||||
zapv11 "github.com/blevesearch/zapx/v11"
|
||||
zapv12 "github.com/blevesearch/zapx/v12"
|
||||
zapv13 "github.com/blevesearch/zapx/v13"
|
||||
zapv14 "github.com/blevesearch/zapx/v14"
|
||||
zapv15 "github.com/blevesearch/zapx/v15"
|
||||
)
|
||||
|
||||
// SegmentPlugin represents the essential functions required by a package to plug in
|
||||
// it's segment implementation
|
||||
type SegmentPlugin interface {
|
||||
|
||||
// Type is the name for this segment plugin
|
||||
Type() string
|
||||
|
||||
// Version is a numeric value identifying a specific version of this type.
|
||||
// When incompatible changes are made to a particular type of plugin, the
|
||||
// version must be incremented.
|
||||
Version() uint32
|
||||
|
||||
// New takes a set of Documents and turns them into a new Segment
|
||||
New(results []index.Document) (segment.Segment, uint64, error)
|
||||
|
||||
// Open attempts to open the file at the specified path and
|
||||
// return the corresponding Segment
|
||||
Open(path string) (segment.Segment, error)
|
||||
|
||||
// Merge takes a set of Segments, and creates a new segment on disk at
|
||||
// the specified path.
|
||||
// Drops is a set of bitmaps (one for each segment) indicating which
|
||||
// documents can be dropped from the segments during the merge.
|
||||
// If the closeCh channel is closed, Merge will cease doing work at
|
||||
// the next opportunity, and return an error (closed).
|
||||
// StatsReporter can optionally be provided, in which case progress
|
||||
// made during the merge is reported while operation continues.
|
||||
// Returns:
|
||||
// A slice of new document numbers (one for each input segment),
|
||||
// this allows the caller to know a particular document's new
|
||||
// document number in the newly merged segment.
|
||||
// The number of bytes written to the new segment file.
|
||||
// An error, if any occurred.
|
||||
Merge(segments []segment.Segment, drops []*roaring.Bitmap, path string,
|
||||
closeCh chan struct{}, s segment.StatsReporter) (
|
||||
[][]uint64, uint64, error)
|
||||
}
|
||||
|
||||
var supportedSegmentPlugins map[string]map[uint32]SegmentPlugin
|
||||
var defaultSegmentPlugin SegmentPlugin
|
||||
|
||||
func init() {
|
||||
ResetSegmentPlugins()
|
||||
RegisterSegmentPlugin(&zapv15.ZapPlugin{}, true)
|
||||
RegisterSegmentPlugin(&zapv14.ZapPlugin{}, false)
|
||||
RegisterSegmentPlugin(&zapv13.ZapPlugin{}, false)
|
||||
RegisterSegmentPlugin(&zapv12.ZapPlugin{}, false)
|
||||
RegisterSegmentPlugin(&zapv11.ZapPlugin{}, false)
|
||||
}
|
||||
|
||||
func ResetSegmentPlugins() {
|
||||
supportedSegmentPlugins = map[string]map[uint32]SegmentPlugin{}
|
||||
}
|
||||
|
||||
func RegisterSegmentPlugin(plugin SegmentPlugin, makeDefault bool) {
|
||||
if _, ok := supportedSegmentPlugins[plugin.Type()]; !ok {
|
||||
supportedSegmentPlugins[plugin.Type()] = map[uint32]SegmentPlugin{}
|
||||
}
|
||||
supportedSegmentPlugins[plugin.Type()][plugin.Version()] = plugin
|
||||
if makeDefault {
|
||||
defaultSegmentPlugin = plugin
|
||||
}
|
||||
}
|
||||
|
||||
func SupportedSegmentTypes() (rv []string) {
|
||||
for k := range supportedSegmentPlugins {
|
||||
rv = append(rv, k)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func SupportedSegmentTypeVersions(typ string) (rv []uint32) {
|
||||
for k := range supportedSegmentPlugins[typ] {
|
||||
rv = append(rv, k)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func chooseSegmentPlugin(forcedSegmentType string,
|
||||
forcedSegmentVersion uint32) (SegmentPlugin, error) {
|
||||
if versions, ok := supportedSegmentPlugins[forcedSegmentType]; ok {
|
||||
if segPlugin, ok := versions[uint32(forcedSegmentVersion)]; ok {
|
||||
return segPlugin, nil
|
||||
}
|
||||
return nil, fmt.Errorf(
|
||||
"unsupported version %d for segment type: %s, supported: %v",
|
||||
forcedSegmentVersion, forcedSegmentType,
|
||||
SupportedSegmentTypeVersions(forcedSegmentType))
|
||||
}
|
||||
return nil, fmt.Errorf("unsupported segment type: %s, supported: %v",
|
||||
forcedSegmentType, SupportedSegmentTypes())
|
||||
}
|
||||
|
||||
func (s *Scorch) loadSegmentPlugin(forcedSegmentType string,
|
||||
forcedSegmentVersion uint32) error {
|
||||
segPlugin, err := chooseSegmentPlugin(forcedSegmentType,
|
||||
forcedSegmentVersion)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s.segPlugin = segPlugin
|
||||
return nil
|
||||
}
|
764
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index.go
generated
vendored
Normal file
764
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index.go
generated
vendored
Normal file
|
@ -0,0 +1,764 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"sort"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/v2/document"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment "github.com/blevesearch/scorch_segment_api"
|
||||
"github.com/couchbase/vellum"
|
||||
lev "github.com/couchbase/vellum/levenshtein"
|
||||
)
|
||||
|
||||
// re usable, threadsafe levenshtein builders
|
||||
var lb1, lb2 *lev.LevenshteinAutomatonBuilder
|
||||
|
||||
type asynchSegmentResult struct {
|
||||
dict segment.TermDictionary
|
||||
dictItr segment.DictionaryIterator
|
||||
|
||||
index int
|
||||
docs *roaring.Bitmap
|
||||
|
||||
postings segment.PostingsList
|
||||
|
||||
err error
|
||||
}
|
||||
|
||||
var reflectStaticSizeIndexSnapshot int
|
||||
|
||||
func init() {
|
||||
var is interface{} = IndexSnapshot{}
|
||||
reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size())
|
||||
var err error
|
||||
lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err))
|
||||
}
|
||||
lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err))
|
||||
}
|
||||
}
|
||||
|
||||
type IndexSnapshot struct {
|
||||
parent *Scorch
|
||||
segment []*SegmentSnapshot
|
||||
offsets []uint64
|
||||
internal map[string][]byte
|
||||
epoch uint64
|
||||
size uint64
|
||||
creator string
|
||||
|
||||
m sync.Mutex // Protects the fields that follow.
|
||||
refs int64
|
||||
|
||||
m2 sync.Mutex // Protects the fields that follow.
|
||||
fieldTFRs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) Segments() []*SegmentSnapshot {
|
||||
return i.segment
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) Internal() map[string][]byte {
|
||||
return i.internal
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) AddRef() {
|
||||
i.m.Lock()
|
||||
i.refs++
|
||||
i.m.Unlock()
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DecRef() (err error) {
|
||||
i.m.Lock()
|
||||
i.refs--
|
||||
if i.refs == 0 {
|
||||
for _, s := range i.segment {
|
||||
if s != nil {
|
||||
err2 := s.segment.DecRef()
|
||||
if err == nil {
|
||||
err = err2
|
||||
}
|
||||
}
|
||||
}
|
||||
if i.parent != nil {
|
||||
go i.parent.AddEligibleForRemoval(i.epoch)
|
||||
}
|
||||
}
|
||||
i.m.Unlock()
|
||||
return err
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) Close() error {
|
||||
return i.DecRef()
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) Size() int {
|
||||
return int(i.size)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) updateSize() {
|
||||
i.size += uint64(reflectStaticSizeIndexSnapshot)
|
||||
for _, s := range i.segment {
|
||||
i.size += uint64(s.Size())
|
||||
}
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string,
|
||||
makeItr func(i segment.TermDictionary) segment.DictionaryIterator,
|
||||
randomLookup bool) (*IndexSnapshotFieldDict, error) {
|
||||
|
||||
results := make(chan *asynchSegmentResult)
|
||||
for index, segment := range i.segment {
|
||||
go func(index int, segment *SegmentSnapshot) {
|
||||
dict, err := segment.segment.Dictionary(field)
|
||||
if err != nil {
|
||||
results <- &asynchSegmentResult{err: err}
|
||||
} else {
|
||||
if randomLookup {
|
||||
results <- &asynchSegmentResult{dict: dict}
|
||||
} else {
|
||||
results <- &asynchSegmentResult{dictItr: makeItr(dict)}
|
||||
}
|
||||
}
|
||||
}(index, segment)
|
||||
}
|
||||
|
||||
var err error
|
||||
rv := &IndexSnapshotFieldDict{
|
||||
snapshot: i,
|
||||
cursors: make([]*segmentDictCursor, 0, len(i.segment)),
|
||||
}
|
||||
for count := 0; count < len(i.segment); count++ {
|
||||
asr := <-results
|
||||
if asr.err != nil && err == nil {
|
||||
err = asr.err
|
||||
} else {
|
||||
if !randomLookup {
|
||||
next, err2 := asr.dictItr.Next()
|
||||
if err2 != nil && err == nil {
|
||||
err = err2
|
||||
}
|
||||
if next != nil {
|
||||
rv.cursors = append(rv.cursors, &segmentDictCursor{
|
||||
itr: asr.dictItr,
|
||||
curr: *next,
|
||||
})
|
||||
}
|
||||
} else {
|
||||
rv.cursors = append(rv.cursors, &segmentDictCursor{
|
||||
dict: asr.dict,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
// after ensuring we've read all items on channel
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if !randomLookup {
|
||||
// prepare heap
|
||||
heap.Init(rv)
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.AutomatonIterator(nil, nil, nil)
|
||||
}, false)
|
||||
}
|
||||
|
||||
// calculateExclusiveEndFromInclusiveEnd produces the next key
|
||||
// when sorting using memcmp style comparisons, suitable to
|
||||
// use as the end key in a traditional (inclusive, exclusive]
|
||||
// start/end range
|
||||
func calculateExclusiveEndFromInclusiveEnd(inclusiveEnd []byte) []byte {
|
||||
rv := inclusiveEnd
|
||||
if len(inclusiveEnd) > 0 {
|
||||
rv = make([]byte, len(inclusiveEnd))
|
||||
copy(rv, inclusiveEnd)
|
||||
if rv[len(rv)-1] < 0xff {
|
||||
// last byte can be incremented by one
|
||||
rv[len(rv)-1]++
|
||||
} else {
|
||||
// last byte is already 0xff, so append 0
|
||||
// next key is simply one byte longer
|
||||
rv = append(rv, 0x0)
|
||||
}
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte,
|
||||
endTerm []byte) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
endTermExclusive := calculateExclusiveEndFromInclusiveEnd(endTerm)
|
||||
return i.AutomatonIterator(nil, startTerm, endTermExclusive)
|
||||
}, false)
|
||||
}
|
||||
|
||||
// calculateExclusiveEndFromPrefix produces the first key that
|
||||
// does not have the same prefix as the input bytes, suitable
|
||||
// to use as the end key in a traditional (inclusive, exclusive]
|
||||
// start/end range
|
||||
func calculateExclusiveEndFromPrefix(in []byte) []byte {
|
||||
rv := make([]byte, len(in))
|
||||
copy(rv, in)
|
||||
for i := len(rv) - 1; i >= 0; i-- {
|
||||
rv[i] = rv[i] + 1
|
||||
if rv[i] != 0 {
|
||||
return rv // didn't overflow, so stop
|
||||
}
|
||||
}
|
||||
// all bytes were 0xff, so return nil
|
||||
// as there is no end key for this prefix
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictPrefix(field string,
|
||||
termPrefix []byte) (index.FieldDict, error) {
|
||||
termPrefixEnd := calculateExclusiveEndFromPrefix(termPrefix)
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.AutomatonIterator(nil, termPrefix, termPrefixEnd)
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictRegexp(field string,
|
||||
termRegex string) (index.FieldDict, error) {
|
||||
// TODO: potential optimization where the literal prefix represents the,
|
||||
// entire regexp, allowing us to use PrefixIterator(prefixTerm)?
|
||||
|
||||
a, prefixBeg, prefixEnd, err := parseRegexp(termRegex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.AutomatonIterator(a, prefixBeg, prefixEnd)
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) getLevAutomaton(term string,
|
||||
fuzziness uint8) (vellum.Automaton, error) {
|
||||
if fuzziness == 1 {
|
||||
return lb1.BuildDfa(term, fuzziness)
|
||||
} else if fuzziness == 2 {
|
||||
return lb2.BuildDfa(term, fuzziness)
|
||||
}
|
||||
return nil, fmt.Errorf("fuzziness exceeds the max limit")
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictFuzzy(field string,
|
||||
term string, fuzziness int, prefix string) (index.FieldDict, error) {
|
||||
a, err := i.getLevAutomaton(term, uint8(fuzziness))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var prefixBeg, prefixEnd []byte
|
||||
if prefix != "" {
|
||||
prefixBeg = []byte(prefix)
|
||||
prefixEnd = calculateExclusiveEndFromPrefix(prefixBeg)
|
||||
}
|
||||
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.AutomatonIterator(a, prefixBeg, prefixEnd)
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContains, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, nil, true)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {
|
||||
results := make(chan *asynchSegmentResult)
|
||||
for index, segment := range i.segment {
|
||||
go func(index int, segment *SegmentSnapshot) {
|
||||
results <- &asynchSegmentResult{
|
||||
index: index,
|
||||
docs: segment.DocNumbersLive(),
|
||||
}
|
||||
}(index, segment)
|
||||
}
|
||||
|
||||
return i.newDocIDReader(results)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DocIDReaderOnly(ids []string) (index.DocIDReader, error) {
|
||||
results := make(chan *asynchSegmentResult)
|
||||
for index, segment := range i.segment {
|
||||
go func(index int, segment *SegmentSnapshot) {
|
||||
docs, err := segment.DocNumbers(ids)
|
||||
if err != nil {
|
||||
results <- &asynchSegmentResult{err: err}
|
||||
} else {
|
||||
results <- &asynchSegmentResult{
|
||||
index: index,
|
||||
docs: docs,
|
||||
}
|
||||
}
|
||||
}(index, segment)
|
||||
}
|
||||
|
||||
return i.newDocIDReader(results)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) newDocIDReader(results chan *asynchSegmentResult) (index.DocIDReader, error) {
|
||||
rv := &IndexSnapshotDocIDReader{
|
||||
snapshot: i,
|
||||
iterators: make([]roaring.IntIterable, len(i.segment)),
|
||||
}
|
||||
var err error
|
||||
for count := 0; count < len(i.segment); count++ {
|
||||
asr := <-results
|
||||
if asr.err != nil {
|
||||
if err == nil {
|
||||
// returns the first error encountered
|
||||
err = asr.err
|
||||
}
|
||||
} else if err == nil {
|
||||
rv.iterators[asr.index] = asr.docs.Iterator()
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) Fields() ([]string, error) {
|
||||
// FIXME not making this concurrent for now as it's not used in hot path
|
||||
// of any searches at the moment (just a debug aid)
|
||||
fieldsMap := map[string]struct{}{}
|
||||
for _, segment := range i.segment {
|
||||
fields := segment.Fields()
|
||||
for _, field := range fields {
|
||||
fieldsMap[field] = struct{}{}
|
||||
}
|
||||
}
|
||||
rv := make([]string, 0, len(fieldsMap))
|
||||
for k := range fieldsMap {
|
||||
rv = append(rv, k)
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) GetInternal(key []byte) ([]byte, error) {
|
||||
return i.internal[string(key)], nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DocCount() (uint64, error) {
|
||||
var rv uint64
|
||||
for _, segment := range i.segment {
|
||||
rv += segment.Count()
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) Document(id string) (rv index.Document, err error) {
|
||||
// FIXME could be done more efficiently directly, but reusing for simplicity
|
||||
tfr, err := i.TermFieldReader([]byte(id), "_id", false, false, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if cerr := tfr.Close(); err == nil && cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
next, err := tfr.Next(nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if next == nil {
|
||||
// no such doc exists
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
docNum, err := docInternalToNumber(next.ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum)
|
||||
|
||||
rvd := document.NewDocument(id)
|
||||
err = i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, val []byte, pos []uint64) bool {
|
||||
if name == "_id" {
|
||||
return true
|
||||
}
|
||||
|
||||
// copy value, array positions to preserve them beyond the scope of this callback
|
||||
value := append([]byte(nil), val...)
|
||||
arrayPos := append([]uint64(nil), pos...)
|
||||
|
||||
switch typ {
|
||||
case 't':
|
||||
rvd.AddField(document.NewTextField(name, arrayPos, value))
|
||||
case 'n':
|
||||
rvd.AddField(document.NewNumericFieldFromBytes(name, arrayPos, value))
|
||||
case 'd':
|
||||
rvd.AddField(document.NewDateTimeFieldFromBytes(name, arrayPos, value))
|
||||
case 'b':
|
||||
rvd.AddField(document.NewBooleanFieldFromBytes(name, arrayPos, value))
|
||||
case 'g':
|
||||
rvd.AddField(document.NewGeoPointFieldFromBytes(name, arrayPos, value))
|
||||
}
|
||||
|
||||
return true
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return rvd, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) segmentIndexAndLocalDocNumFromGlobal(docNum uint64) (int, uint64) {
|
||||
segmentIndex := sort.Search(len(i.offsets),
|
||||
func(x int) bool {
|
||||
return i.offsets[x] > docNum
|
||||
}) - 1
|
||||
|
||||
localDocNum := docNum - i.offsets[segmentIndex]
|
||||
return int(segmentIndex), localDocNum
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) {
|
||||
docNum, err := docInternalToNumber(id)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum)
|
||||
|
||||
v, err := i.segment[segmentIndex].DocID(localDocNum)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if v == nil {
|
||||
return "", fmt.Errorf("document number %d not found", docNum)
|
||||
}
|
||||
|
||||
return string(v), nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err error) {
|
||||
// FIXME could be done more efficiently directly, but reusing for simplicity
|
||||
tfr, err := i.TermFieldReader([]byte(id), "_id", false, false, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if cerr := tfr.Close(); err == nil && cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
next, err := tfr.Next(nil)
|
||||
if err != nil || next == nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return next.ID, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq,
|
||||
includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
|
||||
rv := i.allocTermFieldReaderDicts(field)
|
||||
|
||||
rv.term = term
|
||||
rv.field = field
|
||||
rv.snapshot = i
|
||||
if rv.postings == nil {
|
||||
rv.postings = make([]segment.PostingsList, len(i.segment))
|
||||
}
|
||||
if rv.iterators == nil {
|
||||
rv.iterators = make([]segment.PostingsIterator, len(i.segment))
|
||||
}
|
||||
rv.segmentOffset = 0
|
||||
rv.includeFreq = includeFreq
|
||||
rv.includeNorm = includeNorm
|
||||
rv.includeTermVectors = includeTermVectors
|
||||
rv.currPosting = nil
|
||||
rv.currID = rv.currID[:0]
|
||||
|
||||
if rv.dicts == nil {
|
||||
rv.dicts = make([]segment.TermDictionary, len(i.segment))
|
||||
for i, segment := range i.segment {
|
||||
dict, err := segment.segment.Dictionary(field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv.dicts[i] = dict
|
||||
}
|
||||
}
|
||||
|
||||
for i, segment := range i.segment {
|
||||
pl, err := rv.dicts[i].PostingsList(term, segment.deleted, rv.postings[i])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv.postings[i] = pl
|
||||
rv.iterators[i] = pl.Iterator(includeFreq, includeNorm, includeTermVectors, rv.iterators[i])
|
||||
}
|
||||
atomic.AddUint64(&i.parent.stats.TotTermSearchersStarted, uint64(1))
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) allocTermFieldReaderDicts(field string) (tfr *IndexSnapshotTermFieldReader) {
|
||||
i.m2.Lock()
|
||||
if i.fieldTFRs != nil {
|
||||
tfrs := i.fieldTFRs[field]
|
||||
last := len(tfrs) - 1
|
||||
if last >= 0 {
|
||||
tfr = tfrs[last]
|
||||
tfrs[last] = nil
|
||||
i.fieldTFRs[field] = tfrs[:last]
|
||||
i.m2.Unlock()
|
||||
return
|
||||
}
|
||||
}
|
||||
i.m2.Unlock()
|
||||
return &IndexSnapshotTermFieldReader{
|
||||
recycle: true,
|
||||
}
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) {
|
||||
if !tfr.recycle {
|
||||
// Do not recycle an optimized unadorned term field reader (used for
|
||||
// ConjunctionUnadorned or DisjunctionUnadorned), during when a fresh
|
||||
// roaring.Bitmap is built by AND-ing or OR-ing individual bitmaps,
|
||||
// and we'll need to release them for GC. (See MB-40916)
|
||||
return
|
||||
}
|
||||
|
||||
i.parent.rootLock.RLock()
|
||||
obsolete := i.parent.root != i
|
||||
i.parent.rootLock.RUnlock()
|
||||
if obsolete {
|
||||
// if we're not the current root (mutations happened), don't bother recycling
|
||||
return
|
||||
}
|
||||
|
||||
i.m2.Lock()
|
||||
if i.fieldTFRs == nil {
|
||||
i.fieldTFRs = map[string][]*IndexSnapshotTermFieldReader{}
|
||||
}
|
||||
i.fieldTFRs[tfr.field] = append(i.fieldTFRs[tfr.field], tfr)
|
||||
i.m2.Unlock()
|
||||
}
|
||||
|
||||
func docNumberToBytes(buf []byte, in uint64) []byte {
|
||||
if len(buf) != 8 {
|
||||
if cap(buf) >= 8 {
|
||||
buf = buf[0:8]
|
||||
} else {
|
||||
buf = make([]byte, 8)
|
||||
}
|
||||
}
|
||||
binary.BigEndian.PutUint64(buf, in)
|
||||
return buf
|
||||
}
|
||||
|
||||
func docInternalToNumber(in index.IndexInternalID) (uint64, error) {
|
||||
if len(in) != 8 {
|
||||
return 0, fmt.Errorf("wrong len for IndexInternalID: %q", in)
|
||||
}
|
||||
return binary.BigEndian.Uint64(in), nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) documentVisitFieldTermsOnSegment(
|
||||
segmentIndex int, localDocNum uint64, fields []string, cFields []string,
|
||||
visitor index.DocValueVisitor, dvs segment.DocVisitState) (
|
||||
cFieldsOut []string, dvsOut segment.DocVisitState, err error) {
|
||||
ss := i.segment[segmentIndex]
|
||||
|
||||
var vFields []string // fields that are visitable via the segment
|
||||
|
||||
ssv, ssvOk := ss.segment.(segment.DocValueVisitable)
|
||||
if ssvOk && ssv != nil {
|
||||
vFields, err = ssv.VisitableDocValueFields()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
var errCh chan error
|
||||
|
||||
// cFields represents the fields that we'll need from the
|
||||
// cachedDocs, and might be optionally be provided by the caller,
|
||||
// if the caller happens to know we're on the same segmentIndex
|
||||
// from a previous invocation
|
||||
if cFields == nil {
|
||||
cFields = subtractStrings(fields, vFields)
|
||||
|
||||
if !ss.cachedDocs.hasFields(cFields) {
|
||||
errCh = make(chan error, 1)
|
||||
|
||||
go func() {
|
||||
err := ss.cachedDocs.prepareFields(cFields, ss)
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
}
|
||||
close(errCh)
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
if ssvOk && ssv != nil && len(vFields) > 0 {
|
||||
dvs, err = ssv.VisitDocValues(localDocNum, fields, visitor, dvs)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if errCh != nil {
|
||||
err = <-errCh
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if len(cFields) > 0 {
|
||||
ss.cachedDocs.visitDoc(localDocNum, cFields, visitor)
|
||||
}
|
||||
|
||||
return cFields, dvs, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DocValueReader(fields []string) (
|
||||
index.DocValueReader, error) {
|
||||
return &DocValueReader{i: i, fields: fields, currSegmentIndex: -1}, nil
|
||||
}
|
||||
|
||||
type DocValueReader struct {
|
||||
i *IndexSnapshot
|
||||
fields []string
|
||||
dvs segment.DocVisitState
|
||||
|
||||
currSegmentIndex int
|
||||
currCachedFields []string
|
||||
}
|
||||
|
||||
func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID,
|
||||
visitor index.DocValueVisitor) (err error) {
|
||||
docNum, err := docInternalToNumber(id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
segmentIndex, localDocNum := dvr.i.segmentIndexAndLocalDocNumFromGlobal(docNum)
|
||||
if segmentIndex >= len(dvr.i.segment) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if dvr.currSegmentIndex != segmentIndex {
|
||||
dvr.currSegmentIndex = segmentIndex
|
||||
dvr.currCachedFields = nil
|
||||
}
|
||||
|
||||
dvr.currCachedFields, dvr.dvs, err = dvr.i.documentVisitFieldTermsOnSegment(
|
||||
dvr.currSegmentIndex, localDocNum, dvr.fields, dvr.currCachedFields, visitor, dvr.dvs)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DumpAll() chan interface{} {
|
||||
rv := make(chan interface{})
|
||||
go func() {
|
||||
close(rv)
|
||||
}()
|
||||
return rv
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DumpDoc(id string) chan interface{} {
|
||||
rv := make(chan interface{})
|
||||
go func() {
|
||||
close(rv)
|
||||
}()
|
||||
return rv
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DumpFields() chan interface{} {
|
||||
rv := make(chan interface{})
|
||||
go func() {
|
||||
close(rv)
|
||||
}()
|
||||
return rv
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) diskSegmentsPaths() map[string]struct{} {
|
||||
rv := make(map[string]struct{}, len(i.segment))
|
||||
for _, segmentSnapshot := range i.segment {
|
||||
if seg, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok {
|
||||
rv[seg.Path()] = struct{}{}
|
||||
}
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
// reClaimableDocsRatio gives a ratio about the obsoleted or
|
||||
// reclaimable documents present in a given index snapshot.
|
||||
func (i *IndexSnapshot) reClaimableDocsRatio() float64 {
|
||||
var totalCount, liveCount uint64
|
||||
for _, segmentSnapshot := range i.segment {
|
||||
if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok {
|
||||
totalCount += uint64(segmentSnapshot.FullSize())
|
||||
liveCount += uint64(segmentSnapshot.Count())
|
||||
}
|
||||
}
|
||||
|
||||
if totalCount > 0 {
|
||||
return float64(totalCount-liveCount) / float64(totalCount)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// subtractStrings returns set a minus elements of set b.
|
||||
func subtractStrings(a, b []string) []string {
|
||||
if len(b) == 0 {
|
||||
return a
|
||||
}
|
||||
|
||||
rv := make([]string, 0, len(a))
|
||||
OUTER:
|
||||
for _, as := range a {
|
||||
for _, bs := range b {
|
||||
if as == bs {
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
rv = append(rv, as)
|
||||
}
|
||||
return rv
|
||||
}
|
108
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_dict.go
generated
vendored
Normal file
108
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_dict.go
generated
vendored
Normal file
|
@ -0,0 +1,108 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment "github.com/blevesearch/scorch_segment_api"
|
||||
)
|
||||
|
||||
type segmentDictCursor struct {
|
||||
dict segment.TermDictionary
|
||||
itr segment.DictionaryIterator
|
||||
curr index.DictEntry
|
||||
}
|
||||
|
||||
type IndexSnapshotFieldDict struct {
|
||||
snapshot *IndexSnapshot
|
||||
cursors []*segmentDictCursor
|
||||
entry index.DictEntry
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotFieldDict) Len() int { return len(i.cursors) }
|
||||
func (i *IndexSnapshotFieldDict) Less(a, b int) bool {
|
||||
return i.cursors[a].curr.Term < i.cursors[b].curr.Term
|
||||
}
|
||||
func (i *IndexSnapshotFieldDict) Swap(a, b int) {
|
||||
i.cursors[a], i.cursors[b] = i.cursors[b], i.cursors[a]
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotFieldDict) Push(x interface{}) {
|
||||
i.cursors = append(i.cursors, x.(*segmentDictCursor))
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotFieldDict) Pop() interface{} {
|
||||
n := len(i.cursors)
|
||||
x := i.cursors[n-1]
|
||||
i.cursors = i.cursors[0 : n-1]
|
||||
return x
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
|
||||
if len(i.cursors) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
i.entry = i.cursors[0].curr
|
||||
next, err := i.cursors[0].itr.Next()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if next == nil {
|
||||
// at end of this cursor, remove it
|
||||
heap.Pop(i)
|
||||
} else {
|
||||
// modified heap, fix it
|
||||
i.cursors[0].curr = *next
|
||||
heap.Fix(i, 0)
|
||||
}
|
||||
// look for any other entries with the exact same term
|
||||
for len(i.cursors) > 0 && i.cursors[0].curr.Term == i.entry.Term {
|
||||
i.entry.Count += i.cursors[0].curr.Count
|
||||
next, err := i.cursors[0].itr.Next()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if next == nil {
|
||||
// at end of this cursor, remove it
|
||||
heap.Pop(i)
|
||||
} else {
|
||||
// modified heap, fix it
|
||||
i.cursors[0].curr = *next
|
||||
heap.Fix(i, 0)
|
||||
}
|
||||
}
|
||||
|
||||
return &i.entry, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotFieldDict) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotFieldDict) Contains(key []byte) (bool, error) {
|
||||
if len(i.cursors) == 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
for _, cursor := range i.cursors {
|
||||
if found, _ := cursor.dict.Contains(key); found {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
80
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_doc.go
generated
vendored
Normal file
80
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_doc.go
generated
vendored
Normal file
|
@ -0,0 +1,80 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"reflect"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeIndexSnapshotDocIDReader int
|
||||
|
||||
func init() {
|
||||
var isdr IndexSnapshotDocIDReader
|
||||
reflectStaticSizeIndexSnapshotDocIDReader = int(reflect.TypeOf(isdr).Size())
|
||||
}
|
||||
|
||||
type IndexSnapshotDocIDReader struct {
|
||||
snapshot *IndexSnapshot
|
||||
iterators []roaring.IntIterable
|
||||
segmentOffset int
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotDocIDReader) Size() int {
|
||||
return reflectStaticSizeIndexSnapshotDocIDReader + size.SizeOfPtr
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) {
|
||||
for i.segmentOffset < len(i.iterators) {
|
||||
if !i.iterators[i.segmentOffset].HasNext() {
|
||||
i.segmentOffset++
|
||||
continue
|
||||
}
|
||||
next := i.iterators[i.segmentOffset].Next()
|
||||
// make segment number into global number by adding offset
|
||||
globalOffset := i.snapshot.offsets[i.segmentOffset]
|
||||
return docNumberToBytes(nil, uint64(next)+globalOffset), nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotDocIDReader) Advance(ID index.IndexInternalID) (index.IndexInternalID, error) {
|
||||
// FIXME do something better
|
||||
next, err := i.Next()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if next == nil {
|
||||
return nil, nil
|
||||
}
|
||||
for bytes.Compare(next, ID) < 0 {
|
||||
next, err = i.Next()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if next == nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
return next, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotDocIDReader) Close() error {
|
||||
return nil
|
||||
}
|
188
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_tfr.go
generated
vendored
Normal file
188
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_tfr.go
generated
vendored
Normal file
|
@ -0,0 +1,188 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment "github.com/blevesearch/scorch_segment_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeIndexSnapshotTermFieldReader int
|
||||
|
||||
func init() {
|
||||
var istfr IndexSnapshotTermFieldReader
|
||||
reflectStaticSizeIndexSnapshotTermFieldReader = int(reflect.TypeOf(istfr).Size())
|
||||
}
|
||||
|
||||
type IndexSnapshotTermFieldReader struct {
|
||||
term []byte
|
||||
field string
|
||||
snapshot *IndexSnapshot
|
||||
dicts []segment.TermDictionary
|
||||
postings []segment.PostingsList
|
||||
iterators []segment.PostingsIterator
|
||||
segmentOffset int
|
||||
includeFreq bool
|
||||
includeNorm bool
|
||||
includeTermVectors bool
|
||||
currPosting segment.Posting
|
||||
currID index.IndexInternalID
|
||||
recycle bool
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotTermFieldReader) Size() int {
|
||||
sizeInBytes := reflectStaticSizeIndexSnapshotTermFieldReader + size.SizeOfPtr +
|
||||
len(i.term) +
|
||||
len(i.field) +
|
||||
len(i.currID)
|
||||
|
||||
for _, entry := range i.postings {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
for _, entry := range i.iterators {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
if i.currPosting != nil {
|
||||
sizeInBytes += i.currPosting.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
|
||||
rv := preAlloced
|
||||
if rv == nil {
|
||||
rv = &index.TermFieldDoc{}
|
||||
}
|
||||
// find the next hit
|
||||
for i.segmentOffset < len(i.iterators) {
|
||||
next, err := i.iterators[i.segmentOffset].Next()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if next != nil {
|
||||
// make segment number into global number by adding offset
|
||||
globalOffset := i.snapshot.offsets[i.segmentOffset]
|
||||
nnum := next.Number()
|
||||
rv.ID = docNumberToBytes(rv.ID, nnum+globalOffset)
|
||||
i.postingToTermFieldDoc(next, rv)
|
||||
|
||||
i.currID = rv.ID
|
||||
i.currPosting = next
|
||||
return rv, nil
|
||||
}
|
||||
i.segmentOffset++
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Posting, rv *index.TermFieldDoc) {
|
||||
if i.includeFreq {
|
||||
rv.Freq = next.Frequency()
|
||||
}
|
||||
if i.includeNorm {
|
||||
rv.Norm = next.Norm()
|
||||
}
|
||||
if i.includeTermVectors {
|
||||
locs := next.Locations()
|
||||
if cap(rv.Vectors) < len(locs) {
|
||||
rv.Vectors = make([]*index.TermFieldVector, len(locs))
|
||||
backing := make([]index.TermFieldVector, len(locs))
|
||||
for i := range backing {
|
||||
rv.Vectors[i] = &backing[i]
|
||||
}
|
||||
}
|
||||
rv.Vectors = rv.Vectors[:len(locs)]
|
||||
for i, loc := range locs {
|
||||
*rv.Vectors[i] = index.TermFieldVector{
|
||||
Start: loc.Start(),
|
||||
End: loc.End(),
|
||||
Pos: loc.Pos(),
|
||||
ArrayPositions: loc.ArrayPositions(),
|
||||
Field: loc.Field(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
|
||||
// FIXME do something better
|
||||
// for now, if we need to seek backwards, then restart from the beginning
|
||||
if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 {
|
||||
i2, err := i.snapshot.TermFieldReader(i.term, i.field,
|
||||
i.includeFreq, i.includeNorm, i.includeTermVectors)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// close the current term field reader before replacing it with a new one
|
||||
_ = i.Close()
|
||||
*i = *(i2.(*IndexSnapshotTermFieldReader))
|
||||
}
|
||||
num, err := docInternalToNumber(ID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error converting to doc number % x - %v", ID, err)
|
||||
}
|
||||
segIndex, ldocNum := i.snapshot.segmentIndexAndLocalDocNumFromGlobal(num)
|
||||
if segIndex >= len(i.snapshot.segment) {
|
||||
return nil, fmt.Errorf("computed segment index %d out of bounds %d",
|
||||
segIndex, len(i.snapshot.segment))
|
||||
}
|
||||
// skip directly to the target segment
|
||||
i.segmentOffset = segIndex
|
||||
next, err := i.iterators[i.segmentOffset].Advance(ldocNum)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if next == nil {
|
||||
// we jumped directly to the segment that should have contained it
|
||||
// but it wasn't there, so reuse Next() which should correctly
|
||||
// get the next hit after it (we moved i.segmentOffset)
|
||||
return i.Next(preAlloced)
|
||||
}
|
||||
|
||||
if preAlloced == nil {
|
||||
preAlloced = &index.TermFieldDoc{}
|
||||
}
|
||||
preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+
|
||||
i.snapshot.offsets[segIndex])
|
||||
i.postingToTermFieldDoc(next, preAlloced)
|
||||
i.currID = preAlloced.ID
|
||||
i.currPosting = next
|
||||
return preAlloced, nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotTermFieldReader) Count() uint64 {
|
||||
var rv uint64
|
||||
for _, posting := range i.postings {
|
||||
rv += posting.Count()
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotTermFieldReader) Close() error {
|
||||
if i.snapshot != nil {
|
||||
atomic.AddUint64(&i.snapshot.parent.stats.TotTermSearchersFinished, uint64(1))
|
||||
i.snapshot.recycleTermFieldReader(i)
|
||||
}
|
||||
return nil
|
||||
}
|
279
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_segment.go
generated
vendored
Normal file
279
vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_segment.go
generated
vendored
Normal file
|
@ -0,0 +1,279 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment "github.com/blevesearch/scorch_segment_api"
|
||||
)
|
||||
|
||||
var TermSeparator byte = 0xff
|
||||
|
||||
var TermSeparatorSplitSlice = []byte{TermSeparator}
|
||||
|
||||
type SegmentSnapshot struct {
|
||||
id uint64
|
||||
segment segment.Segment
|
||||
deleted *roaring.Bitmap
|
||||
creator string
|
||||
|
||||
cachedDocs *cachedDocs
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) Segment() segment.Segment {
|
||||
return s.segment
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) Deleted() *roaring.Bitmap {
|
||||
return s.deleted
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) Id() uint64 {
|
||||
return s.id
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) FullSize() int64 {
|
||||
return int64(s.segment.Count())
|
||||
}
|
||||
|
||||
func (s SegmentSnapshot) LiveSize() int64 {
|
||||
return int64(s.Count())
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) Close() error {
|
||||
return s.segment.Close()
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) VisitDocument(num uint64, visitor segment.StoredFieldValueVisitor) error {
|
||||
return s.segment.VisitStoredFields(num, visitor)
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) DocID(num uint64) ([]byte, error) {
|
||||
return s.segment.DocID(num)
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) Count() uint64 {
|
||||
rv := s.segment.Count()
|
||||
if s.deleted != nil {
|
||||
rv -= s.deleted.GetCardinality()
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) {
|
||||
rv, err := s.segment.DocNumbers(docIDs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if s.deleted != nil {
|
||||
rv.AndNot(s.deleted)
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// DocNumbersLive returns a bitmap containing doc numbers for all live docs
|
||||
func (s *SegmentSnapshot) DocNumbersLive() *roaring.Bitmap {
|
||||
rv := roaring.NewBitmap()
|
||||
rv.AddRange(0, s.segment.Count())
|
||||
if s.deleted != nil {
|
||||
rv.AndNot(s.deleted)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) Fields() []string {
|
||||
return s.segment.Fields()
|
||||
}
|
||||
|
||||
func (s *SegmentSnapshot) Size() (rv int) {
|
||||
rv = s.segment.Size()
|
||||
if s.deleted != nil {
|
||||
rv += int(s.deleted.GetSizeInBytes())
|
||||
}
|
||||
rv += s.cachedDocs.Size()
|
||||
return
|
||||
}
|
||||
|
||||
type cachedFieldDocs struct {
|
||||
m sync.Mutex
|
||||
readyCh chan struct{} // closed when the cachedFieldDocs.docs is ready to be used.
|
||||
err error // Non-nil if there was an error when preparing this cachedFieldDocs.
|
||||
docs map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF.
|
||||
size uint64
|
||||
}
|
||||
|
||||
func (cfd *cachedFieldDocs) Size() int {
|
||||
var rv int
|
||||
cfd.m.Lock()
|
||||
for _, entry := range cfd.docs {
|
||||
rv += 8 /* size of uint64 */ + len(entry)
|
||||
}
|
||||
cfd.m.Unlock()
|
||||
return rv
|
||||
}
|
||||
|
||||
func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) {
|
||||
cfd.m.Lock()
|
||||
defer func() {
|
||||
close(cfd.readyCh)
|
||||
cfd.m.Unlock()
|
||||
}()
|
||||
|
||||
cfd.size += uint64(size.SizeOfUint64) /* size field */
|
||||
dict, err := ss.segment.Dictionary(field)
|
||||
if err != nil {
|
||||
cfd.err = err
|
||||
return
|
||||
}
|
||||
|
||||
var postings segment.PostingsList
|
||||
var postingsItr segment.PostingsIterator
|
||||
|
||||
dictItr := dict.AutomatonIterator(nil, nil, nil)
|
||||
next, err := dictItr.Next()
|
||||
for err == nil && next != nil {
|
||||
var err1 error
|
||||
postings, err1 = dict.PostingsList([]byte(next.Term), nil, postings)
|
||||
if err1 != nil {
|
||||
cfd.err = err1
|
||||
return
|
||||
}
|
||||
|
||||
cfd.size += uint64(size.SizeOfUint64) /* map key */
|
||||
postingsItr = postings.Iterator(false, false, false, postingsItr)
|
||||
nextPosting, err2 := postingsItr.Next()
|
||||
for err2 == nil && nextPosting != nil {
|
||||
docNum := nextPosting.Number()
|
||||
cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...)
|
||||
cfd.docs[docNum] = append(cfd.docs[docNum], TermSeparator)
|
||||
cfd.size += uint64(len(next.Term) + 1) // map value
|
||||
nextPosting, err2 = postingsItr.Next()
|
||||
}
|
||||
|
||||
if err2 != nil {
|
||||
cfd.err = err2
|
||||
return
|
||||
}
|
||||
|
||||
next, err = dictItr.Next()
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
cfd.err = err
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
type cachedDocs struct {
|
||||
size uint64
|
||||
m sync.Mutex // As the cache is asynchronously prepared, need a lock
|
||||
cache map[string]*cachedFieldDocs // Keyed by field
|
||||
}
|
||||
|
||||
func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error {
|
||||
c.m.Lock()
|
||||
|
||||
if c.cache == nil {
|
||||
c.cache = make(map[string]*cachedFieldDocs, len(ss.Fields()))
|
||||
}
|
||||
|
||||
for _, field := range wantedFields {
|
||||
_, exists := c.cache[field]
|
||||
if !exists {
|
||||
c.cache[field] = &cachedFieldDocs{
|
||||
readyCh: make(chan struct{}),
|
||||
docs: make(map[uint64][]byte),
|
||||
}
|
||||
|
||||
go c.cache[field].prepareField(field, ss)
|
||||
}
|
||||
}
|
||||
|
||||
for _, field := range wantedFields {
|
||||
cachedFieldDocs := c.cache[field]
|
||||
c.m.Unlock()
|
||||
<-cachedFieldDocs.readyCh
|
||||
|
||||
if cachedFieldDocs.err != nil {
|
||||
return cachedFieldDocs.err
|
||||
}
|
||||
c.m.Lock()
|
||||
}
|
||||
|
||||
c.updateSizeLOCKED()
|
||||
|
||||
c.m.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// hasFields returns true if the cache has all the given fields
|
||||
func (c *cachedDocs) hasFields(fields []string) bool {
|
||||
c.m.Lock()
|
||||
for _, field := range fields {
|
||||
if _, exists := c.cache[field]; !exists {
|
||||
c.m.Unlock()
|
||||
return false // found a field not in cache
|
||||
}
|
||||
}
|
||||
c.m.Unlock()
|
||||
return true
|
||||
}
|
||||
|
||||
func (c *cachedDocs) Size() int {
|
||||
return int(atomic.LoadUint64(&c.size))
|
||||
}
|
||||
|
||||
func (c *cachedDocs) updateSizeLOCKED() {
|
||||
sizeInBytes := 0
|
||||
for k, v := range c.cache { // cachedFieldDocs
|
||||
sizeInBytes += len(k)
|
||||
if v != nil {
|
||||
sizeInBytes += v.Size()
|
||||
}
|
||||
}
|
||||
atomic.StoreUint64(&c.size, uint64(sizeInBytes))
|
||||
}
|
||||
|
||||
func (c *cachedDocs) visitDoc(localDocNum uint64,
|
||||
fields []string, visitor index.DocValueVisitor) {
|
||||
c.m.Lock()
|
||||
|
||||
for _, field := range fields {
|
||||
if cachedFieldDocs, exists := c.cache[field]; exists {
|
||||
c.m.Unlock()
|
||||
<-cachedFieldDocs.readyCh
|
||||
c.m.Lock()
|
||||
|
||||
if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists {
|
||||
for {
|
||||
i := bytes.Index(tlist, TermSeparatorSplitSlice)
|
||||
if i < 0 {
|
||||
break
|
||||
}
|
||||
visitor(field, tlist[0:i])
|
||||
tlist = tlist[i+1:]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
c.m.Unlock()
|
||||
}
|
152
vendor/github.com/blevesearch/bleve/v2/index/scorch/stats.go
generated
vendored
Normal file
152
vendor/github.com/blevesearch/bleve/v2/index/scorch/stats.go
generated
vendored
Normal file
|
@ -0,0 +1,152 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"reflect"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// Stats tracks statistics about the index, fields that are
|
||||
// prefixed like CurXxxx are gauges (can go up and down),
|
||||
// and fields that are prefixed like TotXxxx are monotonically
|
||||
// increasing counters.
|
||||
type Stats struct {
|
||||
TotUpdates uint64
|
||||
TotDeletes uint64
|
||||
|
||||
TotBatches uint64
|
||||
TotBatchesEmpty uint64
|
||||
TotBatchIntroTime uint64
|
||||
MaxBatchIntroTime uint64
|
||||
|
||||
CurRootEpoch uint64
|
||||
LastPersistedEpoch uint64
|
||||
LastMergedEpoch uint64
|
||||
|
||||
TotOnErrors uint64
|
||||
|
||||
TotAnalysisTime uint64
|
||||
TotIndexTime uint64
|
||||
|
||||
TotIndexedPlainTextBytes uint64
|
||||
|
||||
TotTermSearchersStarted uint64
|
||||
TotTermSearchersFinished uint64
|
||||
|
||||
TotEventTriggerStarted uint64
|
||||
TotEventTriggerCompleted uint64
|
||||
|
||||
TotIntroduceLoop uint64
|
||||
TotIntroduceSegmentBeg uint64
|
||||
TotIntroduceSegmentEnd uint64
|
||||
TotIntroducePersistBeg uint64
|
||||
TotIntroducePersistEnd uint64
|
||||
TotIntroduceMergeBeg uint64
|
||||
TotIntroduceMergeEnd uint64
|
||||
TotIntroduceRevertBeg uint64
|
||||
TotIntroduceRevertEnd uint64
|
||||
|
||||
TotIntroducedItems uint64
|
||||
TotIntroducedSegmentsBatch uint64
|
||||
TotIntroducedSegmentsMerge uint64
|
||||
|
||||
TotPersistLoopBeg uint64
|
||||
TotPersistLoopErr uint64
|
||||
TotPersistLoopProgress uint64
|
||||
TotPersistLoopWait uint64
|
||||
TotPersistLoopWaitNotified uint64
|
||||
TotPersistLoopEnd uint64
|
||||
|
||||
TotPersistedItems uint64
|
||||
TotItemsToPersist uint64
|
||||
TotPersistedSegments uint64
|
||||
|
||||
TotPersisterSlowMergerPause uint64
|
||||
TotPersisterSlowMergerResume uint64
|
||||
|
||||
TotPersisterNapPauseCompleted uint64
|
||||
TotPersisterMergerNapBreak uint64
|
||||
|
||||
TotFileMergeLoopBeg uint64
|
||||
TotFileMergeLoopErr uint64
|
||||
TotFileMergeLoopEnd uint64
|
||||
|
||||
TotFileMergeForceOpsStarted uint64
|
||||
TotFileMergeForceOpsCompleted uint64
|
||||
|
||||
TotFileMergePlan uint64
|
||||
TotFileMergePlanErr uint64
|
||||
TotFileMergePlanNone uint64
|
||||
TotFileMergePlanOk uint64
|
||||
|
||||
TotFileMergePlanTasks uint64
|
||||
TotFileMergePlanTasksDone uint64
|
||||
TotFileMergePlanTasksErr uint64
|
||||
TotFileMergePlanTasksSegments uint64
|
||||
TotFileMergePlanTasksSegmentsEmpty uint64
|
||||
|
||||
TotFileMergeSegmentsEmpty uint64
|
||||
TotFileMergeSegments uint64
|
||||
TotFileSegmentsAtRoot uint64
|
||||
TotFileMergeWrittenBytes uint64
|
||||
|
||||
TotFileMergeZapBeg uint64
|
||||
TotFileMergeZapEnd uint64
|
||||
TotFileMergeZapTime uint64
|
||||
MaxFileMergeZapTime uint64
|
||||
TotFileMergeZapIntroductionTime uint64
|
||||
MaxFileMergeZapIntroductionTime uint64
|
||||
|
||||
TotFileMergeIntroductions uint64
|
||||
TotFileMergeIntroductionsDone uint64
|
||||
TotFileMergeIntroductionsSkipped uint64
|
||||
TotFileMergeIntroductionsObsoleted uint64
|
||||
|
||||
CurFilesIneligibleForRemoval uint64
|
||||
TotSnapshotsRemovedFromMetaStore uint64
|
||||
|
||||
TotMemMergeBeg uint64
|
||||
TotMemMergeErr uint64
|
||||
TotMemMergeDone uint64
|
||||
TotMemMergeZapBeg uint64
|
||||
TotMemMergeZapEnd uint64
|
||||
TotMemMergeZapTime uint64
|
||||
MaxMemMergeZapTime uint64
|
||||
TotMemMergeSegments uint64
|
||||
TotMemorySegmentsAtRoot uint64
|
||||
}
|
||||
|
||||
// atomically populates the returned map
|
||||
func (s *Stats) ToMap() map[string]interface{} {
|
||||
m := map[string]interface{}{}
|
||||
sve := reflect.ValueOf(s).Elem()
|
||||
svet := sve.Type()
|
||||
for i := 0; i < svet.NumField(); i++ {
|
||||
svef := sve.Field(i)
|
||||
if svef.CanAddr() {
|
||||
svefp := svef.Addr().Interface()
|
||||
m[svet.Field(i).Name] = atomic.LoadUint64(svefp.(*uint64))
|
||||
}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// MarshalJSON implements json.Marshaler, and in contrast to standard
|
||||
// json marshaling provides atomic safety
|
||||
func (s *Stats) MarshalJSON() ([]byte, error) {
|
||||
return json.Marshal(s.ToMap())
|
||||
}
|
161
vendor/github.com/blevesearch/bleve/v2/index/scorch/unadorned.go
generated
vendored
Normal file
161
vendor/github.com/blevesearch/bleve/v2/index/scorch/unadorned.go
generated
vendored
Normal file
|
@ -0,0 +1,161 @@
|
|||
// Copyright (c) 2020 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
segment "github.com/blevesearch/scorch_segment_api"
|
||||
"math"
|
||||
"reflect"
|
||||
)
|
||||
|
||||
var reflectStaticSizeUnadornedPostingsIteratorBitmap int
|
||||
var reflectStaticSizeUnadornedPostingsIterator1Hit int
|
||||
var reflectStaticSizeUnadornedPosting int
|
||||
|
||||
func init() {
|
||||
var pib unadornedPostingsIteratorBitmap
|
||||
reflectStaticSizeUnadornedPostingsIteratorBitmap = int(reflect.TypeOf(pib).Size())
|
||||
var pi1h unadornedPostingsIterator1Hit
|
||||
reflectStaticSizeUnadornedPostingsIterator1Hit = int(reflect.TypeOf(pi1h).Size())
|
||||
var up UnadornedPosting
|
||||
reflectStaticSizeUnadornedPosting = int(reflect.TypeOf(up).Size())
|
||||
}
|
||||
|
||||
type unadornedPostingsIteratorBitmap struct {
|
||||
actual roaring.IntPeekable
|
||||
actualBM *roaring.Bitmap
|
||||
}
|
||||
|
||||
func (i *unadornedPostingsIteratorBitmap) Next() (segment.Posting, error) {
|
||||
return i.nextAtOrAfter(0)
|
||||
}
|
||||
|
||||
func (i *unadornedPostingsIteratorBitmap) Advance(docNum uint64) (segment.Posting, error) {
|
||||
return i.nextAtOrAfter(docNum)
|
||||
}
|
||||
|
||||
func (i *unadornedPostingsIteratorBitmap) nextAtOrAfter(atOrAfter uint64) (segment.Posting, error) {
|
||||
docNum, exists := i.nextDocNumAtOrAfter(atOrAfter)
|
||||
if !exists {
|
||||
return nil, nil
|
||||
}
|
||||
return UnadornedPosting(docNum), nil
|
||||
}
|
||||
|
||||
func (i *unadornedPostingsIteratorBitmap) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool) {
|
||||
if i.actual == nil || !i.actual.HasNext() {
|
||||
return 0, false
|
||||
}
|
||||
i.actual.AdvanceIfNeeded(uint32(atOrAfter))
|
||||
|
||||
if !i.actual.HasNext() {
|
||||
return 0, false // couldn't find anything
|
||||
}
|
||||
|
||||
return uint64(i.actual.Next()), true
|
||||
}
|
||||
|
||||
func (i *unadornedPostingsIteratorBitmap) Size() int {
|
||||
return reflectStaticSizeUnadornedPostingsIteratorBitmap
|
||||
}
|
||||
|
||||
func (i *unadornedPostingsIteratorBitmap) ActualBitmap() *roaring.Bitmap {
|
||||
return i.actualBM
|
||||
}
|
||||
|
||||
func (i *unadornedPostingsIteratorBitmap) DocNum1Hit() (uint64, bool) {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func (i *unadornedPostingsIteratorBitmap) ReplaceActual(actual *roaring.Bitmap) {
|
||||
i.actualBM = actual
|
||||
i.actual = actual.Iterator()
|
||||
}
|
||||
|
||||
func newUnadornedPostingsIteratorFromBitmap(bm *roaring.Bitmap) segment.PostingsIterator {
|
||||
return &unadornedPostingsIteratorBitmap{
|
||||
actualBM: bm,
|
||||
actual: bm.Iterator(),
|
||||
}
|
||||
}
|
||||
|
||||
const docNum1HitFinished = math.MaxUint64
|
||||
|
||||
type unadornedPostingsIterator1Hit struct {
|
||||
docNum uint64
|
||||
}
|
||||
|
||||
func (i *unadornedPostingsIterator1Hit) Next() (segment.Posting, error) {
|
||||
return i.nextAtOrAfter(0)
|
||||
}
|
||||
|
||||
func (i *unadornedPostingsIterator1Hit) Advance(docNum uint64) (segment.Posting, error) {
|
||||
return i.nextAtOrAfter(docNum)
|
||||
}
|
||||
|
||||
func (i *unadornedPostingsIterator1Hit) nextAtOrAfter(atOrAfter uint64) (segment.Posting, error) {
|
||||
docNum, exists := i.nextDocNumAtOrAfter(atOrAfter)
|
||||
if !exists {
|
||||
return nil, nil
|
||||
}
|
||||
return UnadornedPosting(docNum), nil
|
||||
}
|
||||
|
||||
func (i *unadornedPostingsIterator1Hit) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool) {
|
||||
if i.docNum == docNum1HitFinished {
|
||||
return 0, false
|
||||
}
|
||||
if i.docNum < atOrAfter {
|
||||
// advanced past our 1-hit
|
||||
i.docNum = docNum1HitFinished // consume our 1-hit docNum
|
||||
return 0, false
|
||||
}
|
||||
docNum := i.docNum
|
||||
i.docNum = docNum1HitFinished // consume our 1-hit docNum
|
||||
return docNum, true
|
||||
}
|
||||
|
||||
func (i *unadornedPostingsIterator1Hit) Size() int {
|
||||
return reflectStaticSizeUnadornedPostingsIterator1Hit
|
||||
}
|
||||
|
||||
func newUnadornedPostingsIteratorFrom1Hit(docNum1Hit uint64) segment.PostingsIterator {
|
||||
return &unadornedPostingsIterator1Hit{
|
||||
docNum1Hit,
|
||||
}
|
||||
}
|
||||
|
||||
type UnadornedPosting uint64
|
||||
|
||||
func (p UnadornedPosting) Number() uint64 {
|
||||
return uint64(p)
|
||||
}
|
||||
|
||||
func (p UnadornedPosting) Frequency() uint64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (p UnadornedPosting) Norm() float64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (p UnadornedPosting) Locations() []segment.Location {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p UnadornedPosting) Size() int {
|
||||
return reflectStaticSizeUnadornedPosting
|
||||
}
|
129
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/analysis.go
generated
vendored
Normal file
129
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/analysis.go
generated
vendored
Normal file
|
@ -0,0 +1,129 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown
|
||||
|
||||
import (
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
type IndexRow interface {
|
||||
KeySize() int
|
||||
KeyTo([]byte) (int, error)
|
||||
Key() []byte
|
||||
|
||||
ValueSize() int
|
||||
ValueTo([]byte) (int, error)
|
||||
Value() []byte
|
||||
}
|
||||
|
||||
type AnalysisResult struct {
|
||||
DocID string
|
||||
Rows []IndexRow
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) Analyze(d index.Document) *AnalysisResult {
|
||||
return udc.analyze(d)
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) analyze(d index.Document) *AnalysisResult {
|
||||
rv := &AnalysisResult{
|
||||
DocID: d.ID(),
|
||||
Rows: make([]IndexRow, 0, 100),
|
||||
}
|
||||
|
||||
docIDBytes := []byte(d.ID())
|
||||
|
||||
// track our back index entries
|
||||
backIndexStoredEntries := make([]*BackIndexStoreEntry, 0)
|
||||
|
||||
// information we collate as we merge fields with same name
|
||||
fieldTermFreqs := make(map[uint16]index.TokenFrequencies)
|
||||
fieldLengths := make(map[uint16]int)
|
||||
fieldIncludeTermVectors := make(map[uint16]bool)
|
||||
fieldNames := make(map[uint16]string)
|
||||
|
||||
analyzeField := func(field index.Field, storable bool) {
|
||||
fieldIndex, newFieldRow := udc.fieldIndexOrNewRow(field.Name())
|
||||
if newFieldRow != nil {
|
||||
rv.Rows = append(rv.Rows, newFieldRow)
|
||||
}
|
||||
fieldNames[fieldIndex] = field.Name()
|
||||
|
||||
if field.Options().IsIndexed() {
|
||||
field.Analyze()
|
||||
fieldLength := field.AnalyzedLength()
|
||||
tokenFreqs := field.AnalyzedTokenFrequencies()
|
||||
existingFreqs := fieldTermFreqs[fieldIndex]
|
||||
if existingFreqs == nil {
|
||||
fieldTermFreqs[fieldIndex] = tokenFreqs
|
||||
} else {
|
||||
existingFreqs.MergeAll(field.Name(), tokenFreqs)
|
||||
fieldTermFreqs[fieldIndex] = existingFreqs
|
||||
}
|
||||
fieldLengths[fieldIndex] += fieldLength
|
||||
fieldIncludeTermVectors[fieldIndex] = field.Options().IncludeTermVectors()
|
||||
}
|
||||
|
||||
if storable && field.Options().IsStored() {
|
||||
rv.Rows, backIndexStoredEntries = udc.storeField(docIDBytes, field, fieldIndex, rv.Rows, backIndexStoredEntries)
|
||||
}
|
||||
}
|
||||
|
||||
// walk all the fields, record stored fields now
|
||||
// place information about indexed fields into map
|
||||
// this collates information across fields with
|
||||
// same names (arrays)
|
||||
d.VisitFields(func(field index.Field) {
|
||||
analyzeField(field, true)
|
||||
})
|
||||
|
||||
if d.HasComposite() {
|
||||
for fieldIndex, tokenFreqs := range fieldTermFreqs {
|
||||
// see if any of the composite fields need this
|
||||
d.VisitComposite(func(field index.CompositeField) {
|
||||
field.Compose(fieldNames[fieldIndex], fieldLengths[fieldIndex], tokenFreqs)
|
||||
})
|
||||
}
|
||||
|
||||
d.VisitComposite(func(field index.CompositeField) {
|
||||
analyzeField(field, false)
|
||||
})
|
||||
}
|
||||
|
||||
rowsCapNeeded := len(rv.Rows) + 1
|
||||
for _, tokenFreqs := range fieldTermFreqs {
|
||||
rowsCapNeeded += len(tokenFreqs)
|
||||
}
|
||||
|
||||
rv.Rows = append(make([]IndexRow, 0, rowsCapNeeded), rv.Rows...)
|
||||
|
||||
backIndexTermsEntries := make([]*BackIndexTermsEntry, 0, len(fieldTermFreqs))
|
||||
|
||||
// walk through the collated information and process
|
||||
// once for each indexed field (unique name)
|
||||
for fieldIndex, tokenFreqs := range fieldTermFreqs {
|
||||
fieldLength := fieldLengths[fieldIndex]
|
||||
includeTermVectors := fieldIncludeTermVectors[fieldIndex]
|
||||
|
||||
// encode this field
|
||||
rv.Rows, backIndexTermsEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermsEntries)
|
||||
}
|
||||
|
||||
// build the back index row
|
||||
backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermsEntries, backIndexStoredEntries)
|
||||
rv.Rows = append(rv.Rows, backIndexRow)
|
||||
|
||||
return rv
|
||||
}
|
8
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/benchmark_all.sh
generated
vendored
Normal file
8
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/benchmark_all.sh
generated
vendored
Normal file
|
@ -0,0 +1,8 @@
|
|||
#!/bin/sh
|
||||
|
||||
BENCHMARKS=`grep "func Benchmark" *_test.go | sed 's/.*func //' | sed s/\(.*{//`
|
||||
|
||||
for BENCHMARK in $BENCHMARKS
|
||||
do
|
||||
go test -v -run=xxx -bench=^$BENCHMARK$ -benchtime=10s -tags 'forestdb leveldb' | grep -v ok | grep -v PASS
|
||||
done
|
174
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/dump.go
generated
vendored
Normal file
174
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/dump.go
generated
vendored
Normal file
|
@ -0,0 +1,174 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
// the functions in this file are only intended to be used by
|
||||
// the bleve_dump utility and the debug http handlers
|
||||
// if your application relies on them, you're doing something wrong
|
||||
// they may change or be removed at any time
|
||||
|
||||
func dumpPrefix(kvreader store.KVReader, rv chan interface{}, prefix []byte) {
|
||||
start := prefix
|
||||
if start == nil {
|
||||
start = []byte{0}
|
||||
}
|
||||
it := kvreader.PrefixIterator(start)
|
||||
defer func() {
|
||||
cerr := it.Close()
|
||||
if cerr != nil {
|
||||
rv <- cerr
|
||||
}
|
||||
}()
|
||||
key, val, valid := it.Current()
|
||||
for valid {
|
||||
ck := make([]byte, len(key))
|
||||
copy(ck, key)
|
||||
cv := make([]byte, len(val))
|
||||
copy(cv, val)
|
||||
row, err := ParseFromKeyValue(ck, cv)
|
||||
if err != nil {
|
||||
rv <- err
|
||||
return
|
||||
}
|
||||
rv <- row
|
||||
|
||||
it.Next()
|
||||
key, val, valid = it.Current()
|
||||
}
|
||||
}
|
||||
|
||||
func dumpRange(kvreader store.KVReader, rv chan interface{}, start, end []byte) {
|
||||
it := kvreader.RangeIterator(start, end)
|
||||
defer func() {
|
||||
cerr := it.Close()
|
||||
if cerr != nil {
|
||||
rv <- cerr
|
||||
}
|
||||
}()
|
||||
key, val, valid := it.Current()
|
||||
for valid {
|
||||
ck := make([]byte, len(key))
|
||||
copy(ck, key)
|
||||
cv := make([]byte, len(val))
|
||||
copy(cv, val)
|
||||
row, err := ParseFromKeyValue(ck, cv)
|
||||
if err != nil {
|
||||
rv <- err
|
||||
return
|
||||
}
|
||||
rv <- row
|
||||
|
||||
it.Next()
|
||||
key, val, valid = it.Current()
|
||||
}
|
||||
}
|
||||
|
||||
func (i *IndexReader) DumpAll() chan interface{} {
|
||||
rv := make(chan interface{})
|
||||
go func() {
|
||||
defer close(rv)
|
||||
dumpRange(i.kvreader, rv, nil, nil)
|
||||
}()
|
||||
return rv
|
||||
}
|
||||
|
||||
func (i *IndexReader) DumpFields() chan interface{} {
|
||||
rv := make(chan interface{})
|
||||
go func() {
|
||||
defer close(rv)
|
||||
dumpPrefix(i.kvreader, rv, []byte{'f'})
|
||||
}()
|
||||
return rv
|
||||
}
|
||||
|
||||
type keyset [][]byte
|
||||
|
||||
func (k keyset) Len() int { return len(k) }
|
||||
func (k keyset) Swap(i, j int) { k[i], k[j] = k[j], k[i] }
|
||||
func (k keyset) Less(i, j int) bool { return bytes.Compare(k[i], k[j]) < 0 }
|
||||
|
||||
// DumpDoc returns all rows in the index related to this doc id
|
||||
func (i *IndexReader) DumpDoc(id string) chan interface{} {
|
||||
idBytes := []byte(id)
|
||||
|
||||
rv := make(chan interface{})
|
||||
|
||||
go func() {
|
||||
defer close(rv)
|
||||
|
||||
back, err := backIndexRowForDoc(i.kvreader, []byte(id))
|
||||
if err != nil {
|
||||
rv <- err
|
||||
return
|
||||
}
|
||||
|
||||
// no such doc
|
||||
if back == nil {
|
||||
return
|
||||
}
|
||||
// build sorted list of term keys
|
||||
keys := make(keyset, 0)
|
||||
for _, entry := range back.termsEntries {
|
||||
for i := range entry.Terms {
|
||||
tfr := NewTermFrequencyRow([]byte(entry.Terms[i]), uint16(*entry.Field), idBytes, 0, 0)
|
||||
key := tfr.Key()
|
||||
keys = append(keys, key)
|
||||
}
|
||||
}
|
||||
sort.Sort(keys)
|
||||
|
||||
// first add all the stored rows
|
||||
storedRowPrefix := NewStoredRow(idBytes, 0, []uint64{}, 'x', []byte{}).ScanPrefixForDoc()
|
||||
dumpPrefix(i.kvreader, rv, storedRowPrefix)
|
||||
|
||||
// now walk term keys in order and add them as well
|
||||
if len(keys) > 0 {
|
||||
it := i.kvreader.RangeIterator(keys[0], nil)
|
||||
defer func() {
|
||||
cerr := it.Close()
|
||||
if cerr != nil {
|
||||
rv <- cerr
|
||||
}
|
||||
}()
|
||||
|
||||
for _, key := range keys {
|
||||
it.Seek(key)
|
||||
rkey, rval, valid := it.Current()
|
||||
if !valid {
|
||||
break
|
||||
}
|
||||
rck := make([]byte, len(rkey))
|
||||
copy(rck, key)
|
||||
rcv := make([]byte, len(rval))
|
||||
copy(rcv, rval)
|
||||
row, err := ParseFromKeyValue(rck, rcv)
|
||||
if err != nil {
|
||||
rv <- err
|
||||
return
|
||||
}
|
||||
rv <- row
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return rv
|
||||
}
|
88
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/field_cache.go
generated
vendored
Normal file
88
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/field_cache.go
generated
vendored
Normal file
|
@ -0,0 +1,88 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
type FieldCache struct {
|
||||
fieldIndexes map[string]uint16
|
||||
indexFields []string
|
||||
lastFieldIndex int
|
||||
mutex sync.RWMutex
|
||||
}
|
||||
|
||||
func NewFieldCache() *FieldCache {
|
||||
return &FieldCache{
|
||||
fieldIndexes: make(map[string]uint16),
|
||||
lastFieldIndex: -1,
|
||||
}
|
||||
}
|
||||
|
||||
func (f *FieldCache) AddExisting(field string, index uint16) {
|
||||
f.mutex.Lock()
|
||||
f.addLOCKED(field, index)
|
||||
f.mutex.Unlock()
|
||||
}
|
||||
|
||||
func (f *FieldCache) addLOCKED(field string, index uint16) uint16 {
|
||||
f.fieldIndexes[field] = index
|
||||
if len(f.indexFields) < int(index)+1 {
|
||||
prevIndexFields := f.indexFields
|
||||
f.indexFields = make([]string, int(index)+16)
|
||||
copy(f.indexFields, prevIndexFields)
|
||||
}
|
||||
f.indexFields[int(index)] = field
|
||||
if int(index) > f.lastFieldIndex {
|
||||
f.lastFieldIndex = int(index)
|
||||
}
|
||||
return index
|
||||
}
|
||||
|
||||
// FieldNamed returns the index of the field, and whether or not it existed
|
||||
// before this call. if createIfMissing is true, and new field index is assigned
|
||||
// but the second return value will still be false
|
||||
func (f *FieldCache) FieldNamed(field string, createIfMissing bool) (uint16, bool) {
|
||||
f.mutex.RLock()
|
||||
if index, ok := f.fieldIndexes[field]; ok {
|
||||
f.mutex.RUnlock()
|
||||
return index, true
|
||||
} else if !createIfMissing {
|
||||
f.mutex.RUnlock()
|
||||
return 0, false
|
||||
}
|
||||
// trade read lock for write lock
|
||||
f.mutex.RUnlock()
|
||||
f.mutex.Lock()
|
||||
// need to check again with write lock
|
||||
if index, ok := f.fieldIndexes[field]; ok {
|
||||
f.mutex.Unlock()
|
||||
return index, true
|
||||
}
|
||||
// assign next field id
|
||||
index := f.addLOCKED(field, uint16(f.lastFieldIndex+1))
|
||||
f.mutex.Unlock()
|
||||
return index, false
|
||||
}
|
||||
|
||||
func (f *FieldCache) FieldIndexed(index uint16) (field string) {
|
||||
f.mutex.RLock()
|
||||
if int(index) < len(f.indexFields) {
|
||||
field = f.indexFields[int(index)]
|
||||
}
|
||||
f.mutex.RUnlock()
|
||||
return field
|
||||
}
|
78
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/field_dict.go
generated
vendored
Normal file
78
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/field_dict.go
generated
vendored
Normal file
|
@ -0,0 +1,78 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
type UpsideDownCouchFieldDict struct {
|
||||
indexReader *IndexReader
|
||||
iterator store.KVIterator
|
||||
dictRow *DictionaryRow
|
||||
dictEntry *index.DictEntry
|
||||
field uint16
|
||||
}
|
||||
|
||||
func newUpsideDownCouchFieldDict(indexReader *IndexReader, field uint16, startTerm, endTerm []byte) (*UpsideDownCouchFieldDict, error) {
|
||||
|
||||
startKey := NewDictionaryRow(startTerm, field, 0).Key()
|
||||
if endTerm == nil {
|
||||
endTerm = []byte{ByteSeparator}
|
||||
} else {
|
||||
endTerm = incrementBytes(endTerm)
|
||||
}
|
||||
endKey := NewDictionaryRow(endTerm, field, 0).Key()
|
||||
|
||||
it := indexReader.kvreader.RangeIterator(startKey, endKey)
|
||||
|
||||
return &UpsideDownCouchFieldDict{
|
||||
indexReader: indexReader,
|
||||
iterator: it,
|
||||
dictRow: &DictionaryRow{}, // Pre-alloced, reused row.
|
||||
dictEntry: &index.DictEntry{}, // Pre-alloced, reused entry.
|
||||
field: field,
|
||||
}, nil
|
||||
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchFieldDict) Next() (*index.DictEntry, error) {
|
||||
key, val, valid := r.iterator.Current()
|
||||
if !valid {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
err := r.dictRow.parseDictionaryK(key)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unexpected error parsing dictionary row key: %v", err)
|
||||
}
|
||||
err = r.dictRow.parseDictionaryV(val)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unexpected error parsing dictionary row val: %v", err)
|
||||
}
|
||||
r.dictEntry.Term = string(r.dictRow.term)
|
||||
r.dictEntry.Count = r.dictRow.count
|
||||
// advance the iterator to the next term
|
||||
r.iterator.Next()
|
||||
return r.dictEntry, nil
|
||||
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchFieldDict) Close() error {
|
||||
return r.iterator.Close()
|
||||
}
|
225
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/index_reader.go
generated
vendored
Normal file
225
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/index_reader.go
generated
vendored
Normal file
|
@ -0,0 +1,225 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/document"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
"github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeIndexReader int
|
||||
|
||||
func init() {
|
||||
var ir IndexReader
|
||||
reflectStaticSizeIndexReader = int(reflect.TypeOf(ir).Size())
|
||||
}
|
||||
|
||||
type IndexReader struct {
|
||||
index *UpsideDownCouch
|
||||
kvreader store.KVReader
|
||||
docCount uint64
|
||||
}
|
||||
|
||||
func (i *IndexReader) TermFieldReader(term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
|
||||
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false)
|
||||
if fieldExists {
|
||||
return newUpsideDownCouchTermFieldReader(i, term, uint16(fieldIndex), includeFreq, includeNorm, includeTermVectors)
|
||||
}
|
||||
return newUpsideDownCouchTermFieldReader(i, []byte{ByteSeparator}, ^uint16(0), includeFreq, includeNorm, includeTermVectors)
|
||||
}
|
||||
|
||||
func (i *IndexReader) FieldDict(fieldName string) (index.FieldDict, error) {
|
||||
return i.FieldDictRange(fieldName, nil, nil)
|
||||
}
|
||||
|
||||
func (i *IndexReader) FieldDictRange(fieldName string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
|
||||
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false)
|
||||
if fieldExists {
|
||||
return newUpsideDownCouchFieldDict(i, uint16(fieldIndex), startTerm, endTerm)
|
||||
}
|
||||
return newUpsideDownCouchFieldDict(i, ^uint16(0), []byte{ByteSeparator}, []byte{})
|
||||
}
|
||||
|
||||
func (i *IndexReader) FieldDictPrefix(fieldName string, termPrefix []byte) (index.FieldDict, error) {
|
||||
return i.FieldDictRange(fieldName, termPrefix, termPrefix)
|
||||
}
|
||||
|
||||
func (i *IndexReader) DocIDReaderAll() (index.DocIDReader, error) {
|
||||
return newUpsideDownCouchDocIDReader(i)
|
||||
}
|
||||
|
||||
func (i *IndexReader) DocIDReaderOnly(ids []string) (index.DocIDReader, error) {
|
||||
return newUpsideDownCouchDocIDReaderOnly(i, ids)
|
||||
}
|
||||
|
||||
func (i *IndexReader) Document(id string) (doc index.Document, err error) {
|
||||
// first hit the back index to confirm doc exists
|
||||
var backIndexRow *BackIndexRow
|
||||
backIndexRow, err = backIndexRowForDoc(i.kvreader, []byte(id))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if backIndexRow == nil {
|
||||
return
|
||||
}
|
||||
rvd := document.NewDocument(id)
|
||||
storedRow := NewStoredRow([]byte(id), 0, []uint64{}, 'x', nil)
|
||||
storedRowScanPrefix := storedRow.ScanPrefixForDoc()
|
||||
it := i.kvreader.PrefixIterator(storedRowScanPrefix)
|
||||
defer func() {
|
||||
if cerr := it.Close(); err == nil && cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
key, val, valid := it.Current()
|
||||
for valid {
|
||||
safeVal := make([]byte, len(val))
|
||||
copy(safeVal, val)
|
||||
var row *StoredRow
|
||||
row, err = NewStoredRowKV(key, safeVal)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if row != nil {
|
||||
fieldName := i.index.fieldCache.FieldIndexed(row.field)
|
||||
field := decodeFieldType(row.typ, fieldName, row.arrayPositions, row.value)
|
||||
if field != nil {
|
||||
rvd.AddField(field)
|
||||
}
|
||||
}
|
||||
|
||||
it.Next()
|
||||
key, val, valid = it.Current()
|
||||
}
|
||||
return rvd, nil
|
||||
}
|
||||
|
||||
func (i *IndexReader) documentVisitFieldTerms(id index.IndexInternalID, fields []string, visitor index.DocValueVisitor) error {
|
||||
fieldsMap := make(map[uint16]string, len(fields))
|
||||
for _, f := range fields {
|
||||
id, ok := i.index.fieldCache.FieldNamed(f, false)
|
||||
if ok {
|
||||
fieldsMap[id] = f
|
||||
}
|
||||
}
|
||||
|
||||
tempRow := BackIndexRow{
|
||||
doc: id,
|
||||
}
|
||||
|
||||
keyBuf := GetRowBuffer()
|
||||
if tempRow.KeySize() > len(keyBuf) {
|
||||
keyBuf = make([]byte, 2*tempRow.KeySize())
|
||||
}
|
||||
defer PutRowBuffer(keyBuf)
|
||||
keySize, err := tempRow.KeyTo(keyBuf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
value, err := i.kvreader.Get(keyBuf[:keySize])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if value == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return visitBackIndexRow(value, func(field uint32, term []byte) {
|
||||
if field, ok := fieldsMap[uint16(field)]; ok {
|
||||
visitor(field, term)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func (i *IndexReader) Fields() (fields []string, err error) {
|
||||
fields = make([]string, 0)
|
||||
it := i.kvreader.PrefixIterator([]byte{'f'})
|
||||
defer func() {
|
||||
if cerr := it.Close(); err == nil && cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
key, val, valid := it.Current()
|
||||
for valid {
|
||||
var row UpsideDownCouchRow
|
||||
row, err = ParseFromKeyValue(key, val)
|
||||
if err != nil {
|
||||
fields = nil
|
||||
return
|
||||
}
|
||||
if row != nil {
|
||||
fieldRow, ok := row.(*FieldRow)
|
||||
if ok {
|
||||
fields = append(fields, fieldRow.name)
|
||||
}
|
||||
}
|
||||
|
||||
it.Next()
|
||||
key, val, valid = it.Current()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (i *IndexReader) GetInternal(key []byte) ([]byte, error) {
|
||||
internalRow := NewInternalRow(key, nil)
|
||||
return i.kvreader.Get(internalRow.Key())
|
||||
}
|
||||
|
||||
func (i *IndexReader) DocCount() (uint64, error) {
|
||||
return i.docCount, nil
|
||||
}
|
||||
|
||||
func (i *IndexReader) Close() error {
|
||||
return i.kvreader.Close()
|
||||
}
|
||||
|
||||
func (i *IndexReader) ExternalID(id index.IndexInternalID) (string, error) {
|
||||
return string(id), nil
|
||||
}
|
||||
|
||||
func (i *IndexReader) InternalID(id string) (index.IndexInternalID, error) {
|
||||
return index.IndexInternalID(id), nil
|
||||
}
|
||||
|
||||
func incrementBytes(in []byte) []byte {
|
||||
rv := make([]byte, len(in))
|
||||
copy(rv, in)
|
||||
for i := len(rv) - 1; i >= 0; i-- {
|
||||
rv[i] = rv[i] + 1
|
||||
if rv[i] != 0 {
|
||||
// didn't overflow, so stop
|
||||
break
|
||||
}
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (i *IndexReader) DocValueReader(fields []string) (index.DocValueReader, error) {
|
||||
return &DocValueReader{i: i, fields: fields}, nil
|
||||
}
|
||||
|
||||
type DocValueReader struct {
|
||||
i *IndexReader
|
||||
fields []string
|
||||
}
|
||||
|
||||
func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID,
|
||||
visitor index.DocValueVisitor) error {
|
||||
return dvr.i.documentVisitFieldTerms(id, dvr.fields, visitor)
|
||||
}
|
376
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/reader.go
generated
vendored
Normal file
376
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/reader.go
generated
vendored
Normal file
|
@ -0,0 +1,376 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"reflect"
|
||||
"sort"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
"github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeUpsideDownCouchTermFieldReader int
|
||||
var reflectStaticSizeUpsideDownCouchDocIDReader int
|
||||
|
||||
func init() {
|
||||
var tfr UpsideDownCouchTermFieldReader
|
||||
reflectStaticSizeUpsideDownCouchTermFieldReader =
|
||||
int(reflect.TypeOf(tfr).Size())
|
||||
var cdr UpsideDownCouchDocIDReader
|
||||
reflectStaticSizeUpsideDownCouchDocIDReader =
|
||||
int(reflect.TypeOf(cdr).Size())
|
||||
}
|
||||
|
||||
type UpsideDownCouchTermFieldReader struct {
|
||||
count uint64
|
||||
indexReader *IndexReader
|
||||
iterator store.KVIterator
|
||||
term []byte
|
||||
tfrNext *TermFrequencyRow
|
||||
tfrPrealloc TermFrequencyRow
|
||||
keyBuf []byte
|
||||
field uint16
|
||||
includeTermVectors bool
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Size() int {
|
||||
sizeInBytes := reflectStaticSizeUpsideDownCouchTermFieldReader + size.SizeOfPtr +
|
||||
len(r.term) +
|
||||
r.tfrPrealloc.Size() +
|
||||
len(r.keyBuf)
|
||||
|
||||
if r.tfrNext != nil {
|
||||
sizeInBytes += r.tfrNext.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
|
||||
bufNeeded := termFrequencyRowKeySize(term, nil)
|
||||
if bufNeeded < dictionaryRowKeySize(term) {
|
||||
bufNeeded = dictionaryRowKeySize(term)
|
||||
}
|
||||
buf := make([]byte, bufNeeded)
|
||||
|
||||
bufUsed := dictionaryRowKeyTo(buf, field, term)
|
||||
val, err := indexReader.kvreader.Get(buf[:bufUsed])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if val == nil {
|
||||
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
|
||||
rv := &UpsideDownCouchTermFieldReader{
|
||||
count: 0,
|
||||
term: term,
|
||||
field: field,
|
||||
includeTermVectors: includeTermVectors,
|
||||
}
|
||||
rv.tfrNext = &rv.tfrPrealloc
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
count, err := dictionaryRowParseV(val)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
bufUsed = termFrequencyRowKeyTo(buf, field, term, nil)
|
||||
it := indexReader.kvreader.PrefixIterator(buf[:bufUsed])
|
||||
|
||||
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
|
||||
return &UpsideDownCouchTermFieldReader{
|
||||
indexReader: indexReader,
|
||||
iterator: it,
|
||||
count: count,
|
||||
term: term,
|
||||
field: field,
|
||||
includeTermVectors: includeTermVectors,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Count() uint64 {
|
||||
return r.count
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
|
||||
if r.iterator != nil {
|
||||
// We treat tfrNext also like an initialization flag, which
|
||||
// tells us whether we need to invoke the underlying
|
||||
// iterator.Next(). The first time, don't call iterator.Next().
|
||||
if r.tfrNext != nil {
|
||||
r.iterator.Next()
|
||||
} else {
|
||||
r.tfrNext = &r.tfrPrealloc
|
||||
}
|
||||
key, val, valid := r.iterator.Current()
|
||||
if valid {
|
||||
tfr := r.tfrNext
|
||||
err := tfr.parseKDoc(key, r.term)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = tfr.parseV(val, r.includeTermVectors)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := preAlloced
|
||||
if rv == nil {
|
||||
rv = &index.TermFieldDoc{}
|
||||
}
|
||||
rv.ID = append(rv.ID, tfr.doc...)
|
||||
rv.Freq = tfr.freq
|
||||
rv.Norm = float64(tfr.norm)
|
||||
if tfr.vectors != nil {
|
||||
rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors)
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Advance(docID index.IndexInternalID, preAlloced *index.TermFieldDoc) (rv *index.TermFieldDoc, err error) {
|
||||
if r.iterator != nil {
|
||||
if r.tfrNext == nil {
|
||||
r.tfrNext = &TermFrequencyRow{}
|
||||
}
|
||||
tfr := InitTermFrequencyRow(r.tfrNext, r.term, r.field, docID, 0, 0)
|
||||
r.keyBuf, err = tfr.KeyAppendTo(r.keyBuf[:0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r.iterator.Seek(r.keyBuf)
|
||||
key, val, valid := r.iterator.Current()
|
||||
if valid {
|
||||
err := tfr.parseKDoc(key, r.term)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = tfr.parseV(val, r.includeTermVectors)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv = preAlloced
|
||||
if rv == nil {
|
||||
rv = &index.TermFieldDoc{}
|
||||
}
|
||||
rv.ID = append(rv.ID, tfr.doc...)
|
||||
rv.Freq = tfr.freq
|
||||
rv.Norm = float64(tfr.norm)
|
||||
if tfr.vectors != nil {
|
||||
rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors)
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Close() error {
|
||||
if r.indexReader != nil {
|
||||
atomic.AddUint64(&r.indexReader.index.stats.termSearchersFinished, uint64(1))
|
||||
}
|
||||
if r.iterator != nil {
|
||||
return r.iterator.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type UpsideDownCouchDocIDReader struct {
|
||||
indexReader *IndexReader
|
||||
iterator store.KVIterator
|
||||
only []string
|
||||
onlyPos int
|
||||
onlyMode bool
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchDocIDReader) Size() int {
|
||||
sizeInBytes := reflectStaticSizeUpsideDownCouchDocIDReader +
|
||||
reflectStaticSizeIndexReader + size.SizeOfPtr
|
||||
|
||||
for _, entry := range r.only {
|
||||
sizeInBytes += size.SizeOfString + len(entry)
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
|
||||
startBytes := []byte{0x0}
|
||||
endBytes := []byte{0xff}
|
||||
|
||||
bisr := NewBackIndexRow(startBytes, nil, nil)
|
||||
bier := NewBackIndexRow(endBytes, nil, nil)
|
||||
it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key())
|
||||
|
||||
return &UpsideDownCouchDocIDReader{
|
||||
indexReader: indexReader,
|
||||
iterator: it,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func newUpsideDownCouchDocIDReaderOnly(indexReader *IndexReader, ids []string) (*UpsideDownCouchDocIDReader, error) {
|
||||
// we don't actually own the list of ids, so if before we sort we must copy
|
||||
idsCopy := make([]string, len(ids))
|
||||
copy(idsCopy, ids)
|
||||
// ensure ids are sorted
|
||||
sort.Strings(idsCopy)
|
||||
startBytes := []byte{0x0}
|
||||
if len(idsCopy) > 0 {
|
||||
startBytes = []byte(idsCopy[0])
|
||||
}
|
||||
endBytes := []byte{0xff}
|
||||
if len(idsCopy) > 0 {
|
||||
endBytes = incrementBytes([]byte(idsCopy[len(idsCopy)-1]))
|
||||
}
|
||||
bisr := NewBackIndexRow(startBytes, nil, nil)
|
||||
bier := NewBackIndexRow(endBytes, nil, nil)
|
||||
it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key())
|
||||
|
||||
return &UpsideDownCouchDocIDReader{
|
||||
indexReader: indexReader,
|
||||
iterator: it,
|
||||
only: idsCopy,
|
||||
onlyMode: true,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchDocIDReader) Next() (index.IndexInternalID, error) {
|
||||
key, val, valid := r.iterator.Current()
|
||||
|
||||
if r.onlyMode {
|
||||
var rv index.IndexInternalID
|
||||
for valid && r.onlyPos < len(r.only) {
|
||||
br, err := NewBackIndexRowKV(key, val)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) {
|
||||
ok := r.nextOnly()
|
||||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
|
||||
key, val, valid = r.iterator.Current()
|
||||
continue
|
||||
} else {
|
||||
rv = append([]byte(nil), br.doc...)
|
||||
break
|
||||
}
|
||||
}
|
||||
if valid && r.onlyPos < len(r.only) {
|
||||
ok := r.nextOnly()
|
||||
if ok {
|
||||
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
} else {
|
||||
if valid {
|
||||
br, err := NewBackIndexRowKV(key, val)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := append([]byte(nil), br.doc...)
|
||||
r.iterator.Next()
|
||||
return rv, nil
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchDocIDReader) Advance(docID index.IndexInternalID) (index.IndexInternalID, error) {
|
||||
|
||||
if r.onlyMode {
|
||||
r.onlyPos = sort.SearchStrings(r.only, string(docID))
|
||||
if r.onlyPos >= len(r.only) {
|
||||
// advanced to key after our last only key
|
||||
return nil, nil
|
||||
}
|
||||
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
|
||||
key, val, valid := r.iterator.Current()
|
||||
|
||||
var rv index.IndexInternalID
|
||||
for valid && r.onlyPos < len(r.only) {
|
||||
br, err := NewBackIndexRowKV(key, val)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) {
|
||||
// the only key we seek'd to didn't exist
|
||||
// now look for the closest key that did exist in only
|
||||
r.onlyPos = sort.SearchStrings(r.only, string(br.doc))
|
||||
if r.onlyPos >= len(r.only) {
|
||||
// advanced to key after our last only key
|
||||
return nil, nil
|
||||
}
|
||||
// now seek to this new only key
|
||||
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
|
||||
key, val, valid = r.iterator.Current()
|
||||
continue
|
||||
} else {
|
||||
rv = append([]byte(nil), br.doc...)
|
||||
break
|
||||
}
|
||||
}
|
||||
if valid && r.onlyPos < len(r.only) {
|
||||
ok := r.nextOnly()
|
||||
if ok {
|
||||
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
} else {
|
||||
bir := NewBackIndexRow(docID, nil, nil)
|
||||
r.iterator.Seek(bir.Key())
|
||||
key, val, valid := r.iterator.Current()
|
||||
if valid {
|
||||
br, err := NewBackIndexRowKV(key, val)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := append([]byte(nil), br.doc...)
|
||||
r.iterator.Next()
|
||||
return rv, nil
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchDocIDReader) Close() error {
|
||||
return r.iterator.Close()
|
||||
}
|
||||
|
||||
// move the r.only pos forward one, skipping duplicates
|
||||
// return true if there is more data, or false if we got to the end of the list
|
||||
func (r *UpsideDownCouchDocIDReader) nextOnly() bool {
|
||||
|
||||
// advance 1 position, until we see a different key
|
||||
// it's already sorted, so this skips duplicates
|
||||
start := r.onlyPos
|
||||
r.onlyPos++
|
||||
for r.onlyPos < len(r.only) && r.only[r.onlyPos] == r.only[start] {
|
||||
start = r.onlyPos
|
||||
r.onlyPos++
|
||||
}
|
||||
// inidicate if we got to the end of the list
|
||||
return r.onlyPos < len(r.only)
|
||||
}
|
1141
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/row.go
generated
vendored
Normal file
1141
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/row.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
76
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/row_merge.go
generated
vendored
Normal file
76
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/row_merge.go
generated
vendored
Normal file
|
@ -0,0 +1,76 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
var mergeOperator upsideDownMerge
|
||||
|
||||
var dictionaryTermIncr []byte
|
||||
var dictionaryTermDecr []byte
|
||||
|
||||
func init() {
|
||||
dictionaryTermIncr = make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(dictionaryTermIncr, uint64(1))
|
||||
dictionaryTermDecr = make([]byte, 8)
|
||||
var negOne = int64(-1)
|
||||
binary.LittleEndian.PutUint64(dictionaryTermDecr, uint64(negOne))
|
||||
}
|
||||
|
||||
type upsideDownMerge struct{}
|
||||
|
||||
func (m *upsideDownMerge) FullMerge(key, existingValue []byte, operands [][]byte) ([]byte, bool) {
|
||||
// set up record based on key
|
||||
dr, err := NewDictionaryRowK(key)
|
||||
if err != nil {
|
||||
return nil, false
|
||||
}
|
||||
if len(existingValue) > 0 {
|
||||
// if existing value, parse it
|
||||
err = dr.parseDictionaryV(existingValue)
|
||||
if err != nil {
|
||||
return nil, false
|
||||
}
|
||||
}
|
||||
|
||||
// now process operands
|
||||
for _, operand := range operands {
|
||||
next := int64(binary.LittleEndian.Uint64(operand))
|
||||
if next < 0 && uint64(-next) > dr.count {
|
||||
// subtracting next from existing would overflow
|
||||
dr.count = 0
|
||||
} else if next < 0 {
|
||||
dr.count -= uint64(-next)
|
||||
} else {
|
||||
dr.count += uint64(next)
|
||||
}
|
||||
}
|
||||
|
||||
return dr.Value(), true
|
||||
}
|
||||
|
||||
func (m *upsideDownMerge) PartialMerge(key, leftOperand, rightOperand []byte) ([]byte, bool) {
|
||||
left := int64(binary.LittleEndian.Uint64(leftOperand))
|
||||
right := int64(binary.LittleEndian.Uint64(rightOperand))
|
||||
rv := make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(rv, uint64(left+right))
|
||||
return rv, true
|
||||
}
|
||||
|
||||
func (m *upsideDownMerge) Name() string {
|
||||
return "upsideDownMerge"
|
||||
}
|
55
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/stats.go
generated
vendored
Normal file
55
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/stats.go
generated
vendored
Normal file
|
@ -0,0 +1,55 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
type indexStat struct {
|
||||
updates, deletes, batches, errors uint64
|
||||
analysisTime, indexTime uint64
|
||||
termSearchersStarted uint64
|
||||
termSearchersFinished uint64
|
||||
numPlainTextBytesIndexed uint64
|
||||
i *UpsideDownCouch
|
||||
}
|
||||
|
||||
func (i *indexStat) statsMap() map[string]interface{} {
|
||||
m := map[string]interface{}{}
|
||||
m["updates"] = atomic.LoadUint64(&i.updates)
|
||||
m["deletes"] = atomic.LoadUint64(&i.deletes)
|
||||
m["batches"] = atomic.LoadUint64(&i.batches)
|
||||
m["errors"] = atomic.LoadUint64(&i.errors)
|
||||
m["analysis_time"] = atomic.LoadUint64(&i.analysisTime)
|
||||
m["index_time"] = atomic.LoadUint64(&i.indexTime)
|
||||
m["term_searchers_started"] = atomic.LoadUint64(&i.termSearchersStarted)
|
||||
m["term_searchers_finished"] = atomic.LoadUint64(&i.termSearchersFinished)
|
||||
m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&i.numPlainTextBytesIndexed)
|
||||
|
||||
if o, ok := i.i.store.(store.KVStoreStats); ok {
|
||||
m["kv"] = o.StatsMap()
|
||||
}
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
func (i *indexStat) MarshalJSON() ([]byte, error) {
|
||||
m := i.statsMap()
|
||||
return json.Marshal(m)
|
||||
}
|
85
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb/iterator.go
generated
vendored
Normal file
85
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb/iterator.go
generated
vendored
Normal file
|
@ -0,0 +1,85 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package boltdb
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
type Iterator struct {
|
||||
store *Store
|
||||
tx *bolt.Tx
|
||||
cursor *bolt.Cursor
|
||||
prefix []byte
|
||||
start []byte
|
||||
end []byte
|
||||
valid bool
|
||||
key []byte
|
||||
val []byte
|
||||
}
|
||||
|
||||
func (i *Iterator) updateValid() {
|
||||
i.valid = (i.key != nil)
|
||||
if i.valid {
|
||||
if i.prefix != nil {
|
||||
i.valid = bytes.HasPrefix(i.key, i.prefix)
|
||||
} else if i.end != nil {
|
||||
i.valid = bytes.Compare(i.key, i.end) < 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (i *Iterator) Seek(k []byte) {
|
||||
if i.start != nil && bytes.Compare(k, i.start) < 0 {
|
||||
k = i.start
|
||||
}
|
||||
if i.prefix != nil && !bytes.HasPrefix(k, i.prefix) {
|
||||
if bytes.Compare(k, i.prefix) < 0 {
|
||||
k = i.prefix
|
||||
} else {
|
||||
i.valid = false
|
||||
return
|
||||
}
|
||||
}
|
||||
i.key, i.val = i.cursor.Seek(k)
|
||||
i.updateValid()
|
||||
}
|
||||
|
||||
func (i *Iterator) Next() {
|
||||
i.key, i.val = i.cursor.Next()
|
||||
i.updateValid()
|
||||
}
|
||||
|
||||
func (i *Iterator) Current() ([]byte, []byte, bool) {
|
||||
return i.key, i.val, i.valid
|
||||
}
|
||||
|
||||
func (i *Iterator) Key() []byte {
|
||||
return i.key
|
||||
}
|
||||
|
||||
func (i *Iterator) Value() []byte {
|
||||
return i.val
|
||||
}
|
||||
|
||||
func (i *Iterator) Valid() bool {
|
||||
return i.valid
|
||||
}
|
||||
|
||||
func (i *Iterator) Close() error {
|
||||
return nil
|
||||
}
|
73
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb/reader.go
generated
vendored
Normal file
73
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb/reader.go
generated
vendored
Normal file
|
@ -0,0 +1,73 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package boltdb
|
||||
|
||||
import (
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
type Reader struct {
|
||||
store *Store
|
||||
tx *bolt.Tx
|
||||
bucket *bolt.Bucket
|
||||
}
|
||||
|
||||
func (r *Reader) Get(key []byte) ([]byte, error) {
|
||||
var rv []byte
|
||||
v := r.bucket.Get(key)
|
||||
if v != nil {
|
||||
rv = make([]byte, len(v))
|
||||
copy(rv, v)
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) {
|
||||
return store.MultiGet(r, keys)
|
||||
}
|
||||
|
||||
func (r *Reader) PrefixIterator(prefix []byte) store.KVIterator {
|
||||
cursor := r.bucket.Cursor()
|
||||
|
||||
rv := &Iterator{
|
||||
store: r.store,
|
||||
tx: r.tx,
|
||||
cursor: cursor,
|
||||
prefix: prefix,
|
||||
}
|
||||
|
||||
rv.Seek(prefix)
|
||||
return rv
|
||||
}
|
||||
|
||||
func (r *Reader) RangeIterator(start, end []byte) store.KVIterator {
|
||||
cursor := r.bucket.Cursor()
|
||||
|
||||
rv := &Iterator{
|
||||
store: r.store,
|
||||
tx: r.tx,
|
||||
cursor: cursor,
|
||||
start: start,
|
||||
end: end,
|
||||
}
|
||||
|
||||
rv.Seek(start)
|
||||
return rv
|
||||
}
|
||||
|
||||
func (r *Reader) Close() error {
|
||||
return r.tx.Rollback()
|
||||
}
|
26
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb/stats.go
generated
vendored
Normal file
26
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb/stats.go
generated
vendored
Normal file
|
@ -0,0 +1,26 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package boltdb
|
||||
|
||||
import "encoding/json"
|
||||
|
||||
type stats struct {
|
||||
s *Store
|
||||
}
|
||||
|
||||
func (s *stats) MarshalJSON() ([]byte, error) {
|
||||
bs := s.s.db.Stats()
|
||||
return json.Marshal(bs)
|
||||
}
|
181
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb/store.go
generated
vendored
Normal file
181
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb/store.go
generated
vendored
Normal file
|
@ -0,0 +1,181 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package boltdb implements a store.KVStore on top of BoltDB. It supports the
|
||||
// following options:
|
||||
//
|
||||
// "bucket" (string): the name of BoltDB bucket to use, defaults to "bleve".
|
||||
//
|
||||
// "nosync" (bool): if true, set boltdb.DB.NoSync to true. It speeds up index
|
||||
// operations in exchange of losing integrity guarantees if indexation aborts
|
||||
// without closing the index. Use it when rebuilding indexes from zero.
|
||||
package boltdb
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
const (
|
||||
Name = "boltdb"
|
||||
defaultCompactBatchSize = 100
|
||||
)
|
||||
|
||||
type Store struct {
|
||||
path string
|
||||
bucket string
|
||||
db *bolt.DB
|
||||
noSync bool
|
||||
fillPercent float64
|
||||
mo store.MergeOperator
|
||||
}
|
||||
|
||||
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) {
|
||||
path, ok := config["path"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify path")
|
||||
}
|
||||
if path == "" {
|
||||
return nil, os.ErrInvalid
|
||||
}
|
||||
|
||||
bucket, ok := config["bucket"].(string)
|
||||
if !ok {
|
||||
bucket = "bleve"
|
||||
}
|
||||
|
||||
noSync, _ := config["nosync"].(bool)
|
||||
|
||||
fillPercent, ok := config["fillPercent"].(float64)
|
||||
if !ok {
|
||||
fillPercent = bolt.DefaultFillPercent
|
||||
}
|
||||
|
||||
bo := &bolt.Options{}
|
||||
ro, ok := config["read_only"].(bool)
|
||||
if ok {
|
||||
bo.ReadOnly = ro
|
||||
}
|
||||
|
||||
if initialMmapSize, ok := config["initialMmapSize"].(int); ok {
|
||||
bo.InitialMmapSize = initialMmapSize
|
||||
} else if initialMmapSize, ok := config["initialMmapSize"].(float64); ok {
|
||||
bo.InitialMmapSize = int(initialMmapSize)
|
||||
}
|
||||
|
||||
db, err := bolt.Open(path, 0600, bo)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
db.NoSync = noSync
|
||||
|
||||
if !bo.ReadOnly {
|
||||
err = db.Update(func(tx *bolt.Tx) error {
|
||||
_, err := tx.CreateBucketIfNotExists([]byte(bucket))
|
||||
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
rv := Store{
|
||||
path: path,
|
||||
bucket: bucket,
|
||||
db: db,
|
||||
mo: mo,
|
||||
noSync: noSync,
|
||||
fillPercent: fillPercent,
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func (bs *Store) Close() error {
|
||||
return bs.db.Close()
|
||||
}
|
||||
|
||||
func (bs *Store) Reader() (store.KVReader, error) {
|
||||
tx, err := bs.db.Begin(false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &Reader{
|
||||
store: bs,
|
||||
tx: tx,
|
||||
bucket: tx.Bucket([]byte(bs.bucket)),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (bs *Store) Writer() (store.KVWriter, error) {
|
||||
return &Writer{
|
||||
store: bs,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (bs *Store) Stats() json.Marshaler {
|
||||
return &stats{
|
||||
s: bs,
|
||||
}
|
||||
}
|
||||
|
||||
// CompactWithBatchSize removes DictionaryTerm entries with a count of zero (in batchSize batches)
|
||||
// Removing entries is a workaround for github issue #374.
|
||||
func (bs *Store) CompactWithBatchSize(batchSize int) error {
|
||||
for {
|
||||
cnt := 0
|
||||
err := bs.db.Batch(func(tx *bolt.Tx) error {
|
||||
c := tx.Bucket([]byte(bs.bucket)).Cursor()
|
||||
prefix := []byte("d")
|
||||
|
||||
for k, v := c.Seek(prefix); bytes.HasPrefix(k, prefix); k, v = c.Next() {
|
||||
if bytes.Equal(v, []byte{0}) {
|
||||
cnt++
|
||||
if err := c.Delete(); err != nil {
|
||||
return err
|
||||
}
|
||||
if cnt == batchSize {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if cnt == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Compact calls CompactWithBatchSize with a default batch size of 100. This is a workaround
|
||||
// for github issue #374.
|
||||
func (bs *Store) Compact() error {
|
||||
return bs.CompactWithBatchSize(defaultCompactBatchSize)
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterKVStore(Name, New)
|
||||
}
|
95
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb/writer.go
generated
vendored
Normal file
95
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb/writer.go
generated
vendored
Normal file
|
@ -0,0 +1,95 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package boltdb
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
type Writer struct {
|
||||
store *Store
|
||||
}
|
||||
|
||||
func (w *Writer) NewBatch() store.KVBatch {
|
||||
return store.NewEmulatedBatch(w.store.mo)
|
||||
}
|
||||
|
||||
func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) {
|
||||
return make([]byte, options.TotalBytes), w.NewBatch(), nil
|
||||
}
|
||||
|
||||
func (w *Writer) ExecuteBatch(batch store.KVBatch) (err error) {
|
||||
|
||||
emulatedBatch, ok := batch.(*store.EmulatedBatch)
|
||||
if !ok {
|
||||
return fmt.Errorf("wrong type of batch")
|
||||
}
|
||||
|
||||
tx, err := w.store.db.Begin(true)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
// defer function to ensure that once started,
|
||||
// we either Commit tx or Rollback
|
||||
defer func() {
|
||||
// if nothing went wrong, commit
|
||||
if err == nil {
|
||||
// careful to catch error here too
|
||||
err = tx.Commit()
|
||||
} else {
|
||||
// caller should see error that caused abort,
|
||||
// not success or failure of Rollback itself
|
||||
_ = tx.Rollback()
|
||||
}
|
||||
}()
|
||||
|
||||
bucket := tx.Bucket([]byte(w.store.bucket))
|
||||
bucket.FillPercent = w.store.fillPercent
|
||||
|
||||
for k, mergeOps := range emulatedBatch.Merger.Merges {
|
||||
kb := []byte(k)
|
||||
existingVal := bucket.Get(kb)
|
||||
mergedVal, fullMergeOk := w.store.mo.FullMerge(kb, existingVal, mergeOps)
|
||||
if !fullMergeOk {
|
||||
err = fmt.Errorf("merge operator returned failure")
|
||||
return
|
||||
}
|
||||
err = bucket.Put(kb, mergedVal)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
for _, op := range emulatedBatch.Ops {
|
||||
if op.V != nil {
|
||||
err = bucket.Put(op.K, op.V)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
} else {
|
||||
err = bucket.Delete(op.K)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (w *Writer) Close() error {
|
||||
return nil
|
||||
}
|
152
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap/iterator.go
generated
vendored
Normal file
152
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap/iterator.go
generated
vendored
Normal file
|
@ -0,0 +1,152 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package gtreap provides an in-memory implementation of the
|
||||
// KVStore interfaces using the gtreap balanced-binary treap,
|
||||
// copy-on-write data structure.
|
||||
package gtreap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"sync"
|
||||
|
||||
"github.com/steveyen/gtreap"
|
||||
)
|
||||
|
||||
type Iterator struct {
|
||||
t *gtreap.Treap
|
||||
|
||||
m sync.Mutex
|
||||
cancelCh chan struct{}
|
||||
nextCh chan *Item
|
||||
curr *Item
|
||||
currOk bool
|
||||
|
||||
prefix []byte
|
||||
start []byte
|
||||
end []byte
|
||||
}
|
||||
|
||||
func (w *Iterator) Seek(k []byte) {
|
||||
if w.start != nil && bytes.Compare(k, w.start) < 0 {
|
||||
k = w.start
|
||||
}
|
||||
if w.prefix != nil && !bytes.HasPrefix(k, w.prefix) {
|
||||
if bytes.Compare(k, w.prefix) < 0 {
|
||||
k = w.prefix
|
||||
} else {
|
||||
var end []byte
|
||||
for i := len(w.prefix) - 1; i >= 0; i-- {
|
||||
c := w.prefix[i]
|
||||
if c < 0xff {
|
||||
end = make([]byte, i+1)
|
||||
copy(end, w.prefix)
|
||||
end[i] = c + 1
|
||||
break
|
||||
}
|
||||
}
|
||||
k = end
|
||||
}
|
||||
}
|
||||
w.restart(&Item{k: k})
|
||||
}
|
||||
|
||||
func (w *Iterator) restart(start *Item) *Iterator {
|
||||
cancelCh := make(chan struct{})
|
||||
nextCh := make(chan *Item, 1)
|
||||
|
||||
w.m.Lock()
|
||||
if w.cancelCh != nil {
|
||||
close(w.cancelCh)
|
||||
}
|
||||
w.cancelCh = cancelCh
|
||||
w.nextCh = nextCh
|
||||
w.curr = nil
|
||||
w.currOk = false
|
||||
w.m.Unlock()
|
||||
|
||||
go func() {
|
||||
if start != nil {
|
||||
w.t.VisitAscend(start, func(itm gtreap.Item) bool {
|
||||
select {
|
||||
case <-cancelCh:
|
||||
return false
|
||||
case nextCh <- itm.(*Item):
|
||||
return true
|
||||
}
|
||||
})
|
||||
}
|
||||
close(nextCh)
|
||||
}()
|
||||
|
||||
w.Next()
|
||||
|
||||
return w
|
||||
}
|
||||
|
||||
func (w *Iterator) Next() {
|
||||
w.m.Lock()
|
||||
nextCh := w.nextCh
|
||||
w.m.Unlock()
|
||||
w.curr, w.currOk = <-nextCh
|
||||
}
|
||||
|
||||
func (w *Iterator) Current() ([]byte, []byte, bool) {
|
||||
w.m.Lock()
|
||||
defer w.m.Unlock()
|
||||
if !w.currOk || w.curr == nil {
|
||||
return nil, nil, false
|
||||
}
|
||||
if w.prefix != nil && !bytes.HasPrefix(w.curr.k, w.prefix) {
|
||||
return nil, nil, false
|
||||
} else if w.end != nil && bytes.Compare(w.curr.k, w.end) >= 0 {
|
||||
return nil, nil, false
|
||||
}
|
||||
return w.curr.k, w.curr.v, w.currOk
|
||||
}
|
||||
|
||||
func (w *Iterator) Key() []byte {
|
||||
k, _, ok := w.Current()
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return k
|
||||
}
|
||||
|
||||
func (w *Iterator) Value() []byte {
|
||||
_, v, ok := w.Current()
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
func (w *Iterator) Valid() bool {
|
||||
_, _, ok := w.Current()
|
||||
return ok
|
||||
}
|
||||
|
||||
func (w *Iterator) Close() error {
|
||||
w.m.Lock()
|
||||
if w.cancelCh != nil {
|
||||
close(w.cancelCh)
|
||||
}
|
||||
w.cancelCh = nil
|
||||
w.nextCh = nil
|
||||
w.curr = nil
|
||||
w.currOk = false
|
||||
w.m.Unlock()
|
||||
|
||||
return nil
|
||||
}
|
66
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap/reader.go
generated
vendored
Normal file
66
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap/reader.go
generated
vendored
Normal file
|
@ -0,0 +1,66 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package gtreap provides an in-memory implementation of the
|
||||
// KVStore interfaces using the gtreap balanced-binary treap,
|
||||
// copy-on-write data structure.
|
||||
package gtreap
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/upsidedown_store_api"
|
||||
|
||||
"github.com/steveyen/gtreap"
|
||||
)
|
||||
|
||||
type Reader struct {
|
||||
t *gtreap.Treap
|
||||
}
|
||||
|
||||
func (w *Reader) Get(k []byte) (v []byte, err error) {
|
||||
var rv []byte
|
||||
itm := w.t.Get(&Item{k: k})
|
||||
if itm != nil {
|
||||
rv = make([]byte, len(itm.(*Item).v))
|
||||
copy(rv, itm.(*Item).v)
|
||||
return rv, nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) {
|
||||
return store.MultiGet(r, keys)
|
||||
}
|
||||
|
||||
func (w *Reader) PrefixIterator(k []byte) store.KVIterator {
|
||||
rv := Iterator{
|
||||
t: w.t,
|
||||
prefix: k,
|
||||
}
|
||||
rv.restart(&Item{k: k})
|
||||
return &rv
|
||||
}
|
||||
|
||||
func (w *Reader) RangeIterator(start, end []byte) store.KVIterator {
|
||||
rv := Iterator{
|
||||
t: w.t,
|
||||
start: start,
|
||||
end: end,
|
||||
}
|
||||
rv.restart(&Item{k: start})
|
||||
return &rv
|
||||
}
|
||||
|
||||
func (w *Reader) Close() error {
|
||||
return nil
|
||||
}
|
82
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap/store.go
generated
vendored
Normal file
82
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap/store.go
generated
vendored
Normal file
|
@ -0,0 +1,82 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package gtreap provides an in-memory implementation of the
|
||||
// KVStore interfaces using the gtreap balanced-binary treap,
|
||||
// copy-on-write data structure.
|
||||
|
||||
package gtreap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/upsidedown_store_api"
|
||||
"github.com/steveyen/gtreap"
|
||||
)
|
||||
|
||||
const Name = "gtreap"
|
||||
|
||||
type Store struct {
|
||||
m sync.Mutex
|
||||
t *gtreap.Treap
|
||||
mo store.MergeOperator
|
||||
}
|
||||
|
||||
type Item struct {
|
||||
k []byte
|
||||
v []byte
|
||||
}
|
||||
|
||||
func itemCompare(a, b interface{}) int {
|
||||
return bytes.Compare(a.(*Item).k, b.(*Item).k)
|
||||
}
|
||||
|
||||
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) {
|
||||
path, ok := config["path"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify path")
|
||||
}
|
||||
if path != "" {
|
||||
return nil, os.ErrInvalid
|
||||
}
|
||||
|
||||
rv := Store{
|
||||
t: gtreap.NewTreap(itemCompare),
|
||||
mo: mo,
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func (s *Store) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Store) Reader() (store.KVReader, error) {
|
||||
s.m.Lock()
|
||||
t := s.t
|
||||
s.m.Unlock()
|
||||
return &Reader{t: t}, nil
|
||||
}
|
||||
|
||||
func (s *Store) Writer() (store.KVWriter, error) {
|
||||
return &Writer{s: s}, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterKVStore(Name, New)
|
||||
}
|
76
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap/writer.go
generated
vendored
Normal file
76
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap/writer.go
generated
vendored
Normal file
|
@ -0,0 +1,76 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package gtreap provides an in-memory implementation of the
|
||||
// KVStore interfaces using the gtreap balanced-binary treap,
|
||||
// copy-on-write data structure.
|
||||
package gtreap
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
|
||||
"github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
type Writer struct {
|
||||
s *Store
|
||||
}
|
||||
|
||||
func (w *Writer) NewBatch() store.KVBatch {
|
||||
return store.NewEmulatedBatch(w.s.mo)
|
||||
}
|
||||
|
||||
func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) {
|
||||
return make([]byte, options.TotalBytes), w.NewBatch(), nil
|
||||
}
|
||||
|
||||
func (w *Writer) ExecuteBatch(batch store.KVBatch) error {
|
||||
|
||||
emulatedBatch, ok := batch.(*store.EmulatedBatch)
|
||||
if !ok {
|
||||
return fmt.Errorf("wrong type of batch")
|
||||
}
|
||||
|
||||
w.s.m.Lock()
|
||||
for k, mergeOps := range emulatedBatch.Merger.Merges {
|
||||
kb := []byte(k)
|
||||
var existingVal []byte
|
||||
existingItem := w.s.t.Get(&Item{k: kb})
|
||||
if existingItem != nil {
|
||||
existingVal = w.s.t.Get(&Item{k: kb}).(*Item).v
|
||||
}
|
||||
mergedVal, fullMergeOk := w.s.mo.FullMerge(kb, existingVal, mergeOps)
|
||||
if !fullMergeOk {
|
||||
return fmt.Errorf("merge operator returned failure")
|
||||
}
|
||||
w.s.t = w.s.t.Upsert(&Item{k: kb, v: mergedVal}, rand.Int())
|
||||
}
|
||||
|
||||
for _, op := range emulatedBatch.Ops {
|
||||
if op.V != nil {
|
||||
w.s.t = w.s.t.Upsert(&Item{k: op.K, v: op.V}, rand.Int())
|
||||
} else {
|
||||
w.s.t = w.s.t.Delete(&Item{k: op.K})
|
||||
}
|
||||
}
|
||||
w.s.m.Unlock()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (w *Writer) Close() error {
|
||||
w.s = nil
|
||||
return nil
|
||||
}
|
1069
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/upsidedown.go
generated
vendored
Normal file
1069
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/upsidedown.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
688
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/upsidedown.pb.go
generated
vendored
Normal file
688
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/upsidedown.pb.go
generated
vendored
Normal file
|
@ -0,0 +1,688 @@
|
|||
// Code generated by protoc-gen-gogo.
|
||||
// source: upsidedown.proto
|
||||
// DO NOT EDIT!
|
||||
|
||||
/*
|
||||
Package upsidedown is a generated protocol buffer package.
|
||||
|
||||
It is generated from these files:
|
||||
upsidedown.proto
|
||||
|
||||
It has these top-level messages:
|
||||
BackIndexTermsEntry
|
||||
BackIndexStoreEntry
|
||||
BackIndexRowValue
|
||||
*/
|
||||
package upsidedown
|
||||
|
||||
import proto "github.com/golang/protobuf/proto"
|
||||
import math "math"
|
||||
|
||||
import io "io"
|
||||
import fmt "fmt"
|
||||
import github_com_golang_protobuf_proto "github.com/golang/protobuf/proto"
|
||||
|
||||
// Reference imports to suppress errors if they are not otherwise used.
|
||||
var _ = proto.Marshal
|
||||
var _ = math.Inf
|
||||
|
||||
type BackIndexTermsEntry struct {
|
||||
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"`
|
||||
Terms []string `protobuf:"bytes,2,rep,name=terms" json:"terms,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *BackIndexTermsEntry) Reset() { *m = BackIndexTermsEntry{} }
|
||||
func (m *BackIndexTermsEntry) String() string { return proto.CompactTextString(m) }
|
||||
func (*BackIndexTermsEntry) ProtoMessage() {}
|
||||
|
||||
func (m *BackIndexTermsEntry) GetField() uint32 {
|
||||
if m != nil && m.Field != nil {
|
||||
return *m.Field
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (m *BackIndexTermsEntry) GetTerms() []string {
|
||||
if m != nil {
|
||||
return m.Terms
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type BackIndexStoreEntry struct {
|
||||
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"`
|
||||
ArrayPositions []uint64 `protobuf:"varint,2,rep,name=arrayPositions" json:"arrayPositions,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *BackIndexStoreEntry) Reset() { *m = BackIndexStoreEntry{} }
|
||||
func (m *BackIndexStoreEntry) String() string { return proto.CompactTextString(m) }
|
||||
func (*BackIndexStoreEntry) ProtoMessage() {}
|
||||
|
||||
func (m *BackIndexStoreEntry) GetField() uint32 {
|
||||
if m != nil && m.Field != nil {
|
||||
return *m.Field
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (m *BackIndexStoreEntry) GetArrayPositions() []uint64 {
|
||||
if m != nil {
|
||||
return m.ArrayPositions
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type BackIndexRowValue struct {
|
||||
TermsEntries []*BackIndexTermsEntry `protobuf:"bytes,1,rep,name=termsEntries" json:"termsEntries,omitempty"`
|
||||
StoredEntries []*BackIndexStoreEntry `protobuf:"bytes,2,rep,name=storedEntries" json:"storedEntries,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *BackIndexRowValue) Reset() { *m = BackIndexRowValue{} }
|
||||
func (m *BackIndexRowValue) String() string { return proto.CompactTextString(m) }
|
||||
func (*BackIndexRowValue) ProtoMessage() {}
|
||||
|
||||
func (m *BackIndexRowValue) GetTermsEntries() []*BackIndexTermsEntry {
|
||||
if m != nil {
|
||||
return m.TermsEntries
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *BackIndexRowValue) GetStoredEntries() []*BackIndexStoreEntry {
|
||||
if m != nil {
|
||||
return m.StoredEntries
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *BackIndexTermsEntry) Unmarshal(data []byte) error {
|
||||
var hasFields [1]uint64
|
||||
l := len(data)
|
||||
iNdEx := 0
|
||||
for iNdEx < l {
|
||||
var wire uint64
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
wire |= (uint64(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
fieldNum := int32(wire >> 3)
|
||||
wireType := int(wire & 0x7)
|
||||
switch fieldNum {
|
||||
case 1:
|
||||
if wireType != 0 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
|
||||
}
|
||||
var v uint32
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
v |= (uint32(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
m.Field = &v
|
||||
hasFields[0] |= uint64(0x00000001)
|
||||
case 2:
|
||||
if wireType != 2 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field Terms", wireType)
|
||||
}
|
||||
var stringLen uint64
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
stringLen |= (uint64(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
postIndex := iNdEx + int(stringLen)
|
||||
if postIndex > l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
m.Terms = append(m.Terms, string(data[iNdEx:postIndex]))
|
||||
iNdEx = postIndex
|
||||
default:
|
||||
var sizeOfWire int
|
||||
for {
|
||||
sizeOfWire++
|
||||
wire >>= 7
|
||||
if wire == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
iNdEx -= sizeOfWire
|
||||
skippy, err := skipUpsidedown(data[iNdEx:])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skippy < 0 {
|
||||
return ErrInvalidLengthUpsidedown
|
||||
}
|
||||
if (iNdEx + skippy) > l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
|
||||
iNdEx += skippy
|
||||
}
|
||||
}
|
||||
if hasFields[0]&uint64(0x00000001) == 0 {
|
||||
return new(github_com_golang_protobuf_proto.RequiredNotSetError)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
func (m *BackIndexStoreEntry) Unmarshal(data []byte) error {
|
||||
var hasFields [1]uint64
|
||||
l := len(data)
|
||||
iNdEx := 0
|
||||
for iNdEx < l {
|
||||
var wire uint64
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
wire |= (uint64(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
fieldNum := int32(wire >> 3)
|
||||
wireType := int(wire & 0x7)
|
||||
switch fieldNum {
|
||||
case 1:
|
||||
if wireType != 0 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
|
||||
}
|
||||
var v uint32
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
v |= (uint32(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
m.Field = &v
|
||||
hasFields[0] |= uint64(0x00000001)
|
||||
case 2:
|
||||
if wireType != 0 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field ArrayPositions", wireType)
|
||||
}
|
||||
var v uint64
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
v |= (uint64(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
m.ArrayPositions = append(m.ArrayPositions, v)
|
||||
default:
|
||||
var sizeOfWire int
|
||||
for {
|
||||
sizeOfWire++
|
||||
wire >>= 7
|
||||
if wire == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
iNdEx -= sizeOfWire
|
||||
skippy, err := skipUpsidedown(data[iNdEx:])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skippy < 0 {
|
||||
return ErrInvalidLengthUpsidedown
|
||||
}
|
||||
if (iNdEx + skippy) > l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
|
||||
iNdEx += skippy
|
||||
}
|
||||
}
|
||||
if hasFields[0]&uint64(0x00000001) == 0 {
|
||||
return new(github_com_golang_protobuf_proto.RequiredNotSetError)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
func (m *BackIndexRowValue) Unmarshal(data []byte) error {
|
||||
l := len(data)
|
||||
iNdEx := 0
|
||||
for iNdEx < l {
|
||||
var wire uint64
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
wire |= (uint64(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
fieldNum := int32(wire >> 3)
|
||||
wireType := int(wire & 0x7)
|
||||
switch fieldNum {
|
||||
case 1:
|
||||
if wireType != 2 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field TermsEntries", wireType)
|
||||
}
|
||||
var msglen int
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
msglen |= (int(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
postIndex := iNdEx + msglen
|
||||
if msglen < 0 {
|
||||
return ErrInvalidLengthUpsidedown
|
||||
}
|
||||
if postIndex > l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
m.TermsEntries = append(m.TermsEntries, &BackIndexTermsEntry{})
|
||||
if err := m.TermsEntries[len(m.TermsEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
|
||||
return err
|
||||
}
|
||||
iNdEx = postIndex
|
||||
case 2:
|
||||
if wireType != 2 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field StoredEntries", wireType)
|
||||
}
|
||||
var msglen int
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
msglen |= (int(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
postIndex := iNdEx + msglen
|
||||
if msglen < 0 {
|
||||
return ErrInvalidLengthUpsidedown
|
||||
}
|
||||
if postIndex > l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
m.StoredEntries = append(m.StoredEntries, &BackIndexStoreEntry{})
|
||||
if err := m.StoredEntries[len(m.StoredEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
|
||||
return err
|
||||
}
|
||||
iNdEx = postIndex
|
||||
default:
|
||||
var sizeOfWire int
|
||||
for {
|
||||
sizeOfWire++
|
||||
wire >>= 7
|
||||
if wire == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
iNdEx -= sizeOfWire
|
||||
skippy, err := skipUpsidedown(data[iNdEx:])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skippy < 0 {
|
||||
return ErrInvalidLengthUpsidedown
|
||||
}
|
||||
if (iNdEx + skippy) > l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
|
||||
iNdEx += skippy
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
func skipUpsidedown(data []byte) (n int, err error) {
|
||||
l := len(data)
|
||||
iNdEx := 0
|
||||
for iNdEx < l {
|
||||
var wire uint64
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return 0, io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
wire |= (uint64(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
wireType := int(wire & 0x7)
|
||||
switch wireType {
|
||||
case 0:
|
||||
for {
|
||||
if iNdEx >= l {
|
||||
return 0, io.ErrUnexpectedEOF
|
||||
}
|
||||
iNdEx++
|
||||
if data[iNdEx-1] < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
return iNdEx, nil
|
||||
case 1:
|
||||
iNdEx += 8
|
||||
return iNdEx, nil
|
||||
case 2:
|
||||
var length int
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return 0, io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
length |= (int(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
iNdEx += length
|
||||
if length < 0 {
|
||||
return 0, ErrInvalidLengthUpsidedown
|
||||
}
|
||||
return iNdEx, nil
|
||||
case 3:
|
||||
for {
|
||||
var innerWire uint64
|
||||
var start int = iNdEx
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return 0, io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
innerWire |= (uint64(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
innerWireType := int(innerWire & 0x7)
|
||||
if innerWireType == 4 {
|
||||
break
|
||||
}
|
||||
next, err := skipUpsidedown(data[start:])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
iNdEx = start + next
|
||||
}
|
||||
return iNdEx, nil
|
||||
case 4:
|
||||
return iNdEx, nil
|
||||
case 5:
|
||||
iNdEx += 4
|
||||
return iNdEx, nil
|
||||
default:
|
||||
return 0, fmt.Errorf("proto: illegal wireType %d", wireType)
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
var (
|
||||
ErrInvalidLengthUpsidedown = fmt.Errorf("proto: negative length found during unmarshaling")
|
||||
)
|
||||
|
||||
func (m *BackIndexTermsEntry) Size() (n int) {
|
||||
var l int
|
||||
_ = l
|
||||
if m.Field != nil {
|
||||
n += 1 + sovUpsidedown(uint64(*m.Field))
|
||||
}
|
||||
if len(m.Terms) > 0 {
|
||||
for _, s := range m.Terms {
|
||||
l = len(s)
|
||||
n += 1 + l + sovUpsidedown(uint64(l))
|
||||
}
|
||||
}
|
||||
if m.XXX_unrecognized != nil {
|
||||
n += len(m.XXX_unrecognized)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func (m *BackIndexStoreEntry) Size() (n int) {
|
||||
var l int
|
||||
_ = l
|
||||
if m.Field != nil {
|
||||
n += 1 + sovUpsidedown(uint64(*m.Field))
|
||||
}
|
||||
if len(m.ArrayPositions) > 0 {
|
||||
for _, e := range m.ArrayPositions {
|
||||
n += 1 + sovUpsidedown(uint64(e))
|
||||
}
|
||||
}
|
||||
if m.XXX_unrecognized != nil {
|
||||
n += len(m.XXX_unrecognized)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func (m *BackIndexRowValue) Size() (n int) {
|
||||
var l int
|
||||
_ = l
|
||||
if len(m.TermsEntries) > 0 {
|
||||
for _, e := range m.TermsEntries {
|
||||
l = e.Size()
|
||||
n += 1 + l + sovUpsidedown(uint64(l))
|
||||
}
|
||||
}
|
||||
if len(m.StoredEntries) > 0 {
|
||||
for _, e := range m.StoredEntries {
|
||||
l = e.Size()
|
||||
n += 1 + l + sovUpsidedown(uint64(l))
|
||||
}
|
||||
}
|
||||
if m.XXX_unrecognized != nil {
|
||||
n += len(m.XXX_unrecognized)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func sovUpsidedown(x uint64) (n int) {
|
||||
for {
|
||||
n++
|
||||
x >>= 7
|
||||
if x == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
func sozUpsidedown(x uint64) (n int) {
|
||||
return sovUpsidedown(uint64((x << 1) ^ uint64((int64(x) >> 63))))
|
||||
}
|
||||
func (m *BackIndexTermsEntry) Marshal() (data []byte, err error) {
|
||||
size := m.Size()
|
||||
data = make([]byte, size)
|
||||
n, err := m.MarshalTo(data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return data[:n], nil
|
||||
}
|
||||
|
||||
func (m *BackIndexTermsEntry) MarshalTo(data []byte) (n int, err error) {
|
||||
var i int
|
||||
_ = i
|
||||
var l int
|
||||
_ = l
|
||||
if m.Field == nil {
|
||||
return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
|
||||
} else {
|
||||
data[i] = 0x8
|
||||
i++
|
||||
i = encodeVarintUpsidedown(data, i, uint64(*m.Field))
|
||||
}
|
||||
if len(m.Terms) > 0 {
|
||||
for _, s := range m.Terms {
|
||||
data[i] = 0x12
|
||||
i++
|
||||
l = len(s)
|
||||
for l >= 1<<7 {
|
||||
data[i] = uint8(uint64(l)&0x7f | 0x80)
|
||||
l >>= 7
|
||||
i++
|
||||
}
|
||||
data[i] = uint8(l)
|
||||
i++
|
||||
i += copy(data[i:], s)
|
||||
}
|
||||
}
|
||||
if m.XXX_unrecognized != nil {
|
||||
i += copy(data[i:], m.XXX_unrecognized)
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
func (m *BackIndexStoreEntry) Marshal() (data []byte, err error) {
|
||||
size := m.Size()
|
||||
data = make([]byte, size)
|
||||
n, err := m.MarshalTo(data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return data[:n], nil
|
||||
}
|
||||
|
||||
func (m *BackIndexStoreEntry) MarshalTo(data []byte) (n int, err error) {
|
||||
var i int
|
||||
_ = i
|
||||
var l int
|
||||
_ = l
|
||||
if m.Field == nil {
|
||||
return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
|
||||
} else {
|
||||
data[i] = 0x8
|
||||
i++
|
||||
i = encodeVarintUpsidedown(data, i, uint64(*m.Field))
|
||||
}
|
||||
if len(m.ArrayPositions) > 0 {
|
||||
for _, num := range m.ArrayPositions {
|
||||
data[i] = 0x10
|
||||
i++
|
||||
i = encodeVarintUpsidedown(data, i, uint64(num))
|
||||
}
|
||||
}
|
||||
if m.XXX_unrecognized != nil {
|
||||
i += copy(data[i:], m.XXX_unrecognized)
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
func (m *BackIndexRowValue) Marshal() (data []byte, err error) {
|
||||
size := m.Size()
|
||||
data = make([]byte, size)
|
||||
n, err := m.MarshalTo(data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return data[:n], nil
|
||||
}
|
||||
|
||||
func (m *BackIndexRowValue) MarshalTo(data []byte) (n int, err error) {
|
||||
var i int
|
||||
_ = i
|
||||
var l int
|
||||
_ = l
|
||||
if len(m.TermsEntries) > 0 {
|
||||
for _, msg := range m.TermsEntries {
|
||||
data[i] = 0xa
|
||||
i++
|
||||
i = encodeVarintUpsidedown(data, i, uint64(msg.Size()))
|
||||
n, err := msg.MarshalTo(data[i:])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
i += n
|
||||
}
|
||||
}
|
||||
if len(m.StoredEntries) > 0 {
|
||||
for _, msg := range m.StoredEntries {
|
||||
data[i] = 0x12
|
||||
i++
|
||||
i = encodeVarintUpsidedown(data, i, uint64(msg.Size()))
|
||||
n, err := msg.MarshalTo(data[i:])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
i += n
|
||||
}
|
||||
}
|
||||
if m.XXX_unrecognized != nil {
|
||||
i += copy(data[i:], m.XXX_unrecognized)
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
func encodeFixed64Upsidedown(data []byte, offset int, v uint64) int {
|
||||
data[offset] = uint8(v)
|
||||
data[offset+1] = uint8(v >> 8)
|
||||
data[offset+2] = uint8(v >> 16)
|
||||
data[offset+3] = uint8(v >> 24)
|
||||
data[offset+4] = uint8(v >> 32)
|
||||
data[offset+5] = uint8(v >> 40)
|
||||
data[offset+6] = uint8(v >> 48)
|
||||
data[offset+7] = uint8(v >> 56)
|
||||
return offset + 8
|
||||
}
|
||||
func encodeFixed32Upsidedown(data []byte, offset int, v uint32) int {
|
||||
data[offset] = uint8(v)
|
||||
data[offset+1] = uint8(v >> 8)
|
||||
data[offset+2] = uint8(v >> 16)
|
||||
data[offset+3] = uint8(v >> 24)
|
||||
return offset + 4
|
||||
}
|
||||
func encodeVarintUpsidedown(data []byte, offset int, v uint64) int {
|
||||
for v >= 1<<7 {
|
||||
data[offset] = uint8(v&0x7f | 0x80)
|
||||
v >>= 7
|
||||
offset++
|
||||
}
|
||||
data[offset] = uint8(v)
|
||||
return offset + 1
|
||||
}
|
14
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/upsidedown.proto
generated
vendored
Normal file
14
vendor/github.com/blevesearch/bleve/v2/index/upsidedown/upsidedown.proto
generated
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
message BackIndexTermsEntry {
|
||||
required uint32 field = 1;
|
||||
repeated string terms = 2;
|
||||
}
|
||||
|
||||
message BackIndexStoreEntry {
|
||||
required uint32 field = 1;
|
||||
repeated uint64 arrayPositions = 2;
|
||||
}
|
||||
|
||||
message BackIndexRowValue {
|
||||
repeated BackIndexTermsEntry termsEntries = 1;
|
||||
repeated BackIndexStoreEntry storedEntries = 2;
|
||||
}
|
37
vendor/github.com/blevesearch/bleve/v2/index_alias.go
generated
vendored
Normal file
37
vendor/github.com/blevesearch/bleve/v2/index_alias.go
generated
vendored
Normal file
|
@ -0,0 +1,37 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve
|
||||
|
||||
// An IndexAlias is a wrapper around one or more
|
||||
// Index objects. It has two distinct modes of
|
||||
// operation.
|
||||
// 1. When it points to a single index, ALL index
|
||||
// operations are valid and will be passed through
|
||||
// to the underlying index.
|
||||
// 2. When it points to more than one index, the only
|
||||
// valid operation is Search. In this case the
|
||||
// search will be performed across all the
|
||||
// underlying indexes and the results merged.
|
||||
// Calls to Add/Remove/Swap the underlying indexes
|
||||
// are atomic, so you can safely change the
|
||||
// underlying Index objects while other components
|
||||
// are performing operations.
|
||||
type IndexAlias interface {
|
||||
Index
|
||||
|
||||
Add(i ...Index)
|
||||
Remove(i ...Index)
|
||||
Swap(in, out []Index)
|
||||
}
|
612
vendor/github.com/blevesearch/bleve/v2/index_alias_impl.go
generated
vendored
Normal file
612
vendor/github.com/blevesearch/bleve/v2/index_alias_impl.go
generated
vendored
Normal file
|
@ -0,0 +1,612 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/mapping"
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
type indexAliasImpl struct {
|
||||
name string
|
||||
indexes []Index
|
||||
mutex sync.RWMutex
|
||||
open bool
|
||||
}
|
||||
|
||||
// NewIndexAlias creates a new IndexAlias over the provided
|
||||
// Index objects.
|
||||
func NewIndexAlias(indexes ...Index) *indexAliasImpl {
|
||||
return &indexAliasImpl{
|
||||
name: "alias",
|
||||
indexes: indexes,
|
||||
open: true,
|
||||
}
|
||||
}
|
||||
|
||||
// VisitIndexes invokes the visit callback on every
|
||||
// indexes included in the index alias.
|
||||
func (i *indexAliasImpl) VisitIndexes(visit func(Index)) {
|
||||
i.mutex.RLock()
|
||||
for _, idx := range i.indexes {
|
||||
visit(idx)
|
||||
}
|
||||
i.mutex.RUnlock()
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) isAliasToSingleIndex() error {
|
||||
if len(i.indexes) < 1 {
|
||||
return ErrorAliasEmpty
|
||||
} else if len(i.indexes) > 1 {
|
||||
return ErrorAliasMulti
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) Index(id string, data interface{}) error {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return ErrorIndexClosed
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return i.indexes[0].Index(id, data)
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) Delete(id string) error {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return ErrorIndexClosed
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return i.indexes[0].Delete(id)
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) Batch(b *Batch) error {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return ErrorIndexClosed
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return i.indexes[0].Batch(b)
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) Document(id string) (index.Document, error) {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return i.indexes[0].Document(id)
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) DocCount() (uint64, error) {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
rv := uint64(0)
|
||||
|
||||
if !i.open {
|
||||
return 0, ErrorIndexClosed
|
||||
}
|
||||
|
||||
for _, index := range i.indexes {
|
||||
otherCount, err := index.DocCount()
|
||||
if err == nil {
|
||||
rv += otherCount
|
||||
}
|
||||
// tolerate errors to produce partial counts
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) Search(req *SearchRequest) (*SearchResult, error) {
|
||||
return i.SearchInContext(context.Background(), req)
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest) (*SearchResult, error) {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
if len(i.indexes) < 1 {
|
||||
return nil, ErrorAliasEmpty
|
||||
}
|
||||
|
||||
// short circuit the simple case
|
||||
if len(i.indexes) == 1 {
|
||||
return i.indexes[0].SearchInContext(ctx, req)
|
||||
}
|
||||
|
||||
return MultiSearch(ctx, req, i.indexes...)
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) Fields() ([]string, error) {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return i.indexes[0].Fields()
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) FieldDict(field string) (index.FieldDict, error) {
|
||||
i.mutex.RLock()
|
||||
|
||||
if !i.open {
|
||||
i.mutex.RUnlock()
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fieldDict, err := i.indexes[0].FieldDict(field)
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &indexAliasImplFieldDict{
|
||||
index: i,
|
||||
fieldDict: fieldDict,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
|
||||
i.mutex.RLock()
|
||||
|
||||
if !i.open {
|
||||
i.mutex.RUnlock()
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fieldDict, err := i.indexes[0].FieldDictRange(field, startTerm, endTerm)
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &indexAliasImplFieldDict{
|
||||
index: i,
|
||||
fieldDict: fieldDict,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
|
||||
i.mutex.RLock()
|
||||
|
||||
if !i.open {
|
||||
i.mutex.RUnlock()
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fieldDict, err := i.indexes[0].FieldDictPrefix(field, termPrefix)
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &indexAliasImplFieldDict{
|
||||
index: i,
|
||||
fieldDict: fieldDict,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) Close() error {
|
||||
i.mutex.Lock()
|
||||
defer i.mutex.Unlock()
|
||||
|
||||
i.open = false
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) Mapping() mapping.IndexMapping {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return i.indexes[0].Mapping()
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) Stats() *IndexStat {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return i.indexes[0].Stats()
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) StatsMap() map[string]interface{} {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return i.indexes[0].StatsMap()
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) GetInternal(key []byte) ([]byte, error) {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return i.indexes[0].GetInternal(key)
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) SetInternal(key, val []byte) error {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return ErrorIndexClosed
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return i.indexes[0].SetInternal(key, val)
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) DeleteInternal(key []byte) error {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return ErrorIndexClosed
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return i.indexes[0].DeleteInternal(key)
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) Advanced() (index.Index, error) {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return i.indexes[0].Advanced()
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) Add(indexes ...Index) {
|
||||
i.mutex.Lock()
|
||||
defer i.mutex.Unlock()
|
||||
|
||||
i.indexes = append(i.indexes, indexes...)
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) removeSingle(index Index) {
|
||||
for pos, in := range i.indexes {
|
||||
if in == index {
|
||||
i.indexes = append(i.indexes[:pos], i.indexes[pos+1:]...)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) Remove(indexes ...Index) {
|
||||
i.mutex.Lock()
|
||||
defer i.mutex.Unlock()
|
||||
|
||||
for _, in := range indexes {
|
||||
i.removeSingle(in)
|
||||
}
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) Swap(in, out []Index) {
|
||||
i.mutex.Lock()
|
||||
defer i.mutex.Unlock()
|
||||
|
||||
// add
|
||||
i.indexes = append(i.indexes, in...)
|
||||
|
||||
// delete
|
||||
for _, ind := range out {
|
||||
i.removeSingle(ind)
|
||||
}
|
||||
}
|
||||
|
||||
// createChildSearchRequest creates a separate
|
||||
// request from the original
|
||||
// For now, avoid data race on req structure.
|
||||
// TODO disable highlight/field load on child
|
||||
// requests, and add code to do this only on
|
||||
// the actual final results.
|
||||
// Perhaps that part needs to be optional,
|
||||
// could be slower in remote usages.
|
||||
func createChildSearchRequest(req *SearchRequest) *SearchRequest {
|
||||
rv := SearchRequest{
|
||||
Query: req.Query,
|
||||
Size: req.Size + req.From,
|
||||
From: 0,
|
||||
Highlight: req.Highlight,
|
||||
Fields: req.Fields,
|
||||
Facets: req.Facets,
|
||||
Explain: req.Explain,
|
||||
Sort: req.Sort.Copy(),
|
||||
IncludeLocations: req.IncludeLocations,
|
||||
Score: req.Score,
|
||||
SearchAfter: req.SearchAfter,
|
||||
SearchBefore: req.SearchBefore,
|
||||
}
|
||||
return &rv
|
||||
}
|
||||
|
||||
type asyncSearchResult struct {
|
||||
Name string
|
||||
Result *SearchResult
|
||||
Err error
|
||||
}
|
||||
|
||||
// MultiSearch executes a SearchRequest across multiple Index objects,
|
||||
// then merges the results. The indexes must honor any ctx deadline.
|
||||
func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*SearchResult, error) {
|
||||
|
||||
searchStart := time.Now()
|
||||
asyncResults := make(chan *asyncSearchResult, len(indexes))
|
||||
|
||||
var reverseQueryExecution bool
|
||||
if req.SearchBefore != nil {
|
||||
reverseQueryExecution = true
|
||||
req.Sort.Reverse()
|
||||
req.SearchAfter = req.SearchBefore
|
||||
req.SearchBefore = nil
|
||||
}
|
||||
|
||||
// run search on each index in separate go routine
|
||||
var waitGroup sync.WaitGroup
|
||||
|
||||
var searchChildIndex = func(in Index, childReq *SearchRequest) {
|
||||
rv := asyncSearchResult{Name: in.Name()}
|
||||
rv.Result, rv.Err = in.SearchInContext(ctx, childReq)
|
||||
asyncResults <- &rv
|
||||
waitGroup.Done()
|
||||
}
|
||||
|
||||
waitGroup.Add(len(indexes))
|
||||
for _, in := range indexes {
|
||||
go searchChildIndex(in, createChildSearchRequest(req))
|
||||
}
|
||||
|
||||
// on another go routine, close after finished
|
||||
go func() {
|
||||
waitGroup.Wait()
|
||||
close(asyncResults)
|
||||
}()
|
||||
|
||||
var sr *SearchResult
|
||||
indexErrors := make(map[string]error)
|
||||
|
||||
for asr := range asyncResults {
|
||||
if asr.Err == nil {
|
||||
if sr == nil {
|
||||
// first result
|
||||
sr = asr.Result
|
||||
} else {
|
||||
// merge with previous
|
||||
sr.Merge(asr.Result)
|
||||
}
|
||||
} else {
|
||||
indexErrors[asr.Name] = asr.Err
|
||||
}
|
||||
}
|
||||
|
||||
// merge just concatenated all the hits
|
||||
// now lets clean it up
|
||||
|
||||
// handle case where no results were successful
|
||||
if sr == nil {
|
||||
sr = &SearchResult{
|
||||
Status: &SearchStatus{
|
||||
Errors: make(map[string]error),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
sortFunc := req.SortFunc()
|
||||
// sort all hits with the requested order
|
||||
if len(req.Sort) > 0 {
|
||||
sorter := newSearchHitSorter(req.Sort, sr.Hits)
|
||||
sortFunc(sorter)
|
||||
}
|
||||
|
||||
// now skip over the correct From
|
||||
if req.From > 0 && len(sr.Hits) > req.From {
|
||||
sr.Hits = sr.Hits[req.From:]
|
||||
} else if req.From > 0 {
|
||||
sr.Hits = search.DocumentMatchCollection{}
|
||||
}
|
||||
|
||||
// now trim to the correct size
|
||||
if req.Size > 0 && len(sr.Hits) > req.Size {
|
||||
sr.Hits = sr.Hits[0:req.Size]
|
||||
}
|
||||
|
||||
// fix up facets
|
||||
for name, fr := range req.Facets {
|
||||
sr.Facets.Fixup(name, fr.Size)
|
||||
}
|
||||
|
||||
if reverseQueryExecution {
|
||||
// reverse the sort back to the original
|
||||
req.Sort.Reverse()
|
||||
// resort using the original order
|
||||
mhs := newSearchHitSorter(req.Sort, sr.Hits)
|
||||
sortFunc(mhs)
|
||||
// reset request
|
||||
req.SearchBefore = req.SearchAfter
|
||||
req.SearchAfter = nil
|
||||
}
|
||||
|
||||
// fix up original request
|
||||
sr.Request = req
|
||||
searchDuration := time.Since(searchStart)
|
||||
sr.Took = searchDuration
|
||||
|
||||
// fix up errors
|
||||
if len(indexErrors) > 0 {
|
||||
if sr.Status.Errors == nil {
|
||||
sr.Status.Errors = make(map[string]error)
|
||||
}
|
||||
for indexName, indexErr := range indexErrors {
|
||||
sr.Status.Errors[indexName] = indexErr
|
||||
sr.Status.Total++
|
||||
sr.Status.Failed++
|
||||
}
|
||||
}
|
||||
|
||||
return sr, nil
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) NewBatch() *Batch {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return i.indexes[0].NewBatch()
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) Name() string {
|
||||
return i.name
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) SetName(name string) {
|
||||
i.name = name
|
||||
}
|
||||
|
||||
type indexAliasImplFieldDict struct {
|
||||
index *indexAliasImpl
|
||||
fieldDict index.FieldDict
|
||||
}
|
||||
|
||||
func (f *indexAliasImplFieldDict) Next() (*index.DictEntry, error) {
|
||||
return f.fieldDict.Next()
|
||||
}
|
||||
|
||||
func (f *indexAliasImplFieldDict) Close() error {
|
||||
defer f.index.mutex.RUnlock()
|
||||
return f.fieldDict.Close()
|
||||
}
|
912
vendor/github.com/blevesearch/bleve/v2/index_impl.go
generated
vendored
Normal file
912
vendor/github.com/blevesearch/bleve/v2/index_impl.go
generated
vendored
Normal file
|
@ -0,0 +1,912 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/document"
|
||||
"github.com/blevesearch/bleve/v2/index/upsidedown"
|
||||
"github.com/blevesearch/bleve/v2/mapping"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/search/collector"
|
||||
"github.com/blevesearch/bleve/v2/search/facet"
|
||||
"github.com/blevesearch/bleve/v2/search/highlight"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
type indexImpl struct {
|
||||
path string
|
||||
name string
|
||||
meta *indexMeta
|
||||
i index.Index
|
||||
m mapping.IndexMapping
|
||||
mutex sync.RWMutex
|
||||
open bool
|
||||
stats *IndexStat
|
||||
}
|
||||
|
||||
const storePath = "store"
|
||||
|
||||
var mappingInternalKey = []byte("_mapping")
|
||||
|
||||
const SearchQueryStartCallbackKey = "_search_query_start_callback_key"
|
||||
const SearchQueryEndCallbackKey = "_search_query_end_callback_key"
|
||||
|
||||
type SearchQueryStartCallbackFn func(size uint64) error
|
||||
type SearchQueryEndCallbackFn func(size uint64) error
|
||||
|
||||
func indexStorePath(path string) string {
|
||||
return path + string(os.PathSeparator) + storePath
|
||||
}
|
||||
|
||||
func newIndexUsing(path string, mapping mapping.IndexMapping, indexType string, kvstore string, kvconfig map[string]interface{}) (*indexImpl, error) {
|
||||
// first validate the mapping
|
||||
err := mapping.Validate()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if kvconfig == nil {
|
||||
kvconfig = map[string]interface{}{}
|
||||
}
|
||||
|
||||
if kvstore == "" {
|
||||
return nil, fmt.Errorf("bleve not configured for file based indexing")
|
||||
}
|
||||
|
||||
rv := indexImpl{
|
||||
path: path,
|
||||
name: path,
|
||||
m: mapping,
|
||||
meta: newIndexMeta(indexType, kvstore, kvconfig),
|
||||
}
|
||||
rv.stats = &IndexStat{i: &rv}
|
||||
// at this point there is hope that we can be successful, so save index meta
|
||||
if path != "" {
|
||||
err = rv.meta.Save(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
kvconfig["create_if_missing"] = true
|
||||
kvconfig["error_if_exists"] = true
|
||||
kvconfig["path"] = indexStorePath(path)
|
||||
} else {
|
||||
kvconfig["path"] = ""
|
||||
}
|
||||
|
||||
// open the index
|
||||
indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType)
|
||||
if indexTypeConstructor == nil {
|
||||
return nil, ErrorUnknownIndexType
|
||||
}
|
||||
|
||||
rv.i, err = indexTypeConstructor(rv.meta.Storage, kvconfig, Config.analysisQueue)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = rv.i.Open()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func(rv *indexImpl) {
|
||||
if !rv.open {
|
||||
rv.i.Close()
|
||||
}
|
||||
}(&rv)
|
||||
|
||||
// now persist the mapping
|
||||
mappingBytes, err := json.Marshal(mapping)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = rv.i.SetInternal(mappingInternalKey, mappingBytes)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// mark the index as open
|
||||
rv.mutex.Lock()
|
||||
defer rv.mutex.Unlock()
|
||||
rv.open = true
|
||||
indexStats.Register(&rv)
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *indexImpl, err error) {
|
||||
rv = &indexImpl{
|
||||
path: path,
|
||||
name: path,
|
||||
}
|
||||
rv.stats = &IndexStat{i: rv}
|
||||
|
||||
rv.meta, err = openIndexMeta(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// backwards compatibility if index type is missing
|
||||
if rv.meta.IndexType == "" {
|
||||
rv.meta.IndexType = upsidedown.Name
|
||||
}
|
||||
|
||||
storeConfig := rv.meta.Config
|
||||
if storeConfig == nil {
|
||||
storeConfig = map[string]interface{}{}
|
||||
}
|
||||
|
||||
storeConfig["path"] = indexStorePath(path)
|
||||
storeConfig["create_if_missing"] = false
|
||||
storeConfig["error_if_exists"] = false
|
||||
for rck, rcv := range runtimeConfig {
|
||||
storeConfig[rck] = rcv
|
||||
}
|
||||
|
||||
// open the index
|
||||
indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType)
|
||||
if indexTypeConstructor == nil {
|
||||
return nil, ErrorUnknownIndexType
|
||||
}
|
||||
|
||||
rv.i, err = indexTypeConstructor(rv.meta.Storage, storeConfig, Config.analysisQueue)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = rv.i.Open()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func(rv *indexImpl) {
|
||||
if !rv.open {
|
||||
rv.i.Close()
|
||||
}
|
||||
}(rv)
|
||||
|
||||
// now load the mapping
|
||||
indexReader, err := rv.i.Reader()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if cerr := indexReader.Close(); cerr != nil && err == nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
mappingBytes, err := indexReader.GetInternal(mappingInternalKey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var im *mapping.IndexMappingImpl
|
||||
err = json.Unmarshal(mappingBytes, &im)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing mapping JSON: %v\nmapping contents:\n%s", err, string(mappingBytes))
|
||||
}
|
||||
|
||||
// mark the index as open
|
||||
rv.mutex.Lock()
|
||||
defer rv.mutex.Unlock()
|
||||
rv.open = true
|
||||
|
||||
// validate the mapping
|
||||
err = im.Validate()
|
||||
if err != nil {
|
||||
// note even if the mapping is invalid
|
||||
// we still return an open usable index
|
||||
return rv, err
|
||||
}
|
||||
|
||||
rv.m = im
|
||||
indexStats.Register(rv)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
// Advanced returns internal index implementation
|
||||
func (i *indexImpl) Advanced() (index.Index, error) {
|
||||
return i.i, nil
|
||||
}
|
||||
|
||||
// Mapping returns the IndexMapping in use by this
|
||||
// Index.
|
||||
func (i *indexImpl) Mapping() mapping.IndexMapping {
|
||||
return i.m
|
||||
}
|
||||
|
||||
// Index the object with the specified identifier.
|
||||
// The IndexMapping for this index will determine
|
||||
// how the object is indexed.
|
||||
func (i *indexImpl) Index(id string, data interface{}) (err error) {
|
||||
if id == "" {
|
||||
return ErrorEmptyID
|
||||
}
|
||||
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return ErrorIndexClosed
|
||||
}
|
||||
|
||||
doc := document.NewDocument(id)
|
||||
err = i.m.MapDocument(doc, data)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
err = i.i.Update(doc)
|
||||
return
|
||||
}
|
||||
|
||||
// IndexAdvanced takes a document.Document object
|
||||
// skips the mapping and indexes it.
|
||||
func (i *indexImpl) IndexAdvanced(doc *document.Document) (err error) {
|
||||
if doc.ID() == "" {
|
||||
return ErrorEmptyID
|
||||
}
|
||||
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return ErrorIndexClosed
|
||||
}
|
||||
|
||||
err = i.i.Update(doc)
|
||||
return
|
||||
}
|
||||
|
||||
// Delete entries for the specified identifier from
|
||||
// the index.
|
||||
func (i *indexImpl) Delete(id string) (err error) {
|
||||
if id == "" {
|
||||
return ErrorEmptyID
|
||||
}
|
||||
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return ErrorIndexClosed
|
||||
}
|
||||
|
||||
err = i.i.Delete(id)
|
||||
return
|
||||
}
|
||||
|
||||
// Batch executes multiple Index and Delete
|
||||
// operations at the same time. There are often
|
||||
// significant performance benefits when performing
|
||||
// operations in a batch.
|
||||
func (i *indexImpl) Batch(b *Batch) error {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return ErrorIndexClosed
|
||||
}
|
||||
|
||||
return i.i.Batch(b.internal)
|
||||
}
|
||||
|
||||
// Document is used to find the values of all the
|
||||
// stored fields for a document in the index. These
|
||||
// stored fields are put back into a Document object
|
||||
// and returned.
|
||||
func (i *indexImpl) Document(id string) (doc index.Document, err error) {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
indexReader, err := i.i.Reader()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if cerr := indexReader.Close(); err == nil && cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
doc, err = indexReader.Document(id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return doc, nil
|
||||
}
|
||||
|
||||
// DocCount returns the number of documents in the
|
||||
// index.
|
||||
func (i *indexImpl) DocCount() (count uint64, err error) {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return 0, ErrorIndexClosed
|
||||
}
|
||||
|
||||
// open a reader for this search
|
||||
indexReader, err := i.i.Reader()
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error opening index reader %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if cerr := indexReader.Close(); err == nil && cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
count, err = indexReader.DocCount()
|
||||
return
|
||||
}
|
||||
|
||||
// Search executes a search request operation.
|
||||
// Returns a SearchResult object or an error.
|
||||
func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
|
||||
return i.SearchInContext(context.Background(), req)
|
||||
}
|
||||
|
||||
var documentMatchEmptySize int
|
||||
var searchContextEmptySize int
|
||||
var facetResultEmptySize int
|
||||
var documentEmptySize int
|
||||
|
||||
func init() {
|
||||
var dm search.DocumentMatch
|
||||
documentMatchEmptySize = dm.Size()
|
||||
|
||||
var sc search.SearchContext
|
||||
searchContextEmptySize = sc.Size()
|
||||
|
||||
var fr search.FacetResult
|
||||
facetResultEmptySize = fr.Size()
|
||||
|
||||
var d document.Document
|
||||
documentEmptySize = d.Size()
|
||||
}
|
||||
|
||||
// memNeededForSearch is a helper function that returns an estimate of RAM
|
||||
// needed to execute a search request.
|
||||
func memNeededForSearch(req *SearchRequest,
|
||||
searcher search.Searcher,
|
||||
topnCollector *collector.TopNCollector) uint64 {
|
||||
|
||||
backingSize := req.Size + req.From + 1
|
||||
if req.Size+req.From > collector.PreAllocSizeSkipCap {
|
||||
backingSize = collector.PreAllocSizeSkipCap + 1
|
||||
}
|
||||
numDocMatches := backingSize + searcher.DocumentMatchPoolSize()
|
||||
|
||||
estimate := 0
|
||||
|
||||
// overhead, size in bytes from collector
|
||||
estimate += topnCollector.Size()
|
||||
|
||||
// pre-allocing DocumentMatchPool
|
||||
estimate += searchContextEmptySize + numDocMatches*documentMatchEmptySize
|
||||
|
||||
// searcher overhead
|
||||
estimate += searcher.Size()
|
||||
|
||||
// overhead from results, lowestMatchOutsideResults
|
||||
estimate += (numDocMatches + 1) * documentMatchEmptySize
|
||||
|
||||
// additional overhead from SearchResult
|
||||
estimate += reflectStaticSizeSearchResult + reflectStaticSizeSearchStatus
|
||||
|
||||
// overhead from facet results
|
||||
if req.Facets != nil {
|
||||
estimate += len(req.Facets) * facetResultEmptySize
|
||||
}
|
||||
|
||||
// highlighting, store
|
||||
if len(req.Fields) > 0 || req.Highlight != nil {
|
||||
// Size + From => number of hits
|
||||
estimate += (req.Size + req.From) * documentEmptySize
|
||||
}
|
||||
|
||||
return uint64(estimate)
|
||||
}
|
||||
|
||||
// SearchInContext executes a search request operation within the provided
|
||||
// Context. Returns a SearchResult object or an error.
|
||||
func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
searchStart := time.Now()
|
||||
|
||||
if !i.open {
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
var reverseQueryExecution bool
|
||||
if req.SearchBefore != nil {
|
||||
reverseQueryExecution = true
|
||||
req.Sort.Reverse()
|
||||
req.SearchAfter = req.SearchBefore
|
||||
req.SearchBefore = nil
|
||||
}
|
||||
|
||||
var coll *collector.TopNCollector
|
||||
if req.SearchAfter != nil {
|
||||
coll = collector.NewTopNCollectorAfter(req.Size, req.Sort, req.SearchAfter)
|
||||
} else {
|
||||
coll = collector.NewTopNCollector(req.Size, req.From, req.Sort)
|
||||
}
|
||||
|
||||
// open a reader for this search
|
||||
indexReader, err := i.i.Reader()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error opening index reader %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if cerr := indexReader.Close(); err == nil && cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
searcher, err := req.Query.Searcher(indexReader, i.m, search.SearcherOptions{
|
||||
Explain: req.Explain,
|
||||
IncludeTermVectors: req.IncludeLocations || req.Highlight != nil,
|
||||
Score: req.Score,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if serr := searcher.Close(); err == nil && serr != nil {
|
||||
err = serr
|
||||
}
|
||||
}()
|
||||
|
||||
if req.Facets != nil {
|
||||
facetsBuilder := search.NewFacetsBuilder(indexReader)
|
||||
for facetName, facetRequest := range req.Facets {
|
||||
if facetRequest.NumericRanges != nil {
|
||||
// build numeric range facet
|
||||
facetBuilder := facet.NewNumericFacetBuilder(facetRequest.Field, facetRequest.Size)
|
||||
for _, nr := range facetRequest.NumericRanges {
|
||||
facetBuilder.AddRange(nr.Name, nr.Min, nr.Max)
|
||||
}
|
||||
facetsBuilder.Add(facetName, facetBuilder)
|
||||
} else if facetRequest.DateTimeRanges != nil {
|
||||
// build date range facet
|
||||
facetBuilder := facet.NewDateTimeFacetBuilder(facetRequest.Field, facetRequest.Size)
|
||||
dateTimeParser := i.m.DateTimeParserNamed("")
|
||||
for _, dr := range facetRequest.DateTimeRanges {
|
||||
start, end := dr.ParseDates(dateTimeParser)
|
||||
facetBuilder.AddRange(dr.Name, start, end)
|
||||
}
|
||||
facetsBuilder.Add(facetName, facetBuilder)
|
||||
} else {
|
||||
// build terms facet
|
||||
facetBuilder := facet.NewTermsFacetBuilder(facetRequest.Field, facetRequest.Size)
|
||||
facetsBuilder.Add(facetName, facetBuilder)
|
||||
}
|
||||
}
|
||||
coll.SetFacetsBuilder(facetsBuilder)
|
||||
}
|
||||
|
||||
memNeeded := memNeededForSearch(req, searcher, coll)
|
||||
if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil {
|
||||
if cbF, ok := cb.(SearchQueryStartCallbackFn); ok {
|
||||
err = cbF(memNeeded)
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if cb := ctx.Value(SearchQueryEndCallbackKey); cb != nil {
|
||||
if cbF, ok := cb.(SearchQueryEndCallbackFn); ok {
|
||||
defer func() {
|
||||
_ = cbF(memNeeded)
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
err = coll.Collect(ctx, searcher, indexReader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
hits := coll.Results()
|
||||
|
||||
var highlighter highlight.Highlighter
|
||||
|
||||
if req.Highlight != nil {
|
||||
// get the right highlighter
|
||||
highlighter, err = Config.Cache.HighlighterNamed(Config.DefaultHighlighter)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if req.Highlight.Style != nil {
|
||||
highlighter, err = Config.Cache.HighlighterNamed(*req.Highlight.Style)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if highlighter == nil {
|
||||
return nil, fmt.Errorf("no highlighter named `%s` registered", *req.Highlight.Style)
|
||||
}
|
||||
}
|
||||
|
||||
for _, hit := range hits {
|
||||
if i.name != "" {
|
||||
hit.Index = i.name
|
||||
}
|
||||
err = LoadAndHighlightFields(hit, req, i.name, indexReader, highlighter)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
atomic.AddUint64(&i.stats.searches, 1)
|
||||
searchDuration := time.Since(searchStart)
|
||||
atomic.AddUint64(&i.stats.searchTime, uint64(searchDuration))
|
||||
|
||||
if Config.SlowSearchLogThreshold > 0 &&
|
||||
searchDuration > Config.SlowSearchLogThreshold {
|
||||
logger.Printf("slow search took %s - %v", searchDuration, req)
|
||||
}
|
||||
|
||||
if reverseQueryExecution {
|
||||
// reverse the sort back to the original
|
||||
req.Sort.Reverse()
|
||||
// resort using the original order
|
||||
mhs := newSearchHitSorter(req.Sort, hits)
|
||||
req.SortFunc()(mhs)
|
||||
// reset request
|
||||
req.SearchBefore = req.SearchAfter
|
||||
req.SearchAfter = nil
|
||||
}
|
||||
|
||||
return &SearchResult{
|
||||
Status: &SearchStatus{
|
||||
Total: 1,
|
||||
Successful: 1,
|
||||
},
|
||||
Request: req,
|
||||
Hits: hits,
|
||||
Total: coll.Total(),
|
||||
MaxScore: coll.MaxScore(),
|
||||
Took: searchDuration,
|
||||
Facets: coll.FacetResults(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
|
||||
indexName string, r index.IndexReader,
|
||||
highlighter highlight.Highlighter) error {
|
||||
if len(req.Fields) > 0 || highlighter != nil {
|
||||
doc, err := r.Document(hit.ID)
|
||||
if err == nil && doc != nil {
|
||||
if len(req.Fields) > 0 {
|
||||
fieldsToLoad := deDuplicate(req.Fields)
|
||||
for _, f := range fieldsToLoad {
|
||||
doc.VisitFields(func(docF index.Field) {
|
||||
if f == "*" || docF.Name() == f {
|
||||
var value interface{}
|
||||
switch docF := docF.(type) {
|
||||
case index.TextField:
|
||||
value = docF.Text()
|
||||
case index.NumericField:
|
||||
num, err := docF.Number()
|
||||
if err == nil {
|
||||
value = num
|
||||
}
|
||||
case index.DateTimeField:
|
||||
datetime, err := docF.DateTime()
|
||||
if err == nil {
|
||||
value = datetime.Format(time.RFC3339)
|
||||
}
|
||||
case index.BooleanField:
|
||||
boolean, err := docF.Boolean()
|
||||
if err == nil {
|
||||
value = boolean
|
||||
}
|
||||
case index.GeoPointField:
|
||||
lon, err := docF.Lon()
|
||||
if err == nil {
|
||||
lat, err := docF.Lat()
|
||||
if err == nil {
|
||||
value = []float64{lon, lat}
|
||||
}
|
||||
}
|
||||
}
|
||||
if value != nil {
|
||||
hit.AddFieldValue(docF.Name(), value)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
if highlighter != nil {
|
||||
highlightFields := req.Highlight.Fields
|
||||
if highlightFields == nil {
|
||||
// add all fields with matches
|
||||
highlightFields = make([]string, 0, len(hit.Locations))
|
||||
for k := range hit.Locations {
|
||||
highlightFields = append(highlightFields, k)
|
||||
}
|
||||
}
|
||||
for _, hf := range highlightFields {
|
||||
highlighter.BestFragmentsInField(hit, doc, hf, 1)
|
||||
}
|
||||
}
|
||||
} else if doc == nil {
|
||||
// unexpected case, a doc ID that was found as a search hit
|
||||
// was unable to be found during document lookup
|
||||
return ErrorIndexReadInconsistency
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Fields returns the name of all the fields this
|
||||
// Index has operated on.
|
||||
func (i *indexImpl) Fields() (fields []string, err error) {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
indexReader, err := i.i.Reader()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if cerr := indexReader.Close(); err == nil && cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
fields, err = indexReader.Fields()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fields, nil
|
||||
}
|
||||
|
||||
func (i *indexImpl) FieldDict(field string) (index.FieldDict, error) {
|
||||
i.mutex.RLock()
|
||||
|
||||
if !i.open {
|
||||
i.mutex.RUnlock()
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
indexReader, err := i.i.Reader()
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fieldDict, err := indexReader.FieldDict(field)
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &indexImplFieldDict{
|
||||
index: i,
|
||||
indexReader: indexReader,
|
||||
fieldDict: fieldDict,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (i *indexImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
|
||||
i.mutex.RLock()
|
||||
|
||||
if !i.open {
|
||||
i.mutex.RUnlock()
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
indexReader, err := i.i.Reader()
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fieldDict, err := indexReader.FieldDictRange(field, startTerm, endTerm)
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &indexImplFieldDict{
|
||||
index: i,
|
||||
indexReader: indexReader,
|
||||
fieldDict: fieldDict,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (i *indexImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
|
||||
i.mutex.RLock()
|
||||
|
||||
if !i.open {
|
||||
i.mutex.RUnlock()
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
indexReader, err := i.i.Reader()
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fieldDict, err := indexReader.FieldDictPrefix(field, termPrefix)
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &indexImplFieldDict{
|
||||
index: i,
|
||||
indexReader: indexReader,
|
||||
fieldDict: fieldDict,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (i *indexImpl) Close() error {
|
||||
i.mutex.Lock()
|
||||
defer i.mutex.Unlock()
|
||||
|
||||
indexStats.UnRegister(i)
|
||||
|
||||
i.open = false
|
||||
return i.i.Close()
|
||||
}
|
||||
|
||||
func (i *indexImpl) Stats() *IndexStat {
|
||||
return i.stats
|
||||
}
|
||||
|
||||
func (i *indexImpl) StatsMap() map[string]interface{} {
|
||||
return i.stats.statsMap()
|
||||
}
|
||||
|
||||
func (i *indexImpl) GetInternal(key []byte) (val []byte, err error) {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
reader, err := i.i.Reader()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if cerr := reader.Close(); err == nil && cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
val, err = reader.GetInternal(key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return val, nil
|
||||
}
|
||||
|
||||
func (i *indexImpl) SetInternal(key, val []byte) error {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return ErrorIndexClosed
|
||||
}
|
||||
|
||||
return i.i.SetInternal(key, val)
|
||||
}
|
||||
|
||||
func (i *indexImpl) DeleteInternal(key []byte) error {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
if !i.open {
|
||||
return ErrorIndexClosed
|
||||
}
|
||||
|
||||
return i.i.DeleteInternal(key)
|
||||
}
|
||||
|
||||
// NewBatch creates a new empty batch.
|
||||
func (i *indexImpl) NewBatch() *Batch {
|
||||
return &Batch{
|
||||
index: i,
|
||||
internal: index.NewBatch(),
|
||||
}
|
||||
}
|
||||
|
||||
func (i *indexImpl) Name() string {
|
||||
return i.name
|
||||
}
|
||||
|
||||
func (i *indexImpl) SetName(name string) {
|
||||
indexStats.UnRegister(i)
|
||||
i.name = name
|
||||
indexStats.Register(i)
|
||||
}
|
||||
|
||||
type indexImplFieldDict struct {
|
||||
index *indexImpl
|
||||
indexReader index.IndexReader
|
||||
fieldDict index.FieldDict
|
||||
}
|
||||
|
||||
func (f *indexImplFieldDict) Next() (*index.DictEntry, error) {
|
||||
return f.fieldDict.Next()
|
||||
}
|
||||
|
||||
func (f *indexImplFieldDict) Close() error {
|
||||
defer f.index.mutex.RUnlock()
|
||||
err := f.fieldDict.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return f.indexReader.Close()
|
||||
}
|
||||
|
||||
// helper function to remove duplicate entries from slice of strings
|
||||
func deDuplicate(fields []string) []string {
|
||||
entries := make(map[string]struct{})
|
||||
ret := []string{}
|
||||
for _, entry := range fields {
|
||||
if _, exists := entries[entry]; !exists {
|
||||
entries[entry] = struct{}{}
|
||||
ret = append(ret, entry)
|
||||
}
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
type searchHitSorter struct {
|
||||
hits search.DocumentMatchCollection
|
||||
sort search.SortOrder
|
||||
cachedScoring []bool
|
||||
cachedDesc []bool
|
||||
}
|
||||
|
||||
func newSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *searchHitSorter {
|
||||
return &searchHitSorter{
|
||||
sort: sort,
|
||||
hits: hits,
|
||||
cachedScoring: sort.CacheIsScore(),
|
||||
cachedDesc: sort.CacheDescending(),
|
||||
}
|
||||
}
|
||||
|
||||
func (m *searchHitSorter) Len() int { return len(m.hits) }
|
||||
func (m *searchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] }
|
||||
func (m *searchHitSorter) Less(i, j int) bool {
|
||||
c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j])
|
||||
return c < 0
|
||||
}
|
97
vendor/github.com/blevesearch/bleve/v2/index_meta.go
generated
vendored
Normal file
97
vendor/github.com/blevesearch/bleve/v2/index_meta.go
generated
vendored
Normal file
|
@ -0,0 +1,97 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/index/upsidedown"
|
||||
)
|
||||
|
||||
const metaFilename = "index_meta.json"
|
||||
|
||||
type indexMeta struct {
|
||||
Storage string `json:"storage"`
|
||||
IndexType string `json:"index_type"`
|
||||
Config map[string]interface{} `json:"config,omitempty"`
|
||||
}
|
||||
|
||||
func newIndexMeta(indexType string, storage string, config map[string]interface{}) *indexMeta {
|
||||
return &indexMeta{
|
||||
IndexType: indexType,
|
||||
Storage: storage,
|
||||
Config: config,
|
||||
}
|
||||
}
|
||||
|
||||
func openIndexMeta(path string) (*indexMeta, error) {
|
||||
if _, err := os.Stat(path); os.IsNotExist(err) {
|
||||
return nil, ErrorIndexPathDoesNotExist
|
||||
}
|
||||
indexMetaPath := indexMetaPath(path)
|
||||
metaBytes, err := ioutil.ReadFile(indexMetaPath)
|
||||
if err != nil {
|
||||
return nil, ErrorIndexMetaMissing
|
||||
}
|
||||
var im indexMeta
|
||||
err = json.Unmarshal(metaBytes, &im)
|
||||
if err != nil {
|
||||
return nil, ErrorIndexMetaCorrupt
|
||||
}
|
||||
if im.IndexType == "" {
|
||||
im.IndexType = upsidedown.Name
|
||||
}
|
||||
return &im, nil
|
||||
}
|
||||
|
||||
func (i *indexMeta) Save(path string) (err error) {
|
||||
indexMetaPath := indexMetaPath(path)
|
||||
// ensure any necessary parent directories exist
|
||||
err = os.MkdirAll(path, 0700)
|
||||
if err != nil {
|
||||
if os.IsExist(err) {
|
||||
return ErrorIndexPathExists
|
||||
}
|
||||
return err
|
||||
}
|
||||
metaBytes, err := json.Marshal(i)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
indexMetaFile, err := os.OpenFile(indexMetaPath, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
|
||||
if err != nil {
|
||||
if os.IsExist(err) {
|
||||
return ErrorIndexPathExists
|
||||
}
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if ierr := indexMetaFile.Close(); err == nil && ierr != nil {
|
||||
err = ierr
|
||||
}
|
||||
}()
|
||||
_, err = indexMetaFile.Write(metaBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func indexMetaPath(path string) string {
|
||||
return filepath.Join(path, metaFilename)
|
||||
}
|
75
vendor/github.com/blevesearch/bleve/v2/index_stats.go
generated
vendored
Normal file
75
vendor/github.com/blevesearch/bleve/v2/index_stats.go
generated
vendored
Normal file
|
@ -0,0 +1,75 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
type IndexStat struct {
|
||||
searches uint64
|
||||
searchTime uint64
|
||||
i *indexImpl
|
||||
}
|
||||
|
||||
func (is *IndexStat) statsMap() map[string]interface{} {
|
||||
m := map[string]interface{}{}
|
||||
m["index"] = is.i.i.StatsMap()
|
||||
m["searches"] = atomic.LoadUint64(&is.searches)
|
||||
m["search_time"] = atomic.LoadUint64(&is.searchTime)
|
||||
return m
|
||||
}
|
||||
|
||||
func (is *IndexStat) MarshalJSON() ([]byte, error) {
|
||||
m := is.statsMap()
|
||||
return json.Marshal(m)
|
||||
}
|
||||
|
||||
type IndexStats struct {
|
||||
indexes map[string]*IndexStat
|
||||
mutex sync.RWMutex
|
||||
}
|
||||
|
||||
func NewIndexStats() *IndexStats {
|
||||
return &IndexStats{
|
||||
indexes: make(map[string]*IndexStat),
|
||||
}
|
||||
}
|
||||
|
||||
func (i *IndexStats) Register(index Index) {
|
||||
i.mutex.Lock()
|
||||
defer i.mutex.Unlock()
|
||||
i.indexes[index.Name()] = index.Stats()
|
||||
}
|
||||
|
||||
func (i *IndexStats) UnRegister(index Index) {
|
||||
i.mutex.Lock()
|
||||
defer i.mutex.Unlock()
|
||||
delete(i.indexes, index.Name())
|
||||
}
|
||||
|
||||
func (i *IndexStats) String() string {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
bytes, err := json.Marshal(i.indexes)
|
||||
if err != nil {
|
||||
return "error marshaling stats"
|
||||
}
|
||||
return string(bytes)
|
||||
}
|
||||
|
||||
var indexStats *IndexStats
|
65
vendor/github.com/blevesearch/bleve/v2/mapping.go
generated
vendored
Normal file
65
vendor/github.com/blevesearch/bleve/v2/mapping.go
generated
vendored
Normal file
|
@ -0,0 +1,65 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve
|
||||
|
||||
import "github.com/blevesearch/bleve/v2/mapping"
|
||||
|
||||
// NewIndexMapping creates a new IndexMapping that will use all the default indexing rules
|
||||
func NewIndexMapping() *mapping.IndexMappingImpl {
|
||||
return mapping.NewIndexMapping()
|
||||
}
|
||||
|
||||
// NewDocumentMapping returns a new document mapping
|
||||
// with all the default values.
|
||||
func NewDocumentMapping() *mapping.DocumentMapping {
|
||||
return mapping.NewDocumentMapping()
|
||||
}
|
||||
|
||||
// NewDocumentStaticMapping returns a new document
|
||||
// mapping that will not automatically index parts
|
||||
// of a document without an explicit mapping.
|
||||
func NewDocumentStaticMapping() *mapping.DocumentMapping {
|
||||
return mapping.NewDocumentStaticMapping()
|
||||
}
|
||||
|
||||
// NewDocumentDisabledMapping returns a new document
|
||||
// mapping that will not perform any indexing.
|
||||
func NewDocumentDisabledMapping() *mapping.DocumentMapping {
|
||||
return mapping.NewDocumentDisabledMapping()
|
||||
}
|
||||
|
||||
// NewTextFieldMapping returns a default field mapping for text
|
||||
func NewTextFieldMapping() *mapping.FieldMapping {
|
||||
return mapping.NewTextFieldMapping()
|
||||
}
|
||||
|
||||
// NewNumericFieldMapping returns a default field mapping for numbers
|
||||
func NewNumericFieldMapping() *mapping.FieldMapping {
|
||||
return mapping.NewNumericFieldMapping()
|
||||
}
|
||||
|
||||
// NewDateTimeFieldMapping returns a default field mapping for dates
|
||||
func NewDateTimeFieldMapping() *mapping.FieldMapping {
|
||||
return mapping.NewDateTimeFieldMapping()
|
||||
}
|
||||
|
||||
// NewBooleanFieldMapping returns a default field mapping for booleans
|
||||
func NewBooleanFieldMapping() *mapping.FieldMapping {
|
||||
return mapping.NewBooleanFieldMapping()
|
||||
}
|
||||
|
||||
func NewGeoPointFieldMapping() *mapping.FieldMapping {
|
||||
return mapping.NewGeoPointFieldMapping()
|
||||
}
|
99
vendor/github.com/blevesearch/bleve/v2/mapping/analysis.go
generated
vendored
Normal file
99
vendor/github.com/blevesearch/bleve/v2/mapping/analysis.go
generated
vendored
Normal file
|
@ -0,0 +1,99 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package mapping
|
||||
|
||||
type customAnalysis struct {
|
||||
CharFilters map[string]map[string]interface{} `json:"char_filters,omitempty"`
|
||||
Tokenizers map[string]map[string]interface{} `json:"tokenizers,omitempty"`
|
||||
TokenMaps map[string]map[string]interface{} `json:"token_maps,omitempty"`
|
||||
TokenFilters map[string]map[string]interface{} `json:"token_filters,omitempty"`
|
||||
Analyzers map[string]map[string]interface{} `json:"analyzers,omitempty"`
|
||||
DateTimeParsers map[string]map[string]interface{} `json:"date_time_parsers,omitempty"`
|
||||
}
|
||||
|
||||
func (c *customAnalysis) registerAll(i *IndexMappingImpl) error {
|
||||
for name, config := range c.CharFilters {
|
||||
_, err := i.cache.DefineCharFilter(name, config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if len(c.Tokenizers) > 0 {
|
||||
// put all the names in map tracking work to do
|
||||
todo := map[string]struct{}{}
|
||||
for name := range c.Tokenizers {
|
||||
todo[name] = struct{}{}
|
||||
}
|
||||
registered := 1
|
||||
errs := []error{}
|
||||
// as long as we keep making progress, keep going
|
||||
for len(todo) > 0 && registered > 0 {
|
||||
registered = 0
|
||||
errs = []error{}
|
||||
for name := range todo {
|
||||
config := c.Tokenizers[name]
|
||||
_, err := i.cache.DefineTokenizer(name, config)
|
||||
if err != nil {
|
||||
errs = append(errs, err)
|
||||
} else {
|
||||
delete(todo, name)
|
||||
registered++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(errs) > 0 {
|
||||
return errs[0]
|
||||
}
|
||||
}
|
||||
for name, config := range c.TokenMaps {
|
||||
_, err := i.cache.DefineTokenMap(name, config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for name, config := range c.TokenFilters {
|
||||
_, err := i.cache.DefineTokenFilter(name, config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for name, config := range c.Analyzers {
|
||||
_, err := i.cache.DefineAnalyzer(name, config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for name, config := range c.DateTimeParsers {
|
||||
_, err := i.cache.DefineDateTimeParser(name, config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func newCustomAnalysis() *customAnalysis {
|
||||
rv := customAnalysis{
|
||||
CharFilters: make(map[string]map[string]interface{}),
|
||||
Tokenizers: make(map[string]map[string]interface{}),
|
||||
TokenMaps: make(map[string]map[string]interface{}),
|
||||
TokenFilters: make(map[string]map[string]interface{}),
|
||||
Analyzers: make(map[string]map[string]interface{}),
|
||||
DateTimeParsers: make(map[string]map[string]interface{}),
|
||||
}
|
||||
return &rv
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue